LLVM 23.0.0git
TargetLowering.cpp
Go to the documentation of this file.
1//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This implements the TargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
14#include "llvm/ADT/STLExtras.h"
27#include "llvm/IR/DataLayout.h"
30#include "llvm/IR/LLVMContext.h"
31#include "llvm/MC/MCAsmInfo.h"
32#include "llvm/MC/MCExpr.h"
38#include <cctype>
39#include <deque>
40using namespace llvm;
41using namespace llvm::SDPatternMatch;
42
43/// NOTE: The TargetMachine owns TLOF.
47
48// Define the virtual destructor out-of-line for build efficiency.
50
51const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
52 return nullptr;
53}
54
58
59/// Check whether a given call node is in tail position within its function. If
60/// so, it sets Chain to the input chain of the tail call.
///
/// Returns false when the function's "disable-tail-calls" attribute evaluates
/// to true, or when any return attribute other than the ignored ones listed
/// below is present. Otherwise the result is whatever
/// isUsedByReturnOnly(Node, Chain) returns.
62 SDValue &Chain) const {
64
65 // First, check if tail calls have been disabled in this function.
66 if (F.getFnAttribute("disable-tail-calls").getValueAsBool())
67 return false;
68
69 // Conservatively require the attributes of the call to match those of
70 // the return. Ignore following attributes because they don't affect the
71 // call sequence.
  // CallerAttrs starts as a copy of the function's return attributes; the
  // loop strips the ignorable kinds so that only relevant ones remain.
72 AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs());
73 for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
74 Attribute::DereferenceableOrNull, Attribute::NoAlias,
75 Attribute::NonNull, Attribute::NoUndef,
76 Attribute::Range, Attribute::NoFPClass})
77 CallerAttrs.removeAttribute(Attr);
78
  // Anything still left in the builder blocks the tail call.
79 if (CallerAttrs.hasAttributes())
80 return false;
81
82 // It's not safe to eliminate the sign / zero extension of the return value.
  // NOTE(review): the hasAttributes() test just above already returned if any
  // attribute is left in CallerAttrs, so this ZExt/SExt check appears unable
  // to fire -- confirm against AttrBuilder semantics before relying on it.
83 if (CallerAttrs.contains(Attribute::ZExt) ||
84 CallerAttrs.contains(Attribute::SExt))
85 return false;
86
87 // Check if the only use is a function return node.
88 return isUsedByReturnOnly(Node, Chain);
89}
90
92 const uint32_t *CallerPreservedMask,
93 const SmallVectorImpl<CCValAssign> &ArgLocs,
94 const SmallVectorImpl<SDValue> &OutVals) const {
  // Returns true only if every argument that is assigned to a register which
  // CallerPreservedMask does not report as clobbered is passed as a
  // CopyFromReg (optionally wrapped in one AssertZext) of a register whose
  // live-in physical register is that same register. Arguments that are not
  // in registers, or whose register is clobbered per the mask, are not
  // checked.
  // NOTE(review): OutVals is indexed in parallel with ArgLocs, so it is
  // assumed to have at least ArgLocs.size() entries -- confirm with callers.
95 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
96 const CCValAssign &ArgLoc = ArgLocs[I];
97 if (!ArgLoc.isRegLoc())
98 continue;
99 MCRegister Reg = ArgLoc.getLocReg();
100 // Only look at callee saved registers.
101 if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
102 continue;
103 // Check that we pass the value used for the caller.
104 // (We look for a CopyFromReg reading a virtual register that is used
105 // for the function live-in value of register Reg)
106 SDValue Value = OutVals[I];
    // Look through a single AssertZext wrapper before inspecting the value.
107 if (Value->getOpcode() == ISD::AssertZext)
108 Value = Value.getOperand(0);
109 if (Value->getOpcode() != ISD::CopyFromReg)
110 return false;
    // Operand 1 of the CopyFromReg is read as the RegisterSDNode naming the
    // register being copied from.
111 Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
112 if (MRI.getLiveInPhysReg(ArgReg) != Reg)
113 return false;
114 }
115 return true;
116}
117
118/// Set CallLoweringInfo attribute flags based on a call instruction
119/// and called function attributes.
121 unsigned ArgIdx) {
122 IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
123 IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
124 IsNoExt = Call->paramHasAttr(ArgIdx, Attribute::NoExt);
125 IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
126 IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
127 IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
128 IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
129 IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
130 IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
131 IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
132 IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
133 IsSwiftAsync = Call->paramHasAttr(ArgIdx, Attribute::SwiftAsync);
134 IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
135 Alignment = Call->getParamStackAlign(ArgIdx);
136 IndirectType = nullptr;
138 "multiple ABI attributes?");
139 if (IsByVal) {
140 IndirectType = Call->getParamByValType(ArgIdx);
141 if (!Alignment)
142 Alignment = Call->getParamAlign(ArgIdx);
143 }
144 if (IsPreallocated)
145 IndirectType = Call->getParamPreallocatedType(ArgIdx);
146 if (IsInAlloca)
147 IndirectType = Call->getParamInAllocaType(ArgIdx);
148 if (IsSRet)
149 IndirectType = Call->getParamStructRetType(ArgIdx);
150}
151
152/// Generate a libcall taking the given operands as arguments and returning a
153/// result of type RetVT.
154std::pair<SDValue, SDValue>
155TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl,
157 MakeLibCallOptions CallOptions, const SDLoc &dl,
158 SDValue InChain) const {
159 if (LibcallImpl == RTLIB::Unsupported)
160 reportFatalInternalError("unsupported library call operation");
161
162 if (!InChain)
163 InChain = DAG.getEntryNode();
164
166 Args.reserve(Ops.size());
167
168 ArrayRef<Type *> OpsTypeOverrides = CallOptions.OpsTypeOverrides;
169 for (unsigned i = 0; i < Ops.size(); ++i) {
170 SDValue NewOp = Ops[i];
171 Type *Ty = i < OpsTypeOverrides.size() && OpsTypeOverrides[i]
172 ? OpsTypeOverrides[i]
173 : NewOp.getValueType().getTypeForEVT(*DAG.getContext());
174 TargetLowering::ArgListEntry Entry(NewOp, Ty);
175 if (CallOptions.IsSoften)
176 Entry.OrigTy =
177 CallOptions.OpsVTBeforeSoften[i].getTypeForEVT(*DAG.getContext());
178
179 Entry.IsSExt =
180 shouldSignExtendTypeInLibCall(Entry.Ty, CallOptions.IsSigned);
181 Entry.IsZExt = !Entry.IsSExt;
182
183 if (CallOptions.IsSoften &&
185 Entry.IsSExt = Entry.IsZExt = false;
186 }
187 Args.push_back(Entry);
188 }
189
190 SDValue Callee =
191 DAG.getExternalSymbol(LibcallImpl, getPointerTy(DAG.getDataLayout()));
192
193 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
194 Type *OrigRetTy = RetTy;
196 bool signExtend = shouldSignExtendTypeInLibCall(RetTy, CallOptions.IsSigned);
197 bool zeroExtend = !signExtend;
198
199 if (CallOptions.IsSoften) {
200 OrigRetTy = CallOptions.RetVTBeforeSoften.getTypeForEVT(*DAG.getContext());
202 signExtend = zeroExtend = false;
203 }
204
205 CLI.setDebugLoc(dl)
206 .setChain(InChain)
207 .setLibCallee(getLibcallImplCallingConv(LibcallImpl), RetTy, OrigRetTy,
208 Callee, std::move(Args))
209 .setNoReturn(CallOptions.DoesNotReturn)
212 .setSExtResult(signExtend)
213 .setZExtResult(zeroExtend);
214 return LowerCallTo(CLI);
215}
216
218 LLVMContext &Context, std::vector<EVT> &MemOps, unsigned Limit,
219 const MemOp &Op, unsigned DstAS, unsigned SrcAS,
220 const AttributeList &FuncAttributes, EVT *LargestVT) const {
221 if (Limit != ~unsigned(0) && Op.isMemcpyWithFixedDstAlign() &&
222 Op.getSrcAlign() < Op.getDstAlign())
223 return false;
224
225 EVT VT = getOptimalMemOpType(Context, Op, FuncAttributes);
226
227 if (VT == MVT::Other) {
228 // Use the largest integer type whose alignment constraints are satisfied.
229 // We only need to check DstAlign here as SrcAlign is always greater or
230 // equal to DstAlign (or zero).
231 VT = MVT::LAST_INTEGER_VALUETYPE;
232 if (Op.isFixedDstAlign())
233 while (Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
234 !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign()))
236 assert(VT.isInteger());
237
238 // Find the largest legal integer type.
239 MVT LVT = MVT::LAST_INTEGER_VALUETYPE;
240 while (!isTypeLegal(LVT))
241 LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
242 assert(LVT.isInteger());
243
244 // If the type we've chosen is larger than the largest legal integer type
245 // then use that instead.
246 if (VT.bitsGT(LVT))
247 VT = LVT;
248 }
249
250 unsigned NumMemOps = 0;
251 uint64_t Size = Op.size();
252 while (Size) {
253 unsigned VTSize = VT.getSizeInBits() / 8;
254 while (VTSize > Size) {
255 // For now, only use non-vector loads / stores for the left-over pieces.
256 EVT NewVT = VT;
257 unsigned NewVTSize;
258
259 bool Found = false;
260 if (VT.isVector() || VT.isFloatingPoint()) {
261 NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
264 Found = true;
265 else if (NewVT == MVT::i64 &&
267 isSafeMemOpType(MVT::f64)) {
268 // i64 is usually not legal on 32-bit targets, but f64 may be.
269 NewVT = MVT::f64;
270 Found = true;
271 }
272 }
273
274 if (!Found) {
275 do {
276 NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
277 if (NewVT == MVT::i8)
278 break;
279 } while (!isSafeMemOpType(NewVT.getSimpleVT()));
280 }
281 NewVTSize = NewVT.getSizeInBits() / 8;
282
283 // If the new VT cannot cover all of the remaining bits, then consider
284 // issuing a (or a pair of) unaligned and overlapping load / store.
285 unsigned Fast;
286 if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
288 VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
290 Fast)
291 VTSize = Size;
292 else {
293 VT = NewVT;
294 VTSize = NewVTSize;
295 }
296 }
297
298 if (++NumMemOps > Limit)
299 return false;
300
301 MemOps.push_back(VT);
302 Size -= VTSize;
303 }
304
305 return true;
306}
307
308/// Soften the operands of a comparison. This code is shared among BR_CC,
309/// SELECT_CC, and SETCC handlers.
///
/// Convenience overload without a chain parameter: it forwards every argument
/// to the softenSetCCOperands overload that takes a chain, supplying a local
/// default-constructed SDValue for it. Whatever the callee stores into that
/// chain is not passed back to the caller.
311 SDValue &NewLHS, SDValue &NewRHS,
312 ISD::CondCode &CCCode,
313 const SDLoc &dl, const SDValue OldLHS,
314 const SDValue OldRHS) const {
  // Default-constructed (empty) chain handed to the chain-taking overload.
315 SDValue Chain;
316 return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
317 OldRHS, Chain);
318}
319
321 SDValue &NewLHS, SDValue &NewRHS,
322 ISD::CondCode &CCCode,
323 const SDLoc &dl, const SDValue OldLHS,
324 const SDValue OldRHS,
325 SDValue &Chain,
326 bool IsSignaling) const {
327 // FIXME: Currently we cannot really respect all IEEE predicates because
328 // libgcc does not support them. We can update this code when libgcc
329 // provides such functions.
330
331 assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
332 && "Unsupported setcc type!");
333
334 // Expand into one or more soft-fp libcall(s).
335 RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
336 bool ShouldInvertCC = false;
337 switch (CCCode) {
338 case ISD::SETEQ:
339 case ISD::SETOEQ:
340 LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
341 (VT == MVT::f64) ? RTLIB::OEQ_F64 :
342 (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
343 break;
344 case ISD::SETNE:
345 case ISD::SETUNE:
346 LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
347 (VT == MVT::f64) ? RTLIB::UNE_F64 :
348 (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
349 break;
350 case ISD::SETGE:
351 case ISD::SETOGE:
352 LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
353 (VT == MVT::f64) ? RTLIB::OGE_F64 :
354 (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
355 break;
356 case ISD::SETLT:
357 case ISD::SETOLT:
358 LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
359 (VT == MVT::f64) ? RTLIB::OLT_F64 :
360 (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
361 break;
362 case ISD::SETLE:
363 case ISD::SETOLE:
364 LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
365 (VT == MVT::f64) ? RTLIB::OLE_F64 :
366 (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
367 break;
368 case ISD::SETGT:
369 case ISD::SETOGT:
370 LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
371 (VT == MVT::f64) ? RTLIB::OGT_F64 :
372 (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
373 break;
374 case ISD::SETO:
375 ShouldInvertCC = true;
376 [[fallthrough]];
377 case ISD::SETUO:
378 LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
379 (VT == MVT::f64) ? RTLIB::UO_F64 :
380 (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
381 break;
382 case ISD::SETONE:
383 // SETONE = O && UNE
384 ShouldInvertCC = true;
385 [[fallthrough]];
386 case ISD::SETUEQ:
387 LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
388 (VT == MVT::f64) ? RTLIB::UO_F64 :
389 (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
390 LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
391 (VT == MVT::f64) ? RTLIB::OEQ_F64 :
392 (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
393 break;
394 default:
395 // Invert CC for unordered comparisons
396 ShouldInvertCC = true;
397 switch (CCCode) {
398 case ISD::SETULT:
399 LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
400 (VT == MVT::f64) ? RTLIB::OGE_F64 :
401 (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
402 break;
403 case ISD::SETULE:
404 LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
405 (VT == MVT::f64) ? RTLIB::OGT_F64 :
406 (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
407 break;
408 case ISD::SETUGT:
409 LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
410 (VT == MVT::f64) ? RTLIB::OLE_F64 :
411 (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
412 break;
413 case ISD::SETUGE:
414 LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
415 (VT == MVT::f64) ? RTLIB::OLT_F64 :
416 (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
417 break;
418 default: llvm_unreachable("Do not know how to soften this setcc!");
419 }
420 }
421
422 // Use the target specific return value for comparison lib calls.
424 SDValue Ops[2] = {NewLHS, NewRHS};
426 EVT OpsVT[2] = { OldLHS.getValueType(),
427 OldRHS.getValueType() };
428 CallOptions.setTypeListBeforeSoften(OpsVT, RetVT);
429 auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
430 NewLHS = Call.first;
431 NewRHS = DAG.getConstant(0, dl, RetVT);
432
433 RTLIB::LibcallImpl LC1Impl = getLibcallImpl(LC1);
434 if (LC1Impl == RTLIB::Unsupported) {
436 "no libcall available to soften floating-point compare");
437 }
438
439 CCCode = getSoftFloatCmpLibcallPredicate(LC1Impl);
440 if (ShouldInvertCC) {
441 assert(RetVT.isInteger());
442 CCCode = getSetCCInverse(CCCode, RetVT);
443 }
444
445 if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
446 // Update Chain.
447 Chain = Call.second;
448 } else {
449 RTLIB::LibcallImpl LC2Impl = getLibcallImpl(LC2);
450 if (LC2Impl == RTLIB::Unsupported) {
452 "no libcall available to soften floating-point compare");
453 }
454
455 assert(CCCode == (ShouldInvertCC ? ISD::SETEQ : ISD::SETNE) &&
456 "unordered call should be simple boolean");
457
458 EVT SetCCVT =
459 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
461 NewLHS = DAG.getNode(ISD::AssertZext, dl, RetVT, Call.first,
462 DAG.getValueType(MVT::i1));
463 }
464
465 SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
466 auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
467 CCCode = getSoftFloatCmpLibcallPredicate(LC2Impl);
468 if (ShouldInvertCC)
469 CCCode = getSetCCInverse(CCCode, RetVT);
470 NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
471 if (Chain)
472 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
473 Call2.second);
474 NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
475 Tmp.getValueType(), Tmp, NewLHS);
476 NewRHS = SDValue();
477 }
478}
479
480/// Return the entry encoding for a jump table in the current function. The
481/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
483 // In non-pic modes, just use the address of a block.
486
487 // Otherwise, use a label difference.
489}
490
492 SelectionDAG &DAG) const {
  // This implementation hands Table back unchanged; DAG is not used here.
493 return Table;
494}
495
496/// This returns the relocation base for the given PIC jumptable, the same as
497/// getPICJumpTableRelocBase, but as an MCExpr.
///
/// The expression is a symbol reference, created in \p Ctx, to the symbol
/// that MF->getJTISymbol(JTI, Ctx) yields for jump table index \p JTI.
498const MCExpr *
500 unsigned JTI,MCContext &Ctx) const{
501 // The normal PIC reloc base is the label at the start of the jump table.
502 return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
503}
504
506 SDValue Addr, int JTI,
507 SelectionDAG &DAG) const {
508 SDValue Chain = Value;
509 // Jump table debug info is only needed if CodeView is enabled.
511 Chain = DAG.getJumpTableDebugInfo(JTI, Chain, dl);
512 }
513 return DAG.getNode(ISD::BRIND, dl, MVT::Other, Chain, Addr);
514}
515
516bool
518 const TargetMachine &TM = getTargetMachine();
519 const GlobalValue *GV = GA->getGlobal();
520
521 // If the address is not even local to this DSO we will have to load it from
522 // a got and then add the offset.
523 if (!TM.shouldAssumeDSOLocal(GV))
524 return false;
525
526 // If the code is position independent we will have to add a base register.
528 return false;
529
530 // Otherwise we can do it.
531 return true;
532}
533
534//===----------------------------------------------------------------------===//
535// Optimization Methods
536//===----------------------------------------------------------------------===//
537
538/// If the specified instruction has a constant integer operand and there are
539/// bits set in that constant that are not demanded, then clear those bits and
540/// return true.
///
/// Only XOR, AND and OR nodes whose second operand is a non-opaque
/// ConstantSDNode are rewritten here. The target hook
/// targetShrinkDemandedConstant is consulted first; when it returns true the
/// result is whether TLO.New holds a node. Returns false when no bits or no
/// elements are demanded, and for every opcode not handled below.
542 const APInt &DemandedBits,
543 const APInt &DemandedElts,
544 TargetLoweringOpt &TLO) const {
545 SDLoc DL(Op);
546 unsigned Opcode = Op.getOpcode();
547
548 // Early-out if we've ended up calling an undemanded node, leave this to
549 // constant folding.
550 if (DemandedBits.isZero() || DemandedElts.isZero())
551 return false;
552
553 // Do target-specific constant optimization.
  // The node pointer converts to bool: true iff TLO.New is non-null.
554 if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
555 return TLO.New.getNode();
556
557 // FIXME: ISD::SELECT, ISD::SELECT_CC
558 switch (Opcode) {
559 default:
560 break;
561 case ISD::XOR:
562 case ISD::AND:
563 case ISD::OR: {
    // Only a plain (non-opaque) constant as operand 1 is eligible.
564 auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
565 if (!Op1C || Op1C->isOpaque())
566 return false;
567
568 // If this is a 'not' op, don't touch it because that's a canonical form.
    // Here "not" means an XOR whose constant has every demanded bit set.
569 const APInt &C = Op1C->getAPIntValue();
570 if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(C))
571 return false;
572
    // C has bits set outside DemandedBits: rebuild the node with the constant
    // masked down to DemandedBits & C, keeping operand 0 and the node flags.
573 if (!C.isSubsetOf(DemandedBits)) {
574 EVT VT = Op.getValueType();
575 SDValue NewC = TLO.DAG.getConstant(DemandedBits & C, DL, VT);
576 SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC,
577 Op->getFlags());
578 return TLO.CombineTo(Op, NewOp);
579 }
580
581 break;
582 }
583 }
584
585 return false;
586}
587
589 const APInt &DemandedBits,
590 TargetLoweringOpt &TLO) const {
591 EVT VT = Op.getValueType();
592 APInt DemandedElts = VT.isVector()
594 : APInt(1, 1);
595 return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
596}
597
598/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
599/// This uses isTruncateFree/isZExtFree and ANY_EXTEND for the widening cast,
600/// but it could be generalized for targets with other types of implicit
601/// widening casts.
603 const APInt &DemandedBits,
604 TargetLoweringOpt &TLO) const {
605 assert(Op.getNumOperands() == 2 &&
606 "ShrinkDemandedOp only supports binary operators!");
607 assert(Op.getNode()->getNumValues() == 1 &&
608 "ShrinkDemandedOp only supports nodes with one result!");
609
610 EVT VT = Op.getValueType();
611 SelectionDAG &DAG = TLO.DAG;
612 SDLoc dl(Op);
613
614 // Early return, as this function cannot handle vector types.
615 if (VT.isVector())
616 return false;
617
618 assert(Op.getOperand(0).getValueType().getScalarSizeInBits() == BitWidth &&
619 Op.getOperand(1).getValueType().getScalarSizeInBits() == BitWidth &&
620 "ShrinkDemandedOp only supports operands that have the same size!");
621
622 // Don't do this if the node has another user, which may require the
623 // full value.
624 if (!Op.getNode()->hasOneUse())
625 return false;
626
627 // Search for the smallest integer type with free casts to and from
628 // Op's type. For expedience, just check power-of-2 integer types.
629 unsigned DemandedSize = DemandedBits.getActiveBits();
630 for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
631 SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
632 EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
633 if (isTruncateFree(Op, SmallVT) && isZExtFree(SmallVT, VT)) {
634 // We found a type with free casts.
635
636 // If the operation has the 'disjoint' flag, then the
637 // operands on the new node are also disjoint.
638 SDNodeFlags Flags(Op->getFlags().hasDisjoint() ? SDNodeFlags::Disjoint
640 unsigned Opcode = Op.getOpcode();
641 if (Opcode == ISD::PTRADD) {
642 // It isn't a ptradd anymore if it doesn't operate on the entire
643 // pointer.
644 Opcode = ISD::ADD;
645 }
646 SDValue X = DAG.getNode(
647 Opcode, dl, SmallVT,
648 DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
649 DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)), Flags);
650 assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
651 SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, VT, X);
652 return TLO.CombineTo(Op, Z);
653 }
654 }
655 return false;
656}
657
659 DAGCombinerInfo &DCI) const {
660 SelectionDAG &DAG = DCI.DAG;
661 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
662 !DCI.isBeforeLegalizeOps());
663 KnownBits Known;
664
665 bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
666 if (Simplified) {
667 DCI.AddToWorklist(Op.getNode());
669 }
670 return Simplified;
671}
672
674 const APInt &DemandedElts,
675 DAGCombinerInfo &DCI) const {
676 SelectionDAG &DAG = DCI.DAG;
677 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
678 !DCI.isBeforeLegalizeOps());
679 KnownBits Known;
680
681 bool Simplified =
682 SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO);
683 if (Simplified) {
684 DCI.AddToWorklist(Op.getNode());
686 }
687 return Simplified;
688}
689
691 KnownBits &Known,
693 unsigned Depth,
694 bool AssumeSingleUse) const {
695 EVT VT = Op.getValueType();
696
697 // Since the number of lanes in a scalable vector is unknown at compile time,
698 // we track one bit which is implicitly broadcast to all lanes. This means
699 // that all lanes in a scalable vector are considered demanded.
700 APInt DemandedElts = VT.isFixedLengthVector()
702 : APInt(1, 1);
703 return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
704 AssumeSingleUse);
705}
706
707// TODO: Under what circumstances can we create nodes? Constant folding?
709 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
710 SelectionDAG &DAG, unsigned Depth) const {
711 EVT VT = Op.getValueType();
712
713 // Limit search depth.
715 return SDValue();
716
717 // Ignore UNDEFs.
718 if (Op.isUndef())
719 return SDValue();
720
721 // Not demanding any bits/elts from Op.
722 if (DemandedBits == 0 || DemandedElts == 0)
723 return DAG.getUNDEF(VT);
724
725 bool IsLE = DAG.getDataLayout().isLittleEndian();
726 unsigned NumElts = DemandedElts.getBitWidth();
727 unsigned BitWidth = DemandedBits.getBitWidth();
728 KnownBits LHSKnown, RHSKnown;
729 switch (Op.getOpcode()) {
730 case ISD::BITCAST: {
731 if (VT.isScalableVector())
732 return SDValue();
733
734 SDValue Src = peekThroughBitcasts(Op.getOperand(0));
735 EVT SrcVT = Src.getValueType();
736 EVT DstVT = Op.getValueType();
737 if (SrcVT == DstVT)
738 return Src;
739
740 unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
741 unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
742 if (NumSrcEltBits == NumDstEltBits)
744 Src, DemandedBits, DemandedElts, DAG, Depth + 1))
745 return DAG.getBitcast(DstVT, V);
746
747 if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) {
748 unsigned Scale = NumDstEltBits / NumSrcEltBits;
749 unsigned NumSrcElts = SrcVT.getVectorNumElements();
750 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
751 for (unsigned i = 0; i != Scale; ++i) {
752 unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
753 unsigned BitOffset = EltOffset * NumSrcEltBits;
754 DemandedSrcBits |= DemandedBits.extractBits(NumSrcEltBits, BitOffset);
755 }
756 // Recursive calls below may turn non-demanded elements into poison, so we
757 // need to demand all smaller source elements that map to a demanded
758 // destination element.
759 APInt DemandedSrcElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
760
762 Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
763 return DAG.getBitcast(DstVT, V);
764 }
765
766 // TODO - bigendian once we have test coverage.
767 if (IsLE && (NumSrcEltBits % NumDstEltBits) == 0) {
768 unsigned Scale = NumSrcEltBits / NumDstEltBits;
769 unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
770 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
771 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
772 for (unsigned i = 0; i != NumElts; ++i)
773 if (DemandedElts[i]) {
774 unsigned Offset = (i % Scale) * NumDstEltBits;
775 DemandedSrcBits.insertBits(DemandedBits, Offset);
776 DemandedSrcElts.setBit(i / Scale);
777 }
778
780 Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
781 return DAG.getBitcast(DstVT, V);
782 }
783
784 break;
785 }
786 case ISD::AND: {
787 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
788 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
789
790 // If all of the demanded bits are known 1 on one side, return the other.
791 // These bits cannot contribute to the result of the 'and' in this
792 // context.
793 if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
794 return Op.getOperand(0);
795 if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
796 return Op.getOperand(1);
797 break;
798 }
799 case ISD::OR: {
800 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
801 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
802
803 // If all of the demanded bits are known zero on one side, return the
804 // other. These bits cannot contribute to the result of the 'or' in this
805 // context.
806 if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
807 return Op.getOperand(0);
808 if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
809 return Op.getOperand(1);
810 break;
811 }
812 case ISD::XOR: {
813 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
814 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
815
816 // If all of the demanded bits are known zero on one side, return the
817 // other.
818 if (DemandedBits.isSubsetOf(RHSKnown.Zero))
819 return Op.getOperand(0);
820 if (DemandedBits.isSubsetOf(LHSKnown.Zero))
821 return Op.getOperand(1);
822 break;
823 }
824 case ISD::ADD:
825 case ISD::MUL:
826 case ISD::SMIN:
827 case ISD::SMAX:
828 case ISD::UMIN:
829 case ISD::UMAX: {
830 if (DAG.isIdentityElement(Op.getOpcode(), Op->getFlags(), Op.getOperand(1),
831 DemandedElts, 1, Depth + 1))
832 return Op.getOperand(0);
833
834 if (DAG.isIdentityElement(Op.getOpcode(), Op->getFlags(), Op.getOperand(0),
835 DemandedElts, 0, Depth + 1))
836 return Op.getOperand(1);
837 break;
838 }
839 case ISD::SHL: {
840 // If we are only demanding sign bits then we can use the shift source
841 // directly.
842 if (std::optional<unsigned> MaxSA =
843 DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
844 SDValue Op0 = Op.getOperand(0);
845 unsigned ShAmt = *MaxSA;
846 unsigned NumSignBits =
847 DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
848 unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
849 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
850 return Op0;
851 }
852 break;
853 }
854 case ISD::SRL: {
855 // If we are only demanding sign bits then we can use the shift source
856 // directly.
857 if (std::optional<unsigned> MaxSA =
858 DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
859 SDValue Op0 = Op.getOperand(0);
860 unsigned ShAmt = *MaxSA;
861 // Must already be signbits in DemandedBits bounds, and can't demand any
862 // shifted in zeroes.
863 if (DemandedBits.countl_zero() >= ShAmt) {
864 unsigned NumSignBits =
865 DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
866 if (DemandedBits.countr_zero() >= (BitWidth - NumSignBits))
867 return Op0;
868 }
869 }
870 break;
871 }
872 case ISD::SETCC: {
873 SDValue Op0 = Op.getOperand(0);
874 SDValue Op1 = Op.getOperand(1);
875 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
876 // If (1) we only need the sign-bit, (2) the setcc operands are the same
877 // width as the setcc result, and (3) the result of a setcc conforms to 0 or
878 // -1, we may be able to bypass the setcc.
879 if (DemandedBits.isSignMask() &&
883 // If we're testing X < 0, then this compare isn't needed - just use X!
884 // FIXME: We're limiting to integer types here, but this should also work
885 // if we don't care about FP signed-zero. The use of SETLT with FP means
886 // that we don't care about NaNs.
887 if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
889 return Op0;
890 }
891 break;
892 }
894 // If none of the extended bits are demanded, eliminate the sextinreg.
895 SDValue Op0 = Op.getOperand(0);
896 EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
897 unsigned ExBits = ExVT.getScalarSizeInBits();
898 if (DemandedBits.getActiveBits() <= ExBits &&
900 return Op0;
901 // If the input is already sign extended, just drop the extension.
902 unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
903 if (NumSignBits >= (BitWidth - ExBits + 1))
904 return Op0;
905 break;
906 }
910 if (VT.isScalableVector())
911 return SDValue();
912
913 // If we only want the lowest element and none of the extended bits, then we
914 // can return the bitcasted source vector.
915 SDValue Src = Op.getOperand(0);
916 EVT SrcVT = Src.getValueType();
917 EVT DstVT = Op.getValueType();
918 if (IsLE && DemandedElts == 1 &&
919 DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
920 DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
921 return DAG.getBitcast(DstVT, Src);
922 }
923 break;
924 }
926 if (VT.isScalableVector())
927 return SDValue();
928
929 // If we don't demand the inserted element, return the base vector.
930 SDValue Vec = Op.getOperand(0);
931 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
932 EVT VecVT = Vec.getValueType();
933 if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
934 !DemandedElts[CIdx->getZExtValue()])
935 return Vec;
936 break;
937 }
939 if (VT.isScalableVector())
940 return SDValue();
941
942 SDValue Vec = Op.getOperand(0);
943 SDValue Sub = Op.getOperand(1);
944 uint64_t Idx = Op.getConstantOperandVal(2);
945 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
946 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
947 // If we don't demand the inserted subvector, return the base vector.
948 if (DemandedSubElts == 0)
949 return Vec;
950 break;
951 }
952 case ISD::VECTOR_SHUFFLE: {
954 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
955
956 // If all the demanded elts are from one operand and are inline,
957 // then we can use the operand directly.
958 bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
959 for (unsigned i = 0; i != NumElts; ++i) {
960 int M = ShuffleMask[i];
961 if (M < 0 || !DemandedElts[i])
962 continue;
963 AllUndef = false;
964 IdentityLHS &= (M == (int)i);
965 IdentityRHS &= ((M - NumElts) == i);
966 }
967
968 if (AllUndef)
969 return DAG.getUNDEF(Op.getValueType());
970 if (IdentityLHS)
971 return Op.getOperand(0);
972 if (IdentityRHS)
973 return Op.getOperand(1);
974 break;
975 }
976 default:
977 // TODO: Probably okay to remove after audit; here to reduce change size
978 // in initial enablement patch for scalable vectors
979 if (VT.isScalableVector())
980 return SDValue();
981
982 if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
984 Op, DemandedBits, DemandedElts, DAG, Depth))
985 return V;
986 break;
987 }
988 return SDValue();
989}
990
993 unsigned Depth) const {
// Convenience overload: derives a demanded-elements mask from Op's value type
// and forwards to the SimplifyMultipleUseDemandedBits form that takes an
// explicit DemandedElts argument, returning whatever that call returns.
// NOTE(review): the opening lines of this signature and the true arm of the
// conditional below are not present in this listing -- confirm against the
// original file before relying on this description.
994 EVT VT = Op.getValueType();
995 // Since the number of lanes in a scalable vector is unknown at compile time,
996 // we track one bit which is implicitly broadcast to all lanes. This means
997 // that all lanes in a scalable vector are considered demanded.
// Any type that is not a fixed-length vector takes the APInt(1, 1) arm: a
// one-bit mask with its single bit set.
998 APInt DemandedElts = VT.isFixedLengthVector()
1000 : APInt(1, 1);
1001 return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
1002 Depth);
1003}
1004
1006 SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
1007 unsigned Depth) const {
// Element-only query: the caller restricts which elements are demanded but
// not which bits, so an all-ones mask of Op's scalar width is paired with the
// caller's DemandedElts and handed to SimplifyMultipleUseDemandedBits. The
// result of that call is returned unchanged.
// NOTE(review): the line naming this function is not present in this
// listing -- confirm the name against the original file.
1008 APInt DemandedBits = APInt::getAllOnes(Op.getScalarValueSizeInBits());
1009 return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
1010 Depth);
1011}
1012
1013// Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1).
1014// or to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1).
//
// Op must be an ISD::SRL or ISD::SRA node (asserted below). On success the
// returned value has Op's type and is built as
//   ext_or_trunc(AVG(ext_or_trunc(A), ext_or_trunc(B)))
// where the AVG node may be computed in a narrower integer type. An empty
// SDValue is returned whenever the pattern does not match or the transform
// is rejected.
//
// TLI          - queried for legality of the AVG opcode on candidate types.
// DemandedBits - bits of the result that are used; only its sign bit is
//                inspected here (SRL case).
// DemandedElts - vector lanes to consider for constant-splat matching and for
//                the sign-bit / known-bits queries.
// Depth        - recursion depth forwarded to the value-tracking queries.
// NOTE(review): the first lines of the signature (function name and the
// parameters that declare Op and TLO) are not present in this listing --
// confirm against the original file.
1017 const TargetLowering &TLI,
1018 const APInt &DemandedBits,
1019 const APInt &DemandedElts, unsigned Depth) {
1020 assert((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) &&
1021 "SRL or SRA node is required here!");
1022 // Is the right shift using an immediate value of 1?
1023 ConstantSDNode *N1C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
1024 if (!N1C || !N1C->isOne())
1025 return SDValue();
1026
1027 // We are looking for an avgfloor
1028 // add(ext, ext)
1029 // or one of these as an avgceil
1030 // add(add(ext, ext), 1)
1031 // add(add(ext, 1), ext)
1032 // add(ext, add(ext, 1))
1033 SDValue Add = Op.getOperand(0);
1034 if (Add.getOpcode() != ISD::ADD)
1035 return SDValue();
1036
// Start by assuming the floor form: the two averaged values are the operands
// of the outer add. Add2 stays empty unless a nested "+1" add is matched.
1037 SDValue ExtOpA = Add.getOperand(0);
1038 SDValue ExtOpB = Add.getOperand(1);
1039 SDValue Add2;
// MatchOperands(Op1, Op2, Op3, A): A is a nested add with operands Op1/Op2 and
// Op3 is the other operand of the outer add. If Op2 or Op3 is a constant (or
// splat) one, record the remaining two values as ExtOpA/ExtOpB, remember the
// nested add in Add2, and report a match.
1040 auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3, SDValue A) {
1041 ConstantSDNode *ConstOp;
1042 if ((ConstOp = isConstOrConstSplat(Op2, DemandedElts)) &&
1043 ConstOp->isOne()) {
1044 ExtOpA = Op1;
1045 ExtOpB = Op3;
1046 Add2 = A;
1047 return true;
1048 }
1049 if ((ConstOp = isConstOrConstSplat(Op3, DemandedElts)) &&
1050 ConstOp->isOne()) {
1051 ExtOpA = Op1;
1052 ExtOpB = Op2;
1053 Add2 = A;
1054 return true;
1055 }
1056 return false;
1057 };
// The ceil form is recognised when either operand of the outer add is itself
// an add for which MatchOperands finds the constant one.
1058 bool IsCeil =
1059 (ExtOpA.getOpcode() == ISD::ADD &&
1060 MatchOperands(ExtOpA.getOperand(0), ExtOpA.getOperand(1), ExtOpB, ExtOpA)) ||
1061 (ExtOpB.getOpcode() == ISD::ADD &&
1062 MatchOperands(ExtOpB.getOperand(0), ExtOpB.getOperand(1), ExtOpA, ExtOpB));
1063
1064 // If the shift is signed (sra):
1065 // - Needs >= 2 sign bits for both operands.
1066 // - Needs >= 2 zero bits.
1067 // If the shift is unsigned (srl):
1068 // - Needs >= 1 zero bit for both operands.
1069 // - Needs 1 demanded bit zero and >= 2 sign bits.
1070 SelectionDAG &DAG = TLO.DAG;
1071 unsigned ShiftOpc = Op.getOpcode();
1072 bool IsSigned = false;
// Note: this local shadows the KnownBits type name for the rest of the
// function; it holds the count of redundant high bits chosen below.
1073 unsigned KnownBits;
1074 unsigned NumSignedA = DAG.ComputeNumSignBits(ExtOpA, DemandedElts, Depth);
1075 unsigned NumSignedB = DAG.ComputeNumSignBits(ExtOpB, DemandedElts, Depth);
// NumSigned is the smaller sign-bit count of the two operands, minus one.
1076 unsigned NumSigned = std::min(NumSignedA, NumSignedB) - 1;
1077 unsigned NumZeroA =
1078 DAG.computeKnownBits(ExtOpA, DemandedElts, Depth).countMinLeadingZeros();
1079 unsigned NumZeroB =
1080 DAG.computeKnownBits(ExtOpB, DemandedElts, Depth).countMinLeadingZeros();
// NumZero is the smaller of the two operands' known leading-zero counts.
1081 unsigned NumZero = std::min(NumZeroA, NumZeroB);
1082
// Choose between the unsigned and signed average. The unsigned form is tried
// first and is taken only when there are strictly more known zero bits than
// NumSigned; otherwise the signed form is used if its condition holds. If
// neither applies the transform is abandoned.
1083 switch (ShiftOpc) {
1084 default:
1085 llvm_unreachable("Unexpected ShiftOpc in combineShiftToAVG");
1086 case ISD::SRA: {
1087 if (NumZero >= 2 && NumSigned < NumZero) {
1088 IsSigned = false;
1089 KnownBits = NumZero;
1090 break;
1091 }
1092 if (NumSigned >= 1) {
1093 IsSigned = true;
1094 KnownBits = NumSigned;
1095 break;
1096 }
1097 return SDValue();
1098 }
1099 case ISD::SRL: {
1100 if (NumZero >= 1 && NumSigned < NumZero) {
1101 IsSigned = false;
1102 KnownBits = NumZero;
1103 break;
1104 }
// For a logical shift the signed form is only accepted when the result's sign
// bit is not demanded.
1105 if (NumSigned >= 1 && DemandedBits.isSignBitClear()) {
1106 IsSigned = true;
1107 KnownBits = NumSigned;
1108 break;
1109 }
1110 return SDValue();
1111 }
1112 }
1113
1114 unsigned AVGOpc = IsCeil ? (IsSigned ? ISD::AVGCEILS : ISD::AVGCEILU)
1115 : (IsSigned ? ISD::AVGFLOORS : ISD::AVGFLOORU);
1116
1117 // Find the smallest power-2 type that is legal for this vector size and
1118 // operation, given the original type size and the number of known sign/zero
1119 // bits.
1120 EVT VT = Op.getValueType();
// The candidate width is the scalar width minus the redundant high bits, but
// never below 8, rounded up to a power of two by bit_ceil.
1121 unsigned MinWidth =
1122 std::max<unsigned>(VT.getScalarSizeInBits() - KnownBits, 8);
1123 EVT NVT = EVT::getIntegerVT(*DAG.getContext(), llvm::bit_ceil(MinWidth));
// NOTE(review): the condition guarding the following return is not present in
// this listing -- confirm against the original file.
1125 return SDValue();
1126 if (VT.isVector())
1127 NVT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount());
1128 if (TLO.LegalTypes() && !TLI.isOperationLegal(AVGOpc, NVT)) {
1129 // If we could not transform, and (both) adds are nuw/nsw, we can use the
1130 // larger type size to do the transform.
1131 if (TLO.LegalOperations() && !TLI.isOperationLegal(AVGOpc, VT))
1132 return SDValue();
// Fall back to the original type only when the outer add, and the nested add
// if one was matched, are known not to overflow.
1133 if (DAG.willNotOverflowAdd(IsSigned, Add.getOperand(0),
1134 Add.getOperand(1)) &&
1135 (!Add2 || DAG.willNotOverflowAdd(IsSigned, Add2.getOperand(0),
1136 Add2.getOperand(1))))
1137 NVT = VT;
1138 else
1139 return SDValue();
1140 }
1141
1142 // Don't create an AVGFLOOR node with a scalar constant unless it's legal as
1143 // this is likely to stop other folds (reassociation, value tracking etc.)
1144 if (!IsCeil && !TLI.isOperationLegal(AVGOpc, NVT) &&
1145 (isa<ConstantSDNode>(ExtOpA) || isa<ConstantSDNode>(ExtOpB)))
1146 return SDValue();
1147
// Build the average in NVT on extended/truncated copies of the two operands,
// then extend or truncate the result back to the original type. IsSigned
// selects the signedness passed to each getExtOrTrunc call.
1148 SDLoc DL(Op);
1149 SDValue ResultAVG =
1150 DAG.getNode(AVGOpc, DL, NVT, DAG.getExtOrTrunc(IsSigned, ExtOpA, DL, NVT),
1151 DAG.getExtOrTrunc(IsSigned, ExtOpB, DL, NVT));
1152 return DAG.getExtOrTrunc(IsSigned, ResultAVG, DL, VT);
1153}
1154
1155/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
1156/// result of Op are ever used downstream. If we can use this information to
1157/// simplify Op, create a new simplified DAG node and return true, returning the
1158/// original and new nodes in Old and New. Otherwise, analyze the expression and
1159/// return a mask of Known bits for the expression (used to simplify the
1160/// caller). The Known bits may only be accurate for those bits in the
1161/// OriginalDemandedBits and OriginalDemandedElts.
1163 SDValue Op, const APInt &OriginalDemandedBits,
1164 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
1165 unsigned Depth, bool AssumeSingleUse) const {
1166 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
1167 assert(Op.getScalarValueSizeInBits() == BitWidth &&
1168 "Mask size mismatches value type size!");
1169
1170 // Don't know anything.
1171 Known = KnownBits(BitWidth);
1172
1173 EVT VT = Op.getValueType();
1174 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
1175 unsigned NumElts = OriginalDemandedElts.getBitWidth();
1176 assert((!VT.isFixedLengthVector() || NumElts == VT.getVectorNumElements()) &&
1177 "Unexpected vector size");
1178
1179 APInt DemandedBits = OriginalDemandedBits;
1180 APInt DemandedElts = OriginalDemandedElts;
1181 SDLoc dl(Op);
1182
1183 // Undef operand.
1184 if (Op.isUndef())
1185 return false;
1186
1187 // We can't simplify target constants.
1188 if (Op.getOpcode() == ISD::TargetConstant)
1189 return false;
1190
1191 if (Op.getOpcode() == ISD::Constant) {
1192 // We know all of the bits for a constant!
1193 Known = KnownBits::makeConstant(Op->getAsAPIntVal());
1194 return false;
1195 }
1196
1197 if (Op.getOpcode() == ISD::ConstantFP) {
1198 // We know all of the bits for a floating point constant!
1200 cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt());
1201 return false;
1202 }
1203
1204 // Other users may use these bits.
1205 bool HasMultiUse = false;
1206 if (!AssumeSingleUse && !Op.getNode()->hasOneUse()) {
1208 // Limit search depth.
1209 return false;
1210 }
1211 // Allow multiple uses, just set the DemandedBits/Elts to all bits.
1213 DemandedElts = APInt::getAllOnes(NumElts);
1214 HasMultiUse = true;
1215 } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
1216 // Not demanding any bits/elts from Op.
1217 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1218 } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
1219 // Limit search depth.
1220 return false;
1221 }
1222
1223 KnownBits Known2;
1224 switch (Op.getOpcode()) {
1225 case ISD::SCALAR_TO_VECTOR: {
1226 if (VT.isScalableVector())
1227 return false;
1228 if (!DemandedElts[0])
1229 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1230
1231 KnownBits SrcKnown;
1232 SDValue Src = Op.getOperand(0);
1233 unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
1234 APInt SrcDemandedBits = DemandedBits.zext(SrcBitWidth);
1235 if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
1236 return true;
1237
1238 // Upper elements are undef, so only get the knownbits if we just demand
1239 // the bottom element.
1240 if (DemandedElts == 1)
1241 Known = SrcKnown.anyextOrTrunc(BitWidth);
1242 break;
1243 }
1244 case ISD::BUILD_VECTOR:
1245 // Collect the known bits that are shared by every demanded element.
1246 // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
1247 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1248 return false; // Don't fall through, will infinitely loop.
1249 case ISD::SPLAT_VECTOR: {
1250 SDValue Scl = Op.getOperand(0);
1251 APInt DemandedSclBits = DemandedBits.zextOrTrunc(Scl.getValueSizeInBits());
1252 KnownBits KnownScl;
1253 if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1254 return true;
1255
1256 // Implicitly truncate the bits to match the official semantics of
1257 // SPLAT_VECTOR.
1258 Known = KnownScl.trunc(BitWidth);
1259 break;
1260 }
1261 case ISD::FREEZE: {
1262 SDValue N0 = Op.getOperand(0);
1264 N0, DemandedElts, UndefPoisonKind::UndefOrPoison, Depth + 1))
1265 return TLO.CombineTo(Op, N0);
1266 break;
1267 }
1268 case ISD::LOAD: {
1269 auto *LD = cast<LoadSDNode>(Op);
1270 if (getTargetConstantFromLoad(LD)) {
1271 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1272 return false; // Don't fall through, will infinitely loop.
1273 }
1274 if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
1275 // If this is a ZEXTLoad and we are looking at the loaded value.
1276 EVT MemVT = LD->getMemoryVT();
1277 unsigned MemBits = MemVT.getScalarSizeInBits();
1278 Known.Zero.setBitsFrom(MemBits);
1279 return false; // Don't fall through, will infinitely loop.
1280 }
1281 break;
1282 }
1284 if (VT.isScalableVector())
1285 return false;
1286 SDValue Vec = Op.getOperand(0);
1287 SDValue Scl = Op.getOperand(1);
1288 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
1289 EVT VecVT = Vec.getValueType();
1290
1291 // If index isn't constant, assume we need all vector elements AND the
1292 // inserted element.
1293 APInt DemandedVecElts(DemandedElts);
1294 if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
1295 unsigned Idx = CIdx->getZExtValue();
1296 DemandedVecElts.clearBit(Idx);
1297
1298 // Inserted element is not required.
1299 if (!DemandedElts[Idx])
1300 return TLO.CombineTo(Op, Vec);
1301 }
1302
1303 KnownBits KnownScl;
1304 unsigned NumSclBits = Scl.getScalarValueSizeInBits();
1305 APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
1306 if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1307 return true;
1308
1309 Known = KnownScl.anyextOrTrunc(BitWidth);
1310
1311 KnownBits KnownVec;
1312 if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
1313 Depth + 1))
1314 return true;
1315
1316 if (!!DemandedVecElts)
1317 Known = Known.intersectWith(KnownVec);
1318
1319 return false;
1320 }
1321 case ISD::INSERT_SUBVECTOR: {
1322 if (VT.isScalableVector())
1323 return false;
1324 // Demand any elements from the subvector and the remainder from the src its
1325 // inserted into.
1326 SDValue Src = Op.getOperand(0);
1327 SDValue Sub = Op.getOperand(1);
1328 uint64_t Idx = Op.getConstantOperandVal(2);
1329 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
1330 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
1331 APInt DemandedSrcElts = DemandedElts;
1332 DemandedSrcElts.clearBits(Idx, Idx + NumSubElts);
1333
1334 KnownBits KnownSub, KnownSrc;
1335 if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
1336 Depth + 1))
1337 return true;
1338 if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, KnownSrc, TLO,
1339 Depth + 1))
1340 return true;
1341
1342 Known.setAllConflict();
1343 if (!!DemandedSubElts)
1344 Known = Known.intersectWith(KnownSub);
1345 if (!!DemandedSrcElts)
1346 Known = Known.intersectWith(KnownSrc);
1347
1348 // Attempt to avoid multi-use src if we don't need anything from it.
1349 if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() ||
1350 !DemandedSrcElts.isAllOnes()) {
1352 Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
1354 Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1355 if (NewSub || NewSrc) {
1356 NewSub = NewSub ? NewSub : Sub;
1357 NewSrc = NewSrc ? NewSrc : Src;
1358 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub,
1359 Op.getOperand(2));
1360 return TLO.CombineTo(Op, NewOp);
1361 }
1362 }
1363 break;
1364 }
1366 if (VT.isScalableVector())
1367 return false;
1368 // Offset the demanded elts by the subvector index.
1369 SDValue Src = Op.getOperand(0);
1370 if (Src.getValueType().isScalableVector())
1371 break;
1372 uint64_t Idx = Op.getConstantOperandVal(1);
1373 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
1374 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
1375
1376 if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO,
1377 Depth + 1))
1378 return true;
1379
1380 // Attempt to avoid multi-use src if we don't need anything from it.
1381 if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
1383 Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1384 if (DemandedSrc) {
1385 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc,
1386 Op.getOperand(1));
1387 return TLO.CombineTo(Op, NewOp);
1388 }
1389 }
1390 break;
1391 }
1392 case ISD::CONCAT_VECTORS: {
1393 if (VT.isScalableVector())
1394 return false;
1395 Known.setAllConflict();
1396 EVT SubVT = Op.getOperand(0).getValueType();
1397 unsigned NumSubVecs = Op.getNumOperands();
1398 unsigned NumSubElts = SubVT.getVectorNumElements();
1399 for (unsigned i = 0; i != NumSubVecs; ++i) {
1400 APInt DemandedSubElts =
1401 DemandedElts.extractBits(NumSubElts, i * NumSubElts);
1402 if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
1403 Known2, TLO, Depth + 1))
1404 return true;
1405 // Known bits are shared by every demanded subvector element.
1406 if (!!DemandedSubElts)
1407 Known = Known.intersectWith(Known2);
1408 }
1409 break;
1410 }
1411 case ISD::VECTOR_SHUFFLE: {
1412 assert(!VT.isScalableVector());
1413 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
1414
1415 // Collect demanded elements from shuffle operands..
1416 APInt DemandedLHS, DemandedRHS;
1417 if (!getShuffleDemandedElts(NumElts, ShuffleMask, DemandedElts, DemandedLHS,
1418 DemandedRHS))
1419 break;
1420
1421 if (!!DemandedLHS || !!DemandedRHS) {
1422 SDValue Op0 = Op.getOperand(0);
1423 SDValue Op1 = Op.getOperand(1);
1424
1425 Known.setAllConflict();
1426 if (!!DemandedLHS) {
1427 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
1428 Depth + 1))
1429 return true;
1430 Known = Known.intersectWith(Known2);
1431 }
1432 if (!!DemandedRHS) {
1433 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
1434 Depth + 1))
1435 return true;
1436 Known = Known.intersectWith(Known2);
1437 }
1438
1439 // Attempt to avoid multi-use ops if we don't need anything from them.
1441 Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
1443 Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
1444 if (DemandedOp0 || DemandedOp1) {
1445 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1446 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1447 SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
1448 return TLO.CombineTo(Op, NewOp);
1449 }
1450 }
1451 break;
1452 }
1453 case ISD::AND: {
1454 SDValue Op0 = Op.getOperand(0);
1455 SDValue Op1 = Op.getOperand(1);
1456
1457 // If the RHS is a constant, check to see if the LHS would be zero without
1458 // using the bits from the RHS. Below, we use knowledge about the RHS to
1459 // simplify the LHS, here we're using information from the LHS to simplify
1460 // the RHS.
1461 if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1, DemandedElts)) {
1462 // Do not increment Depth here; that can cause an infinite loop.
1463 KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
1464 // If the LHS already has zeros where RHSC does, this 'and' is dead.
1465 if ((LHSKnown.Zero & DemandedBits) ==
1466 (~RHSC->getAPIntValue() & DemandedBits))
1467 return TLO.CombineTo(Op, Op0);
1468
1469 // If any of the set bits in the RHS are known zero on the LHS, shrink
1470 // the constant.
1471 if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits,
1472 DemandedElts, TLO))
1473 return true;
1474
1475 // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
1476 // constant, but if this 'and' is only clearing bits that were just set by
1477 // the xor, then this 'and' can be eliminated by shrinking the mask of
1478 // the xor. For example, for a 32-bit X:
1479 // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
1480 if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
1481 LHSKnown.One == ~RHSC->getAPIntValue()) {
1482 SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
1483 return TLO.CombineTo(Op, Xor);
1484 }
1485 }
1486
1487 // (X +/- Y) & Y --> ~X & Y when Y is a power of 2 (or zero).
1488 SDValue X, Y;
1489 if (sd_match(Op,
1490 m_And(m_Value(Y),
1492 m_Sub(m_Value(X), m_Deferred(Y)))))) &&
1493 TLO.DAG.isKnownToBeAPowerOfTwo(Y, DemandedElts, /*OrZero=*/true)) {
1494 return TLO.CombineTo(
1495 Op, TLO.DAG.getNode(ISD::AND, dl, VT, TLO.DAG.getNOT(dl, X, VT), Y));
1496 }
1497
1498 // AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I)
1499 // iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits).
1500 if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR && !VT.isScalableVector() &&
1501 (Op0.getOperand(0).isUndef() ||
1503 Op0->hasOneUse()) {
1504 unsigned NumSubElts =
1506 unsigned SubIdx = Op0.getConstantOperandVal(2);
1507 APInt DemandedSub =
1508 APInt::getBitsSet(NumElts, SubIdx, SubIdx + NumSubElts);
1509 KnownBits KnownSubMask =
1510 TLO.DAG.computeKnownBits(Op1, DemandedSub & DemandedElts, Depth + 1);
1511 if (DemandedBits.isSubsetOf(KnownSubMask.One)) {
1512 SDValue NewAnd =
1513 TLO.DAG.getNode(ISD::AND, dl, VT, Op0.getOperand(0), Op1);
1514 SDValue NewInsert =
1515 TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, NewAnd,
1516 Op0.getOperand(1), Op0.getOperand(2));
1517 return TLO.CombineTo(Op, NewInsert);
1518 }
1519 }
1520
1521 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1522 Depth + 1))
1523 return true;
1524 if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
1525 Known2, TLO, Depth + 1))
1526 return true;
1527
1528 // If all of the demanded bits are known one on one side, return the other.
1529 // These bits cannot contribute to the result of the 'and'.
1530 if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
1531 return TLO.CombineTo(Op, Op0);
1532 if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
1533 return TLO.CombineTo(Op, Op1);
1534 // If all of the demanded bits in the inputs are known zeros, return zero.
1535 if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1536 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
1537 // If the RHS is a constant, see if we can simplify it.
1538 if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, DemandedElts,
1539 TLO))
1540 return true;
1541 // If the operation can be done in a smaller type, do so.
1543 return true;
1544
1545 // Attempt to avoid multi-use ops if we don't need anything from them.
1546 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1548 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1550 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1551 if (DemandedOp0 || DemandedOp1) {
1552 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1553 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1554 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1555 return TLO.CombineTo(Op, NewOp);
1556 }
1557 }
1558
1559 Known &= Known2;
1560 break;
1561 }
1562 case ISD::OR: {
1563 SDValue Op0 = Op.getOperand(0);
1564 SDValue Op1 = Op.getOperand(1);
1565 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1566 Depth + 1)) {
1567 Op->dropFlags(SDNodeFlags::Disjoint);
1568 return true;
1569 }
1570
1571 if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
1572 Known2, TLO, Depth + 1)) {
1573 Op->dropFlags(SDNodeFlags::Disjoint);
1574 return true;
1575 }
1576
1577 // If all of the demanded bits are known zero on one side, return the other.
1578 // These bits cannot contribute to the result of the 'or'.
1579 if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
1580 return TLO.CombineTo(Op, Op0);
1581 if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
1582 return TLO.CombineTo(Op, Op1);
1583 // If the RHS is a constant, see if we can simplify it.
1584 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1585 return true;
1586 // If the operation can be done in a smaller type, do so.
1588 return true;
1589
1590 // Attempt to avoid multi-use ops if we don't need anything from them.
1591 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1593 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1595 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1596 if (DemandedOp0 || DemandedOp1) {
1597 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1598 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1599 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1600 return TLO.CombineTo(Op, NewOp);
1601 }
1602 }
1603
1604 // (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1|C2), (and Y, C2))
1605 // TODO: Use SimplifyMultipleUseDemandedBits to peek through masks.
1606 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::AND &&
1607 Op0->hasOneUse() && Op1->hasOneUse()) {
1608 // Attempt to match all commutations - m_c_Or would've been useful!
1609 for (int I = 0; I != 2; ++I) {
1610 SDValue X = Op.getOperand(I).getOperand(0);
1611 SDValue C1 = Op.getOperand(I).getOperand(1);
1612 SDValue Alt = Op.getOperand(1 - I).getOperand(0);
1613 SDValue C2 = Op.getOperand(1 - I).getOperand(1);
1614 if (Alt.getOpcode() == ISD::OR) {
1615 for (int J = 0; J != 2; ++J) {
1616 if (X == Alt.getOperand(J)) {
1617 SDValue Y = Alt.getOperand(1 - J);
1618 if (SDValue C12 = TLO.DAG.FoldConstantArithmetic(ISD::OR, dl, VT,
1619 {C1, C2})) {
1620 SDValue MaskX = TLO.DAG.getNode(ISD::AND, dl, VT, X, C12);
1621 SDValue MaskY = TLO.DAG.getNode(ISD::AND, dl, VT, Y, C2);
1622 return TLO.CombineTo(
1623 Op, TLO.DAG.getNode(ISD::OR, dl, VT, MaskX, MaskY));
1624 }
1625 }
1626 }
1627 }
1628 }
1629 }
1630
1631 Known |= Known2;
1632 break;
1633 }
1634 case ISD::XOR: {
1635 SDValue Op0 = Op.getOperand(0);
1636 SDValue Op1 = Op.getOperand(1);
1637
1638 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1639 Depth + 1))
1640 return true;
1641 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
1642 Depth + 1))
1643 return true;
1644
1645 // If all of the demanded bits are known zero on one side, return the other.
1646 // These bits cannot contribute to the result of the 'xor'.
1647 if (DemandedBits.isSubsetOf(Known.Zero))
1648 return TLO.CombineTo(Op, Op0);
1649 if (DemandedBits.isSubsetOf(Known2.Zero))
1650 return TLO.CombineTo(Op, Op1);
1651 // If the operation can be done in a smaller type, do so.
1653 return true;
1654
1655 // If all of the unknown bits are known to be zero on one side or the other
1656 // turn this into an *inclusive* or.
1657 // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
1658 if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1659 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
1660
1661 ConstantSDNode *C = isConstOrConstSplat(Op1, DemandedElts);
1662 if (C) {
1663 // If one side is a constant, and all of the set bits in the constant are
1664 // also known set on the other side, turn this into an AND, as we know
1665 // the bits will be cleared.
1666 // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
1667 // NB: it is okay if more bits are known than are requested
1668 if (C->getAPIntValue() == Known2.One) {
1669 SDValue ANDC =
1670 TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
1671 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
1672 }
1673
1674 // If the RHS is a constant, see if we can change it. Don't alter a -1
1675 // constant because that's a 'not' op, and that is better for combining
1676 // and codegen.
1677 if (!C->isAllOnes() && DemandedBits.isSubsetOf(C->getAPIntValue())) {
1678 // We're flipping all demanded bits. Flip the undemanded bits too.
1679 SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
1680 return TLO.CombineTo(Op, New);
1681 }
1682
1683 unsigned Op0Opcode = Op0.getOpcode();
1684 if ((Op0Opcode == ISD::SRL || Op0Opcode == ISD::SHL) && Op0.hasOneUse()) {
1685 if (ConstantSDNode *ShiftC =
1686 isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
1687 // Don't crash on an oversized shift. We can not guarantee that a
1688 // bogus shift has been simplified to undef.
1689 if (ShiftC->getAPIntValue().ult(BitWidth)) {
1690 uint64_t ShiftAmt = ShiftC->getZExtValue();
1692 Ones = Op0Opcode == ISD::SHL ? Ones.shl(ShiftAmt)
1693 : Ones.lshr(ShiftAmt);
1694 if ((DemandedBits & C->getAPIntValue()) == (DemandedBits & Ones) &&
1696 // If the xor constant is a demanded mask, do a 'not' before the
1697 // shift:
1698 // xor (X << ShiftC), XorC --> (not X) << ShiftC
1699 // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
1700 SDValue Not = TLO.DAG.getNOT(dl, Op0.getOperand(0), VT);
1701 return TLO.CombineTo(Op, TLO.DAG.getNode(Op0Opcode, dl, VT, Not,
1702 Op0.getOperand(1)));
1703 }
1704 }
1705 }
1706 }
1707 }
1708
1709 // If we can't turn this into a 'not', try to shrink the constant.
1710 if (!C || !C->isAllOnes())
1711 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1712 return true;
1713
1714 // Attempt to avoid multi-use ops if we don't need anything from them.
1715 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1717 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1719 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1720 if (DemandedOp0 || DemandedOp1) {
1721 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1722 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1723 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1724 return TLO.CombineTo(Op, NewOp);
1725 }
1726 }
1727
1728 Known ^= Known2;
1729 break;
1730 }
1731 case ISD::SELECT:
1732 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1733 Known, TLO, Depth + 1))
1734 return true;
1735 if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1736 Known2, TLO, Depth + 1))
1737 return true;
1738
1739 // If the operands are constants, see if we can simplify them.
1740 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1741 return true;
1742
1743 // Only known if known in both the LHS and RHS.
1744 Known = Known.intersectWith(Known2);
1745 break;
1746 case ISD::VSELECT:
1747 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1748 Known, TLO, Depth + 1))
1749 return true;
1750 if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1751 Known2, TLO, Depth + 1))
1752 return true;
1753
1754 // Only known if known in both the LHS and RHS.
1755 Known = Known.intersectWith(Known2);
1756 break;
1757 case ISD::SELECT_CC:
1758 if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, DemandedElts,
1759 Known, TLO, Depth + 1))
1760 return true;
1761 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1762 Known2, TLO, Depth + 1))
1763 return true;
1764
1765 // If the operands are constants, see if we can simplify them.
1766 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1767 return true;
1768
1769 // Only known if known in both the LHS and RHS.
1770 Known = Known.intersectWith(Known2);
1771 break;
1772 case ISD::SETCC: {
1773 SDValue Op0 = Op.getOperand(0);
1774 SDValue Op1 = Op.getOperand(1);
1775 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
1776 // If we're testing X < 0, X >= 0, X <= -1 or X > -1
1777 // (X is of integer type) then we only need the sign mask of the previous
1778 // result
1779 if (Op1.getValueType().isInteger() &&
1780 (((CC == ISD::SETLT || CC == ISD::SETGE) && isNullOrNullSplat(Op1)) ||
1781 ((CC == ISD::SETLE || CC == ISD::SETGT) &&
1782 isAllOnesOrAllOnesSplat(Op1)))) {
1783 KnownBits KnownOp0;
1786 DemandedElts, KnownOp0, TLO, Depth + 1))
1787 return true;
1788 // If (1) we only need the sign-bit, (2) the setcc operands are the same
1789 // width as the setcc result, and (3) the result of a setcc conforms to 0
1790 // or -1, we may be able to bypass the setcc.
1791 if (DemandedBits.isSignMask() &&
1795 // If we remove a >= 0 or > -1 (for integers), we need to introduce a
1796 // NOT Operation
1797 if (CC == ISD::SETGE || CC == ISD::SETGT) {
1798 SDLoc DL(Op);
1799 EVT VT = Op0.getValueType();
1800 SDValue NotOp0 = TLO.DAG.getNOT(DL, Op0, VT);
1801 return TLO.CombineTo(Op, NotOp0);
1802 }
1803 return TLO.CombineTo(Op, Op0);
1804 }
1805 }
1806 if (getBooleanContents(Op0.getValueType()) ==
1808 BitWidth > 1)
1809 Known.Zero.setBitsFrom(1);
1810 break;
1811 }
1812 case ISD::SHL: {
1813 SDValue Op0 = Op.getOperand(0);
1814 SDValue Op1 = Op.getOperand(1);
1815 EVT ShiftVT = Op1.getValueType();
1816
1817 if (std::optional<unsigned> KnownSA =
1818 TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
1819 unsigned ShAmt = *KnownSA;
1820 if (ShAmt == 0)
1821 return TLO.CombineTo(Op, Op0);
1822
1823 // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
1824 // single shift. We can do this if the bottom bits (which are shifted
1825 // out) are never demanded.
1826 // TODO - support non-uniform vector amounts.
1827 if (Op0.getOpcode() == ISD::SRL) {
1828 if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
1829 if (std::optional<unsigned> InnerSA =
1830 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
1831 unsigned C1 = *InnerSA;
1832 unsigned Opc = ISD::SHL;
1833 int Diff = ShAmt - C1;
1834 if (Diff < 0) {
1835 Diff = -Diff;
1836 Opc = ISD::SRL;
1837 }
1838 SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1839 return TLO.CombineTo(
1840 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1841 }
1842 }
1843 }
1844
1845 // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
1846 // are not demanded. This will likely allow the anyext to be folded away.
1847 // TODO - support non-uniform vector amounts.
1848 if (Op0.getOpcode() == ISD::ANY_EXTEND) {
1849 SDValue InnerOp = Op0.getOperand(0);
1850 EVT InnerVT = InnerOp.getValueType();
1851 unsigned InnerBits = InnerVT.getScalarSizeInBits();
1852 if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
1853 isTypeDesirableForOp(ISD::SHL, InnerVT)) {
1854 SDValue NarrowShl = TLO.DAG.getNode(
1855 ISD::SHL, dl, InnerVT, InnerOp,
1856 TLO.DAG.getShiftAmountConstant(ShAmt, InnerVT, dl));
1857 return TLO.CombineTo(
1858 Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1859 }
1860
1861 // Repeat the SHL optimization above in cases where an extension
1862 // intervenes: (shl (anyext (shr x, c1)), c2) to
1863 // (shl (anyext x), c2-c1). This requires that the bottom c1 bits
1864 // aren't demanded (as above) and that the shifted upper c1 bits of
1865 // x aren't demanded.
1866 // TODO - support non-uniform vector amounts.
1867 if (InnerOp.getOpcode() == ISD::SRL && Op0.hasOneUse() &&
1868 InnerOp.hasOneUse()) {
1869 if (std::optional<unsigned> SA2 = TLO.DAG.getValidShiftAmount(
1870 InnerOp, DemandedElts, Depth + 2)) {
1871 unsigned InnerShAmt = *SA2;
1872 if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
1873 DemandedBits.getActiveBits() <=
1874 (InnerBits - InnerShAmt + ShAmt) &&
1875 DemandedBits.countr_zero() >= ShAmt) {
1876 SDValue NewSA =
1877 TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, ShiftVT);
1878 SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
1879 InnerOp.getOperand(0));
1880 return TLO.CombineTo(
1881 Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
1882 }
1883 }
1884 }
1885 }
1886
1887 APInt InDemandedMask = DemandedBits.lshr(ShAmt);
1888 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1889 Depth + 1)) {
1890 // Disable the nsw and nuw flags. We can no longer guarantee that we
1891 // won't wrap after simplification.
1892 Op->dropFlags(SDNodeFlags::NoWrap);
1893 return true;
1894 }
1895 Known <<= ShAmt;
1896 // low bits known zero.
1897 Known.Zero.setLowBits(ShAmt);
1898
1899 // Attempt to avoid multi-use ops if we don't need anything from them.
1900 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1902 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
1903 if (DemandedOp0) {
1904 SDValue NewOp = TLO.DAG.getNode(ISD::SHL, dl, VT, DemandedOp0, Op1);
1905 return TLO.CombineTo(Op, NewOp);
1906 }
1907 }
1908
1909 // TODO: Can we merge this fold with the one below?
1910 // Try shrinking the operation as long as the shift amount will still be
1911 // in range.
1912 if (ShAmt < DemandedBits.getActiveBits() && !VT.isVector() &&
1913 Op.getNode()->hasOneUse()) {
1914 // Search for the smallest integer type with free casts to and from
1915 // Op's type. For expedience, just check power-of-2 integer types.
1916 unsigned DemandedSize = DemandedBits.getActiveBits();
1917 for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
1918 SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
1919 EVT SmallVT = EVT::getIntegerVT(*TLO.DAG.getContext(), SmallVTBits);
1920 if (isNarrowingProfitable(Op.getNode(), VT, SmallVT) &&
1921 isTypeDesirableForOp(ISD::SHL, SmallVT) &&
1922 isTruncateFree(VT, SmallVT) && isZExtFree(SmallVT, VT) &&
1923 (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, SmallVT))) {
1924 assert(DemandedSize <= SmallVTBits &&
1925 "Narrowed below demanded bits?");
1926 // We found a type with free casts.
1927 SDValue NarrowShl = TLO.DAG.getNode(
1928 ISD::SHL, dl, SmallVT,
1929 TLO.DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
1930 TLO.DAG.getShiftAmountConstant(ShAmt, SmallVT, dl));
1931 return TLO.CombineTo(
1932 Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1933 }
1934 }
1935 }
1936
1937 // Narrow shift to lower half - similar to ShrinkDemandedOp.
1938 // (shl i64:x, K) -> (i64 zero_extend (shl (i32 (trunc i64:x)), K))
1939 // Only do this if we demand the upper half so the knownbits are correct.
1940 unsigned HalfWidth = BitWidth / 2;
1941 if ((BitWidth % 2) == 0 && !VT.isVector() && ShAmt < HalfWidth &&
1942 DemandedBits.countLeadingOnes() >= HalfWidth) {
1943 EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), HalfWidth);
1944 if (isNarrowingProfitable(Op.getNode(), VT, HalfVT) &&
1945 isTypeDesirableForOp(ISD::SHL, HalfVT) &&
1946 isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
1947 (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, HalfVT))) {
1948 // If we're demanding the upper bits at all, we must ensure
1949 // that the upper bits of the shift result are known to be zero,
1950 // which is equivalent to the narrow shift being NUW.
1951 if (bool IsNUW = (Known.countMinLeadingZeros() >= HalfWidth)) {
1952 bool IsNSW = Known.countMinSignBits() > HalfWidth;
1953 SDNodeFlags Flags;
1954 Flags.setNoSignedWrap(IsNSW);
1955 Flags.setNoUnsignedWrap(IsNUW);
1956 SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
1957 SDValue NewShiftAmt =
1958 TLO.DAG.getShiftAmountConstant(ShAmt, HalfVT, dl);
1959 SDValue NewShift = TLO.DAG.getNode(ISD::SHL, dl, HalfVT, NewOp,
1960 NewShiftAmt, Flags);
1961 SDValue NewExt =
1962 TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift);
1963 return TLO.CombineTo(Op, NewExt);
1964 }
1965 }
1966 }
1967 } else {
1968 // This is a variable shift, so we can't shift the demand mask by a known
1969 // amount. But if we are not demanding high bits, then we are not
1970 // demanding those bits from the pre-shifted operand either.
1971 if (unsigned CTLZ = DemandedBits.countl_zero()) {
1972 APInt DemandedFromOp(APInt::getLowBitsSet(BitWidth, BitWidth - CTLZ));
1973 if (SimplifyDemandedBits(Op0, DemandedFromOp, DemandedElts, Known, TLO,
1974 Depth + 1)) {
1975 // Disable the nsw and nuw flags. We can no longer guarantee that we
1976 // won't wrap after simplification.
1977 Op->dropFlags(SDNodeFlags::NoWrap);
1978 return true;
1979 }
1980 Known.resetAll();
1981 }
1982 }
1983
1984 // If we are only demanding sign bits then we can use the shift source
1985 // directly.
1986 if (std::optional<unsigned> MaxSA =
1987 TLO.DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
1988 unsigned ShAmt = *MaxSA;
1989 unsigned NumSignBits =
1990 TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
1991 unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
1992 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
1993 return TLO.CombineTo(Op, Op0);
1994 }
1995 break;
1996 }
1997 case ISD::SRL: {
1998 SDValue Op0 = Op.getOperand(0);
1999 SDValue Op1 = Op.getOperand(1);
2000 EVT ShiftVT = Op1.getValueType();
2001
2002 if (std::optional<unsigned> KnownSA =
2003 TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
2004 unsigned ShAmt = *KnownSA;
2005 if (ShAmt == 0)
2006 return TLO.CombineTo(Op, Op0);
2007
2008 // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
2009 // single shift. We can do this if the top bits (which are shifted out)
2010 // are never demanded.
2011 // TODO - support non-uniform vector amounts.
2012 if (Op0.getOpcode() == ISD::SHL) {
2013 if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
2014 if (std::optional<unsigned> InnerSA =
2015 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
2016 unsigned C1 = *InnerSA;
2017 unsigned Opc = ISD::SRL;
2018 int Diff = ShAmt - C1;
2019 if (Diff < 0) {
2020 Diff = -Diff;
2021 Opc = ISD::SHL;
2022 }
2023 SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
2024 return TLO.CombineTo(
2025 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
2026 }
2027 }
2028 }
2029
2030 // If this is (srl (sra X, C1), ShAmt), see if we can combine this into a
2031 // single sra. We can do this if the top bits are never demanded.
2032 if (Op0.getOpcode() == ISD::SRA && Op0.hasOneUse()) {
2033 if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
2034 if (std::optional<unsigned> InnerSA =
2035 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
2036 unsigned C1 = *InnerSA;
2037 // Clamp the combined shift amount if it exceeds the bit width.
2038 unsigned Combined = std::min(C1 + ShAmt, BitWidth - 1);
2039 SDValue NewSA = TLO.DAG.getConstant(Combined, dl, ShiftVT);
2040 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRA, dl, VT,
2041 Op0.getOperand(0), NewSA));
2042 }
2043 }
2044 }
2045
2046 APInt InDemandedMask = (DemandedBits << ShAmt);
2047
2048 // If the shift is exact, then it does demand the low bits (and knows that
2049 // they are zero).
2050 if (Op->getFlags().hasExact())
2051 InDemandedMask.setLowBits(ShAmt);
2052
2053 // Narrow shift to lower half - similar to ShrinkDemandedOp.
2054 // (srl i64:x, K) -> (i64 zero_extend (srl (i32 (trunc i64:x)), K))
2055 if ((BitWidth % 2) == 0 && !VT.isVector()) {
2057 EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), BitWidth / 2);
2058 if (isNarrowingProfitable(Op.getNode(), VT, HalfVT) &&
2059 isTypeDesirableForOp(ISD::SRL, HalfVT) &&
2060 isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
2061 (!TLO.LegalOperations() || isOperationLegal(ISD::SRL, HalfVT)) &&
2062 ((InDemandedMask.countLeadingZeros() >= (BitWidth / 2)) ||
2063 TLO.DAG.MaskedValueIsZero(Op0, HiBits))) {
2064 SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
2065 SDValue NewShiftAmt =
2066 TLO.DAG.getShiftAmountConstant(ShAmt, HalfVT, dl);
2067 SDValue NewShift =
2068 TLO.DAG.getNode(ISD::SRL, dl, HalfVT, NewOp, NewShiftAmt);
2069 return TLO.CombineTo(
2070 Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift));
2071 }
2072 }
2073
2074 // Compute the new bits that are at the top now.
2075 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
2076 Depth + 1))
2077 return true;
2078 Known >>= ShAmt;
2079 // High bits known zero.
2080 Known.Zero.setHighBits(ShAmt);
2081
2082 // Attempt to avoid multi-use ops if we don't need anything from them.
2083 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2085 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
2086 if (DemandedOp0) {
2087 SDValue NewOp = TLO.DAG.getNode(ISD::SRL, dl, VT, DemandedOp0, Op1);
2088 return TLO.CombineTo(Op, NewOp);
2089 }
2090 }
2091 } else {
2092 // Use generic knownbits computation as it has support for non-uniform
2093 // shift amounts.
2094 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2095 }
2096
2097 // If we are only demanding sign bits then we can use the shift source
2098 // directly.
2099 if (std::optional<unsigned> MaxSA =
2100 TLO.DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
2101 unsigned ShAmt = *MaxSA;
2102 // Must already be signbits in DemandedBits bounds, and can't demand any
2103 // shifted in zeroes.
2104 if (DemandedBits.countl_zero() >= ShAmt) {
2105 unsigned NumSignBits =
2106 TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
2107 if (DemandedBits.countr_zero() >= (BitWidth - NumSignBits))
2108 return TLO.CombineTo(Op, Op0);
2109 }
2110 }
2111
2112 // Try to match AVG patterns (after shift simplification).
2113 if (SDValue AVG = combineShiftToAVG(Op, TLO, *this, DemandedBits,
2114 DemandedElts, Depth + 1))
2115 return TLO.CombineTo(Op, AVG);
2116
2117 break;
2118 }
2119 case ISD::SRA: {
2120 SDValue Op0 = Op.getOperand(0);
2121 SDValue Op1 = Op.getOperand(1);
2122 EVT ShiftVT = Op1.getValueType();
2123
2124 // If we only want bits that already match the signbit then we don't need
2125 // to shift.
2126 unsigned NumHiDemandedBits = BitWidth - DemandedBits.countr_zero();
2127 if (TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1) >=
2128 NumHiDemandedBits)
2129 return TLO.CombineTo(Op, Op0);
2130
2131 // If this is an arithmetic shift right and only the low-bit is set, we can
2132 // always convert this into a logical shr, even if the shift amount is
2133 // variable. The low bit of the shift cannot be an input sign bit unless
2134 // the shift amount is >= the size of the datatype, which is undefined.
2135 if (DemandedBits.isOne())
2136 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2137
2138 if (std::optional<unsigned> KnownSA =
2139 TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
2140 unsigned ShAmt = *KnownSA;
2141 if (ShAmt == 0)
2142 return TLO.CombineTo(Op, Op0);
2143
2144 // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target
2145 // supports sext_inreg.
2146 if (Op0.getOpcode() == ISD::SHL) {
2147 if (std::optional<unsigned> InnerSA =
2148 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
2149 unsigned LowBits = BitWidth - ShAmt;
2150 EVT ExtVT = VT.changeElementType(
2151 *TLO.DAG.getContext(),
2152 EVT::getIntegerVT(*TLO.DAG.getContext(), LowBits));
2153
2154 if (*InnerSA == ShAmt) {
2155 if (!TLO.LegalOperations() ||
2157 return TLO.CombineTo(
2158 Op, TLO.DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT,
2159 Op0.getOperand(0),
2160 TLO.DAG.getValueType(ExtVT)));
2161
2162 // Even if we can't convert to sext_inreg, we might be able to
2163 // remove this shift pair if the input is already sign extended.
2164 unsigned NumSignBits =
2165 TLO.DAG.ComputeNumSignBits(Op0.getOperand(0), DemandedElts);
2166 if (NumSignBits > ShAmt)
2167 return TLO.CombineTo(Op, Op0.getOperand(0));
2168 }
2169 }
2170 }
2171
2172 APInt InDemandedMask = (DemandedBits << ShAmt);
2173
2174 // If the shift is exact, then it does demand the low bits (and knows that
2175 // they are zero).
2176 if (Op->getFlags().hasExact())
2177 InDemandedMask.setLowBits(ShAmt);
2178
2179 // If any of the demanded bits are produced by the sign extension, we also
2180 // demand the input sign bit.
2181 if (DemandedBits.countl_zero() < ShAmt)
2182 InDemandedMask.setSignBit();
2183
2184 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
2185 Depth + 1))
2186 return true;
2187 Known >>= ShAmt;
2188
2189 // If the input sign bit is known to be zero, or if none of the top bits
2190 // are demanded, turn this into an unsigned shift right.
2191 if (Known.Zero[BitWidth - ShAmt - 1] ||
2192 DemandedBits.countl_zero() >= ShAmt) {
2193 SDNodeFlags Flags;
2194 Flags.setExact(Op->getFlags().hasExact());
2195 return TLO.CombineTo(
2196 Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
2197 }
2198
2199 int Log2 = DemandedBits.exactLogBase2();
2200 if (Log2 >= 0) {
2201 // The bit must come from the sign.
2202 SDValue NewSA = TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, ShiftVT);
2203 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
2204 }
2205
2206 if (Known.One[BitWidth - ShAmt - 1])
2207 // New bits are known one.
2208 Known.One.setHighBits(ShAmt);
2209
2210 // Attempt to avoid multi-use ops if we don't need anything from them.
2211 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2213 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
2214 if (DemandedOp0) {
2215 SDValue NewOp = TLO.DAG.getNode(ISD::SRA, dl, VT, DemandedOp0, Op1);
2216 return TLO.CombineTo(Op, NewOp);
2217 }
2218 }
2219 }
2220
2221 // Try to match AVG patterns (after shift simplification).
2222 if (SDValue AVG = combineShiftToAVG(Op, TLO, *this, DemandedBits,
2223 DemandedElts, Depth + 1))
2224 return TLO.CombineTo(Op, AVG);
2225
2226 break;
2227 }
2228 case ISD::FSHL:
2229 case ISD::FSHR: {
2230 SDValue Op0 = Op.getOperand(0);
2231 SDValue Op1 = Op.getOperand(1);
2232 SDValue Op2 = Op.getOperand(2);
2233 bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
2234
2235 if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
2236 unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2237
2238 // For fshl, 0-shift returns the 1st arg.
2239 // For fshr, 0-shift returns the 2nd arg.
2240 if (Amt == 0) {
2241 if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
2242 Known, TLO, Depth + 1))
2243 return true;
2244 break;
2245 }
2246
2247 // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
2248 // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
2249 APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
2250 APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
2251 if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2252 Depth + 1))
2253 return true;
2254 if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
2255 Depth + 1))
2256 return true;
2257
2258 Known2 <<= (IsFSHL ? Amt : (BitWidth - Amt));
2259 Known >>= (IsFSHL ? (BitWidth - Amt) : Amt);
2260 Known = Known.unionWith(Known2);
2261
2262 // Attempt to avoid multi-use ops if we don't need anything from them.
2263 if (!Demanded0.isAllOnes() || !Demanded1.isAllOnes() ||
2264 !DemandedElts.isAllOnes()) {
2266 Op0, Demanded0, DemandedElts, TLO.DAG, Depth + 1);
2268 Op1, Demanded1, DemandedElts, TLO.DAG, Depth + 1);
2269 if (DemandedOp0 || DemandedOp1) {
2270 DemandedOp0 = DemandedOp0 ? DemandedOp0 : Op0;
2271 DemandedOp1 = DemandedOp1 ? DemandedOp1 : Op1;
2272 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedOp0,
2273 DemandedOp1, Op2);
2274 return TLO.CombineTo(Op, NewOp);
2275 }
2276 }
2277 }
2278
2279 if (isPowerOf2_32(BitWidth)) {
2280 // Fold FSHR(Op0,Op1,Op2) -> SRL(Op1,Op2)
2281 // iff we're guaranteed not to use Op0.
2282 // TODO: Add FSHL equivalent?
2283 if (!IsFSHL && !DemandedBits.isAllOnes() &&
2284 (!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT))) {
2285 KnownBits KnownAmt =
2286 TLO.DAG.computeKnownBits(Op2, DemandedElts, Depth + 1);
2287 unsigned MaxShiftAmt =
2288 KnownAmt.getMaxValue().getLimitedValue(BitWidth - 1);
2289 // Check we don't demand any shifted bits outside Op1.
2290 if (DemandedBits.countl_zero() >= MaxShiftAmt) {
2291 EVT AmtVT = Op2.getValueType();
2292 SDValue NewAmt =
2293 TLO.DAG.getNode(ISD::AND, dl, AmtVT, Op2,
2294 TLO.DAG.getConstant(BitWidth - 1, dl, AmtVT));
2295 SDValue NewOp = TLO.DAG.getNode(ISD::SRL, dl, VT, Op1, NewAmt);
2296 return TLO.CombineTo(Op, NewOp);
2297 }
2298 }
2299
2300 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2301 APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
2302 if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts, Known2, TLO,
2303 Depth + 1))
2304 return true;
2305 }
2306 break;
2307 }
2308 case ISD::ROTL:
2309 case ISD::ROTR: {
2310 SDValue Op0 = Op.getOperand(0);
2311 SDValue Op1 = Op.getOperand(1);
2312 bool IsROTL = (Op.getOpcode() == ISD::ROTL);
2313
2314 // If we're rotating an 0/-1 value, then it stays an 0/-1 value.
2315 if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1))
2316 return TLO.CombineTo(Op, Op0);
2317
2318 if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
2319 unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2320 unsigned RevAmt = BitWidth - Amt;
2321
2322 // rotl: (Op0 << Amt) | (Op0 >> (BW - Amt))
2323 // rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt)
2324 APInt Demanded0 = DemandedBits.rotr(IsROTL ? Amt : RevAmt);
2325 if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2326 Depth + 1))
2327 return true;
2328
2329 // rot*(x, 0) --> x
2330 if (Amt == 0)
2331 return TLO.CombineTo(Op, Op0);
2332
2333 // See if we don't demand either half of the rotated bits.
2334 if ((!TLO.LegalOperations() || isOperationLegal(ISD::SHL, VT)) &&
2335 DemandedBits.countr_zero() >= (IsROTL ? Amt : RevAmt)) {
2336 Op1 = TLO.DAG.getConstant(IsROTL ? Amt : RevAmt, dl, Op1.getValueType());
2337 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, Op1));
2338 }
2339 if ((!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT)) &&
2340 DemandedBits.countl_zero() >= (IsROTL ? RevAmt : Amt)) {
2341 Op1 = TLO.DAG.getConstant(IsROTL ? RevAmt : Amt, dl, Op1.getValueType());
2342 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2343 }
2344 }
2345
2346 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2347 if (isPowerOf2_32(BitWidth)) {
2348 APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
2349 if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO,
2350 Depth + 1))
2351 return true;
2352 }
2353 break;
2354 }
2355 case ISD::SMIN:
2356 case ISD::SMAX:
2357 case ISD::UMIN:
2358 case ISD::UMAX: {
2359 unsigned Opc = Op.getOpcode();
2360 SDValue Op0 = Op.getOperand(0);
2361 SDValue Op1 = Op.getOperand(1);
2362
2363 // If we're only demanding signbits, then we can simplify to OR/AND node.
2364 unsigned BitOp =
2365 (Opc == ISD::SMIN || Opc == ISD::UMAX) ? ISD::OR : ISD::AND;
2366 unsigned NumSignBits =
2367 std::min(TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1),
2368 TLO.DAG.ComputeNumSignBits(Op1, DemandedElts, Depth + 1));
2369 unsigned NumDemandedUpperBits = BitWidth - DemandedBits.countr_zero();
2370 if (NumSignBits >= NumDemandedUpperBits)
2371 return TLO.CombineTo(Op, TLO.DAG.getNode(BitOp, SDLoc(Op), VT, Op0, Op1));
2372
2373 // Check if one arg is always less/greater than (or equal) to the other arg.
2374 KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
2375 KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
2376 switch (Opc) {
2377 case ISD::SMIN:
2378 if (std::optional<bool> IsSLE = KnownBits::sle(Known0, Known1))
2379 return TLO.CombineTo(Op, *IsSLE ? Op0 : Op1);
2380 if (std::optional<bool> IsSLT = KnownBits::slt(Known0, Known1))
2381 return TLO.CombineTo(Op, *IsSLT ? Op0 : Op1);
2382 Known = KnownBits::smin(Known0, Known1);
2383 break;
2384 case ISD::SMAX:
2385 if (std::optional<bool> IsSGE = KnownBits::sge(Known0, Known1))
2386 return TLO.CombineTo(Op, *IsSGE ? Op0 : Op1);
2387 if (std::optional<bool> IsSGT = KnownBits::sgt(Known0, Known1))
2388 return TLO.CombineTo(Op, *IsSGT ? Op0 : Op1);
2389 Known = KnownBits::smax(Known0, Known1);
2390 break;
2391 case ISD::UMIN:
2392 if (std::optional<bool> IsULE = KnownBits::ule(Known0, Known1))
2393 return TLO.CombineTo(Op, *IsULE ? Op0 : Op1);
2394 if (std::optional<bool> IsULT = KnownBits::ult(Known0, Known1))
2395 return TLO.CombineTo(Op, *IsULT ? Op0 : Op1);
2396 Known = KnownBits::umin(Known0, Known1);
2397 break;
2398 case ISD::UMAX:
2399 if (std::optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
2400 return TLO.CombineTo(Op, *IsUGE ? Op0 : Op1);
2401 if (std::optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
2402 return TLO.CombineTo(Op, *IsUGT ? Op0 : Op1);
2403 Known = KnownBits::umax(Known0, Known1);
2404 break;
2405 }
2406 break;
2407 }
2408 case ISD::BITREVERSE: {
2409 SDValue Src = Op.getOperand(0);
2410 APInt DemandedSrcBits = DemandedBits.reverseBits();
2411 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2412 Depth + 1))
2413 return true;
2414 Known = Known2.reverseBits();
2415 break;
2416 }
2417 case ISD::BSWAP: {
2418 SDValue Src = Op.getOperand(0);
2419
2420 // If the only bits demanded come from one byte of the bswap result,
2421 // just shift the input byte into position to eliminate the bswap.
2422 unsigned NLZ = DemandedBits.countl_zero();
2423 unsigned NTZ = DemandedBits.countr_zero();
2424
2425 // Round NTZ down to the next byte. If we have 11 trailing zeros, then
2426 // we need all the bits down to bit 8. Likewise, round NLZ. If we
2427 // have 14 leading zeros, round to 8.
2428 NLZ = alignDown(NLZ, 8);
2429 NTZ = alignDown(NTZ, 8);
2430 // If we need exactly one byte, we can do this transformation.
2431 if (BitWidth - NLZ - NTZ == 8) {
2432 // Replace this with either a left or right shift to get the byte into
2433 // the right place.
2434 unsigned ShiftOpcode = NLZ > NTZ ? ISD::SRL : ISD::SHL;
2435 if (!TLO.LegalOperations() || isOperationLegal(ShiftOpcode, VT)) {
2436 unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ;
2437 SDValue ShAmt = TLO.DAG.getShiftAmountConstant(ShiftAmount, VT, dl);
2438 SDValue NewOp = TLO.DAG.getNode(ShiftOpcode, dl, VT, Src, ShAmt);
2439 return TLO.CombineTo(Op, NewOp);
2440 }
2441 }
2442
2443 APInt DemandedSrcBits = DemandedBits.byteSwap();
2444 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2445 Depth + 1))
2446 return true;
2447 Known = Known2.byteSwap();
2448 break;
2449 }
2450 case ISD::CTPOP: {
2451 // If only 1 bit is demanded, replace with PARITY as long as we're before
2452 // op legalization.
2453 // FIXME: Limit to scalars for now.
2454 if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
2455 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
2456 Op.getOperand(0)));
2457
2458 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2459 break;
2460 }
2462 SDValue Op0 = Op.getOperand(0);
2463 EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2464 unsigned ExVTBits = ExVT.getScalarSizeInBits();
2465
2466 // If we only care about the highest bit, don't bother shifting right.
2467 if (DemandedBits.isSignMask()) {
2468 unsigned MinSignedBits =
2469 TLO.DAG.ComputeMaxSignificantBits(Op0, DemandedElts, Depth + 1);
2470 bool AlreadySignExtended = ExVTBits >= MinSignedBits;
2471 // However if the input is already sign extended we expect the sign
2472 // extension to be dropped altogether later and do not simplify.
2473 if (!AlreadySignExtended) {
2474 // Compute the correct shift amount type, which must be getShiftAmountTy
2475 // for scalar types after legalization.
2476 SDValue ShiftAmt =
2477 TLO.DAG.getShiftAmountConstant(BitWidth - ExVTBits, VT, dl);
2478 return TLO.CombineTo(Op,
2479 TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
2480 }
2481 }
2482
2483 // If none of the extended bits are demanded, eliminate the sextinreg.
2484 if (DemandedBits.getActiveBits() <= ExVTBits)
2485 return TLO.CombineTo(Op, Op0);
2486
2487 APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);
2488
2489 // Since the sign extended bits are demanded, we know that the sign
2490 // bit is demanded.
2491 InputDemandedBits.setBit(ExVTBits - 1);
2492
2493 if (SimplifyDemandedBits(Op0, InputDemandedBits, DemandedElts, Known, TLO,
2494 Depth + 1))
2495 return true;
2496
2497 // If the sign bit of the input is known set or clear, then we know the
2498 // top bits of the result.
2499
2500 // If the input sign bit is known zero, convert this into a zero extension.
2501 if (Known.Zero[ExVTBits - 1])
2502 return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT));
2503
2504 APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
2505 if (Known.One[ExVTBits - 1]) { // Input sign bit known set
2506 Known.One.setBitsFrom(ExVTBits);
2507 Known.Zero &= Mask;
2508 } else { // Input sign bit unknown
2509 Known.Zero &= Mask;
2510 Known.One &= Mask;
2511 }
2512 break;
2513 }
2514 case ISD::BUILD_PAIR: {
2515 EVT HalfVT = Op.getOperand(0).getValueType();
2516 unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
2517
2518 APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
2519 APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
2520
2521 KnownBits KnownLo, KnownHi;
2522
2523 if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
2524 return true;
2525
2526 if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
2527 return true;
2528
2529 Known = KnownHi.concat(KnownLo);
2530 break;
2531 }
2533 if (VT.isScalableVector())
2534 return false;
2535 [[fallthrough]];
2536 case ISD::ZERO_EXTEND: {
2537 SDValue Src = Op.getOperand(0);
2538 EVT SrcVT = Src.getValueType();
2539 unsigned InBits = SrcVT.getScalarSizeInBits();
2540 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2541 bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
2542
2543 // If none of the top bits are demanded, convert this into an any_extend.
2544 if (DemandedBits.getActiveBits() <= InBits) {
2545 // If we only need the non-extended bits of the bottom element
2546 // then we can just bitcast to the result.
2547 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2548 VT.getSizeInBits() == SrcVT.getSizeInBits())
2549 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2550
2551 unsigned Opc =
2553 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2554 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2555 }
2556
2557 APInt InDemandedBits = DemandedBits.trunc(InBits);
2558 APInt InDemandedElts = DemandedElts.zext(InElts);
2559 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2560 Depth + 1)) {
2561 Op->dropFlags(SDNodeFlags::NonNeg);
2562 return true;
2563 }
2564 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2565 Known = Known.zext(BitWidth);
2566
2567 // Attempt to avoid multi-use ops if we don't need anything from them.
2569 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2570 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2571 break;
2572 }
2574 if (VT.isScalableVector())
2575 return false;
2576 [[fallthrough]];
2577 case ISD::SIGN_EXTEND: {
2578 SDValue Src = Op.getOperand(0);
2579 EVT SrcVT = Src.getValueType();
2580 unsigned InBits = SrcVT.getScalarSizeInBits();
2581 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2582 bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
2583
2584 APInt InDemandedElts = DemandedElts.zext(InElts);
2585 APInt InDemandedBits = DemandedBits.trunc(InBits);
2586
2587 // Since some of the sign extended bits are demanded, we know that the sign
2588 // bit is demanded.
2589 InDemandedBits.setBit(InBits - 1);
2590
2591 // If none of the top bits are demanded, convert this into an any_extend.
2592 if (DemandedBits.getActiveBits() <= InBits) {
2593 // If we only need the non-extended bits of the bottom element
2594 // then we can just bitcast to the result.
2595 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2596 VT.getSizeInBits() == SrcVT.getSizeInBits())
2597 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2598
2599 // Don't lose an all signbits 0/-1 splat on targets with 0/-1 booleans.
2601 TLO.DAG.ComputeNumSignBits(Src, InDemandedElts, Depth + 1) !=
2602 InBits) {
2603 unsigned Opc =
2605 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2606 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2607 }
2608 }
2609
2610 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2611 Depth + 1))
2612 return true;
2613 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2614
2615 // If the sign bit is known one, the top bits match.
2616 Known = Known.sext(BitWidth);
2617
2618 // If the sign bit is known zero, convert this to a zero extend.
2619 if (Known.isNonNegative()) {
2620 unsigned Opc =
2622 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT)) {
2623 SDNodeFlags Flags;
2624 if (!IsVecInReg)
2625 Flags |= SDNodeFlags::NonNeg;
2626 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src, Flags));
2627 }
2628 }
2629
2630 // Attempt to avoid multi-use ops if we don't need anything from them.
2632 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2633 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2634 break;
2635 }
2637 if (VT.isScalableVector())
2638 return false;
2639 [[fallthrough]];
2640 case ISD::ANY_EXTEND: {
2641 SDValue Src = Op.getOperand(0);
2642 EVT SrcVT = Src.getValueType();
2643 unsigned InBits = SrcVT.getScalarSizeInBits();
2644 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2645 bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
2646
2647 // If we only need the bottom element then we can just bitcast.
2648 // TODO: Handle ANY_EXTEND?
2649 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2650 VT.getSizeInBits() == SrcVT.getSizeInBits())
2651 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2652
2653 APInt InDemandedBits = DemandedBits.trunc(InBits);
2654 APInt InDemandedElts = DemandedElts.zext(InElts);
2655 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2656 Depth + 1))
2657 return true;
2658 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2659 Known = Known.anyext(BitWidth);
2660
2661 // Attempt to avoid multi-use ops if we don't need anything from them.
2663 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2664 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2665 break;
2666 }
2667 case ISD::TRUNCATE: {
2668 SDValue Src = Op.getOperand(0);
2669
2670 // Simplify the input, using demanded bit information, and compute the known
2671 // zero/one bits live out.
2672 unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
2673 APInt TruncMask = DemandedBits.zext(OperandBitWidth);
2674 if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
2675 Depth + 1)) {
2676 // Disable the nsw and nuw flags. We can no longer guarantee that we
2677 // won't wrap after simplification.
2678 Op->dropFlags(SDNodeFlags::NoWrap);
2679 return true;
2680 }
2681 Known = Known.trunc(BitWidth);
2682
2683 // Attempt to avoid multi-use ops if we don't need anything from them.
2685 Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
2686 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));
2687
2688 // If the input is only used by this truncate, see if we can shrink it based
2689 // on the known demanded bits.
2690 switch (Src.getOpcode()) {
2691 default:
2692 break;
2693 case ISD::SRL:
2694 // Shrink SRL by a constant if none of the high bits shifted in are
2695 // demanded.
2696 if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
2697 // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
2698 // undesirable.
2699 break;
2700
2701 if (Src.getNode()->hasOneUse()) {
2702 if (isTruncateFree(Src, VT) &&
2703 !isTruncateFree(Src.getValueType(), VT)) {
2704 // If truncate is only free at trunc(srl), do not turn it into
2705 // srl(trunc). This is checked by first testing that the truncate is
2706 // free at Src's opcode (srl), then testing that the truncate is not
2707 // done by referencing a sub-register. In testing, if both trunc(srl) and
2708 // srl(trunc)'s trunc are free, srl(trunc) performs better. If only
2709 // trunc(srl)'s trunc is free, trunc(srl) is better.
2710 break;
2711 }
2712
2713 std::optional<unsigned> ShAmtC =
2714 TLO.DAG.getValidShiftAmount(Src, DemandedElts, Depth + 2);
2715 if (!ShAmtC || *ShAmtC >= BitWidth)
2716 break;
2717 unsigned ShVal = *ShAmtC;
2718
2719 APInt HighBits =
2720 APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
2721 HighBits.lshrInPlace(ShVal);
2722 HighBits = HighBits.trunc(BitWidth);
2723 if (!(HighBits & DemandedBits)) {
2724 // None of the shifted in bits are needed. Add a truncate of the
2725 // shift input, then shift it.
2726 SDValue NewShAmt = TLO.DAG.getShiftAmountConstant(ShVal, VT, dl);
2727 SDValue NewTrunc =
2728 TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
2729 return TLO.CombineTo(
2730 Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, NewShAmt));
2731 }
2732 }
2733 break;
2734 }
2735
2736 break;
2737 }
2738 case ISD::AssertZext: {
2739 // AssertZext demands all of the high bits, plus any of the low bits
2740 // demanded by its users.
2741 EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2743 if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
2744 TLO, Depth + 1))
2745 return true;
2746
2747 Known.Zero |= ~InMask;
2748 Known.One &= (~Known.Zero);
2749 break;
2750 }
2752 SDValue Src = Op.getOperand(0);
2753 SDValue Idx = Op.getOperand(1);
2754 ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
2755 unsigned EltBitWidth = Src.getScalarValueSizeInBits();
2756
2757 if (SrcEltCnt.isScalable())
2758 return false;
2759
2760 // Demand the bits from every vector element without a constant index.
2761 unsigned NumSrcElts = SrcEltCnt.getFixedValue();
2762 APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
2763 if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
2764 if (CIdx->getAPIntValue().ult(NumSrcElts))
2765 DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
2766
2767 // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know
2768 // anything about the extended bits.
2769 APInt DemandedSrcBits = DemandedBits;
2770 if (BitWidth > EltBitWidth)
2771 DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);
2772
2773 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
2774 Depth + 1))
2775 return true;
2776
2777 // Attempt to avoid multi-use ops if we don't need anything from them.
2778 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2779 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2780 Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2781 SDValue NewOp =
2782 TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx);
2783 return TLO.CombineTo(Op, NewOp);
2784 }
2785 }
2786
2787 Known = Known2;
2788 if (BitWidth > EltBitWidth)
2789 Known = Known.anyext(BitWidth);
2790 break;
2791 }
2792 case ISD::BITCAST: {
2793 if (VT.isScalableVector())
2794 return false;
2795 SDValue Src = Op.getOperand(0);
2796 EVT SrcVT = Src.getValueType();
2797 unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
2798
2799 // If this is an FP->Int bitcast and if the sign bit is the only
2800 // thing demanded, turn this into a FGETSIGN.
2801 if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
2802 DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
2803 SrcVT.isFloatingPoint()) {
2805 // Make a FGETSIGN + SHL to move the sign bit into the appropriate
2806 // place. We expect the SHL to be eliminated by other optimizations.
2807 SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, VT, Src);
2808 unsigned ShVal = Op.getValueSizeInBits() - 1;
2809 SDValue ShAmt = TLO.DAG.getShiftAmountConstant(ShVal, VT, dl);
2810 return TLO.CombineTo(Op,
2811 TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
2812 }
2813 }
2814
2815 // Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
2816 // Demand the elt/bit if any of the original elts/bits are demanded.
2817 if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) {
2818 unsigned Scale = BitWidth / NumSrcEltBits;
2819 unsigned NumSrcElts = SrcVT.getVectorNumElements();
2820 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2821 for (unsigned i = 0; i != Scale; ++i) {
2822 unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
2823 unsigned BitOffset = EltOffset * NumSrcEltBits;
2824 DemandedSrcBits |= DemandedBits.extractBits(NumSrcEltBits, BitOffset);
2825 }
2826 // Recursive calls below may turn non-demanded elements into poison, so we
2827 // need to demand all smaller source elements that map to a demanded
2828 // destination element.
2829 APInt DemandedSrcElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
2830
2831 APInt KnownSrcUndef, KnownSrcZero;
2832 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2833 KnownSrcZero, TLO, Depth + 1))
2834 return true;
2835
2836 KnownBits KnownSrcBits;
2837 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2838 KnownSrcBits, TLO, Depth + 1))
2839 return true;
2840 } else if (IsLE && (NumSrcEltBits % BitWidth) == 0) {
2841 // TODO - bigendian once we have test coverage.
2842 unsigned Scale = NumSrcEltBits / BitWidth;
2843 unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
2844 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2845 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2846 for (unsigned i = 0; i != NumElts; ++i)
2847 if (DemandedElts[i]) {
2848 unsigned Offset = (i % Scale) * BitWidth;
2849 DemandedSrcBits.insertBits(DemandedBits, Offset);
2850 DemandedSrcElts.setBit(i / Scale);
2851 }
2852
2853 if (SrcVT.isVector()) {
2854 APInt KnownSrcUndef, KnownSrcZero;
2855 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2856 KnownSrcZero, TLO, Depth + 1))
2857 return true;
2858 }
2859
2860 KnownBits KnownSrcBits;
2861 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2862 KnownSrcBits, TLO, Depth + 1))
2863 return true;
2864
2865 // Attempt to avoid multi-use ops if we don't need anything from them.
2866 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2867 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2868 Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2869 SDValue NewOp = TLO.DAG.getBitcast(VT, DemandedSrc);
2870 return TLO.CombineTo(Op, NewOp);
2871 }
2872 }
2873 }
2874
2875 // If this is a bitcast, let computeKnownBits handle it. Only do this on a
2876 // recursive call where Known may be useful to the caller.
2877 if (Depth > 0) {
2878 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2879 return false;
2880 }
2881 break;
2882 }
2883 case ISD::MUL:
2884 if (DemandedBits.isPowerOf2()) {
2885 // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
2886 // If we demand exactly one bit N and we have "X * (C' << N)" where C' is
2887 // odd (has LSB set), then the left-shifted low bit of X is the answer.
2888 unsigned CTZ = DemandedBits.countr_zero();
2889 ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
2890 if (C && C->getAPIntValue().countr_zero() == CTZ) {
2891 SDValue AmtC = TLO.DAG.getShiftAmountConstant(CTZ, VT, dl);
2892 SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, Op.getOperand(0), AmtC);
2893 return TLO.CombineTo(Op, Shl);
2894 }
2895 }
2896 // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
2897 // X * X is odd iff X is odd.
2898 // 'Quadratic Reciprocity': X * X -> 0 for bit[1]
2899 if (Op.getOperand(0) == Op.getOperand(1) && DemandedBits.ult(4)) {
2900 SDValue One = TLO.DAG.getConstant(1, dl, VT);
2901 SDValue And1 = TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), One);
2902 return TLO.CombineTo(Op, And1);
2903 }
2904 [[fallthrough]];
2905 case ISD::PTRADD:
2906 if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType())
2907 break;
2908 // PTRADD behaves like ADD if pointers are represented as integers.
2909 [[fallthrough]];
2910 case ISD::ADD:
2911 case ISD::SUB: {
2912 // Add, Sub, and Mul don't demand any bits in positions beyond that
2913 // of the highest bit demanded of them.
2914 SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
2915 SDNodeFlags Flags = Op.getNode()->getFlags();
2916 unsigned DemandedBitsLZ = DemandedBits.countl_zero();
2917 APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
2918 KnownBits KnownOp0, KnownOp1;
2919 auto GetDemandedBitsLHSMask = [&](APInt Demanded,
2920 const KnownBits &KnownRHS) {
2921 if (Op.getOpcode() == ISD::MUL)
2922 Demanded.clearHighBits(KnownRHS.countMinTrailingZeros());
2923 return Demanded;
2924 };
2925 if (SimplifyDemandedBits(Op1, LoMask, DemandedElts, KnownOp1, TLO,
2926 Depth + 1) ||
2927 SimplifyDemandedBits(Op0, GetDemandedBitsLHSMask(LoMask, KnownOp1),
2928 DemandedElts, KnownOp0, TLO, Depth + 1) ||
2929 // See if the operation should be performed at a smaller bit width.
2931 // Disable the nsw and nuw flags. We can no longer guarantee that we
2932 // won't wrap after simplification.
2933 Op->dropFlags(SDNodeFlags::NoWrap);
2934 return true;
2935 }
2936
2937 // neg x with only low bit demanded is simply x.
2938 if (Op.getOpcode() == ISD::SUB && DemandedBits.isOne() &&
2939 isNullConstant(Op0))
2940 return TLO.CombineTo(Op, Op1);
2941
2942 // Attempt to avoid multi-use ops if we don't need anything from them.
2943 if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2945 Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2947 Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2948 if (DemandedOp0 || DemandedOp1) {
2949 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
2950 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
2951 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1,
2952 Flags & ~SDNodeFlags::NoWrap);
2953 return TLO.CombineTo(Op, NewOp);
2954 }
2955 }
2956
2957 // If we have a constant operand, we may be able to turn it into -1 if we
2958 // do not demand the high bits. This can make the constant smaller to
2959 // encode, allow more general folding, or match specialized instruction
2960 // patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
2961 // is probably not useful (and could be detrimental).
2963 APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
2964 if (C && !C->isAllOnes() && !C->isOne() &&
2965 (C->getAPIntValue() | HighMask).isAllOnes()) {
2966 SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
2967 // Disable the nsw and nuw flags. We can no longer guarantee that we
2968 // won't wrap after simplification.
2969 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1,
2970 Flags & ~SDNodeFlags::NoWrap);
2971 return TLO.CombineTo(Op, NewOp);
2972 }
2973
2974 // Match a multiply with a disguised negated-power-of-2 and convert to
2975 // an equivalent shift-left amount.
2976 // Example: (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2977 auto getShiftLeftAmt = [&HighMask](SDValue Mul) -> unsigned {
2978 if (Mul.getOpcode() != ISD::MUL || !Mul.hasOneUse())
2979 return 0;
2980
2981 // Don't touch opaque constants. Also, ignore zero and power-of-2
2982 // multiplies. Those will get folded later.
2983 ConstantSDNode *MulC = isConstOrConstSplat(Mul.getOperand(1));
2984 if (MulC && !MulC->isOpaque() && !MulC->isZero() &&
2985 !MulC->getAPIntValue().isPowerOf2()) {
2986 APInt UnmaskedC = MulC->getAPIntValue() | HighMask;
2987 if (UnmaskedC.isNegatedPowerOf2())
2988 return (-UnmaskedC).logBase2();
2989 }
2990 return 0;
2991 };
2992
2993 auto foldMul = [&](ISD::NodeType NT, SDValue X, SDValue Y,
2994 unsigned ShlAmt) {
2995 SDValue ShlAmtC = TLO.DAG.getShiftAmountConstant(ShlAmt, VT, dl);
2996 SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, X, ShlAmtC);
2997 SDValue Res = TLO.DAG.getNode(NT, dl, VT, Y, Shl);
2998 return TLO.CombineTo(Op, Res);
2999 };
3000
3002 if (Op.getOpcode() == ISD::ADD) {
3003 // (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
3004 if (unsigned ShAmt = getShiftLeftAmt(Op0))
3005 return foldMul(ISD::SUB, Op0.getOperand(0), Op1, ShAmt);
3006 // Op0 + (X * MulC) --> Op0 - (X << log2(-MulC))
3007 if (unsigned ShAmt = getShiftLeftAmt(Op1))
3008 return foldMul(ISD::SUB, Op1.getOperand(0), Op0, ShAmt);
3009 }
3010 if (Op.getOpcode() == ISD::SUB) {
3011 // Op0 - (X * MulC) --> Op0 + (X << log2(-MulC))
3012 if (unsigned ShAmt = getShiftLeftAmt(Op1))
3013 return foldMul(ISD::ADD, Op1.getOperand(0), Op0, ShAmt);
3014 }
3015 }
3016
3017 if (Op.getOpcode() == ISD::MUL) {
3018 Known = KnownBits::mul(KnownOp0, KnownOp1);
3019 } else { // Op.getOpcode() is either ISD::ADD, ISD::PTRADD, or ISD::SUB.
3021 Op.getOpcode() != ISD::SUB, Flags.hasNoSignedWrap(),
3022 Flags.hasNoUnsignedWrap(), KnownOp0, KnownOp1);
3023 }
3024 break;
3025 }
3026 case ISD::FABS: {
3027 SDValue Op0 = Op.getOperand(0);
3028 APInt SignMask = APInt::getSignMask(BitWidth);
3029
3030 if (!DemandedBits.intersects(SignMask))
3031 return TLO.CombineTo(Op, Op0);
3032
3033 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known, TLO,
3034 Depth + 1))
3035 return true;
3036
3037 if (Known.isNonNegative())
3038 return TLO.CombineTo(Op, Op0);
3039 if (Known.isNegative())
3040 return TLO.CombineTo(
3041 Op, TLO.DAG.getNode(ISD::FNEG, dl, VT, Op0, Op->getFlags()));
3042
3043 Known.Zero |= SignMask;
3044 Known.One &= ~SignMask;
3045
3046 break;
3047 }
3048 case ISD::FCOPYSIGN: {
3049 SDValue Op0 = Op.getOperand(0);
3050 SDValue Op1 = Op.getOperand(1);
3051
3052 unsigned BitWidth0 = Op0.getScalarValueSizeInBits();
3053 unsigned BitWidth1 = Op1.getScalarValueSizeInBits();
3054 APInt SignMask0 = APInt::getSignMask(BitWidth0);
3055 APInt SignMask1 = APInt::getSignMask(BitWidth1);
3056
3057 if (!DemandedBits.intersects(SignMask0))
3058 return TLO.CombineTo(Op, Op0);
3059
3060 if (SimplifyDemandedBits(Op0, ~SignMask0 & DemandedBits, DemandedElts,
3061 Known, TLO, Depth + 1) ||
3062 SimplifyDemandedBits(Op1, SignMask1, DemandedElts, Known2, TLO,
3063 Depth + 1))
3064 return true;
3065
3066 if (Known2.isNonNegative())
3067 return TLO.CombineTo(
3068 Op, TLO.DAG.getNode(ISD::FABS, dl, VT, Op0, Op->getFlags()));
3069
3070 if (Known2.isNegative())
3071 return TLO.CombineTo(
3072 Op, TLO.DAG.getNode(ISD::FNEG, dl, VT,
3073 TLO.DAG.getNode(ISD::FABS, SDLoc(Op0), VT, Op0)));
3074
3075 Known.Zero &= ~SignMask0;
3076 Known.One &= ~SignMask0;
3077 break;
3078 }
3079 case ISD::FNEG: {
3080 SDValue Op0 = Op.getOperand(0);
3081 APInt SignMask = APInt::getSignMask(BitWidth);
3082
3083 if (!DemandedBits.intersects(SignMask))
3084 return TLO.CombineTo(Op, Op0);
3085
3086 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known, TLO,
3087 Depth + 1))
3088 return true;
3089
3090 if (!Known.isSignUnknown()) {
3091 Known.Zero ^= SignMask;
3092 Known.One ^= SignMask;
3093 }
3094
3095 break;
3096 }
3097 default:
3098 // We also ask the target about intrinsics (which could be specific to it).
3099 if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3100 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
3101 // TODO: Probably okay to remove after audit; here to reduce change size
3102 // in initial enablement patch for scalable vectors
3103 if (Op.getValueType().isScalableVector())
3104 break;
3106 Known, TLO, Depth))
3107 return true;
3108 break;
3109 }
3110
3111 // Just use computeKnownBits to compute output bits.
3112 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
3113 break;
3114 }
3115
3116 // If we know the value of all of the demanded bits, return this as a
3117 // constant.
3119 DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
3120 // Avoid folding to a constant if any OpaqueConstant is involved.
3121 if (llvm::any_of(Op->ops(), [](SDValue V) {
3122 auto *C = dyn_cast<ConstantSDNode>(V);
3123 return C && C->isOpaque();
3124 }))
3125 return false;
3126 if (VT.isInteger())
3127 return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
3128 if (VT.isFloatingPoint())
3129 return TLO.CombineTo(
3130 Op, TLO.DAG.getConstantFP(APFloat(VT.getFltSemantics(), Known.One),
3131 dl, VT));
3132 }
3133
3134 // A multi use 'all demanded elts' simplify failed to find any knownbits.
3135 // Try again just for the original demanded elts.
3136 // Ensure we do this AFTER constant folding above.
3137 if (HasMultiUse && Known.isUnknown() && !OriginalDemandedElts.isAllOnes())
3138 Known = TLO.DAG.computeKnownBits(Op, OriginalDemandedElts, Depth);
3139
3140 return false;
3141}
3142
/// DAG-combiner entry point for demanded-vector-element simplification.
///
/// Builds a TargetLoweringOpt from the combiner state in \p DCI, forwards to
/// the SimplifyDemandedVectorElts overload that takes KnownUndef/KnownZero
/// outputs and a TargetLoweringOpt, and, when that overload reports a change,
/// re-queues Op's node and commits the TargetLoweringOpt through \p DCI.
///
/// \param DemandedElts mask of the vector elements of Op that are demanded.
/// \param DCI combiner state; supplies the SelectionDAG and the
///        before-legalize / before-legalize-ops phase queries.
/// \returns the result of the forwarded call (true when a simplification was
///          made).
// NOTE(review): the first line of this signature (return type, function name
// and the `Op` parameter) is not present in this listing - confirm against
// the original file.
3144 const APInt &DemandedElts,
3145 DAGCombinerInfo &DCI) const {
3146 SelectionDAG &DAG = DCI.DAG;
// The two flags are the negated "before legalize" / "before legalize ops"
// queries; presumably they are the legal-types / legal-operations flags of
// TargetLoweringOpt - TODO confirm against its constructor.
3147 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
3148 !DCI.isBeforeLegalizeOps());
3149
// KnownUndef/KnownZero are outputs of the forwarded overload; this wrapper
// does not read them afterwards.
3150 APInt KnownUndef, KnownZero;
3151 bool Simplified =
3152 SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
// Only touch the combiner worklist / commit TLO when something changed.
3153 if (Simplified) {
3154 DCI.AddToWorklist(Op.getNode());
3155 DCI.CommitTargetLoweringOpt(TLO);
3156 }
3157
3158 return Simplified;
3159}
3160
3161/// Given a vector binary operation and known undefined elements for each input
3162/// operand, compute whether each element of the output is undefined.
///
/// The body reads the value type, opcode and operands 0 and 1 from `BO`, and
/// creates nodes through `DAG`.
///
/// \param UndefOp0 per-element undef mask for operand 0; its bit width must
///        equal the element count computed below (asserted).
/// \param UndefOp1 per-element undef mask for operand 1; same width
///        requirement.
/// \returns a mask with bit i set when both inputs for element i are
///          undef/constant scalars of the element type and building the
///          scalar operation on them yields an undef node.
// NOTE(review): the line declaring this function's name and its leading
// parameters, and the first line of the "Vector binop only" assert, are not
// present in this listing - confirm against the original file.
3164 const APInt &UndefOp0,
3165 const APInt &UndefOp1) {
3166 EVT VT = BO.getValueType();
3168 "Vector binop only");
3169
3170 EVT EltVT = VT.getVectorElementType();
// Vectors that are not fixed-length are analysed as a single element.
3171 unsigned NumElts = VT.isFixedLengthVector() ? VT.getVectorNumElements() : 1;
3172 assert(UndefOp0.getBitWidth() == NumElts &&
3173 UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
3174
// Returns the scalar to use for element `Index` of operand `V`:
//  - an UNDEF of the element type if the mask marks the element undef;
//  - the build-vector operand itself if it is an FP constant, undef, or a
//    non-opaque integer constant;
//  - an empty SDValue otherwise (element cannot be analysed).
3175 auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
3176 const APInt &UndefVals) {
3177 if (UndefVals[Index])
3178 return DAG.getUNDEF(EltVT);
3179
3180 if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
3181 // Try hard to make sure that the getNode() call is not creating temporary
3182 // nodes. Ignore opaque integers because they do not constant fold.
3183 SDValue Elt = BV->getOperand(Index);
3184 auto *C = dyn_cast<ConstantSDNode>(Elt);
3185 if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
3186 return Elt;
3187 }
3188
3189 return SDValue();
3190 };
3191
// Start with no element known undef and set bits one element at a time.
3192 APInt KnownUndef = APInt::getZero(NumElts);
3193 for (unsigned i = 0; i != NumElts; ++i) {
3194 // If both inputs for this element are either constant or undef and match
3195 // the element type, compute the constant/undef result for this element of
3196 // the vector.
3197 // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
3198 // not handle FP constants. The code within getNode() should be refactored
3199 // to avoid the danger of creating a bogus temporary node here.
3200 SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
3201 SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
// The value-type checks skip build-vector operands whose scalar type differs
// from the vector element type.
3202 if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
3203 if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
3204 KnownUndef.setBit(i);
3205 }
3206 return KnownUndef;
3207}
3208
3210 SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
3211 APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
3212 bool AssumeSingleUse) const {
3213 EVT VT = Op.getValueType();
3214 unsigned Opcode = Op.getOpcode();
3215 APInt DemandedElts = OriginalDemandedElts;
3216 unsigned NumElts = DemandedElts.getBitWidth();
3217 assert(VT.isVector() && "Expected vector op");
3218
3219 KnownUndef = KnownZero = APInt::getZero(NumElts);
3220
3222 return false;
3223
3224 // TODO: For now we assume we know nothing about scalable vectors.
3225 if (VT.isScalableVector())
3226 return false;
3227
3228 assert(VT.getVectorNumElements() == NumElts &&
3229 "Mask size mismatches value type element count!");
3230
3231 // Undef operand.
3232 if (Op.isUndef()) {
3233 KnownUndef.setAllBits();
3234 return false;
3235 }
3236
3237 // If Op has other users, assume that all elements are needed.
3238 if (!AssumeSingleUse && !Op.getNode()->hasOneUse())
3239 DemandedElts.setAllBits();
3240
3241 // Not demanding any elements from Op.
3242 if (DemandedElts == 0) {
3243 KnownUndef.setAllBits();
3244 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3245 }
3246
3247 // Limit search depth.
3249 return false;
3250
3251 SDLoc DL(Op);
3252 unsigned EltSizeInBits = VT.getScalarSizeInBits();
3253 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
3254
3255 // Helper for demanding the specified elements and all the bits of both binary
3256 // operands.
3257 auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
3258 SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op0, DemandedElts,
3259 TLO.DAG, Depth + 1);
3260 SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op1, DemandedElts,
3261 TLO.DAG, Depth + 1);
3262 if (NewOp0 || NewOp1) {
3263 SDValue NewOp =
3264 TLO.DAG.getNode(Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0,
3265 NewOp1 ? NewOp1 : Op1, Op->getFlags());
3266 return TLO.CombineTo(Op, NewOp);
3267 }
3268 return false;
3269 };
3270
3271 switch (Opcode) {
3272 case ISD::SCALAR_TO_VECTOR: {
3273 if (!DemandedElts[0]) {
3274 KnownUndef.setAllBits();
3275 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3276 }
3277 KnownUndef.setHighBits(NumElts - 1);
3278 break;
3279 }
3280 case ISD::BITCAST: {
3281 SDValue Src = Op.getOperand(0);
3282 EVT SrcVT = Src.getValueType();
3283
3284 if (!SrcVT.isVector()) {
3285 // TODO - bigendian once we have test coverage.
3286 if (IsLE) {
3287 APInt DemandedSrcBits = APInt::getZero(SrcVT.getSizeInBits());
3288 unsigned EltSize = VT.getScalarSizeInBits();
3289 for (unsigned I = 0; I != NumElts; ++I) {
3290 if (DemandedElts[I]) {
3291 unsigned Offset = I * EltSize;
3292 DemandedSrcBits.setBits(Offset, Offset + EltSize);
3293 }
3294 }
3295 KnownBits Known;
3296 if (SimplifyDemandedBits(Src, DemandedSrcBits, Known, TLO, Depth + 1))
3297 return true;
3298 }
3299 break;
3300 }
3301
3302 // Fast handling of 'identity' bitcasts.
3303 unsigned NumSrcElts = SrcVT.getVectorNumElements();
3304 if (NumSrcElts == NumElts)
3305 return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
3306 KnownZero, TLO, Depth + 1);
3307
3308 APInt SrcDemandedElts, SrcZero, SrcUndef;
3309
3310 // Bitcast from 'large element' src vector to 'small element' vector, we
3311 // must demand a source element if any DemandedElt maps to it.
3312 if ((NumElts % NumSrcElts) == 0) {
3313 unsigned Scale = NumElts / NumSrcElts;
3314 SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3315 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3316 TLO, Depth + 1))
3317 return true;
3318
3319 // Try calling SimplifyDemandedBits, converting demanded elts to the bits
3320 // of the large element.
3321 // TODO - bigendian once we have test coverage.
3322 if (IsLE) {
3323 unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
3324 APInt SrcDemandedBits = APInt::getZero(SrcEltSizeInBits);
3325 for (unsigned i = 0; i != NumElts; ++i)
3326 if (DemandedElts[i]) {
3327 unsigned Ofs = (i % Scale) * EltSizeInBits;
3328 SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
3329 }
3330
3331 KnownBits Known;
3332 if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known,
3333 TLO, Depth + 1))
3334 return true;
3335
3336 // The bitcast has split each wide element into a number of
3337 // narrow subelements. We have just computed the Known bits
3338 // for wide elements. See if element splitting results in
3339 // some subelements being zero. Only for demanded elements!
3340 for (unsigned SubElt = 0; SubElt != Scale; ++SubElt) {
3341 if (!Known.Zero.extractBits(EltSizeInBits, SubElt * EltSizeInBits)
3342 .isAllOnes())
3343 continue;
3344 for (unsigned SrcElt = 0; SrcElt != NumSrcElts; ++SrcElt) {
3345 unsigned Elt = Scale * SrcElt + SubElt;
3346 if (DemandedElts[Elt])
3347 KnownZero.setBit(Elt);
3348 }
3349 }
3350 }
3351
3352 // If the src element is zero/undef then all the output elements will be -
3353 // only demanded elements are guaranteed to be correct.
3354 for (unsigned i = 0; i != NumSrcElts; ++i) {
3355 if (SrcDemandedElts[i]) {
3356 if (SrcZero[i])
3357 KnownZero.setBits(i * Scale, (i + 1) * Scale);
3358 if (SrcUndef[i])
3359 KnownUndef.setBits(i * Scale, (i + 1) * Scale);
3360 }
3361 }
3362 }
3363
3364 // Bitcast from 'small element' src vector to 'large element' vector, we
3365 // demand all smaller source elements covered by the larger demanded element
3366 // of this vector.
3367 if ((NumSrcElts % NumElts) == 0) {
3368 unsigned Scale = NumSrcElts / NumElts;
3369 SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3370 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3371 TLO, Depth + 1))
3372 return true;
3373
3374 // If all the src elements covering an output element are zero/undef, then
3375 // the output element will be as well, assuming it was demanded.
3376 for (unsigned i = 0; i != NumElts; ++i) {
3377 if (DemandedElts[i]) {
3378 if (SrcZero.extractBits(Scale, i * Scale).isAllOnes())
3379 KnownZero.setBit(i);
3380 if (SrcUndef.extractBits(Scale, i * Scale).isAllOnes())
3381 KnownUndef.setBit(i);
3382 }
3383 }
3384 }
3385 break;
3386 }
3387 case ISD::FREEZE: {
3388 SDValue N0 = Op.getOperand(0);
3390 N0, DemandedElts, UndefPoisonKind::UndefOrPoison, Depth + 1))
3391 return TLO.CombineTo(Op, N0);
3392
3393 // TODO: Replace this with the general fold from DAGCombiner::visitFREEZE
3394 // freeze(op(x, ...)) -> op(freeze(x), ...).
3395 if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && DemandedElts == 1)
3396 return TLO.CombineTo(
3398 TLO.DAG.getFreeze(N0.getOperand(0))));
3399 break;
3400 }
3401 case ISD::BUILD_VECTOR: {
3402 // Check all elements and simplify any unused elements with UNDEF.
3403 if (!DemandedElts.isAllOnes()) {
3404 // Don't simplify BROADCASTS.
3405 if (llvm::any_of(Op->op_values(),
3406 [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
3408 bool Updated = false;
3409 for (unsigned i = 0; i != NumElts; ++i) {
3410 if (!DemandedElts[i] && !Ops[i].isUndef()) {
3411 Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
3412 KnownUndef.setBit(i);
3413 Updated = true;
3414 }
3415 }
3416 if (Updated)
3417 return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
3418 }
3419 }
3420 for (unsigned i = 0; i != NumElts; ++i) {
3421 SDValue SrcOp = Op.getOperand(i);
3422 if (SrcOp.isUndef()) {
3423 KnownUndef.setBit(i);
3424 } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
3426 KnownZero.setBit(i);
3427 }
3428 }
3429 break;
3430 }
3431 case ISD::CONCAT_VECTORS: {
3432 EVT SubVT = Op.getOperand(0).getValueType();
3433 unsigned NumSubVecs = Op.getNumOperands();
3434 unsigned NumSubElts = SubVT.getVectorNumElements();
3435 for (unsigned i = 0; i != NumSubVecs; ++i) {
3436 SDValue SubOp = Op.getOperand(i);
3437 APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3438 APInt SubUndef, SubZero;
3439 if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
3440 Depth + 1))
3441 return true;
3442 KnownUndef.insertBits(SubUndef, i * NumSubElts);
3443 KnownZero.insertBits(SubZero, i * NumSubElts);
3444 }
3445
3446 // Attempt to avoid multi-use ops if we don't need anything from them.
3447 if (!DemandedElts.isAllOnes()) {
3448 bool FoundNewSub = false;
3449 SmallVector<SDValue, 2> DemandedSubOps;
3450 for (unsigned i = 0; i != NumSubVecs; ++i) {
3451 SDValue SubOp = Op.getOperand(i);
3452 APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3454 SubOp, SubElts, TLO.DAG, Depth + 1);
3455 DemandedSubOps.push_back(NewSubOp ? NewSubOp : SubOp);
3456 FoundNewSub = NewSubOp ? true : FoundNewSub;
3457 }
3458 if (FoundNewSub) {
3459 SDValue NewOp =
3460 TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, DemandedSubOps);
3461 return TLO.CombineTo(Op, NewOp);
3462 }
3463 }
3464 break;
3465 }
3466 case ISD::INSERT_SUBVECTOR: {
3467 // Demand any elements from the subvector and the remainder from the src it
3468 // is inserted into.
3469 SDValue Src = Op.getOperand(0);
3470 SDValue Sub = Op.getOperand(1);
3471 uint64_t Idx = Op.getConstantOperandVal(2);
3472 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
3473 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
3474 APInt DemandedSrcElts = DemandedElts;
3475 DemandedSrcElts.clearBits(Idx, Idx + NumSubElts);
3476
3477 // If none of the sub operand elements are demanded, bypass the insert.
3478 if (!DemandedSubElts)
3479 return TLO.CombineTo(Op, Src);
3480
3481 APInt SubUndef, SubZero;
3482 if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
3483 Depth + 1))
3484 return true;
3485
3486 // If none of the src operand elements are demanded, replace it with undef.
3487 if (!DemandedSrcElts && !Src.isUndef())
3488 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
3489 TLO.DAG.getUNDEF(VT), Sub,
3490 Op.getOperand(2)));
3491
3492 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownUndef, KnownZero,
3493 TLO, Depth + 1))
3494 return true;
3495 KnownUndef.insertBits(SubUndef, Idx);
3496 KnownZero.insertBits(SubZero, Idx);
3497
3498 // Attempt to avoid multi-use ops if we don't need anything from them.
3499 if (!DemandedSrcElts.isAllOnes() || !DemandedSubElts.isAllOnes()) {
3501 Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3503 Sub, DemandedSubElts, TLO.DAG, Depth + 1);
3504 if (NewSrc || NewSub) {
3505 NewSrc = NewSrc ? NewSrc : Src;
3506 NewSub = NewSub ? NewSub : Sub;
3507 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3508 NewSub, Op.getOperand(2));
3509 return TLO.CombineTo(Op, NewOp);
3510 }
3511 }
3512 break;
3513 }
3515 // Offset the demanded elts by the subvector index.
3516 SDValue Src = Op.getOperand(0);
3517 if (Src.getValueType().isScalableVector())
3518 break;
3519 uint64_t Idx = Op.getConstantOperandVal(1);
3520 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3521 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
3522
3523 APInt SrcUndef, SrcZero;
3524 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3525 Depth + 1))
3526 return true;
3527 KnownUndef = SrcUndef.extractBits(NumElts, Idx);
3528 KnownZero = SrcZero.extractBits(NumElts, Idx);
3529
3530 // Attempt to avoid multi-use ops if we don't need anything from them.
3531 if (!DemandedElts.isAllOnes()) {
3533 Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3534 if (NewSrc) {
3535 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3536 Op.getOperand(1));
3537 return TLO.CombineTo(Op, NewOp);
3538 }
3539 }
3540 break;
3541 }
3543 SDValue Vec = Op.getOperand(0);
3544 SDValue Scl = Op.getOperand(1);
3545 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
3546
3547 // For a legal, constant insertion index, if we don't need this insertion
3548 // then strip it, else remove it from the demanded elts.
3549 if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
3550 unsigned Idx = CIdx->getZExtValue();
3551 if (!DemandedElts[Idx])
3552 return TLO.CombineTo(Op, Vec);
3553
3554 APInt DemandedVecElts(DemandedElts);
3555 DemandedVecElts.clearBit(Idx);
3556 if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
3557 KnownZero, TLO, Depth + 1))
3558 return true;
3559
3560 KnownUndef.setBitVal(Idx, Scl.isUndef());
3561
3562 KnownZero.setBitVal(Idx, isNullConstant(Scl) || isNullFPConstant(Scl));
3563 break;
3564 }
3565
3566 APInt VecUndef, VecZero;
3567 if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
3568 Depth + 1))
3569 return true;
3570 // Without knowing the insertion index we can't set KnownUndef/KnownZero.
3571 break;
3572 }
3573 case ISD::VSELECT: {
3574 SDValue Sel = Op.getOperand(0);
3575 SDValue LHS = Op.getOperand(1);
3576 SDValue RHS = Op.getOperand(2);
3577
3578 // Try to transform the select condition based on the current demanded
3579 // elements.
3580 APInt UndefSel, ZeroSel;
3581 if (SimplifyDemandedVectorElts(Sel, DemandedElts, UndefSel, ZeroSel, TLO,
3582 Depth + 1))
3583 return true;
3584
3585 // See if we can simplify either vselect operand.
3586 APInt DemandedLHS(DemandedElts);
3587 APInt DemandedRHS(DemandedElts);
3588 APInt UndefLHS, ZeroLHS;
3589 APInt UndefRHS, ZeroRHS;
3590 if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3591 Depth + 1))
3592 return true;
3593 if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3594 Depth + 1))
3595 return true;
3596
3597 KnownUndef = UndefLHS & UndefRHS;
3598 KnownZero = ZeroLHS & ZeroRHS;
3599
3600 // If we know that the selected element is always zero, we don't need the
3601 // select value element.
3602 APInt DemandedSel = DemandedElts & ~KnownZero;
3603 if (DemandedSel != DemandedElts)
3604 if (SimplifyDemandedVectorElts(Sel, DemandedSel, UndefSel, ZeroSel, TLO,
3605 Depth + 1))
3606 return true;
3607
3608 break;
3609 }
3610 case ISD::VECTOR_SHUFFLE: {
3611 SDValue LHS = Op.getOperand(0);
3612 SDValue RHS = Op.getOperand(1);
3613 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
3614
3615 // Collect demanded elements from shuffle operands..
3616 APInt DemandedLHS(NumElts, 0);
3617 APInt DemandedRHS(NumElts, 0);
3618 for (unsigned i = 0; i != NumElts; ++i) {
3619 int M = ShuffleMask[i];
3620 if (M < 0 || !DemandedElts[i])
3621 continue;
3622 assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
3623 if (M < (int)NumElts)
3624 DemandedLHS.setBit(M);
3625 else
3626 DemandedRHS.setBit(M - NumElts);
3627 }
3628
3629 // If either side isn't demanded, replace it by UNDEF. We handle this
3630 // explicitly here to also simplify in case of multiple uses (on the
3631 // contrary to the SimplifyDemandedVectorElts calls below).
3632 bool FoldLHS = !DemandedLHS && !LHS.isUndef();
3633 bool FoldRHS = !DemandedRHS && !RHS.isUndef();
3634 if (FoldLHS || FoldRHS) {
3635 LHS = FoldLHS ? TLO.DAG.getUNDEF(LHS.getValueType()) : LHS;
3636 RHS = FoldRHS ? TLO.DAG.getUNDEF(RHS.getValueType()) : RHS;
3637 SDValue NewOp =
3638 TLO.DAG.getVectorShuffle(VT, SDLoc(Op), LHS, RHS, ShuffleMask);
3639 return TLO.CombineTo(Op, NewOp);
3640 }
3641
3642 // See if we can simplify either shuffle operand.
3643 APInt UndefLHS, ZeroLHS;
3644 APInt UndefRHS, ZeroRHS;
3645 if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3646 Depth + 1))
3647 return true;
3648 if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3649 Depth + 1))
3650 return true;
3651
3652 // Simplify mask using undef elements from LHS/RHS.
3653 bool Updated = false;
3654 bool IdentityLHS = true, IdentityRHS = true;
3655 SmallVector<int, 32> NewMask(ShuffleMask);
3656 for (unsigned i = 0; i != NumElts; ++i) {
3657 int &M = NewMask[i];
3658 if (M < 0)
3659 continue;
3660 if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
3661 (M >= (int)NumElts && UndefRHS[M - NumElts])) {
3662 Updated = true;
3663 M = -1;
3664 }
3665 IdentityLHS &= (M < 0) || (M == (int)i);
3666 IdentityRHS &= (M < 0) || ((M - NumElts) == i);
3667 }
3668
3669 // Update legal shuffle masks based on demanded elements if it won't reduce
3670 // to Identity which can cause premature removal of the shuffle mask.
3671 if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
3672 SDValue LegalShuffle =
3673 buildLegalVectorShuffle(VT, DL, LHS, RHS, NewMask, TLO.DAG);
3674 if (LegalShuffle)
3675 return TLO.CombineTo(Op, LegalShuffle);
3676 }
3677
3678 // Propagate undef/zero elements from LHS/RHS.
3679 for (unsigned i = 0; i != NumElts; ++i) {
3680 int M = ShuffleMask[i];
3681 if (M < 0) {
3682 KnownUndef.setBit(i);
3683 } else if (M < (int)NumElts) {
3684 if (UndefLHS[M])
3685 KnownUndef.setBit(i);
3686 if (ZeroLHS[M])
3687 KnownZero.setBit(i);
3688 } else {
3689 if (UndefRHS[M - NumElts])
3690 KnownUndef.setBit(i);
3691 if (ZeroRHS[M - NumElts])
3692 KnownZero.setBit(i);
3693 }
3694 }
3695 break;
3696 }
3700 APInt SrcUndef, SrcZero;
3701 SDValue Src = Op.getOperand(0);
3702 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3703 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts);
3704 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3705 Depth + 1))
3706 return true;
3707 KnownZero = SrcZero.zextOrTrunc(NumElts);
3708 KnownUndef = SrcUndef.zextOrTrunc(NumElts);
3709
3710 if (IsLE && Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
3711 Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
3712 DemandedSrcElts == 1) {
3713 // aext - if we just need the bottom element then we can bitcast.
3714 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
3715 }
3716
3717 if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
3718 // zext(undef) upper bits are guaranteed to be zero.
3719 if (DemandedElts.isSubsetOf(KnownUndef))
3720 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3721 KnownUndef.clearAllBits();
3722
3723 // zext - if we just need the bottom element then we can mask:
3724 // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
3725 if (IsLE && DemandedSrcElts == 1 && Src.getOpcode() == ISD::AND &&
3726 Op->isOnlyUserOf(Src.getNode()) &&
3727 Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
3728 SDLoc DL(Op);
3729 EVT SrcVT = Src.getValueType();
3730 EVT SrcSVT = SrcVT.getScalarType();
3731
3732 // If we're after type legalization and SrcSVT is not legal, use the
3733 // promoted type for creating constants to avoid creating nodes with
3734 // illegal types.
3736 SrcSVT = getLegalTypeToTransformTo(*TLO.DAG.getContext(), SrcSVT);
3737
3738 SmallVector<SDValue> MaskElts;
3739 MaskElts.push_back(TLO.DAG.getAllOnesConstant(DL, SrcSVT));
3740 MaskElts.append(NumSrcElts - 1, TLO.DAG.getConstant(0, DL, SrcSVT));
3741 SDValue Mask = TLO.DAG.getBuildVector(SrcVT, DL, MaskElts);
3742 if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(
3743 ISD::AND, DL, SrcVT, {Src.getOperand(1), Mask})) {
3744 Fold = TLO.DAG.getNode(ISD::AND, DL, SrcVT, Src.getOperand(0), Fold);
3745 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Fold));
3746 }
3747 }
3748 }
3749 break;
3750 }
3751
3752 // TODO: There are more binop opcodes that could be handled here - MIN,
3753 // MAX, saturated math, etc.
3754 case ISD::ADD: {
3755 SDValue Op0 = Op.getOperand(0);
3756 SDValue Op1 = Op.getOperand(1);
3757 if (Op0 == Op1 && Op->isOnlyUserOf(Op0.getNode())) {
3758 APInt UndefLHS, ZeroLHS;
3759 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3760 Depth + 1, /*AssumeSingleUse*/ true))
3761 return true;
3762 }
3763 [[fallthrough]];
3764 }
3765 case ISD::AVGCEILS:
3766 case ISD::AVGCEILU:
3767 case ISD::AVGFLOORS:
3768 case ISD::AVGFLOORU:
3769 case ISD::OR:
3770 case ISD::XOR:
3771 case ISD::SUB:
3772 case ISD::FADD:
3773 case ISD::FSUB:
3774 case ISD::FMUL:
3775 case ISD::FDIV:
3776 case ISD::FREM: {
3777 SDValue Op0 = Op.getOperand(0);
3778 SDValue Op1 = Op.getOperand(1);
3779
3780 APInt UndefRHS, ZeroRHS;
3781 if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3782 Depth + 1))
3783 return true;
3784 APInt UndefLHS, ZeroLHS;
3785 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3786 Depth + 1))
3787 return true;
3788
3789 KnownZero = ZeroLHS & ZeroRHS;
3790 KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
3791
3792 // Attempt to avoid multi-use ops if we don't need anything from them.
3793 // TODO - use KnownUndef to relax the demandedelts?
3794 if (!DemandedElts.isAllOnes())
3795 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3796 return true;
3797 break;
3798 }
3799 case ISD::SHL:
3800 case ISD::SRL:
3801 case ISD::SRA:
3802 case ISD::ROTL:
3803 case ISD::ROTR: {
3804 SDValue Op0 = Op.getOperand(0);
3805 SDValue Op1 = Op.getOperand(1);
3806
3807 APInt UndefRHS, ZeroRHS;
3808 if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3809 Depth + 1))
3810 return true;
3811 APInt UndefLHS, ZeroLHS;
3812 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3813 Depth + 1))
3814 return true;
3815
3816 KnownZero = ZeroLHS;
3817 KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
3818
3819 // Attempt to avoid multi-use ops if we don't need anything from them.
3820 // TODO - use KnownUndef to relax the demandedelts?
3821 if (!DemandedElts.isAllOnes())
3822 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3823 return true;
3824 break;
3825 }
3826 case ISD::MUL:
3827 case ISD::MULHU:
3828 case ISD::MULHS:
3829 case ISD::AND: {
3830 SDValue Op0 = Op.getOperand(0);
3831 SDValue Op1 = Op.getOperand(1);
3832
3833 APInt SrcUndef, SrcZero;
3834 if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO,
3835 Depth + 1))
3836 return true;
3837 // FIXME: If we know that a demanded element was zero in Op1 we don't need
3838 // to demand it in Op0 - its guaranteed to be zero. There is however a
3839 // restriction, as we must not make any of the originally demanded elements
3840 // more poisonous. We could reduce amount of elements demanded, but then we
3841 // also need a to inform SimplifyDemandedVectorElts that some elements must
3842 // not be made more poisonous.
3843 if (SimplifyDemandedVectorElts(Op0, DemandedElts, KnownUndef, KnownZero,
3844 TLO, Depth + 1))
3845 return true;
3846
3847 KnownUndef &= DemandedElts;
3848 KnownZero &= DemandedElts;
3849
3850 // If every element pair has a zero/undef/poison then just fold to zero.
3851 // fold (and x, undef/poison) -> 0 / (and x, 0) -> 0
3852 // fold (mul x, undef/poison) -> 0 / (mul x, 0) -> 0
3853 if (DemandedElts.isSubsetOf(SrcZero | KnownZero | SrcUndef | KnownUndef))
3854 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3855
3856 // If either side has a zero element, then the result element is zero, even
3857 // if the other is an UNDEF.
3858 // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
3859 // and then handle 'and' nodes with the rest of the binop opcodes.
3860 KnownZero |= SrcZero;
3861 KnownUndef &= SrcUndef;
3862 KnownUndef &= ~KnownZero;
3863
3864 // Attempt to avoid multi-use ops if we don't need anything from them.
3865 if (!DemandedElts.isAllOnes())
3866 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3867 return true;
3868 break;
3869 }
3870 case ISD::TRUNCATE:
3871 case ISD::SIGN_EXTEND:
3872 case ISD::ZERO_EXTEND:
3873 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3874 KnownZero, TLO, Depth + 1))
3875 return true;
3876
3877 if (!DemandedElts.isAllOnes())
3879 Op.getOperand(0), DemandedElts, TLO.DAG, Depth + 1))
3880 return TLO.CombineTo(Op, TLO.DAG.getNode(Opcode, SDLoc(Op), VT, NewOp));
3881
3882 if (Op.getOpcode() == ISD::ZERO_EXTEND) {
3883 // zext(undef) upper bits are guaranteed to be zero.
3884 if (DemandedElts.isSubsetOf(KnownUndef))
3885 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3886 KnownUndef.clearAllBits();
3887 }
3888 break;
3889 case ISD::SINT_TO_FP:
3890 case ISD::UINT_TO_FP:
3891 case ISD::FP_TO_SINT:
3892 case ISD::FP_TO_UINT:
3893 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3894 KnownZero, TLO, Depth + 1))
3895 return true;
3896 // Don't fall through to generic undef -> undef handling.
3897 return false;
3898 default: {
3899 if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
3900 if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
3901 KnownZero, TLO, Depth))
3902 return true;
3903 } else {
3904 KnownBits Known;
3905 APInt DemandedBits = APInt::getAllOnes(EltSizeInBits);
3906 if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
3907 TLO, Depth, AssumeSingleUse))
3908 return true;
3909 }
3910 break;
3911 }
3912 }
3913 assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
3914
3915 // Constant fold all undef cases.
3916 // TODO: Handle zero cases as well.
3917 if (DemandedElts.isSubsetOf(KnownUndef))
3918 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3919
3920 return false;
3921}
3922
3923/// Determine which of the bits specified in Mask are known to be either zero or
3924/// one and return them in the Known.
// NOTE(review): the opening declarator line (listing line 3925) is absent from
// this listing; only the trailing parameters are visible below.
3926 KnownBits &Known,
3927 const APInt &DemandedElts,
3928 const SelectionDAG &DAG,
3929 unsigned Depth) const {
  // Only target nodes and intrinsic nodes are expected to reach this hook.
3930 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3931 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3932 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3933 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3934 "Should use MaskedValueIsZero if you don't know whether Op"
3935 " is a target node!");
  // This implementation derives nothing from Op: it just resets Known.
3936 Known.resetAll();
3937}
3938
// NOTE(review): the first lines of this definition (listing lines 3939-3940)
// are absent from this listing; only the trailing parameters are visible.
3941 const APInt &DemandedElts, const MachineRegisterInfo &MRI,
3942 unsigned Depth) const {
  // The visible body ignores its inputs and unconditionally resets Known.
3943 Known.resetAll();
3944}
3945
// NOTE(review): the first lines of this definition (listing lines 3946-3947)
// are absent from this listing; only the trailing parameters are visible.
3948 const APInt &DemandedElts, const MachineRegisterInfo &MRI,
3949 unsigned Depth) const {
  // The visible body ignores its inputs and unconditionally resets Known.
3950 Known.resetAll();
3951}
3952
// Records known-zero low bits for the address of frame object FrameIdx, based
// on the alignment that MF's frame info reports for that object.
// NOTE(review): the declarator line (listing line 3953) is absent from this
// listing.
3954 const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
3955 // The low bits are known zero if the pointer is aligned.
  // Log2 of the object's alignment gives the number of low bits to mark.
3956 Known.Zero.setLowBits(Log2(MF.getFrameInfo().getObjectAlign(FrameIdx)));
3957}
3958
3964
3965/// This method can be implemented by targets that want to expose additional
3966/// information about sign bits to the DAG Combiner.
// NOTE(review): the opening declarator line (listing line 3967) is absent from
// this listing; only the trailing (unnamed) parameters are visible below.
3968 const APInt &,
3969 const SelectionDAG &,
3970 unsigned Depth) const {
  // Only target nodes and intrinsic nodes are expected to reach this hook.
3971 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3972 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3973 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3974 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3975 "Should use ComputeNumSignBits if you don't know whether Op"
3976 " is a target node!");
  // This implementation always answers 1, regardless of Op.
3977 return 1;
3978}
3979
// NOTE(review): the declarator line (listing line 3980) is absent from this
// listing; only the parameter list is visible.
3981 GISelValueTracking &Analysis, Register R, const APInt &DemandedElts,
3982 const MachineRegisterInfo &MRI, unsigned Depth) const {
  // The visible body ignores its inputs and always answers 1.
3983 return 1;
3984}
3985
// NOTE(review): the declarator line (listing line 3986) is absent from this
// listing; only the parameter list is visible.
3987 SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
3988 TargetLoweringOpt &TLO, unsigned Depth) const {
  // Only target nodes and intrinsic nodes are expected to reach this hook.
3989 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3990 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3991 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3992 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3993 "Should use SimplifyDemandedVectorElts if you don't know whether Op"
3994 " is a target node!");
  // This implementation performs no simplification and leaves KnownUndef and
  // KnownZero untouched.
3995 return false;
3996}
3997
// NOTE(review): the declarator line (listing line 3998) is absent from this
// listing; only the parameter list is visible.
3999 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
4000 KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
  // Only target nodes and intrinsic nodes are expected to reach this hook.
4001 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4002 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4003 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4004 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4005 "Should use SimplifyDemandedBits if you don't know whether Op"
4006 " is a target node!");
  // No simplification is attempted here; Known is filled in by delegating to
  // computeKnownBitsForTargetNode. DemandedBits is not consulted.
4007 computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
4008 return false;
4009}
4010
// NOTE(review): the declarator line (listing line 4011) is absent from this
// listing; only the parameter list is visible.
4012 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
4013 SelectionDAG &DAG, unsigned Depth) const {
  // Only target nodes and intrinsic nodes are expected to reach this hook.
4014 assert(
4015 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4016 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4017 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4018 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4019 "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
4020 " is a target node!");
  // This implementation never offers a replacement: it returns an empty
  // SDValue.
4021 return SDValue();
4022}
4023
// Builds a vector shuffle of N0/N1 with Mask if the mask is legal for VT,
// retrying once with the operands swapped; returns an empty SDValue when
// neither attempt yields a legal mask.
// NOTE(review): the declarator lines (listing lines 4025-4026) are absent from
// this listing.
4024SDValue
4027 SelectionDAG &DAG) const {
4028 bool LegalMask = isShuffleMaskLegal(Mask, VT);
4029 if (!LegalMask) {
4030 std::swap(N0, N1);
  // NOTE(review): listing line 4031 is absent here. Presumably it commutes
  // Mask to match the swapped operands before legality is re-checked; confirm
  // against the real source.
4032 LegalMask = isShuffleMaskLegal(Mask, VT);
4033 }
4034
4035 if (!LegalMask)
4036 return SDValue();
4037
4038 return DAG.getVectorShuffle(VT, DL, N0, N1, Mask);
4039}
4040
// NOTE(review): the declarator line (listing line 4041) is absent from this
// listing. The visible body always returns nullptr.
4042 return nullptr;
4043}
4044
// NOTE(review): the declarator line (listing line 4045) is absent from this
// listing; only the parameter list is visible.
4046 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
4047 UndefPoisonKind Kind, unsigned Depth) const {
  // Only target nodes and intrinsic nodes are expected to reach this hook.
4048 assert(
4049 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4050 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4051 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4052 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4053 "Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op"
4054 " is a target node!");
4055
4056 // If Op can't create undef/poison and none of its operands are undef/poison
4057 // then Op is never undef/poison.
  // Flags are taken into account for Op itself; every operand is then queried
  // through the DAG one level deeper (Depth + 1).
4058 return !canCreateUndefOrPoisonForTargetNode(Op, DemandedElts, DAG, Kind,
4059 /*ConsiderFlags*/ true, Depth) &&
4060 all_of(Op->ops(), [&](SDValue V) {
4061 return DAG.isGuaranteedNotToBeUndefOrPoison(V, Kind, Depth + 1);
4062 });
4063}
4064
// NOTE(review): the declarator line (listing line 4065) is absent from this
// listing; only the parameter list is visible.
4066 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
4067 UndefPoisonKind Kind, bool ConsiderFlags, unsigned Depth) const {
  // Only target nodes and intrinsic nodes are expected to reach this hook.
4068 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4069 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4070 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4071 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4072 "Should use canCreateUndefOrPoison if you don't know whether Op"
4073 " is a target node!");
4074 // Be conservative and return true.
4075 return true;
4076}
4077
// NOTE(review): the opening declarator line (listing line 4078) is absent from
// this listing; only the trailing parameters are visible below.
4079 KnownFPClass &Known,
4080 const APInt &DemandedElts,
4081 const SelectionDAG &DAG,
4082 unsigned Depth) const {
  // Only target nodes and intrinsic nodes are expected to reach this hook.
  // Apart from this check the body is empty, so Known is left exactly as the
  // caller passed it in.
4083 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4084 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4085 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4086 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4087 "Should use computeKnownFPClass if you don't know whether Op"
4088 " is a target node!");
4089}
4090
// NOTE(review): the opening declarator line (listing line 4091) is absent from
// this listing; only the trailing parameters are visible below.
4092 const APInt &DemandedElts,
4093 const SelectionDAG &DAG,
4094 bool SNaN,
4095 unsigned Depth) const {
  // Only target nodes and intrinsic nodes are expected to reach this hook.
4096 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4097 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4098 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4099 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4100 "Should use isKnownNeverNaN if you don't know whether Op"
4101 " is a target node!");
  // This implementation never claims the value is NaN-free.
4102 return false;
4103}
4104
// NOTE(review): the opening declarator line (listing line 4105) is absent from
// this listing; only the trailing parameters are visible below.
4106 const APInt &DemandedElts,
4107 APInt &UndefElts,
4108 const SelectionDAG &DAG,
4109 unsigned Depth) const {
  // Only target nodes and intrinsic nodes are expected to reach this hook.
4110 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4111 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4112 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4113 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4114 "Should use isSplatValue if you don't know whether Op"
4115 " is a target node!");
  // This implementation never reports a splat and does not write UndefElts.
4116 return false;
4117}
4118
4119// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
4120// work with truncating build vectors and vectors with elements of less than
4121// 8 bits.
// Decides whether N is a constant (or constant splat, undef elements not
// allowed) that counts as "true" under the boolean-contents convention of N's
// value type.
// NOTE(review): the declarator line (listing line 4122) is absent from this
// listing.
4123 if (!N)
4124 return false;
4125
4126 unsigned EltWidth;
4127 APInt CVal;
4128 if (ConstantSDNode *CN = isConstOrConstSplat(N, /*AllowUndefs=*/false,
4129 /*AllowTruncation=*/true)) {
4130 CVal = CN->getAPIntValue();
4131 EltWidth = N.getValueType().getScalarSizeInBits();
4132 } else
4133 return false;
4134
4135 // If this is a truncating splat, truncate the splat value.
4136 // Otherwise, we may fail to match the expected values below.
4137 if (EltWidth < CVal.getBitWidth())
4138 CVal = CVal.trunc(EltWidth);
4139
4140 switch (getBooleanContents(N.getValueType())) {
  // NOTE(review): the `case` labels (listing lines 4141, 4143 and 4145) are
  // absent from this listing. Each return below belongs to a different
  // boolean-contents kind: one tests only bit 0, one requires the value 1,
  // and one requires all bits set.
4142 return CVal[0];
4144 return CVal.isOne();
4146 return CVal.isAllOnes();
4147 }
4148
4149 llvm_unreachable("Invalid boolean contents");
4150}
4151
// Decides whether N is a constant (or constant splat) that counts as "false"
// under the boolean-contents convention of N's value type.
// NOTE(review): the declarator line (listing line 4152) is absent from this
// listing.
4153 if (!N)
4154 return false;
4155
  // NOTE(review): listing line 4156, which defines CN, is absent here.
4157 if (!CN) {
  // NOTE(review): listing line 4158, which defines BV, is absent here.
4159 if (!BV)
4160 return false;
4161
4162 // Only interested in constant splats, we don't care about undef
4163 // elements in identifying boolean constants and getConstantSplatNode
4164 // returns NULL if all ops are undef;
4165 CN = BV->getConstantSplatNode();
4166 if (!CN)
4167 return false;
4168 }
4169
  // With undefined boolean contents only bit 0 is inspected; otherwise the
  // whole constant must be zero.
4170 if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
4171 return !CN->getAPIntValue()[0];
4172
4173 return CN->isZero();
4174}
4175
// NOTE(review): the opening declarator line (listing line 4176) is absent from
// this listing; only the last parameter is visible below.
4177 bool SExt) const {
  // For i1 the value is true exactly when the constant is 1.
4178 if (VT == MVT::i1)
4179 return N->isOne();
4180
  // NOTE(review): listing line 4181, which defines Cnt, is absent here, as are
  // the `case` labels of the switch below (listing lines 4183 and 4187-4188).
4182 switch (Cnt) {
4184 // An extended value of 1 is always true, unless its original type is i1,
4185 // in which case it will be sign extended to -1.
4186 return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
4189 return N->isAllOnes() && SExt;
4190 }
4191 llvm_unreachable("Unexpected enumeration.");
4192}
4193
4194/// This helper function of SimplifySetCC tries to optimize the comparison when
4195/// either operand of the SetCC node is a bitwise-and instruction.
/// Only integer SETEQ/SETNE comparisons are handled. Returns the replacement
/// node, or an empty SDValue when no fold applies.
4196SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
4197 ISD::CondCode Cond, const SDLoc &DL,
4198 DAGCombinerInfo &DCI) const {
  // Canonicalize: if only one side is an 'and', make it N0.
4199 if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
4200 std::swap(N0, N1);
4201
4202 SelectionDAG &DAG = DCI.DAG;
4203 EVT OpVT = N0.getValueType();
4204 if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
4205 (Cond != ISD::SETEQ && Cond != ISD::SETNE))
4206 return SDValue();
4207
4208 // (X & Y) != 0 --> zextOrTrunc(X & Y)
4209 // iff everything but LSB is known zero:
4210 if (Cond == ISD::SETNE && isNullConstant(N1) &&
  // NOTE(review): listing lines 4211-4212 (the remainder of this condition)
  // are absent from this listing.
4213 unsigned NumEltBits = OpVT.getScalarSizeInBits();
4214 APInt UpperBits = APInt::getHighBitsSet(NumEltBits, NumEltBits - 1);
4215 if (DAG.MaskedValueIsZero(N0, UpperBits))
4216 return DAG.getBoolExtOrTrunc(N0, DL, VT, OpVT);
4217 }
4218
4219 // Try to eliminate a power-of-2 mask constant by converting to a signbit
4220 // test in a narrow type that we can truncate to with no cost. Examples:
4221 // (i32 X & 32768) == 0 --> (trunc X to i16) >= 0
4222 // (i32 X & 32768) != 0 --> (trunc X to i16) < 0
4223 // TODO: This conservatively checks for type legality on the source and
4224 // destination types. That may inhibit optimizations, but it also
4225 // allows setcc->shift transforms that may be more beneficial.
4226 auto *AndC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4227 if (AndC && isNullConstant(N1) && AndC->getAPIntValue().isPowerOf2() &&
4228 isTypeLegal(OpVT) && N0.hasOneUse()) {
  // The narrow type is just wide enough for the mask bit to be its top bit.
4229 EVT NarrowVT = EVT::getIntegerVT(*DAG.getContext(),
4230 AndC->getAPIntValue().getActiveBits());
4231 if (isTruncateFree(OpVT, NarrowVT) && isTypeLegal(NarrowVT)) {
4232 SDValue Trunc = DAG.getZExtOrTrunc(N0.getOperand(0), DL, NarrowVT);
4233 SDValue Zero = DAG.getConstant(0, DL, NarrowVT);
4234 return DAG.getSetCC(DL, VT, Trunc, Zero,
  // NOTE(review): listing line 4235 (the final argument of this call) is
  // absent from this listing.
4236 }
4237 }
4238
4239 // Match these patterns in any of their permutations:
4240 // (X & Y) == Y
4241 // (X & Y) != Y
4242 SDValue X, Y;
4243 if (N0.getOperand(0) == N1) {
4244 X = N0.getOperand(1);
4245 Y = N0.getOperand(0);
4246 } else if (N0.getOperand(1) == N1) {
4247 X = N0.getOperand(0);
4248 Y = N0.getOperand(1);
4249 } else {
4250 return SDValue();
4251 }
4252
4253 // TODO: We should invert (X & Y) eq/ne 0 -> (X & Y) ne/eq Y if
4254 // `isXAndYEqZeroPreferableToXAndYEqY` is false. This is a bit difficult as
4255 // it's liable to create an infinite loop.
4256 SDValue Zero = DAG.getConstant(0, DL, OpVT);
4257 if (isXAndYEqZeroPreferableToXAndYEqY(Cond, OpVT) &&
  // NOTE(review): listing line 4258 (the remainder of this condition) is
  // absent from this listing.
4259 // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
4260 // Note that where Y is variable and is known to have at most one bit set
4261 // (for example, if it is Z & 1) we cannot do this; the expressions are not
4262 // equivalent when Y == 0.
4263 assert(OpVT.isInteger());
  // NOTE(review): listing line 4264 is absent from this listing.
4265 if (DCI.isBeforeLegalizeOps() ||
  // NOTE(review): listing line 4266 (the remainder of this condition) is
  // absent from this listing.
4267 return DAG.getSetCC(DL, VT, N0, Zero, Cond);
4268 } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
4269 // If the target supports an 'and-not' or 'and-complement' logic operation,
4270 // try to use that to make a comparison operation more efficient.
4271 // But don't do this transform if the mask is a single bit because there are
4272 // more efficient ways to deal with that case (for example, 'bt' on x86 or
4273 // 'rlwinm' on PPC).
4274
4275 // Bail out if the compare operand that we want to turn into a zero is
4276 // already a zero (otherwise, infinite loop).
4277 if (isNullConstant(Y))
4278 return SDValue();
4279
4280 // Transform this into: ~X & Y == 0.
4281 SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
4282 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
4283 return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
4284 }
4285
4286 return SDValue();
4287}
4288
4289/// This helper function of SimplifySetCC tries to optimize the comparison when
4290/// either operand of the SetCC node is a bitwise-or instruction.
4291/// For now, this just transforms (X | Y) ==/!= Y into X & ~Y ==/!= 0.
4292SDValue TargetLowering::foldSetCCWithOr(EVT VT, SDValue N0, SDValue N1,
4293 ISD::CondCode Cond, const SDLoc &DL,
4294 DAGCombinerInfo &DCI) const {
4295 if (N1.getOpcode() == ISD::OR && N0.getOpcode() != ISD::OR)
4296 std::swap(N0, N1);
4297
4298 SelectionDAG &DAG = DCI.DAG;
4299 EVT OpVT = N0.getValueType();
4300 if (!N0.hasOneUse() || !OpVT.isInteger() ||
4301 (Cond != ISD::SETEQ && Cond != ISD::SETNE))
4302 return SDValue();
4303
4304 // (X | Y) == Y
4305 // (X | Y) != Y
4306 SDValue X;
4307 if (sd_match(N0, m_Or(m_Value(X), m_Specific(N1))) && hasAndNotCompare(X)) {
4308 // If the target supports an 'and-not' or 'and-complement' logic operation,
4309 // try to use that to make a comparison operation more efficient.
4310
4311 // Bail out if the compare operand that we want to turn into a zero is
4312 // already a zero (otherwise, infinite loop).
4313 if (isNullConstant(N1))
4314 return SDValue();
4315
4316 // Transform this into: X & ~Y ==/!= 0.
4317 SDValue NotY = DAG.getNOT(SDLoc(N1), N1, OpVT);
4318 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, X, NotY);
4319 return DAG.getSetCC(DL, VT, NewAnd, DAG.getConstant(0, DL, OpVT), Cond);
4320 }
4321
4322 return SDValue();
4323}
4324
4325/// There are multiple IR patterns that could be checking whether certain
4326/// truncation of a signed number would be lossy or not. The pattern which is
4327/// best at IR level, may not lower optimally. Thus, we want to unfold it.
4328/// We are looking for the following pattern: (KeptBits is a constant)
4329/// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
4330/// KeptBits won't be bitwidth(x), that will be constant-folded to true/false.
4331/// KeptBits also can't be 1, that would have been folded to %x dstcond 0
4332/// We will unfold it into the natural trunc+sext pattern:
4333/// ((%x << C) a>> C) dstcond %x
4334/// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x)
/// Returns the replacement setcc, or an empty SDValue if the pattern does not
/// match or the target declines the transform.
4335SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
4336 EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
4337 const SDLoc &DL) const {
4338 // We must be comparing with a constant.
4339 ConstantSDNode *C1;
4340 if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
4341 return SDValue();
4342
4343 // N0 should be: add %x, (1 << (KeptBits-1))
4344 if (N0->getOpcode() != ISD::ADD)
4345 return SDValue();
4346
4347 // And we must be 'add'ing a constant.
4348 ConstantSDNode *C01;
4349 if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
4350 return SDValue();
4351
4352 SDValue X = N0->getOperand(0);
4353 EVT XVT = X.getValueType();
4354
4355 // Validate constants ...
4356
  // I1 is a mutable copy of the compare constant; it may be adjusted below.
4357 APInt I1 = C1->getAPIntValue();
4358
  // Map the unsigned source predicate onto eq/ne. For the ULE and UGT forms
  // the constant is bumped by one so that all four cases use the same bound.
4359 ISD::CondCode NewCond;
4360 if (Cond == ISD::CondCode::SETULT) {
4361 NewCond = ISD::CondCode::SETEQ;
4362 } else if (Cond == ISD::CondCode::SETULE) {
4363 NewCond = ISD::CondCode::SETEQ;
4364 // But need to 'canonicalize' the constant.
4365 I1 += 1;
4366 } else if (Cond == ISD::CondCode::SETUGT) {
4367 NewCond = ISD::CondCode::SETNE;
4368 // But need to 'canonicalize' the constant.
4369 I1 += 1;
4370 } else if (Cond == ISD::CondCode::SETUGE) {
4371 NewCond = ISD::CondCode::SETNE;
4372 } else
4373 return SDValue();
4374
4375 APInt I01 = C01->getAPIntValue();
4376
  // The lambda captures I1 and I01 by reference, so it re-evaluates them after
  // they are negated below.
4377 auto checkConstants = [&I1, &I01]() -> bool {
4378 // Both of them must be power-of-two, and the constant from setcc is bigger.
4379 return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
4380 };
4381
4382 if (checkConstants()) {
4383 // Great, e.g. got icmp ult i16 (add i16 %x, 128), 256
4384 } else {
4385 // What if we invert constants? (and the target predicate)
4386 I1.negate();
4387 I01.negate();
4388 assert(XVT.isInteger());
4389 NewCond = getSetCCInverse(NewCond, XVT);
4390 if (!checkConstants())
4391 return SDValue();
4392 // Great, e.g. got icmp uge i16 (add i16 %x, -128), -256
4393 }
4394
4395 // They are power-of-two, so which bit is set?
4396 const unsigned KeptBits = I1.logBase2();
4397 const unsigned KeptBitsMinusOne = I01.logBase2();
4398
4399 // Magic!
  // The added constant must be exactly half of the compared constant.
4400 if (KeptBits != (KeptBitsMinusOne + 1))
4401 return SDValue();
4402 assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");
4403
4404 // We don't want to do this in every single case.
4405 SelectionDAG &DAG = DCI.DAG;
4406 if (!shouldTransformSignedTruncationCheck(XVT, KeptBits))
4407 return SDValue();
4408
4409 // Unfold into: sext_inreg(%x) cond %x
4410 // Where 'cond' will be either 'eq' or 'ne'.
4411 SDValue SExtInReg = DAG.getNode(
  // NOTE(review): listing line 4412 (the leading arguments of this getNode
  // call) is absent from this listing.
4413 DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), KeptBits)));
4414 return DAG.getSetCC(DL, SCCVT, SExtInReg, X, NewCond);
4415}
4416
4417// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
// Moves the variable shift from the constant side of the 'and' onto X, using
// the opposite logical shift. Returns the new setcc, or an empty SDValue if
// the pattern does not match.
4418SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
4419 EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
4420 DAGCombinerInfo &DCI, const SDLoc &DL) const {
  // NOTE(review): listing line 4421 (the start of this assert) is absent from
  // this listing.
4422 "Should be a comparison with 0.");
4423 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4424 "Valid only for [in]equality comparisons.");
4425
  // Outputs of the Match lambda below. X is assigned by the caller before each
  // Match call; the lambda fills in NewShiftOpcode, C and Y.
4426 unsigned NewShiftOpcode;
4427 SDValue X, C, Y;
4428
4429 SelectionDAG &DAG = DCI.DAG;
4430
4431 // Look for '(C l>>/<< Y)'.
4432 auto Match = [&NewShiftOpcode, &X, &C, &Y, &DAG, this](SDValue V) {
4433 // The shift should be one-use.
4434 if (!V.hasOneUse())
4435 return false;
4436 unsigned OldShiftOpcode = V.getOpcode();
4437 switch (OldShiftOpcode) {
4438 case ISD::SHL:
4439 NewShiftOpcode = ISD::SRL;
4440 break;
4441 case ISD::SRL:
4442 NewShiftOpcode = ISD::SHL;
4443 break;
4444 default:
4445 return false; // must be a logical shift.
4446 }
4447 // We should be shifting a constant.
4448 // FIXME: best to use isConstantOrConstantVector().
4449 C = V.getOperand(0);
4450 ConstantSDNode *CC =
4451 isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
4452 if (!CC)
4453 return false;
4454 Y = V.getOperand(1);
4455
  // XC is null when X is not a constant or constant splat; it is passed on
  // as-is.
4456 ConstantSDNode *XC =
4457 isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
  // NOTE(review): listing line 4458 (the start of the return statement whose
  // arguments follow) is absent from this listing.
4459 X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
4460 };
4461
4462 // LHS of comparison should be a one-use 'and'.
4463 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
4464 return SDValue();
4465
4466 X = N0.getOperand(0);
4467 SDValue Mask = N0.getOperand(1);
4468
4469 // 'and' is commutative!
  // Try operand 1 as the shifted constant first, then the swapped order.
4470 if (!Match(Mask)) {
4471 std::swap(X, Mask);
4472 if (!Match(Mask))
4473 return SDValue();
4474 }
4475
4476 EVT VT = X.getValueType();
4477
4478 // Produce:
4479 // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
4480 SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
4481 SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
4482 SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
4483 return T2;
4484}
4485
4486/// Try to fold an equality comparison with a {add/sub/xor} binary operation as
4487/// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
4488/// handle the commuted versions of these patterns.
4489SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
4490 ISD::CondCode Cond, const SDLoc &DL,
4491 DAGCombinerInfo &DCI) const {
4492 unsigned BOpcode = N0.getOpcode();
4493 assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
4494 "Unexpected binop");
4495 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
4496
4497 // (X + Y) == X --> Y == 0
4498 // (X - Y) == X --> Y == 0
4499 // (X ^ Y) == X --> Y == 0
4500 SelectionDAG &DAG = DCI.DAG;
4501 EVT OpVT = N0.getValueType();
4502 SDValue X = N0.getOperand(0);
4503 SDValue Y = N0.getOperand(1);
4504 if (X == N1)
4505 return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);
4506
4507 if (Y != N1)
4508 return SDValue();
4509
4510 // (X + Y) == Y --> X == 0
4511 // (X ^ Y) == Y --> X == 0
4512 if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
4513 return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);
4514
4515 // The shift would not be valid if the operands are boolean (i1).
4516 if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
4517 return SDValue();
4518
4519 // (X - Y) == Y --> X == Y << 1
4520 SDValue One = DAG.getShiftAmountConstant(1, OpVT, DL);
4521 SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
4522 if (!DCI.isCalledByLegalizer())
4523 DCI.AddToWorklist(YShl1.getNode());
4524 return DAG.getSetCC(DL, VT, X, YShl1, Cond);
4525}
4526
 // simplifySetCCWithCTPOP (name taken from its call site in SimplifySetCC):
 // try to rewrite a comparison of (ctpop x) against the constant C1 into
 // add/and/xor arithmetic on x so that the population count is not needed.
 // N0 is the compared value (the ctpop itself, or a truncate of it), Cond is
 // the predicate and VT the setcc result type. Returns a null SDValue when no
 // rewrite is performed.
 // NOTE(review): the opening line of this signature is not visible in this
 // listing; the TLI and VT parameters used below are presumably declared
 // there -- confirm.
4528 SDValue N0, const APInt &C1,
4529 ISD::CondCode Cond, const SDLoc &dl,
4530 SelectionDAG &DAG) {
4531 // Look through truncs that don't change the value of a ctpop.
4532 // FIXME: Add vector support? Need to be careful with setcc result type below.
4533 SDValue CTPOP = N0;
 // NOTE(review): the condition continues on a line that is not visible in
 // this listing; only the one-use, non-vector truncate part can be seen.
4534 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
4536 CTPOP = N0.getOperand(0);
4537
 // Only a single-use ctpop is rewritten.
4538 if (CTPOP.getOpcode() != ISD::CTPOP || !CTPOP.hasOneUse())
4539 return SDValue();
4540
4541 EVT CTVT = CTPOP.getValueType();
4542 SDValue CTOp = CTPOP.getOperand(0);
4543
4544 // Expand a power-of-2-or-zero comparison based on ctpop:
4545 // (ctpop x) u< 2 -> (x & x-1) == 0
4546 // (ctpop x) u> 1 -> (x & x-1) != 0
4547 if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
4548 // Keep the CTPOP if it is a cheap vector op.
4549 if (CTVT.isVector() && TLI.isCtpopFast(CTVT))
4550 return SDValue();
4551
 // The target-provided cost limit bounds how large C1 may be; SETULT is
 // allowed one more because it needs one pass fewer (see Passes below).
4552 unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond);
4553 if (C1.ugt(CostLimit + (Cond == ISD::SETULT)))
4554 return SDValue();
4555 if (C1 == 0 && (Cond == ISD::SETULT))
4556 return SDValue(); // This is handled elsewhere.
4557
 // Number of "x & (x-1)" steps to emit: C1 for u>, C1 - 1 for u<.
4558 unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT);
4559
4560 SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4561 SDValue Result = CTOp;
 // Each pass computes Result & (Result - 1), which clears the lowest set
 // bit of Result.
4562 for (unsigned i = 0; i < Passes; i++) {
4563 SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, Result, NegOne);
4564 Result = DAG.getNode(ISD::AND, dl, CTVT, Result, Add);
4565 }
 // NOTE(review): the declaration of CC is not visible in this listing;
 // going by the comment above it is presumably SETEQ for u< and SETNE for
 // u> -- confirm.
4567 return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC);
4568 }
4569
4570 // Expand a power-of-2 comparison based on ctpop
4571 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
4572 // Keep the CTPOP if it is cheap.
4573 if (TLI.isCtpopFast(CTVT))
4574 return SDValue();
4575
4576 SDValue Zero = DAG.getConstant(0, dl, CTVT);
4577 SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4578 assert(CTVT.isInteger());
 // Add = x - 1, shared by both expansions below.
4579 SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
4580
4581 // It's not uncommon for known-never-zero X to exist in (ctpop X) eq/ne 1, so
4582 // check before emitting a potentially unnecessary op.
4583 if (DAG.isKnownNeverZero(CTOp)) {
4584 // (ctpop x) == 1 --> (x & x-1) == 0
4585 // (ctpop x) != 1 --> (x & x-1) != 0
4586 SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
4587 SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
4588 return RHS;
4589 }
4590
4591 // (ctpop x) == 1 --> (x ^ x-1) > x-1
4592 // (ctpop x) != 1 --> (x ^ x-1) <= x-1
4593 SDValue Xor = DAG.getNode(ISD::XOR, dl, CTVT, CTOp, Add);
 // NOTE(review): the declaration of CmpCond is not visible in this listing;
 // presumably the ">" / "<=" predicates from the comment above -- confirm.
4595 return DAG.getSetCC(dl, VT, Xor, Add, CmpCond);
4596 }
4597
 // Any other predicate/constant combination is left untouched.
4598 return SDValue();
4599}
4600
 // foldSetCCWithRotate (name taken from its call site in SimplifySetCC):
 // for an eq/ne comparison against a constant (or constant splat) that is
 // zero or all-ones, drop a rotate feeding the compared value, either
 // directly or through a single-use 'or' when comparing against zero.
 // Returns a null SDValue when neither pattern matches.
 // NOTE(review): the opening line of this signature is not visible in this
 // listing; VT, N0 and N1 used below are presumably declared there --
 // confirm.
4602 ISD::CondCode Cond, const SDLoc &dl,
4603 SelectionDAG &DAG) {
 // Only equality predicates are handled.
4604 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4605 return SDValue();
4606
 // The right-hand side must be the constant 0 or -1 (scalar or splat).
4607 auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4608 if (!C1 || !(C1->isZero() || C1->isAllOnes()))
4609 return SDValue();
4610
 // Returns the rotated value if X is a ROTL/ROTR node, otherwise a null
 // SDValue (which tests false in the 'if' statements below).
4611 auto getRotateSource = [](SDValue X) {
4612 if (X.getOpcode() == ISD::ROTL || X.getOpcode() == ISD::ROTR)
4613 return X.getOperand(0);
4614 return SDValue();
4615 };
4616
4617 // Peek through a rotated value compared against 0 or -1:
4618 // (rot X, Y) == 0/-1 --> X == 0/-1
4619 // (rot X, Y) != 0/-1 --> X != 0/-1
4620 if (SDValue R = getRotateSource(N0))
4621 return DAG.getSetCC(dl, VT, R, N1, Cond);
4622
4623 // Peek through an 'or' of a rotated value compared against 0:
4624 // or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0
4625 // or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0
4626 //
4627 // TODO: Add the 'and' with -1 sibling.
4628 // TODO: Recurse through a series of 'or' ops to find the rotate.
4629 EVT OpVT = N0.getValueType();
4630 if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) {
 // Rotate is the first operand of the 'or'.
4631 if (SDValue R = getRotateSource(N0.getOperand(0))) {
4632 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(1));
4633 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4634 }
 // Rotate is the second operand of the 'or'.
4635 if (SDValue R = getRotateSource(N0.getOperand(1))) {
4636 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(0));
4637 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4638 }
4639 }
4640
4641 return SDValue();
4642}
4643
 // foldSetCCWithFunnelShift (name taken from its call site in SimplifySetCC):
 // for an eq/ne comparison against zero of a single-use funnel shift by a
 // constant amount, where one funnel operand is a single-use 'or' containing
 // the other funnel operand, replace the funnel shift with one plain shift
 // and an 'or'. Returns a null SDValue when the pattern does not match.
 // NOTE(review): the opening line of this signature is not visible in this
 // listing; VT, N0 and N1 used below are presumably declared there --
 // confirm.
4645 ISD::CondCode Cond, const SDLoc &dl,
4646 SelectionDAG &DAG) {
4647 // If we are testing for all-bits-clear, we might be able to do that with
4648 // less shifting since bit-order does not matter.
4649 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4650 return SDValue();
4651
 // Only comparisons against zero (scalar or splat) qualify.
4652 auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4653 if (!C1 || !C1->isZero())
4654 return SDValue();
4655
4656 if (!N0.hasOneUse() ||
4657 (N0.getOpcode() != ISD::FSHL && N0.getOpcode() != ISD::FSHR))
4658 return SDValue();
4659
 // The shift amount (operand 2) must be a constant or constant splat.
4660 unsigned BitWidth = N0.getScalarValueSizeInBits();
4661 auto *ShAmtC = isConstOrConstSplat(N0.getOperand(2));
4662 if (!ShAmtC)
4663 return SDValue();
4664
 // Reduce the amount modulo the element width; a resulting amount of zero is
 // not handled here.
4665 uint64_t ShAmt = ShAmtC->getAPIntValue().urem(BitWidth);
4666 if (ShAmt == 0)
4667 return SDValue();
4668
4669 // Canonicalize fshr as fshl to reduce pattern-matching.
4670 if (N0.getOpcode() == ISD::FSHR)
4671 ShAmt = BitWidth - ShAmt;
4672
4673 // Match an 'or' with a specific operand 'Other' in either commuted variant.
 // On success X is set to the operand equal to Other and Y to the remaining
 // operand of the 'or'.
4674 SDValue X, Y;
4675 auto matchOr = [&X, &Y](SDValue Or, SDValue Other) {
4676 if (Or.getOpcode() != ISD::OR || !Or.hasOneUse())
4677 return false;
4678 if (Or.getOperand(0) == Other) {
4679 X = Or.getOperand(0);
4680 Y = Or.getOperand(1);
4681 return true;
4682 }
4683 if (Or.getOperand(1) == Other) {
4684 X = Or.getOperand(1);
4685 Y = Or.getOperand(0);
4686 return true;
4687 }
4688 return false;
4689 };
4690
4691 EVT OpVT = N0.getValueType();
 // New shift-amount constants are created in the type of the original
 // shift-amount operand.
4692 EVT ShAmtVT = N0.getOperand(2).getValueType();
4693 SDValue F0 = N0.getOperand(0);
4694 SDValue F1 = N0.getOperand(1);
 // Case 1: the first funnel operand is the 'or' and contains the second.
4695 if (matchOr(F0, F1)) {
4696 // fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0
4697 SDValue NewShAmt = DAG.getConstant(ShAmt, dl, ShAmtVT);
4698 SDValue Shift = DAG.getNode(ISD::SHL, dl, OpVT, Y, NewShAmt);
4699 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4700 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4701 }
 // Case 2: the second funnel operand is the 'or' and contains the first.
4702 if (matchOr(F1, F0)) {
4703 // fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0
4704 SDValue NewShAmt = DAG.getConstant(BitWidth - ShAmt, dl, ShAmtVT);
4705 SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, Y, NewShAmt);
4706 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4707 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4708 }
4709
4710 return SDValue();
4711}
4712
4713/// Try to simplify a setcc built with the specified operands and cc. If it is
4714/// unable to simplify it, return a null SDValue.
4716 ISD::CondCode Cond, bool foldBooleans,
4717 DAGCombinerInfo &DCI,
4718 const SDLoc &dl) const {
4719 SelectionDAG &DAG = DCI.DAG;
4720 const DataLayout &Layout = DAG.getDataLayout();
4721 EVT OpVT = N0.getValueType();
4722 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4723
4724 // Constant fold or commute setcc.
4725 if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
4726 return Fold;
4727
4728 bool N0ConstOrSplat =
4729 isConstOrConstSplat(N0, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4730 bool N1ConstOrSplat =
4731 isConstOrConstSplat(N1, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4732
4733 // Canonicalize toward having the constant on the RHS.
4734 // TODO: Handle non-splat vector constants. All undef causes trouble.
4735 // FIXME: We can't yet fold constant scalable vector splats, so avoid an
4736 // infinite loop here when we encounter one.
4738 if (N0ConstOrSplat && !N1ConstOrSplat &&
4739 (DCI.isBeforeLegalizeOps() ||
4740 isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
4741 return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4742
4743 // If we have a subtract with the same 2 non-constant operands as this setcc
4744 // -- but in reverse order -- then try to commute the operands of this setcc
4745 // to match. A matching pair of setcc (cmp) and sub may be combined into 1
4746 // instruction on some targets.
4747 if (!N0ConstOrSplat && !N1ConstOrSplat &&
4748 (DCI.isBeforeLegalizeOps() ||
4749 isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
4750 DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N1, N0}) &&
4751 !DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N0, N1}))
4752 return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4753
4754 if (SDValue V = foldSetCCWithRotate(VT, N0, N1, Cond, dl, DAG))
4755 return V;
4756
4757 if (SDValue V = foldSetCCWithFunnelShift(VT, N0, N1, Cond, dl, DAG))
4758 return V;
4759
4760 if (auto *N1C = isConstOrConstSplat(N1)) {
4761 const APInt &C1 = N1C->getAPIntValue();
4762
4763 // Optimize some CTPOP cases.
4764 if (SDValue V = simplifySetCCWithCTPOP(*this, VT, N0, C1, Cond, dl, DAG))
4765 return V;
4766
4767 // For equality to 0 of a no-wrap multiply, decompose and test each op:
4768 // X * Y == 0 --> (X == 0) || (Y == 0)
4769 // X * Y != 0 --> (X != 0) && (Y != 0)
4770 // TODO: This bails out if minsize is set, but if the target doesn't have a
4771 // single instruction multiply for this type, it would likely be
4772 // smaller to decompose.
4773 if (C1.isZero() && (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4774 N0.getOpcode() == ISD::MUL && N0.hasOneUse() &&
4775 (N0->getFlags().hasNoUnsignedWrap() ||
4776 N0->getFlags().hasNoSignedWrap()) &&
4777 !Attr.hasFnAttr(Attribute::MinSize)) {
4778 SDValue IsXZero = DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
4779 SDValue IsYZero = DAG.getSetCC(dl, VT, N0.getOperand(1), N1, Cond);
4780 unsigned LogicOp = Cond == ISD::SETEQ ? ISD::OR : ISD::AND;
4781 return DAG.getNode(LogicOp, dl, VT, IsXZero, IsYZero);
4782 }
4783
4784 // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
4785 // equality comparison, then we're just comparing whether X itself is
4786 // zero.
4787 if (N0.getOpcode() == ISD::SRL && (C1.isZero() || C1.isOne()) &&
4788 N0.getOperand(0).getOpcode() == ISD::CTLZ &&
4790 if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) {
4791 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4792 ShAmt->getAPIntValue() == Log2_32(N0.getScalarValueSizeInBits())) {
4793 if ((C1 == 0) == (Cond == ISD::SETEQ)) {
4794 // (srl (ctlz x), 5) == 0 -> X != 0
4795 // (srl (ctlz x), 5) != 1 -> X != 0
4796 Cond = ISD::SETNE;
4797 } else {
4798 // (srl (ctlz x), 5) != 0 -> X == 0
4799 // (srl (ctlz x), 5) == 1 -> X == 0
4800 Cond = ISD::SETEQ;
4801 }
4802 SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
4803 return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), Zero,
4804 Cond);
4805 }
4806 }
4807 }
4808 }
4809
4810 // setcc X, 0, setlt --> X (when X is all sign bits)
4811 // setcc X, 0, setne --> X (when X is all sign bits)
4812 //
4813 // When we know that X has 0 or -1 in each element (or scalar), this
4814 // comparison will produce X. This is only true when boolean contents are
4815 // represented via 0s and -1s.
4816 if (VT == OpVT &&
4817 // Check that the result of setcc is 0 and -1.
4819 // Match only for checks X < 0 and X != 0
4820 (Cond == ISD::SETLT || Cond == ISD::SETNE) && isNullOrNullSplat(N1) &&
4821 // The identity holds iff we know all sign bits for all lanes.
4823 return N0;
4824
4825 // FIXME: Support vectors.
4826 if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
4827 const APInt &C1 = N1C->getAPIntValue();
4828
4829 // (zext x) == C --> x == (trunc C)
4830 // (sext x) == C --> x == (trunc C)
4831 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4832 DCI.isBeforeLegalize() && N0->hasOneUse()) {
4833 unsigned MinBits = N0.getValueSizeInBits();
4834 SDValue PreExt;
4835 bool Signed = false;
4836 if (N0->getOpcode() == ISD::ZERO_EXTEND) {
4837 // ZExt
4838 MinBits = N0->getOperand(0).getValueSizeInBits();
4839 PreExt = N0->getOperand(0);
4840 } else if (N0->getOpcode() == ISD::AND) {
4841 // DAGCombine turns costly ZExts into ANDs
4842 if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
4843 if ((C->getAPIntValue()+1).isPowerOf2()) {
4844 MinBits = C->getAPIntValue().countr_one();
4845 PreExt = N0->getOperand(0);
4846 }
4847 } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
4848 // SExt
4849 MinBits = N0->getOperand(0).getValueSizeInBits();
4850 PreExt = N0->getOperand(0);
4851 Signed = true;
4852 } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
4853 // ZEXTLOAD / SEXTLOAD
4854 if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
4855 MinBits = LN0->getMemoryVT().getSizeInBits();
4856 PreExt = N0;
4857 } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
4858 Signed = true;
4859 MinBits = LN0->getMemoryVT().getSizeInBits();
4860 PreExt = N0;
4861 }
4862 }
4863
4864 // Figure out how many bits we need to preserve this constant.
4865 unsigned ReqdBits = Signed ? C1.getSignificantBits() : C1.getActiveBits();
4866
4867 // Make sure we're not losing bits from the constant.
4868 if (MinBits > 0 &&
4869 MinBits < C1.getBitWidth() &&
4870 MinBits >= ReqdBits) {
4871 EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
4872 if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
4873 // Will get folded away.
4874 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
4875 if (MinBits == 1 && C1 == 1)
4876 // Invert the condition.
4877 return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
4879 SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
4880 return DAG.getSetCC(dl, VT, Trunc, C, Cond);
4881 }
4882
4883 // If truncating the setcc operands is not desirable, we can still
4884 // simplify the expression in some cases:
4885 // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
4886 // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
4887 // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
4888 // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
4889 // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
4890 // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
4891 SDValue TopSetCC = N0->getOperand(0);
4892 unsigned N0Opc = N0->getOpcode();
4893 bool SExt = (N0Opc == ISD::SIGN_EXTEND);
4894 if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
4895 TopSetCC.getOpcode() == ISD::SETCC &&
4896 (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
4897 (isConstFalseVal(N1) ||
4898 isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
4899
4900 bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) ||
4901 (!N1C->isZero() && Cond == ISD::SETNE);
4902
4903 if (!Inverse)
4904 return TopSetCC;
4905
4907 cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
4908 TopSetCC.getOperand(0).getValueType());
4909 return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
4910 TopSetCC.getOperand(1),
4911 InvCond);
4912 }
4913 }
4914 }
4915
4916 // If the LHS is '(and load, const)', the RHS is 0, the test is for
4917 // equality or unsigned, and all 1 bits of the const are in the same
4918 // partial word, see if we can shorten the load.
4919 if (DCI.isBeforeLegalize() &&
4921 N0.getOpcode() == ISD::AND && C1 == 0 &&
4922 N0.getNode()->hasOneUse() &&
4923 isa<LoadSDNode>(N0.getOperand(0)) &&
4924 N0.getOperand(0).getNode()->hasOneUse() &&
4926 auto *Lod = cast<LoadSDNode>(N0.getOperand(0));
4927 APInt bestMask;
4928 unsigned bestWidth = 0, bestOffset = 0;
4929 if (Lod->isSimple() && Lod->isUnindexed() &&
4930 (Lod->getMemoryVT().isByteSized() ||
4931 isPaddedAtMostSignificantBitsWhenStored(Lod->getMemoryVT()))) {
4932 unsigned memWidth = Lod->getMemoryVT().getStoreSizeInBits();
4933 unsigned origWidth = N0.getValueSizeInBits();
4934 unsigned maskWidth = origWidth;
4935 // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
4936 // 8 bits, but have to be careful...
4937 if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
4938 origWidth = Lod->getMemoryVT().getSizeInBits();
4939 const APInt &Mask = N0.getConstantOperandAPInt(1);
4940 // Only consider power-of-2 widths (and at least one byte) as candidates
4941 // for the narrowed load.
4942 for (unsigned width = 8; width < origWidth; width *= 2) {
4943 EVT newVT = EVT::getIntegerVT(*DAG.getContext(), width);
4944 APInt newMask = APInt::getLowBitsSet(maskWidth, width);
4945 // Avoid accessing any padding here for now (we could use memWidth
4946 // instead of origWidth here otherwise).
4947 unsigned maxOffset = origWidth - width;
4948 for (unsigned offset = 0; offset <= maxOffset; offset += 8) {
4949 if (Mask.isSubsetOf(newMask)) {
4950 unsigned ptrOffset =
4951 Layout.isLittleEndian() ? offset : memWidth - width - offset;
4952 unsigned IsFast = 0;
4953 assert((ptrOffset % 8) == 0 && "Non-Bytealigned pointer offset");
4954 Align NewAlign = commonAlignment(Lod->getAlign(), ptrOffset / 8);
4956 ptrOffset / 8) &&
4958 *DAG.getContext(), Layout, newVT, Lod->getAddressSpace(),
4959 NewAlign, Lod->getMemOperand()->getFlags(), &IsFast) &&
4960 IsFast) {
4961 bestOffset = ptrOffset / 8;
4962 bestMask = Mask.lshr(offset);
4963 bestWidth = width;
4964 break;
4965 }
4966 }
4967 newMask <<= 8;
4968 }
4969 if (bestWidth)
4970 break;
4971 }
4972 }
4973 if (bestWidth) {
4974 EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
4975 SDValue Ptr = Lod->getBasePtr();
4976 if (bestOffset != 0)
4977 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(bestOffset));
4978 SDValue NewLoad =
4979 DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
4980 Lod->getPointerInfo().getWithOffset(bestOffset),
4981 Lod->getBaseAlign());
4982 SDValue And =
4983 DAG.getNode(ISD::AND, dl, newVT, NewLoad,
4984 DAG.getConstant(bestMask.trunc(bestWidth), dl, newVT));
4985 return DAG.getSetCC(dl, VT, And, DAG.getConstant(0LL, dl, newVT), Cond);
4986 }
4987 }
4988
4989 // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
4990 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
4991 unsigned InSize = N0.getOperand(0).getValueSizeInBits();
4992
4993 // If the comparison constant has bits in the upper part, the
4994 // zero-extended value could never match.
4996 C1.getBitWidth() - InSize))) {
4997 switch (Cond) {
4998 case ISD::SETUGT:
4999 case ISD::SETUGE:
5000 case ISD::SETEQ:
5001 return DAG.getConstant(0, dl, VT);
5002 case ISD::SETULT:
5003 case ISD::SETULE:
5004 case ISD::SETNE:
5005 return DAG.getConstant(1, dl, VT);
5006 case ISD::SETGT:
5007 case ISD::SETGE:
5008 // True if the sign bit of C1 is set.
5009 return DAG.getConstant(C1.isNegative(), dl, VT);
5010 case ISD::SETLT:
5011 case ISD::SETLE:
5012 // True if the sign bit of C1 isn't set.
5013 return DAG.getConstant(C1.isNonNegative(), dl, VT);
5014 default:
5015 break;
5016 }
5017 }
5018
5019 // Otherwise, we can perform the comparison with the low bits.
5020 switch (Cond) {
5021 case ISD::SETEQ:
5022 case ISD::SETNE:
5023 case ISD::SETUGT:
5024 case ISD::SETUGE:
5025 case ISD::SETULT:
5026 case ISD::SETULE: {
5027 EVT newVT = N0.getOperand(0).getValueType();
5028 // FIXME: Should use isNarrowingProfitable.
5029 if (DCI.isBeforeLegalizeOps() ||
5030 (isOperationLegal(ISD::SETCC, newVT) &&
5031 isCondCodeLegal(Cond, newVT.getSimpleVT()) &&
5033 EVT NewSetCCVT = getSetCCResultType(Layout, *DAG.getContext(), newVT);
5034 SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);
5035
5036 SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
5037 NewConst, Cond);
5038 return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
5039 }
5040 break;
5041 }
5042 default:
5043 break; // todo, be more careful with signed comparisons
5044 }
5045 } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
5046 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5048 OpVT)) {
5049 EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
5050 unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
5051 EVT ExtDstTy = N0.getValueType();
5052 unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
5053
5054 // If the constant doesn't fit into the number of bits for the source of
5055 // the sign extension, it is impossible for both sides to be equal.
5056 if (C1.getSignificantBits() > ExtSrcTyBits)
5057 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
5058
5059 assert(ExtDstTy == N0.getOperand(0).getValueType() &&
5060 ExtDstTy != ExtSrcTy && "Unexpected types!");
5061 APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
5062 SDValue ZextOp = DAG.getNode(ISD::AND, dl, ExtDstTy, N0.getOperand(0),
5063 DAG.getConstant(Imm, dl, ExtDstTy));
5064 if (!DCI.isCalledByLegalizer())
5065 DCI.AddToWorklist(ZextOp.getNode());
5066 // Otherwise, make this a use of a zext.
5067 return DAG.getSetCC(dl, VT, ZextOp,
5068 DAG.getConstant(C1 & Imm, dl, ExtDstTy), Cond);
5069 } else if ((N1C->isZero() || N1C->isOne()) &&
5070 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5071 // SETCC (X), [0|1], [EQ|NE] -> X if X is known 0/1. i1 types are
5072 // excluded as they are handled below whilst checking for foldBooleans.
5073 if ((N0.getOpcode() == ISD::SETCC || VT.getScalarType() != MVT::i1) &&
5074 isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
5075 (N0.getValueType() == MVT::i1 ||
5079 bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
5080 if (TrueWhenTrue)
5081 return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
5082 // Invert the condition.
5083 if (N0.getOpcode() == ISD::SETCC) {
5086 if (DCI.isBeforeLegalizeOps() ||
5088 return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
5089 }
5090 }
5091
5092 if ((N0.getOpcode() == ISD::XOR ||
5093 (N0.getOpcode() == ISD::AND &&
5094 N0.getOperand(0).getOpcode() == ISD::XOR &&
5095 N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
5096 isOneConstant(N0.getOperand(1))) {
5097 // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
5098 // can only do this if the top bits are known zero.
5099 unsigned BitWidth = N0.getValueSizeInBits();
5100 if (DAG.MaskedValueIsZero(N0,
5102 BitWidth-1))) {
5103 // Okay, get the un-inverted input value.
5104 SDValue Val;
5105 if (N0.getOpcode() == ISD::XOR) {
5106 Val = N0.getOperand(0);
5107 } else {
5108 assert(N0.getOpcode() == ISD::AND &&
5109 N0.getOperand(0).getOpcode() == ISD::XOR);
5110 // ((X^1)&1)^1 -> X & 1
5111 Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
5112 N0.getOperand(0).getOperand(0),
5113 N0.getOperand(1));
5114 }
5115
5116 return DAG.getSetCC(dl, VT, Val, N1,
5118 }
5119 } else if (N1C->isOne()) {
5120 SDValue Op0 = N0;
5121 if (Op0.getOpcode() == ISD::TRUNCATE)
5122 Op0 = Op0.getOperand(0);
5123
5124 if ((Op0.getOpcode() == ISD::XOR) &&
5125 Op0.getOperand(0).getOpcode() == ISD::SETCC &&
5126 Op0.getOperand(1).getOpcode() == ISD::SETCC) {
5127 SDValue XorLHS = Op0.getOperand(0);
5128 SDValue XorRHS = Op0.getOperand(1);
5129 // Ensure that the input setccs return an i1 type or 0/1 value.
5130 if (Op0.getValueType() == MVT::i1 ||
5135 // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
5137 return DAG.getSetCC(dl, VT, XorLHS, XorRHS, Cond);
5138 }
5139 }
5140 if (Op0.getOpcode() == ISD::AND && isOneConstant(Op0.getOperand(1))) {
5141 // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
5142 if (Op0.getValueType().bitsGT(VT))
5143 Op0 = DAG.getNode(ISD::AND, dl, VT,
5144 DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
5145 DAG.getConstant(1, dl, VT));
5146 else if (Op0.getValueType().bitsLT(VT))
5147 Op0 = DAG.getNode(ISD::AND, dl, VT,
5148 DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
5149 DAG.getConstant(1, dl, VT));
5150
5151 return DAG.getSetCC(dl, VT, Op0,
5152 DAG.getConstant(0, dl, Op0.getValueType()),
5154 }
5155 if (Op0.getOpcode() == ISD::AssertZext &&
5156 cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
5157 return DAG.getSetCC(dl, VT, Op0,
5158 DAG.getConstant(0, dl, Op0.getValueType()),
5160 }
5161 }
5162
5163 // Given:
5164 // icmp eq/ne (urem %x, %y), 0
5165 // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
5166 // icmp eq/ne %x, 0
5167 if (N0.getOpcode() == ISD::UREM && N1C->isZero() &&
5168 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5169 KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));
5170 KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));
5171 if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
5172 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
5173 }
5174
5175 // Fold set_cc seteq (ashr X, BW-1), -1 -> set_cc setlt X, 0
5176 // and set_cc setne (ashr X, BW-1), -1 -> set_cc setge X, 0
5177 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5179 N0.getConstantOperandAPInt(1) == OpVT.getScalarSizeInBits() - 1 &&
5180 N1C->isAllOnes()) {
5181 return DAG.getSetCC(dl, VT, N0.getOperand(0),
5182 DAG.getConstant(0, dl, OpVT),
5184 }
5185
5186 // fold (setcc (trunc x) c) -> (setcc x c)
5187 if (N0.getOpcode() == ISD::TRUNCATE &&
5189 (N0->getFlags().hasNoSignedWrap() &&
5192 EVT NewVT = N0.getOperand(0).getValueType();
5193 SDValue NewConst = DAG.getConstant(
5195 ? C1.sext(NewVT.getSizeInBits())
5196 : C1.zext(NewVT.getSizeInBits()),
5197 dl, NewVT);
5198 return DAG.getSetCC(dl, VT, N0.getOperand(0), NewConst, Cond);
5199 }
5200
5201 if (SDValue V =
5202 optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
5203 return V;
5204 }
5205
5206 // These simplifications apply to splat vectors as well.
5207 // TODO: Handle more splat vector cases.
5208 if (auto *N1C = isConstOrConstSplat(N1)) {
5209 const APInt &C1 = N1C->getAPIntValue();
5210
5211 APInt MinVal, MaxVal;
5212 unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits();
5214 MinVal = APInt::getSignedMinValue(OperandBitSize);
5215 MaxVal = APInt::getSignedMaxValue(OperandBitSize);
5216 } else {
5217 MinVal = APInt::getMinValue(OperandBitSize);
5218 MaxVal = APInt::getMaxValue(OperandBitSize);
5219 }
5220
5221 // Canonicalize GE/LE comparisons to use GT/LT comparisons.
5222 if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
5223 // X >= MIN --> true
5224 if (C1 == MinVal)
5225 return DAG.getBoolConstant(true, dl, VT, OpVT);
5226
5227 if (!VT.isVector()) { // TODO: Support this for vectors.
5228 // X >= C0 --> X > (C0 - 1)
5229 APInt C = C1 - 1;
5231 if ((DCI.isBeforeLegalizeOps() ||
5232 isCondCodeLegal(NewCC, OpVT.getSimpleVT())) &&
5233 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
5234 isLegalICmpImmediate(C.getSExtValue())))) {
5235 return DAG.getSetCC(dl, VT, N0,
5236 DAG.getConstant(C, dl, N1.getValueType()),
5237 NewCC);
5238 }
5239 }
5240 }
5241
5242 if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
5243 // X <= MAX --> true
5244 if (C1 == MaxVal)
5245 return DAG.getBoolConstant(true, dl, VT, OpVT);
5246
5247 // X <= C0 --> X < (C0 + 1)
5248 if (!VT.isVector()) { // TODO: Support this for vectors.
5249 APInt C = C1 + 1;
5251 if ((DCI.isBeforeLegalizeOps() ||
5252 isCondCodeLegal(NewCC, OpVT.getSimpleVT())) &&
5253 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
5254 isLegalICmpImmediate(C.getSExtValue())))) {
5255 return DAG.getSetCC(dl, VT, N0,
5256 DAG.getConstant(C, dl, N1.getValueType()),
5257 NewCC);
5258 }
5259 }
5260 }
5261
5262 if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
5263 if (C1 == MinVal)
5264 return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false
5265
5266 // TODO: Support this for vectors after legalize ops.
5267 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5268 // Canonicalize setlt X, Max --> setne X, Max
5269 if (C1 == MaxVal)
5270 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
5271
5272 // If we have setult X, 1, turn it into seteq X, 0
5273 if (C1 == MinVal+1)
5274 return DAG.getSetCC(dl, VT, N0,
5275 DAG.getConstant(MinVal, dl, N0.getValueType()),
5276 ISD::SETEQ);
5277 }
5278 }
5279
5280 if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
5281 if (C1 == MaxVal)
5282 return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false
5283
5284 // TODO: Support this for vectors after legalize ops.
5285 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5286 // Canonicalize setgt X, Min --> setne X, Min
5287 if (C1 == MinVal)
5288 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
5289
5290 // If we have setugt X, Max-1, turn it into seteq X, Max
5291 if (C1 == MaxVal-1)
5292 return DAG.getSetCC(dl, VT, N0,
5293 DAG.getConstant(MaxVal, dl, N0.getValueType()),
5294 ISD::SETEQ);
5295 }
5296 }
5297
5298 if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
5299 // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
5300 if (C1.isZero())
5301 if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
5302 VT, N0, N1, Cond, DCI, dl))
5303 return CC;
5304
5305 // For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y).
5306 // For example, when high 32-bits of i64 X are known clear:
5307 // all bits clear: (X | (Y<<32)) == 0 --> (X | Y) == 0
5308 // all bits set: (X | (Y<<32)) == -1 --> (X & Y) == -1
5309 bool CmpZero = N1C->isZero();
5310 bool CmpNegOne = N1C->isAllOnes();
5311 if ((CmpZero || CmpNegOne) && N0.hasOneUse()) {
5312 // Match or(lo,shl(hi,bw/2)) pattern.
5313 auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
5314 unsigned EltBits = V.getScalarValueSizeInBits();
5315 if (V.getOpcode() != ISD::OR || (EltBits % 2) != 0)
5316 return false;
5317 SDValue LHS = V.getOperand(0);
5318 SDValue RHS = V.getOperand(1);
5319 APInt HiBits = APInt::getHighBitsSet(EltBits, EltBits / 2);
5320 // Unshifted element must have zero upperbits.
5321 if (RHS.getOpcode() == ISD::SHL &&
5322 isa<ConstantSDNode>(RHS.getOperand(1)) &&
5323 RHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5324 DAG.MaskedValueIsZero(LHS, HiBits)) {
5325 Lo = LHS;
5326 Hi = RHS.getOperand(0);
5327 return true;
5328 }
5329 if (LHS.getOpcode() == ISD::SHL &&
5330 isa<ConstantSDNode>(LHS.getOperand(1)) &&
5331 LHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5332 DAG.MaskedValueIsZero(RHS, HiBits)) {
5333 Lo = RHS;
5334 Hi = LHS.getOperand(0);
5335 return true;
5336 }
5337 return false;
5338 };
5339
5340 auto MergeConcat = [&](SDValue Lo, SDValue Hi) {
5341 unsigned EltBits = N0.getScalarValueSizeInBits();
5342 unsigned HalfBits = EltBits / 2;
5343 APInt HiBits = APInt::getHighBitsSet(EltBits, HalfBits);
5344 SDValue LoBits = DAG.getConstant(~HiBits, dl, OpVT);
5345 SDValue HiMask = DAG.getNode(ISD::AND, dl, OpVT, Hi, LoBits);
5346 SDValue NewN0 =
5347 DAG.getNode(CmpZero ? ISD::OR : ISD::AND, dl, OpVT, Lo, HiMask);
5348 SDValue NewN1 = CmpZero ? DAG.getConstant(0, dl, OpVT) : LoBits;
5349 return DAG.getSetCC(dl, VT, NewN0, NewN1, Cond);
5350 };
5351
5352 SDValue Lo, Hi;
5353 if (IsConcat(N0, Lo, Hi))
5354 return MergeConcat(Lo, Hi);
5355
5356 if (N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR) {
5357 SDValue Lo0, Lo1, Hi0, Hi1;
5358 if (IsConcat(N0.getOperand(0), Lo0, Hi0) &&
5359 IsConcat(N0.getOperand(1), Lo1, Hi1)) {
5360 return MergeConcat(DAG.getNode(N0.getOpcode(), dl, OpVT, Lo0, Lo1),
5361 DAG.getNode(N0.getOpcode(), dl, OpVT, Hi0, Hi1));
5362 }
5363 }
5364 }
5365 }
5366
5367 // If we have "setcc X, C0", check to see if we can shrink the immediate
5368 // by changing cc.
5369 // TODO: Support this for vectors after legalize ops.
5370 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5371 // SETUGT X, SINTMAX -> SETLT X, 0
5372 // SETUGE X, SINTMIN -> SETLT X, 0
5373 if ((Cond == ISD::SETUGT && C1.isMaxSignedValue()) ||
5374 (Cond == ISD::SETUGE && C1.isMinSignedValue()))
5375 return DAG.getSetCC(dl, VT, N0,
5376 DAG.getConstant(0, dl, N1.getValueType()),
5377 ISD::SETLT);
5378
5379 // SETULT X, SINTMIN -> SETGT X, -1
5380 // SETULE X, SINTMAX -> SETGT X, -1
5381 if ((Cond == ISD::SETULT && C1.isMinSignedValue()) ||
5382 (Cond == ISD::SETULE && C1.isMaxSignedValue()))
5383 return DAG.getSetCC(dl, VT, N0,
5384 DAG.getAllOnesConstant(dl, N1.getValueType()),
5385 ISD::SETGT);
5386 }
5387 }
5388
5389 // Back to non-vector simplifications.
5390 // TODO: Can we do these for vector splats?
5391 if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
5392 const APInt &C1 = N1C->getAPIntValue();
5393 EVT ShValTy = N0.getValueType();
5394
5395 // Fold bit comparisons when we can. This will result in an
5396 // incorrect value when boolean false is negative one, unless
5397 // the bitsize is 1 in which case the false value is the same
5398 // in practice regardless of the representation.
5399 if ((VT.getSizeInBits() == 1 ||
5401 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5402 (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) &&
5403 N0.getOpcode() == ISD::AND) {
5404 if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5405 if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3
5406 // Perform the xform if the AND RHS is a single bit.
5407 unsigned ShCt = AndRHS->getAPIntValue().logBase2();
5408 if (AndRHS->getAPIntValue().isPowerOf2() &&
5409 !shouldAvoidTransformToShift(ShValTy, ShCt)) {
5410 return DAG.getNode(
5411 ISD::TRUNCATE, dl, VT,
5412 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5413 DAG.getShiftAmountConstant(ShCt, ShValTy, dl)));
5414 }
5415 } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
5416 // (X & 8) == 8 --> (X & 8) >> 3
5417 // Perform the xform if C1 is a single bit.
5418 unsigned ShCt = C1.logBase2();
5419 if (C1.isPowerOf2() && !shouldAvoidTransformToShift(ShValTy, ShCt)) {
5420 return DAG.getNode(
5421 ISD::TRUNCATE, dl, VT,
5422 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5423 DAG.getShiftAmountConstant(ShCt, ShValTy, dl)));
5424 }
5425 }
5426 }
5427 }
5428
5429 if (C1.getSignificantBits() <= 64 &&
5431 // (X & -256) == 256 -> (X >> 8) == 1
5432 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5433 N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
5434 if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5435 const APInt &AndRHSC = AndRHS->getAPIntValue();
5436 if (AndRHSC.isNegatedPowerOf2() && C1.isSubsetOf(AndRHSC)) {
5437 unsigned ShiftBits = AndRHSC.countr_zero();
5438 if (!shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5439 // If using an unsigned shift doesn't yield a legal compare
5440 // immediate, try using sra instead.
5441 APInt NewC = C1.lshr(ShiftBits);
5442 if (NewC.getSignificantBits() <= 64 &&
5444 APInt SignedC = C1.ashr(ShiftBits);
5445 if (SignedC.getSignificantBits() <= 64 &&
5447 SDValue Shift = DAG.getNode(
5448 ISD::SRA, dl, ShValTy, N0.getOperand(0),
5449 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5450 SDValue CmpRHS = DAG.getConstant(SignedC, dl, ShValTy);
5451 return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
5452 }
5453 }
5454 SDValue Shift = DAG.getNode(
5455 ISD::SRL, dl, ShValTy, N0.getOperand(0),
5456 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5457 SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
5458 return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
5459 }
5460 }
5461 }
5462 } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
5463 Cond == ISD::SETULE || Cond == ISD::SETUGT) {
5464 bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
5465 // X < 0x100000000 -> (X >> 32) < 1
5466 // X >= 0x100000000 -> (X >> 32) >= 1
5467 // X <= 0x0ffffffff -> (X >> 32) < 1
5468 // X > 0x0ffffffff -> (X >> 32) >= 1
5469 unsigned ShiftBits;
5470 APInt NewC = C1;
5471 ISD::CondCode NewCond = Cond;
5472 if (AdjOne) {
5473 ShiftBits = C1.countr_one();
5474 NewC = NewC + 1;
5475 NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
5476 } else {
5477 ShiftBits = C1.countr_zero();
5478 }
5479 NewC.lshrInPlace(ShiftBits);
5480 if (ShiftBits && NewC.getSignificantBits() <= 64 &&
5482 !shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5483 SDValue Shift =
5484 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5485 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5486 SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
5487 return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
5488 }
5489 }
5490 }
5491 }
5492
5494 auto *CFP = cast<ConstantFPSDNode>(N1);
5495 assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");
5496
5497 // Otherwise, we know the RHS is not a NaN. Simplify the node to drop the
5498 // constant if knowing that the operand is non-nan is enough. We prefer to
5499 // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
5500 // materialize 0.0.
5501 if (Cond == ISD::SETO || Cond == ISD::SETUO)
5502 return DAG.getSetCC(dl, VT, N0, N0, Cond);
5503
5504 // setcc (fneg x), C -> setcc swap(pred) x, -C
5505 if (N0.getOpcode() == ISD::FNEG) {
5507 if (DCI.isBeforeLegalizeOps() ||
5508 isCondCodeLegal(SwapCond, N0.getSimpleValueType())) {
5509 SDValue NegN1 = DAG.getNode(ISD::FNEG, dl, N0.getValueType(), N1);
5510 return DAG.getSetCC(dl, VT, N0.getOperand(0), NegN1, SwapCond);
5511 }
5512 }
5513
5514 // setueq/setoeq X, (fabs Inf) -> is_fpclass X, fcInf
5516 !isFPImmLegal(CFP->getValueAPF(), CFP->getValueType(0))) {
5517 bool IsFabs = N0.getOpcode() == ISD::FABS;
5518 SDValue Op = IsFabs ? N0.getOperand(0) : N0;
5519 if ((Cond == ISD::SETOEQ || Cond == ISD::SETUEQ) && CFP->isInfinity()) {
5520 FPClassTest Flag = CFP->isNegative() ? (IsFabs ? fcNone : fcNegInf)
5521 : (IsFabs ? fcInf : fcPosInf);
5522 if (Cond == ISD::SETUEQ)
5523 Flag |= fcNan;
5524 return DAG.getNode(ISD::IS_FPCLASS, dl, VT, Op,
5525 DAG.getTargetConstant(Flag, dl, MVT::i32));
5526 }
5527 }
5528
5529 // If the condition is not legal, see if we can find an equivalent one
5530 // which is legal.
5532 // If the comparison was an awkward floating-point == or != and one of
5533 // the comparison operands is infinity or negative infinity, convert the
5534 // condition to a less-awkward <= or >=.
5535 if (CFP->getValueAPF().isInfinity()) {
5536 bool IsNegInf = CFP->getValueAPF().isNegative();
5538 switch (Cond) {
5539 case ISD::SETOEQ: NewCond = IsNegInf ? ISD::SETOLE : ISD::SETOGE; break;
5540 case ISD::SETUEQ: NewCond = IsNegInf ? ISD::SETULE : ISD::SETUGE; break;
5541 case ISD::SETUNE: NewCond = IsNegInf ? ISD::SETUGT : ISD::SETULT; break;
5542 case ISD::SETONE: NewCond = IsNegInf ? ISD::SETOGT : ISD::SETOLT; break;
5543 default: break;
5544 }
5545 if (NewCond != ISD::SETCC_INVALID &&
5546 isCondCodeLegal(NewCond, N0.getSimpleValueType()))
5547 return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5548 }
5549 }
5550 }
5551
5552 if (N0 == N1) {
5553 // The sext(setcc()) => setcc() optimization relies on the appropriate
5554 // constant being emitted.
5555 assert(!N0.getValueType().isInteger() &&
5556 "Integer types should be handled by FoldSetCC");
5557
5558 bool EqTrue = ISD::isTrueWhenEqual(Cond);
5559 unsigned UOF = ISD::getUnorderedFlavor(Cond);
5560 if (UOF == 2) // FP operators that are undefined on NaNs.
5561 return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5562 if (UOF == unsigned(EqTrue))
5563 return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5564 // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
5565 // if it is not already.
5566 ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
5567 if (NewCond != Cond &&
5568 (DCI.isBeforeLegalizeOps() ||
5569 isCondCodeLegal(NewCond, N0.getSimpleValueType())))
5570 return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5571 }
5572
5573 // ~X > ~Y --> Y > X
5574 // ~X < ~Y --> Y < X
5575 // ~X < C --> X > ~C
5576 // ~X > C --> X < ~C
5577 if ((isSignedIntSetCC(Cond) || isUnsignedIntSetCC(Cond)) &&
5578 N0.getValueType().isInteger()) {
5579 if (isBitwiseNot(N0)) {
5580 if (isBitwiseNot(N1))
5581 return DAG.getSetCC(dl, VT, N1.getOperand(0), N0.getOperand(0), Cond);
5582
5585 SDValue Not = DAG.getNOT(dl, N1, OpVT);
5586 return DAG.getSetCC(dl, VT, Not, N0.getOperand(0), Cond);
5587 }
5588 }
5589 }
5590
5591 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5592 N0.getValueType().isInteger()) {
5593 if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
5594 N0.getOpcode() == ISD::XOR) {
5595 // Simplify (X+Y) == (X+Z) --> Y == Z
5596 if (N0.getOpcode() == N1.getOpcode()) {
5597 if (N0.getOperand(0) == N1.getOperand(0))
5598 return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
5599 if (N0.getOperand(1) == N1.getOperand(1))
5600 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
5601 if (isCommutativeBinOp(N0.getOpcode())) {
5602 // If X op Y == Y op X, try other combinations.
5603 if (N0.getOperand(0) == N1.getOperand(1))
5604 return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
5605 Cond);
5606 if (N0.getOperand(1) == N1.getOperand(0))
5607 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
5608 Cond);
5609 }
5610 }
5611
5612 // If RHS is a legal immediate value for a compare instruction, we need
5613 // to be careful about increasing register pressure needlessly.
5614 bool LegalRHSImm = false;
5615
5616 if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
5617 if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5618 // Turn (X+C1) == C2 --> X == C2-C1
5619 if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse())
5620 return DAG.getSetCC(
5621 dl, VT, N0.getOperand(0),
5622 DAG.getConstant(RHSC->getAPIntValue() - LHSR->getAPIntValue(),
5623 dl, N0.getValueType()),
5624 Cond);
5625
5626 // Turn (X^C1) == C2 --> X == C1^C2
5627 if (N0.getOpcode() == ISD::XOR && N0.getNode()->hasOneUse())
5628 return DAG.getSetCC(
5629 dl, VT, N0.getOperand(0),
5630 DAG.getConstant(LHSR->getAPIntValue() ^ RHSC->getAPIntValue(),
5631 dl, N0.getValueType()),
5632 Cond);
5633 }
5634
5635 // Turn (C1-X) == C2 --> X == C1-C2
5636 if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
5637 if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse())
5638 return DAG.getSetCC(
5639 dl, VT, N0.getOperand(1),
5640 DAG.getConstant(SUBC->getAPIntValue() - RHSC->getAPIntValue(),
5641 dl, N0.getValueType()),
5642 Cond);
5643
5644 // Could RHSC fold directly into a compare?
5645 if (RHSC->getValueType(0).getSizeInBits() <= 64)
5646 LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
5647 }
5648
5649 // (X+Y) == X --> Y == 0 and similar folds.
5650 // Don't do this if X is an immediate that can fold into a cmp
5651 // instruction and X+Y has other uses. It could be an induction variable
5652 // chain, and the transform would increase register pressure.
5653 if (!LegalRHSImm || N0.hasOneUse())
5654 if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, dl, DCI))
5655 return V;
5656 }
5657
5658 if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
5659 N1.getOpcode() == ISD::XOR)
5660 if (SDValue V = foldSetCCWithBinOp(VT, N1, N0, Cond, dl, DCI))
5661 return V;
5662
5663 if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, dl, DCI))
5664 return V;
5665
5666 if (SDValue V = foldSetCCWithOr(VT, N0, N1, Cond, dl, DCI))
5667 return V;
5668 }
5669
5670 // Fold remainder of division by a constant.
5671 if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) &&
5672 N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5673 // When division is cheap or optimizing for minimum size,
5674 // fall through to DIVREM creation by skipping this fold.
5675 if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Attribute::MinSize)) {
5676 if (N0.getOpcode() == ISD::UREM) {
5677 if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
5678 return Folded;
5679 } else if (N0.getOpcode() == ISD::SREM) {
5680 if (SDValue Folded = buildSREMEqFold(VT, N0, N1, Cond, DCI, dl))
5681 return Folded;
5682 }
5683 }
5684 }
5685
5686 // Fold away ALL boolean setcc's.
5687 if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
5688 SDValue Temp;
5689 switch (Cond) {
5690 default: llvm_unreachable("Unknown integer setcc!");
5691 case ISD::SETEQ: // X == Y -> ~(X^Y)
5692 Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5693 N0 = DAG.getNOT(dl, Temp, OpVT);
5694 if (!DCI.isCalledByLegalizer())
5695 DCI.AddToWorklist(Temp.getNode());
5696 break;
5697 case ISD::SETNE: // X != Y --> (X^Y)
5698 N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5699 break;
5700 case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
5701 case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
5702 Temp = DAG.getNOT(dl, N0, OpVT);
5703 N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp);
5704 if (!DCI.isCalledByLegalizer())
5705 DCI.AddToWorklist(Temp.getNode());
5706 break;
5707 case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
5708 case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
5709 Temp = DAG.getNOT(dl, N1, OpVT);
5710 N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp);
5711 if (!DCI.isCalledByLegalizer())
5712 DCI.AddToWorklist(Temp.getNode());
5713 break;
5714 case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
5715 case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
5716 Temp = DAG.getNOT(dl, N0, OpVT);
5717 N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp);
5718 if (!DCI.isCalledByLegalizer())
5719 DCI.AddToWorklist(Temp.getNode());
5720 break;
5721 case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
5722 case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
5723 Temp = DAG.getNOT(dl, N1, OpVT);
5724 N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp);
5725 break;
5726 }
5727 if (VT.getScalarType() != MVT::i1) {
5728 if (!DCI.isCalledByLegalizer())
5729 DCI.AddToWorklist(N0.getNode());
5730 // FIXME: If running after legalize, we probably can't do this.
5732 N0 = DAG.getNode(ExtendCode, dl, VT, N0);
5733 }
5734 return N0;
5735 }
5736
5737 // Fold (setcc (trunc x) (trunc y)) -> (setcc x y)
5738 if (N0.getOpcode() == ISD::TRUNCATE && N1.getOpcode() == ISD::TRUNCATE &&
5739 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
5741 N1->getFlags().hasNoUnsignedWrap()) ||
5743 N1->getFlags().hasNoSignedWrap())) &&
5745 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
5746 }
5747
5748 // Fold (setcc (sub nsw a, b), zero, s??) -> (setcc a, b, s??)
5749 // TODO: Remove that .isVector() check
5750 if (VT.isVector() && isZeroOrZeroSplat(N1) && N0.getOpcode() == ISD::SUB &&
5752 return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), Cond);
5753 }
5754
5755 // Could not fold it.
5756 return SDValue();
5757}
5758
5759/// Returns true (and the GlobalValue and the offset) if the node is a
5760/// GlobalAddress + offset.
    ///
    /// On success GA is set to the global and the displacement is added to Offset.
    /// Offset is only ever accumulated with `+=` here and never reset, so the
    /// caller supplies the starting value.
    // NOTE(review): listing line 5761 is absent from this extract, so the start
    // of the signature (return type, function name and the parameters that
    // declare WN and GA) is not visible here. Restore it before building.
5762 int64_t &Offset) const {
5763
     // Classify the node returned by unwrapAddress rather than WN itself.
5764 SDNode *N = unwrapAddress(SDValue(WN, 0)).getNode();
5765
     // Base case: the node is itself a global address; report the global and
     // fold its own offset into the running total.
5766 if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
5767 GA = GASD->getGlobal();
5768 Offset += GASD->getOffset();
5769 return true;
5770 }
5771
     // Add-like node: succeed when one operand is (recursively) a global address
     // plus offset and the other operand is a constant, which is sign-extended
     // and added to Offset. The second operand is only tried as the address when
     // the first recursive call fails.
     // NOTE(review): a successful recursive call has already updated GA and
     // Offset; if the sibling operand then turns out not to be a constant, this
     // function still returns false with those outputs modified.
5772 if (N->isAnyAdd()) {
5773 SDValue N1 = N->getOperand(0);
5774 SDValue N2 = N->getOperand(1);
5775 if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
5776 if (auto *V = dyn_cast<ConstantSDNode>(N2)) {
5777 Offset += V->getSExtValue();
5778 return true;
5779 }
5780 } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
5781 if (auto *V = dyn_cast<ConstantSDNode>(N1)) {
5782 Offset += V->getSExtValue();
5783 return true;
5784 }
5785 }
5786 }
5787
5788 return false;
5789}
5790
// NOTE(review): listing line 5791 is absent from this extract, so the return
// type, the name of this definition and its first parameter are not visible
// here. Restore that line before building.
// The visible body ignores DCI and always returns an empty SDValue.
5792 DAGCombinerInfo &DCI) const {
5793 // Default implementation: no optimization.
5794 return SDValue();
5795}
5796
5797//===----------------------------------------------------------------------===//
5798// Inline Assembler Implementation Methods
5799//===----------------------------------------------------------------------===//
5800
// NOTE(review): listing lines 5801-5802 are absent from this extract, so the
// return type, name and parameter list of this definition (including the
// declaration of `Constraint`) are not visible here.
//
// Classifies a constraint string: one-character codes are looked up in the
// switch below, brace-enclosed strings are reported as C_Register (or C_Memory
// for "{memory}"), and everything else yields C_Unknown.
5803 unsigned S = Constraint.size();
5804
5805 if (S == 1) {
5806 switch (Constraint[0]) {
     // Unlisted single letters fall out of the switch and end up as C_Unknown.
5807 default: break;
5808 case 'r':
5809 return C_RegisterClass;
5810 case 'm': // memory
5811 case 'o': // offsettable
5812 case 'V': // not offsettable
5813 return C_Memory;
5814 case 'p': // Address.
5815 return C_Address;
5816 case 'n': // Simple Integer
5817 case 'E': // Floating Point Constant
5818 case 'F': // Floating Point Constant
5819 return C_Immediate;
5820 case 'i': // Simple Integer or Relocatable Constant
5821 case 's': // Relocatable Constant
5822 case 'X': // Allow ANY value.
5823 case 'I': // Target registers.
5824 case 'J':
5825 case 'K':
5826 case 'L':
5827 case 'M':
5828 case 'N':
5829 case 'O':
5830 case 'P':
5831 case '<':
5832 case '>':
5833 return C_Other;
5834 }
5835 }
5836
     // Brace-enclosed form. The length check (8) plus the 6-character substring
     // comparison singles out exactly "{memory}".
5837 if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') {
5838 if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}"
5839 return C_Memory;
5840 return C_Register;
5841 }
5842 return C_Unknown;
5843}
5844
5845/// Try to replace an X constraint, which matches anything, with another that
5846/// has more specific requirements based on the type of the corresponding
5847/// operand.
5848const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
5849 if (ConstraintVT.isInteger())
5850 return "r";
5851 if (ConstraintVT.isFloatingPoint())
5852 return "f"; // works for many targets
5853 return nullptr;
5854}
5855
// NOTE(review): listing line 5856 is absent from this extract, so the return
// type and name of this definition are not visible here. Restore that line
// before building.
// The visible body uses none of its parameters and always returns an empty
// SDValue.
5857 SDValue &Chain, SDValue &Glue, const SDLoc &DL,
5858 const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
5859 return SDValue();
5860}
5861
5862/// Lower the specified operand into the Ops vector.
5863/// If it is invalid, don't add anything to Ops.
    ///
    /// Only the single-letter constraints 'X', 'i', 'n' and 's' are handled here;
    /// for any other letter, or a constraint longer than one character, Ops is
    /// left untouched.
    // NOTE(review): listing line 5864 is absent from this extract, so the start
    // of the signature (return type, name and the parameter that declares `Op`)
    // is not visible here.
5865 StringRef Constraint,
5866 std::vector<SDValue> &Ops,
5867 SelectionDAG &DAG) const {
5868
     // NOTE(review): only size() > 1 is rejected, so an empty Constraint would
     // reach Constraint[0] below. Confirm callers never pass an empty string.
5869 if (Constraint.size() > 1)
5870 return;
5871
5872 char ConstraintLetter = Constraint[0];
5873 switch (ConstraintLetter) {
5874 default: break;
5875 case 'X': // Allows any operand
5876 case 'i': // Simple Integer or Relocatable Constant
5877 case 'n': // Simple Integer
5878 case 's': { // Relocatable Constant
5879
     // NOTE(review): listing line 5880 is absent from this extract; `C`, which
     // is assigned and dereferenced below, is presumably declared there.
     // Offset accumulates the constants peeled off while walking towards the
     // underlying symbol or constant.
5881 uint64_t Offset = 0;
5882
5883 // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
5884 // etc., since getelementptr is variadic. We can't use
5885 // SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible
5886 // while in this case the GA may be furthest from the root node which is
5887 // likely an ISD::ADD.
5888 while (true) {
     // A plain constant terminates the walk, except under 's', which does not
     // take this branch.
5889 if ((C = dyn_cast<ConstantSDNode>(Op)) && ConstraintLetter != 's') {
5890 // gcc prints these as sign extended. Sign extend value to 64 bits
5891 // now; without this it would get ZExt'd later in
5892 // ScheduleDAGSDNodes::EmitNode, which is very generic.
     // 1-bit constants are the exception: they are extended according to the
     // boolean contents reported for MVT::i64 instead of always sign-extending.
5893 bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
5894 BooleanContent BCont = getBooleanContents(MVT::i64);
5895 ISD::NodeType ExtOpc =
5896 IsBool ? getExtendForContent(BCont) : ISD::SIGN_EXTEND;
5897 int64_t ExtVal =
5898 ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue() : C->getSExtValue();
5899 Ops.push_back(
5900 DAG.getTargetConstant(Offset + ExtVal, SDLoc(C), MVT::i64));
5901 return;
5902 }
     // Symbolic operands are accepted for every handled letter except 'n'.
5903 if (ConstraintLetter != 'n') {
5904 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
5905 Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
5906 GA->getValueType(0),
5907 Offset + GA->getOffset()));
5908 return;
5909 }
5910 if (const auto *BA = dyn_cast<BlockAddressSDNode>(Op)) {
5911 Ops.push_back(DAG.getTargetBlockAddress(
5912 BA->getBlockAddress(), BA->getValueType(0),
5913 Offset + BA->getOffset(), BA->getTargetFlags()));
5914 return;
5915 }
     // NOTE(review): listing line 5916 is absent from this extract; it
     // presumably opens the conditional that guards the unmodified push of Op
     // below and that the brace on line 5919 closes.
5917 Ops.push_back(Op);
5918 return;
5919 }
5920 }
     // Otherwise peel one constant operand off an ADD/SUB and keep walking the
     // remaining operand. For SUB only a constant in operand 0 is accepted.
5921 const unsigned OpCode = Op.getOpcode();
5922 if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
5923 if ((C = dyn_cast<ConstantSDNode>(Op.getOperand(0))))
5924 Op = Op.getOperand(1);
5925 // Subtraction is not commutative.
5926 else if (OpCode == ISD::ADD &&
5927 (C = dyn_cast<ConstantSDNode>(Op.getOperand(1))))
5928 Op = Op.getOperand(0);
5929 else
5930 return;
     // The peeled constant is added for ADD and subtracted for SUB.
5931 Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
5932 continue;
5933 }
     // Anything else is not a recognised shape: leave Ops untouched.
5934 return;
5935 }
5936 break;
5937 }
5938 }
5939}
5940
5944
// Resolve a brace-enclosed register constraint such as "{name}" to a
// (physical register, register class) pair.
//
// Returns {0, nullptr} when Constraint does not start with '{' or when no
// register with a matching assembly name is found in any legal class.
// NOTE(review): listing line 5946 is absent from this extract, so the name of
// this definition and the parameter that declares `RI` are not visible here.
5945std::pair<unsigned, const TargetRegisterClass *>
5947 StringRef Constraint,
5948 MVT VT) const {
5949 if (!Constraint.starts_with("{"))
5950 return std::make_pair(0u, static_cast<TargetRegisterClass *>(nullptr));
     // A leading '{' without a trailing '}' is only caught by this assertion.
5951 assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");
5952
5953 // Remove the braces from around the name.
5954 StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
5955
     // R remembers the first name match found in a class that does not have VT
     // as a legal type; it is the fallback result.
5956 std::pair<unsigned, const TargetRegisterClass *> R =
5957 std::make_pair(0u, static_cast<const TargetRegisterClass *>(nullptr));
5958
5959 // Figure out which register class contains this reg.
5960 for (const TargetRegisterClass *RC : RI->regclasses()) {
5961 // If none of the value types for this register class are valid, we
5962 // can't use it. For example, 64-bit reg classes on 32-bit targets.
5963 if (!isLegalRC(*RI, *RC))
5964 continue;
5965
5966 for (const MCPhysReg &PR : *RC) {
     // Register names are compared case-insensitively.
5967 if (RegName.equals_insensitive(RI->getRegAsmName(PR))) {
5968 std::pair<unsigned, const TargetRegisterClass *> S =
5969 std::make_pair(PR, RC);
5970
5971 // If this register class has the requested value type, return it,
5972 // otherwise keep searching and return the first class found
5973 // if no other is found which explicitly has the requested type.
5974 if (RI->isTypeLegalForClass(*RC, VT))
5975 return S;
5976 if (!R.second)
5977 R = S;
5978 }
5979 }
5980 }
5981
5982 return R;
5983}
5984
5985//===----------------------------------------------------------------------===//
5986// Constraint Selection.
5987
5988/// Return true if this is an input operand that is a matching constraint like
5989/// "4".
    ///
    /// The test looks only at the first character of ConstraintCode, which must
    /// not be empty (asserted).
    // NOTE(review): listing line 5990 is absent from this extract, so the return
    // type, owning class and name of this definition are not visible here.
    // ConstraintCode is not declared in this block; presumably it is a member of
    // that class.
5991 assert(!ConstraintCode.empty() && "No known constraint!");
     // The cast to unsigned char keeps the argument to isdigit non-negative.
5992 return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
5993}
5994
5995/// If this is an input matching constraint, this method returns the output
5996/// operand it matches.
    ///
    /// The number is obtained by running atoi over ConstraintCode, which must not
    /// be empty (asserted). Nothing here checks that the code actually starts
    /// with a digit.
    // NOTE(review): listing line 5997 is absent from this extract, so the return
    // type, owning class and name of this definition are not visible here.
5998 assert(!ConstraintCode.empty() && "No known constraint!");
5999 return atoi(ConstraintCode.c_str());
6000}
6001
6002/// Split up the constraint string from the inline assembly value into the
6003/// specific constraints and their prefixes, and also tie in the associated
6004/// operand values.
6005/// If this returns an empty vector, and if the constraint string itself
6006/// isn't empty, there was an error parsing.
    ///
    /// The work is done in three passes: build one AsmOperandInfo per parsed
    /// constraint and compute its value type, pick the best multiple-alternative
    /// index if alternatives are present, then validate tied (matching) operands.
    // NOTE(review): listing lines 6007-6008 are absent from this extract, so the
    // return type, the name of this definition and the parameter that declares
    // `DL` are not visible here.
6009 const TargetRegisterInfo *TRI,
6010 const CallBase &Call) const {
6011 /// Information about all of the constraints.
6012 AsmOperandInfoVector ConstraintOperands;
6013 const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
6014 unsigned maCount = 0; // Largest number of multiple alternative constraints.
6015
6016 // Do a prepass over the constraints, canonicalizing them, and building up the
6017 // ConstraintOperands list.
6018 unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
6019 unsigned ResNo = 0; // ResNo - The result number of the next output.
6020 unsigned LabelNo = 0; // LabelNo - CallBr indirect dest number.
6021
6022 for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
6023 ConstraintOperands.emplace_back(std::move(CI));
6024 AsmOperandInfo &OpInfo = ConstraintOperands.back();
6025
6026 // Update multiple alternative constraint count.
6027 if (OpInfo.multipleAlternatives.size() > maCount)
6028 maCount = OpInfo.multipleAlternatives.size();
6029
     // Default type; overwritten below when an operand type can be computed.
6030 OpInfo.ConstraintVT = MVT::Other;
6031
6032 // Compute the value type for each operand.
6033 switch (OpInfo.Type) {
6034 case InlineAsm::isOutput: {
6035 // Indirect outputs just consume an argument.
6036 if (OpInfo.isIndirect) {
6037 OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
6038 break;
6039 }
6040
6041 // The return value of the call is this value. As such, there is no
6042 // corresponding argument.
6043 assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
6044 EVT VT;
     // With a struct return type, each direct output maps to the struct element
     // selected by ResNo.
6045 if (auto *STy = dyn_cast<StructType>(Call.getType())) {
6046 VT = getAsmOperandValueType(DL, STy->getElementType(ResNo));
6047 } else {
6048 assert(ResNo == 0 && "Asm only has one result!");
6049 VT = getAsmOperandValueType(DL, Call.getType());
6050 }
6051 OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
6052 ++ResNo;
6053 break;
6054 }
6055 case InlineAsm::isInput:
6056 OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
6057 break;
6058 case InlineAsm::isLabel:
6059 OpInfo.CallOperandVal = cast<CallBrInst>(&Call)->getIndirectDest(LabelNo);
6060 ++LabelNo;
     // `continue` skips the operand-type computation below, so label operands
     // keep ConstraintVT == MVT::Other and do not advance ArgNo.
6061 continue;
     // NOTE(review): listing line 6062 is absent from this extract; it
     // presumably holds the case label that the comment and break below belong
     // to.
6063 // Nothing to do.
6064 break;
6065 }
6066
     // Only operands that were given a call operand above get a type derived
     // from it, and only they consume an argument slot (ArgNo++ at the end).
6067 if (OpInfo.CallOperandVal) {
6068 llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
6069 if (OpInfo.isIndirect) {
6070 OpTy = Call.getParamElementType(ArgNo);
6071 assert(OpTy && "Indirect operand must have elementtype attribute");
6072 }
6073
6074 // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
6075 if (StructType *STy = dyn_cast<StructType>(OpTy))
6076 if (STy->getNumElements() == 1)
6077 OpTy = STy->getElementType(0);
6078
6079 // If OpTy is not a single value, it may be a struct/union that we
6080 // can tile with integers.
6081 if (!OpTy->isSingleValueType() && OpTy->isSized()) {
6082 unsigned BitSize = DL.getTypeSizeInBits(OpTy);
     // Only these exact bit widths are replaced by an integer type of the same
     // size; any other size leaves OpTy unchanged.
6083 switch (BitSize) {
6084 default: break;
6085 case 1:
6086 case 8:
6087 case 16:
6088 case 32:
6089 case 64:
6090 case 128:
6091 OpTy = IntegerType::get(OpTy->getContext(), BitSize);
6092 break;
6093 }
6094 }
6095
6096 EVT VT = getAsmOperandValueType(DL, OpTy, true);
6097 OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
6098 ArgNo++;
6099 }
6100 }
6101
6102 // If we have multiple alternative constraints, select the best alternative.
6103 if (!ConstraintOperands.empty()) {
6104 if (maCount) {
     // bestWeight starts at -1 and is only replaced by a strictly larger sum,
     // so if every alternative is rejected (sum -1) bestMAIndex stays 0.
6105 unsigned bestMAIndex = 0;
6106 int bestWeight = -1;
6107 // weight: -1 = invalid match, and 0 = so-so match to 5 = good match.
6108 int weight = -1;
6109 unsigned maIndex;
6110 // Compute the sums of the weights for each alternative, keeping track
6111 // of the best (highest weight) one so far.
6112 for (maIndex = 0; maIndex < maCount; ++maIndex) {
6113 int weightSum = 0;
6114 for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
6115 cIndex != eIndex; ++cIndex) {
6116 AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
     // Clobbers take no part in alternative selection.
6117 if (OpInfo.Type == InlineAsm::isClobber)
6118 continue;
6119
6120 // If this is an output operand with a matching input operand,
6121 // look up the matching input. If their types mismatch, e.g. one
6122 // is an integer, the other is floating point, or their sizes are
6123 // different, flag it as an maCantMatch.
6124 if (OpInfo.hasMatchingInput()) {
6125 AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
6126 if (OpInfo.ConstraintVT != Input.ConstraintVT) {
6127 if ((OpInfo.ConstraintVT.isInteger() !=
6128 Input.ConstraintVT.isInteger()) ||
6129 (OpInfo.ConstraintVT.getSizeInBits() !=
6130 Input.ConstraintVT.getSizeInBits())) {
6131 weightSum = -1; // Can't match.
6132 break;
6133 }
6134 }
6135 }
     // A single invalid operand disqualifies the whole alternative.
6136 weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
6137 if (weight == -1) {
6138 weightSum = -1;
6139 break;
6140 }
6141 weightSum += weight;
6142 }
6143 // Update best.
6144 if (weightSum > bestWeight) {
6145 bestWeight = weightSum;
6146 bestMAIndex = maIndex;
6147 }
6148 }
6149
6150 // Now select chosen alternative in each constraint.
6151 for (AsmOperandInfo &cInfo : ConstraintOperands)
6152 if (cInfo.Type != InlineAsm::isClobber)
6153 cInfo.selectAlternative(bestMAIndex);
6154 }
6155 }
6156
6157 // Check and hook up tied operands, choose constraint code to use.
6158 for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
6159 cIndex != eIndex; ++cIndex) {
6160 AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
6161
6162 // If this is an output operand with a matching input operand, look up the
6163 // matching input. If their types mismatch, e.g. one is an integer, the
6164 // other is floating point, or their sizes are different, flag it as an
6165 // error.
6166 if (OpInfo.hasMatchingInput()) {
6167 AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
6168
     // Differing value types are tolerated only when both sides are int/FP (or
     // both are neither) and both constraints resolve to the same register
     // class; otherwise this is a fatal error.
6169 if (OpInfo.ConstraintVT != Input.ConstraintVT) {
6170 std::pair<unsigned, const TargetRegisterClass *> MatchRC =
6171 getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
6172 OpInfo.ConstraintVT);
6173 std::pair<unsigned, const TargetRegisterClass *> InputRC =
6174 getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
6175 Input.ConstraintVT);
6176 const bool OutOpIsIntOrFP = OpInfo.ConstraintVT.isInteger() ||
6177 OpInfo.ConstraintVT.isFloatingPoint();
6178 const bool InOpIsIntOrFP = Input.ConstraintVT.isInteger() ||
6179 Input.ConstraintVT.isFloatingPoint();
6180 if ((OutOpIsIntOrFP != InOpIsIntOrFP) ||
6181 (MatchRC.second != InputRC.second)) {
6182 report_fatal_error("Unsupported asm: input constraint"
6183 " with a matching output constraint of"
6184 " incompatible type!");
6185 }
6186 }
6187 }
6188 }
6189
6190 return ConstraintOperands;
6191}
6192
6193/// Return a number indicating our preference for choosing a type of constraint
6194/// over another, for the purpose of sorting them. Immediates are almost always
6195/// preferable (when they can be emitted). A higher return value means a
6196/// stronger preference for one constraint type relative to another.
6197/// FIXME: We should prefer registers over memory but doing so may lead to
6198/// unrecoverable register exhaustion later.
6199/// https://github.com/llvm/llvm-project/issues/20571
    // NOTE(review): listing lines 6200, 6202-6203, 6205-6206, 6208, 6210 and 6212
    // are absent from this extract. That is the signature (which declares `CT`)
    // and, presumably, every `case` label of the switch below, so which
    // constraint type maps to which priority cannot be read from this listing.
    // Restore those lines before building.
6201 switch (CT) {
6204 return 4;
6207 return 3;
6209 return 2;
6211 return 1;
6213 return 0;
6214 }
6215 llvm_unreachable("Invalid constraint type");
6216}
6217
6218/// Examine constraint type and operand type and determine a weight value.
6219/// This object must already have been set up with the operand type
6220/// and the current alternative constraint selected.
    ///
    /// Returns the highest weight that getSingleConstraintMatchWeight reports for
    /// any code in the chosen code list, or CW_Invalid if that list is empty or
    /// no code scores higher.
    // NOTE(review): listing lines 6221-6222 and 6224 are absent from this
    // extract, so the return type and name of this definition and the
    // declaration of `rCodes` are not visible here.
6223 AsmOperandInfo &info, int maIndex) const {
     // An maIndex past the end of the alternatives list selects the operand's
     // own Codes instead of a per-alternative list.
6225 if (maIndex >= (int)info.multipleAlternatives.size())
6226 rCodes = &info.Codes;
6227 else
6228 rCodes = &info.multipleAlternatives[maIndex].Codes;
6229 ConstraintWeight BestWeight = CW_Invalid;
6230
6231 // Loop over the options, keeping track of the highest weight seen.
6232 for (const std::string &rCode : *rCodes) {
6233 ConstraintWeight weight =
6234 getSingleConstraintMatchWeight(info, rCode.c_str());
6235 if (weight > BestWeight)
6236 BestWeight = weight;
6237 }
6238
6239 return BestWeight;
6240}
6241
6242/// Examine constraint type and operand type and determine a weight value.
6243/// This object must already have been set up with the operand type
6244/// and the current alternative constraint selected.
    ///
    /// Only the first character of \p constraint is inspected. When the operand
    /// has no call value the result is CW_Default.
    // NOTE(review): listing lines 6245-6246 and 6248 are absent from this
    // extract, so the return type and name of this definition and the declaration
    // (and initial value) of `weight` are not visible here. Cases below whose
    // type test fails leave `weight` at that initial value.
6247 AsmOperandInfo &info, const char *constraint) const {
6249 Value *CallOperandVal = info.CallOperandVal;
6250 // If we don't have a value, we can't do a match,
6251 // but allow it at the lowest weight.
6252 if (!CallOperandVal)
6253 return CW_Default;
6254 // Look at the constraint type.
6255 switch (*constraint) {
6256 case 'i': // immediate integer.
6257 case 'n': // immediate integer with a known value.
6258 if (isa<ConstantInt>(CallOperandVal))
6259 weight = CW_Constant;
6260 break;
6261 case 's': // non-explicit integral immediate.
6262 if (isa<GlobalValue>(CallOperandVal))
6263 weight = CW_Constant;
6264 break;
6265 case 'E': // immediate float if host format.
6266 case 'F': // immediate float.
6267 if (isa<ConstantFP>(CallOperandVal))
6268 weight = CW_Constant;
6269 break;
     // Memory constraints are weighted unconditionally, whatever the operand.
6270 case '<': // memory operand with autodecrement.
6271 case '>': // memory operand with autoincrement.
6272 case 'm': // memory operand.
6273 case 'o': // offsettable memory operand
6274 case 'V': // non-offsettable memory operand
6275 weight = CW_Memory;
6276 break;
6277 case 'r': // general register.
6278 case 'g': // general register, memory operand or immediate integer.
6279 // note: Clang converts "g" to "imr".
     // Only integer-typed operands are given register weight here.
6280 if (CallOperandVal->getType()->isIntegerTy())
6281 weight = CW_Register;
6282 break;
6283 case 'X': // any operand.
6284 default:
6285 weight = CW_Default;
6286 break;
6287 }
6288 return weight;
6289}
6290
6291/// If there are multiple different constraints that we could pick for this
6292/// operand (e.g. "imr") try to pick the 'best' one.
6293/// This is somewhat tricky: constraints (TargetLowering::ConstraintType) fall
6294/// into seven classes:
6295/// Register -> one specific register
6296/// RegisterClass -> a group of regs
6297/// Memory -> memory
6298/// Address -> a symbolic memory reference
6299/// Immediate -> immediate values
6300/// Other -> magic values (such as "Flag Output Operands")
6301/// Unknown -> something we don't recognize yet and can't handle
6302/// Ideally, we would pick the most specific constraint possible: if we have
6303/// something that fits into a register, we would pick it. The problem here
6304/// is that if we have something that could either be in a register or in
6305/// memory that use of the register could cause selection of *other*
6306/// operands to fail: they might only succeed if we pick memory. Because of
6307/// this the heuristic we use is:
6308///
6309/// 1) If there is an 'other' constraint, and if the operand is valid for
6310/// that constraint, use it. This makes us take advantage of 'i'
6311/// constraints when available.
6312/// 2) Otherwise, pick the most general constraint present. This prefers
6313/// 'm' over 'r', for example.
6314///
/// The result holds one (code, constraint type) pair for every entry of
/// OpInfo.Codes that survives the two filters in the loop below.
///
/// NOTE(review): several lines of this function (the signature head, the
/// definition of `CType`, part of the indirect-operand condition and the call
/// that applies the comparator) are not visible in this listing.
6316 TargetLowering::AsmOperandInfo &OpInfo) const {
6317 ConstraintGroup Ret;
6318
6319 Ret.reserve(OpInfo.Codes.size());
6320 for (StringRef Code : OpInfo.Codes) {
6322
6323 // Indirect 'other' or 'immediate' constraints are not allowed.
6324 if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory ||
6325 CType == TargetLowering::C_Register ||
6327 continue;
6328
6329 // Things with matching constraints can only be registers, per gcc
6330 // documentation. This mainly affects "g" constraints.
6331 if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
6332 continue;
6333
6334 Ret.emplace_back(Code, CType);
6335 }
6336
// Comparator body: entries whose type has a higher getConstraintPiority()
// value order first.
6338 return getConstraintPiority(a.second) > getConstraintPiority(b.second);
6339 });
6340
6341 return Ret;
6342}
6343
6344/// If we have an immediate, see if we can lower it. Return true if we can,
6345/// false otherwise.
///
/// \p P must be a C_Other or C_Immediate constraint pair (asserted). The
/// lowering is attempted through TLI.LowerAsmOperandForConstraint into a
/// scratch vector; success means that vector ended up non-empty. The lowered
/// operands themselves are discarded.
6347 SDValue Op, SelectionDAG *DAG,
6348 const TargetLowering &TLI) {
6349
6350 assert((P.second == TargetLowering::C_Other ||
6351 P.second == TargetLowering::C_Immediate) &&
6352 "need immediate or other");
6353
// Without an operand node there is nothing to lower.
6354 if (!Op.getNode())
6355 return false;
6356
6357 std::vector<SDValue> ResultOps;
6358 TLI.LowerAsmOperandForConstraint(Op, P.first, ResultOps, *DAG);
6359 return !ResultOps.empty();
6360}
6361
6362/// Determines the constraint code and constraint type to use for the specific
6363/// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
///
/// With a single constraint code that code is used directly. With several,
/// a candidate list `G` is walked: leading C_Other / C_Immediate entries are
/// tried in order and the first one that lowerImmediateIfPossible() accepts
/// wins; the first entry of any other type is taken as-is; if every entry was
/// an unlowerable immediate/other, the first entry is used.
///
/// NOTE(review): the signature head and the declaration of `G` are not
/// visible in this listing.
6365 SDValue Op,
6366 SelectionDAG *DAG) const {
6367 assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
6368
6369 // Single-letter constraints ('r') are very common.
6370 if (OpInfo.Codes.size() == 1) {
6371 OpInfo.ConstraintCode = OpInfo.Codes[0];
6372 OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
6373 } else {
// No usable candidate: leave OpInfo's code/type untouched.
6375 if (G.empty())
6376 return;
6377
6378 unsigned BestIdx = 0;
6379 for (const unsigned E = G.size();
6380 BestIdx < E && (G[BestIdx].second == TargetLowering::C_Other ||
6381 G[BestIdx].second == TargetLowering::C_Immediate);
6382 ++BestIdx) {
6383 if (lowerImmediateIfPossible(G[BestIdx], Op, DAG, *this))
6384 break;
6385 // If we're out of constraints, just pick the first one.
6386 if (BestIdx + 1 == E) {
6387 BestIdx = 0;
6388 break;
6389 }
6390 }
6391
6392 OpInfo.ConstraintCode = G[BestIdx].first;
6393 OpInfo.ConstraintType = G[BestIdx].second;
6394 }
6395
6396 // 'X' matches anything.
6397 if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
6398 // Constants are handled elsewhere. For Functions, the type here is the
6399 // type of the result, which is not what we want to look at; leave them
6400 // alone.
6401 Value *v = OpInfo.CallOperandVal;
6402 if (isa<ConstantInt>(v) || isa<Function>(v)) {
6403 return;
6404 }
6405
// Block operands are rewritten to the "i" code. Note that only the code is
// changed here; OpInfo.ConstraintType is not recomputed on this path.
6406 if (isa<BasicBlock>(v) || isa<BlockAddress>(v)) {
6407 OpInfo.ConstraintCode = "i";
6408 return;
6409 }
6410
6411 // Otherwise, try to resolve it to something we know about by looking at
6412 // the actual operand type.
6413 if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
6414 OpInfo.ConstraintCode = Repl;
6415 OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
6416 }
6417 }
6418}
6419
6420/// Given an exact SDIV by a constant, create a multiplication
6421/// with the multiplicative inverse of the constant.
6422/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
///
/// Each constant divisor is split into an odd part and a count of trailing
/// zero bits. The dividend is first shifted right arithmetically (flagged
/// exact) by that count when any divisor had trailing zeros, and is then
/// multiplied by the multiplicative inverse of the odd part.
///
/// Returns an empty SDValue if operand 1 does not satisfy the predicate (for
/// example when a divisor is zero). Only the intermediate SRA node, when one
/// is created, is appended to \p Created; the final MUL is the return value.
6424 const SDLoc &dl, SelectionDAG &DAG,
6425 SmallVectorImpl<SDNode *> &Created) {
6426 SDValue Op0 = N->getOperand(0);
6427 SDValue Op1 = N->getOperand(1);
6428 EVT VT = N->getValueType(0);
6429 EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
6430 EVT ShSVT = ShVT.getScalarType();
6431
6432 bool UseSRA = false;
6433 SmallVector<SDValue, 16> Shifts, Factors;
6434
// Per-element callback: records the shift amount and inverse factor for one
// constant divisor, rejecting zero.
6435 auto BuildSDIVPattern = [&](ConstantSDNode *C) {
6436 if (C->isZero())
6437 return false;
6438
6439 EVT CT = C->getValueType(0);
6440 APInt Divisor = C->getAPIntValue();
6441 unsigned Shift = Divisor.countr_zero();
6442 if (Shift) {
// Strip the trailing zero bits so the remaining divisor is odd.
6443 Divisor.ashrInPlace(Shift);
6444 UseSRA = true;
6445 }
6446 APInt Factor = Divisor.multiplicativeInverse();
6447 Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
6448 Factors.push_back(DAG.getConstant(Factor, dl, CT));
6449 return true;
6450 };
6451
6452 // Collect all magic values from the build vector.
6453 if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern))
6454 return SDValue();
6455
// Materialise the collected constants in the same shape as the divisor:
// build vector, splat vector, or plain scalar.
6456 SDValue Shift, Factor;
6457 if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
6458 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6459 Factor = DAG.getBuildVector(VT, dl, Factors);
6460 } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
6461 assert(Shifts.size() == 1 && Factors.size() == 1 &&
6462 "Expected matchUnaryPredicate to return one element for scalable "
6463 "vectors");
6464 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6465 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6466 } else {
6467 assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
6468 Shift = Shifts[0];
6469 Factor = Factors[0];
6470 }
6471
6472 SDValue Res = Op0;
6473 if (UseSRA) {
6474 Res = DAG.getNode(ISD::SRA, dl, VT, Res, Shift, SDNodeFlags::Exact);
6475 Created.push_back(Res.getNode());
6476 }
6477
6478 return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
6479}
6480
6481/// Given an exact UDIV by a constant, create a multiplication
6482/// with the multiplicative inverse of the constant.
6483/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
///
/// Each constant divisor is split into an odd part and a count of trailing
/// zero bits. The dividend is first shifted right logically (flagged exact)
/// by that count when any divisor had trailing zeros, and is then multiplied
/// by the multiplicative inverse of the odd part.
///
/// Returns an empty SDValue if operand 1 does not satisfy the predicate (for
/// example when a divisor is zero). Only the intermediate SRL node, when one
/// is created, is appended to \p Created; the final MUL is the return value.
6485 const SDLoc &dl, SelectionDAG &DAG,
6486 SmallVectorImpl<SDNode *> &Created) {
6487 EVT VT = N->getValueType(0);
6488 EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
6489 EVT ShSVT = ShVT.getScalarType();
6490
6491 bool UseSRL = false;
6492 SmallVector<SDValue, 16> Shifts, Factors;
6493
// Per-element callback: records the shift amount and inverse factor for one
// constant divisor, rejecting zero.
6494 auto BuildUDIVPattern = [&](ConstantSDNode *C) {
6495 if (C->isZero())
6496 return false;
6497
6498 EVT CT = C->getValueType(0);
6499 APInt Divisor = C->getAPIntValue();
6500 unsigned Shift = Divisor.countr_zero();
6501 if (Shift) {
// Strip the trailing zero bits so the remaining divisor is odd.
6502 Divisor.lshrInPlace(Shift);
6503 UseSRL = true;
6504 }
6505 // Calculate the multiplicative inverse modulo BW.
6506 APInt Factor = Divisor.multiplicativeInverse();
6507 Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
6508 Factors.push_back(DAG.getConstant(Factor, dl, CT));
6509 return true;
6510 };
6511
6512 SDValue Op1 = N->getOperand(1);
6513
6514 // Collect all magic values from the build vector.
6515 if (!ISD::matchUnaryPredicate(Op1, BuildUDIVPattern))
6516 return SDValue();
6517
// Materialise the collected constants in the same shape as the divisor:
// build vector, splat vector, or plain scalar.
6518 SDValue Shift, Factor;
6519 if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
6520 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6521 Factor = DAG.getBuildVector(VT, dl, Factors);
6522 } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
6523 assert(Shifts.size() == 1 && Factors.size() == 1 &&
6524 "Expected matchUnaryPredicate to return one element for scalable "
6525 "vectors");
6526 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6527 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6528 } else {
6529 assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
6530 Shift = Shifts[0];
6531 Factor = Factors[0];
6532 }
6533
6534 SDValue Res = N->getOperand(0);
6535 if (UseSRL) {
6536 Res = DAG.getNode(ISD::SRL, dl, VT, Res, Shift, SDNodeFlags::Exact);
6537 Created.push_back(Res.getNode());
6538 }
6539
6540 return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
6541}
6542
6544 SelectionDAG &DAG,
6545 SmallVectorImpl<SDNode *> &Created) const {
// Default behaviour: if integer division is reported as cheap for this value
// type under the current function's attributes, hand back the node itself so
// it is kept as a division; otherwise return an empty SDValue. Nothing is
// appended to Created.
// NOTE(review): the head of this signature is not visible in this listing.
6546 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6547 if (isIntDivCheap(N->getValueType(0), Attr))
6548 return SDValue(N, 0); // Lower SDIV as SDIV
6549 return SDValue();
6550}
6551
// Default behaviour: if integer division is reported as cheap for this value
// type under the current function's attributes, hand back the node itself so
// it is kept as a remainder; otherwise return an empty SDValue. Nothing is
// appended to Created.
// NOTE(review): the line carrying the function name and leading parameters is
// not visible in this listing.
6552SDValue
6554 SelectionDAG &DAG,
6555 SmallVectorImpl<SDNode *> &Created) const {
6556 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6557 if (isIntDivCheap(N->getValueType(0), Attr))
6558 return SDValue(N, 0); // Lower SREM as SREM
6559 return SDValue();
6560}
6561
6562/// Build sdiv by power-of-2 with conditional move instructions
6563/// Ref: "Hacker's Delight" by Henry Warren 10-1
6564/// If conditional move/branch is preferred, we lower sdiv x, +/-2**k into:
6565/// bgez x, label
6566/// add x, x, 2**k-1
6567/// label:
6568/// sra res, x, k
6569/// neg res, res (when the divisor is negative)
///
/// k is taken as the number of trailing zero bits of \p Divisor. The setcc,
/// add and select nodes are always appended to \p Created; the SRA node is
/// appended only when it is not the returned value (negative divisor).
6571 SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
6572 SmallVectorImpl<SDNode *> &Created) const {
6573 unsigned Lg2 = Divisor.countr_zero();
6574 EVT VT = N->getValueType(0);
6575
6576 SDLoc DL(N);
6577 SDValue N0 = N->getOperand(0);
6578 SDValue Zero = DAG.getConstant(0, DL, VT);
// Low Lg2 bits set, i.e. the value 2**Lg2 - 1.
6579 APInt Lg2Mask = APInt::getLowBitsSet(VT.getSizeInBits(), Lg2);
6580 SDValue Pow2MinusOne = DAG.getConstant(Lg2Mask, DL, VT);
6581
6582 // If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
6583 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6584 SDValue Cmp = DAG.getSetCC(DL, CCVT, N0, Zero, ISD::SETLT);
6585 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
6586 SDValue CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);
6587
6588 Created.push_back(Cmp.getNode());
6589 Created.push_back(Add.getNode());
6590 Created.push_back(CMov.getNode());
6591
6592 // Divide by pow2.
6593 SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, CMov,
6594 DAG.getShiftAmountConstant(Lg2, VT, DL));
6595
6596 // If we're dividing by a positive value, we're done. Otherwise, we must
6597 // negate the result.
6598 if (Divisor.isNonNegative())
6599 return SRA;
6600
6601 Created.push_back(SRA.getNode());
6602 return DAG.getNode(ISD::SUB, DL, VT, Zero, SRA);
6603}
6604
6605/// Given an ISD::SDIV node expressing a divide by constant,
6606/// return a DAG expression to select that will generate the same value by
6607/// multiplying by a magic number.
6608/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
///
/// Nodes flagged exact are delegated to BuildExactSDIV. Otherwise the result
/// is built as:
///   Q = mulhs(N0, Magic); Q += N0 * Factor; Q = Q >>s Shift;
///   result = Q + ((Q >>u (EltBits - 1)) & ShiftMask)
/// where the per-element constants come from the divisor. The high multiply
/// uses MULHS, else SMUL_LOHI, else a sign-extended multiply in MulVT
/// followed by a shift and truncate.
///
/// Returns an empty SDValue when the transform is not applicable (no usable
/// multiply for the type, a zero divisor, ...). Intermediate nodes are
/// appended to \p Created; the final ADD is the return value.
///
/// NOTE(review): the signature head, parts of several legality conditions and
/// the definition of `magics` are not visible in this listing.
6610 bool IsAfterLegalization,
6611 bool IsAfterLegalTypes,
6612 SmallVectorImpl<SDNode *> &Created) const {
6613 SDLoc dl(N);
6614
6615 // If the sdiv has an 'exact' bit we can use a simpler lowering.
6616 if (N->getFlags().hasExact())
6617 return BuildExactSDIV(*this, N, dl, DAG, Created);
6618
6619 EVT VT = N->getValueType(0);
6620 EVT SVT = VT.getScalarType();
6621 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6622 EVT ShSVT = ShVT.getScalarType();
6623 unsigned EltBits = VT.getScalarSizeInBits();
// MulVT stays default-constructed (compares equal to EVT()) unless a wider
// multiply type is chosen below.
6624 EVT MulVT;
6625
6626 // Check to see if we can do this.
6627 // FIXME: We should be more aggressive here.
6628 EVT QueryVT = VT;
6629 if (VT.isVector()) {
6630 // If the vector type will be legalized to a vector type with the same
6631 // element type, allow the transform before type legalization if MULHS or
6632 // SMUL_LOHI are supported.
6633 QueryVT = getLegalTypeToTransformTo(*DAG.getContext(), VT);
6634 if (!QueryVT.isVector() ||
6636 return SDValue();
6637 } else if (!isTypeLegal(VT)) {
6638 // Limit this to simple scalars for now.
6639 if (!VT.isSimple())
6640 return SDValue();
6641
6642 // If this type will be promoted to a large enough type with a legal
6643 // multiply operation, we can go ahead and do this transform.
6645 return SDValue();
6646
6647 MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
6648 if (MulVT.getSizeInBits() < (2 * EltBits) ||
6649 !isOperationLegal(ISD::MUL, MulVT))
6650 return SDValue();
6651 }
6652
6653 bool HasMULHS =
6654 isOperationLegalOrCustom(ISD::MULHS, QueryVT, IsAfterLegalization);
6655 bool HasSMUL_LOHI =
6656 isOperationLegalOrCustom(ISD::SMUL_LOHI, QueryVT, IsAfterLegalization);
6657
6658 if (isTypeLegal(VT) && !HasMULHS && !HasSMUL_LOHI && MulVT == EVT()) {
6659 // If type twice as wide legal, widen and use a mul plus a shift.
6660 EVT WideVT = VT.widenIntegerElementType(*DAG.getContext());
6661 // Some targets like AMDGPU try to go from SDIV to SDIVREM which is then
6662 // custom lowered. This is very expensive so avoid it at all costs for
6663 // constant divisors.
6664 if ((!IsAfterLegalTypes && isOperationExpand(ISD::SDIV, VT) &&
6667 MulVT = WideVT;
6668 }
6669
// No way to obtain the high half of the product: give up.
6670 if (!HasMULHS && !HasSMUL_LOHI && MulVT == EVT())
6671 return SDValue();
6672
6673 // If we're after type legalization and SVT is not legal, use the
6674 // promoted type for creating constants to avoid creating nodes with
6675 // illegal types.
6676 if (IsAfterLegalTypes && VT.isVector()) {
6677 SVT = getTypeToTransformTo(*DAG.getContext(), SVT);
6678 if (SVT.bitsLT(VT.getScalarType()))
6679 return SDValue();
6680 ShSVT = getTypeToTransformTo(*DAG.getContext(), ShSVT);
6681 if (ShSVT.bitsLT(ShVT.getScalarType()))
6682 return SDValue();
6683 }
6684 const unsigned SVTBits = SVT.getSizeInBits();
6685
6686 SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;
6687
// Per-element callback: computes the four constants for one divisor and
// appends them to the vectors above. A zero divisor rejects the whole match.
6688 auto BuildSDIVPattern = [&](ConstantSDNode *C) {
6689 if (C->isZero())
6690 return false;
6691 // Truncate the divisor to the target scalar type in case it was promoted
6692 // during type legalization.
6693 APInt Divisor = C->getAPIntValue().trunc(EltBits);
6695 int NumeratorFactor = 0;
6696 int ShiftMask = -1;
6697
6698 if (Divisor.isOne() || Divisor.isAllOnes()) {
6699 // If d is +1/-1, we just multiply the numerator by +1/-1.
6700 NumeratorFactor = Divisor.getSExtValue();
6701 magics.Magic = 0;
6702 magics.ShiftAmount = 0;
6703 ShiftMask = 0;
6704 } else if (Divisor.isStrictlyPositive() && magics.Magic.isNegative()) {
6705 // If d > 0 and m < 0, add the numerator.
6706 NumeratorFactor = 1;
6707 } else if (Divisor.isNegative() && magics.Magic.isStrictlyPositive()) {
6708 // If d < 0 and m > 0, subtract the numerator.
6709 NumeratorFactor = -1;
6710 }
6711
6712 MagicFactors.push_back(
6713 DAG.getConstant(magics.Magic.zext(SVTBits), dl, SVT));
6714 Factors.push_back(DAG.getSignedConstant(NumeratorFactor, dl, SVT));
6715 Shifts.push_back(DAG.getConstant(magics.ShiftAmount, dl, ShSVT));
6716 ShiftMasks.push_back(DAG.getSignedConstant(ShiftMask, dl, SVT));
6717 return true;
6718 };
6719
6720 SDValue N0 = N->getOperand(0);
6721 SDValue N1 = N->getOperand(1);
6722
6723 // Collect the shifts / magic values from each element.
6724 if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern, /*AllowUndefs=*/false,
6725 /*AllowTruncation=*/true))
6726 return SDValue();
6727
// Materialise the collected constants in the same shape as the divisor:
// build vector, splat vector, or plain scalar.
6728 SDValue MagicFactor, Factor, Shift, ShiftMask;
6729 if (N1.getOpcode() == ISD::BUILD_VECTOR) {
6730 MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
6731 Factor = DAG.getBuildVector(VT, dl, Factors);
6732 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6733 ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks);
6734 } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
6735 assert(MagicFactors.size() == 1 && Factors.size() == 1 &&
6736 Shifts.size() == 1 && ShiftMasks.size() == 1 &&
6737 "Expected matchUnaryPredicate to return one element for scalable "
6738 "vectors");
6739 MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
6740 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6741 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6742 ShiftMask = DAG.getSplatVector(VT, dl, ShiftMasks[0]);
6743 } else {
6744 assert(isa<ConstantSDNode>(N1) && "Expected a constant");
6745 MagicFactor = MagicFactors[0];
6746 Factor = Factors[0];
6747 Shift = Shifts[0];
6748 ShiftMask = ShiftMasks[0];
6749 }
6750
6751 // Multiply the numerator (operand 0) by the magic value.
6752 auto GetMULHS = [&](SDValue X, SDValue Y) {
6753 if (HasMULHS)
6754 return DAG.getNode(ISD::MULHS, dl, VT, X, Y);
6755 if (HasSMUL_LOHI) {
6756 SDValue LoHi =
6757 DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
6758 return LoHi.getValue(1);
6759 }
6760
// Fallback: sign-extend both operands to MulVT, multiply, shift the product
// right by EltBits and truncate back to VT to obtain the high half.
6761 X = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, X);
6762 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, Y);
6763 Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
6764 Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
6765 DAG.getShiftAmountConstant(EltBits, MulVT, dl));
6766 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6767 };
6768
6769 SDValue Q = GetMULHS(N0, MagicFactor);
6770 if (!Q)
6771 return SDValue();
6772
6773 Created.push_back(Q.getNode());
6774
6775 // (Optionally) Add/subtract the numerator using Factor.
6776 Factor = DAG.getNode(ISD::MUL, dl, VT, N0, Factor);
6777 Created.push_back(Factor.getNode());
6778 Q = DAG.getNode(ISD::ADD, dl, VT, Q, Factor);
6779 Created.push_back(Q.getNode());
6780
6781 // Shift right algebraic by shift value.
6782 Q = DAG.getNode(ISD::SRA, dl, VT, Q, Shift);
6783 Created.push_back(Q.getNode());
6784
6785 // Extract the sign bit, mask it and add it to the quotient.
6786 SDValue SignShift = DAG.getConstant(EltBits - 1, dl, ShVT);
6787 SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, SignShift);
6788 Created.push_back(T.getNode());
6789 T = DAG.getNode(ISD::AND, dl, VT, T, ShiftMask);
6790 Created.push_back(T.getNode());
6791 return DAG.getNode(ISD::ADD, dl, VT, Q, T);
6792}
6793
6794/// Given an ISD::UDIV node expressing a divide by constant,
6795/// return a DAG expression to select that will generate the same value by
6796/// multiplying by a magic number.
6797/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
///
/// Nodes flagged exact are delegated to BuildExactUDIV. Otherwise the general
/// shape of the result is: optional pre-shift of the dividend, a high
/// multiply by the magic factor, an optional "NPQ" fix-up, an optional
/// post-shift, and a final select that yields the dividend itself for lanes
/// whose divisor is 1. When the magic computation asks for widening, a
/// separate path multiplies in i64 and truncates instead.
///
/// Returns an empty SDValue when the transform is not applicable.
/// Intermediate nodes are appended to \p Created.
///
/// NOTE(review): the signature head, parts of several legality conditions and
/// the definition of `magics` are not visible in this listing.
6799 bool IsAfterLegalization,
6800 bool IsAfterLegalTypes,
6801 SmallVectorImpl<SDNode *> &Created) const {
6802 SDLoc dl(N);
6803
6804 // If the udiv has an 'exact' bit we can use a simpler lowering.
6805 if (N->getFlags().hasExact())
6806 return BuildExactUDIV(*this, N, dl, DAG, Created);
6807
6808 EVT VT = N->getValueType(0);
6809 EVT SVT = VT.getScalarType();
6810 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6811 EVT ShSVT = ShVT.getScalarType();
6812 unsigned EltBits = VT.getScalarSizeInBits();
// MulVT stays default-constructed (compares equal to EVT()) unless a wider
// multiply type is chosen below.
6813 EVT MulVT;
6814
6815 // Check to see if we can do this.
6816 // FIXME: We should be more aggressive here.
6817 EVT QueryVT = VT;
6818 if (VT.isVector()) {
6819 // If the vector type will be legalized to a vector type with the same
6820 // element type, allow the transform before type legalization if MULHU or
6821 // UMUL_LOHI are supported.
6822 QueryVT = getLegalTypeToTransformTo(*DAG.getContext(), VT);
6823 if (!QueryVT.isVector() ||
6825 return SDValue();
6826 } else if (!isTypeLegal(VT)) {
6827 // Limit this to simple scalars for now.
6828 if (!VT.isSimple())
6829 return SDValue();
6830
6831 // If this type will be promoted to a large enough type with a legal
6832 // multiply operation, we can go ahead and do this transform.
6834 return SDValue();
6835
6836 MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
6837 if (MulVT.getSizeInBits() < (2 * EltBits) ||
6838 !isOperationLegal(ISD::MUL, MulVT))
6839 return SDValue();
6840 }
6841
6842 bool HasMULHU =
6843 isOperationLegalOrCustom(ISD::MULHU, QueryVT, IsAfterLegalization);
6844 bool HasUMUL_LOHI =
6845 isOperationLegalOrCustom(ISD::UMUL_LOHI, QueryVT, IsAfterLegalization);
6846
6847 if (isTypeLegal(VT) && !HasMULHU && !HasUMUL_LOHI && MulVT == EVT()) {
6848 // If type twice as wide legal, widen and use a mul plus a shift.
6849 EVT WideVT = VT.widenIntegerElementType(*DAG.getContext());
6850 // Some targets like AMDGPU try to go from UDIV to UDIVREM which is then
6851 // custom lowered. This is very expensive so avoid it at all costs for
6852 // constant divisors.
6853 if ((!IsAfterLegalTypes && isOperationExpand(ISD::UDIV, VT) &&
6856 MulVT = WideVT;
6857 }
6858
// No way to obtain the high half of the product: give up.
6859 if (!HasMULHU && !HasUMUL_LOHI && MulVT == EVT())
6860 return SDValue();
6861
6862 SDValue N0 = N->getOperand(0);
6863 SDValue N1 = N->getOperand(1);
6864
6865 // Try to use leading zeros of the dividend to reduce the multiplier and
6866 // avoid expensive fixups.
6867 unsigned KnownLeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros();
6868
6869 // If we're after type legalization and SVT is not legal, use the
6870 // promoted type for creating constants to avoid creating nodes with
6871 // illegal types.
6872 if (IsAfterLegalTypes && VT.isVector()) {
6873 SVT = getTypeToTransformTo(*DAG.getContext(), SVT);
6874 if (SVT.bitsLT(VT.getScalarType()))
6875 return SDValue();
6876 ShSVT = getTypeToTransformTo(*DAG.getContext(), ShSVT);
6877 if (ShSVT.bitsLT(ShVT.getScalarType()))
6878 return SDValue();
6879 }
6880 const unsigned SVTBits = SVT.getSizeInBits();
6881
6882 // Allow i32 to be widened to i64 for uncooperative divisors if i64 MULHU or
6883 // UMUL_LOHI is supported.
6884 const EVT WideSVT = MVT::i64;
6885 const bool HasWideMULHU =
6886 VT == MVT::i32 &&
6887 isOperationLegalOrCustom(ISD::MULHU, WideSVT, IsAfterLegalization);
6888 const bool HasWideUMUL_LOHI =
6889 VT == MVT::i32 &&
6890 isOperationLegalOrCustom(ISD::UMUL_LOHI, WideSVT, IsAfterLegalization);
6891 const bool AllowWiden = (HasWideMULHU || HasWideUMUL_LOHI);
6892
// These flags are OR-ed across all divisor elements, so each optional stage
// is emitted if any element needs it.
6893 bool UseNPQ = false, UsePreShift = false, UsePostShift = false;
6894 bool UseWiden = false;
6895 SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
6896
// Per-element callback: computes the constants for one divisor and appends
// them to the vectors above. A zero divisor rejects the whole match.
6897 auto BuildUDIVPattern = [&](ConstantSDNode *C) {
6898 if (C->isZero())
6899 return false;
6900 // Truncate the divisor to the target scalar type in case it was promoted
6901 // during type legalization.
6902 APInt Divisor = C->getAPIntValue().trunc(EltBits);
6903
6904 SDValue PreShift, MagicFactor, NPQFactor, PostShift;
6905
6906 // Magic algorithm doesn't work for division by 1. We need to emit a select
6907 // at the end.
6908 if (Divisor.isOne()) {
6909 PreShift = PostShift = DAG.getUNDEF(ShSVT);
6910 MagicFactor = NPQFactor = DAG.getUNDEF(SVT);
6911 } else {
6914 Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()),
6915 /*AllowEvenDivisorOptimization=*/true,
6916 /*AllowWidenOptimization=*/AllowWiden);
6917
6918 if (magics.Widen) {
6919 UseWiden = true;
6920 MagicFactor = DAG.getConstant(magics.Magic, dl, WideSVT);
6921 } else {
6922 MagicFactor = DAG.getConstant(magics.Magic.zext(SVTBits), dl, SVT);
6923 }
6924
6925 assert(magics.PreShift < Divisor.getBitWidth() &&
6926 "We shouldn't generate an undefined shift!");
6927 assert(magics.PostShift < Divisor.getBitWidth() &&
6928 "We shouldn't generate an undefined shift!");
6929 assert((!magics.IsAdd || magics.PreShift == 0) &&
6930 "Unexpected pre-shift");
6931 PreShift = DAG.getConstant(magics.PreShift, dl, ShSVT);
6932 PostShift = DAG.getConstant(magics.PostShift, dl, ShSVT);
// NPQ factor: only bit EltBits-1 set when the add fix-up is needed, zero
// otherwise.
6933 NPQFactor = DAG.getConstant(
6934 magics.IsAdd ? APInt::getOneBitSet(SVTBits, EltBits - 1)
6935 : APInt::getZero(SVTBits),
6936 dl, SVT);
6937 UseNPQ |= magics.IsAdd;
6938 UsePreShift |= magics.PreShift != 0;
6939 UsePostShift |= magics.PostShift != 0;
6940 }
6941
6942 PreShifts.push_back(PreShift);
6943 MagicFactors.push_back(MagicFactor);
6944 NPQFactors.push_back(NPQFactor);
6945 PostShifts.push_back(PostShift);
6946 return true;
6947 };
6948
6949 // Collect the shifts/magic values from each element.
6950 if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern, /*AllowUndefs=*/false,
6951 /*AllowTruncation=*/true))
6952 return SDValue();
6953
// Materialise the collected constants in the same shape as the divisor.
// In the scalar case NPQFactor is left unset; the scalar NPQ step further
// down uses a shift by one instead of a multiply.
6954 SDValue PreShift, PostShift, MagicFactor, NPQFactor;
6955 if (N1.getOpcode() == ISD::BUILD_VECTOR) {
6956 PreShift = DAG.getBuildVector(ShVT, dl, PreShifts);
6957 MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
6958 NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors);
6959 PostShift = DAG.getBuildVector(ShVT, dl, PostShifts);
6960 } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
6961 assert(PreShifts.size() == 1 && MagicFactors.size() == 1 &&
6962 NPQFactors.size() == 1 && PostShifts.size() == 1 &&
6963 "Expected matchUnaryPredicate to return one for scalable vectors");
6964 PreShift = DAG.getSplatVector(ShVT, dl, PreShifts[0]);
6965 MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
6966 NPQFactor = DAG.getSplatVector(VT, dl, NPQFactors[0]);
6967 PostShift = DAG.getSplatVector(ShVT, dl, PostShifts[0]);
6968 } else {
6969 assert(isa<ConstantSDNode>(N1) && "Expected a constant");
6970 PreShift = PreShifts[0];
6971 MagicFactor = MagicFactors[0];
6972 PostShift = PostShifts[0];
6973 }
6974
6975 if (UseWiden) {
6976 // Compute: (WideSVT(x) * MagicFactor) >> WideSVTBits.
6977 SDValue WideN0 = DAG.getNode(ISD::ZERO_EXTEND, dl, WideSVT, N0);
6978
6979 // Perform WideSVTxWideSVT -> 2*WideSVT multiplication and extract high
6980 // WideSVT bits
6981 SDValue High;
6982 if (HasWideMULHU) {
6983 High = DAG.getNode(ISD::MULHU, dl, WideSVT, WideN0, MagicFactor);
6984 } else {
6985 assert(HasWideUMUL_LOHI);
6986 SDValue LoHi =
6987 DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(WideSVT, WideSVT),
6988 WideN0, MagicFactor);
6989 High = LoHi.getValue(1);
6990 }
6991
6992 Created.push_back(High.getNode());
6993 return DAG.getNode(ISD::TRUNCATE, dl, VT, High);
6994 }
6995
6996 SDValue Q = N0;
6997 if (UsePreShift) {
6998 Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
6999 Created.push_back(Q.getNode());
7000 }
7001
7002 auto GetMULHU = [&](SDValue X, SDValue Y) {
7003 if (HasMULHU)
7004 return DAG.getNode(ISD::MULHU, dl, VT, X, Y);
7005 if (HasUMUL_LOHI) {
7006 SDValue LoHi =
7007 DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
7008 return LoHi.getValue(1);
7009 }
7010
// Fallback: zero-extend both operands to MulVT, multiply, shift the product
// right by EltBits and truncate back to VT to obtain the high half.
7011 X = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, X);
7012 Y = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, Y);
7013 Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
7014 Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
7015 DAG.getShiftAmountConstant(EltBits, MulVT, dl));
7016 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
7017 };
7018
7019 // Multiply the numerator (operand 0) by the magic value.
7020 Q = GetMULHU(Q, MagicFactor);
7021 if (!Q)
7022 return SDValue();
7023
7024 Created.push_back(Q.getNode());
7025
7026 if (UseNPQ) {
7027 SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N0, Q);
7028 Created.push_back(NPQ.getNode());
7029
7030 // For vectors we might have a mix of non-NPQ/NPQ paths, so use
7031 // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
7032 if (VT.isVector())
7033 NPQ = GetMULHU(NPQ, NPQFactor);
7034 else
7035 NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, dl, ShVT));
7036
7037 Created.push_back(NPQ.getNode());
7038
7039 Q = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
7040 Created.push_back(Q.getNode());
7041 }
7042
7043 if (UsePostShift) {
7044 Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
7045 Created.push_back(Q.getNode());
7046 }
7047
7048 EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7049
// Lanes whose divisor equals 1 received undef constants above; select the
// unmodified dividend for them.
7050 SDValue One = DAG.getConstant(1, dl, VT);
7051 SDValue IsOne = DAG.getSetCC(dl, SetCCVT, N1, One, ISD::SETEQ);
7052 return DAG.getSelect(dl, VT, IsOne, N0, Q);
7053}
7054
7055/// If all values in Values that *don't* match the predicate are same 'splat'
7056/// value, then replace all values with that splat value.
7057/// Else, if AlternativeReplacement was provided, then replace all values that
7058/// do match predicate with AlternativeReplacement value.
///
/// Values is modified in place. If neither a common splat value nor an
/// AlternativeReplacement is available, Values is left unchanged.
///
/// NOTE(review): the line carrying the function name and the declaration of
/// the `Values` parameter is not visible in this listing.
7059static void
7061 std::function<bool(SDValue)> Predicate,
7062 SDValue AlternativeReplacement = SDValue()) {
7063 SDValue Replacement;
7064 // Is there a value for which the Predicate does *NOT* match? What is it?
7065 auto SplatValue = llvm::find_if_not(Values, Predicate);
7066 if (SplatValue != Values.end()) {
7067 // Does Values consist only of SplatValue's and values matching Predicate?
7068 if (llvm::all_of(Values, [Predicate, SplatValue](SDValue Value) {
7069 return Value == *SplatValue || Predicate(Value);
7070 })) // Then we shall replace values matching predicate with SplatValue.
7071 Replacement = *SplatValue;
7072 }
7073 if (!Replacement) {
7074 // Oops, we did not find the "baseline" splat value.
7075 if (!AlternativeReplacement)
7076 return; // Nothing to do.
7077 // Let's replace with provided value then.
7078 Replacement = AlternativeReplacement;
7079 }
// Only the elements matching Predicate are overwritten.
7080 std::replace_if(Values.begin(), Values.end(), Predicate, Replacement);
7081}
7082
7083/// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
7084/// where the divisor and comparison target are constants,
7085/// return a DAG expression that will generate the same comparison result
7086/// using only multiplications, additions and shifts/rotations.
7087/// Ref: "Hacker's Delight" 10-17.
///
/// Thin wrapper around prepareUREMEqFold: on success every node collected in
/// `Built` is added to the combiner worklist and the folded value is
/// returned; otherwise an empty SDValue is returned.
///
/// NOTE(review): one parameter line and the declaration of `Built` are not
/// visible in this listing.
7088SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
7089 SDValue CompTargetNode,
7091 DAGCombinerInfo &DCI,
7092 const SDLoc &DL) const {
7094 if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
7095 DCI, DL, Built)) {
7096 for (SDNode *N : Built)
7097 DCI.AddToWorklist(N);
7098 return Folded;
7099 }
7100
7101 return SDValue();
7102}
7103
7104SDValue
7105TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
7106 SDValue CompTargetNode, ISD::CondCode Cond,
7107 DAGCombinerInfo &DCI, const SDLoc &DL,
7108 SmallVectorImpl<SDNode *> &Created) const {
7109 // fold (seteq/ne (urem N, D), C) ->
7110 // (setule/ugt (rotr (mul (sub N, C), P), K), Q)
7111 // - D must be constant, with D = D0 * 2^K where D0 is odd
7112 // - P is the multiplicative inverse of D0 modulo 2^W
7113 // - Q = floor(((2^W) - 1) / D)
7114 // where W is the width of the common type of N and D.
7115 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
7116 "Only applicable for (in)equality comparisons.");
7117
7118 SelectionDAG &DAG = DCI.DAG;
7119
7120 EVT VT = REMNode.getValueType();
7121 EVT SVT = VT.getScalarType();
7122 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
7123 EVT ShSVT = ShVT.getScalarType();
7124
7125 // If MUL is unavailable, we cannot proceed in any case.
7126 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
7127 return SDValue();
7128
7129 bool ComparingWithAllZeros = true;
7130 bool AllComparisonsWithNonZerosAreTautological = true;
7131 bool HadTautologicalLanes = false;
7132 bool AllLanesAreTautological = true;
7133 bool HadEvenDivisor = false;
7134 bool AllDivisorsArePowerOfTwo = true;
7135 bool HadTautologicalInvertedLanes = false;
7136 SmallVector<SDValue, 16> PAmts, KAmts, QAmts;
7137
7138 auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
7139 // Division by 0 is UB. Leave it to be constant-folded elsewhere.
7140 if (CDiv->isZero())
7141 return false;
7142
7143 const APInt &D = CDiv->getAPIntValue();
7144 const APInt &Cmp = CCmp->getAPIntValue();
7145
7146 ComparingWithAllZeros &= Cmp.isZero();
7147
7148 // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
7149 // if C2 is not less than C1, the comparison is always false.
7150 // But we will only be able to produce the comparison that will give the
7151 // opposite tautological answer. So this lane would need to be fixed up.
7152 bool TautologicalInvertedLane = D.ule(Cmp);
7153 HadTautologicalInvertedLanes |= TautologicalInvertedLane;
7154
7155 // If all lanes are tautological (either all divisors are ones, or divisor
7156 // is not greater than the constant we are comparing with),
7157 // we will prefer to avoid the fold.
7158 bool TautologicalLane = D.isOne() || TautologicalInvertedLane;
7159 HadTautologicalLanes |= TautologicalLane;
7160 AllLanesAreTautological &= TautologicalLane;
7161
7162 // If we are comparing with non-zero, we'll need to subtract said
7163 // comparison value from the LHS. But there is no point in doing that if
7164 // every lane where we are comparing with non-zero is tautological.
7165 if (!Cmp.isZero())
7166 AllComparisonsWithNonZerosAreTautological &= TautologicalLane;
7167
7168 // Decompose D into D0 * 2^K
7169 unsigned K = D.countr_zero();
7170 assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
7171 APInt D0 = D.lshr(K);
7172
7173 // D is even if it has trailing zeros.
7174 HadEvenDivisor |= (K != 0);
7175 // D is a power-of-two if D0 is one.
7176 // If all divisors are power-of-two, we will prefer to avoid the fold.
7177 AllDivisorsArePowerOfTwo &= D0.isOne();
7178
7179 // P = inv(D0, 2^W)
7180 // 2^W requires W + 1 bits, so we have to extend and then truncate.
7181 unsigned W = D.getBitWidth();
7182 APInt P = D0.multiplicativeInverse();
7183 assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
7184
7185 // Q = floor((2^W - 1) u/ D)
7186 // R = ((2^W - 1) u% D)
7187 APInt Q, R;
7189
7190 // If we are comparing with zero, then that comparison constant is okay,
7191 // else it may need to be one less than that.
7192 if (Cmp.ugt(R))
7193 Q -= 1;
7194
7196 "We are expecting that K is always less than all-ones for ShSVT");
7197
7198 // If the lane is tautological the result can be constant-folded.
7199 if (TautologicalLane) {
7200 // Set P and K amount to a bogus values so we can try to splat them.
7201 P = 0;
7202 KAmts.push_back(DAG.getAllOnesConstant(DL, ShSVT));
7203 // And ensure that comparison constant is tautological,
7204 // it will always compare true/false.
7205 Q.setAllBits();
7206 } else {
7207 KAmts.push_back(DAG.getConstant(K, DL, ShSVT));
7208 }
7209
7210 PAmts.push_back(DAG.getConstant(P, DL, SVT));
7211 QAmts.push_back(DAG.getConstant(Q, DL, SVT));
7212 return true;
7213 };
7214
7215 SDValue N = REMNode.getOperand(0);
7216 SDValue D = REMNode.getOperand(1);
7217
7218 // Collect the values from each element.
7219 if (!ISD::matchBinaryPredicate(D, CompTargetNode, BuildUREMPattern))
7220 return SDValue();
7221
7222 // If all lanes are tautological, the result can be constant-folded.
7223 if (AllLanesAreTautological)
7224 return SDValue();
7225
7226 // If this is a urem by a powers-of-two, avoid the fold since it can be
7227 // best implemented as a bit test.
7228 if (AllDivisorsArePowerOfTwo)
7229 return SDValue();
7230
7231 SDValue PVal, KVal, QVal;
7232 if (D.getOpcode() == ISD::BUILD_VECTOR) {
7233 if (HadTautologicalLanes) {
7234 // Try to turn PAmts into a splat, since we don't care about the values
7235 // that are currently '0'. If we can't, just keep '0'`s.
7237 // Try to turn KAmts into a splat, since we don't care about the values
7238 // that are currently '-1'. If we can't, change them to '0'`s.
7240 DAG.getConstant(0, DL, ShSVT));
7241 }
7242
7243 PVal = DAG.getBuildVector(VT, DL, PAmts);
7244 KVal = DAG.getBuildVector(ShVT, DL, KAmts);
7245 QVal = DAG.getBuildVector(VT, DL, QAmts);
7246 } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
7247 assert(PAmts.size() == 1 && KAmts.size() == 1 && QAmts.size() == 1 &&
7248 "Expected matchBinaryPredicate to return one element for "
7249 "SPLAT_VECTORs");
7250 PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
7251 KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
7252 QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
7253 } else {
7254 PVal = PAmts[0];
7255 KVal = KAmts[0];
7256 QVal = QAmts[0];
7257 }
7258
7259 if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
7260 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::SUB, VT))
7261 return SDValue(); // FIXME: Could/should use `ISD::ADD`?
7262 assert(CompTargetNode.getValueType() == N.getValueType() &&
7263 "Expecting that the types on LHS and RHS of comparisons match.");
7264 N = DAG.getNode(ISD::SUB, DL, VT, N, CompTargetNode);
7265 }
7266
7267 // (mul N, P)
7268 SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
7269 Created.push_back(Op0.getNode());
7270
7271 // Rotate right only if any divisor was even. We avoid rotates for all-odd
7272 // divisors as a performance improvement, since rotating by 0 is a no-op.
7273 if (HadEvenDivisor) {
7274 // We need ROTR to do this.
7275 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
7276 return SDValue();
7277 // UREM: (rotr (mul N, P), K)
7278 Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
7279 Created.push_back(Op0.getNode());
7280 }
7281
7282 // UREM: (setule/setugt (rotr (mul N, P), K), Q)
7283 SDValue NewCC =
7284 DAG.getSetCC(DL, SETCCVT, Op0, QVal,
7286 if (!HadTautologicalInvertedLanes)
7287 return NewCC;
7288
7289 // If any lanes previously compared always-false, the NewCC will give
7290 // always-true result for them, so we need to fixup those lanes.
7291 // Or the other way around for inequality predicate.
7292 assert(VT.isVector() && "Can/should only get here for vectors.");
7293 Created.push_back(NewCC.getNode());
7294
7295 // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
7296 // if C2 is not less than C1, the comparison is always false.
7297 // But we have produced the comparison that will give the
7298 // opposite tautological answer. So these lanes would need to be fixed up.
7299 SDValue TautologicalInvertedChannels =
7300 DAG.getSetCC(DL, SETCCVT, D, CompTargetNode, ISD::SETULE);
7301 Created.push_back(TautologicalInvertedChannels.getNode());
7302
7303 // NOTE: we avoid letting illegal types through even if we're before legalize
7304 // ops – legalization has a hard time producing good code for this.
7305 if (isOperationLegalOrCustom(ISD::VSELECT, SETCCVT)) {
7306 // If we have a vector select, let's replace the comparison results in the
7307 // affected lanes with the correct tautological result.
7308 SDValue Replacement = DAG.getBoolConstant(Cond == ISD::SETEQ ? false : true,
7309 DL, SETCCVT, SETCCVT);
7310 return DAG.getNode(ISD::VSELECT, DL, SETCCVT, TautologicalInvertedChannels,
7311 Replacement, NewCC);
7312 }
7313
7314 // Else, we can just invert the comparison result in the appropriate lanes.
7315 //
7316 // NOTE: see the note above VSELECT above.
7317 if (isOperationLegalOrCustom(ISD::XOR, SETCCVT))
7318 return DAG.getNode(ISD::XOR, DL, SETCCVT, NewCC,
7319 TautologicalInvertedChannels);
7320
7321 return SDValue(); // Don't know how to lower.
7322}
7323
7324/// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
7325/// where the divisor is constant and the comparison target is zero,
7326/// return a DAG expression that will generate the same comparison result
7327/// using only multiplications, additions and shifts/rotations.
7328/// Ref: "Hacker's Delight" 10-17.
///
/// This is a thin driver around prepareSREMEqFold(): when that call yields a
/// value, every node it recorded in `Built` is queued on the combiner worklist
/// and the folded value is returned; otherwise an empty SDValue is returned.
///
/// NOTE(review): the embedded numbering of this listing skips 7331 and 7334,
/// which is presumably where the `Cond` parameter and the `Built` vector are
/// declared -- confirm against the upstream file.
7329SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
7330 SDValue CompTargetNode,
7332 DAGCombinerInfo &DCI,
7333 const SDLoc &DL) const {
7335 if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
7336 DCI, DL, Built)) {
 // The helper is expected to record at most 7 intermediate nodes.
7337 assert(Built.size() <= 7 && "Max size prediction failed.");
 // Let the combiner revisit each freshly created node.
7338 for (SDNode *N : Built)
7339 DCI.AddToWorklist(N);
7340 return Folded;
7341 }
7342
 // No fold was produced.
7343 return SDValue();
7344}
7345
/// Worker for buildSREMEqFold(): builds the replacement for
/// (seteq/ne (srem N, D), 0) described in the comment below and appends every
/// intermediate node it creates (MUL, ADD and, if needed, ROTR) to \p Created.
/// Returns an empty SDValue when the fold is not applicable or when an
/// operation it needs is unavailable after ops legalization.
///
/// NOTE(review): the embedded numbering of this listing has gaps inside this
/// function (e.g. 7439, 7441, 7492, 7495, 7499, 7549), so a few statements
/// appear only partially here -- confirm against the upstream file.
7346SDValue
7347TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
7348 SDValue CompTargetNode, ISD::CondCode Cond,
7349 DAGCombinerInfo &DCI, const SDLoc &DL,
7350 SmallVectorImpl<SDNode *> &Created) const {
7351 // Derived from Hacker's Delight, 2nd Edition, by Hank Warren. Section 10-17.
7352 // Fold:
7353 // (seteq/ne (srem N, D), 0)
7354 // To:
7355 // (setule/ugt (rotr (add (mul N, P), A), K), Q)
7356 //
7357 // - D must be constant, with D = D0 * 2^K where D0 is odd
7358 // - P is the multiplicative inverse of D0 modulo 2^W
7359 // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^K)))
7360 // - Q = floor((2 * A) / (2^K))
7361 // where W is the width of the common type of N and D.
7362 //
7363 // When D is a power of two (and thus D0 is 1), the normal
7364 // formulas for A and Q don't apply, because the derivation
7365 // depends on D not dividing 2^(W-1), and thus theorem ZRS
7366 // does not apply. This specifically fails when N = INT_MIN.
7367 //
7368 // Instead, for power-of-two D, we use:
7369 // - A = 0
7370 // | -> No offset needed. We're effectively treating it the same as urem.
7371 // - Q = 2^(W-K) - 1
7372 // |-> Test that the top K bits are zero after rotation
7373 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
7374 "Only applicable for (in)equality comparisons.");
7375
7376 SelectionDAG &DAG = DCI.DAG;
7377
 // VT/SVT: type of the remainder and its scalar element type.
 // ShVT/ShSVT: shift-amount type for VT and its scalar element type.
7378 EVT VT = REMNode.getValueType();
7379 EVT SVT = VT.getScalarType();
7380 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
7381 EVT ShSVT = ShVT.getScalarType();
7382
7383 // If we are after ops legalization, and MUL is unavailable, we can not
7384 // proceed.
7385 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
7386 return SDValue();
7387
7388 // TODO: Could support comparing with non-zero too.
7389 ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
7390 if (!CompTarget || !CompTarget->isZero())
7391 return SDValue();
7392
 // Facts accumulated over all divisor elements by the lambda below.
7393 bool HadOneDivisor = false;
7394 bool AllDivisorsAreOnes = true;
7395 bool HadEvenDivisor = false;
7396 bool AllDivisorsArePowerOfTwo = true;
 // Per-element constants P, A, K and Q from the formula above.
7397 SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts;
7398
 // Called once per divisor element; returning false rejects the whole fold.
7399 auto BuildSREMPattern = [&](ConstantSDNode *C) {
7400 // Division by 0 is UB. Leave it to be constant-folded elsewhere.
7401 if (C->isZero())
7402 return false;
7403
7404 // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.
7405
7406 // WARNING: this fold is only valid for positive divisors!
7407 // `rem %X, -C` is equivalent to `rem %X, C`
7408 APInt D = C->getAPIntValue().abs();
7409
7410 // If all divisors are ones, we will prefer to avoid the fold.
7411 HadOneDivisor |= D.isOne();
7412 AllDivisorsAreOnes &= D.isOne();
7413
7414 // Decompose D into D0 * 2^K
7415 unsigned K = D.countr_zero();
7416 assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
7417 APInt D0 = D.lshr(K);
7418
7419 // D is even if it has trailing zeros.
7420 HadEvenDivisor |= (K != 0);
7421
7422 // D is a power-of-two if D0 is one. This includes INT_MIN.
7423 // If all divisors are power-of-two, we will prefer to avoid the fold.
7424 AllDivisorsArePowerOfTwo &= D0.isOne();
7425
7426 // P = inv(D0, 2^W)
7427 // NOTE(review): no explicit extend/truncate is performed here; the inverse is requested on D0 as-is and checked by the assert below.
7428 unsigned W = D.getBitWidth();
7429 APInt P = D0.multiplicativeInverse();
7430 assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
7431
7432 // A = floor((2^(W - 1) - 1) / D0) & -2^K
7433 APInt A = APInt::getSignedMaxValue(W).udiv(D0);
7434 A.clearLowBits(K);
7435
7436 // Q = floor((2 * A) / (2^K))
7437 APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K));
7438
7440 "We are expecting that A is always less than all-ones for SVT");
7442 "We are expecting that K is always less than all-ones for ShSVT");
7443
7444 // If D was a power of two, apply the alternate constant derivation.
7445 if (D0.isOne()) {
7446 // A = 0
7447 A = APInt(W, 0);
7448 // - Q = 2^(W-K) - 1
7449 Q = APInt::getLowBitsSet(W, W - K);
7450 }
7451
7452 // If the divisor is 1 the result can be constant-folded.
7453 if (D.isOne()) {
7454 // Set P, A and K to bogus values so we can try to splat them.
7455 P = 0;
7456 A.setAllBits();
7457 KAmts.push_back(DAG.getAllOnesConstant(DL, ShSVT));
7458
7459 // x ?% 1 == 0 <--> true <--> x u<= -1
7460 Q.setAllBits();
7461 } else {
7462 KAmts.push_back(DAG.getConstant(K, DL, ShSVT));
7463 }
7464
7465 PAmts.push_back(DAG.getConstant(P, DL, SVT));
7466 AAmts.push_back(DAG.getConstant(A, DL, SVT));
7467 QAmts.push_back(DAG.getConstant(Q, DL, SVT));
7468 return true;
7469 };
7470
 // N is the dividend, D the (constant) divisor of the SREM.
7471 SDValue N = REMNode.getOperand(0);
7472 SDValue D = REMNode.getOperand(1);
7473
7474 // Collect the values from each element.
7475 if (!ISD::matchUnaryPredicate(D, BuildSREMPattern))
7476 return SDValue();
7477
7478 // If this is an srem by one, avoid the fold since it can be constant-folded.
7479 if (AllDivisorsAreOnes)
7480 return SDValue();
7481
7482 // If this is an srem by powers-of-two (including INT_MIN), avoid the fold
7483 // since it can be best implemented as a bit test.
7484 if (AllDivisorsArePowerOfTwo)
7485 return SDValue();
7486
 // Materialize P, A, K and Q in the same shape as the divisor operand:
 // BUILD_VECTOR, SPLAT_VECTOR, or a plain scalar constant.
7487 SDValue PVal, AVal, KVal, QVal;
7488 if (D.getOpcode() == ISD::BUILD_VECTOR) {
7489 if (HadOneDivisor) {
7490 // Try to turn PAmts into a splat, since we don't care about the values
7491 // that are currently '0'. If we can't, just keep '0's.
7493 // Try to turn AAmts into a splat, since we don't care about the
7494 // values that are currently '-1'. If we can't, change them to '0's.
7496 DAG.getConstant(0, DL, SVT));
7497 // Try to turn KAmts into a splat, since we don't care about the values
7498 // that are currently '-1'. If we can't, change them to '0's.
7500 DAG.getConstant(0, DL, ShSVT));
7501 }
7502
7503 PVal = DAG.getBuildVector(VT, DL, PAmts);
7504 AVal = DAG.getBuildVector(VT, DL, AAmts);
7505 KVal = DAG.getBuildVector(ShVT, DL, KAmts);
7506 QVal = DAG.getBuildVector(VT, DL, QAmts);
7507 } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
7508 assert(PAmts.size() == 1 && AAmts.size() == 1 && KAmts.size() == 1 &&
7509 QAmts.size() == 1 &&
7510 "Expected matchUnaryPredicate to return one element for scalable "
7511 "vectors");
7512 PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
7513 AVal = DAG.getSplatVector(VT, DL, AAmts[0]);
7514 KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
7515 QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
7516 } else {
7517 assert(isa<ConstantSDNode>(D) && "Expected a constant");
7518 PVal = PAmts[0];
7519 AVal = AAmts[0];
7520 KVal = KAmts[0];
7521 QVal = QAmts[0];
7522 }
7523
7524 // (mul N, P)
7525 SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
7526 Created.push_back(Op0.getNode());
7527
7528 // We need ADD to do this.
 // Note that the MUL above has already been created and recorded if we bail.
7529 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ADD, VT))
7530 return SDValue();
7531
7532 // (add (mul N, P), A)
7533 Op0 = DAG.getNode(ISD::ADD, DL, VT, Op0, AVal);
7534 Created.push_back(Op0.getNode());
7535
7536 // Rotate right only if any divisor was even. We avoid rotates for all-odd
7537 // divisors as a performance improvement, since rotating by 0 is a no-op.
7538 if (HadEvenDivisor) {
7539 // We need ROTR to do this.
7540 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
7541 return SDValue();
7542 // SREM: (rotr (add (mul N, P), A), K)
7543 Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
7544 Created.push_back(Op0.getNode());
7545 }
7546
7547 // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
 // The final SETCC is returned to the caller and is not added to Created.
7548 return DAG.getSetCC(DL, SETCCVT, Op0, QVal,
7550}
7551
7553 const DenormalMode &Mode,
7554 SDNodeFlags Flags) const {
 // Builds a SETCC on Op whose form depends on the denormal *input* mode:
 //  - PreserveSign / PositiveZero: Op == 0.0
 //  - anything else:               fabs(Op) < smallest normalized value of VT
 // The result type is the target's setcc result type for VT.
 //
 // NOTE(review): the first line of this signature (embedded line 7552) is
 // missing from this listing -- confirm the function name and leading
 // parameters against the upstream file.
7555 SDLoc DL(Op);
7556 EVT VT = Op.getValueType();
7557 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7558 SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
7559
7560 // This is specifically a check for the handling of denormal inputs, not the
7561 // result.
7562 if (Mode.Input == DenormalMode::PreserveSign ||
7563 Mode.Input == DenormalMode::PositiveZero) {
7564 // Test = X == 0.0
7565 return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ, /*Chain=*/{},
7566 /*Signaling=*/false, Flags);
7567 }
7568
7569 // Testing it with denormal inputs to avoid wrong estimate.
7570 //
7571 // Test = fabs(X) < SmallestNormal
 // SmallestNorm is taken from the floating-point semantics of VT.
7572 const fltSemantics &FltSem = VT.getFltSemantics();
7573 APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
7574 SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
7575 SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op, Flags);
7576 return DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT, /*Chain=*/{},
7577 /*Signaling=*/false, Flags);
7578}
7579
7581 bool LegalOps, bool OptForSize,
7583 unsigned Depth) const {
 // Tries to build an expression equal to the negation of Op without emitting
 // an explicit FNEG, by pushing the negation into Op's operands (see the
 // per-opcode "fold (fneg ...)" comments below). On success the new value is
 // returned and Cost is set; on failure an empty SDValue is returned.
 //
 // NOTE(review): the embedded numbering of this listing has many gaps inside
 // this function (e.g. 7580, 7582, 7586, 7591, 7625, 7640), so the signature
 // head, several Cost assignments and some conditions are not visible here --
 // confirm against the upstream file.
7584 // fneg is removable even if it has multiple uses.
7585 if (Op.getOpcode() == ISD::FNEG || Op.getOpcode() == ISD::VP_FNEG) {
7587 return Op.getOperand(0);
7588 }
7589
7590 // Don't recurse exponentially.
7592 return SDValue();
7593
7594 // Pre-increment recursion depth for use in recursive calls.
7595 ++Depth;
7596 const SDNodeFlags Flags = Op->getFlags();
7597 EVT VT = Op.getValueType();
7598 unsigned Opcode = Op.getOpcode();
7599
7600 // Don't allow anything with multiple uses unless we know it is free.
 // Exceptions: FP constants, and FP_EXTENDs the target reports as free.
7601 if (!Op.hasOneUse() && Opcode != ISD::ConstantFP) {
7602 bool IsFreeExtend = Opcode == ISD::FP_EXTEND &&
7603 isFPExtFree(VT, Op.getOperand(0).getValueType());
7604 if (!IsFreeExtend)
7605 return SDValue();
7606 }
7607
 // Deletes N from the DAG if it is non-null and has no remaining uses.
7608 auto RemoveDeadNode = [&](SDValue N) {
7609 if (N && N.getNode()->use_empty())
7610 DAG.RemoveDeadNode(N.getNode());
7611 };
7612
7613 SDLoc DL(Op);
7614
7615 // Because getNegatedExpression can delete nodes we need a handle to keep
7616 // temporary nodes alive in case the recursion manages to create an identical
7617 // node.
7618 std::list<HandleSDNode> Handles;
7619
7620 switch (Opcode) {
7621 case ISD::ConstantFP: {
7622 // Don't invert constant FP values after legalization unless the target says
7623 // the negated constant is legal.
7624 bool IsOpLegal =
7626 isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
7627 OptForSize);
7628
7629 if (LegalOps && !IsOpLegal)
7630 break;
7631
7632 APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
7633 V.changeSign();
7634 SDValue CFP = DAG.getConstantFP(V, DL, VT);
7635
7636 // If we already have the use of the negated floating constant, it is free
7637 // to negate it even if it has multiple uses.
7638 if (!Op.hasOneUse() && CFP.use_empty())
7639 break;
7641 return CFP;
7642 }
7643 case ISD::SPLAT_VECTOR: {
7644 // fold splat_vector(fneg(X)) -> splat_vector(-X)
7645 SDValue X = Op.getOperand(0);
7647 break;
7648
7649 SDValue NegX = getCheaperNegatedExpression(X, DAG, LegalOps, OptForSize);
7650 if (!NegX)
7651 break;
7653 return DAG.getNode(ISD::SPLAT_VECTOR, DL, VT, NegX);
7654 }
7655 case ISD::BUILD_VECTOR: {
7656 // Only permit BUILD_VECTOR of constants.
7657 if (llvm::any_of(Op->op_values(), [&](SDValue N) {
7658 return !N.isUndef() && !isa<ConstantFPSDNode>(N);
7659 }))
7660 break;
7661
7662 bool IsOpLegal =
7665 llvm::all_of(Op->op_values(), [&](SDValue N) {
7666 return N.isUndef() ||
7667 isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
7668 OptForSize);
7669 });
7670
7671 if (LegalOps && !IsOpLegal)
7672 break;
7673
 // Rebuild the vector element by element: undef elements are kept as-is,
 // constants get their sign flipped.
7675 for (SDValue C : Op->op_values()) {
7676 if (C.isUndef()) {
7677 Ops.push_back(C);
7678 continue;
7679 }
7680 APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
7681 V.changeSign();
7682 Ops.push_back(DAG.getConstantFP(V, DL, C.getValueType()));
7683 }
7685 return DAG.getBuildVector(VT, DL, Ops);
7686 }
7687 case ISD::FADD: {
 // Only handled when the node carries the no-signed-zeros flag.
7688 if (!Flags.hasNoSignedZeros())
7689 break;
7690
7691 // After operation legalization, it might not be legal to create new FSUBs.
7692 if (LegalOps && !isOperationLegalOrCustom(ISD::FSUB, VT))
7693 break;
7694 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7695
7696 // fold (fneg (fadd X, Y)) -> (fsub (fneg X), Y)
7698 SDValue NegX =
7699 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7700 // Prevent this node from being deleted by the next call.
7701 if (NegX)
7702 Handles.emplace_back(NegX);
7703
7704 // fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X)
7706 SDValue NegY =
7707 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7708
7709 // We're done with the handles.
7710 Handles.clear();
7711
7712 // Negate the X if its cost is less than or equal to Y's.
7713 if (NegX && (CostX <= CostY)) {
7714 Cost = CostX;
7715 SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegX, Y, Flags);
 // The unused alternative is cleaned up unless it is the node we return.
7716 if (NegY != N)
7717 RemoveDeadNode(NegY);
7718 return N;
7719 }
7720
7721 // Negate the Y if it is not expensive.
7722 if (NegY) {
7723 Cost = CostY;
7724 SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegY, X, Flags);
7725 if (NegX != N)
7726 RemoveDeadNode(NegX);
7727 return N;
7728 }
7729 break;
7730 }
7731 case ISD::FSUB: {
7732 // We can't turn -(A-B) into B-A when we honor signed zeros.
7733 if (!Flags.hasNoSignedZeros())
7734 break;
7735
7736 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7737 // fold (fneg (fsub 0, Y)) -> Y
7738 if (ConstantFPSDNode *C = isConstOrConstSplatFP(X, /*AllowUndefs*/ true))
7739 if (C->isZero()) {
7741 return Y;
7742 }
7743
7744 // fold (fneg (fsub X, Y)) -> (fsub Y, X)
7746 return DAG.getNode(ISD::FSUB, DL, VT, Y, X, Flags);
7747 }
7748 case ISD::FMUL:
7749 case ISD::FDIV: {
7750 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7751
7752 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
7754 SDValue NegX =
7755 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7756 // Prevent this node from being deleted by the next call.
7757 if (NegX)
7758 Handles.emplace_back(NegX);
7759
7760 // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
7762 SDValue NegY =
7763 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7764
7765 // We're done with the handles.
7766 Handles.clear();
7767
7768 // Negate the X if its cost is less than or equal to Y's.
7769 if (NegX && (CostX <= CostY)) {
7770 Cost = CostX;
7771 SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, Flags);
7772 if (NegY != N)
7773 RemoveDeadNode(NegY);
7774 return N;
7775 }
7776
7777 // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
7778 if (auto *C = isConstOrConstSplatFP(Op.getOperand(1)))
7779 if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL)
7780 break;
7781
7782 // Negate the Y if it is not expensive.
7783 if (NegY) {
7784 Cost = CostY;
7785 SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, Flags);
7786 if (NegX != N)
7787 RemoveDeadNode(NegX);
7788 return N;
7789 }
7790 break;
7791 }
7792 case ISD::FMA:
7793 case ISD::FMULADD:
7794 case ISD::FMAD: {
 // Only handled when the node carries the no-signed-zeros flag.
7795 if (!Flags.hasNoSignedZeros())
7796 break;
7797
 // Z must be negatable; then exactly one of X or Y is negated as well.
7798 SDValue X = Op.getOperand(0), Y = Op.getOperand(1), Z = Op.getOperand(2);
7800 SDValue NegZ =
7801 getNegatedExpression(Z, DAG, LegalOps, OptForSize, CostZ, Depth);
7802 // Give up if we fail to negate Z.
7803 if (!NegZ)
7804 break;
7805
7806 // Prevent this node from being deleted by the next two calls.
7807 Handles.emplace_back(NegZ);
7808
7809 // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
7811 SDValue NegX =
7812 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7813 // Prevent this node from being deleted by the next call.
7814 if (NegX)
7815 Handles.emplace_back(NegX);
7816
7817 // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
7819 SDValue NegY =
7820 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7821
7822 // We're done with the handles.
7823 Handles.clear();
7824
7825 // Negate the X if its cost is less than or equal to Y's.
7826 if (NegX && (CostX <= CostY)) {
 // The reported cost is the smaller of the two operand costs involved.
7827 Cost = std::min(CostX, CostZ);
7828 SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, NegZ, Flags);
7829 if (NegY != N)
7830 RemoveDeadNode(NegY);
7831 return N;
7832 }
7833
7834 // Negate the Y if it is not expensive.
7835 if (NegY) {
7836 Cost = std::min(CostY, CostZ);
7837 SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, NegZ, Flags);
7838 if (NegX != N)
7839 RemoveDeadNode(NegX);
7840 return N;
7841 }
7842 break;
7843 }
7844
 // For these unary operations the negation is pushed into the operand and
 // the same operation is re-applied to the negated operand.
7845 case ISD::FP_EXTEND:
7846 case ISD::FSIN:
7847 if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
7848 OptForSize, Cost, Depth))
7849 return DAG.getNode(Opcode, DL, VT, NegV);
7850 break;
7851 case ISD::FP_ROUND:
 // Same as above, but FP_ROUND's second operand is carried over unchanged.
7852 if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
7853 OptForSize, Cost, Depth))
7854 return DAG.getNode(ISD::FP_ROUND, DL, VT, NegV, Op.getOperand(1));
7855 break;
7856 case ISD::SELECT:
7857 case ISD::VSELECT: {
7858 // fold (fneg (select C, LHS, RHS)) -> (select C, (fneg LHS), (fneg RHS))
7859 // iff at least one cost is cheaper and the other is neutral/cheaper
7860 SDValue LHS = Op.getOperand(1);
7862 SDValue NegLHS =
7863 getNegatedExpression(LHS, DAG, LegalOps, OptForSize, CostLHS, Depth);
7864 if (!NegLHS || CostLHS > NegatibleCost::Neutral) {
7865 RemoveDeadNode(NegLHS);
7866 break;
7867 }
7868
7869 // Prevent this node from being deleted by the next call.
7870 Handles.emplace_back(NegLHS);
7871
7872 SDValue RHS = Op.getOperand(2);
7874 SDValue NegRHS =
7875 getNegatedExpression(RHS, DAG, LegalOps, OptForSize, CostRHS, Depth);
7876
7877 // We're done with the handles.
7878 Handles.clear();
7879
 // Reject if RHS is not negatable, is worse than neutral, or if neither
 // arm is strictly cheaper; clean up whatever was speculatively built.
7880 if (!NegRHS || CostRHS > NegatibleCost::Neutral ||
7881 (CostLHS != NegatibleCost::Cheaper &&
7882 CostRHS != NegatibleCost::Cheaper)) {
7883 RemoveDeadNode(NegLHS);
7884 RemoveDeadNode(NegRHS);
7885 break;
7886 }
7887
7888 Cost = std::min(CostLHS, CostRHS);
7889 return DAG.getSelect(DL, VT, Op.getOperand(0), NegLHS, NegRHS);
7890 }
7891 }
7892
 // No profitable/legal negated form was found.
7893 return SDValue();
7894}
7895
7896//===----------------------------------------------------------------------===//
7897// Legalization Utilities
7898//===----------------------------------------------------------------------===//
7899
/// Expands a MUL, UMUL_LOHI or SMUL_LOHI on type \p VT into operations on the
/// narrower type \p HiLoVT, pushing the resulting HiLoVT-sized pieces onto
/// Result with the low piece first. LL/LH and RL/RH optionally supply the
/// already-split low/high halves of \p LHS and \p RHS; they must be either all
/// set or all null. Returns false if the expansion cannot be built.
///
/// NOTE(review): the embedded numbering of this listing has gaps inside this
/// function (e.g. 7902, 7910, 7912, 7914, 7916, 7947, 7987-7988, 8035), so the
/// `Result` parameter and several legality conditions are not visible here --
/// confirm against the upstream file.
7900bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl,
7901 SDValue LHS, SDValue RHS,
7903 EVT HiLoVT, SelectionDAG &DAG,
7904 MulExpansionKind Kind, SDValue LL,
7905 SDValue LH, SDValue RL, SDValue RH) const {
7906 assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
7907 Opcode == ISD::SMUL_LOHI);
7908
 // Which half-width multiply flavours may be used; MulExpansionKind::Always
 // enables all of them.
7909 bool HasMULHS = (Kind == MulExpansionKind::Always) ||
7911 bool HasMULHU = (Kind == MulExpansionKind::Always) ||
7913 bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
7915 bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
7917
7918 if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
7919 return false;
7920
7921 unsigned OuterBitSize = VT.getScalarSizeInBits();
7922 unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();
7923
7924 // LL, LH, RL, and RH must be either all NULL or all set to a value.
7925 assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
7926 (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));
7927
 // Emits a HiLoVT x HiLoVT multiply producing both halves, preferring a
 // single [SU]MUL_LOHI node and falling back to MUL + MULH[SU]. Returns
 // false if neither form is available for the requested signedness.
7928 auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
7929 bool Signed) -> bool {
7930 if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
7931 SDVTList VTs = DAG.getVTList(HiLoVT, HiLoVT);
7932 Lo = DAG.getNode(Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, dl, VTs, L, R);
7933 Hi = Lo.getValue(1);
7934 return true;
7935 }
7936 if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
7937 Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, L, R);
7938 Hi = DAG.getNode(Signed ? ISD::MULHS : ISD::MULHU, dl, HiLoVT, L, R);
7939 return true;
7940 }
7941 return false;
7942 };
7943
7944 SDValue Lo, Hi;
7945
 // If the caller did not supply the low halves, derive them by truncation.
7946 if (!LL.getNode() && !RL.getNode() &&
7948 LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LHS);
7949 RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RHS);
7950 }
7951
7952 if (!LL.getNode())
7953 return false;
7954
7955 APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
7956 if (DAG.MaskedValueIsZero(LHS, HighMask) &&
7957 DAG.MaskedValueIsZero(RHS, HighMask)) {
7958 // The inputs are both zero-extended.
7959 if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
7960 Result.push_back(Lo);
7961 Result.push_back(Hi);
 // For the *_LOHI opcodes two additional zero pieces are appended.
7962 if (Opcode != ISD::MUL) {
7963 SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
7964 Result.push_back(Zero);
7965 Result.push_back(Zero);
7966 }
7967 return true;
7968 }
7969 }
7970
7971 if (!VT.isVector() && Opcode == ISD::MUL &&
7972 DAG.ComputeMaxSignificantBits(LHS) <= InnerBitSize &&
7973 DAG.ComputeMaxSignificantBits(RHS) <= InnerBitSize) {
7974 // The input values are both sign-extended.
7975 // TODO non-MUL case?
7976 if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
7977 Result.push_back(Lo);
7978 Result.push_back(Hi);
7979 return true;
7980 }
7981 }
7982
 // Shift that moves the high part of a VT value down into its low part.
7983 unsigned ShiftAmount = OuterBitSize - InnerBitSize;
7984 SDValue Shift = DAG.getShiftAmountConstant(ShiftAmount, VT, dl);
7985
 // If the caller did not supply the high halves, derive them with
 // shift-right + truncate.
7986 if (!LH.getNode() && !RH.getNode() &&
7989 LH = DAG.getNode(ISD::SRL, dl, VT, LHS, Shift);
7990 LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
7991 RH = DAG.getNode(ISD::SRL, dl, VT, RHS, Shift);
7992 RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
7993 }
7994
7995 if (!LH.getNode())
7996 return false;
7997
 // LL * RL: its low half is the lowest piece of the result in every case.
7998 if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false))
7999 return false;
8000
8001 Result.push_back(Lo);
8002
8003 if (Opcode == ISD::MUL) {
 // Plain MUL only needs two pieces: the cross products LL*RH and LH*RL are
 // added into the high half of LL*RL.
8004 RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
8005 LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
8006 Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
8007 Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
8008 Result.push_back(Hi);
8009 return true;
8010 }
8011
8012 // Compute the full width result.
 // Merge() recombines a (Lo, Hi) pair of HiLoVT values into one VT value.
8013 auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
8014 Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
8015 Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
8016 Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
8017 return DAG.getNode(ISD::OR, dl, VT, Lo, Hi);
8018 };
8019
 // Next accumulates the running sum, starting from the high half of LL*RL.
8020 SDValue Next = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
8021 if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false))
8022 return false;
8023
8024 // This is effectively the add part of a multiply-add of half-sized operands,
8025 // so it cannot overflow.
8026 Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
8027
8028 if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
8029 return false;
8030
8031 SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
8032 EVT BoolType = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
8033
 // Adding the second cross product can carry out; the carry is tracked
 // either with glued ADDC/ADDE or with UADDO_CARRY.
8034 bool UseGlue = (isOperationLegalOrCustom(ISD::ADDC, VT) &&
8036 if (UseGlue)
8037 Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
8038 Merge(Lo, Hi));
8039 else
8040 Next = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(VT, BoolType), Next,
8041 Merge(Lo, Hi), DAG.getConstant(0, dl, BoolType));
8042
8043 SDValue Carry = Next.getValue(1);
 // Second result piece: the low part of the accumulated sum.
8044 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
8045 Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
8046
 // LH * RH is the only partial product whose signedness follows the opcode.
8047 if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
8048 return false;
8049
 // Fold the carry from the cross-product addition into the high half.
8050 if (UseGlue)
8051 Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
8052 Carry);
8053 else
8054 Hi = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(HiLoVT, BoolType), Hi,
8055 Zero, Carry);
8056
8057 Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
8058
8059 if (Opcode == ISD::SMUL_LOHI) {
 // Signed correction: when LH (resp. RH) compares less than zero, the
 // zero-extended RL (resp. LL) is subtracted from the accumulated value.
8060 SDValue NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
8061 DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RL));
8062 Next = DAG.getSelectCC(dl, LH, Zero, NextSub, Next, ISD::SETLT);
8063
8064 NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
8065 DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LL));
8066 Next = DAG.getSelectCC(dl, RH, Zero, NextSub, Next, ISD::SETLT);
8067 }
8068
 // Third and fourth result pieces: low and high parts of the final sum.
8069 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
8070 Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
8071 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
8072 return true;
8073}
8074
8076 SelectionDAG &DAG, MulExpansionKind Kind,
8077 SDValue LL, SDValue LH, SDValue RL,
8078 SDValue RH) const {
 // Convenience wrapper: forwards node N's opcode, type and two operands to
 // expandMUL_LOHI() and, on success, copies the two result pieces into Lo and
 // Hi. Returns whatever expandMUL_LOHI() returned.
 //
 // NOTE(review): the first line of this signature (embedded line 8075) and
 // the declaration of `Result` (embedded line 8079) are missing from this
 // listing -- confirm against the upstream file.
8080 bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), SDLoc(N),
8081 N->getOperand(0), N->getOperand(1), Result, HiLoVT,
8082 DAG, Kind, LL, LH, RL, RH);
8083 if (Ok) {
 // Exactly two pieces are expected on this path.
8084 assert(Result.size() == 2);
8085 Lo = Result[0];
8086 Hi = Result[1];
8087 }
8088 return Ok;
8089}
8090
8091// Optimize unsigned division or remainder by constants for types twice as large
8092// as a legal VT.
8093//
8094// If (1 << (BitWidth / 2)) % Constant == 1, then the remainder
8095// can be computed
8096// as:
8097// Sum = __builtin_uadd_overflow(Lo, High, &Sum);
8098// Remainder = Sum % Constant;
8099//
8100// If (1 << (BitWidth / 2)) % Constant != 1, we can search for a smaller value
8101// W such that W != (BitWidth / 2) and (1 << W) % Constant == 1. We can break
8102// High:Low into 3 chunks of W bits and compute remainder as
8103// Sum = Chunk0 + Chunk1 + Chunk2;
8104// Remainder = Sum % Constant;
8105//
8106// This is based on "Remainder by Summing Digits" from Hacker's Delight.
8107//
8108// For division, we can compute the remainder using the algorithm described
8109// above, subtract it from the dividend to get an exact multiple of Constant.
8110// Then multiply that exact multiple by the multiplicative inverse modulo
8111// (1 << BitWidth) to get the quotient.
8112
8113// If Constant is even, we can shift right the dividend and the divisor by the
8114// number of trailing zeros in Constant before applying the remainder algorithm.
8115// If we're after the quotient, we can subtract this value from the shifted
8116// dividend and multiply by the multiplicative inverse of the shifted divisor.
8117// If we want the remainder, we shift the value left by the number of trailing
8118// zeros and add the bits that were shifted out of the dividend.
// Contract summary:
//  - N:       the division/remainder node; only its opcode, result type and
//             operand 0 (the dividend) are read here.
//  - Divisor: the constant divisor, taken by value because its trailing zero
//             bits are shifted out in place below.
//  - Result:  on success receives the quotient halves (low, then high) when
//             the opcode is not UREM, followed by the remainder halves (low,
//             then high) when the opcode is not UDIV.
//  - HiLoVT:  the half-width type; its scalar size must equal half of the
//             divisor's bit width.
//  - LL, LH:  optional low/high halves of the dividend; when both are null
//             they are obtained by splitting operand 0 of N.
// Returns false, before anything is appended to Result, when the odd part of
// the divisor does not fit in HBitWidth bits or no usable chunk width exists.
8119bool TargetLowering::expandUDIVREMByConstantViaUREMDecomposition(
8120 SDNode *N, APInt Divisor, SmallVectorImpl<SDValue> &Result, EVT HiLoVT,
8121 SelectionDAG &DAG, SDValue LL, SDValue LH) const {
8122 unsigned Opcode = N->getOpcode();
8123 EVT VT = N->getValueType(0);
8124
8125 unsigned BitWidth = Divisor.getBitWidth();
8126 unsigned HBitWidth = BitWidth / 2;
8128 HiLoVT.getScalarSizeInBits() == HBitWidth && "Unexpected VTs");
8129
8130 // If the divisor is even, shift it until it becomes odd.
8131 unsigned TrailingZeros = 0;
8132 if (!Divisor[0]) {
8133 TrailingZeros = Divisor.countr_zero();
8134 Divisor.lshrInPlace(TrailingZeros);
8135 }
8136
8137 // After removing trailing zeros, the divisor needs to be less than
8138 // (1 << HBitWidth).
8139 APInt HalfMaxPlus1 = APInt::getOneBitSet(BitWidth, HBitWidth);
8140 if (Divisor.uge(HalfMaxPlus1))
8141 return false;
8142
8143 // Look for the largest chunk width W such that (1 << W) % Divisor == 1 or
8144 // (1 << W) % Divisor == -1.
 // Only widths in the range (HBitWidth / 2, HBitWidth] are tried, scanning
 // from the widest down.
8145 unsigned BestChunkWidth = 0, AltChunkWidth = 0;
8146 for (unsigned I = HBitWidth, E = HBitWidth / 2; I > E; --I) {
8147 // Skip HBitWidth-1, it doesn't have enough bits for carries.
8148 if (I == HBitWidth - 1)
8149 continue;
8150
8151 APInt Mod = APInt::getOneBitSet(Divisor.getBitWidth(), I).urem(Divisor);
8152
8153 if (Mod.isOne()) {
8154 BestChunkWidth = I;
8155 break;
8156 }
8157
8158 // We have an alternate strategy for Remainder == Divisor - 1.
8159 // FIXME: Support HBitWidth.
8160 if (I != HBitWidth && Mod == Divisor - 1)
8161 AltChunkWidth = I;
8162 }
8163
 // Alternate selects the (1 << W) % Divisor == Divisor - 1 strategy, in which
 // odd-numbered chunks are subtracted from the sum instead of added. It is
 // only used when no width with remainder 1 was found.
8164 bool Alternate = false;
8165 if (!BestChunkWidth) {
8166 if (!AltChunkWidth)
8167 return false;
8168 Alternate = true;
8169 BestChunkWidth = AltChunkWidth;
8170 }
8171
8172 SDLoc dl(N);
8173
8174 assert(!LL == !LH && "Expected both input halves or no input halves!");
8175 if (!LL)
8176 std::tie(LL, LH) = DAG.SplitScalar(N->getOperand(0), dl, HiLoVT, HiLoVT);
8177
8178 bool HasFSHR = isOperationLegal(ISD::FSHR, HiLoVT);
8179
 // Returns the low HBitWidth bits of (Hi:Lo >> ShiftAmt). Uses FSHR when it
 // is legal for HiLoVT, otherwise an equivalent SRL/SHL/OR sequence.
8180 auto GetFSHR = [&](SDValue Lo, SDValue Hi, unsigned ShiftAmt) {
8181 assert(ShiftAmt > 0 && ShiftAmt < HBitWidth);
8182 if (HasFSHR)
8183 return DAG.getNode(ISD::FSHR, dl, HiLoVT, Hi, Lo,
8184 DAG.getShiftAmountConstant(ShiftAmt, HiLoVT, dl));
8185 return DAG.getNode(
8186 ISD::OR, dl, HiLoVT,
8187 DAG.getNode(ISD::SRL, dl, HiLoVT, Lo,
8188 DAG.getShiftAmountConstant(ShiftAmt, HiLoVT, dl)),
8189 DAG.getNode(
8190 ISD::SHL, dl, HiLoVT, Hi,
8191 DAG.getShiftAmountConstant(HBitWidth - ShiftAmt, HiLoVT, dl)));
8192 };
8193
8194 // Helper to perform a right shift on a double-width value split into two
8195 // HiLoVT halves (updated in place). Handles shifts >= HBitWidth by moving
 // Hi to Lo and zeroing Hi.
8196 auto ShiftRight = [&](SDValue &Lo, SDValue &Hi, unsigned ShiftAmt) {
8197 if (ShiftAmt == 0)
8198 return;
8199 if (ShiftAmt < HBitWidth) {
8200 Lo = GetFSHR(Lo, Hi, ShiftAmt);
8201 Hi = DAG.getNode(ISD::SRL, dl, HiLoVT, Hi,
8202 DAG.getShiftAmountConstant(ShiftAmt, HiLoVT, dl));
8203 } else if (ShiftAmt == HBitWidth) {
8204 Lo = Hi;
8205 Hi = DAG.getConstant(0, dl, HiLoVT);
8206 } else {
8207 Lo = DAG.getNode(
8208 ISD::SRL, dl, HiLoVT, Hi,
8209 DAG.getShiftAmountConstant(ShiftAmt - HBitWidth, HiLoVT, dl));
8210 Hi = DAG.getConstant(0, dl, HiLoVT);
8211 }
8212 };
8213
8214 // Shift the input by the number of TrailingZeros in the divisor. The
8215 // shifted out bits will be added to the remainder later.
8216 SDValue PartialRemL, PartialRemH;
8217 if (TrailingZeros && Opcode != ISD::UDIV) {
8218 // Save the shifted off bits if we need the remainder.
8219 if (TrailingZeros < HBitWidth) {
8220 APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
8221 PartialRemL = DAG.getNode(ISD::AND, dl, HiLoVT, LL,
8222 DAG.getConstant(Mask, dl, HiLoVT));
8223 } else if (TrailingZeros == HBitWidth) {
8224 // All of LL is part of the remainder.
8225 PartialRemL = LL;
8226 } else {
8227 // TrailingZeros > HBitWidth: LL and part of LH are the remainder.
8228 PartialRemL = LL;
8229 APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros - HBitWidth);
8230 PartialRemH = DAG.getNode(ISD::AND, dl, HiLoVT, LH,
8231 DAG.getConstant(Mask, dl, HiLoVT));
8232 }
8233 }
8234
8235 SDValue Sum;
8236 // If BestChunkWidth is HBitWidth add low and high half. If there is a carry
8237 // out, add that to the final sum.
8238 if (BestChunkWidth == HBitWidth) {
8239 assert(!Alternate);
8240 // Shift LH:LL right if there were trailing zeros in the divisor.
8241 ShiftRight(LL, LH, TrailingZeros);
8242
8243 // Use uaddo_carry if we can, otherwise use a compare to detect overflow.
8244 EVT SetCCType =
8245 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), HiLoVT);
8247 SDVTList VTList = DAG.getVTList(HiLoVT, SetCCType);
8248 Sum = DAG.getNode(ISD::UADDO, dl, VTList, LL, LH);
8249 Sum = DAG.getNode(ISD::UADDO_CARRY, dl, VTList, Sum,
8250 DAG.getConstant(0, dl, HiLoVT), Sum.getValue(1));
8251 } else {
8252 Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, LL, LH);
 // An unsigned sum smaller than one of its addends means the add wrapped.
8253 SDValue Carry = DAG.getSetCC(dl, SetCCType, Sum, LL, ISD::SETULT);
8254 // If the boolean for the target is 0 or 1, we can add the setcc result
8255 // directly.
8256 if (getBooleanContents(HiLoVT) ==
8258 Carry = DAG.getZExtOrTrunc(Carry, dl, HiLoVT);
8259 else
8260 Carry = DAG.getSelect(dl, HiLoVT, Carry, DAG.getConstant(1, dl, HiLoVT),
8261 DAG.getConstant(0, dl, HiLoVT));
8262 Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, Sum, Carry);
8263 }
8264 } else {
8265 // Otherwise split into multiple chunks and add them together. We chose
8266 // BestChunkWidth so that the sum will not overflow.
8267 SDValue Mask = DAG.getConstant(
8268 APInt::getLowBitsSet(HBitWidth, BestChunkWidth), dl, HiLoVT);
8269
8270 for (unsigned I = 0; I < BitWidth - TrailingZeros; I += BestChunkWidth) {
8271 // If there were trailing zeros in the divisor, increase the shift amount.
8272 unsigned Shift = I + TrailingZeros;
8273 SDValue Chunk;
8274 if (Shift == 0)
8275 Chunk = LL;
8276 else if (Shift >= HBitWidth)
8277 Chunk = DAG.getNode(
8278 ISD::SRL, dl, HiLoVT, LH,
8279 DAG.getShiftAmountConstant(Shift - HBitWidth, HiLoVT, dl));
8280 else
8281 Chunk = GetFSHR(LL, LH, Shift);
8282 // If we're on the last chunk, we don't need an AND.
8283 if (I + BestChunkWidth < BitWidth - TrailingZeros)
8284 Chunk = DAG.getNode(ISD::AND, dl, HiLoVT, Chunk, Mask);
8285 if (!Sum) {
8286 Sum = Chunk;
8287 } else {
8288 // For Alternate, we need to subtract odd chunks.
8289 unsigned ChunkNum = I / BestChunkWidth;
8290 unsigned Opc = (Alternate && (ChunkNum % 2) != 0) ? ISD::SUB : ISD::ADD;
8291 Sum = DAG.getNode(Opc, dl, HiLoVT, Sum, Chunk);
8292 }
8293 }
8294
8295 // For Alternate, the sum may be negative, but we need a positive sum. We
8296 // can increase it by a multiple of the divisor to make it positive. For 3
8297 // chunks the largest negative value is -(2^BestChunkWidth - 1). For 4
8298 // chunks, it's 2*-(2^BestChunkWidth - 1). We know that 2^BestChunkWidth + 1
8299 // is a multiple of the divisor. Add that 1 or 2 times to make the sum
8300 // positive.
8301 if (Alternate) {
8302 unsigned NumChunks = divideCeil(BitWidth - TrailingZeros, BestChunkWidth);
8303 assert(NumChunks <= 4);
8304
 // Adjust = 2^BestChunkWidth + 1.
8305 APInt Adjust = APInt::getOneBitSet(HBitWidth, BestChunkWidth);
8306 Adjust.setBit(0);
8307 // If there are 4 chunks, we need to adjust twice.
8308 if (NumChunks == 4)
8309 Adjust <<= 1;
8310 Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, Sum,
8311 DAG.getConstant(Adjust, dl, HiLoVT));
8312 }
8313 }
8314
8315 // Perform a HiLoVT urem on the Sum using truncated divisor.
 // The result occupies only the low half, so the high half starts as zero.
8316 SDValue RemL =
8317 DAG.getNode(ISD::UREM, dl, HiLoVT, Sum,
8318 DAG.getConstant(Divisor.trunc(HBitWidth), dl, HiLoVT));
8319 SDValue RemH = DAG.getConstant(0, dl, HiLoVT);
8320
8321 if (Opcode != ISD::UREM) {
8322 // If we didn't shift LL/LH earlier, do it now.
8323 if (BestChunkWidth != HBitWidth)
8324 ShiftRight(LL, LH, TrailingZeros);
8325
8326 // Subtract the remainder from the shifted dividend.
8327 SDValue Dividend = DAG.getNode(ISD::BUILD_PAIR, dl, VT, LL, LH);
8328 SDValue Rem = DAG.getNode(ISD::BUILD_PAIR, dl, VT, RemL, RemH);
8329
8330 Dividend = DAG.getNode(ISD::SUB, dl, VT, Dividend, Rem);
8331
8332 // Multiply by the multiplicative inverse of the divisor modulo
8333 // (1 << BitWidth).
8334 APInt MulFactor = Divisor.multiplicativeInverse();
8335
8336 SDValue Quotient = DAG.getNode(ISD::MUL, dl, VT, Dividend,
8337 DAG.getConstant(MulFactor, dl, VT));
8338
8339 // Split the quotient into low and high parts.
8340 SDValue QuotL, QuotH;
8341 std::tie(QuotL, QuotH) = DAG.SplitScalar(Quotient, dl, HiLoVT, HiLoVT);
 // Quotient halves are appended low half first.
8342 Result.push_back(QuotL);
8343 Result.push_back(QuotH);
8344 }
8345
8346 if (Opcode != ISD::UDIV) {
8347 // If we shifted the input, shift the remainder left and add the bits we
8348 // shifted off the input.
8349 if (TrailingZeros) {
8350 if (TrailingZeros < HBitWidth) {
8351 // Shift RemH:RemL left by TrailingZeros.
8352 // RemH gets the high bits shifted out of RemL.
8353 RemH = DAG.getNode(
8354 ISD::SRL, dl, HiLoVT, RemL,
8355 DAG.getShiftAmountConstant(HBitWidth - TrailingZeros, HiLoVT, dl));
8356 RemL =
8357 DAG.getNode(ISD::SHL, dl, HiLoVT, RemL,
8358 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
8359 // OR in the partial remainder.
8360 RemL = DAG.getNode(ISD::OR, dl, HiLoVT, RemL, PartialRemL,
8362 } else if (TrailingZeros == HBitWidth) {
8363 // Shift left by exactly HBitWidth: RemH becomes RemL, RemL becomes
8364 // PartialRemL.
8365 RemH = RemL;
8366 RemL = PartialRemL;
8367 } else {
8368 // Shift left by more than HBitWidth.
8369 RemH = DAG.getNode(
8370 ISD::SHL, dl, HiLoVT, RemL,
8371 DAG.getShiftAmountConstant(TrailingZeros - HBitWidth, HiLoVT, dl));
8372 RemH = DAG.getNode(ISD::OR, dl, HiLoVT, RemH, PartialRemH,
8374 RemL = PartialRemL;
8375 }
8376 }
 // Remainder halves are appended low half first, after any quotient halves.
8377 Result.push_back(RemL);
8378 Result.push_back(RemH);
8379 }
8380
8381 return true;
8382}
8383
// Expand an unsigned division/remainder of operand 0 of N by the constant
// Divisor, working on HiLoVT halves, using the magic-number values from
// UnsignedDivisionByConstantInfo: an optional pre-shift, the upper half of a
// wide multiply by Magics.Magic, an optional add fix-up (Magics.IsAdd) and an
// optional post-shift. The remainder is formed as Dividend - Quotient * Divisor.
//
// LL and LH are optional low/high halves of the dividend; when both are null
// they are obtained by splitting operand 0. On success Result receives the
// quotient halves (low, then high) when the opcode is not UREM, followed by
// the remainder halves (low, then high) when the opcode is not UDIV.
// Returns false when one of the wide multiplies cannot be expanded.
8384bool TargetLowering::expandUDIVREMByConstantViaUMulHiMagic(
8385 SDNode *N, const APInt &Divisor, SmallVectorImpl<SDValue> &Result,
8386 EVT HiLoVT, SelectionDAG &DAG, SDValue LL, SDValue LH) const {
8387
8388 SDValue N0 = N->getOperand(0);
8389 EVT VT = N0->getValueType(0);
8390 SDLoc DL{N};
8391
8392 assert(!Divisor.isOne() && "Magic algorithm does not work for division by 1");
8393
8394 // This helper creates a MUL_LOHI of the pair (LL, LH) by a constant.
 // It returns whatever expandMUL_LOHI returns; the product parts are appended
 // to the Result vector passed to the helper.
8395 auto MakeMUL_LOHIByConst = [&](unsigned Opc, SDValue LL, SDValue LH,
8396 const APInt &Const,
8397 SmallVectorImpl<SDValue> &Result) {
8398 SDValue LHS = DAG.getNode(ISD::BUILD_PAIR, DL, VT, LL, LH);
8399 SDValue RHS = DAG.getConstant(Const, DL, VT);
8400 auto [RL, RH] = DAG.SplitScalar(RHS, DL, HiLoVT, HiLoVT);
8401 return expandMUL_LOHI(Opc, VT, DL, LHS, RHS, Result, HiLoVT, DAG,
8403 LL, LH, RL, RH);
8404 };
8405
8406 // This helper creates an ADD/SUB of the pairs (LL, LH) and (RL, RH).
 // The overflow output of the low-half operation is fed into the high-half
 // operation; the (low, high) result pair is returned.
8407 auto MakeAddSubLong = [&](unsigned Opc, SDValue LL, SDValue LH, SDValue RL,
8408 SDValue RH) {
8409 SDValue AddSubNode =
8411 DAG.getVTList(HiLoVT, MVT::i1), LL, RL);
8412 SDValue OutL = AddSubNode.getValue(0);
8413 SDValue Overflow = AddSubNode.getValue(1);
8414 SDValue AddSubWithOverflow =
8416 DAG.getVTList(HiLoVT, MVT::i1), LH, RH, Overflow);
8417 SDValue OutH = AddSubWithOverflow.getValue(0);
8418 return std::make_pair(OutL, OutH);
8419 };
8420
8421 // This helper creates a SRL of the pair (LL, LH) by Shift.
 // Every call site below passes a non-zero Shift.
8422 auto MakeSRLLong = [&](SDValue LL, SDValue LH, unsigned Shift) {
8423 unsigned HBitWidth = HiLoVT.getScalarSizeInBits();
8424 if (Shift < HBitWidth) {
8425 SDValue ShAmt = DAG.getShiftAmountConstant(Shift, HiLoVT, DL);
8426 SDValue ResL = DAG.getNode(ISD::FSHR, DL, HiLoVT, LH, LL, ShAmt);
8427 SDValue ResH = DAG.getNode(ISD::SRL, DL, HiLoVT, LH, ShAmt);
8428 return std::make_pair(ResL, ResH);
8429 }
8430 SDValue Zero = DAG.getConstant(0, DL, HiLoVT);
8431 if (Shift == HBitWidth)
8432 return std::make_pair(LH, Zero);
8433 assert(Shift - HBitWidth < HBitWidth &&
8434 "We shouldn't generate an undefined shift");
8435 SDValue ShAmt = DAG.getShiftAmountConstant(Shift - HBitWidth, HiLoVT, DL);
8436 return std::make_pair(DAG.getNode(ISD::SRL, DL, HiLoVT, LH, ShAmt), Zero);
8437 };
8438
8439 // Knowledge of leading zeros may help to reduce the multiplier.
8440 unsigned KnownLeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros();
8441
8442 UnsignedDivisionByConstantInfo Magics = UnsignedDivisionByConstantInfo::get(
8443 Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()));
8444
8445 assert(!LL == !LH && "Expected both input halves or no input halves!");
8446 if (!LL)
8447 std::tie(LL, LH) = DAG.SplitScalar(N0, DL, HiLoVT, HiLoVT);
 // QH:QL is the running quotient; LH:LL keeps the untouched dividend, which
 // the fix-up and the remainder computation below still need.
8448 SDValue QL = LL;
8449 SDValue QH = LH;
8450 if (Magics.PreShift != 0)
8451 std::tie(QL, QH) = MakeSRLLong(QL, QH, Magics.PreShift);
8452
8453 SmallVector<SDValue, 4> UMulResult;
8454 if (!MakeMUL_LOHIByConst(ISD::UMUL_LOHI, QL, QH, Magics.Magic, UMulResult))
8455 return false;
8456
 // Keep elements 2 and 3 of the product parts — presumably the upper half of
 // the double-width product, i.e. the multiply-high result; confirm against
 // the order in which expandMUL_LOHI appends its parts.
8457 QL = UMulResult[2];
8458 QH = UMulResult[3];
8459
 // Add fix-up: Q = (((Dividend - Q) >> 1) + Q).
8460 if (Magics.IsAdd) {
8461 auto [NPQL, NPQH] = MakeAddSubLong(ISD::SUB, LL, LH, QL, QH);
8462 std::tie(NPQL, NPQH) = MakeSRLLong(NPQL, NPQH, 1);
8463 std::tie(QL, QH) = MakeAddSubLong(ISD::ADD, NPQL, NPQH, QL, QH);
8464 }
8465
8466 if (Magics.PostShift != 0)
8467 std::tie(QL, QH) = MakeSRLLong(QL, QH, Magics.PostShift);
8468
8469 unsigned Opcode = N->getOpcode();
8470 if (Opcode != ISD::UREM) {
8471 Result.push_back(QL);
8472 Result.push_back(QH);
8473 }
8474
8475 if (Opcode != ISD::UDIV) {
 // Remainder = Dividend - Quotient * Divisor.
 // NOTE(review): if this multiply cannot be expanded and the opcode is
 // neither UREM nor UDIV, the quotient halves were already appended to
 // Result above even though false is returned — confirm that callers
 // discard Result on failure.
8476 SmallVector<SDValue, 2> MulResult;
8477 if (!MakeMUL_LOHIByConst(ISD::MUL, QL, QH, Divisor, MulResult))
8478 return false;
8479
8480 assert(MulResult.size() == 2);
8481
8482 auto [RemL, RemH] =
8483 MakeAddSubLong(ISD::SUB, LL, LH, MulResult[0], MulResult[1]);
8484
8485 Result.push_back(RemL);
8486 Result.push_back(RemH);
8487 }
8488
8489 return true;
8490}
8491
8494 EVT HiLoVT, SelectionDAG &DAG,
8495 SDValue LL, SDValue LH) const {
8496 unsigned Opcode = N->getOpcode();
8497
8498 // TODO: Support signed division/remainder.
8499 if (Opcode == ISD::SREM || Opcode == ISD::SDIV || Opcode == ISD::SDIVREM)
8500 return false;
8501 assert(
8502 (Opcode == ISD::UREM || Opcode == ISD::UDIV || Opcode == ISD::UDIVREM) &&
8503 "Unexpected opcode");
8504
8505 auto *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
8506 if (!CN)
8507 return false;
8508
8509 APInt Divisor = CN->getAPIntValue();
8510
8511 // We depend on the UREM by constant optimization in DAGCombiner that requires
8512 // high multiply.
8513 if (!isOperationLegalOrCustom(ISD::MULHU, HiLoVT) &&
8515 return false;
8516
8517 // Don't expand if optimizing for size.
8518 if (DAG.shouldOptForSize())
8519 return false;
8520
8521 // Early out for 0 or 1 divisors.
8522 if (Divisor.ule(1))
8523 return false;
8524
8525 if (expandUDIVREMByConstantViaUREMDecomposition(N, Divisor, Result, HiLoVT,
8526 DAG, LL, LH))
8527 return true;
8528
8529 if (expandUDIVREMByConstantViaUMulHiMagic(N, Divisor, Result, HiLoVT, DAG, LL,
8530 LH))
8531 return true;
8532
8533 return false;
8534}
8535
8536// Check that (every element of) Z is undef or not an exact multiple of BW.
// The funnel-shift expansions in this file use this to pick the simpler
// lowering that is only valid when the shift amount modulo BW is non-zero.
8537static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) {
8539 Z,
 // Accept a null C (presumably how an undef element is reported, since
 // AllowUndefs is set) or a constant whose value is not a multiple of BW.
8540 [=](ConstantSDNode *C) { return !C || C->getAPIntValue().urem(BW) != 0; },
8541 /*AllowUndefs=*/true, /*AllowTruncation=*/true);
8542}
8543
8545 EVT VT = Node->getValueType(0);
8546 SDValue ShX, ShY;
8547 SDValue ShAmt, InvShAmt;
8548 SDValue X = Node->getOperand(0);
8549 SDValue Y = Node->getOperand(1);
8550 SDValue Z = Node->getOperand(2);
8551 SDValue Mask = Node->getOperand(3);
8552 SDValue VL = Node->getOperand(4);
8553
8554 unsigned BW = VT.getScalarSizeInBits();
8555 bool IsFSHL = Node->getOpcode() == ISD::VP_FSHL;
8556 SDLoc DL(SDValue(Node, 0));
8557
8558 EVT ShVT = Z.getValueType();
8559 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8560 // fshl: X << C | Y >> (BW - C)
8561 // fshr: X << (BW - C) | Y >> C
8562 // where C = Z % BW is not zero
8563 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8564 ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
8565 InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitWidthC, ShAmt, Mask, VL);
8566 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt, Mask,
8567 VL);
8568 ShY = DAG.getNode(ISD::VP_SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt, Mask,
8569 VL);
8570 } else {
8571 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
8572 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
8573 SDValue BitMask = DAG.getConstant(BW - 1, DL, ShVT);
8574 if (isPowerOf2_32(BW)) {
8575 // Z % BW -> Z & (BW - 1)
8576 ShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, Z, BitMask, Mask, VL);
8577 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
8578 SDValue NotZ = DAG.getNode(ISD::VP_XOR, DL, ShVT, Z,
8579 DAG.getAllOnesConstant(DL, ShVT), Mask, VL);
8580 InvShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, NotZ, BitMask, Mask, VL);
8581 } else {
8582 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8583 ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
8584 InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitMask, ShAmt, Mask, VL);
8585 }
8586
8587 SDValue One = DAG.getConstant(1, DL, ShVT);
8588 if (IsFSHL) {
8589 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, ShAmt, Mask, VL);
8590 SDValue ShY1 = DAG.getNode(ISD::VP_SRL, DL, VT, Y, One, Mask, VL);
8591 ShY = DAG.getNode(ISD::VP_SRL, DL, VT, ShY1, InvShAmt, Mask, VL);
8592 } else {
8593 SDValue ShX1 = DAG.getNode(ISD::VP_SHL, DL, VT, X, One, Mask, VL);
8594 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, ShX1, InvShAmt, Mask, VL);
8595 ShY = DAG.getNode(ISD::VP_SRL, DL, VT, Y, ShAmt, Mask, VL);
8596 }
8597 }
8598 return DAG.getNode(ISD::VP_OR, DL, VT, ShX, ShY, Mask, VL);
8599}
8600
8602 SelectionDAG &DAG) const {
8603 if (Node->isVPOpcode())
8604 return expandVPFunnelShift(Node, DAG);
8605
8606 EVT VT = Node->getValueType(0);
8607
8608 if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
8612 return SDValue();
8613
8614 SDValue X = Node->getOperand(0);
8615 SDValue Y = Node->getOperand(1);
8616 SDValue Z = Node->getOperand(2);
8617
8618 unsigned BW = VT.getScalarSizeInBits();
8619 bool IsFSHL = Node->getOpcode() == ISD::FSHL;
8620 SDLoc DL(SDValue(Node, 0));
8621
8622 EVT ShVT = Z.getValueType();
8623
8624 // If a funnel shift in the other direction is more supported, use it.
8625 unsigned RevOpcode = IsFSHL ? ISD::FSHR : ISD::FSHL;
8626 if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
8627 isOperationLegalOrCustom(RevOpcode, VT) && isPowerOf2_32(BW)) {
8628 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8629 // fshl X, Y, Z -> fshr X, Y, -Z
8630 // fshr X, Y, Z -> fshl X, Y, -Z
8631 Z = DAG.getNegative(Z, DL, ShVT);
8632 } else {
8633 // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
8634 // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
8635 SDValue One = DAG.getConstant(1, DL, ShVT);
8636 if (IsFSHL) {
8637 Y = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
8638 X = DAG.getNode(ISD::SRL, DL, VT, X, One);
8639 } else {
8640 X = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
8641 Y = DAG.getNode(ISD::SHL, DL, VT, Y, One);
8642 }
8643 Z = DAG.getNOT(DL, Z, ShVT);
8644 }
8645 return DAG.getNode(RevOpcode, DL, VT, X, Y, Z);
8646 }
8647
8648 SDValue ShX, ShY;
8649 SDValue ShAmt, InvShAmt;
8650 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8651 // fshl: X << C | Y >> (BW - C)
8652 // fshr: X << (BW - C) | Y >> C
8653 // where C = Z % BW is not zero
8654 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8655 ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
8656 InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt);
8657 ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt);
8658 ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt);
8659 } else {
8660 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
8661 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
8662 SDValue Mask = DAG.getConstant(BW - 1, DL, ShVT);
8663 if (isPowerOf2_32(BW)) {
8664 // Z % BW -> Z & (BW - 1)
8665 ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask);
8666 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
8667 InvShAmt = DAG.getNode(ISD::AND, DL, ShVT, DAG.getNOT(DL, Z, ShVT), Mask);
8668 } else {
8669 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8670 ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
8671 InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, Mask, ShAmt);
8672 }
8673
8674 SDValue One = DAG.getConstant(1, DL, ShVT);
8675 if (IsFSHL) {
8676 ShX = DAG.getNode(ISD::SHL, DL, VT, X, ShAmt);
8677 SDValue ShY1 = DAG.getNode(ISD::SRL, DL, VT, Y, One);
8678 ShY = DAG.getNode(ISD::SRL, DL, VT, ShY1, InvShAmt);
8679 } else {
8680 SDValue ShX1 = DAG.getNode(ISD::SHL, DL, VT, X, One);
8681 ShX = DAG.getNode(ISD::SHL, DL, VT, ShX1, InvShAmt);
8682 ShY = DAG.getNode(ISD::SRL, DL, VT, Y, ShAmt);
8683 }
8684 }
8685 return DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
8686}
8687
8688// TODO: Merge with expandFunnelShift.
8690 SelectionDAG &DAG) const {
8691 EVT VT = Node->getValueType(0);
8692 unsigned EltSizeInBits = VT.getScalarSizeInBits();
8693 bool IsLeft = Node->getOpcode() == ISD::ROTL;
8694 SDValue Op0 = Node->getOperand(0);
8695 SDValue Op1 = Node->getOperand(1);
8696 SDLoc DL(SDValue(Node, 0));
8697
8698 EVT ShVT = Op1.getValueType();
8699 SDValue Zero = DAG.getConstant(0, DL, ShVT);
8700
8701 // If a rotate in the other direction is more supported, use it.
8702 unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
8703 if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
8704 isOperationLegalOrCustom(RevRot, VT) && isPowerOf2_32(EltSizeInBits)) {
8705 SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
8706 return DAG.getNode(RevRot, DL, VT, Op0, Sub);
8707 }
8708
8709 if (!AllowVectorOps && VT.isVector() &&
8715 return SDValue();
8716
8717 unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
8718 unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
8719 SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
8720 SDValue ShVal;
8721 SDValue HsVal;
8722 if (isPowerOf2_32(EltSizeInBits)) {
8723 // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
8724 // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
8725 SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
8726 SDValue ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC);
8727 ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
8728 SDValue HsAmt = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC);
8729 HsVal = DAG.getNode(HsOpc, DL, VT, Op0, HsAmt);
8730 } else {
8731 // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
8732 // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
8733 SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
8734 SDValue ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Op1, BitWidthC);
8735 ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
8736 SDValue HsAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthMinusOneC, ShAmt);
8737 SDValue One = DAG.getConstant(1, DL, ShVT);
8738 HsVal =
8739 DAG.getNode(HsOpc, DL, VT, DAG.getNode(HsOpc, DL, VT, Op0, One), HsAmt);
8740 }
8741 return DAG.getNode(ISD::OR, DL, VT, ShVal, HsVal);
8742}
8743
8744/// Check if CLMUL on VT can eventually reach a type with legal CLMUL through
8745/// a chain of halving decompositions (halving element width) and/or vector
8746/// widening (doubling element count). This guides expansion strategy selection:
8747/// if true, the halving/widening path produces better code than bit-by-bit.
8748///
8749/// HalveDepth tracks halving steps only (each creates ~4x more operations).
8750/// Widening steps are cheap (O(1) pad/extract) and don't count.
8751/// Limiting halvings to 2 prevents exponential blowup:
8752/// 1 halving: ~4 sub-CLMULs (good, e.g. v8i16 -> v8i8)
8753/// 2 halvings: ~16 sub-CLMULs (acceptable, e.g. v4i32 -> v4i16 -> v8i8)
8754/// 3 halvings: ~64 sub-CLMULs (worse than bit-by-bit expansion)
8756 EVT VT, unsigned HalveDepth = 0,
8757 unsigned TotalDepth = 0) {
8758 if (HalveDepth > 2 || TotalDepth > 8 || !VT.isFixedLengthVector())
8759 return false;
8761 return true;
8762 if (!TLI.isTypeLegal(VT))
8763 return false;
8764
8765 unsigned BW = VT.getScalarSizeInBits();
8766
8767 // Halve: halve element width, same element count.
8768 // This is the expensive step -- each halving creates ~4x more operations.
8769 if (BW % 2 == 0) {
8770 EVT HalfEltVT = EVT::getIntegerVT(Ctx, BW / 2);
8771 EVT HalfVT = VT.changeVectorElementType(Ctx, HalfEltVT);
8772 if (TLI.isTypeLegal(HalfVT) &&
8773 canNarrowCLMULToLegal(TLI, Ctx, HalfVT, HalveDepth + 1, TotalDepth + 1))
8774 return true;
8775 }
8776
8777 // Widen: double element count (fixed-width vectors only).
8778 // This is cheap -- just INSERT_SUBVECTOR + EXTRACT_SUBVECTOR.
8779 EVT WideVT = VT.getDoubleNumVectorElementsVT(Ctx);
8780 if (TLI.isTypeLegal(WideVT) &&
8781 canNarrowCLMULToLegal(TLI, Ctx, WideVT, HalveDepth, TotalDepth + 1))
8782 return true;
8783
8784 return false;
8785}
8786
8788 SDLoc DL(Node);
8789 EVT VT = Node->getValueType(0);
8790 SDValue X = Node->getOperand(0);
8791 SDValue Y = Node->getOperand(1);
8792 unsigned BW = VT.getScalarSizeInBits();
8793 unsigned Opcode = Node->getOpcode();
8794 LLVMContext &Ctx = *DAG.getContext();
8795
8796 switch (Opcode) {
8797 case ISD::CLMUL: {
8798 // For vector types, try decomposition strategies that leverage legal
8799 // CLMUL on narrower or wider element types, avoiding the expensive
8800 // bit-by-bit expansion.
8801 if (VT.isVector()) {
8802 // Strategy 1: Halving decomposition to half-element-width CLMUL.
8803 // Applies ExpandIntRes_CLMUL's identity element-wise:
8804 // CLMUL(X, Y) = (Hi << HalfBW) | Lo
8805 // where:
8806 // Lo = CLMUL(XLo, YLo)
8807 // Hi = CLMULH(XLo, YLo) ^ CLMUL(XLo, YHi) ^ CLMUL(XHi, YLo)
8808 unsigned HalfBW = BW / 2;
8809 if (BW % 2 == 0) {
8810 EVT HalfEltVT = EVT::getIntegerVT(Ctx, HalfBW);
8811 EVT HalfVT =
8812 EVT::getVectorVT(Ctx, HalfEltVT, VT.getVectorElementCount());
8813 if (isTypeLegal(HalfVT) && canNarrowCLMULToLegal(*this, Ctx, HalfVT,
8814 /*HalveDepth=*/1)) {
8815 SDValue ShAmt = DAG.getShiftAmountConstant(HalfBW, VT, DL);
8816
8817 // Extract low and high halves of each element.
8818 SDValue XLo = DAG.getNode(ISD::TRUNCATE, DL, HalfVT, X);
8819 SDValue XHi = DAG.getNode(ISD::TRUNCATE, DL, HalfVT,
8820 DAG.getNode(ISD::SRL, DL, VT, X, ShAmt));
8821 SDValue YLo = DAG.getNode(ISD::TRUNCATE, DL, HalfVT, Y);
8822 SDValue YHi = DAG.getNode(ISD::TRUNCATE, DL, HalfVT,
8823 DAG.getNode(ISD::SRL, DL, VT, Y, ShAmt));
8824
8825 // Lo = CLMUL(XLo, YLo)
8826 SDValue Lo = DAG.getNode(ISD::CLMUL, DL, HalfVT, XLo, YLo);
8827
8828 // Hi = CLMULH(XLo, YLo) ^ CLMUL(XLo, YHi) ^ CLMUL(XHi, YLo)
8829 SDValue LoH = DAG.getNode(ISD::CLMULH, DL, HalfVT, XLo, YLo);
8830 SDValue Cross1 = DAG.getNode(ISD::CLMUL, DL, HalfVT, XLo, YHi);
8831 SDValue Cross2 = DAG.getNode(ISD::CLMUL, DL, HalfVT, XHi, YLo);
8832 SDValue Cross = DAG.getNode(ISD::XOR, DL, HalfVT, Cross1, Cross2);
8833 SDValue Hi = DAG.getNode(ISD::XOR, DL, HalfVT, LoH, Cross);
8834
8835 // Reassemble: Result = ZExt(Lo) | (AnyExt(Hi) << HalfBW)
8836 SDValue LoExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo);
8837 SDValue HiExt = DAG.getNode(ISD::ANY_EXTEND, DL, VT, Hi);
8838 SDValue HiShifted = DAG.getNode(ISD::SHL, DL, VT, HiExt, ShAmt);
8839 return DAG.getNode(ISD::OR, DL, VT, LoExt, HiShifted);
8840 }
8841 }
8842
8843 // Strategy 2: Promote to double-element-width CLMUL.
8844 // CLMUL(X, Y) = Trunc(CLMUL(AnyExt(X), AnyExt(Y)))
8845 {
8846 EVT ExtVT = VT.widenIntegerElementType(Ctx);
8847 if (isTypeLegal(ExtVT) && isOperationLegalOrCustom(ISD::CLMUL, ExtVT)) {
8848 // If CLMUL on ExtVT is Custom (not Legal), the target may
8849 // scalarize it, costing O(NumElements) scalar ops. The bit-by-bit
8850 // fallback costs O(BW) vectorized iterations. Only widen when
8851 // element count is small enough that scalarization is cheaper.
8852 unsigned NumElts = VT.getVectorMinNumElements();
8853 if (isOperationLegal(ISD::CLMUL, ExtVT) || NumElts < BW) {
8854 SDValue XExt = DAG.getNode(ISD::ANY_EXTEND, DL, ExtVT, X);
8855 SDValue YExt = DAG.getNode(ISD::ANY_EXTEND, DL, ExtVT, Y);
8856 SDValue Mul = DAG.getNode(ISD::CLMUL, DL, ExtVT, XExt, YExt);
8857 return DAG.getNode(ISD::TRUNCATE, DL, VT, Mul);
8858 }
8859 }
8860 }
8861
8862 // Strategy 3: Widen element count (pad with undef, do CLMUL on wider
8863 // vector, extract lower result). CLMUL is element-wise, so upper
8864 // (undef) lanes don't affect the lower results.
8865 // e.g. v4i16 => pad to v8i16 => halve to v8i8 PMUL => extract v4i16.
8866 if (auto EC = VT.getVectorElementCount(); EC.isFixed()) {
8867 EVT WideVT = EVT::getVectorVT(Ctx, VT.getVectorElementType(), EC * 2);
8868 if (isTypeLegal(WideVT) && canNarrowCLMULToLegal(*this, Ctx, WideVT)) {
8869 SDValue Undef = DAG.getUNDEF(WideVT);
8870 SDValue XWide = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideVT, Undef,
8871 X, DAG.getVectorIdxConstant(0, DL));
8872 SDValue YWide = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideVT, Undef,
8873 Y, DAG.getVectorIdxConstant(0, DL));
8874 SDValue WideRes = DAG.getNode(ISD::CLMUL, DL, WideVT, XWide, YWide);
8875 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, WideRes,
8876 DAG.getVectorIdxConstant(0, DL));
8877 }
8878 }
8879 }
8880
8881 // NOTE: If you change this expansion, please update the cost model
8882 // calculation in BasicTTIImpl::getTypeBasedIntrinsicInstrCost for
8883 // Intrinsic::clmul.
8884
8885 EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), Ctx, VT);
8886
8887 SDValue Res = DAG.getConstant(0, DL, VT);
8888 for (unsigned I = 0; I < BW; ++I) {
8889 SDValue ShiftAmt = DAG.getShiftAmountConstant(I, VT, DL);
8890 SDValue Mask = DAG.getConstant(APInt::getOneBitSet(BW, I), DL, VT);
8891 SDValue YMasked = DAG.getNode(ISD::AND, DL, VT, Y, Mask);
8892
8893 // For targets with a fast bit test instruction (e.g., x86 BT) or without
8894 // multiply, use a shift-based expansion to avoid expensive MUL
8895 // instructions.
8896 SDValue Part;
8897 if (!hasBitTest(Y, ShiftAmt) &&
8900 Part = DAG.getNode(ISD::MUL, DL, VT, X, YMasked);
8901 } else {
8902 // Canonical bit test: (Y & (1 << I)) != 0
8903 SDValue Zero = DAG.getConstant(0, DL, VT);
8904 SDValue Cond = DAG.getSetCC(DL, SetCCVT, YMasked, Zero, ISD::SETEQ);
8905 SDValue XShifted = DAG.getNode(ISD::SHL, DL, VT, X, ShiftAmt);
8906 Part = DAG.getSelect(DL, VT, Cond, Zero, XShifted);
8907 }
8908 Res = DAG.getNode(ISD::XOR, DL, VT, Res, Part);
8909 }
8910 return Res;
8911 }
8912 case ISD::CLMULR:
8913 // If we have CLMUL/CLMULH, merge the shifted results to form CLMULR.
8916 SDValue Lo = DAG.getNode(ISD::CLMUL, DL, VT, X, Y);
8917 SDValue Hi = DAG.getNode(ISD::CLMULH, DL, VT, X, Y);
8918 Lo = DAG.getNode(ISD::SRL, DL, VT, Lo,
8919 DAG.getShiftAmountConstant(BW - 1, VT, DL));
8920 Hi = DAG.getNode(ISD::SHL, DL, VT, Hi,
8921 DAG.getShiftAmountConstant(1, VT, DL));
8922 return DAG.getNode(ISD::OR, DL, VT, Lo, Hi);
8923 }
8924 [[fallthrough]];
8925 case ISD::CLMULH: {
8926 EVT ExtVT = VT.widenIntegerElementType(Ctx);
8927 // Use bitreverse-based lowering (CLMULR/H = rev(CLMUL(rev,rev)) >> S)
8928 // when any of these hold:
8929 // (a) ZERO_EXTEND to ExtVT or SRL on ExtVT isn't legal.
8930 // (b) CLMUL is legal on VT but not on ExtVT (e.g. v8i8 on AArch64).
8931 // (c) CLMUL on ExtVT isn't legal, but CLMUL on VT can be efficiently
8932 // expanded via halving/widening to reach legal CLMUL. The bitreverse
8933 // path creates CLMUL(VT) which will be expanded efficiently. The
8934 // promote path would create CLMUL(ExtVT) => halving => CLMULH(VT),
8935 // causing a cycle.
8936 // Note: when CLMUL is legal on ExtVT, the zext => CLMUL(ExtVT) => shift
8937 // => trunc path is preferred over the bitreverse path, as it avoids the
8938 // cost of 3 bitreverse operations.
8943 canNarrowCLMULToLegal(*this, Ctx, VT)))) {
8944 SDValue XRev = DAG.getNode(ISD::BITREVERSE, DL, VT, X);
8945 SDValue YRev = DAG.getNode(ISD::BITREVERSE, DL, VT, Y);
8946 SDValue ClMul = DAG.getNode(ISD::CLMUL, DL, VT, XRev, YRev);
8947 SDValue Res = DAG.getNode(ISD::BITREVERSE, DL, VT, ClMul);
8948 if (Opcode == ISD::CLMULH)
8949 Res = DAG.getNode(ISD::SRL, DL, VT, Res,
8950 DAG.getShiftAmountConstant(1, VT, DL));
8951 return Res;
8952 }
8953 SDValue XExt = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, X);
8954 SDValue YExt = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, Y);
8955 SDValue ClMul = DAG.getNode(ISD::CLMUL, DL, ExtVT, XExt, YExt);
8956 unsigned ShAmt = Opcode == ISD::CLMULR ? BW - 1 : BW;
8957 SDValue HiBits = DAG.getNode(ISD::SRL, DL, ExtVT, ClMul,
8958 DAG.getShiftAmountConstant(ShAmt, ExtVT, DL));
8959 return DAG.getNode(ISD::TRUNCATE, DL, VT, HiBits);
8960 }
8961 }
8962 llvm_unreachable("Expected CLMUL, CLMULR, or CLMULH");
8963}
8964
// Expands a three-operand double-shift node into funnel shifts, ordinary
// shifts and selects. Operand 0 is the low half, operand 1 the high half and
// operand 2 the shift amount. The node is treated as a left shift for
// ISD::SHL_PARTS, an arithmetic right shift for ISD::SRA_PARTS, and a logical
// right shift for any other opcode.
// The two result halves are stored to Lo and Hi, which are not declared in the
// visible body (presumably output reference parameters - TODO confirm).
// NOTE(review): listing line 8965, the start of this signature, is missing
// from this extract.
8966 SelectionDAG &DAG) const {
8967 assert(Node->getNumOperands() == 3 && "Not a double-shift!");
8968 EVT VT = Node->getValueType(0);
8969 unsigned VTBits = VT.getScalarSizeInBits();
8970 assert(isPowerOf2_32(VTBits) && "Power-of-two integer type expected");
8971
8972 bool IsSHL = Node->getOpcode() == ISD::SHL_PARTS;
8973 bool IsSRA = Node->getOpcode() == ISD::SRA_PARTS;
8974 SDValue ShOpLo = Node->getOperand(0);
8975 SDValue ShOpHi = Node->getOperand(1);
8976 SDValue ShAmt = Node->getOperand(2);
8977 EVT ShAmtVT = ShAmt.getValueType();
8978 EVT ShAmtCCVT =
8979 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ShAmtVT);
8980 SDLoc dl(Node);
8981
8982 // ISD::FSHL and ISD::FSHR have defined overflow behavior but ISD::SHL and
8983 // ISD::SRA/L nodes haven't. Insert an AND to be safe, it's usually optimized
8984 // away during isel.
8985 SDValue SafeShAmt = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
8986 DAG.getConstant(VTBits - 1, dl, ShAmtVT));
     // Tmp1 is the fill value for the half that receives no data bits on a
     // large shift: the high part shifted right arithmetically by VTBits - 1
     // for SRA, and constant zero otherwise.
8987 SDValue Tmp1 = IsSRA ? DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
8988 DAG.getConstant(VTBits - 1, dl, ShAmtVT))
8989 : DAG.getConstant(0, dl, VT);
8990
     // Tmp2 is the funnel-shifted value that combines both halves (used for
     // small shift amounts). Tmp3 is a single half shifted by the masked amount.
8991 SDValue Tmp2, Tmp3;
8992 if (IsSHL) {
8993 Tmp2 = DAG.getNode(ISD::FSHL, dl, VT, ShOpHi, ShOpLo, ShAmt);
8994 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, SafeShAmt);
8995 } else {
8996 Tmp2 = DAG.getNode(ISD::FSHR, dl, VT, ShOpHi, ShOpLo, ShAmt);
8997 Tmp3 = DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, SafeShAmt);
8998 }
8999
9000 // If the shift amount is greater than or equal to the width of a part we
9001 // don't use the result from the FSHL/FSHR. Insert a test and select the
9002 // appropriate values for large shift amounts.
     // Cond tests the single bit of the shift amount whose value is VTBits
     // (VTBits is asserted above to be a power of two).
9003 SDValue AndNode = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
9004 DAG.getConstant(VTBits, dl, ShAmtVT));
9005 SDValue Cond = DAG.getSetCC(dl, ShAmtCCVT, AndNode,
9006 DAG.getConstant(0, dl, ShAmtVT), ISD::SETNE);
9007
     // When Cond is true, one half takes the singly shifted value (Tmp3) and
     // the other takes the fill value (Tmp1); which half is which depends on
     // the shift direction.
9008 if (IsSHL) {
9009 Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
9010 Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
9011 } else {
9012 Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
9013 Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
9014 }
9015}
9016
// Builds a STRICT_FMUL of the node's operand 0 by the constant 1.0 and returns
// that node as the canonicalized value.
// NOTE(review): listing line 9017, the start of this signature, is missing
// from this extract, so the function name and return type are not visible.
9018 SelectionDAG &DAG) const {
9019 // This implements llvm.canonicalize.f* by multiplication with 1.0, as
9020 // suggested in
9021 // https://llvm.org/docs/LangRef.html#llvm-canonicalize-intrinsic.
9022 // It uses strict_fp operations even outside a strict_fp context in order
9023 // to guarantee that the canonicalization is not optimized away by later
9024 // passes. The result chain introduced by that is intentionally ignored
9025 // since no ordering requirement is intended here.
9026 EVT VT = Node->getValueType(0);
9027 SDLoc DL(Node);
     // Start from the original node's flags and additionally set the
     // no-FP-exception flag on the multiply.
9028 SDNodeFlags Flags = Node->getFlags();
9029 Flags.setNoFPExcept(true);
9030 SDValue One = DAG.getConstantFP(1.0, DL, VT);
     // The strict multiply is chained to the DAG entry node rather than to any
     // chain of the original node.
9031 SDValue Mul =
9032 DAG.getNode(ISD::STRICT_FMUL, DL, {VT, MVT::Other},
9033 {DAG.getEntryNode(), Node->getOperand(0), One}, Flags);
9034 return Mul;
9035}
9036
// Expands a conversion from a floating-point format described by a semantics
// enum (constant operand 1) whose raw bits arrive as an integer (operand 0)
// into the node's floating-point result type, using only integer DAG nodes
// followed by a final BITCAST.
// Returns an empty SDValue after emitting an error when the source semantics
// are not among the supported cases.
// NOTE(review): listing line 9038 (rest of the signature) and lines 9050-9054
// (the supported `case` labels) are missing from this extract.
9037SDValue
9039 SelectionDAG &DAG) const {
9040 SDLoc dl(Node);
9041 EVT DstVT = Node->getValueType(0);
9042 EVT DstScalarVT = DstVT.getScalarType();
9043
9044 SDValue IntVal = Node->getOperand(0);
9045 const uint64_t SemEnum = Node->getConstantOperandVal(1);
9046 const auto Sem = static_cast<APFloatBase::Semantics>(SemEnum);
9047
9048 // Supported source formats.
9049 switch (Sem) {
9055 break;
9056 default:
9057 DAG.getContext()->emitError("CONVERT_FROM_ARBITRARY_FP: not implemented "
9058 "source format (semantics enum " +
9059 Twine(SemEnum) + ")");
9060 return SDValue();
9061 }
9062
     // Source format parameters. SrcMant is the number of stored mantissa bits
     // (precision minus one) and SrcExp is what remains after removing the
     // mantissa bits and one sign bit from the total width.
9063 const fltSemantics &SrcSem = APFloatBase::EnumToSemantics(Sem);
9064 const unsigned SrcBits = APFloat::getSizeInBits(SrcSem);
9065 const unsigned SrcPrecision = APFloat::semanticsPrecision(SrcSem);
9066 const unsigned SrcMant = SrcPrecision - 1;
9067 const unsigned SrcExp = SrcBits - SrcMant - 1;
9068 const int SrcBias = 1 - APFloat::semanticsMinExponent(SrcSem);
9069 const fltNonfiniteBehavior NFBehavior = SrcSem.nonFiniteBehavior;
9070
9071 // Destination format parameters.
9072 const fltSemantics &DstSem = DstScalarVT.getFltSemantics();
9073 const unsigned DstBits = APFloat::getSizeInBits(DstSem);
9074 const unsigned DstMant = APFloat::semanticsPrecision(DstSem) - 1;
9075 const unsigned DstExpBits = DstBits - DstMant - 1;
9076 const int DstMinExp = APFloat::semanticsMinExponent(DstSem);
9077 const int DstBias = 1 - DstMinExp;
9078 const uint64_t DstExpAllOnes = (1ULL << DstExpBits) - 1;
9079
9080 // Work in an integer type matching the destination float width.
9081 EVT IntScalarVT = EVT::getIntegerVT(*DAG.getContext(), DstBits);
9082 EVT IntVT = DstVT.isVector()
9083 ? EVT::getVectorVT(*DAG.getContext(), IntScalarVT,
9084 DstVT.getVectorElementCount())
9085 : IntScalarVT;
9086
9087 SDValue Src = DAG.getZExtOrTrunc(IntVal, dl, IntVT);
9088
9089 EVT SetCCVT =
9090 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), IntVT);
9091
9092 SDValue Zero = DAG.getConstant(0, dl, IntVT);
9093 SDValue One = DAG.getConstant(1, dl, IntVT);
9094
9095 // Extract bit fields.
9096 const uint64_t MantMask = (SrcMant > 0) ? ((1ULL << SrcMant) - 1) : 0;
9097 const uint64_t ExpMask = (1ULL << SrcExp) - 1;
9098
9099 SDValue MantField = DAG.getNode(ISD::AND, dl, IntVT, Src,
9100 DAG.getConstant(MantMask, dl, IntVT));
9101
9102 SDValue ExpField =
9103 DAG.getNode(ISD::AND, dl, IntVT,
9104 DAG.getNode(ISD::SRL, dl, IntVT, Src,
9105 DAG.getShiftAmountConstant(SrcMant, IntVT, dl)),
9106 DAG.getConstant(ExpMask, dl, IntVT));
9107
     // The sign is taken from bit SrcBits - 1 of Src. No mask is applied after
     // the shift, so any bits of Src above that position would also land in
     // SignBit.
     // NOTE(review): presumably the input has no bits set above SrcBits -
     // confirm against the node's contract.
9108 SDValue SignBit =
9109 DAG.getNode(ISD::SRL, dl, IntVT, Src,
9110 DAG.getShiftAmountConstant(SrcBits - 1, IntVT, dl));
9111
     // Move the sign into the top bit of the destination-width integer.
9112 SDValue SignShifted =
9113 DAG.getNode(ISD::SHL, dl, IntVT, SignBit,
9114 DAG.getShiftAmountConstant(DstBits - 1, IntVT, dl));
9115
9116 // Classify the input.
9117 SDValue ExpAllOnes = DAG.getConstant(ExpMask, dl, IntVT);
9118 SDValue IsExpAllOnes =
9119 DAG.getSetCC(dl, SetCCVT, ExpField, ExpAllOnes, ISD::SETEQ);
9120 SDValue IsExpZero = DAG.getSetCC(dl, SetCCVT, ExpField, Zero, ISD::SETEQ);
9121 SDValue IsMantZero = DAG.getSetCC(dl, SetCCVT, MantField, Zero, ISD::SETEQ);
9122 SDValue IsMantNonZero =
9123 DAG.getSetCC(dl, SetCCVT, MantField, Zero, ISD::SETNE);
9124
     // NaN detection depends on the source format's non-finite behavior:
     //  - FiniteOnly: no encoding is treated as NaN.
     //  - IEEE754: all-ones exponent with a non-zero mantissa.
     //  - otherwise: all-ones exponent with an all-ones mantissa.
     // NOTE(review): listing line 9131 (first line of the final else branch) is
     // missing from this extract.
9125 SDValue IsNaN;
9126 if (NFBehavior == fltNonfiniteBehavior::FiniteOnly) {
9127 IsNaN = DAG.getBoolConstant(false, dl, SetCCVT, IntVT);
9128 } else if (NFBehavior == fltNonfiniteBehavior::IEEE754) {
9129 IsNaN = DAG.getNode(ISD::AND, dl, SetCCVT, IsExpAllOnes, IsMantNonZero);
9130 } else {
9132 SDValue MantAllOnes = DAG.getConstant(MantMask, dl, IntVT);
9133 SDValue IsMantAllOnes =
9134 DAG.getSetCC(dl, SetCCVT, MantField, MantAllOnes, ISD::SETEQ);
9135 IsNaN = DAG.getNode(ISD::AND, dl, SetCCVT, IsExpAllOnes, IsMantAllOnes);
9136 }
9137
     // Only the IEEE754 behavior has an infinity encoding here; for the other
     // behaviors IsInf is a constant false.
9138 SDValue IsInf;
9139 if (NFBehavior == fltNonfiniteBehavior::IEEE754)
9140 IsInf = DAG.getNode(ISD::AND, dl, SetCCVT, IsExpAllOnes, IsMantZero);
9141 else
9142 IsInf = DAG.getBoolConstant(false, dl, SetCCVT, IntVT);
9143
9144 SDValue IsZero = DAG.getNode(ISD::AND, dl, SetCCVT, IsExpZero, IsMantZero);
9145 SDValue IsDenorm =
9146 DAG.getNode(ISD::AND, dl, SetCCVT, IsExpZero, IsMantNonZero);
9147
9148 // Normal value conversion.
     // Re-bias the exponent and left-align the mantissa in the destination
     // mantissa field. When DstMant <= SrcMant the mantissa is used unshifted.
9149 const int BiasAdjust = DstBias - SrcBias;
9150 SDValue NormDstExp =
9151 DAG.getNode(ISD::ADD, dl, IntVT, ExpField,
9152 DAG.getConstant(APInt(DstBits, BiasAdjust, true), dl, IntVT));
9153
9154 SDValue NormDstMant;
9155 if (DstMant > SrcMant) {
9156 SDValue NormDstMantShift =
9157 DAG.getShiftAmountConstant(DstMant - SrcMant, IntVT, dl);
9158 NormDstMant = DAG.getNode(ISD::SHL, dl, IntVT, MantField, NormDstMantShift);
9159 } else {
9160 NormDstMant = MantField;
9161 }
9162
9163 SDValue DstMantShift = DAG.getShiftAmountConstant(DstMant, IntVT, dl);
9164 SDValue NormExpShifted =
9165 DAG.getNode(ISD::SHL, dl, IntVT, NormDstExp, DstMantShift);
9166 SDValue NormResult =
9167 DAG.getNode(ISD::OR, dl, IntVT,
9168 DAG.getNode(ISD::OR, dl, IntVT, SignShifted, NormExpShifted),
9169 NormDstMant);
9170
9171 // Denormal value conversion.
     // A source denormal is renormalized. With p = IntVTBits - 1 - LeadingZeros
     // being the position of the mantissa's highest set bit:
     //  - the destination biased exponent is
     //      p - SrcMant + 1 - SrcBias + DstBias
     //        = (IntVTBits + DstBias - SrcBias - SrcMant) - LeadingZeros,
     //    which is DenormExpConst - LeadingZeros;
     //  - that highest bit becomes the implicit one, so it is cleared (Frac) and
     //    the remainder is shifted left by DstMant - p
     //        = LeadingZeros - (IntVTBits - 1 - DstMant),
     //    which is LeadingZeros - ShiftSub.
     // The leading-zero count uses the *_ZERO_POISON form; a zero mantissa is
     // never a denormal (IsDenorm requires IsMantNonZero), so this result is
     // only selected for non-zero MantField.
9172 SDValue DenormResult;
9173 {
9174 const unsigned IntVTBits = DstBits;
9175 SDValue LeadingZeros =
9176 DAG.getNode(ISD::CTLZ_ZERO_POISON, dl, IntVT, MantField);
9177
9178 const int DenormExpConst =
9179 (int)IntVTBits + DstBias - SrcBias - (int)SrcMant;
9180 SDValue DenormDstExp = DAG.getNode(
9181 ISD::SUB, dl, IntVT,
9182 DAG.getConstant(APInt(DstBits, DenormExpConst, true), dl, IntVT),
9183 LeadingZeros);
9184
9185 SDValue MantMSB =
9186 DAG.getNode(ISD::SUB, dl, IntVT,
9187 DAG.getConstant(IntVTBits - 1, dl, IntVT), LeadingZeros);
9188
9189 SDValue LeadingOne = DAG.getNode(ISD::SHL, dl, IntVT, One, MantMSB);
9190 SDValue Frac = DAG.getNode(ISD::XOR, dl, IntVT, MantField, LeadingOne);
9191
9192 const unsigned ShiftSub = IntVTBits - 1 - DstMant;
9193 SDValue ShiftAmount = DAG.getNode(ISD::SUB, dl, IntVT, LeadingZeros,
9194 DAG.getConstant(ShiftSub, dl, IntVT));
9195
9196 SDValue DenormDstMant = DAG.getNode(ISD::SHL, dl, IntVT, Frac, ShiftAmount);
9197
9198 SDValue DenormExpShifted =
9199 DAG.getNode(ISD::SHL, dl, IntVT, DenormDstExp, DstMantShift);
9200 DenormResult = DAG.getNode(
9201 ISD::OR, dl, IntVT,
9202 DAG.getNode(ISD::OR, dl, IntVT, SignShifted, DenormExpShifted),
9203 DenormDstMant);
9204 }
9205
9206 SDValue FiniteResult =
9207 DAG.getSelect(dl, IntVT, IsDenorm, DenormResult, NormResult);
9208
     // NaN result: all-ones exponent with the top mantissa bit set. The sign
     // bit is not included in this constant.
9209 const uint64_t QNaNBit = (DstMant > 0) ? (1ULL << (DstMant - 1)) : 0;
9210 SDValue NaNResult =
9211 DAG.getConstant((DstExpAllOnes << DstMant) | QNaNBit, dl, IntVT);
9212
     // Infinity result: the sign combined with an all-ones exponent and a zero
     // mantissa.
9213 SDValue InfResult =
9214 DAG.getNode(ISD::OR, dl, IntVT, SignShifted,
9215 DAG.getConstant(DstExpAllOnes << DstMant, dl, IntVT));
9216
     // Zero result: only the sign bit, so the sign of zero is preserved.
9217 SDValue ZeroResult = SignShifted;
9218
     // Later selects override earlier ones, so the effective priority is
     // NaN, then infinity, then zero, then the finite (normal/denormal) result.
9219 SDValue Result = FiniteResult;
9220 Result = DAG.getSelect(dl, IntVT, IsZero, ZeroResult, Result);
9221 Result = DAG.getSelect(dl, IntVT, IsInf, InfResult, Result);
9222 Result = DAG.getSelect(dl, IntVT, IsNaN, NaNResult, Result);
9223
9224 return DAG.getNode(ISD::BITCAST, dl, DstVT, Result);
9225}
9226
// Expands an FP-to-signed-integer conversion using integer bit manipulation
// of the source value. Returns true and assigns Result on success; returns
// false, leaving Result untouched, unless the conversion is a non-strict
// f32 -> i64.
// Result is not declared in the visible body (presumably an output reference
// parameter - TODO confirm).
// NOTE(review): listing line 9227, the start of this signature, is missing
// from this extract.
9228 SelectionDAG &DAG) const {
     // For strict opcodes the value operand is operand 1 instead of operand 0.
9229 unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
9230 SDValue Src = Node->getOperand(OpNo);
9231 EVT SrcVT = Src.getValueType();
9232 EVT DstVT = Node->getValueType(0);
9233 SDLoc dl(SDValue(Node, 0));
9234
9235 // FIXME: Only f32 to i64 conversions are supported.
9236 if (SrcVT != MVT::f32 || DstVT != MVT::i64)
9237 return false;
9238
9239 if (Node->isStrictFPOpcode())
9240 // When a NaN is converted to an integer a trap is allowed. We can't
9241 // use this expansion here because it would eliminate that trap. Other
9242 // traps are also allowed and cannot be eliminated. See
9243 // IEEE 754-2008 sec 5.8.
9244 return false;
9245
9246 // Expand f32 -> i64 conversion
9247 // This algorithm comes from compiler-rt's implementation of fixsfdi:
9248 // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
9249 unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
9250 EVT IntVT = SrcVT.changeTypeToInteger();
9251 EVT IntShVT = getShiftAmountTy(IntVT, DAG.getDataLayout());
9252
     // Constants describing the f32 bit layout: exponent field mask and its
     // lowest bit position (23), exponent bias (127), sign mask and sign bit
     // position, and the mask of the stored mantissa bits.
9253 SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT);
9254 SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT);
9255 SDValue Bias = DAG.getConstant(127, dl, IntVT);
9256 SDValue SignMask = DAG.getConstant(APInt::getSignMask(SrcEltBits), dl, IntVT);
9257 SDValue SignLowBit = DAG.getConstant(SrcEltBits - 1, dl, IntVT);
9258 SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT);
9259
9260 SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Src);
9261
     // Exponent = (exponent field) - bias, i.e. the unbiased exponent.
9262 SDValue ExponentBits = DAG.getNode(
9263 ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
9264 DAG.getZExtOrTrunc(ExponentLoBit, dl, IntShVT));
9265 SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);
9266
     // Arithmetic-shifting the isolated sign bit down to bit 0 gives 0 for a
     // clear sign bit and all-ones for a set one; the value is then
     // sign-extended to the destination type.
9267 SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT,
9268 DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
9269 DAG.getZExtOrTrunc(SignLowBit, dl, IntShVT));
9270 Sign = DAG.getSExtOrTrunc(Sign, dl, DstVT);
9271
     // R is the stored mantissa with bit 23 (0x00800000) OR-ed in, widened to
     // the destination type.
9272 SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
9273 DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
9274 DAG.getConstant(0x00800000, dl, IntVT));
9275
9276 R = DAG.getZExtOrTrunc(R, dl, DstVT);
9277
     // Scale R by the exponent: shift left by (Exponent - 23) when
     // Exponent > 23, otherwise shift right by (23 - Exponent).
9278 R = DAG.getSelectCC(
9279 dl, Exponent, ExponentLoBit,
9280 DAG.getNode(ISD::SHL, dl, DstVT, R,
9281 DAG.getZExtOrTrunc(
9282 DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
9283 dl, IntShVT)),
9284 DAG.getNode(ISD::SRL, dl, DstVT, R,
9285 DAG.getZExtOrTrunc(
9286 DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
9287 dl, IntShVT)),
9288 ISD::SETGT);
9289
     // (R ^ Sign) - Sign leaves R unchanged when Sign is 0 and negates it when
     // Sign is all-ones.
9290 SDValue Ret = DAG.getNode(ISD::SUB, dl, DstVT,
9291 DAG.getNode(ISD::XOR, dl, DstVT, R, Sign), Sign);
9292
     // A negative unbiased exponent yields 0 instead of Ret.
9293 Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT),
9294 DAG.getConstant(0, dl, DstVT), Ret, ISD::SETLT);
9295 return true;
9296}
9297
// Expands an FP-to-unsigned-integer conversion (strict or non-strict) in terms
// of FP-to-signed-integer conversions. Returns true and assigns Result (and
// Chain, for strict opcodes) on success; returns false when the expansion is
// not applied.
// Result is not declared in the visible body (presumably an output reference
// parameter - TODO confirm).
// NOTE(review): listing line 9298 (start of the signature) and lines 9314,
// 9316, 9325 and 9337 (parts of conditions below) are missing from this
// extract, so those conditions are incomplete as shown.
9299 SDValue &Chain,
9300 SelectionDAG &DAG) const {
9301 SDLoc dl(SDValue(Node, 0));
     // For strict opcodes operand 0 is the chain and the value is operand 1.
9302 unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
9303 SDValue Src = Node->getOperand(OpNo);
9304
9305 EVT SrcVT = Src.getValueType();
9306 EVT DstVT = Node->getValueType(0);
9307 EVT SetCCVT =
9308 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
9309 EVT DstSetCCVT =
9310 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
9311
9312 // Only expand vector types if we have the appropriate vector bit operations.
9313 unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
9315 if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) ||
9317 return false;
9318
9319 // If the maximum float value is smaller than the signed integer range,
9320 // the destination signmask can't be represented by the float, so we can
9321 // just use FP_TO_SINT directly.
     // APF starts as zero in the source semantics; the visible part of the
     // condition converts SignMask into it with round-to-nearest-even.
9322 const fltSemantics &APFSem = SrcVT.getFltSemantics();
9323 APFloat APF(APFSem, APInt::getZero(SrcVT.getScalarSizeInBits()));
9324 APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());
9326 APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
9327 if (Node->isStrictFPOpcode()) {
9328 Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
9329 { Node->getOperand(0), Src });
9330 Chain = Result.getValue(1);
9331 } else
9332 Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
9333 return true;
9334 }
9335
9336 // Don't expand it if there isn't cheap fsub instruction.
9338 Node->isStrictFPOpcode() ? ISD::STRICT_FSUB : ISD::FSUB, SrcVT))
9339 return false;
9340
     // Cst is the FP constant holding APF (the converted sign mask). Sel is
     // Src < Cst; the strict form is built as a signaling compare on the
     // node's input chain and updates Chain.
9341 SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
9342 SDValue Sel;
9343
9344 if (Node->isStrictFPOpcode()) {
9345 Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
9346 Node->getOperand(0), /*IsSignaling*/ true);
9347 Chain = Sel.getValue(1);
9348 } else {
9349 Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
9350 }
9351
     // The select-the-offset form below is used for strict opcodes and
     // whenever the target hook requests it; otherwise both conversions are
     // emitted and the result is selected.
9352 bool Strict = Node->isStrictFPOpcode() ||
9353 shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);
9354
9355 if (Strict) {
9356 // Expand based on maximum range of FP_TO_SINT, if the value exceeds the
9357 // signmask then offset (the result of which should be fully representable).
9358 // Sel = Src < 0x8000000000000000
9359 // FltOfs = select Sel, 0, 0x8000000000000000
9360 // IntOfs = select Sel, 0, 0x8000000000000000
9361 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
9362
9363 // TODO: Should any fast-math-flags be set for the FSUB?
9364 SDValue FltOfs = DAG.getSelect(dl, SrcVT, Sel,
9365 DAG.getConstantFP(0.0, dl, SrcVT), Cst);
     // Re-type the compare result so it can drive a select on DstVT.
9366 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
9367 SDValue IntOfs = DAG.getSelect(dl, DstVT, Sel,
9368 DAG.getConstant(0, dl, DstVT),
9369 DAG.getConstant(SignMask, dl, DstVT));
9370 SDValue SInt;
9371 if (Node->isStrictFPOpcode()) {
     // Thread the chain: compare -> STRICT_FSUB -> STRICT_FP_TO_SINT.
9372 SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
9373 { Chain, Src, FltOfs });
9374 SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
9375 { Val.getValue(1), Val });
9376 Chain = SInt.getValue(1);
9377 } else {
9378 SDValue Val = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, FltOfs);
9379 SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val);
9380 }
9381 Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
9382 } else {
9383 // Expand based on maximum range of FP_TO_SINT:
9384 // True = fp_to_sint(Src)
9385 // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
9386 // Result = select (Src < 0x8000000000000000), True, False
9387
9388 SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
9389 // TODO: Should any fast-math-flags be set for the FSUB?
9390 SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT,
9391 DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
     // The addition of the sign mask described above is emitted as an XOR.
9392 False = DAG.getNode(ISD::XOR, dl, DstVT, False,
9393 DAG.getConstant(SignMask, dl, DstVT));
9394 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
9395 Result = DAG.getSelect(dl, DstVT, Sel, True, False);
9396 }
9397 return true;
9398}
9399
// Expands an unsigned-integer-to-FP conversion. Returns true and assigns
// Result on success; returns false for strict opcodes and for cases the
// visible code does not handle (anything other than i64 -> f64 elements once
// the non-negative shortcut has been ruled out).
// Result is not declared in the visible body (presumably an output reference
// parameter - TODO confirm). Chain is not written by any visible line.
// NOTE(review): listing line 9400 (start of the signature), line 9415 (second
// half of the non-negative condition) and lines 9427-9430 (rest of the vector
// legality condition) are missing from this extract.
9401 SDValue &Chain, SelectionDAG &DAG) const {
9402 // This transform is not correct for converting 0 when rounding mode is set
9403 // to round toward negative infinity which will produce -0.0. So disable
9404 // under strictfp.
9405 if (Node->isStrictFPOpcode())
9406 return false;
9407
9408 SDValue Src = Node->getOperand(0);
9409 EVT SrcVT = Src.getValueType();
9410 EVT DstVT = Node->getValueType(0);
9411
9412 // If the input is known to be non-negative and SINT_TO_FP is legal then use
9413 // it.
9414 if (Node->getFlags().hasNonNeg() &&
9416 Result =
9417 DAG.getNode(ISD::SINT_TO_FP, SDLoc(Node), DstVT, Node->getOperand(0));
9418 return true;
9419 }
9420
9421 if (SrcVT.getScalarType() != MVT::i64 || DstVT.getScalarType() != MVT::f64)
9422 return false;
9423
9424 // Only expand vector types if we have the appropriate vector bit
9425 // operations.
9426 if (SrcVT.isVector() && (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
9431 return false;
9432
9433 SDLoc dl(SDValue(Node, 0));
9434
9435 // Implementation of unsigned i64 to f64 following the algorithm in
9436 // __floatundidf in compiler_rt. This implementation performs rounding
9437 // correctly in all rounding modes with the exception of converting 0
9438 // when rounding toward negative infinity. In that case the fsub will
9439 // produce -0.0. This will be added to +0.0 and produce -0.0 which is
9440 // incorrect.
     // The constants are double bit patterns: 0x4330000000000000 is 2^52,
     // 0x4530000000000000 is 2^84, and 0x4530000000100000 is 2^84 + 2^52.
     // OR-ing a 32-bit value into the low mantissa bits of 2^52 gives the
     // double 2^52 + lo; doing the same with 2^84 gives 2^84 + hi * 2^32.
9441 SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
9442 SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
9443 llvm::bit_cast<double>(UINT64_C(0x4530000000100000)), dl, DstVT);
9444 SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT);
9445 SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT);
9446 SDValue HiShift = DAG.getShiftAmountConstant(32, SrcVT, dl);
9447
     // Split the source into its low and high 32-bit halves.
9448 SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask);
9449 SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift);
9450 SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52);
9451 SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
9452 SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
9453 SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
     // HiSub = (2^84 + hi * 2^32) - (2^84 + 2^52) = hi * 2^32 - 2^52.
     // Adding LoFlt = 2^52 + lo then yields hi * 2^32 + lo.
9454 SDValue HiSub = DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
9455 Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
9456 return true;
9457}
9458
// Tries to express an FMINNUM/FMAXNUM-family node as a select_cc on its two
// operands. Only applies when the node carries the no-NaNs flag; returns an
// empty SDValue otherwise, and also for vector types that fail the legality
// checks below.
// NOTE(review): listing line 9460 (rest of the signature) and line 9471
// (second half of the legality condition) are missing from this extract.
9459SDValue
9461 SelectionDAG &DAG) const {
9462 unsigned Opcode = Node->getOpcode();
9463 assert((Opcode == ISD::FMINNUM || Opcode == ISD::FMAXNUM ||
9464 Opcode == ISD::STRICT_FMINNUM || Opcode == ISD::STRICT_FMAXNUM) &&
9465 "Wrong opcode");
9466
9467 if (Node->getFlags().hasNoNaNs()) {
     // NOTE(review): the predicate is chosen by comparing against
     // ISD::FMINNUM only, so every other opcode admitted by the assert above,
     // including ISD::STRICT_FMINNUM, gets SETGT. Operands 0 and 1 are also
     // read without a strict-chain offset. Confirm whether strict opcodes can
     // actually reach this point.
9468 ISD::CondCode Pred = Opcode == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
9469 EVT VT = Node->getValueType(0);
9470 if ((!isCondCodeLegal(Pred, VT.getSimpleVT()) ||
9472 VT.isVector())
9473 return SDValue();
9474 SDValue Op1 = Node->getOperand(0);
9475 SDValue Op2 = Node->getOperand(1);
     // select_cc(Op1 Pred Op2) ? Op1 : Op2, keeping the node's flags.
9476 return DAG.getSelectCC(SDLoc(Node), Op1, Op2, Op1, Op2, Pred,
9477 Node->getFlags());
9478 }
9479
9480 return SDValue();
9481}
9482
// Expands an FMINNUM/FMAXNUM node by trying, in order: splitting the vector
// operation, rewriting to the opcode held in NewOp (with canonicalized
// operands where signaling NaNs cannot be ruled out), rewriting to
// FMINIMUM/FMAXIMUM when NaNs are excluded, and finally a select-based form.
// Returns an empty SDValue if none of these applies.
// NOTE(review): listing line 9483 (start of the signature), line 9490 (head of
// the scalable-vector statement), line 9495 (initializer of NewOp) and line
// 9529 (head of the final `if` defining SelCC) are missing from this extract.
9484 SelectionDAG &DAG) const {
9485 if (SDValue Expanded = expandVectorNaryOpBySplitting(Node, DAG))
9486 return Expanded;
9487
9488 EVT VT = Node->getValueType(0);
9489 if (VT.isScalableVector())
9491 "Expanding fminnum/fmaxnum for scalable vectors is undefined.");
9492
9493 SDLoc dl(Node);
9494 unsigned NewOp =
9496
9497 if (isOperationLegalOrCustom(NewOp, VT)) {
9498 SDValue Quiet0 = Node->getOperand(0);
9499 SDValue Quiet1 = Node->getOperand(1);
9500
9501 if (!Node->getFlags().hasNoNaNs()) {
9502 // Insert canonicalizes if it's possible we need to quiet to get correct
9503 // sNaN behavior.
9504 if (!DAG.isKnownNeverSNaN(Quiet0)) {
9505 Quiet0 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet0,
9506 Node->getFlags());
9507 }
9508 if (!DAG.isKnownNeverSNaN(Quiet1)) {
9509 Quiet1 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet1,
9510 Node->getFlags());
9511 }
9512 }
9513
9514 return DAG.getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags());
9515 }
9516
9517 // If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that
9518 // instead if there are no NaNs.
     // "No NaNs" here means either the node's no-NaNs flag is set or both
     // operands are individually known never to be NaN.
9519 if (Node->getFlags().hasNoNaNs() ||
9520 (DAG.isKnownNeverNaN(Node->getOperand(0)) &&
9521 DAG.isKnownNeverNaN(Node->getOperand(1)))) {
9522 unsigned IEEE2018Op =
9523 Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
9524 if (isOperationLegalOrCustom(IEEE2018Op, VT))
9525 return DAG.getNode(IEEE2018Op, dl, VT, Node->getOperand(0),
9526 Node->getOperand(1), Node->getFlags());
9527 }
9528
9530 return SelCC;
9531
9532 return SDValue();
9533}
9534
// Expands an FMINIMUM/FMAXIMUM node in three stages: compute a min/max that
// need not propagate NaN, patch in a NaN result when the operands compare
// unordered, and finally fix up the ordering of -0.0 versus +0.0 when the
// chosen min/max does not already guarantee it.
// NOTE(review): listing line 9535 (start of the signature), line 9551
// (presumably the declaration of MinMax), line 9565 (head of the `if` guarding
// the unroll) and line 9578 (second argument of ConstantFP::get) are missing
// from this extract.
9536 SelectionDAG &DAG) const {
9537 if (SDValue Expanded = expandVectorNaryOpBySplitting(N, DAG))
9538 return Expanded;
9539
9540 SDLoc DL(N);
9541 SDValue LHS = N->getOperand(0);
9542 SDValue RHS = N->getOperand(1);
9543 unsigned Opc = N->getOpcode();
9544 EVT VT = N->getValueType(0);
9545 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9546 bool IsMax = Opc == ISD::FMAXIMUM;
9547 SDNodeFlags Flags = N->getFlags();
9548
9549 // First, implement comparison not propagating NaN. If no native fmin or fmax
9550 // available, use plain select with setcc instead.
9552 unsigned CompOpcIeee = IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
9553 unsigned CompOpc = IsMax ? ISD::FMAXNUM : ISD::FMINNUM;
9554
9555 // FIXME: We should probably define fminnum/fmaxnum variants with correct
9556 // signed zero behavior.
9557 bool MinMaxMustRespectOrderedZero = false;
9558
     // Preference order: the *_IEEE opcode (which also lets the signed-zero
     // fix-up below be skipped), then plain FMINNUM/FMAXNUM, then compare and
     // select.
9559 if (isOperationLegalOrCustom(CompOpcIeee, VT)) {
9560 MinMax = DAG.getNode(CompOpcIeee, DL, VT, LHS, RHS, Flags);
9561 MinMaxMustRespectOrderedZero = true;
9562 } else if (isOperationLegalOrCustom(CompOpc, VT)) {
9563 MinMax = DAG.getNode(CompOpc, DL, VT, LHS, RHS, Flags);
9564 } else {
9566 return DAG.UnrollVectorOp(N);
9567
9568 // NaN (if exists) will be propagated later, so orderness doesn't matter.
9569 SDValue Compare =
9570 DAG.getSetCC(DL, CCVT, LHS, RHS, IsMax ? ISD::SETOGT : ISD::SETOLT);
9571 MinMax = DAG.getSelect(DL, VT, Compare, LHS, RHS, Flags);
9572 }
9573
9574 // Propagate any NaN of both operands
     // Skipped when the no-NaNs flag is set or both operands are known never
     // to be NaN. Otherwise an unordered compare of LHS and RHS selects the
     // FPNaN constant over MinMax.
9575 if (!N->getFlags().hasNoNaNs() &&
9576 (!DAG.isKnownNeverNaN(RHS) || !DAG.isKnownNeverNaN(LHS))) {
9577 ConstantFP *FPNaN = ConstantFP::get(*DAG.getContext(),
9579 MinMax = DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, LHS, RHS, ISD::SETUO),
9580 DAG.getConstantFP(*FPNaN, DL, VT), MinMax, Flags);
9581 }
9582
9583 // fminimum/fmaximum requires -0.0 less than +0.0
     // Only needed when the min/max above does not order zeros, signed zeros
     // matter, and neither operand is known to be non-zero. If MinMax compares
     // equal to 0.0, prefer an operand that is the wanted zero (+0.0 for max,
     // -0.0 for min): RHS if it is that zero, else LHS if it is, else MinMax.
9584 if (!MinMaxMustRespectOrderedZero && !N->getFlags().hasNoSignedZeros() &&
9585 !DAG.isKnownNeverLogicalZero(RHS) && !DAG.isKnownNeverLogicalZero(LHS)) {
9586 SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
9587 DAG.getConstantFP(0.0, DL, VT), ISD::SETOEQ);
9588 SDValue TestZero =
9589 DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
9590 SDValue LCmp = DAG.getSelect(
9591 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHS, TestZero), LHS,
9592 MinMax, Flags);
9593 SDValue RCmp = DAG.getSelect(
9594 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, RHS, TestZero), RHS,
9595 LCmp, Flags);
9596 MinMax = DAG.getSelect(DL, VT, IsZero, RCmp, MinMax, Flags);
9597 }
9598
9599 return MinMax;
9600}
9601
// Expands an FMINIMUMNUM/FMAXIMUMNUM node. It first tries to rewrite to other
// min/max opcodes when their semantics coincide under the known flags and
// operand facts, and otherwise builds the result from select_cc nodes with
// explicit NaN and signed-zero handling.
// NOTE(review): listing line 9602 (start of the signature), line 9614
// (initializer of NewOp), line 9636 (initializer of IEEE2019Op), lines
// 9653-9654 (rest of the vector-unroll condition) and line 9683 (head of the
// `if` around the FP_ROUND) are missing from this extract.
9603 SelectionDAG &DAG) const {
9604 SDLoc DL(Node);
9605 SDValue LHS = Node->getOperand(0);
9606 SDValue RHS = Node->getOperand(1);
9607 unsigned Opc = Node->getOpcode();
9608 EVT VT = Node->getValueType(0);
9609 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9610 bool IsMax = Opc == ISD::FMAXIMUMNUM;
9611 SDNodeFlags Flags = Node->getFlags();
9612
9613 unsigned NewOp =
9615
9616 if (isOperationLegalOrCustom(NewOp, VT)) {
9617 if (!Flags.hasNoNaNs()) {
9618 // Insert canonicalizes if it's possible we need to quiet to get correct
9619 // sNaN behavior.
9620 if (!DAG.isKnownNeverSNaN(LHS)) {
9621 LHS = DAG.getNode(ISD::FCANONICALIZE, DL, VT, LHS, Flags);
9622 }
9623 if (!DAG.isKnownNeverSNaN(RHS)) {
9624 RHS = DAG.getNode(ISD::FCANONICALIZE, DL, VT, RHS, Flags);
9625 }
9626 }
9627
9628 return DAG.getNode(NewOp, DL, VT, LHS, RHS, Flags);
9629 }
9630
9631 // We can use FMINIMUM/FMAXIMUM if there is no NaN, since it has
9632 // same behaviors for all of other cases: +0.0 vs -0.0 included.
9633 if (Flags.hasNoNaNs() ||
9634 (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS))) {
9635 unsigned IEEE2019Op =
9637 if (isOperationLegalOrCustom(IEEE2019Op, VT))
9638 return DAG.getNode(IEEE2019Op, DL, VT, LHS, RHS, Flags);
9639 }
9640
9641 // FMINNUM/FMAXNUM returns qNaN if either operand is sNaN, and it may return
9642 // either one for +0.0 vs -0.0.
     // So it is only used when signaling NaNs are excluded (flag or both
     // operands known) and signed zeros cannot matter (flag, or at least one
     // operand known to be non-zero).
9643 if ((Flags.hasNoNaNs() ||
9644 (DAG.isKnownNeverSNaN(LHS) && DAG.isKnownNeverSNaN(RHS))) &&
9645 (Flags.hasNoSignedZeros() || DAG.isKnownNeverLogicalZero(LHS) ||
9646 DAG.isKnownNeverLogicalZero(RHS))) {
9647 unsigned IEEE2008Op = Opc == ISD::FMINIMUMNUM ? ISD::FMINNUM : ISD::FMAXNUM;
9648 if (isOperationLegalOrCustom(IEEE2008Op, VT))
9649 return DAG.getNode(IEEE2008Op, DL, VT, LHS, RHS, Flags);
9650 }
9651
9652 if (VT.isVector() &&
9655 return DAG.UnrollVectorOp(Node);
9656
9657 // If only one operand is NaN, override it with another operand.
     // select_cc(X unordered X) ? Other : X replaces X when X itself is NaN.
     // The second select sees the already-updated LHS.
9658 if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(LHS)) {
9659 LHS = DAG.getSelectCC(DL, LHS, LHS, RHS, LHS, ISD::SETUO);
9660 }
9661 if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(RHS)) {
9662 RHS = DAG.getSelectCC(DL, RHS, RHS, LHS, RHS, ISD::SETUO);
9663 }
9664
9665 // Always prefer RHS if equal.
9666 SDValue MinMax =
9667 DAG.getSelectCC(DL, LHS, RHS, LHS, RHS, IsMax ? ISD::SETGT : ISD::SETLT);
9668
9669 // TODO: We need quiet sNaN if strictfp.
9670
9671 // Fixup signed zero behavior.
9672 if (Flags.hasNoSignedZeros() || DAG.isKnownNeverLogicalZero(LHS) ||
9673 DAG.isKnownNeverLogicalZero(RHS)) {
9674 return MinMax;
9675 }
     // TestZero names the zero that should win: +0.0 for max, -0.0 for min.
9676 SDValue TestZero =
9677 DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
9678 SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
9679 DAG.getConstantFP(0.0, DL, VT), ISD::SETEQ);
     // NOTE(review): IntVT is not referenced by any visible line below; it may
     // be used on the missing line 9683 - confirm.
9680 EVT IntVT = VT.changeTypeToInteger();
9681 EVT FloatVT = VT.changeElementType(*DAG.getContext(), MVT::f32);
     // LHSTrunc is the value whose class is tested. Under the (missing)
     // condition it is LHS rounded to an f32-element type, otherwise LHS
     // itself.
9682 SDValue LHSTrunc = LHS;
9684 LHSTrunc = DAG.getNode(ISD::FP_ROUND, DL, FloatVT, LHS,
9685 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
9686 }
9687 // It's OK to select from LHS and MinMax, with only one ISD::IS_FPCLASS, as
9688 // we preferred RHS when generate MinMax, if the operands are equal.
9689 SDValue RetZero = DAG.getSelect(
9690 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHSTrunc, TestZero), LHS,
9691 MinMax, Flags);
9692 return DAG.getSelect(DL, VT, IsZero, RetZero, MinMax, Flags);
9693}
9694
9695/// Returns true if this FPClassTest can be performed with an ordered fcmp
9696/// against 0, and false if it can be performed with an unordered fcmp
9697/// against 0. Returns std::nullopt if it cannot be performed as a compare
     /// with 0.
     ///
     /// \param Test      The class mask being tested.
     /// \param Semantics Float semantics used to look up the denormal mode.
     /// \param MF        Function whose denormal mode is consulted.
9698static std::optional<bool> isFCmpEqualZero(FPClassTest Test,
9699 const fltSemantics &Semantics,
9700 const MachineFunction &MF) {
     // Split the mask into its NaN part and everything else. The test is
     // "ordered" when it includes no NaN class and "unordered" when it includes
     // both NaN classes.
9701 FPClassTest OrderedMask = Test & ~fcNan;
9702 FPClassTest NanTest = Test & fcNan;
9703 bool IsOrdered = NanTest == fcNone;
9704 bool IsUnordered = NanTest == fcNan;
9705
9706 // Skip cases that are testing for only a qnan or snan.
9707 if (!IsOrdered && !IsUnordered)
9708 return std::nullopt;
9709
     // A test for exactly the zero classes matches a compare with 0 only when
     // denormal inputs are handled in IEEE mode.
9710 if (OrderedMask == fcZero &&
9711 MF.getDenormalMode(Semantics).Input == DenormalMode::IEEE)
9712 return IsOrdered;
     // A test for zero-or-subnormal matches a compare with 0 when denormal
     // inputs are treated as zero.
9713 if (OrderedMask == (fcZero | fcSubnormal) &&
9714 MF.getDenormalMode(Semantics).inputsAreZero())
9715 return IsOrdered;
9716 return std::nullopt;
9717}
9718
// Lowers a floating-point class test of Op against OrigTestMask to generic DAG
// nodes and returns the boolean result as a ResultVT value.
// NOTE(review): listing line 9719 (return type, function name and the leading
// parameters; ResultVT and Op are presumably declared there) is missing from
// this listing, as are lines 9750, 9787, 9797-9798, 9800 and 9946, so a few
// conditions below appear truncated.
//
// Strategy, in order:
//  1. Fold the trivial masks fcNone / fcAllFlags to constants.
//  2. If Flags.hasNoFPExcept() holds, try masks that map onto one or two
//     floating-point compares.
//  3. Otherwise bitcast to an integer type, OR together one partial integer
//     test per requested class, and invert at the end if the complementary
//     mask was the one actually tested.
9720 const FPClassTest OrigTestMask,
9721 SDNodeFlags Flags, const SDLoc &DL,
9722 SelectionDAG &DAG) const {
9723 EVT OperandVT = Op.getValueType();
9724 assert(OperandVT.isFloatingPoint());
9725 FPClassTest Test = OrigTestMask;
9726
9727 // Degenerate cases: an empty mask never matches, a full mask always does.
9728 if (Test == fcNone)
9729 return DAG.getBoolConstant(false, DL, ResultVT, OperandVT);
9730 if (Test == fcAllFlags)
9731 return DAG.getBoolConstant(true, DL, ResultVT, OperandVT);
9732
9733 // PPC double double is a pair of doubles, of which the higher part determines
9734 // the value class.
9735 if (OperandVT == MVT::ppcf128) {
9736 Op = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::f64, Op,
9737 DAG.getConstant(1, DL, MVT::i32));
9738 OperandVT = MVT::f64;
9739 }
9740
9741 // Floating-point type properties.
9742 EVT ScalarFloatVT = OperandVT.getScalarType();
9743 const Type *FloatTy = ScalarFloatVT.getTypeForEVT(*DAG.getContext());
9744 const llvm::fltSemantics &Semantics = FloatTy->getFltSemantics();
     // f80 is special-cased in the integer tests below because of its explicit
     // integer bit (bit 63, see ExplicitIntBitInF80).
9745 bool IsF80 = (ScalarFloatVT == MVT::f80);
9746
9747 // Some checks can be implemented using float comparisons, if floating point
9748 // exceptions are ignored.
9749 if (Flags.hasNoFPExcept() &&
9751 FPClassTest FPTestMask = Test;
9752 bool IsInvertedFP = false;
9753
     // If invertFPClassTestIfSimpler returns a non-empty mask, test that mask
     // instead; every compare predicate picked below is then the negated one.
9754 if (FPClassTest InvertedFPCheck =
9755 invertFPClassTestIfSimpler(FPTestMask, true)) {
9756 FPTestMask = InvertedFPCheck;
9757 IsInvertedFP = true;
9758 }
9759
9760 ISD::CondCode OrderedCmpOpcode = IsInvertedFP ? ISD::SETUNE : ISD::SETOEQ;
9761 ISD::CondCode UnorderedCmpOpcode = IsInvertedFP ? ISD::SETONE : ISD::SETUEQ;
9762
9763 // See if we can fold an | fcNan into an unordered compare.
9764 FPClassTest OrderedFPTestMask = FPTestMask & ~fcNan;
9765
9766 // Can't fold the ordered check if we're only testing for snan or qnan
9767 // individually.
9768 if ((FPTestMask & fcNan) != fcNan)
9769 OrderedFPTestMask = FPTestMask;
9770
     // True when no "| fcNan" was stripped from the mask, i.e. the ordered
     // predicate has to be used.
9771 const bool IsOrdered = FPTestMask == OrderedFPTestMask;
9772
     // Masks that isFCmpEqualZero recognises become a single compare with 0.0;
     // *IsCmp0 selects the ordered (true) or unordered (false) predicate.
9773 if (std::optional<bool> IsCmp0 =
9774 isFCmpEqualZero(FPTestMask, Semantics, DAG.getMachineFunction());
9775 IsCmp0 && (isCondCodeLegalOrCustom(
9776 *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode,
9777 OperandVT.getScalarType().getSimpleVT()))) {
9778
9779 // If denormals could be implicitly treated as 0, this is not equivalent
9780 // to a compare with 0 since it will also be true for denormals.
9781 return DAG.getSetCC(DL, ResultVT, Op,
9782 DAG.getConstantFP(0.0, DL, OperandVT),
9783 *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode);
9784 }
9785
     // isnan(x) --> x unordered with itself (ordered with itself if inverted).
9786 if (FPTestMask == fcNan &&
9788 OperandVT.getScalarType().getSimpleVT()))
9789 return DAG.getSetCC(DL, ResultVT, Op, Op,
9790 IsInvertedFP ? ISD::SETO : ISD::SETUO);
9791
9792 bool IsOrderedInf = FPTestMask == fcInf;
9793 if ((FPTestMask == fcInf || FPTestMask == (fcInf | fcNan)) &&
9794 isCondCodeLegalOrCustom(IsOrderedInf ? OrderedCmpOpcode
9795 : UnorderedCmpOpcode,
9796 OperandVT.getScalarType().getSimpleVT()) &&
9799 (OperandVT.isVector() &&
9801 // isinf(x) --> fabs(x) == inf
9802 SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
9803 SDValue Inf =
9804 DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
9805 return DAG.getSetCC(DL, ResultVT, Abs, Inf,
9806 IsOrderedInf ? OrderedCmpOpcode : UnorderedCmpOpcode);
9807 }
9808
9809 if ((OrderedFPTestMask == fcPosInf || OrderedFPTestMask == fcNegInf) &&
9810 isCondCodeLegalOrCustom(IsOrdered ? OrderedCmpOpcode
9811 : UnorderedCmpOpcode,
9812 OperandVT.getSimpleVT())) {
9813 // isposinf(x) --> x == inf
9814 // isneginf(x) --> x == -inf
9815 // isposinf(x) || nan --> x u== inf
9816 // isneginf(x) || nan --> x u== -inf
9817
9818 SDValue Inf = DAG.getConstantFP(
9819 APFloat::getInf(Semantics, OrderedFPTestMask == fcNegInf), DL,
9820 OperandVT);
9821 return DAG.getSetCC(DL, ResultVT, Op, Inf,
9822 IsOrdered ? OrderedCmpOpcode : UnorderedCmpOpcode);
9823 }
9824
9825 if (OrderedFPTestMask == (fcSubnormal | fcZero) && !IsOrdered) {
9826 // TODO: Could handle ordered case, but it produces worse code for
9827 // x86. Maybe handle ordered if fabs is free?
9828
9829 ISD::CondCode OrderedOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
9830 ISD::CondCode UnorderedOp = IsInvertedFP ? ISD::SETOGE : ISD::SETULT;
9831
9832 if (isCondCodeLegalOrCustom(IsOrdered ? OrderedOp : UnorderedOp,
9833 OperandVT.getScalarType().getSimpleVT())) {
9834 // (issubnormal(x) || iszero(x)) --> fabs(x) < smallest_normal
9835
9836 // TODO: Maybe only makes sense if fabs is free. Integer test of
9837 // exponent bits seems better for x86.
9838 SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
9839 SDValue SmallestNormal = DAG.getConstantFP(
9840 APFloat::getSmallestNormalized(Semantics), DL, OperandVT);
9841 return DAG.getSetCC(DL, ResultVT, Abs, SmallestNormal,
9842 IsOrdered ? OrderedOp : UnorderedOp);
9843 }
9844 }
9845
9846 if (FPTestMask == fcNormal) {
9847 // TODO: Handle unordered
9848 ISD::CondCode IsFiniteOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
9849 ISD::CondCode IsNormalOp = IsInvertedFP ? ISD::SETOLT : ISD::SETUGE;
9850
9851 if (isCondCodeLegalOrCustom(IsFiniteOp,
9852 OperandVT.getScalarType().getSimpleVT()) &&
9853 isCondCodeLegalOrCustom(IsNormalOp,
9854 OperandVT.getScalarType().getSimpleVT()) &&
9855 isFAbsFree(OperandVT)) {
9856 // isnormal(x) --> fabs(x) < infinity && !(fabs(x) < smallest_normal)
9857 SDValue Inf =
9858 DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
9859 SDValue SmallestNormal = DAG.getConstantFP(
9860 APFloat::getSmallestNormalized(Semantics), DL, OperandVT);
9861
9862 SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
9863 SDValue IsFinite = DAG.getSetCC(DL, ResultVT, Abs, Inf, IsFiniteOp);
9864 SDValue IsNormal =
9865 DAG.getSetCC(DL, ResultVT, Abs, SmallestNormal, IsNormalOp);
     // With both predicates negated, the inverted test is the OR of the two
     // compares instead of the AND.
9866 unsigned LogicOp = IsInvertedFP ? ISD::OR : ISD::AND;
9867 return DAG.getNode(LogicOp, DL, ResultVT, IsFinite, IsNormal);
9868 }
9869 }
9870 }
9871
9872 // Some checks may be represented as inversion of simpler check, for example
9873 // "inf|normal|subnormal|zero" => !"nan".
9874 bool IsInverted = false;
9875
9876 if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test, false)) {
9877 Test = InvertedCheck;
9878 IsInverted = true;
9879 }
9880
9881 // In the general case use integer operations.
9882 unsigned BitSize = OperandVT.getScalarSizeInBits();
9883 EVT IntVT = OperandVT.changeElementType(
9884 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), BitSize));
9885 SDValue OpAsInt = DAG.getBitcast(IntVT, Op);
9886
9887 // Various masks.
9888 APInt SignBit = APInt::getSignMask(BitSize);
9889 APInt ValueMask = APInt::getSignedMaxValue(BitSize); // All bits but sign.
9890 APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
9891 const unsigned ExplicitIntBitInF80 = 63;
     // For f80 the bit pattern of +inf also has the explicit integer bit set;
     // clear it so that ExpMask covers the exponent field only.
9892 APInt ExpMask = Inf;
9893 if (IsF80)
9894 ExpMask.clearBit(ExplicitIntBitInF80);
9895 APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
     // The quiet bit is the most significant bit of AllOneMantissa.
9896 APInt QNaNBitMask =
9897 APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
     // All-ones value of the result type; XOR with it negates a ResultVT value.
9898 APInt InversionMask = APInt::getAllOnes(ResultVT.getScalarSizeInBits());
9899
9900 SDValue ValueMaskV = DAG.getConstant(ValueMask, DL, IntVT);
9901 SDValue SignBitV = DAG.getConstant(SignBit, DL, IntVT);
9902 SDValue ExpMaskV = DAG.getConstant(ExpMask, DL, IntVT);
9903 SDValue ZeroV = DAG.getConstant(0, DL, IntVT);
9904 SDValue InfV = DAG.getConstant(Inf, DL, IntVT);
9905 SDValue ResultInversionMask = DAG.getConstant(InversionMask, DL, ResultVT);
9906
     // Accumulated result. appendResult ORs a partial test into it and ignores
     // a null PartialRes.
9907 SDValue Res;
9908 const auto appendResult = [&](SDValue PartialRes) {
9909 if (PartialRes) {
9910 if (Res)
9911 Res = DAG.getNode(ISD::OR, DL, ResultVT, Res, PartialRes);
9912 else
9913 Res = PartialRes;
9914 }
9915 };
9916
     // Built on first use by getIntBitIsSet and cached for later calls.
9917 SDValue IntBitIsSetV; // Explicit integer bit in f80 mantissa is set.
9918 const auto getIntBitIsSet = [&]() -> SDValue {
9919 if (!IntBitIsSetV) {
9920 APInt IntBitMask(BitSize, 0);
9921 IntBitMask.setBit(ExplicitIntBitInF80);
9922 SDValue IntBitMaskV = DAG.getConstant(IntBitMask, DL, IntVT);
9923 SDValue IntBitV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, IntBitMaskV);
9924 IntBitIsSetV = DAG.getSetCC(DL, ResultVT, IntBitV, ZeroV, ISD::SETNE);
9925 }
9926 return IntBitIsSetV;
9927 };
9928
9929 // Split the value into sign bit and absolute value.
9930 SDValue AbsV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ValueMaskV);
     // Sign bit set <=> the bit pattern is negative as a signed integer.
9931 SDValue SignV = DAG.getSetCC(DL, ResultVT, OpAsInt,
9932 DAG.getConstant(0, DL, IntVT), ISD::SETLT);
9933
9934 // Tests that involve more than one class should be processed first.
9935 SDValue PartialRes;
9936
9937 if (IsF80)
9938 ; // Detect finite numbers of f80 by checking individual classes because
9939 // they have different settings of the explicit integer bit.
9940 else if ((Test & fcFinite) == fcFinite) {
9941 // finite(V) ==> (V << 1) <u (exp_mask << 1); the shift drops the sign bit.
9942 //
9943 // See https://github.com/llvm/llvm-project/issues/169270, this is slightly
9944 // shorter than the `finite(V) ==> abs(V) < exp_mask` formula used before.
9945
9947 "finite check requires IEEE-like FP");
9948
9949 SDValue One = DAG.getShiftAmountConstant(1, IntVT, DL);
9950 SDValue TwiceOp = DAG.getNode(ISD::SHL, DL, IntVT, OpAsInt, One);
9951 SDValue TwiceInf = DAG.getNode(ISD::SHL, DL, IntVT, ExpMaskV, One);
9952
9953 PartialRes = DAG.getSetCC(DL, ResultVT, TwiceOp, TwiceInf, ISD::SETULT);
9954 Test &= ~fcFinite;
9955 } else if ((Test & fcFinite) == fcPosFinite) {
9956 // finite(V) && V > 0 ==> V < exp_mask
9957 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ExpMaskV, ISD::SETULT);
9958 Test &= ~fcPosFinite;
9959 } else if ((Test & fcFinite) == fcNegFinite) {
9960 // finite(V) && V < 0 ==> abs(V) < exp_mask && signbit == 1
9961 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
9962 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
9963 Test &= ~fcNegFinite;
9964 }
     // PartialRes is still null here if none of the finite shortcuts applied.
9965 appendResult(PartialRes);
9966
9967 if (FPClassTest PartialCheck = Test & (fcZero | fcSubnormal)) {
9968 // fcZero | fcSubnormal => test all exponent bits are 0
9969 // TODO: Handle sign bit specific cases
9970 if (PartialCheck == (fcZero | fcSubnormal)) {
9971 SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ExpMaskV);
9972 SDValue ExpIsZero =
9973 DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
9974 appendResult(ExpIsZero);
9975 Test &= ~PartialCheck & fcAllFlags;
9976 }
9977 }
9978
9979 // Check for individual classes.
9980
9981 if (unsigned PartialCheck = Test & fcZero) {
9982 if (PartialCheck == fcPosZero)
9983 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ZeroV, ISD::SETEQ);
9984 else if (PartialCheck == fcZero)
9985 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ZeroV, ISD::SETEQ);
9986 else // fcNegZero
9987 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, SignBitV, ISD::SETEQ);
9988 appendResult(PartialRes);
9989 }
9990
9991 if (unsigned PartialCheck = Test & fcSubnormal) {
9992 // issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set)
9993 // issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set)
9994 SDValue V = (PartialCheck == fcPosSubnormal) ? OpAsInt : AbsV;
9995 SDValue MantissaV = DAG.getConstant(AllOneMantissa, DL, IntVT);
9996 SDValue VMinusOneV =
9997 DAG.getNode(ISD::SUB, DL, IntVT, V, DAG.getConstant(1, DL, IntVT));
9998 PartialRes = DAG.getSetCC(DL, ResultVT, VMinusOneV, MantissaV, ISD::SETULT);
9999 if (PartialCheck == fcNegSubnormal)
10000 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
10001 appendResult(PartialRes);
10002 }
10003
10004 if (unsigned PartialCheck = Test & fcInf) {
10005 if (PartialCheck == fcPosInf)
10006 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, InfV, ISD::SETEQ);
10007 else if (PartialCheck == fcInf)
10008 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETEQ);
10009 else { // fcNegInf
10010 APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
10011 SDValue NegInfV = DAG.getConstant(NegInf, DL, IntVT);
10012 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, NegInfV, ISD::SETEQ);
10013 }
10014 appendResult(PartialRes);
10015 }
10016
10017 if (unsigned PartialCheck = Test & fcNan) {
10018 APInt InfWithQnanBit = Inf | QNaNBitMask;
10019 SDValue InfWithQnanBitV = DAG.getConstant(InfWithQnanBit, DL, IntVT);
10020 if (PartialCheck == fcNan) {
10021 // isnan(V) ==> abs(V) > int(inf)
10022 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
10023 if (IsF80) {
10024 // Recognize unsupported values as NaNs for compatibility with glibc.
10025 // In them (exp(V)==0) == int_bit.
10026 SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, AbsV, ExpMaskV);
10027 SDValue ExpIsZero =
10028 DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
10029 SDValue IsPseudo =
10030 DAG.getSetCC(DL, ResultVT, getIntBitIsSet(), ExpIsZero, ISD::SETEQ);
10031 PartialRes = DAG.getNode(ISD::OR, DL, ResultVT, PartialRes, IsPseudo);
10032 }
10033 } else if (PartialCheck == fcQNan) {
10034 // isquiet(V) ==> abs(V) >= (unsigned(Inf) | quiet_bit)
10035 PartialRes =
10036 DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETGE);
10037 } else { // fcSNan
10038 // issignaling(V) ==> abs(V) > unsigned(Inf) &&
10039 // abs(V) < (unsigned(Inf) | quiet_bit)
10040 SDValue IsNan = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
10041 SDValue IsNotQnan =
10042 DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETLT);
10043 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, IsNan, IsNotQnan);
10044 }
10045 appendResult(PartialRes);
10046 }
10047
10048 if (unsigned PartialCheck = Test & fcNormal) {
10049 // isnormal(V) ==> (0 < exp < max_exp) ==> (unsigned(exp-1) < (max_exp-1))
      // ExpLSB is the lowest set bit of ExpMask, i.e. an exponent value of 1.
10050 APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
10051 SDValue ExpLSBV = DAG.getConstant(ExpLSB, DL, IntVT);
10052 SDValue ExpMinus1 = DAG.getNode(ISD::SUB, DL, IntVT, AbsV, ExpLSBV);
10053 APInt ExpLimit = ExpMask - ExpLSB;
10054 SDValue ExpLimitV = DAG.getConstant(ExpLimit, DL, IntVT);
10055 PartialRes = DAG.getSetCC(DL, ResultVT, ExpMinus1, ExpLimitV, ISD::SETULT);
10056 if (PartialCheck == fcNegNormal)
10057 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
10058 else if (PartialCheck == fcPosNormal) {
10059 SDValue PosSignV =
10060 DAG.getNode(ISD::XOR, DL, ResultVT, SignV, ResultInversionMask);
10061 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, PosSignV);
10062 }
      // For f80 the test additionally requires the explicit integer bit.
10063 if (IsF80)
10064 PartialRes =
10065 DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, getIntBitIsSet());
10066 appendResult(PartialRes);
10067 }
10068
      // No partial test was emitted: the (possibly inverted) test is constant.
10069 if (!Res)
10070 return DAG.getConstant(IsInverted, DL, ResultVT);
10071 if (IsInverted)
10072 Res = DAG.getNode(ISD::XOR, DL, ResultVT, Res, ResultInversionMask);
10073 return Res;
10074}
10075
10076// Only expand vector types if we have the appropriate vector bit operations.
// Returns true when TLI reports the operations used by the generic CTPOP
// expansion as legal or custom for the vector type VT. The visible checks cover
// ISD::ADD and, unless the elements are 8 bits wide, ISD::MUL.
// NOTE(review): listing lines 10081-10082 and 10084 are missing from this
// listing, so the remaining checks and the end of the return expression are
// not visible here.
10077static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) {
10078 assert(VT.isVector() && "Expected vector type");
10079 unsigned Len = VT.getScalarSizeInBits();
10080 return TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
10083 (Len == 8 || TLI.isOperationLegalOrCustom(ISD::MUL, VT)) &&
10085}
10086
// Body of the generic population-count expansion (presumably
// TargetLowering::expandCTPOP). It rewrites the count of set bits of
// Node's operand 0 as shifts, masks, adds and optionally a multiply, and
// returns an empty SDValue when the type cannot be handled.
// NOTE(review): the signature (listing line 10087) and the `if` that opens the
// multiply branch (lines 10148-10149) are missing from this listing.
10088 SDLoc dl(Node);
10089 EVT VT = Node->getValueType(0);
10090 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
10091 SDValue Op = Node->getOperand(0);
10092 unsigned Len = VT.getScalarSizeInBits();
10093 assert(VT.isInteger() && "CTPOP not implemented for this type.");
10094
10095 // TODO: Add support for irregular type lengths.
      // Only element widths that are a multiple of 8 and at most 128 bits are
      // handled; the byte-splat masks below rely on that.
10096 if (!(Len <= 128 && Len % 8 == 0))
10097 return SDValue();
10098
10099 // Only expand vector types if we have the appropriate vector bit operations.
10100 if (VT.isVector() && !canExpandVectorCTPOP(*this, VT))
10101 return SDValue();
10102
10103 // This is the "best" algorithm from
10104 // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
      // Each mask is one byte pattern replicated across all Len bits.
10105 SDValue Mask55 =
10106 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
10107 SDValue Mask33 =
10108 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
10109 SDValue Mask0F =
10110 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
10111
10112 // v = v - ((v >> 1) & 0x55555555...)
10113 Op = DAG.getNode(ISD::SUB, dl, VT, Op,
10114 DAG.getNode(ISD::AND, dl, VT,
10115 DAG.getNode(ISD::SRL, dl, VT, Op,
10116 DAG.getConstant(1, dl, ShVT)),
10117 Mask55));
10118 // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
10119 Op = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
10120 DAG.getNode(ISD::AND, dl, VT,
10121 DAG.getNode(ISD::SRL, dl, VT, Op,
10122 DAG.getConstant(2, dl, ShVT)),
10123 Mask33));
10124 // v = (v + (v >> 4)) & 0x0F0F0F0F...
10125 Op = DAG.getNode(ISD::AND, dl, VT,
10126 DAG.getNode(ISD::ADD, dl, VT, Op,
10127 DAG.getNode(ISD::SRL, dl, VT, Op,
10128 DAG.getConstant(4, dl, ShVT))),
10129 Mask0F);
10130
      // Every byte now holds its own bit count, so a single-byte type is done.
10131 if (Len <= 8)
10132 return Op;
10133
10134 // Avoid the multiply if we only have 2 bytes to add.
10135 // TODO: Only doing this for scalars because vectors weren't as obviously
10136 // improved.
10137 if (Len == 16 && !VT.isVector()) {
10138 // v = (v + (v >> 8)) & 0x00FF;
10139 return DAG.getNode(ISD::AND, dl, VT,
10140 DAG.getNode(ISD::ADD, dl, VT, Op,
10141 DAG.getNode(ISD::SRL, dl, VT, Op,
10142 DAG.getConstant(8, dl, ShVT))),
10143 DAG.getConstant(0xFF, dl, VT));
10144 }
10145
10146 // v = (v * 0x01010101...) >> (Len - 8)
      // Two ways to sum the per-byte counts into the top byte: one multiply by
      // the 0x01 byte splat, or (else branch) a shift-and-add chain with shift
      // amounts 8, 16, ... below Len.
10147 SDValue V;
10150 SDValue Mask01 =
10151 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
10152 V = DAG.getNode(ISD::MUL, dl, VT, Op, Mask01);
10153 } else {
10154 V = Op;
10155 for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
10156 SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
10157 V = DAG.getNode(ISD::ADD, dl, VT, V,
10158 DAG.getNode(ISD::SHL, dl, VT, V, ShiftC));
10159 }
10160 }
      // The total sits in the most significant byte; shift it down.
10161 return DAG.getNode(ISD::SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT));
10162}
10163
// Body of the vector-predicated population-count expansion (presumably
// TargetLowering::expandVPCTPOP). It emits the same bit-counting sequence as
// the non-VP expansion, but every node is a VP_* node that carries the
// predicate Mask (operand 1) and the vector length VL (operand 2).
// NOTE(review): the signature (listing line 10164) and the start of the `if`
// that selects the VP_MUL branch (line 10215) are missing from this listing.
10165 SDLoc dl(Node);
10166 EVT VT = Node->getValueType(0);
10167 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
10168 SDValue Op = Node->getOperand(0);
10169 SDValue Mask = Node->getOperand(1);
10170 SDValue VL = Node->getOperand(2);
10171 unsigned Len = VT.getScalarSizeInBits();
10172 assert(VT.isInteger() && "VP_CTPOP not implemented for this type.");
10173
10174 // TODO: Add support for irregular type lengths.
10175 if (!(Len <= 128 && Len % 8 == 0))
10176 return SDValue();
10177
10178 // This is the same algorithm as expandCTPOP, taken from
10179 // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
10180 SDValue Mask55 =
10181 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
10182 SDValue Mask33 =
10183 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
10184 SDValue Mask0F =
10185 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
10186
10187 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5;
10188
10189 // v = v - ((v >> 1) & 0x55555555...)
10190 Tmp1 = DAG.getNode(ISD::VP_AND, dl, VT,
10191 DAG.getNode(ISD::VP_SRL, dl, VT, Op,
10192 DAG.getConstant(1, dl, ShVT), Mask, VL),
10193 Mask55, Mask, VL);
10194 Op = DAG.getNode(ISD::VP_SUB, dl, VT, Op, Tmp1, Mask, VL);
10195
10196 // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
10197 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Op, Mask33, Mask, VL);
10198 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT,
10199 DAG.getNode(ISD::VP_SRL, dl, VT, Op,
10200 DAG.getConstant(2, dl, ShVT), Mask, VL),
10201 Mask33, Mask, VL);
10202 Op = DAG.getNode(ISD::VP_ADD, dl, VT, Tmp2, Tmp3, Mask, VL);
10203
10204 // v = (v + (v >> 4)) & 0x0F0F0F0F...
      // NOTE(review): the Tmp4 statement below ends with ',' rather than ';',
      // so it is chained to the Tmp5 assignment by the comma operator. The
      // evaluation order is unchanged, but ';' was probably intended.
10205 Tmp4 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(4, dl, ShVT),
10206 Mask, VL),
10207 Tmp5 = DAG.getNode(ISD::VP_ADD, dl, VT, Op, Tmp4, Mask, VL);
10208 Op = DAG.getNode(ISD::VP_AND, dl, VT, Tmp5, Mask0F, Mask, VL);
10209
      // Every byte now holds its own bit count, so a single-byte type is done.
10210 if (Len <= 8)
10211 return Op;
10212
10213 // v = (v * 0x01010101...) >> (Len - 8)
      // Sum the per-byte counts into the top byte either with one VP_MUL by
      // the 0x01 byte splat or (else branch) with a VP_SHL / VP_ADD chain.
10214 SDValue V;
10216 ISD::VP_MUL, getTypeToTransformTo(*DAG.getContext(), VT))) {
10217 SDValue Mask01 =
10218 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
10219 V = DAG.getNode(ISD::VP_MUL, dl, VT, Op, Mask01, Mask, VL);
10220 } else {
10221 V = Op;
10222 for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
10223 SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
10224 V = DAG.getNode(ISD::VP_ADD, dl, VT, V,
10225 DAG.getNode(ISD::VP_SHL, dl, VT, V, ShiftC, Mask, VL),
10226 Mask, VL);
10227 }
10228 }
10229 return DAG.getNode(ISD::VP_SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT),
10230 Mask, VL);
10231}
10232
// Body of the generic count-leading-zeros expansion (presumably
// TargetLowering::expandCTLZ); Node is either ISD::CTLZ or
// ISD::CTLZ_ZERO_POISON. It prefers the sibling opcode where one of the
// (partly missing) conditions below selects it, and otherwise falls back to a
// bit-smearing sequence followed by CTPOP.
// NOTE(review): the signature (listing line 10233) and the condition lines
// 10242, 10246, 10259 and 10261-10262 are missing from this listing, so the
// exact legality checks are not visible here.
10234 SDLoc dl(Node);
10235 EVT VT = Node->getValueType(0);
10236 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
10237 SDValue Op = Node->getOperand(0);
10238 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
10239
10240 // If the non-ZERO_POISON version is supported we can use that instead.
10241 if (Node->getOpcode() == ISD::CTLZ_ZERO_POISON &&
10243 return DAG.getNode(ISD::CTLZ, dl, VT, Op);
10244
10245 // If the ZERO_POISON version is supported use that and handle the zero case.
      // A zero input selects NumBitsPerElt instead of the CTLZ_ZERO_POISON
      // result.
10247 EVT SetCCVT =
10248 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10249 SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_POISON, dl, VT, Op);
10250 SDValue Zero = DAG.getConstant(0, dl, VT);
10251 SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
10252 return DAG.getSelect(dl, VT, SrcIsZero,
10253 DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ);
10254 }
10255
10256 // Only expand vector types if we have the appropriate vector bit operations.
10257 // This includes the operations needed to expand CTPOP if it isn't supported.
10258 if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
10260 !canExpandVectorCTPOP(*this, VT)) ||
10263 return SDValue();
10264
10265 // for now, we do this:
10266 // x = x | (x >> 1);
10267 // x = x | (x >> 2);
10268 // ...
10269 // x = x | (x >>16);
10270 // x = x | (x >>32); // for 64-bit input
10271 // return popcount(~x);
10272 //
10273 // Ref: "Hacker's Delight" by Henry Warren
      // Shift amounts are 1, 2, 4, ... while smaller than the element width.
10274 for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
10275 SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
10276 Op = DAG.getNode(ISD::OR, dl, VT, Op,
10277 DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));
10278 }
10279 Op = DAG.getNOT(dl, Op, VT);
10280 return DAG.getNode(ISD::CTPOP, dl, VT, Op);
10281}
10282
// Body of the vector-predicated count-leading-zeros expansion (presumably
// TargetLowering::expandVPCTLZ). It smears the highest set bit into every
// lower bit position, inverts the value and counts the remaining set bits.
// Every emitted node is a VP_* node carrying Mask (operand 1) and VL
// (operand 2). No legality checks are performed in the visible body.
// NOTE(review): the signature (listing line 10283) is missing from this
// listing.
10284 SDLoc dl(Node);
10285 EVT VT = Node->getValueType(0);
10286 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
10287 SDValue Op = Node->getOperand(0);
10288 SDValue Mask = Node->getOperand(1);
10289 SDValue VL = Node->getOperand(2);
10290 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
10291
10292 // do this:
10293 // x = x | (x >> 1);
10294 // x = x | (x >> 2);
10295 // ...
10296 // x = x | (x >>16);
10297 // x = x | (x >>32); // for 64-bit input
10298 // return popcount(~x);
      // Shift amounts are 1, 2, 4, ... while smaller than the element width.
10299 for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
10300 SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
10301 Op = DAG.getNode(ISD::VP_OR, dl, VT, Op,
10302 DAG.getNode(ISD::VP_SRL, dl, VT, Op, Tmp, Mask, VL), Mask,
10303 VL);
10304 }
      // ~x is expressed as a VP_XOR with all ones.
10305 Op = DAG.getNode(ISD::VP_XOR, dl, VT, Op, DAG.getAllOnesConstant(dl, VT),
10306 Mask, VL);
10307 return DAG.getNode(ISD::VP_CTPOP, dl, VT, Op, Mask, VL);
10308}
10309
// Body of the count-leading-sign-bits expansion (presumably
// TargetLowering::expandCTLS). The result is expressed through
// ISD::CTLZ_ZERO_POISON on a value derived from Node's operand 0.
// NOTE(review): the signature (listing line 10310) is missing from this
// listing.
10311 SDLoc dl(Node);
10312 EVT VT = Node->getValueType(0);
      // The operand is frozen before use; presumably because it feeds both the
      // SRA and the XOR below -- TODO confirm.
10313 SDValue Op = DAG.getFreeze(Node->getOperand(0));
10314 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
10315
10316 // CTLS(x) = CTLZ(OR(SHL(XOR(x, SRA(x, BW-1)), 1), 1))
10317 // This transforms the sign bits into leading zeros that can be counted.
10318 SDValue ShiftAmt = DAG.getShiftAmountConstant(NumBitsPerElt - 1, VT, dl);
      // Arithmetic shift by BW-1 replicates the sign bit into every position.
10319 SDValue SignBit = DAG.getNode(ISD::SRA, dl, VT, Op, ShiftAmt);
10320 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, SignBit);
10321 SDValue Shl =
10322 DAG.getNode(ISD::SHL, dl, VT, Xor, DAG.getShiftAmountConstant(1, VT, dl));
      // OR-ing in bit 0 makes the CTLZ input non-zero, so the ZERO_POISON
      // variant never sees a zero here.
10323 SDValue Or = DAG.getNode(ISD::OR, dl, VT, Shl, DAG.getConstant(1, dl, VT));
10324 return DAG.getNode(ISD::CTLZ_ZERO_POISON, dl, VT, Or);
10325}
10326
// Count-trailing-zeros via a De Bruijn multiply and a byte table stored in the
// constant pool (called as CTTZTableLookup elsewhere in this file). Only bit
// widths 32 and 64 are handled; an empty SDValue is returned otherwise.
// The index is ((Op & -Op) * DeBruijn) >> (BitWidth - log2(BitWidth)): Op & -Op
// keeps only the lowest set bit of Op.
// NOTE(review): the first signature line (listing line 10327), the condition
// of the second early exit (10334), the PtrInfo initialiser (10340), line
// 10348 and the declaration of Table (10350) are missing from this listing.
10328 const SDLoc &DL, EVT VT, SDValue Op,
10329 unsigned BitWidth) const {
10330 if (BitWidth != 32 && BitWidth != 64)
10331 return SDValue();
10332
10333 const DataLayout &TD = DAG.getDataLayout();
10335 return SDValue();
10336
10337 APInt DeBruijn = BitWidth == 32 ? APInt(32, 0x077CB531U)
10338 : APInt(64, 0x0218A392CD3D5DBFULL);
10339 MachinePointerInfo PtrInfo =
      // After the shift only the top log2(BitWidth) bits of the product remain.
10341 unsigned ShiftAmt = BitWidth - Log2_32(BitWidth);
10342 SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
10343 SDValue Lookup = DAG.getNode(
10344 ISD::SRL, DL, VT,
10345 DAG.getNode(ISD::MUL, DL, VT, DAG.getNode(ISD::AND, DL, VT, Op, Neg),
10346 DAG.getConstant(DeBruijn, DL, VT)),
10347 DAG.getShiftAmountConstant(ShiftAmt, VT, DL));
10349
      // Fill the table: the index produced for an input whose lowest set bit
      // is bit i maps back to i.
10351 for (unsigned i = 0; i < BitWidth; i++) {
10352 APInt Shl = DeBruijn.shl(i);
10353 APInt Lshr = Shl.lshr(ShiftAmt);
10354 Table[Lshr.getZExtValue()] = i;
10355 }
10356
10357 // Create a ConstantArray in Constant Pool
10358 auto *CA = ConstantDataArray::get(*DAG.getContext(), Table);
10359 SDValue CPIdx = DAG.getConstantPool(CA, getPointerTy(TD),
10360 TD.getPrefTypeAlign(CA->getType()));
      // Entries are loaded as i8 and zero-extended to VT.
10361 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getEntryNode(),
10362 DAG.getMemBasePlusOffset(CPIdx, Lookup, DL),
10363 PtrInfo, MVT::i8);
      // For the ZERO_POISON opcode the table value is returned as is.
10364 if (Node->getOpcode() == ISD::CTTZ_ZERO_POISON)
10365 return ExtLoad;
10366
      // Otherwise a zero input selects BitWidth instead of the table value.
10367 EVT SetCCVT =
10368 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10369 SDValue Zero = DAG.getConstant(0, DL, VT);
10370 SDValue SrcIsZero = DAG.getSetCC(DL, SetCCVT, Op, Zero, ISD::SETEQ);
10371 return DAG.getSelect(DL, VT, SrcIsZero,
10372 DAG.getConstant(BitWidth, DL, VT), ExtLoad);
10373}
10374
// Body of the generic count-trailing-zeros expansion (presumably
// TargetLowering::expandCTTZ); Node is either ISD::CTTZ or
// ISD::CTTZ_ZERO_POISON. It tries, in order: the sibling opcode, a table
// lookup, and finally popcount(~x & (x - 1)) or its CTLZ-based variant.
// NOTE(review): the signature (listing line 10375) and the condition lines
// 10383, 10387, 10400-10401, 10403-10405, 10410-10411 and 10424 are missing
// from this listing, so the exact legality checks are not visible here.
10376 SDLoc dl(Node);
10377 EVT VT = Node->getValueType(0);
10378 SDValue Op = Node->getOperand(0);
10379 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
10380
10381 // If the non-ZERO_POISON version is supported we can use that instead.
10382 if (Node->getOpcode() == ISD::CTTZ_ZERO_POISON &&
10384 return DAG.getNode(ISD::CTTZ, dl, VT, Op);
10385
10386 // If the ZERO_POISON version is supported use that and handle the zero case.
      // A zero input selects NumBitsPerElt instead of the CTTZ_ZERO_POISON
      // result.
10388 EVT SetCCVT =
10389 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10390 SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_POISON, dl, VT, Op);
10391 SDValue Zero = DAG.getConstant(0, dl, VT);
10392 SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
10393 return DAG.getSelect(dl, VT, SrcIsZero,
10394 DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ);
10395 }
10396
10397 // Only expand vector types if we have the appropriate vector bit operations.
10398 // This includes the operations needed to expand CTPOP if it isn't supported.
10399 if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
10402 !canExpandVectorCTPOP(*this, VT)) ||
10406 return SDValue();
10407
10408 // Emit Table Lookup if ISD::CTPOP used in the fallback path below is going
10409 // to be expanded or converted to a libcall.
      // An empty result from CTTZTableLookup falls through to the generic
      // sequence below.
10412 if (SDValue V = CTTZTableLookup(Node, DAG, dl, VT, Op, NumBitsPerElt))
10413 return V;
10414
10415 // for now, we use: { return popcount(~x & (x - 1)); }
10416 // unless the target has ctlz but not ctpop, in which case we use:
10417 // { return 32 - nlz(~x & (x-1)); }
10418 // Ref: "Hacker's Delight" by Henry Warren
      // Tmp has exactly the trailing-zero positions of x set.
10419 SDValue Tmp = DAG.getNode(
10420 ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT),
10421 DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getConstant(1, dl, VT)));
10422
10423 // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
10425 return DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
10426 DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
10427 }
10428
10429 return DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
10430}
10431
// Body of the vector-predicated count-trailing-zeros expansion (presumably
// TargetLowering::expandVPCTTZ). It computes popcount(~x & (x - 1)) with VP_*
// nodes that all carry Mask (operand 1) and VL (operand 2). No legality checks
// are performed in the visible body.
// NOTE(review): the signature (listing line 10432) is missing from this
// listing.
10433 SDValue Op = Node->getOperand(0);
10434 SDValue Mask = Node->getOperand(1);
10435 SDValue VL = Node->getOperand(2);
10436 SDLoc dl(Node);
10437 EVT VT = Node->getValueType(0);
10438
10439 // Same as the vector part of expandCTTZ, use: popcount(~x & (x - 1))
      // ~x is expressed as a VP_XOR with all ones.
10440 SDValue Not = DAG.getNode(ISD::VP_XOR, dl, VT, Op,
10441 DAG.getAllOnesConstant(dl, VT), Mask, VL);
10442 SDValue MinusOne = DAG.getNode(ISD::VP_SUB, dl, VT, Op,
10443 DAG.getConstant(1, dl, VT), Mask, VL);
10444 SDValue Tmp = DAG.getNode(ISD::VP_AND, dl, VT, Not, MinusOne, Mask, VL);
10445 return DAG.getNode(ISD::VP_CTPOP, dl, VT, Tmp, Mask, VL);
10446}
10447
// Expands a vector-predicated "count trailing zero elements" node (presumably
// TargetLowering::expandVPCTTZElements) into a select over a step vector
// followed by an unsigned-minimum reduction. N's operands are the source
// vector, the mask and the explicit vector length (EVL).
// NOTE(review): the first signature line (listing line 10448) is missing from
// this listing.
10449 SelectionDAG &DAG) const {
10450 // %cond = to_bool_vec %source
10451 // %splat = splat /*val=*/VL
10452 // %tz = step_vector
10453 // %v = vp.select %cond, /*true=*/tz, /*false=*/%splat
10454 // %r = vp.reduce.umin %v
10455 SDLoc DL(N);
10456 SDValue Source = N->getOperand(0);
10457 SDValue Mask = N->getOperand(1);
10458 SDValue EVL = N->getOperand(2);
10459 EVT SrcVT = Source.getValueType();
10460 EVT ResVT = N->getValueType(0);
      // Vector of ResVT elements with the same element count as the source.
10461 EVT ResVecVT =
10462 EVT::getVectorVT(*DAG.getContext(), ResVT, SrcVT.getVectorElementCount());
10463
10464 // Convert to boolean vector.
      // A non-i1 source is compared against zero with SETNE.
10465 if (SrcVT.getScalarType() != MVT::i1) {
10466 SDValue AllZero = DAG.getConstant(0, DL, SrcVT);
10467 SrcVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
10468 SrcVT.getVectorElementCount());
10469 Source = DAG.getNode(ISD::VP_SETCC, DL, SrcVT, Source, AllZero,
10470 DAG.getCondCode(ISD::SETNE), Mask, EVL);
10471 }
10472
      // EVL, converted to the result type, is both the value used for lanes
      // whose condition is false and the start value of the reduction.
10473 SDValue ExtEVL = DAG.getZExtOrTrunc(EVL, DL, ResVT);
10474 SDValue Splat = DAG.getSplat(ResVecVT, DL, ExtEVL);
10475 SDValue StepVec = DAG.getStepVector(DL, ResVecVT);
      // The VP_SELECT is built with EVL but without Mask; Mask is applied by
      // the reduction below.
10476 SDValue Select =
10477 DAG.getNode(ISD::VP_SELECT, DL, ResVecVT, Source, StepVec, Splat, EVL);
10478 return DAG.getNode(ISD::VP_REDUCE_UMIN, DL, ResVT, ExtEVL, Select, Mask, EVL);
10479}
10480
10481/// Returns a type-legalized version of \p Mask as the first item in the
10482/// pair. The second item contains a type-legalized step vector that's
10483/// guaranteed to fit the number of elements in \p Mask.
10484/// If the stepvector would require splitting, returns an empty SDValue
10485/// as the second item to signal that the operation should be split instead.
// NOTE(review): the signature line carrying the function name and the leading
// parameters (listing line 10487) and the declaration of TypeAction (line
// 10511) are missing from this listing.
10486 static std::pair<SDValue, SDValue>
10488 SelectionDAG &DAG) {
10489 EVT MaskVT = Mask.getValueType();
10490 EVT BoolVT = MaskVT.getScalarType();
10491
10492 // Find a suitable type for a stepvector.
10493 // If zero is poison, we can assume the upper limit of the result is VF-1.
10494 ConstantRange VScaleRange(1, /*isFullSet=*/true); // Fixed length default.
10495 if (MaskVT.isScalableVector())
10496 VScaleRange = getVScaleRange(&DAG.getMachineFunction().getFunction(), 64);
10497 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10498 uint64_t EltWidth = TLI.getBitWidthForCttzElements(
10499 EVT(TLI.getVectorIdxTy(DAG.getDataLayout())),
10500 MaskVT.getVectorElementCount(), ZeroIsPoison, &VScaleRange);
10501 // If the step vector element type is smaller than the mask element type,
10502 // use the mask type directly to avoid widening issues.
10503 EltWidth = std::max(EltWidth, BoolVT.getFixedSizeInBits());
10504 EVT StepVT = MVT::getIntegerVT(EltWidth);
10505 EVT StepVecVT = MaskVT.changeVectorElementType(*DAG.getContext(), StepVT);
10506
10507 // If promotion or widening is required to make the type legal, do it here.
10508 // Promotion of integers within LegalizeVectorOps is looking for types of
10509 // the same size but with a smaller number of larger elements, not the usual
10510 // larger size with the same number of larger elements.
10512 TLI.getTypeAction(*DAG.getContext(), StepVecVT);
10513 SDValue StepVec;
10514 if (TypeAction == TargetLowering::TypePromoteInteger) {
10515 StepVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), StepVecVT);
10516 StepVec = DAG.getStepVector(DL, StepVecVT);
10517 } else if (TypeAction == TargetLowering::TypeWidenVector) {
10518 // For widening, the element count changes. Create a step vector with only
10519 // the original elements valid and poison for padding. Also widen the mask.
10520 EVT WideVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), StepVecVT);
10521 unsigned WideNumElts = WideVecVT.getVectorNumElements();
10522
10523 // Build widened step vector: <0, 1, ..., OrigNumElts-1, poison, poison, ..>
10524 SDValue OrigStepVec = DAG.getStepVector(DL, StepVecVT);
      // Despite its name, UndefStep is a poison vector (getPOISON).
10525 SDValue UndefStep = DAG.getPOISON(WideVecVT);
10526 StepVec = DAG.getInsertSubvector(DL, UndefStep, OrigStepVec, 0);
10527
10528 // Widen mask: pad with zeros.
10529 EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), BoolVT, WideNumElts);
10530 SDValue ZeroMask = DAG.getConstant(0, DL, WideMaskVT);
10531 Mask = DAG.getInsertSubvector(DL, ZeroMask, Mask, 0);
10532 } else if (TypeAction == TargetLowering::TypeSplitVector) {
10533 // The stepvector type would require splitting. Signal to the caller
10534 // that the operation should be split instead of expanded.
10535 return {Mask, SDValue()};
10536 } else {
      // Any other type action: use the step vector type unchanged.
10537 StepVec = DAG.getStepVector(DL, StepVecVT);
10538 }
10539
10540 return {Mask, StepVec};
10541}
10542
// Expands ISD::VECTOR_FIND_LAST_ACTIVE (presumably
// TargetLowering::expandVectorFindLastActive): returns the index of the
// highest active lane of the mask in N's operand 0, computed as the unsigned
// maximum of a step vector whose inactive lanes are zeroed. If the step vector
// type would need splitting, the mask is split instead and the two halves are
// combined with a select.
// NOTE(review): the first signature line (listing line 10543) and the start of
// the statement that defines Cond (line 10567) are missing from this listing.
10544 SelectionDAG &DAG) const {
10545 SDLoc DL(N);
10546 auto [Mask, StepVec] = getLegalMaskAndStepVector(
10547 N->getOperand(0), /*ZeroIsPoison=*/true, DL, DAG);
10548
10549 // If StepVec is empty, the stepvector would require splitting.
10550 // Split the operation instead and let it be recursively legalized.
10551 if (!StepVec) {
10552 EVT MaskVT = N->getOperand(0).getValueType();
10553 EVT ResVT = N->getValueType(0);
10554
10555 // Split the mask
10556 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(MaskVT);
10557 auto [MaskLo, MaskHi] = DAG.SplitVector(N->getOperand(0), DL);
10558
10559 // Create split VECTOR_FIND_LAST_ACTIVE operations
10560 SDValue LoResult =
10561 DAG.getNode(ISD::VECTOR_FIND_LAST_ACTIVE, DL, ResVT, MaskLo);
10562 SDValue HiResult =
10563 DAG.getNode(ISD::VECTOR_FIND_LAST_ACTIVE, DL, ResVT, MaskHi);
10564
10565 // Check if any lane is active in the high mask.
10566 SDValue AnyHiActive = DAG.getNode(ISD::VECREDUCE_OR, DL, MVT::i1, MaskHi);
10568 AnyHiActive, DL,
10569 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i1),
10570 MVT::i1);
10571
10572 // Adjust HiResult by adding the number of elements in Lo
10573 SDValue LoNumElts =
10574 DAG.getElementCount(DL, ResVT, LoVT.getVectorElementCount());
10575 SDValue AdjustedHiResult =
10576 DAG.getNode(ISD::ADD, DL, ResVT, HiResult, LoNumElts);
10577
10578 // Return: AnyHiActive ? AdjustedHiResult : LoResult;
10579 return DAG.getNode(ISD::SELECT, DL, ResVT, Cond, AdjustedHiResult,
10580 LoResult);
10581 }
10582
10583 EVT StepVecVT = StepVec.getValueType();
10584 EVT StepVT = StepVec.getValueType().getVectorElementType();
10585
10586 // Zero out lanes with inactive elements, then find the highest remaining
10587 // value from the stepvector.
10588 SDValue Zeroes = DAG.getConstant(0, DL, StepVecVT);
10589 SDValue ActiveElts = DAG.getSelect(DL, StepVecVT, Mask, StepVec, Zeroes);
10590 SDValue HighestIdx = DAG.getNode(ISD::VECREDUCE_UMAX, DL, StepVT, ActiveElts);
      // The reduction yields a StepVT scalar; convert it to the node's result
      // type.
10591 return DAG.getZExtOrTrunc(HighestIdx, DL, N->getValueType(0));
10592}
10593
10595 SelectionDAG &DAG) const {
// Lowers a loop-dependence mask node to
// GET_ACTIVE_LANE_MASK(LaneOffset, MaskN), where MaskN is derived from the
// distance, in elements, between the two pointer operands.
// Operands read here: 0 = source pointer, 1 = sink pointer, 2 = element size
// in bytes, 3 = constant lane offset.
10596 SDLoc DL(N);
10597 EVT VT = N->getValueType(0);
10598 SDValue SourceValue = N->getOperand(0);
10599 SDValue SinkValue = N->getOperand(1);
10600 SDValue EltSizeInBytes = N->getOperand(2);
10601
10602 // Note: The lane offset is scalable if the mask is scalable.
10603 ElementCount LaneOffsetEC =
10604 ElementCount::get(N->getConstantOperandVal(3), VT.isScalableVT());
10605
10606 EVT AddrVT = SourceValue->getValueType(0);
10607 bool IsReadAfterWrite = N->getOpcode() == ISD::LOOP_DEPENDENCE_RAW_MASK;
10608
10609 // Take the difference between the pointers and divide it by the element
10610 // size, to see how many lanes separate them. For the read-after-write form
// the absolute distance is used.
10611 SDValue Diff = DAG.getNode(ISD::SUB, DL, AddrVT, SinkValue, SourceValue);
10612 if (IsReadAfterWrite)
10613 Diff = DAG.getNode(ISD::ABS, DL, AddrVT, Diff);
10614 Diff = DAG.getNode(ISD::SDIV, DL, AddrVT, Diff, EltSizeInBytes);
10615
10616 // The pointers do not alias if:
10617 // * Diff <= 0 (WAR_MASK)
10618 // * Diff == 0 (RAW_MASK)
10619 EVT CmpVT =
10620 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), AddrVT);
10621 SDValue Zero = DAG.getConstant(0, DL, AddrVT);
10622 SDValue Cmp = DAG.getSetCC(DL, CmpVT, Diff, Zero,
10623 IsReadAfterWrite ? ISD::SETEQ : ISD::SETLE);
10624
10625 // The pointers do not alias if:
10626 // Lane + LaneOffset < Diff (WAR/RAW_MASK)
10627 SDValue LaneOffset = DAG.getElementCount(DL, AddrVT, LaneOffsetEC);
10628 SDValue MaskN = DAG.getSelect(
10629 DL, AddrVT, Cmp,
10631 AddrVT),
10632 Diff);
10633
// MaskN is Diff unless Cmp holds, in which case the other select arm is used.
10634 return DAG.getNode(ISD::GET_ACTIVE_LANE_MASK, DL, VT, LaneOffset, MaskN);
10635}
10636
10638 bool IsNegative) const {
// Expands abs(Op), or 0 - abs(Op) when IsNegative is set, into simpler nodes.
// Several min/max based forms are tried first; the last resort is the
// shift/xor/sub sequence at the end. Returns an empty SDValue when VT is a
// vector and the operation checks before that sequence fail.
10639 SDLoc dl(N);
10640 EVT VT = N->getValueType(0);
10641 SDValue Op = N->getOperand(0);
10642
10643 // If expanding ABS_MIN_POISON, fall back to ABS if the target supports it.
10644 if (N->getOpcode() == ISD::ABS_MIN_POISON &&
10646 SDValue AbsVal = DAG.getNode(ISD::ABS, dl, VT, Op);
10647 if (IsNegative)
10648 return DAG.getNegative(AbsVal, dl, VT);
10649 return AbsVal;
10650 }
10651
// In each min/max form below Op is used twice, so it is frozen first
// (presumably so both uses observe the same value if Op is undef/poison).
10652 // abs(x) -> smax(x,sub(0,x))
10653 if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
10655 SDValue Zero = DAG.getConstant(0, dl, VT);
10656 Op = DAG.getFreeze(Op);
10657 return DAG.getNode(ISD::SMAX, dl, VT, Op,
10658 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
10659 }
10660
10661 // abs(x) -> umin(x,sub(0,x))
10662 if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
10664 SDValue Zero = DAG.getConstant(0, dl, VT);
10665 Op = DAG.getFreeze(Op);
10666 return DAG.getNode(ISD::UMIN, dl, VT, Op,
10667 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
10668 }
10669
10670 // 0 - abs(x) -> smin(x, sub(0,x))
10671 if (IsNegative && isOperationLegal(ISD::SUB, VT) &&
10673 SDValue Zero = DAG.getConstant(0, dl, VT);
10674 Op = DAG.getFreeze(Op);
10675 return DAG.getNode(ISD::SMIN, dl, VT, Op,
10676 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
10677 }
10678
10679 // Only expand vector types if we have the appropriate vector operations.
10680 if (VT.isVector() &&
10682 (!IsNegative && !isOperationLegalOrCustom(ISD::ADD, VT)) ||
10683 (IsNegative && !isOperationLegalOrCustom(ISD::SUB, VT)) ||
10685 return SDValue();
10686
// Shift replicates the sign bit of Op across the element: zero when Op is
// non-negative, all-ones when it is negative.
10687 Op = DAG.getFreeze(Op);
10688 SDValue Shift = DAG.getNode(
10689 ISD::SRA, dl, VT, Op,
10690 DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, dl));
10691 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift);
10692
10693 // abs(x) -> Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
10694 if (!IsNegative)
10695 return DAG.getNode(ISD::SUB, dl, VT, Xor, Shift);
10696
10697 // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
10698 return DAG.getNode(ISD::SUB, dl, VT, Shift, Xor);
10699}
10700
// Expands an absolute-difference node on operands 0 and 1. The opcode
// ISD::ABDS selects the signed form; any other opcode is treated as unsigned
// (abdu in the comments below). Strategies are tried in order: max/min,
// saturating subtract, abs(sub) when the subtraction provably cannot
// overflow, two branchless compare-based forms, and finally a select.
10702 SDLoc dl(N);
10703 EVT VT = N->getValueType(0);
10704 SDValue LHS = N->getOperand(0);
10705 SDValue RHS = N->getOperand(1);
10706 bool IsSigned = N->getOpcode() == ISD::ABDS;
10707
10708 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
10709 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
10710 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
10711 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
10712 if (isOperationLegal(MaxOpc, VT) && isOperationLegal(MinOpc, VT)) {
10713 LHS = DAG.getFreeze(LHS);
10714 RHS = DAG.getFreeze(RHS);
10715 SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
10716 SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
10717 return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
10718 }
10719
10720 // abdu(lhs, rhs) -> or(usubsat(lhs,rhs), usubsat(rhs,lhs))
10721 if (!IsSigned && isOperationLegal(ISD::USUBSAT, VT)) {
10722 LHS = DAG.getFreeze(LHS);
10723 RHS = DAG.getFreeze(RHS);
10724 return DAG.getNode(ISD::OR, dl, VT,
10725 DAG.getNode(ISD::USUBSAT, dl, VT, LHS, RHS),
10726 DAG.getNode(ISD::USUBSAT, dl, VT, RHS, LHS));
10727 }
10728
10729 // If the subtract doesn't overflow then just use abs(sub()). The overflow
// query is made as signed when the node is signed or when both operands are
// known to have a clear sign bit.
10730 bool IsNonNegative = DAG.SignBitIsZero(LHS) && DAG.SignBitIsZero(RHS);
10731
10732 if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, LHS, RHS))
10733 return DAG.getNode(ISD::ABS, dl, VT,
10734 DAG.getNode(ISD::SUB, dl, VT, LHS, RHS));
10735
10736 if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, RHS, LHS))
10737 return DAG.getNode(ISD::ABS, dl, VT,
10738 DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));
10739
10740 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10742 LHS = DAG.getFreeze(LHS);
10743 RHS = DAG.getFreeze(RHS);
10744 SDValue Cmp = DAG.getSetCC(dl, CCVT, LHS, RHS, CC);
10745
10746 // Branchless expansion iff cmp result is allbits:
10747 // abds(lhs, rhs) -> sub(sgt(lhs, rhs), xor(sgt(lhs, rhs), sub(lhs, rhs)))
10748 // abdu(lhs, rhs) -> sub(ugt(lhs, rhs), xor(ugt(lhs, rhs), sub(lhs, rhs)))
10749 if (CCVT == VT && getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
10750 SDValue Diff = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
10751 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Diff, Cmp);
10752 return DAG.getNode(ISD::SUB, dl, VT, Cmp, Xor);
10753 }
10754
10755 // Similar to the branchless expansion, if we don't prefer selects, use the
10756 // (sign-extended) usubo overflow flag if the (scalar) type is illegal as this
10757 // is more likely to legalize cleanly: abdu(lhs, rhs) -> sub(xor(sub(lhs,
10758 // rhs), uof(lhs, rhs)), uof(lhs, rhs))
10759 if (!IsSigned && VT.isScalarInteger() && !isTypeLegal(VT) &&
10761 SDValue USubO =
10762 DAG.getNode(ISD::USUBO, dl, DAG.getVTList(VT, MVT::i1), {LHS, RHS});
10763 SDValue Cmp = DAG.getNode(ISD::SIGN_EXTEND, dl, VT, USubO.getValue(1));
10764 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, USubO.getValue(0), Cmp);
10765 return DAG.getNode(ISD::SUB, dl, VT, Xor, Cmp);
10766 }
10767
10768 // FIXME: Should really try to split the vector in case it's legal on a
10769 // subvector.
10771 return DAG.UnrollVectorOp(N);
10772
10773 // abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
10774 // abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
10775 return DAG.getSelect(dl, VT, Cmp, DAG.getNode(ISD::SUB, dl, VT, LHS, RHS),
10776 DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));
10777}
10778
// Expands an averaging node (AVGFLOORS/AVGFLOORU/AVGCEILS/AVGCEILU) on
// operands 0 and 1. The ceiling variants add 1 before halving in the
// add+shift forms. Strategies, in order: plain add+shift when the operands
// have spare high bits, add+shift in a wider legal scalar type, a
// UADDO-based form for AVGFLOORU, and finally the and/or + xor/shift form.
10780 SDLoc dl(N);
10781 EVT VT = N->getValueType(0);
10782 SDValue LHS = N->getOperand(0);
10783 SDValue RHS = N->getOperand(1);
10784
10785 unsigned Opc = N->getOpcode();
10786 bool IsFloor = Opc == ISD::AVGFLOORS || Opc == ISD::AVGFLOORU;
10787 bool IsSigned = Opc == ISD::AVGCEILS || Opc == ISD::AVGFLOORS;
// SumOpc/SignOpc select between the floor form add(and(..), ..) and the
// ceiling form sub(or(..), ..) used by the final expansion.
10788 unsigned SumOpc = IsFloor ? ISD::ADD : ISD::SUB;
10789 unsigned SignOpc = IsFloor ? ISD::AND : ISD::OR;
10790 unsigned ShiftOpc = IsSigned ? ISD::SRA : ISD::SRL;
10791 unsigned ExtOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
10793 Opc == ISD::AVGFLOORU || Opc == ISD::AVGCEILU) &&
10794 "Unknown AVG node");
10795
10796 // If the operands are already extended, we can add+shift.
// "Extended" here means at least two known sign bits (signed) or at least one
// known leading zero bit (unsigned) on both operands.
10797 bool IsExt =
10798 (IsSigned && DAG.ComputeNumSignBits(LHS) >= 2 &&
10799 DAG.ComputeNumSignBits(RHS) >= 2) ||
10800 (!IsSigned && DAG.computeKnownBits(LHS).countMinLeadingZeros() >= 1 &&
10801 DAG.computeKnownBits(RHS).countMinLeadingZeros() >= 1);
10802 if (IsExt) {
10803 SDValue Sum = DAG.getNode(ISD::ADD, dl, VT, LHS, RHS);
10804 if (!IsFloor)
10805 Sum = DAG.getNode(ISD::ADD, dl, VT, Sum, DAG.getConstant(1, dl, VT));
10806 return DAG.getNode(ShiftOpc, dl, VT, Sum,
10807 DAG.getShiftAmountConstant(1, VT, dl));
10808 }
10809
10810 // For scalars, see if we can efficiently extend/truncate to use add+shift.
10811 if (VT.isScalarInteger()) {
10812 EVT ExtVT = VT.widenIntegerElementType(*DAG.getContext());
10813 if (isTypeLegal(ExtVT) && isTruncateFree(ExtVT, VT)) {
10814 LHS = DAG.getNode(ExtOpc, dl, ExtVT, LHS);
10815 RHS = DAG.getNode(ExtOpc, dl, ExtVT, RHS);
10816 SDValue Avg = DAG.getNode(ISD::ADD, dl, ExtVT, LHS, RHS);
10817 if (!IsFloor)
10818 Avg = DAG.getNode(ISD::ADD, dl, ExtVT, Avg,
10819 DAG.getConstant(1, dl, ExtVT));
10820 // Just use SRL as we will be truncating away the extended sign bits.
10821 Avg = DAG.getNode(ISD::SRL, dl, ExtVT, Avg,
10822 DAG.getShiftAmountConstant(1, ExtVT, dl));
10823 return DAG.getNode(ISD::TRUNCATE, dl, VT, Avg);
10824 }
10825 }
10826
10827 // avgflooru(lhs, rhs) -> or(lshr(add(lhs, rhs),1),shl(overflow, typesize-1))
10828 if (Opc == ISD::AVGFLOORU && VT.isScalarInteger() && !isTypeLegal(VT) &&
10831 SDValue UAddWithOverflow =
10832 DAG.getNode(ISD::UADDO, dl, DAG.getVTList(VT, MVT::i1), {RHS, LHS});
10833
10834 SDValue Sum = UAddWithOverflow.getValue(0);
10835 SDValue Overflow = UAddWithOverflow.getValue(1);
10836
10837 // Right shift the sum by 1
10838 SDValue LShrVal = DAG.getNode(ISD::SRL, dl, VT, Sum,
10839 DAG.getShiftAmountConstant(1, VT, dl));
10840
// Despite the variable name, an ANY_EXTEND is enough: the left shift by
// (bit width - 1) below keeps only bit 0 of the extended overflow flag.
10841 SDValue ZeroExtOverflow = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Overflow);
10842 SDValue OverflowShl = DAG.getNode(
10843 ISD::SHL, dl, VT, ZeroExtOverflow,
10844 DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, dl));
10845
10846 return DAG.getNode(ISD::OR, dl, VT, LShrVal, OverflowShl);
10847 }
10848
10849 // avgceils(lhs, rhs) -> sub(or(lhs,rhs),ashr(xor(lhs,rhs),1))
10850 // avgceilu(lhs, rhs) -> sub(or(lhs,rhs),lshr(xor(lhs,rhs),1))
10851 // avgfloors(lhs, rhs) -> add(and(lhs,rhs),ashr(xor(lhs,rhs),1))
10852 // avgflooru(lhs, rhs) -> add(and(lhs,rhs),lshr(xor(lhs,rhs),1))
10853 LHS = DAG.getFreeze(LHS);
10854 RHS = DAG.getFreeze(RHS);
10855 SDValue Sign = DAG.getNode(SignOpc, dl, VT, LHS, RHS);
10856 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
10857 SDValue Shift =
10858 DAG.getNode(ShiftOpc, dl, VT, Xor, DAG.getShiftAmountConstant(1, VT, dl));
10859 return DAG.getNode(SumOpc, dl, VT, Sign, Shift);
10860}
10861
// Expands a byte swap of operand 0 into rotates, shifts, masks and ORs.
// Only simple value types with i16, i32 or i64 scalar elements are handled;
// anything else yields an empty SDValue.
10863 SDLoc dl(N);
10864 EVT VT = N->getValueType(0);
10865 SDValue Op = N->getOperand(0);
10866
10867 if (!VT.isSimple())
10868 return SDValue();
10869
10870 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
10871 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
10872 switch (VT.getSimpleVT().getScalarType().SimpleTy) {
10873 default:
10874 return SDValue();
10875 case MVT::i16:
10876 // Use a rotate by 8. This can be further expanded if necessary.
10877 return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
10878 case MVT::i32:
10879 // This is meant for ARM specifically, which has ROTR but no ROTL.
10880 // t = x ^ rotr(x, 16)
10881 // t = bic(t, 0x00ff0000)
10882 // t = lshr(t, 8)
10883 // x = t ^ rotr(x, 8)
10885 SDValue Rotr16 =
10886 DAG.getNode(ISD::ROTR, dl, VT, Op, DAG.getConstant(16, dl, SHVT));
10887 SDValue Tmp = DAG.getNode(ISD::XOR, dl, VT, Op, Rotr16);
// The "bic" step is expressed as an AND with the inverted constant.
10888 Tmp = DAG.getNode(ISD::AND, dl, VT, Tmp,
10889 DAG.getConstant(0xFF00FFFF, dl, VT));
10890 Tmp = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(8, dl, SHVT));
10891 SDValue Rotr8 =
10892 DAG.getNode(ISD::ROTR, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
10893 return DAG.getNode(ISD::XOR, dl, VT, Tmp, Rotr8);
10894 }
// Generic i32 form: TmpK is the source byte destined for byte position K
// (1 = least significant), moved there by shifts and masks; the four pieces
// are then ORed together.
10895 Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
10896 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Op,
10897 DAG.getConstant(0xFF00, dl, VT));
10898 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT));
10899 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
10900 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, dl, VT));
10901 Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
10902 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
10903 Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
10904 return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
10905 case MVT::i64:
// Same scheme with eight bytes: Tmp8..Tmp5 are shifted left, Tmp4..Tmp1 are
// shifted right, and the pieces are combined pairwise with OR.
10906 Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
10907 Tmp7 = DAG.getNode(ISD::AND, dl, VT, Op,
10908 DAG.getConstant(255ULL<<8, dl, VT));
10909 Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT));
10910 Tmp6 = DAG.getNode(ISD::AND, dl, VT, Op,
10911 DAG.getConstant(255ULL<<16, dl, VT));
10912 Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT));
10913 Tmp5 = DAG.getNode(ISD::AND, dl, VT, Op,
10914 DAG.getConstant(255ULL<<24, dl, VT));
10915 Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT));
10916 Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
10917 Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4,
10918 DAG.getConstant(255ULL<<24, dl, VT));
10919 Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
10920 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
10921 DAG.getConstant(255ULL<<16, dl, VT));
10922 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
10923 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2,
10924 DAG.getConstant(255ULL<<8, dl, VT));
10925 Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
10926 Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7);
10927 Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5);
10928 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
10929 Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
10930 Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6);
10931 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
10932 return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4);
10933 }
10934}
10935
// Expands a byte swap whose node carries a mask (operand 1) and an explicit
// vector length (operand 2). The swap is built from VP_SHL/VP_SRL/VP_AND/VP_OR
// nodes, each of which is given the same Mask and EVL operands. Only simple
// value types with i16, i32 or i64 scalar elements are handled; anything else
// yields an empty SDValue.
10937 SDLoc dl(N);
10938 EVT VT = N->getValueType(0);
10939 SDValue Op = N->getOperand(0);
10940 SDValue Mask = N->getOperand(1);
10941 SDValue EVL = N->getOperand(2);
10942
10943 if (!VT.isSimple())
10944 return SDValue();
10945
10946 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
10947 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
10948 switch (VT.getSimpleVT().getScalarType().SimpleTy) {
10949 default:
10950 return SDValue();
10951 case MVT::i16:
// Two bytes: (Op << 8) | (Op >> 8).
10952 Tmp1 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
10953 Mask, EVL);
10954 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
10955 Mask, EVL);
10956 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp1, Tmp2, Mask, EVL);
10957 case MVT::i32:
// Four bytes: TmpK is the source byte destined for byte position K
// (1 = least significant); the pieces are ORed together at the end.
10958 Tmp4 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
10959 Mask, EVL);
10960 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Op, DAG.getConstant(0xFF00, dl, VT),
10961 Mask, EVL);
10962 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT),
10963 Mask, EVL);
10964 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
10965 Mask, EVL);
10966 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
10967 DAG.getConstant(0xFF00, dl, VT), Mask, EVL);
10968 Tmp1 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
10969 Mask, EVL);
10970 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
10971 Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
10972 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
10973 case MVT::i64:
// Eight bytes: Tmp8..Tmp5 are shifted left, Tmp4..Tmp1 are shifted right,
// and the pieces are combined pairwise with VP_OR.
10974 Tmp8 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
10975 Mask, EVL);
10976 Tmp7 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
10977 DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
10978 Tmp7 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT),
10979 Mask, EVL);
10980 Tmp6 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
10981 DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
10982 Tmp6 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT),
10983 Mask, EVL);
10984 Tmp5 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
10985 DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
10986 Tmp5 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT),
10987 Mask, EVL);
10988 Tmp4 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
10989 Mask, EVL);
10990 Tmp4 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp4,
10991 DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
10992 Tmp3 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
10993 Mask, EVL);
10994 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp3,
10995 DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
10996 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT),
10997 Mask, EVL);
10998 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
10999 DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
11000 Tmp1 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
11001 Mask, EVL);
11002 Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp7, Mask, EVL);
11003 Tmp6 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp6, Tmp5, Mask, EVL);
11004 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
11005 Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
11006 Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp6, Mask, EVL);
11007 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
11008 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp4, Mask, EVL);
11009 }
11010}
11011
// Expands a bit reversal of operand 0. For element widths that are a power
// of two and at least 8 bits, the bytes are swapped first and then the
// nibbles, bit pairs and single bits within each byte are exchanged. Any
// other width falls back to moving one bit at a time.
11013 SDLoc dl(N);
11014 EVT VT = N->getValueType(0);
11015 SDValue Op = N->getOperand(0);
11016 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
11017 unsigned Sz = VT.getScalarSizeInBits();
11018
11019 SDValue Tmp, Tmp2, Tmp3;
11020
11021 // If we can, perform BSWAP first and then the mask+swap the i4, then i2
11022 // and finally the i1 pairs.
11023 // TODO: We can easily support i4/i2 legal types if any target ever does.
11024 if (Sz >= 8 && isPowerOf2_32(Sz)) {
11025 // Create the masks - repeating the pattern every byte.
11026 APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
11027 APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
11028 APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
11029
11030 // BSWAP if the type is wider than a single byte.
11031 Tmp = (Sz > 8 ? DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op);
11032
11033 // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
11034 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT));
11035 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask4, dl, VT));
11036 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT));
11037 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT));
11038 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
11039
11040 // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
11041 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT));
11042 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask2, dl, VT));
11043 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT));
11044 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT));
11045 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
11046
11047 // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
11048 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT));
11049 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask1, dl, VT));
11050 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT));
11051 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT));
11052 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
11053 return Tmp;
11054 }
11055
// Fallback: move source bit I to destination bit J, where J == Sz - 1 - I on
// every iteration. The final --J wraps the unsigned counter, but the loop
// exits before J is read again.
11056 Tmp = DAG.getConstant(0, dl, VT);
11057 for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) {
11058 if (I < J)
11059 Tmp2 =
11060 DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(J - I, dl, SHVT));
11061 else
11062 Tmp2 =
11063 DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT));
11064
// Keep only the bit that landed at position J and merge it into the result.
11065 APInt Shift = APInt::getOneBitSet(Sz, J);
11066 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT));
11067 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2);
11068 }
11069
11070 return Tmp;
11071}
11072
// Expands a VP_BITREVERSE node: operand 0 is the value, operand 1 the mask
// and operand 2 the explicit vector length. Mask and EVL are passed to every
// VP node created here. Only element widths that are a power of two and at
// least 8 bits are handled; otherwise an empty SDValue is returned.
11074 assert(N->getOpcode() == ISD::VP_BITREVERSE);
11075
11076 SDLoc dl(N);
11077 EVT VT = N->getValueType(0);
11078 SDValue Op = N->getOperand(0);
11079 SDValue Mask = N->getOperand(1);
11080 SDValue EVL = N->getOperand(2);
11081 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
11082 unsigned Sz = VT.getScalarSizeInBits();
11083
11084 SDValue Tmp, Tmp2, Tmp3;
11085
11086 // If we can, perform BSWAP first and then the mask+swap the i4, then i2
11087 // and finally the i1 pairs.
11088 // TODO: We can easily support i4/i2 legal types if any target ever does.
11089 if (Sz >= 8 && isPowerOf2_32(Sz)) {
11090 // Create the masks - repeating the pattern every byte.
11091 APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
11092 APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
11093 APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
11094
11095 // BSWAP if the type is wider than a single byte.
11096 Tmp = (Sz > 8 ? DAG.getNode(ISD::VP_BSWAP, dl, VT, Op, Mask, EVL) : Op);
11097
11098 // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
11099 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT),
11100 Mask, EVL);
11101 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
11102 DAG.getConstant(Mask4, dl, VT), Mask, EVL);
11103 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT),
11104 Mask, EVL);
11105 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT),
11106 Mask, EVL);
11107 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
11108
11109 // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
11110 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT),
11111 Mask, EVL);
11112 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
11113 DAG.getConstant(Mask2, dl, VT), Mask, EVL);
11114 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT),
11115 Mask, EVL);
11116 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT),
11117 Mask, EVL);
11118 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
11119
11120 // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
11121 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT),
11122 Mask, EVL);
11123 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
11124 DAG.getConstant(Mask1, dl, VT), Mask, EVL);
11125 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT),
11126 Mask, EVL);
11127 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT),
11128 Mask, EVL);
11129 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
11130 return Tmp;
11131 }
// No expansion is provided for other element widths.
11132 return SDValue();
11133}
11134
11135std::pair<SDValue, SDValue>
11137 SelectionDAG &DAG) const {
// Replaces a fixed-length vector load with scalar operations and returns
// {loaded vector value, output chain}. Scalable vectors are rejected with a
// fatal error. Two strategies are used:
//  * elements that are not byte-sized: load the whole vector as one integer
//    and extract each element with shift/mask/truncate;
//  * byte-sized elements: one scalar (extending) load per element.
11138 SDLoc SL(LD);
11139 SDValue Chain = LD->getChain();
11140 SDValue BasePTR = LD->getBasePtr();
11141 EVT SrcVT = LD->getMemoryVT();
11142 EVT DstVT = LD->getValueType(0);
11143 ISD::LoadExtType ExtType = LD->getExtensionType();
11144
11145 if (SrcVT.isScalableVector())
11146 report_fatal_error("Cannot scalarize scalable vector loads");
11147
11148 unsigned NumElem = SrcVT.getVectorNumElements();
11149
11150 EVT SrcEltVT = SrcVT.getScalarType();
11151 EVT DstEltVT = DstVT.getScalarType();
11152
11153 // A vector must always be stored in memory as-is, i.e. without any padding
11154 // between the elements, since various code depend on it, e.g. in the
11155 // handling of a bitcast of a vector type to int, which may be done with a
11156 // vector store followed by an integer load. A vector that does not have
11157 // elements that are byte-sized must therefore be stored as an integer
11158 // built out of the extracted vector elements.
11159 if (!SrcEltVT.isByteSized()) {
// LoadVT is an integer as wide as the vector's store size; SrcIntVT is an
// integer with exactly the vector's bit count and is used as the memory type.
11160 unsigned NumLoadBits = SrcVT.getStoreSizeInBits();
11161 EVT LoadVT = EVT::getIntegerVT(*DAG.getContext(), NumLoadBits);
11162
11163 unsigned NumSrcBits = SrcVT.getSizeInBits();
11164 EVT SrcIntVT = EVT::getIntegerVT(*DAG.getContext(), NumSrcBits);
11165
11166 unsigned SrcEltBits = SrcEltVT.getSizeInBits();
11167 SDValue SrcEltBitMask = DAG.getConstant(
11168 APInt::getLowBitsSet(NumLoadBits, SrcEltBits), SL, LoadVT);
11169
11170 // Load the whole vector and avoid masking off the top bits as it makes
11171 // the codegen worse.
11172 SDValue Load =
11173 DAG.getExtLoad(ISD::EXTLOAD, SL, LoadVT, Chain, BasePTR,
11174 LD->getPointerInfo(), SrcIntVT, LD->getBaseAlign(),
11175 LD->getMemOperand()->getFlags(), LD->getAAInfo());
11176
11178 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
// On big-endian targets element 0 sits in the most significant bits, so the
// shift position is mirrored.
11179 unsigned ShiftIntoIdx =
11180 (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
11181 SDValue ShiftAmount = DAG.getShiftAmountConstant(
11182 ShiftIntoIdx * SrcEltVT.getSizeInBits(), LoadVT, SL);
11183 SDValue ShiftedElt = DAG.getNode(ISD::SRL, SL, LoadVT, Load, ShiftAmount);
11184 SDValue Elt =
11185 DAG.getNode(ISD::AND, SL, LoadVT, ShiftedElt, SrcEltBitMask);
11186 SDValue Scalar = DAG.getNode(ISD::TRUNCATE, SL, SrcEltVT, Elt);
11187
// For an extending load, widen the extracted element to the destination
// element type using the extension kind of the original load.
11188 if (ExtType != ISD::NON_EXTLOAD) {
11189 unsigned ExtendOp = ISD::getExtForLoadExtType(false, ExtType);
11190 Scalar = DAG.getNode(ExtendOp, SL, DstEltVT, Scalar);
11191 }
11192
11193 Vals.push_back(Scalar);
11194 }
11195
// Only one load was emitted, so its chain result is the output chain.
11196 SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
11197 return std::make_pair(Value, Load.getValue(1));
11198 }
11199
// Byte-sized elements: Stride is the element size in bytes.
11200 unsigned Stride = SrcEltVT.getSizeInBits() / 8;
11201 assert(SrcEltVT.isByteSized());
11202
11204 SmallVector<SDValue, 8> LoadChains;
11205
11206 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
// Every scalar load takes the original input chain; their output chains are
// merged by the TokenFactor below.
11207 SDValue ScalarLoad = DAG.getExtLoad(
11208 ExtType, SL, DstEltVT, Chain, BasePTR,
11209 LD->getPointerInfo().getWithOffset(Idx * Stride), SrcEltVT,
11210 LD->getBaseAlign(), LD->getMemOperand()->getFlags(), LD->getAAInfo());
11211
// Advance the running pointer to the next element.
11212 BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, TypeSize::getFixed(Stride));
11213
11214 Vals.push_back(ScalarLoad.getValue(0));
11215 LoadChains.push_back(ScalarLoad.getValue(1));
11216 }
11217
11218 SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
11219 SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
11220
11221 return std::make_pair(Value, NewChain);
11222}
11223
11225 SelectionDAG &DAG) const {
// Replaces a fixed-length vector store with scalar operations and returns the
// resulting chain. Scalable vectors are rejected with a fatal error. Two
// strategies are used:
//  * elements that are not byte-sized: pack all elements into one integer and
//    emit a single store;
//  * byte-sized elements: one truncating scalar store per element, joined by
//    a TokenFactor.
11226 SDLoc SL(ST);
11227
11228 SDValue Chain = ST->getChain();
11229 SDValue BasePtr = ST->getBasePtr();
11230 SDValue Value = ST->getValue();
11231 EVT StVT = ST->getMemoryVT();
11232
11233 if (StVT.isScalableVector())
11234 report_fatal_error("Cannot scalarize scalable vector stores");
11235
11236 // The type of the data we want to save
11237 EVT RegVT = Value.getValueType();
11238 EVT RegSclVT = RegVT.getScalarType();
11239
11240 // The type of data as saved in memory.
11241 EVT MemSclVT = StVT.getScalarType();
11242
11243 unsigned NumElem = StVT.getVectorNumElements();
11244
11245 // A vector must always be stored in memory as-is, i.e. without any padding
11246 // between the elements, since various code depend on it, e.g. in the
11247 // handling of a bitcast of a vector type to int, which may be done with a
11248 // vector store followed by an integer load. A vector that does not have
11249 // elements that are byte-sized must therefore be stored as an integer
11250 // built out of the extracted vector elements.
11251 if (!MemSclVT.isByteSized()) {
11252 unsigned NumBits = StVT.getSizeInBits();
11253 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);
11254
// CurrVal accumulates the packed integer, starting from zero.
11255 SDValue CurrVal = DAG.getConstant(0, SL, IntVT);
11256
11257 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
// Truncate to the in-memory element type first so that the zero-extension
// leaves only that element's bits set.
11258 SDValue Elt = DAG.getExtractVectorElt(SL, RegSclVT, Value, Idx);
11259 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt);
11260 SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc);
// On big-endian targets element 0 goes into the most significant bits.
11261 unsigned ShiftIntoIdx =
11262 (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
// NOTE(review): the shift amount here is a constant of type IntVT, not one
// built with getShiftAmountConstant - confirm this is intended.
11263 SDValue ShiftAmount =
11264 DAG.getConstant(ShiftIntoIdx * MemSclVT.getSizeInBits(), SL, IntVT);
11265 SDValue ShiftedElt =
11266 DAG.getNode(ISD::SHL, SL, IntVT, ExtElt, ShiftAmount);
11267 CurrVal = DAG.getNode(ISD::OR, SL, IntVT, CurrVal, ShiftedElt);
11268 }
11269
11270 return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(),
11271 ST->getBaseAlign(), ST->getMemOperand()->getFlags(),
11272 ST->getAAInfo());
11273 }
11274
11275 // Store Stride in bytes
11276 unsigned Stride = MemSclVT.getSizeInBits() / 8;
11277 assert(Stride && "Zero stride!");
11278 // Extract each of the elements from the original vector and save them into
11279 // memory individually.
11281 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
11282 SDValue Elt = DAG.getExtractVectorElt(SL, RegSclVT, Value, Idx);
11283
// Each element's address is computed from the unmodified base pointer.
11284 SDValue Ptr =
11285 DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::getFixed(Idx * Stride));
11286
11287 // This scalar TruncStore may be illegal, but we legalize it later.
// Every scalar store takes the original input chain.
11288 SDValue Store = DAG.getTruncStore(
11289 Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride),
11290 MemSclVT, ST->getBaseAlign(), ST->getMemOperand()->getFlags(),
11291 ST->getAAInfo());
11292
11293 Stores.push_back(Store);
11294 }
11295
11296 return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores);
11297}
11298
// Rewrites the load LD in terms of other loads and returns a
// {value, token} pair. Only UNINDEXED loads are accepted (asserted below).
// Three strategies are used:
//  - FP/vector result with intVT and LoadedVT both legal: one integer load of
//    the same size followed by a BITCAST, or scalarizeVectorLoad() when an
//    integer LOAD of that type is neither legal nor custom and the memory
//    type is a vector.
//  - Any other FP/vector result: copy the bytes into a stack temporary in
//    register-sized pieces, then reload from the stack slot.
//  - Scalar integers: two half-width extending loads merged with SHL/OR.
11299std::pair<SDValue, SDValue>
11301 assert(LD->getAddressingMode() == ISD::UNINDEXED &&
11302 "unaligned indexed loads not implemented!");
11303 SDValue Chain = LD->getChain();
11304 SDValue Ptr = LD->getBasePtr();
11305 EVT VT = LD->getValueType(0);
11306 EVT LoadedVT = LD->getMemoryVT();
11307 SDLoc dl(LD);
11308 auto &MF = DAG.getMachineFunction();
11309
11310 if (VT.isFloatingPoint() || VT.isVector()) {
      // Integer type with exactly as many bits as the in-memory type.
11311 EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
11312 if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
11313 if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
11314 LoadedVT.isVector()) {
11315 // Scalarize the load and let the individual components be handled.
11316 return scalarizeVectorLoad(LD, DAG);
11317 }
11318
11319 // Expand to a (misaligned) integer load of the same size,
11320 // then bitconvert to floating point or vector.
11321 SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr,
11322 LD->getMemOperand());
11323 SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
      // The original load was extending: widen the bitcast value to VT.
11324 if (LoadedVT != VT)
11325 Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
11326 ISD::ANY_EXTEND, dl, VT, Result);
11327
11328 return std::make_pair(Result, newLoad.getValue(1));
11329 }
11330
11331 // Copy the value to a (aligned) stack slot using (unaligned) integer
11332 // loads and stores, then do a (aligned) load from the stack slot.
11333 MVT RegVT = getRegisterType(*DAG.getContext(), intVT);
11334 unsigned LoadedBytes = LoadedVT.getStoreSize();
11335 unsigned RegBytes = RegVT.getSizeInBits() / 8;
      // Round up so that a trailing partial register is counted as a piece.
11336 unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
11337
11338 // Make sure the stack slot is also aligned for the register type.
11339 SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
11340 auto FrameIndex = cast<FrameIndexSDNode>(StackBase.getNode())->getIndex();
      // StackPtr walks through the slot; StackBase stays at its start for the
      // final reload.
11342 SDValue StackPtr = StackBase;
11343 unsigned Offset = 0;
11344
11345 EVT PtrVT = Ptr.getValueType();
11346 EVT StackPtrVT = StackPtr.getValueType();
11347
11348 SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
11349 SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
11350
11351 // Do all but one copies using the full register width.
      // Starting at i = 1 leaves exactly one (possibly partial) piece for the
      // code after the loop.
11352 for (unsigned i = 1; i < NumRegs; i++) {
11353 // Load one integer register's worth from the original location.
11354 SDValue Load = DAG.getLoad(
11355 RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset),
11356 LD->getBaseAlign(), LD->getMemOperand()->getFlags(), LD->getAAInfo());
11357 // Follow the load with a store to the stack slot. Remember the store.
11358 Stores.push_back(DAG.getStore(
11359 Load.getValue(1), dl, Load, StackPtr,
11360 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset)));
11361 // Increment the pointers.
11362 Offset += RegBytes;
11363
11364 Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
11365 StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
11366 }
11367
11368 // The last copy may be partial. Do an extending load.
      // MemVT is an integer type covering the bytes that are still uncopied.
11369 EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
11370 8 * (LoadedBytes - Offset));
11371 SDValue Load = DAG.getExtLoad(
11372 ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
11373 LD->getPointerInfo().getWithOffset(Offset), MemVT, LD->getBaseAlign(),
11374 LD->getMemOperand()->getFlags(), LD->getAAInfo());
11375 // Follow the load with a store to the stack slot. Remember the store.
11376 // On big-endian machines this requires a truncating store to ensure
11377 // that the bits end up in the right place.
11378 Stores.push_back(DAG.getTruncStore(
11379 Load.getValue(1), dl, Load, StackPtr,
11380 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT));
11381
11382 // The order of the stores doesn't matter - say it with a TokenFactor.
11383 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
11384
11385 // Finally, perform the original load only redirected to the stack slot.
11386 Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
11387 MachinePointerInfo::getFixedStack(MF, FrameIndex, 0),
11388 LoadedVT);
11389
11390 // Return the reloaded value paired with the TokenFactor of the copy stores.
11391 return std::make_pair(Load, TF);
11392 }
11393
11394 assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
11395 "Unaligned load of unsupported type.");
11396
11397 // Compute the new VT that is half the size of the old one. This is an
11398 // integer MVT.
11399 unsigned NumBits = LoadedVT.getSizeInBits();
11400 EVT NewLoadedVT;
11401 NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
      // From here on NumBits is the width of one half.
11402 NumBits >>= 1;
11403
11404 Align Alignment = LD->getBaseAlign();
11405 unsigned IncrementSize = NumBits / 8;
11406 ISD::LoadExtType HiExtType = LD->getExtensionType();
11407
11408 // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
11409 if (HiExtType == ISD::NON_EXTLOAD)
11410 HiExtType = ISD::ZEXTLOAD;
11411
11412 // Load the value in two parts
      // The low half is always zero-extended so it can be OR'ed in below. On
      // little-endian it is read from the lower address, on big-endian from
      // the higher one.
11413 SDValue Lo, Hi;
11414 if (DAG.getDataLayout().isLittleEndian()) {
11415 Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
11416 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
11417 LD->getAAInfo());
11418
11419 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
11420 Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
11421 LD->getPointerInfo().getWithOffset(IncrementSize),
11422 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
11423 LD->getAAInfo());
11424 } else {
11425 Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
11426 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
11427 LD->getAAInfo());
11428
11429 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
11430 Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
11431 LD->getPointerInfo().getWithOffset(IncrementSize),
11432 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
11433 LD->getAAInfo());
11434 }
11435
11436 // aggregate the two parts
      // Result = (Hi << NumBits) | Lo
11437 SDValue ShiftAmount = DAG.getShiftAmountConstant(NumBits, VT, dl);
11438 SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
11439 Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);
11440
      // Both half loads take the incoming Chain as input; join their output
      // tokens.
11441 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
11442 Hi.getValue(1));
11443
11444 return std::make_pair(Result, TF);
11445}
11446
11448 SelectionDAG &DAG) const {
      // Rewrites the store ST in terms of other stores and returns the
      // resulting SDValue (a store node or a TokenFactor of stores). Only
      // UNINDEXED stores are accepted (asserted below). Strategies:
      //  - FP/vector memory type with a legal same-sized integer type: bitcast
      //    the value and emit one integer store, or scalarizeVectorStore() when
      //    an integer STORE of that type is neither legal nor custom and the
      //    memory type is a vector.
      //  - Any other FP/vector memory type: store to a stack temporary, then
      //    copy it to the destination in register-sized pieces.
      //  - Scalar integers: two half-width truncating stores.
11449 assert(ST->getAddressingMode() == ISD::UNINDEXED &&
11450 "unaligned indexed stores not implemented!");
11451 SDValue Chain = ST->getChain();
11452 SDValue Ptr = ST->getBasePtr();
11453 SDValue Val = ST->getValue();
11454 EVT VT = Val.getValueType();
11455 Align Alignment = ST->getBaseAlign();
11456 auto &MF = DAG.getMachineFunction();
11457 EVT StoreMemVT = ST->getMemoryVT();
11458
11459 SDLoc dl(ST);
11460 if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) {
      // Note: sized from the value type VT, not from StoreMemVT.
11461 EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
11462 if (isTypeLegal(intVT)) {
11463 if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
11464 StoreMemVT.isVector()) {
11465 // Scalarize the store and let the individual components be handled.
11466 SDValue Result = scalarizeVectorStore(ST, DAG);
11467 return Result;
11468 }
11469 // Expand to a bitconvert of the value to the integer type of the
11470 // same size, then a (misaligned) int store.
11471 // FIXME: Does not handle truncating floating point stores!
      // NOTE(review): this store does not forward ST->getAAInfo(); confirm
      // whether that is intentional.
11472 SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
11473 Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
11474 Alignment, ST->getMemOperand()->getFlags());
11475 return Result;
11476 }
11477 // Do a (aligned) store to a stack slot, then copy from the stack slot
11478 // to the final destination using (unaligned) integer loads and stores.
11479 MVT RegVT = getRegisterType(
11480 *DAG.getContext(),
11481 EVT::getIntegerVT(*DAG.getContext(), StoreMemVT.getSizeInBits()));
11482 EVT PtrVT = Ptr.getValueType();
11483 unsigned StoredBytes = StoreMemVT.getStoreSize();
11484 unsigned RegBytes = RegVT.getSizeInBits() / 8;
      // Round up so that a trailing partial register is counted as a piece.
11485 unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
11486
11487 // Make sure the stack slot is also aligned for the register type.
11488 SDValue StackPtr = DAG.CreateStackTemporary(StoreMemVT, RegVT);
11489 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
11490
11491 // Perform the original store, only redirected to the stack slot.
11492 SDValue Store = DAG.getTruncStore(
11493 Chain, dl, Val, StackPtr,
11494 MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoreMemVT);
11495
11496 EVT StackPtrVT = StackPtr.getValueType();
11497
11498 SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
11499 SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
11501 unsigned Offset = 0;
11502
11503 // Do all but one copies using the full register width.
      // Starting at i = 1 leaves exactly one (possibly partial) piece for the
      // code after the loop. Every reload is chained on the stack store above.
11504 for (unsigned i = 1; i < NumRegs; i++) {
11505 // Load one integer register's worth from the stack slot.
11506 SDValue Load = DAG.getLoad(
11507 RegVT, dl, Store, StackPtr,
11508 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset));
11509 // Store it to the final location. Remember the store.
      // NOTE(review): no ST->getAAInfo() here, unlike the final partial
      // store below; confirm whether that is intentional.
11510 Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
11511 ST->getPointerInfo().getWithOffset(Offset),
11512 ST->getBaseAlign(),
11513 ST->getMemOperand()->getFlags()));
11514 // Increment the pointers.
11515 Offset += RegBytes;
11516 StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
11517 Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
11518 }
11519
11520 // The last store may be partial. Do a truncating store. On big-endian
11521 // machines this requires an extending load from the stack slot to ensure
11522 // that the bits are in the right place.
      // LoadMemVT is an integer type covering the bytes that are still
      // uncopied.
11523 EVT LoadMemVT =
11524 EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset));
11525
11526 // Load from the stack slot.
11527 SDValue Load = DAG.getExtLoad(
11528 ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
11529 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), LoadMemVT);
11530
11531 Stores.push_back(DAG.getTruncStore(
11532 Load.getValue(1), dl, Load, Ptr,
11533 ST->getPointerInfo().getWithOffset(Offset), LoadMemVT,
11534 ST->getBaseAlign(), ST->getMemOperand()->getFlags(), ST->getAAInfo()));
11535 // The order of the stores doesn't matter - say it with a TokenFactor.
11536 SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
11537 return Result;
11538 }
11539
11540 assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
11541 "Unaligned store of unknown type.");
11542 // Get the half-size VT
11543 EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(*DAG.getContext());
11544 unsigned NumBits = NewStoredVT.getFixedSizeInBits();
11545 unsigned IncrementSize = NumBits / 8;
11546
11547 // Divide the stored value in two parts.
      // Lo is Val itself (the truncating store drops the upper bits); Hi is
      // Val shifted right by the half width.
11548 SDValue ShiftAmount =
11549 DAG.getShiftAmountConstant(NumBits, Val.getValueType(), dl);
11550 SDValue Lo = Val;
11551 // If Val is a constant, replace the upper bits with 0. The SRL will constant
11552 // fold and not use the upper bits. A smaller constant may be easier to
11553 // materialize.
11554 if (auto *C = dyn_cast<ConstantSDNode>(Lo); C && !C->isOpaque())
11555 Lo = DAG.getNode(
11556 ISD::AND, dl, VT, Lo,
11557 DAG.getConstant(APInt::getLowBitsSet(VT.getSizeInBits(), NumBits), dl,
11558 VT));
11559 SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);
11560
11561 // Store the two parts
      // The lower address receives Lo on little-endian and Hi on big-endian.
      // NOTE(review): Store1 omits ST->getAAInfo() while Store2 passes it;
      // confirm whether that is intentional.
11562 SDValue Store1, Store2;
11563 Store1 = DAG.getTruncStore(Chain, dl,
11564 DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
11565 Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
11566 ST->getMemOperand()->getFlags());
11567
11568 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
11569 Store2 = DAG.getTruncStore(
11570 Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
11571 ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
11572 ST->getMemOperand()->getFlags(), ST->getAAInfo());
11573
      // Both stores take the incoming Chain as input; join them.
11574 SDValue Result =
11575 DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
11576 return Result;
11577}
11578
// Returns Addr advanced by an increment (ADD in the address type):
//  - IsCompressedMemory: (number of set lanes in Mask) * (element size of
//    DataVT in bytes);
//  - otherwise: the store size of DataVT.
// DataVT and the mask type must have the same element count (asserted).
// NOTE(review): Addr and Mask are presumably parameters declared on a line
// that is not visible here; confirm against the declaration.
11579SDValue
11581 const SDLoc &DL, EVT DataVT,
11582 SelectionDAG &DAG,
11583 bool IsCompressedMemory) const {
11585 EVT AddrVT = Addr.getValueType();
11586 EVT MaskVT = Mask.getValueType();
11587 assert(DataVT.getVectorElementCount() == MaskVT.getVectorElementCount() &&
11588 "Incompatible types of Data and Mask");
11589 if (IsCompressedMemory) {
11590 // Incrementing the pointer according to number of '1's in the mask.
11591 if (DataVT.isScalableVector()) {
      // Scalable masks are not bitcast to a scalar: widen each lane to i32
      // and sum the lanes instead.
11592 EVT MaskExtVT = MaskVT.changeElementType(*DAG.getContext(), MVT::i32);
11593 SDValue MaskExt = DAG.getNode(ISD::ZERO_EXTEND, DL, MaskExtVT, Mask);
11594 Increment = DAG.getNode(ISD::VECREDUCE_ADD, DL, MVT::i32, MaskExt);
11595 } else {
      // Fixed-width mask: reinterpret it as one integer and count set bits.
11596 EVT MaskIntVT =
11597 EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits());
11598 SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask);
      // Do the population count in at least 32 bits.
11599 if (MaskIntVT.getSizeInBits() < 32) {
11600 MaskInIntReg =
11601 DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg);
11602 MaskIntVT = MVT::i32;
11603 }
11604 Increment = DAG.getNode(ISD::CTPOP, DL, MaskIntVT, MaskInIntReg);
11605 }
11606 // Scale is an element size in bytes.
11607 SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL,
11608 AddrVT);
      // Bring the lane count to the address type before scaling it.
11609 Increment = DAG.getZExtOrTrunc(Increment, DL, AddrVT);
11610 Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale);
11611 } else
11612 Increment = DAG.getTypeSize(DL, AddrVT, DataVT.getStoreSize());
11613
11614 return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
11615}
11616
11618 EVT VecVT, const SDLoc &dl,
11619 ElementCount SubEC) {
      // Returns a value derived from Idx that is bounded for accessing SubEC
      // elements inside a vector of type VecVT:
      //  - fixed-size sub-vector in a scalable vector: Idx itself when it is
      //    a constant that fits within the minimum element count, otherwise
      //    umin(Idx, <vscale-based element count> - NumSubElts);
      //  - single element, power-of-two element count: Idx masked to the low
      //    log2(NElts) bits;
      //  - otherwise: umin(Idx, NElts - NumSubElts), with the bound floored
      //    at 0.
11620 assert(!(SubEC.isScalable() && VecVT.isFixedLengthVector()) &&
11621 "Cannot index a scalable vector within a fixed-width vector");
11622
11623 unsigned NElts = VecVT.getVectorMinNumElements();
11624 unsigned NumSubElts = SubEC.getKnownMinValue();
11625 EVT IdxVT = Idx.getValueType();
11626
11627 if (VecVT.isScalableVector() && !SubEC.isScalable()) {
11628 // If this is a constant index and we know the value plus the number of the
11629 // elements in the subvector minus one is less than the minimum number of
11630 // elements then it's safe to return Idx.
11631 if (auto *IdxCst = dyn_cast<ConstantSDNode>(Idx))
11632 if (IdxCst->getZExtValue() + (NumSubElts - 1) < NElts)
11633 return Idx;
      // VS is built by getVScale with multiplier NElts (presumably
      // vscale * NElts, the runtime element count).
11634 SDValue VS =
11635 DAG.getVScale(dl, IdxVT, APInt(IdxVT.getFixedSizeInBits(), NElts));
      // A plain SUB is used only when NumSubElts <= NElts; otherwise a
      // saturating subtract is used so the bound cannot wrap below zero.
11636 unsigned SubOpcode = NumSubElts <= NElts ? ISD::SUB : ISD::USUBSAT;
11637 SDValue Sub = DAG.getNode(SubOpcode, dl, IdxVT, VS,
11638 DAG.getConstant(NumSubElts, dl, IdxVT));
11639 return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx, Sub);
11640 }
      // Single-element access with a power-of-two count: mask the index
      // instead of using UMIN.
11641 if (isPowerOf2_32(NElts) && NumSubElts == 1) {
11642 APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(), Log2_32(NElts));
11643 return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
11644 DAG.getConstant(Imm, dl, IdxVT));
11645 }
      // Largest start index that keeps the sub-vector inside the vector; 0
      // when the sub-vector is not smaller than the vector.
11646 unsigned MaxIndex = NumSubElts < NElts ? NElts - NumSubElts : 0;
11647 return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx,
11648 DAG.getConstant(MaxIndex, dl, IdxVT));
11649}
11650
// Thin forwarding wrapper: passes DAG, VecPtr, VecVT, Index and PtrArithFlags
// through to another call and returns an SDValue.
// NOTE(review): the callee name and one of its arguments are on lines that
// are not visible here; confirm against the full source.
11651SDValue
11653 EVT VecVT, SDValue Index,
11654 const SDNodeFlags PtrArithFlags) const {
11656 DAG, VecPtr, VecVT,
11658 Index, PtrArithFlags);
11659}
11660
// Computes the address of a sub-vector of type SubVecVT inside the in-memory
// vector at VecPtr:
//   VecPtr + clamp(Index) * [vscale, if SubVecVT is scalable] * EltSize
// The index is first converted to the pointer's value type and clamped with
// clampDynamicVectorIndex(). SubVecVT must have the same element type as
// VecVT, and that element type must be a whole number of bytes (both
// asserted).
11661SDValue
11663 EVT VecVT, EVT SubVecVT, SDValue Index,
11664 const SDNodeFlags PtrArithFlags) const {
11665 SDLoc dl(Index);
11666 // Make sure the index type is big enough to compute in.
11667 Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType());
11668
11669 EVT EltVT = VecVT.getVectorElementType();
11670
11671 // Calculate the element offset and add it to the pointer.
11672 unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size.
11673 assert(EltSize * 8 == EltVT.getFixedSizeInBits() &&
11674 "Converting bits to bytes lost precision");
11675 assert(SubVecVT.getVectorElementType() == EltVT &&
11676 "Sub-vector must be a vector with matching element type");
11677 Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl,
11678 SubVecVT.getVectorElementCount());
11679
11680 EVT IdxVT = Index.getValueType();
      // For a scalable sub-vector the index is additionally multiplied by
      // the value of getVScale with multiplier 1.
11681 if (SubVecVT.isScalableVector())
11682 Index =
11683 DAG.getNode(ISD::MUL, dl, IdxVT, Index,
11684 DAG.getVScale(dl, IdxVT, APInt(IdxVT.getSizeInBits(), 1)));
11685
      // Convert the element index into a byte offset.
11686 Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
11687 DAG.getConstant(EltSize, dl, IdxVT));
11688 return DAG.getMemBasePlusOffset(VecPtr, Index, dl, PtrArithFlags);
11689}
11690
11691//===----------------------------------------------------------------------===//
11692// Implementation of Emulated TLS Model
11693//===----------------------------------------------------------------------===//
11694
11696 SelectionDAG &DAG) const {
      // Lowers the TLS global address GA to a call to __emutls_get_address,
      // passing the address of the module's "__emutls_v.<name>" control
      // variable, and returns the call's result value. The control variable
      // must already exist in the module and GA must have a zero offset (both
      // asserted).
11697 // Access to address of TLS variable xyz is lowered to a function call:
11698 // __emutls_get_address( address of global variable named "__emutls_v.xyz" )
11699 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11700 PointerType *VoidPtrType = PointerType::get(*DAG.getContext(), 0);
11701 SDLoc dl(GA);
11702
11703 ArgListTy Args;
11704 const GlobalValue *GV =
      // Build the control variable's name and look it up in GV's module.
11706 SmallString<32> NameString("__emutls_v.");
11707 NameString += GV->getName();
11708 StringRef EmuTlsVarName(NameString);
11709 const GlobalVariable *EmuTlsVar =
11710 GV->getParent()->getNamedGlobal(EmuTlsVarName);
11711 assert(EmuTlsVar && "Cannot find EmuTlsVar ");
      // The only call argument is the address of the control variable.
11712 Args.emplace_back(DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT), VoidPtrType);
11713
11714 SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT);
11715
      // The call is chained on the DAG entry node, not on a caller-supplied
      // chain.
11717 CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
11718 CLI.setLibCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args));
11719 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
11720
11721 // TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
11722 // At least for X86 targets, maybe good for other targets too?
11724 MFI.setAdjustsStack(true); // Is this only for X86 target?
11725 MFI.setHasCalls(true);
11726
11727 assert((GA->getOffset() == 0) &&
11728 "Emulated TLS must have zero offset in GlobalAddressSDNode");
      // Only the returned value is used; the call's output chain
      // (CallResult.second) is dropped.
11729 return CallResult.first;
11730}
11731
11733 SelectionDAG &DAG) const {
      // Rewrites the SETCC node Op of the form (x == 0) as
      //   trunc_to_i32(srl(ctlz(x), log2(bit width)))
      // after zero-extending x to i32 when it is narrower. Returns an empty
      // SDValue when isCtlzFast() is false or Op is not an equality
      // comparison against the constant zero.
11734 assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
11735 if (!isCtlzFast())
11736 return SDValue();
11737 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
11738 SDLoc dl(Op);
11739 if (isNullConstant(Op.getOperand(1)) && CC == ISD::SETEQ) {
11740 EVT VT = Op.getOperand(0).getValueType();
11741 SDValue Zext = Op.getOperand(0);
11742 if (VT.bitsLT(MVT::i32)) {
11743 VT = MVT::i32;
11744 Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
11745 }
      // ctlz(x) equals the bit width only for x == 0, so shifting right by
      // log2(width) leaves 1 for zero and 0 otherwise (assumes the width is
      // a power of two, since Log2_32 rounds down).
11746 unsigned Log2b = Log2_32(VT.getSizeInBits());
11747 SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
11748 SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
11749 DAG.getConstant(Log2b, dl, MVT::i32));
11750 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
11751 }
11752 return SDValue();
11753}
11754
      // Expands an integer SMIN/SMAX/UMIN/UMAX node into other operations.
      // Rewrites are tried in order: the opposite-signedness min/max when
      // both sign bits are known zero, arithmetic forms built from
      // SETCC/USUBSAT, unrolling of the vector op, and finally a
      // compare-and-select.
      // NOTE(review): several of the guarding conditions below continue on
      // lines that are not visible here.
11756 SDValue Op0 = Node->getOperand(0);
11757 SDValue Op1 = Node->getOperand(1);
11758 EVT VT = Op0.getValueType();
11759 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11760 unsigned Opcode = Node->getOpcode();
11761 SDLoc DL(Node);
11762
11763 // If both sign bits are zero, flip UMIN/UMAX <-> SMIN/SMAX if legal.
11764 unsigned AltOpcode = ISD::getOppositeSignednessMinMaxOpcode(Opcode);
11765 if (isOperationLegal(AltOpcode, VT) && DAG.SignBitIsZero(Op0) &&
11766 DAG.SignBitIsZero(Op1))
11767 return DAG.getNode(AltOpcode, DL, VT, Op0, Op1);
11768
11769 // umax(x,1) --> sub(x,cmpeq(x,0)) iff cmp result is allbits
11770 if (Opcode == ISD::UMAX && llvm::isOneOrOneSplat(Op1, true) && BoolVT == VT &&
      // Op0 is used twice below, so it is frozen first.
11772 Op0 = DAG.getFreeze(Op0);
11773 SDValue Zero = DAG.getConstant(0, DL, VT);
11774 return DAG.getNode(ISD::SUB, DL, VT, Op0,
11775 DAG.getSetCC(DL, VT, Op0, Zero, ISD::SETEQ));
11776 }
11777
11778 // umin(x,y) -> sub(x,usubsat(x,y))
11779 // TODO: Missing freeze(Op0)?
11780 if (Opcode == ISD::UMIN && isOperationLegal(ISD::SUB, VT) &&
11782 return DAG.getNode(ISD::SUB, DL, VT, Op0,
11783 DAG.getNode(ISD::USUBSAT, DL, VT, Op0, Op1));
11784 }
11785
11786 // umax(x,y) -> add(x,usubsat(y,x))
11787 // TODO: Missing freeze(Op0)?
11788 if (Opcode == ISD::UMAX && isOperationLegal(ISD::ADD, VT) &&
11790 return DAG.getNode(ISD::ADD, DL, VT, Op0,
11791 DAG.getNode(ISD::USUBSAT, DL, VT, Op1, Op0));
11792 }
11793
11794 // FIXME: Should really try to split the vector in case it's legal on a
11795 // subvector.
11797 return DAG.UnrollVectorOp(Node);
11798
11799 // Attempt to find an existing SETCC node that we can reuse.
11800 // TODO: Do we need a generic doesSETCCNodeExist?
11801 // TODO: Missing freeze(Op0)/freeze(Op1)?
      // buildMinMax looks for an existing setcc(Op0, Op1, CC) in this order:
      // PrefCC, AltCC (select Op0 when true), then PrefCommuteCC,
      // AltCommuteCC (select Op1 when true, i.e. the arms are swapped). If
      // none exists, it creates a new SETCC with PrefCC.
11802 auto buildMinMax = [&](ISD::CondCode PrefCC, ISD::CondCode AltCC,
11803 ISD::CondCode PrefCommuteCC,
11804 ISD::CondCode AltCommuteCC) {
11805 SDVTList BoolVTList = DAG.getVTList(BoolVT);
11806 for (ISD::CondCode CC : {PrefCC, AltCC}) {
11807 if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
11808 {Op0, Op1, DAG.getCondCode(CC)})) {
11809 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
11810 return DAG.getSelect(DL, VT, Cond, Op0, Op1);
11811 }
11812 }
11813 for (ISD::CondCode CC : {PrefCommuteCC, AltCommuteCC}) {
11814 if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
11815 {Op0, Op1, DAG.getCondCode(CC)})) {
11816 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
11817 return DAG.getSelect(DL, VT, Cond, Op1, Op0);
11818 }
11819 }
11820 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, PrefCC);
11821 return DAG.getSelect(DL, VT, Cond, Op0, Op1);
11822 };
11823
11824 // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
11825 // -> Y = (A < B) ? B : A
11826 // -> Y = (A >= B) ? A : B
11827 // -> Y = (A <= B) ? B : A
11828 switch (Opcode) {
11829 case ISD::SMAX:
11830 return buildMinMax(ISD::SETGT, ISD::SETGE, ISD::SETLT, ISD::SETLE);
11831 case ISD::SMIN:
11832 return buildMinMax(ISD::SETLT, ISD::SETLE, ISD::SETGT, ISD::SETGE);
11833 case ISD::UMAX:
11834 return buildMinMax(ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE);
11835 case ISD::UMIN:
11836 return buildMinMax(ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE);
11837 }
11838
11839 llvm_unreachable("How did we get here?");
11840}
11841
      // Expands a saturating add/sub node (SADDSAT, UADDSAT, SSUBSAT,
      // USUBSAT) on integer operands of identical type. Unsigned forms are
      // first tried as UMAX/UMIN based rewrites; otherwise the matching
      // overflow-reporting node (SADDO/UADDO/SSUBO/USUBO) is emitted and the
      // saturated value is chosen from its overflow flag.
      // NOTE(review): several guarding conditions below are on lines that
      // are not visible here.
11843 unsigned Opcode = Node->getOpcode();
11844 SDValue LHS = Node->getOperand(0);
11845 SDValue RHS = Node->getOperand(1);
11846 EVT VT = LHS.getValueType();
11847 SDLoc dl(Node);
11848
11849 assert(VT == RHS.getValueType() && "Expected operands to be the same type");
11850 assert(VT.isInteger() && "Expected operands to be integers");
11851
11852 // usub.sat(a, b) -> umax(a, b) - b
11853 if (Opcode == ISD::USUBSAT && isOperationLegal(ISD::UMAX, VT)) {
11854 SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS);
11855 return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
11856 }
11857
11858 // usub.sat(a, 1) -> sub(a, zext(a != 0))
11859 // Prefer this on targets without legal/cost-effective overflow-carry nodes.
11860 if (Opcode == ISD::USUBSAT && isOneOrOneSplat(RHS) &&
      // LHS is used twice below, so it is frozen first.
11862 LHS = DAG.getFreeze(LHS);
11863 SDValue Zero = DAG.getConstant(0, dl, VT);
11864 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11865 SDValue IsNonZero = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETNE);
11866 SDValue Subtrahend = DAG.getBoolExtOrTrunc(IsNonZero, dl, VT, BoolVT);
      // Mask to the low bit so the subtrahend is exactly 0 or 1 regardless
      // of how the boolean was extended.
11867 Subtrahend =
11868 DAG.getNode(ISD::AND, dl, VT, Subtrahend, DAG.getConstant(1, dl, VT));
11869 return DAG.getNode(ISD::SUB, dl, VT, LHS, Subtrahend);
11870 }
11871
11872 // uadd.sat(a, b) -> umin(a, ~b) + b
11873 if (Opcode == ISD::UADDSAT && isOperationLegal(ISD::UMIN, VT)) {
11874 SDValue InvRHS = DAG.getNOT(dl, RHS, VT);
11875 SDValue Min = DAG.getNode(ISD::UMIN, dl, VT, LHS, InvRHS);
11876 return DAG.getNode(ISD::ADD, dl, VT, Min, RHS);
11877 }
11878
      // Pick the overflow-reporting counterpart of the saturating opcode.
11879 unsigned OverflowOp;
11880 switch (Opcode) {
11881 case ISD::SADDSAT:
11882 OverflowOp = ISD::SADDO;
11883 break;
11884 case ISD::UADDSAT:
11885 OverflowOp = ISD::UADDO;
11886 break;
11887 case ISD::SSUBSAT:
11888 OverflowOp = ISD::SSUBO;
11889 break;
11890 case ISD::USUBSAT:
11891 OverflowOp = ISD::USUBO;
11892 break;
11893 default:
11894 llvm_unreachable("Expected method to receive signed or unsigned saturation "
11895 "addition or subtraction node.");
11896 }
11897
11898 // FIXME: Should really try to split the vector in case it's legal on a
11899 // subvector.
11901 return DAG.UnrollVectorOp(Node);
11902
11903 unsigned BitWidth = LHS.getScalarValueSizeInBits();
11904 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
      // Result value 0 is the wrapped sum/difference, value 1 the overflow
      // flag.
11905 SDValue Result = DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
11906 SDValue SumDiff = Result.getValue(0);
11907 SDValue Overflow = Result.getValue(1);
11908 SDValue Zero = DAG.getConstant(0, dl, VT);
11909 SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);
11910
11911 if (Opcode == ISD::UADDSAT) {
11913 // (LHS + RHS) | OverflowMask
11914 SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
11915 return DAG.getNode(ISD::OR, dl, VT, SumDiff, OverflowMask);
11916 }
11917 // Overflow ? 0xffff.... : (LHS + RHS)
11918 return DAG.getSelect(dl, VT, Overflow, AllOnes, SumDiff);
11919 }
11920
11921 if (Opcode == ISD::USUBSAT) {
11923 // (LHS - RHS) & ~OverflowMask
11924 SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
11925 SDValue Not = DAG.getNOT(dl, OverflowMask, VT);
11926 return DAG.getNode(ISD::AND, dl, VT, SumDiff, Not);
11927 }
11928 // Overflow ? 0 : (LHS - RHS)
11929 return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
11930 }
11931
11932 assert((Opcode == ISD::SADDSAT || Opcode == ISD::SSUBSAT) &&
11933 "Expected signed saturating add/sub opcode");
11934
11935 const APInt MinVal = APInt::getSignedMinValue(BitWidth);
11936 const APInt MaxVal = APInt::getSignedMaxValue(BitWidth);
11937
11938 KnownBits KnownLHS = DAG.computeKnownBits(LHS);
11939 KnownBits KnownRHS = DAG.computeKnownBits(RHS);
11940
11941 // If either of the operand signs are known, then they are guaranteed to
11942 // only saturate in one direction. If non-negative they will saturate
11943 // towards SIGNED_MAX, if negative they will saturate towards SIGNED_MIN.
11944 //
11945 // In the case of ISD::SSUBSAT, 'x - y' is equivalent to 'x + (-y)', so the
11946 // sign of 'y' has to be flipped.
11947
11948 bool LHSIsNonNegative = KnownLHS.isNonNegative();
11949 bool RHSIsNonNegative =
11950 Opcode == ISD::SADDSAT ? KnownRHS.isNonNegative() : KnownRHS.isNegative();
11951 if (LHSIsNonNegative || RHSIsNonNegative) {
11952 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
11953 return DAG.getSelect(dl, VT, Overflow, SatMax, SumDiff);
11954 }
11955
11956 bool LHSIsNegative = KnownLHS.isNegative();
11957 bool RHSIsNegative =
11958 Opcode == ISD::SADDSAT ? KnownRHS.isNegative() : KnownRHS.isNonNegative();
11959 if (LHSIsNegative || RHSIsNegative) {
11960 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
11961 return DAG.getSelect(dl, VT, Overflow, SatMin, SumDiff);
11962 }
11963
11964 // Overflow ? (SumDiff >> (BW - 1)) ^ MinVal : SumDiff
      // The arithmetic shift yields all-ones for a negative SumDiff and zero
      // otherwise; XOR with MinVal turns that into MaxVal or MinVal
      // respectively.
11965 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
11966 SDValue Shift = DAG.getNode(ISD::SRA, dl, VT, SumDiff,
11967 DAG.getConstant(BitWidth - 1, dl, VT));
11968 Result = DAG.getNode(ISD::XOR, dl, VT, Shift, SatMin);
11969 return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
11970}
11971
      // Expands a three-way comparison node. UCMP uses unsigned predicates;
      // any other opcode reaching here uses the signed ones. Two SETCCs are
      // built (LHS < RHS and LHS > RHS) and combined either with two selects
      // giving -1 / 0 / 1 in ResVT, or as a subtraction of the two booleans
      // that is sign-extended or truncated to ResVT.
      // NOTE(review): parts of the condition choosing between the two forms,
      // and the guard on the swap below, are on lines that are not visible
      // here.
11973 unsigned Opcode = Node->getOpcode();
11974 SDValue LHS = Node->getOperand(0);
11975 SDValue RHS = Node->getOperand(1);
11976 EVT VT = LHS.getValueType();
11977 EVT ResVT = Node->getValueType(0);
11978 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11979 SDLoc dl(Node);
11980
11981 auto LTPredicate = (Opcode == ISD::UCMP ? ISD::SETULT : ISD::SETLT);
11982 auto GTPredicate = (Opcode == ISD::UCMP ? ISD::SETUGT : ISD::SETGT);
11983 SDValue IsLT = DAG.getSetCC(dl, BoolVT, LHS, RHS, LTPredicate);
11984 SDValue IsGT = DAG.getSetCC(dl, BoolVT, LHS, RHS, GTPredicate);
11985
11986 // We can't perform arithmetic on i1 values. Extending them would
11987 // probably result in worse codegen, so let's just use two selects instead.
11988 // Some targets are also just better off using selects rather than subtraction
11989 // because one of the conditions can be merged with one of the selects.
11990 // And finally, if we don't know the contents of high bits of a boolean value
11991 // we can't perform any arithmetic either.
11993 BoolVT.getScalarSizeInBits() == 1 ||
      // IsLT ? -1 : (IsGT ? 1 : 0)
11995 SDValue SelectZeroOrOne =
11996 DAG.getSelect(dl, ResVT, IsGT, DAG.getConstant(1, dl, ResVT),
11997 DAG.getConstant(0, dl, ResVT));
11998 return DAG.getSelect(dl, ResVT, IsLT, DAG.getAllOnesConstant(dl, ResVT),
11999 SelectZeroOrOne);
12000 }
12001
      // Arithmetic form: IsGT - IsLT computed in BoolVT.
12003 std::swap(IsGT, IsLT);
12004 return DAG.getSExtOrTrunc(DAG.getNode(ISD::SUB, dl, BoolVT, IsGT, IsLT), dl,
12005 ResVT);
12006}
12007
      // Expands a saturating left shift (SSHLSAT or USHLSAT, asserted below)
      // on integer operands: perform the plain SHL, shift the result back
      // (SRA for signed, SRL for unsigned), and select the saturation value
      // whenever the round trip does not reproduce LHS.
      // NOTE(review): the condition guarding the unroll below is on a line
      // that is not visible here.
12009 unsigned Opcode = Node->getOpcode();
12010 bool IsSigned = Opcode == ISD::SSHLSAT;
12011 SDValue LHS = Node->getOperand(0);
12012 SDValue RHS = Node->getOperand(1);
12013 EVT VT = LHS.getValueType();
12014 SDLoc dl(Node);
12015
12016 assert((Node->getOpcode() == ISD::SSHLSAT ||
12017 Node->getOpcode() == ISD::USHLSAT) &&
12018 "Expected a SHLSAT opcode");
12019 assert(VT.isInteger() && "Expected operands to be integers");
12020
12022 return DAG.UnrollVectorOp(Node);
12023
12024 // If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate.
12025
12026 unsigned BW = VT.getScalarSizeInBits();
12027 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
12028 SDValue Result = DAG.getNode(ISD::SHL, dl, VT, LHS, RHS);
12029 SDValue Orig =
12030 DAG.getNode(IsSigned ? ISD::SRA : ISD::SRL, dl, VT, Result, RHS);
12031
      // Saturation value: for signed shifts the sign of LHS picks the signed
      // minimum or maximum; for unsigned shifts it is the unsigned maximum.
12032 SDValue SatVal;
12033 if (IsSigned) {
12034 SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(BW), dl, VT);
12035 SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(BW), dl, VT);
12036 SDValue Cond =
12037 DAG.getSetCC(dl, BoolVT, LHS, DAG.getConstant(0, dl, VT), ISD::SETLT);
12038 SatVal = DAG.getSelect(dl, VT, Cond, SatMin, SatMax);
12039 } else {
12040 SatVal = DAG.getConstant(APInt::getMaxValue(BW), dl, VT);
12041 }
12042 SDValue Cond = DAG.getSetCC(dl, BoolVT, LHS, Orig, ISD::SETNE);
12043 return DAG.getSelect(dl, VT, Cond, SatVal, Result);
12044}
12045
12047 bool Signed, SDValue &Lo, SDValue &Hi,
12048 SDValue LHS, SDValue RHS,
12049 SDValue HiLHS, SDValue HiRHS) const {
      // Computes the double-width product of LHS and RHS from half-width
      // pieces, writing the low half to Lo and the high half to Hi (both of
      // type VT). HiLHS/HiRHS must be both set or both null; when set, their
      // cross products with the opposite low operand are added into Hi.
      // Signed may only be set when HiLHS/HiRHS are null (asserted).
12050 EVT VT = LHS.getValueType();
12051 assert(RHS.getValueType() == VT && "Mismatching operand types");
12052
12053 assert((HiLHS && HiRHS) || (!HiLHS && !HiRHS));
12054 assert((!Signed || !HiLHS) &&
12055 "Signed flag should only be set when HiLHS and HiRHS are null");
12056
12057 // We'll expand the multiplication by brute force because we have no other
12058 // options. This is a trivially-generalized version of the code from
12059 // Hacker's Delight (itself derived from Knuth's Algorithm M from section
12060 // 4.3.1). If Signed is set, we can use arithmetic right shifts to propagate
12061 // sign bits while calculating the Hi half.
12062 unsigned Bits = VT.getSizeInBits();
12063 unsigned HalfBits = Bits / 2;
      // LL/RL are the low halves of the operands (masked, so unsigned).
12064 SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(Bits, HalfBits), dl, VT);
12065 SDValue LL = DAG.getNode(ISD::AND, dl, VT, LHS, Mask);
12066 SDValue RL = DAG.getNode(ISD::AND, dl, VT, RHS, Mask);
12067
      // T = LL * RL, split into its low (TL) and high (TH) halves.
12068 SDValue T = DAG.getNode(ISD::MUL, dl, VT, LL, RL);
12069 SDValue TL = DAG.getNode(ISD::AND, dl, VT, T, Mask);
12070
12071 SDValue Shift = DAG.getShiftAmountConstant(HalfBits, VT, dl);
12072 // This is always an unsigned shift.
12073 SDValue TH = DAG.getNode(ISD::SRL, dl, VT, T, Shift);
12074
      // LH/RH are the high halves of the operands, sign-propagated when
      // Signed.
12075 unsigned ShiftOpc = Signed ? ISD::SRA : ISD::SRL;
12076 SDValue LH = DAG.getNode(ShiftOpc, dl, VT, LHS, Shift);
12077 SDValue RH = DAG.getNode(ShiftOpc, dl, VT, RHS, Shift);
12078
      // U = LH * RL + TH, split into UL and UH.
12079 SDValue U =
12080 DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LH, RL), TH);
12081 SDValue UL = DAG.getNode(ISD::AND, dl, VT, U, Mask);
12082 SDValue UH = DAG.getNode(ShiftOpc, dl, VT, U, Shift);
12083
      // V = LL * RH + UL; its high half VH carries into Hi.
12084 SDValue V =
12085 DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LL, RH), UL);
12086 SDValue VH = DAG.getNode(ShiftOpc, dl, VT, V, Shift);
12087
      // Lo = TL + (V << HalfBits)
12088 Lo = DAG.getNode(ISD::ADD, dl, VT, TL,
12089 DAG.getNode(ISD::SHL, dl, VT, V, Shift));
12090
      // Hi = LH * RH + (UH + VH)
12091 Hi = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LH, RH),
12092 DAG.getNode(ISD::ADD, dl, VT, UH, VH));
12093
12094 // If HiLHS and HiRHS are set, multiply them by the opposite low part and add
12095 // the products to Hi.
12096 if (HiLHS) {
12097 SDValue RHLL = DAG.getNode(ISD::MUL, dl, VT, HiRHS, LHS);
12098 SDValue RLLH = DAG.getNode(ISD::MUL, dl, VT, RHS, HiLHS);
12099 Hi = DAG.getNode(ISD::ADD, dl, VT, Hi,
12100 DAG.getNode(ISD::ADD, dl, VT, RHLL, RLLH));
12101 }
12102}
12103
/// Computes the double-width product of LHS and RHS, storing the low half in
/// Lo and the high half in Hi. When a MUL_I16/I32/I64/I128 libcall
/// implementation is available for the widened type, it is called with each
/// operand split into a (low, high) pair; otherwise the work is delegated to
/// forceExpandMultiply.
/// NOTE(review): the opening line of this signature (listing line 12104) is
/// missing from this extract, so the function name and the leading parameters
/// (DAG and dl are used below) are not visible here.
12105 bool Signed, const SDValue LHS,
12106 const SDValue RHS, SDValue &Lo,
12107 SDValue &Hi) const {
12108 EVT VT = LHS.getValueType();
12109 assert(RHS.getValueType() == VT && "Mismatching operand types");
12110 EVT WideVT = VT.widenIntegerElementType(*DAG.getContext());
12111 // We can fall back to a libcall with an illegal type for the MUL if we
12112 // have a libcall big enough.
12113 RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
12114 if (WideVT == MVT::i16)
12115 LC = RTLIB::MUL_I16;
12116 else if (WideVT == MVT::i32)
12117 LC = RTLIB::MUL_I32;
12118 else if (WideVT == MVT::i64)
12119 LC = RTLIB::MUL_I64;
12120 else if (WideVT == MVT::i128)
12121 LC = RTLIB::MUL_I128;
12122
      // No usable libcall implementation: expand the multiply inline instead.
      // NOTE(review): presumably this also covers LC == UNKNOWN_LIBCALL (no
      // width matched above) -- confirm against getLibcallImpl.
12123 RTLIB::LibcallImpl LibcallImpl = getLibcallImpl(LC);
12124 if (LibcallImpl == RTLIB::Unsupported) {
12125 forceExpandMultiply(DAG, dl, Signed, Lo, Hi, LHS, RHS);
12126 return;
12127 }
12128
      // Build the high halves of the widened operands: the replicated sign bit
      // for a signed multiply, zero for an unsigned one.
12129 SDValue HiLHS, HiRHS;
12130 if (Signed) {
12131 // The high part is obtained by SRA'ing all but one of the bits of low
12132 // part.
12133 unsigned LoSize = VT.getFixedSizeInBits();
12134 SDValue Shift = DAG.getShiftAmountConstant(LoSize - 1, VT, dl);
12135 HiLHS = DAG.getNode(ISD::SRA, dl, VT, LHS, Shift);
12136 HiRHS = DAG.getNode(ISD::SRA, dl, VT, RHS, Shift);
12137 } else {
12138 HiLHS = DAG.getConstant(0, dl, VT);
12139 HiRHS = DAG.getConstant(0, dl, VT);
12140 }
12141
12142 // Attempt a libcall.
12143 SDValue Ret;
      // NOTE(review): listing line 12144 is missing from this extract;
      // presumably it declares CallOptions -- confirm against upstream.
12145 CallOptions.setIsSigned(Signed);
12146 CallOptions.setIsPostTypeLegalization(true);
      // NOTE(review): listing line 12147 (the condition that opens the if/else
      // below) is missing from this extract.
12148 // Halves of WideVT are packed into registers in different order
12149 // depending on platform endianness. This is usually handled by
12150 // the C calling convention, but we can't defer to it in
12151 // the legalizer.
12152 SDValue Args[] = {LHS, HiLHS, RHS, HiRHS};
12153 Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
12154 } else {
12155 SDValue Args[] = {HiLHS, LHS, HiRHS, RHS};
12156 Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
12157 }
      // NOTE(review): listing line 12158 (presumably the opening of the assert
      // whose message follows) is missing from this extract.
12159 "Ret value is a collection of constituent nodes holding result.");
      // Unpack the two halves of the result; which operand holds the low half
      // depends on the data layout's endianness.
12160 if (DAG.getDataLayout().isLittleEndian()) {
12161 // Same as above.
12162 Lo = Ret.getOperand(0);
12163 Hi = Ret.getOperand(1);
12164 } else {
12165 Lo = Ret.getOperand(1);
12166 Hi = Ret.getOperand(0);
12167 }
12168}
12169
/// Expands a fixed point multiplication node (SMULFIX, UMULFIX, SMULFIXSAT or
/// UMULFIXSAT) into simpler DAG nodes. Operands 0 and 1 of the node are the
/// multiplicands and operand 2 is the constant scale. Returns the expanded
/// value, or an empty SDValue when the type is a vector and none of the
/// multiply strategies tried below is legal or custom.
/// NOTE(review): the line carrying the function name and parameter list
/// (listing line 12171) is missing from this extract.
12170SDValue
12172 assert((Node->getOpcode() == ISD::SMULFIX ||
12173 Node->getOpcode() == ISD::UMULFIX ||
12174 Node->getOpcode() == ISD::SMULFIXSAT ||
12175 Node->getOpcode() == ISD::UMULFIXSAT) &&
12176 "Expected a fixed point multiplication opcode");
12177
12178 SDLoc dl(Node);
12179 SDValue LHS = Node->getOperand(0);
12180 SDValue RHS = Node->getOperand(1);
12181 EVT VT = LHS.getValueType();
12182 unsigned Scale = Node->getConstantOperandVal(2);
12183 bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
12184 Node->getOpcode() == ISD::UMULFIXSAT);
12185 bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
12186 Node->getOpcode() == ISD::SMULFIXSAT);
12187 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
12188 unsigned VTSize = VT.getScalarSizeInBits();
12189
      // With a zero scale the operation is an ordinary (possibly saturating)
      // integer multiply; try the cheap forms first and fall through to the
      // generic expansion below if none applies.
12190 if (!Scale) {
12191 // [us]mul.fix(a, b, 0) -> mul(a, b)
12192 if (!Saturating) {
      // NOTE(review): listing line 12193 is missing from this extract;
      // presumably it guards the return below with a legality check -- confirm
      // against upstream.
12194 return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
12195 } else if (Signed && isOperationLegalOrCustom(ISD::SMULO, VT)) {
12196 SDValue Result =
12197 DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
12198 SDValue Product = Result.getValue(0);
12199 SDValue Overflow = Result.getValue(1);
12200 SDValue Zero = DAG.getConstant(0, dl, VT);
12201
12202 APInt MinVal = APInt::getSignedMinValue(VTSize);
12203 APInt MaxVal = APInt::getSignedMaxValue(VTSize);
12204 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
12205 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
12206 // Xor the inputs, if resulting sign bit is 0 the product will be
12207 // positive, else negative.
12208 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
12209 SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Xor, Zero, ISD::SETLT);
12210 Result = DAG.getSelect(dl, VT, ProdNeg, SatMin, SatMax);
12211 return DAG.getSelect(dl, VT, Overflow, Result, Product);
12212 } else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) {
12213 SDValue Result =
12214 DAG.getNode(ISD::UMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
12215 SDValue Product = Result.getValue(0);
12216 SDValue Overflow = Result.getValue(1);
12217
12218 APInt MaxVal = APInt::getMaxValue(VTSize);
12219 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
12220 return DAG.getSelect(dl, VT, Overflow, SatMax, Product);
12221 }
12222 }
12223
12224 assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
12225 "Expected scale to be less than the number of bits if signed or at "
12226 "most the number of bits if unsigned.");
12227 assert(LHS.getValueType() == RHS.getValueType() &&
12228 "Expected both operands to be the same type");
12229
12230 // Get the upper and lower bits of the result.
      // Strategies are tried in order: a combined lo/hi multiply, a MUL plus a
      // high-half multiply, a multiply in the widened type, and finally (for
      // scalars only) forceExpandWideMUL.
12231 SDValue Lo, Hi;
12232 unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
12233 unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
12234 EVT WideVT = VT.widenIntegerElementType(*DAG.getContext());
12235 if (isOperationLegalOrCustom(LoHiOp, VT)) {
12236 SDValue Result = DAG.getNode(LoHiOp, dl, DAG.getVTList(VT, VT), LHS, RHS);
12237 Lo = Result.getValue(0);
12238 Hi = Result.getValue(1);
12239 } else if (isOperationLegalOrCustom(HiOp, VT)) {
12240 Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
12241 Hi = DAG.getNode(HiOp, dl, VT, LHS, RHS);
12242 } else if (isOperationLegalOrCustom(ISD::MUL, WideVT)) {
12243 // Try for a multiplication using a wider type.
12244 unsigned Ext = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
12245 SDValue LHSExt = DAG.getNode(Ext, dl, WideVT, LHS);
12246 SDValue RHSExt = DAG.getNode(Ext, dl, WideVT, RHS);
12247 SDValue Res = DAG.getNode(ISD::MUL, dl, WideVT, LHSExt, RHSExt);
12248 Lo = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
      // Only the low VTSize bits of the shifted value survive the truncate
      // below, so the bits shifted in at the top do not reach Hi.
12249 SDValue Shifted =
12250 DAG.getNode(ISD::SRA, dl, WideVT, Res,
12251 DAG.getShiftAmountConstant(VTSize, WideVT, dl));
12252 Hi = DAG.getNode(ISD::TRUNCATE, dl, VT, Shifted);
12253 } else if (VT.isVector()) {
12254 return SDValue();
12255 } else {
12256 forceExpandWideMUL(DAG, dl, Signed, LHS, RHS, Lo, Hi);
12257 }
12258
12259 if (Scale == VTSize)
12260 // Result is just the top half since we'd be shifting by the width of the
12261 // operand. Overflow impossible so this works for both UMULFIX and
12262 // UMULFIXSAT.
12263 return Hi;
12264
12265 // The result will need to be shifted right by the scale since both operands
12266 // are scaled. The result is given to us in 2 halves, so we only want part of
12267 // both in the result.
12268 SDValue Result = DAG.getNode(ISD::FSHR, dl, VT, Hi, Lo,
12269 DAG.getShiftAmountConstant(Scale, VT, dl));
12270 if (!Saturating)
12271 return Result;
12272
12273 if (!Signed) {
12274 // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
12275 // widened multiplication) aren't all zeroes.
12276
12277 // Saturate to max if ((Hi >> Scale) != 0),
12278 // which is the same as if (Hi > ((1 << Scale) - 1))
12279 APInt MaxVal = APInt::getMaxValue(VTSize);
12280 SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale),
12281 dl, VT);
12282 Result = DAG.getSelectCC(dl, Hi, LowMask,
12283 DAG.getConstant(MaxVal, dl, VT), Result,
12284 ISD::SETUGT);
12285
12286 return Result;
12287 }
12288
12289 // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
12290 // widened multiplication) aren't all ones or all zeroes.
12291
12292 SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(VTSize), dl, VT);
12293 SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(VTSize), dl, VT);
12294
      // Reached with Scale == 0 only for a signed saturating multiply that did
      // not take the SMULO path above: overflow is detected by comparing Hi
      // with the sign-extension of Lo.
12295 if (Scale == 0) {
12296 SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, Lo,
12297 DAG.getShiftAmountConstant(VTSize - 1, VT, dl));
12298 SDValue Overflow = DAG.getSetCC(dl, BoolVT, Hi, Sign, ISD::SETNE);
12299 // Saturated to SatMin if wide product is negative, and SatMax if wide
12300 // product is positive ...
12301 SDValue Zero = DAG.getConstant(0, dl, VT);
12302 SDValue ResultIfOverflow = DAG.getSelectCC(dl, Hi, Zero, SatMin, SatMax,
12303 ISD::SETLT);
12304 // ... but only if we overflowed.
12305 return DAG.getSelect(dl, VT, Overflow, ResultIfOverflow, Result);
12306 }
12307
12308 // We handled Scale==0 above so all the bits to examine are in Hi.
12309
12310 // Saturate to max if ((Hi >> (Scale - 1)) > 0),
12311 // which is the same as if (Hi > (1 << (Scale - 1)) - 1)
12312 SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale - 1),
12313 dl, VT);
12314 Result = DAG.getSelectCC(dl, Hi, LowMask, SatMax, Result, ISD::SETGT);
12315 // Saturate to min if ((Hi >> (Scale - 1)) < -1),
12316 // which is the same as if (Hi < (-1 << (Scale - 1)))
12317 SDValue HighMask =
12318 DAG.getConstant(APInt::getHighBitsSet(VTSize, VTSize - Scale + 1),
12319 dl, VT);
12320 Result = DAG.getSelectCC(dl, Hi, HighMask, SatMin, Result, ISD::SETLT);
12321 return Result;
12322}
12323
/// Attempts to expand a fixed point division (SDIVFIX, SDIVFIXSAT, UDIVFIX or
/// UDIVFIXSAT) of LHS by RHS with the given Scale without changing the value
/// type. Returns the quotient node on success, or an empty SDValue when the
/// operands do not have enough spare bits to absorb the scaling in this type.
/// Signed quotients are rounded towards negative infinity. No saturation is
/// applied to the returned quotient in this function.
/// NOTE(review): the first line of the parameter list (listing line 12325,
/// which must introduce at least Opcode and dl) is missing from this extract.
12324SDValue
12326 SDValue LHS, SDValue RHS,
12327 unsigned Scale, SelectionDAG &DAG) const {
12328 assert((Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT ||
12329 Opcode == ISD::UDIVFIX || Opcode == ISD::UDIVFIXSAT) &&
12330 "Expected a fixed point division opcode");
12331
12332 EVT VT = LHS.getValueType();
12333 bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
12334 bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
12335 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
12336
12337 // If there is enough room in the type to upscale the LHS or downscale the
12338 // RHS before the division, we can perform it in this type without having to
12339 // resize. For signed operations, the LHS headroom is the number of
12340 // redundant sign bits, and for unsigned ones it is the number of zeroes.
12341 // The headroom for the RHS is the number of trailing zeroes.
12342 unsigned LHSLead = Signed ? DAG.ComputeNumSignBits(LHS) - 1
      // NOTE(review): listing line 12343 (the unsigned arm of this conditional
      // expression) is missing from this extract.
12344 unsigned RHSTrail = DAG.computeKnownBits(RHS).countMinTrailingZeros();
12345
12346 // For signed saturating operations, we need to be able to detect true integer
12347 // division overflow; that is, when you have MIN / -EPS. However, this
12348 // is undefined behavior and if we emit divisions that could take such
12349 // values it may cause undesired behavior (arithmetic exceptions on x86, for
12350 // example).
12351 // Avoid this by requiring an extra bit so that we never get this case.
12352 // FIXME: This is a bit unfortunate as it means that for an 8-bit 7-scale
12353 // signed saturating division, we need to emit a whopping 32-bit division.
12354 if (LHSLead + RHSTrail < Scale + (unsigned)(Saturating && Signed))
12355 return SDValue();
12356
      // Absorb as much of the scale as possible by shifting the LHS up; the
      // remainder is taken out of the RHS.
12357 unsigned LHSShift = std::min(LHSLead, Scale);
12358 unsigned RHSShift = Scale - LHSShift;
12359
12360 // At this point, we know that if we shift the LHS up by LHSShift and the
12361 // RHS down by RHSShift, we can emit a regular division with a final scaling
12362 // factor of Scale.
12363
12364 if (LHSShift)
12365 LHS = DAG.getNode(ISD::SHL, dl, VT, LHS,
12366 DAG.getShiftAmountConstant(LHSShift, VT, dl));
12367 if (RHSShift)
12368 RHS = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, VT, RHS,
12369 DAG.getShiftAmountConstant(RHSShift, VT, dl));
12370
12371 SDValue Quot;
12372 if (Signed) {
12373 // For signed operations, if the resulting quotient is negative and the
12374 // remainder is nonzero, subtract 1 from the quotient to round towards
12375 // negative infinity.
12376 SDValue Rem;
12377 // FIXME: Ideally we would always produce an SDIVREM here, but if the
12378 // type isn't legal, SDIVREM cannot be expanded. There is no reason why
12379 // we couldn't just form a libcall, but the type legalizer doesn't do it.
12380 if (isTypeLegal(VT) &&
      // NOTE(review): listing line 12381 (the rest of this condition) is
      // missing from this extract.
12382 Quot = DAG.getNode(ISD::SDIVREM, dl,
12383 DAG.getVTList(VT, VT),
12384 LHS, RHS);
12385 Rem = Quot.getValue(1);
12386 Quot = Quot.getValue(0);
12387 } else {
12388 Quot = DAG.getNode(ISD::SDIV, dl, VT,
12389 LHS, RHS);
12390 Rem = DAG.getNode(ISD::SREM, dl, VT,
12391 LHS, RHS);
12392 }
      // The quotient is negative exactly when the operand signs differ.
12393 SDValue Zero = DAG.getConstant(0, dl, VT);
12394 SDValue RemNonZero = DAG.getSetCC(dl, BoolVT, Rem, Zero, ISD::SETNE);
12395 SDValue LHSNeg = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETLT);
12396 SDValue RHSNeg = DAG.getSetCC(dl, BoolVT, RHS, Zero, ISD::SETLT);
12397 SDValue QuotNeg = DAG.getNode(ISD::XOR, dl, BoolVT, LHSNeg, RHSNeg);
12398 SDValue Sub1 = DAG.getNode(ISD::SUB, dl, VT, Quot,
12399 DAG.getConstant(1, dl, VT));
12400 Quot = DAG.getSelect(dl, VT,
12401 DAG.getNode(ISD::AND, dl, BoolVT, RemNonZero, QuotNeg),
12402 Sub1, Quot);
12403 } else
12404 Quot = DAG.getNode(ISD::UDIV, dl, VT,
12405 LHS, RHS);
12406
12407 return Quot;
12408}
12409
/// Expands an unsigned add/subtract-with-overflow node. Node is treated as an
/// addition when its opcode is UADDO and as a subtraction otherwise. Result
/// receives the arithmetic result and Overflow receives the overflow flag,
/// converted to the node's second result type.
/// NOTE(review): the opening line of this signature (listing line 12410) is
/// missing from this extract.
12411 SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
12412 SDLoc dl(Node);
12413 SDValue LHS = Node->getOperand(0);
12414 SDValue RHS = Node->getOperand(1);
12415 bool IsAdd = Node->getOpcode() == ISD::UADDO;
12416
12417 // If UADDO_CARRY/USUBO_CARRY is legal, use that instead.
12418 unsigned OpcCarry = IsAdd ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
12419 if (isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) {
      // A constant-zero carry-in makes the carry form equivalent to the plain
      // overflow operation.
12420 SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1));
12421 SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(),
12422 { LHS, RHS, CarryIn });
12423 Result = SDValue(NodeCarry.getNode(), 0);
12424 Overflow = SDValue(NodeCarry.getNode(), 1);
12425 return;
12426 }
12427
12428 Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
12429 LHS.getValueType(), LHS, RHS);
12430
12431 EVT ResultType = Node->getValueType(1);
12432 EVT SetCCType = getSetCCResultType(
12433 DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
12434 SDValue SetCC;
12435 if (IsAdd && isOneConstant(RHS)) {
12436 // Special case: uaddo X, 1 overflowed if X+1 is 0. This potentially reduces
12437 // the live range of X. We assume comparing with 0 is cheap.
12438 // The general case (X + C) < C is not necessarily beneficial. Although we
12439 // reduce the live range of X, we may introduce the materialization of
12440 // constant C.
12441 SetCC =
12442 DAG.getSetCC(dl, SetCCType, Result,
12443 DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETEQ);
12444 } else if (IsAdd && isAllOnesConstant(RHS)) {
12445 // Special case: uaddo X, -1 overflows if X != 0.
12446 SetCC =
12447 DAG.getSetCC(dl, SetCCType, LHS,
12448 DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETNE);
12449 } else {
      // General case: an addition overflowed if the result is (unsigned) less
      // than LHS; a subtraction overflowed if the result is greater than LHS.
12450 ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
12451 SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC);
12452 }
12453 Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
12454}
12455
/// Expands a signed add/subtract-with-overflow node. Node is treated as an
/// addition when its opcode is SADDO and as a subtraction otherwise. Result
/// receives the wrapped arithmetic result and Overflow receives the overflow
/// flag, converted to the node's second result type.
/// NOTE(review): the opening line of this signature (listing line 12456) is
/// missing from this extract.
12457 SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
12458 SDLoc dl(Node);
12459 SDValue LHS = Node->getOperand(0);
12460 SDValue RHS = Node->getOperand(1);
12461 bool IsAdd = Node->getOpcode() == ISD::SADDO;
12462
12463 Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
12464 LHS.getValueType(), LHS, RHS);
12465
12466 EVT ResultType = Node->getValueType(1);
12467 EVT OType = getSetCCResultType(
12468 DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
12469
12470 // If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
      // The wrapped result differs from the saturated one exactly when the
      // operation overflowed.
12471 unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
12472 if (isOperationLegal(OpcSat, LHS.getValueType())) {
12473 SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
12474 SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
12475 Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
12476 return;
12477 }
12478
12479 SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
12480
12481 if (IsAdd) {
12482 // For an addition, the result should be less than one of the operands (LHS)
12483 // if and only if the other operand (RHS) is negative, otherwise there will
12484 // be overflow.
12485 SDValue ResultLowerThanLHS =
12486 DAG.getSetCC(dl, OType, Result, LHS, ISD::SETLT);
12487 SDValue RHSNegative = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETLT);
12488 Overflow = DAG.getBoolExtOrTrunc(
12489 DAG.getNode(ISD::XOR, dl, OType, RHSNegative, ResultLowerThanLHS), dl,
12490 ResultType, ResultType);
12491 } else {
12492 // For subtraction, overflow occurs when the signed comparison of operands
12493 // doesn't match the sign of the result.
12494 SDValue LHSLessThanRHS = DAG.getSetCC(dl, OType, LHS, RHS, ISD::SETLT);
12495 SDValue ResultNegative = DAG.getSetCC(dl, OType, Result, Zero, ISD::SETLT);
12496 Overflow = DAG.getBoolExtOrTrunc(
12497 DAG.getNode(ISD::XOR, dl, OType, LHSLessThanRHS, ResultNegative), dl,
12498 ResultType, ResultType);
12499 }
12500}
12501
/// Expands a multiply-with-overflow node; it is treated as signed when its
/// opcode is SMULO and as unsigned otherwise. On success Result receives the
/// low half of the product, Overflow receives the overflow flag, and true is
/// returned. Returns false when the type is a vector and none of the multiply
/// strategies tried below is available.
/// NOTE(review): the opening line of this signature (listing line 12502) is
/// missing from this extract; Node and Result are used below but their
/// declarations are not visible here.
12503 SDValue &Overflow, SelectionDAG &DAG) const {
12504 SDLoc dl(Node);
12505 EVT VT = Node->getValueType(0);
12506 EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
12507 SDValue LHS = Node->getOperand(0);
12508 SDValue RHS = Node->getOperand(1);
12509 bool isSigned = Node->getOpcode() == ISD::SMULO;
12510
12511 // For power-of-two multiplications we can use a simpler shift expansion.
12512 if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) {
12513 const APInt &C = RHSC->getAPIntValue();
12514 // mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
12515 if (C.isPowerOf2()) {
12516 // smulo(x, signed_min) is same as umulo(x, signed_min).
12517 bool UseArithShift = isSigned && !C.isMinSignedValue();
12518 SDValue ShiftAmt = DAG.getShiftAmountConstant(C.logBase2(), VT, dl);
12519 Result = DAG.getNode(ISD::SHL, dl, VT, LHS, ShiftAmt);
12520 Overflow = DAG.getSetCC(dl, SetCCVT,
12521 DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
12522 dl, VT, Result, ShiftAmt),
12523 LHS, ISD::SETNE);
12524 return true;
12525 }
12526 }
12527
12528 SDValue BottomHalf;
12529 SDValue TopHalf;
12530 EVT WideVT = VT.widenIntegerElementType(*DAG.getContext());
12531
      // NOTE(review): listing lines 12533-12534 (the initializer of this
      // table) are missing from this extract. From its uses below, column 0 is
      // a two-result multiply opcode, column 1 a high-half multiply opcode and
      // column 2 an extension opcode, with the row selected by isSigned.
12532 static const unsigned Ops[2][3] =
12535 if (isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
12536 BottomHalf = DAG.getNode(Ops[isSigned][0], dl, DAG.getVTList(VT, VT), LHS,
12537 RHS);
12538 TopHalf = BottomHalf.getValue(1);
12539 } else if (isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
12540 BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
12541 TopHalf = DAG.getNode(Ops[isSigned][1], dl, VT, LHS, RHS);
12542 } else if (isTypeLegal(WideVT)) {
      // Multiply in the widened type and split the product back into halves.
12543 LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
12544 RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
12545 SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
12546 BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
12547 SDValue ShiftAmt =
12548 DAG.getShiftAmountConstant(VT.getScalarSizeInBits(), WideVT, dl);
12549 TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT,
12550 DAG.getNode(ISD::SRL, dl, WideVT, Mul, ShiftAmt));
12551 } else {
12552 if (VT.isVector())
12553 return false;
12554
12555 forceExpandWideMUL(DAG, dl, isSigned, LHS, RHS, BottomHalf, TopHalf);
12556 }
12557
12558 Result = BottomHalf;
      // Signed: no overflow iff the top half equals the sign-extension of the
      // bottom half. Unsigned: no overflow iff the top half is zero.
12559 if (isSigned) {
12560 SDValue ShiftAmt = DAG.getShiftAmountConstant(
12561 VT.getScalarSizeInBits() - 1, BottomHalf.getValueType(), dl);
12562 SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
12563 Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE);
12564 } else {
12565 Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf,
12566 DAG.getConstant(0, dl, VT), ISD::SETNE);
12567 }
12568
12569 // Truncate the result if SetCC returns a larger type than needed.
12570 EVT RType = Node->getValueType(1);
12571 if (RType.bitsLT(Overflow.getValueType()))
12572 Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow);
12573
12574 assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
12575 "Unexpected result type for S/UMULO legalization");
12576 return true;
12577}
12578
/// Expands a vector reduction node (operand 0 is the vector) into operations
/// using the base opcode returned by ISD::getVecReduceBaseOpcode. For
/// power-of-two vector types the vector is first split in halves that are
/// combined with the base opcode; whatever remains is reduced element by
/// element. The result is any-extended when the node's result type differs
/// from the element type.
/// NOTE(review): the signature (listing line 12579) and several interior
/// lines are missing from this extract; each gap is flagged where it occurs.
12580 SDLoc dl(Node);
12581 ISD::NodeType BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
12582 SDValue Op = Node->getOperand(0);
12583 SDNodeFlags Flags = Node->getFlags();
12584 EVT VT = Op.getValueType();
12585
12586 // Try to use a shuffle reduction for power of two vectors.
12587 if (VT.isPow2VectorType()) {
12588 // See if the reduction opcode is safe to use with widened types.
12589 bool WidenSrc = false;
12590 switch (Node->getOpcode()) {
      // NOTE(review): listing lines 12591-12592 and 12598-12601 (further
      // switch labels) are missing from this extract.
12593 case ISD::VECREDUCE_ADD:
12594 case ISD::VECREDUCE_MUL:
12595 case ISD::VECREDUCE_AND:
12596 case ISD::VECREDUCE_OR:
12597 case ISD::VECREDUCE_XOR:
12602 WidenSrc = VT.isFixedLengthVector();
12603 break;
12604 }
12605
      // NOTE(review): listing line 12606 is missing from this extract; the
      // continue/break statements below imply it opens a loop that halves VT
      // on each iteration -- confirm the loop condition against upstream.
12607 EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
12608 if (!isOperationLegalOrCustom(BaseOpcode, HalfVT)) {
12609 if (WidenSrc && Op.getOpcode() != ISD::BUILD_VECTOR) {
12610 // Attempt to widen the source vectors to a legal op.
12611 EVT WideVT = getTypeToTransformTo(*DAG.getContext(), HalfVT);
12612 if (WideVT.isVector() &&
12613 WideVT.getScalarType() == HalfVT.getScalarType() &&
12614 WideVT.getVectorNumElements() >= HalfVT.getVectorNumElements() &&
12615 isOperationLegalOrCustom(BaseOpcode, WideVT)) {
      // Place each half at index 0 of a poison WideVT vector, combine the two
      // in WideVT, then extract the HalfVT-sized result from index 0.
12616 SDValue Lo, Hi;
12617 std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
12618 Lo = DAG.getInsertSubvector(dl, DAG.getPOISON(WideVT), Lo, 0);
12619 Hi = DAG.getInsertSubvector(dl, DAG.getPOISON(WideVT), Hi, 0);
12620 Op = DAG.getNode(BaseOpcode, dl, WideVT, Lo, Hi, Flags);
12621 Op = DAG.getExtractSubvector(dl, HalfVT, Op, 0);
12622 VT = HalfVT;
12623 continue;
12624 }
12625 }
12626 break;
12627 }
12628
12629 SDValue Lo, Hi;
12630 std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
12631 Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi, Flags);
12632 VT = HalfVT;
12633
12634 // Stop if splitting is enough to make the reduction legal.
12635 if (isOperationLegalOrCustom(Node->getOpcode(), HalfVT))
12636 return DAG.getNode(Node->getOpcode(), dl, Node->getValueType(0), Op,
12637 Flags);
12638 }
12639 }
12640
12641 if (VT.isScalableVector())
      // NOTE(review): listing line 12642 (the call that takes the message
      // below) is missing from this extract.
12643 "Expanding reductions for scalable vectors is undefined.");
12644
12645 EVT EltVT = VT.getVectorElementType();
12646 unsigned NumElts = VT.getVectorNumElements();
12647
      // NOTE(review): listing line 12648 (presumably the declaration of Ops)
      // is missing from this extract.
12649 DAG.ExtractVectorElements(Op, Ops, 0, NumElts);
12650
      // Fold the remaining elements left to right with the base opcode.
12651 SDValue Res = Ops[0];
12652 for (unsigned i = 1; i < NumElts; i++)
12653 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Flags);
12654
12655 // Result type may be wider than element type.
12656 if (EltVT != Node->getValueType(0))
12657 Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
12658 return Res;
12659}
12660
/// Expands a vector reduction node that carries a start value. Operand 0 is
/// the accumulator and operand 1 the vector; the elements are folded into the
/// accumulator one at a time, in index order, using the base opcode returned
/// by ISD::getVecReduceBaseOpcode.
/// NOTE(review): the signature (listing line 12661) is missing from this
/// extract.
12662 SDLoc dl(Node);
12663 SDValue AccOp = Node->getOperand(0);
12664 SDValue VecOp = Node->getOperand(1);
12665 SDNodeFlags Flags = Node->getFlags();
12666
12667 EVT VT = VecOp.getValueType();
12668 EVT EltVT = VT.getVectorElementType();
12669
12670 if (VT.isScalableVector())
      // NOTE(review): listing line 12671 (the call that takes the message
      // below) is missing from this extract.
12672 "Expanding reductions for scalable vectors is undefined.");
12673
12674 unsigned NumElts = VT.getVectorNumElements();
12675
      // NOTE(review): listing line 12676 (presumably the declaration of Ops)
      // is missing from this extract.
12677 DAG.ExtractVectorElements(VecOp, Ops, 0, NumElts);
12678
12679 unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
12680
      // Start from the accumulator and apply the elements in order.
12681 SDValue Res = AccOp;
12682 for (unsigned i = 0; i < NumElts; i++)
12683 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Flags);
12684
12685 return Res;
12686}
12687
/// Expands a remainder node (signed when its opcode is SREM, unsigned
/// otherwise) in terms of a combined divide/remainder or a plain divide. On
/// success the remainder is stored in Result and true is returned; false is
/// returned when neither operation is legal or custom for the type.
/// NOTE(review): the opening line of this signature (listing line 12688) is
/// missing from this extract; Node and Result are used below but their
/// declarations are not visible here.
12689 SelectionDAG &DAG) const {
12690 EVT VT = Node->getValueType(0);
12691 SDLoc dl(Node);
12692 bool isSigned = Node->getOpcode() == ISD::SREM;
12693 unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
12694 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
12695 SDValue Dividend = Node->getOperand(0);
12696 SDValue Divisor = Node->getOperand(1);
      // Preferred: take the second result of the combined divide/remainder.
12697 if (isOperationLegalOrCustom(DivRemOpc, VT)) {
12698 SDVTList VTs = DAG.getVTList(VT, VT);
12699 Result = DAG.getNode(DivRemOpc, dl, VTs, Dividend, Divisor).getValue(1);
12700 return true;
12701 }
12702 if (isOperationLegalOrCustom(DivOpc, VT)) {
12703 // X % Y -> X-X/Y*Y
12704 SDValue Divide = DAG.getNode(DivOpc, dl, VT, Dividend, Divisor);
12705 SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Divide, Divisor);
12706 Result = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
12707 return true;
12708 }
12709 return false;
12710}
12711
/// Expands a saturating floating-point to integer conversion node (signed
/// when its opcode is FP_TO_SINT_SAT, unsigned otherwise). Operand 0 is the
/// floating-point source and operand 1 is a VTSDNode giving the type whose
/// width the result saturates to. Out-of-range inputs produce the minimum or
/// maximum value of the saturation width and NaN produces zero.
/// NOTE(review): the opening line of this signature (listing line 12712) is
/// missing from this extract; Node is used below but its declaration is not
/// visible here.
12713 SelectionDAG &DAG) const {
12714 bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT;
12715 SDLoc dl(SDValue(Node, 0));
12716 SDValue Src = Node->getOperand(0);
12717
12718 // DstVT is the result type, while SatVT is the size to which we saturate
12719 EVT SrcVT = Src.getValueType();
12720 EVT DstVT = Node->getValueType(0);
12721
12722 EVT SatVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
12723 unsigned SatWidth = SatVT.getScalarSizeInBits();
12724 unsigned DstWidth = DstVT.getScalarSizeInBits();
12725 assert(SatWidth <= DstWidth &&
12726 "Expected saturation width smaller than result width");
12727
12728 // Determine minimum and maximum integer values and their corresponding
12729 // floating-point values.
12730 APInt MinInt, MaxInt;
12731 if (IsSigned) {
12732 MinInt = APInt::getSignedMinValue(SatWidth).sext(DstWidth);
12733 MaxInt = APInt::getSignedMaxValue(SatWidth).sext(DstWidth);
12734 } else {
12735 MinInt = APInt::getMinValue(SatWidth).zext(DstWidth);
12736 MaxInt = APInt::getMaxValue(SatWidth).zext(DstWidth);
12737 }
12738
12739 // We cannot risk emitting FP_TO_XINT nodes with a source VT of [b]f16, as
12740 // libcall emission cannot handle this. Large result types will fail.
12741 if (SrcVT == MVT::f16 || SrcVT == MVT::bf16) {
12742 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Src);
12743 SrcVT = Src.getValueType();
12744 }
12745
12746 const fltSemantics &Sem = SrcVT.getFltSemantics();
12747 APFloat MinFloat(Sem);
12748 APFloat MaxFloat(Sem);
12749
      // Convert the integer bounds to the source float type, rounding towards
      // zero, and record whether both conversions were exact.
12750 APFloat::opStatus MinStatus =
12751 MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
12752 APFloat::opStatus MaxStatus =
12753 MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
12754 bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
12755 !(MaxStatus & APFloat::opStatus::opInexact);
12756
12757 SDValue MinFloatNode = DAG.getConstantFP(MinFloat, dl, SrcVT);
12758 SDValue MaxFloatNode = DAG.getConstantFP(MaxFloat, dl, SrcVT);
12759
12760 // If the integer bounds are exactly representable as floats and min/max are
12761 // legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
12762 // of comparisons and selects.
      // EmitMinMax returns an empty SDValue when either opcode is not legal or
      // custom for SrcVT, so the caller can try the next min/max flavour.
12763 auto EmitMinMax = [&](unsigned MinOpcode, unsigned MaxOpcode,
12764 bool MayPropagateNaN) {
12765 bool MinMaxLegal = isOperationLegalOrCustom(MinOpcode, SrcVT) &&
12766 isOperationLegalOrCustom(MaxOpcode, SrcVT);
12767 if (!MinMaxLegal)
12768 return SDValue();
12769
12770 SDValue Clamped = Src;
12771
12772 // Clamp Src by MinFloat from below. If !MayPropagateNaN and Src is NaN
12773 // then the result is MinFloat.
12774 Clamped = DAG.getNode(MaxOpcode, dl, SrcVT, Clamped, MinFloatNode);
12775 // Clamp by MaxFloat from above. If !MayPropagateNaN then NaN cannot occur.
12776 Clamped = DAG.getNode(MinOpcode, dl, SrcVT, Clamped, MaxFloatNode);
12777 // Convert clamped value to integer.
12778 SDValue FpToInt = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT,
12779 dl, DstVT, Clamped);
12780
12781 // If !MayPropagateNaN and the conversion is unsigned we're done,
12782 // because we mapped NaN to MinFloat, which will cast to zero.
12783 if (!MayPropagateNaN && !IsSigned)
12784 return FpToInt;
12785
12786 // Otherwise, select 0 if Src is NaN.
12787 SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
12788 EVT SetCCVT =
12789 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
12790 SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
12791 return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, FpToInt);
12792 };
12793 if (AreExactFloatBounds) {
12794 if (SDValue Res = EmitMinMax(ISD::FMINIMUMNUM, ISD::FMAXIMUMNUM,
12795 /*MayPropagateNaN=*/false))
12796 return Res;
12797 // These may propagate NaN for sNaN operands.
12798 if (SDValue Res =
12799 EmitMinMax(ISD::FMINNUM, ISD::FMAXNUM, /*MayPropagateNaN=*/true))
12800 return Res;
12801 // These always propagate NaN.
12802 if (SDValue Res =
12803 EmitMinMax(ISD::FMINIMUM, ISD::FMAXIMUM, /*MayPropagateNaN=*/true))
12804 return Res;
12805 }
12806
      // Fallback: convert directly, then replace out-of-range and NaN results
      // with compare+select sequences.
12807 SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT);
12808 SDValue MaxIntNode = DAG.getConstant(MaxInt, dl, DstVT);
12809
12810 // Result of direct conversion. The assumption here is that the operation is
12811 // non-trapping and it's fine to apply it to an out-of-range value if we
12812 // select it away later.
12813 SDValue FpToInt =
12814 DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl, DstVT, Src);
12815
12816 SDValue Select = FpToInt;
12817
12818 EVT SetCCVT =
12819 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
12820
12821 // If Src ULT MinFloat, select MinInt. In particular, this also selects
12822 // MinInt if Src is NaN.
12823 SDValue ULT = DAG.getSetCC(dl, SetCCVT, Src, MinFloatNode, ISD::SETULT);
12824 Select = DAG.getSelect(dl, DstVT, ULT, MinIntNode, Select);
12825 // If Src OGT MaxFloat, select MaxInt.
12826 SDValue OGT = DAG.getSetCC(dl, SetCCVT, Src, MaxFloatNode, ISD::SETOGT);
12827 Select = DAG.getSelect(dl, DstVT, OGT, MaxIntNode, Select);
12828
12829 // In the unsigned case we are done, because we mapped NaN to MinInt, which
12830 // is already zero.
12831 if (!IsSigned)
12832 return Select;
12833
12834 // Otherwise, select 0 if Src is NaN.
12835 SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
12836 SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
12837 return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, Select);
12838}
12839
/// Rounds the floating-point value Op to ResultVT using round-to-odd: when the
/// narrowing is inexact and the narrowed bit pattern is even, the pattern is
/// adjusted by one so that its lowest bit is set. Op is returned unchanged
/// when its scalar type already equals that of ResultVT. A caller can then
/// round the result again to a still narrower type without double-rounding
/// error.
/// NOTE(review): the opening line of this signature (listing line 12840) is
/// missing from this extract; ResultVT and Op are used below but their
/// declarations are not visible here.
12841 const SDLoc &dl,
12842 SelectionDAG &DAG) const {
12843 EVT OperandVT = Op.getValueType();
12844 if (OperandVT.getScalarType() == ResultVT.getScalarType())
12845 return Op;
12846 EVT ResultIntVT = ResultVT.changeTypeToInteger();
12847 // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
12848 // can induce double-rounding which may alter the results. We can
12849 // correct for this using a trick explained in: Boldo, Sylvie, and
12850 // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
12851 // World Congress. 2005.
      // Narrow, then widen again so the rounding error can be observed by
      // comparing against the original operand.
12852 SDValue Narrow = DAG.getFPExtendOrRound(Op, dl, ResultVT);
12853 SDValue NarrowAsWide = DAG.getFPExtendOrRound(Narrow, dl, OperandVT);
12854
12855 // We can keep the narrow value as-is if narrowing was exact (no
12856 // rounding error), the wide value was NaN (the narrow value is also
12857 // NaN and should be preserved) or if we rounded to the odd value.
12858 SDValue NarrowBits = DAG.getNode(ISD::BITCAST, dl, ResultIntVT, Narrow);
12859 SDValue One = DAG.getConstant(1, dl, ResultIntVT);
12860 SDValue NegativeOne = DAG.getAllOnesConstant(dl, ResultIntVT);
12861 SDValue And = DAG.getNode(ISD::AND, dl, ResultIntVT, NarrowBits, One);
12862 EVT ResultIntVTCCVT = getSetCCResultType(
12863 DAG.getDataLayout(), *DAG.getContext(), And.getValueType());
12864 SDValue Zero = DAG.getConstant(0, dl, ResultIntVT);
12865 // The result is already odd so we don't need to do anything.
12866 SDValue AlreadyOdd = DAG.getSetCC(dl, ResultIntVTCCVT, And, Zero, ISD::SETNE);
12867
12868 EVT WideSetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
12869 Op.getValueType());
12870 // We keep results which are exact, odd or NaN.
      // SETUEQ is true when the operands are equal or unordered, which covers
      // both the exact and the NaN case.
12871 SDValue KeepNarrow =
12872 DAG.getSetCC(dl, WideSetCCVT, Op, NarrowAsWide, ISD::SETUEQ);
12873 KeepNarrow = DAG.getNode(ISD::OR, dl, WideSetCCVT, KeepNarrow, AlreadyOdd);
12874 // We morally performed a round-down if AbsNarrow is smaller than
12875 // AbsWide.
12876 SDValue AbsWide = DAG.getNode(ISD::FABS, dl, OperandVT, Op);
12877 SDValue AbsNarrowAsWide = DAG.getNode(ISD::FABS, dl, OperandVT, NarrowAsWide);
12878 SDValue NarrowIsRd =
12879 DAG.getSetCC(dl, WideSetCCVT, AbsWide, AbsNarrowAsWide, ISD::SETOGT);
12880 // If the narrow value is odd or exact, pick it.
12881 // Otherwise, narrow is even and corresponds to either the rounded-up
12882 // or rounded-down value. If narrow is the rounded-down value, we want
12883 // the rounded-up value as it will be odd.
      // The adjustment is applied to the integer bit pattern: +1 when the
      // narrow value was rounded down, -1 otherwise.
12884 SDValue Adjust = DAG.getSelect(dl, ResultIntVT, NarrowIsRd, One, NegativeOne);
12885 SDValue Adjusted = DAG.getNode(ISD::ADD, dl, ResultIntVT, NarrowBits, Adjust);
12886 Op = DAG.getSelect(dl, ResultIntVT, KeepNarrow, NarrowBits, Adjusted);
12887 return DAG.getNode(ISD::BITCAST, dl, ResultVT, Op);
12888}
12889
// Expands an FP_ROUND node whose result scalar type is bf16. Any other result
// type falls through to the final `return SDValue()`.
// NOTE(review): the line carrying the function signature is absent from this
// listing; `Node` and `DAG` are presumably its parameters -- confirm upstream.
12891 assert(Node->getOpcode() == ISD::FP_ROUND && "Unexpected opcode!");
12892 SDValue Op = Node->getOperand(0);
12893 EVT VT = Node->getValueType(0);
12894 SDLoc dl(Node);
12895 if (VT.getScalarType() == MVT::bf16) {
// When constant operand 1 equals 1, emit FP_TO_BF16 directly and skip the
// manual rounding sequence below.
12896 if (Node->getConstantOperandVal(1) == 1) {
12897 return DAG.getNode(ISD::FP_TO_BF16, dl, VT, Node->getOperand(0));
12898 }
12899 EVT OperandVT = Op.getValueType();
// IsNaN compares Op against itself with the unordered predicate (SETUO). It
// is computed on the original wide operand, before Op is overwritten below.
12900 SDValue IsNaN = DAG.getSetCC(
12901 dl,
12902 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), OperandVT),
12903 Op, Op, ISD::SETUO);
12904
12905 // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
12906 // can induce double-rounding which may alter the results. We can
12907 // correct for this using a trick explained in: Boldo, Sylvie, and
12908 // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
12909 // World Congress. 2005.
12910 EVT F32 = VT.changeElementType(*DAG.getContext(), MVT::f32);
12911 EVT I32 = F32.changeTypeToInteger();
12912 Op = expandRoundInexactToOdd(F32, Op, dl, DAG);
12913 Op = DAG.getNode(ISD::BITCAST, dl, I32, Op);
12914
12915 // Conversions should set NaN's quiet bit. This also prevents NaNs from
12916 // turning into infinities.
12917 SDValue NaN =
12918 DAG.getNode(ISD::OR, dl, I32, Op, DAG.getConstant(0x400000, dl, I32));
12919
12920 // Factor in the contribution of the low 16 bits.
// The bias is 0x7fff plus the current value of bit 16, so that the shift by
// 16 further down rounds the low half instead of simply discarding it.
12921 SDValue One = DAG.getConstant(1, dl, I32);
12922 SDValue Lsb = DAG.getNode(ISD::SRL, dl, I32, Op,
12923 DAG.getShiftAmountConstant(16, I32, dl));
12924 Lsb = DAG.getNode(ISD::AND, dl, I32, Lsb, One);
12925 SDValue RoundingBias =
12926 DAG.getNode(ISD::ADD, dl, I32, Lsb, DAG.getConstant(0x7fff, dl, I32));
12927 SDValue Add = DAG.getNode(ISD::ADD, dl, I32, Op, RoundingBias);
12928
12929 // Don't round if we had a NaN, we don't want to turn 0x7fffffff into
12930 // 0x80000000.
12931 Op = DAG.getSelect(dl, I32, IsNaN, NaN, Add);
12932
12933 // Now that we have rounded, shift the bits into position.
12934 Op = DAG.getNode(ISD::SRL, dl, I32, Op,
12935 DAG.getShiftAmountConstant(16, I32, dl));
// Keep only the low 16 bits and reinterpret them as the bf16 result type.
12936 EVT I16 = I32.changeElementType(*DAG.getContext(), MVT::i16);
12937 Op = DAG.getNode(ISD::TRUNCATE, dl, I16, Op);
12938 return DAG.getNode(ISD::BITCAST, dl, VT, Op);
12939 }
// Not a bf16 result: an empty SDValue is returned.
12940 return SDValue();
12941}
12942
12944 SelectionDAG &DAG) const {
// Expands VECTOR_SPLICE_LEFT / VECTOR_SPLICE_RIGHT through a stack temporary:
// V1 and V2 are stored back to back and the result is loaded from a pointer
// computed from the (clamped) element offset.
// NOTE(review): this listing drops the first signature line, the statement
// that defines MemVT and the tail of the final getLoad call -- consult the
// upstream file for the full text.
12945 assert((Node->getOpcode() == ISD::VECTOR_SPLICE_LEFT ||
12946 Node->getOpcode() == ISD::VECTOR_SPLICE_RIGHT) &&
12947 "Unexpected opcode!");
12948 assert((Node->getValueType(0).isScalableVector() ||
12949 !isa<ConstantSDNode>(Node->getOperand(2))) &&
12950 "Fixed length vector types with constant offsets expected to use "
12951 "SHUFFLE_VECTOR!");
12952
12953 EVT VT = Node->getValueType(0);
12954 SDValue V1 = Node->getOperand(0);
12955 SDValue V2 = Node->getOperand(1);
12956 SDValue Offset = Node->getOperand(2);
12957 SDLoc DL(Node);
12958
12959 // Expand through memory thusly:
12960 // Alloca CONCAT_VECTORS_TYPES(V1, V2) Ptr
12961 // Store V1, Ptr
12962 // Store V2, Ptr + sizeof(V1)
12963 // if (VECTOR_SPLICE_LEFT)
12964 // Ptr = Ptr + (Offset * sizeof(VT.Elt))
12965 // else
12966 // Ptr = Ptr + sizeof(V1) - (Offset * size(VT.Elt))
12967 // Res = Load Ptr
12968
12969 Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);
12970
12972 VT.getVectorElementCount() * 2);
12973 SDValue StackPtr = DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
12974 EVT PtrVT = StackPtr.getValueType();
12975 auto &MF = DAG.getMachineFunction();
12976 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
12977 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
12978
12979 // Store the lo part of CONCAT_VECTORS(V1, V2)
12980 SDValue StoreV1 = DAG.getStore(DAG.getEntryNode(), DL, V1, StackPtr, PtrInfo);
12981 // Store the hi part of CONCAT_VECTORS(V1, V2)
12982 SDValue VTBytes = DAG.getTypeSize(DL, PtrVT, VT.getStoreSize());
12983 SDValue StackPtr2 = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, VTBytes);
// The second store is chained on the first one (StoreV1 is its chain).
12984 SDValue StoreV2 = DAG.getStore(StoreV1, DL, V2, StackPtr2, PtrInfo);
12985
12986 // NOTE: TrailingBytes must be clamped so as not to read outside of V1:V2.
12987 SDValue EltByteSize =
12988 DAG.getTypeSize(DL, PtrVT, VT.getVectorElementType().getStoreSize());
12989 Offset = DAG.getZExtOrTrunc(Offset, DL, PtrVT);
12990 SDValue TrailingBytes = DAG.getNode(ISD::MUL, DL, PtrVT, Offset, EltByteSize);
12991
// Clamp the byte offset to the store size of one input vector (VTBytes).
12992 TrailingBytes = DAG.getNode(ISD::UMIN, DL, PtrVT, TrailingBytes, VTBytes);
12993
// LEFT: start TrailingBytes past the beginning of V1.
// RIGHT: start TrailingBytes before the beginning of V2 (StackPtr2).
12994 if (Node->getOpcode() == ISD::VECTOR_SPLICE_LEFT)
12995 StackPtr = DAG.getMemBasePlusOffset(StackPtr, TrailingBytes, DL);
12996 else
12997 StackPtr = DAG.getNode(ISD::SUB, DL, PtrVT, StackPtr2, TrailingBytes);
12998
12999 // Load the spliced result
13000 return DAG.getLoad(VT, DL, StoreV2, StackPtr,
13002}
13003
13005 SelectionDAG &DAG) const {
// Expands a vector compress of operand 0 (Vec) under operand 1 (Mask), with
// operand 2 (Passthru) as optional fill, by writing elements one at a time
// into a stack slot and reloading the whole vector at the end. Scalable
// vector types are rejected with a fatal error.
// NOTE(review): several lines of this function (the first signature line, the
// PtrInfo initializer and parts of a few getNode/getLoad/getStore calls) are
// absent from this listing.
13006 SDLoc DL(Node);
13007 SDValue Vec = Node->getOperand(0);
13008 SDValue Mask = Node->getOperand(1);
13009 SDValue Passthru = Node->getOperand(2);
13010
13011 EVT VecVT = Vec.getValueType();
13012 EVT ScalarVT = VecVT.getScalarType();
13013 EVT MaskVT = Mask.getValueType();
13014 EVT MaskScalarVT = MaskVT.getScalarType();
13015
13016 // Needs to be handled by targets that have scalable vector types.
13017 if (VecVT.isScalableVector())
13018 report_fatal_error("Cannot expand masked_compress for scalable vectors.");
13019
13020 SDValue StackPtr = DAG.CreateStackTemporary(
13021 VecVT.getStoreSize(), DAG.getReducedAlign(VecVT, /*UseABI=*/false));
13022 int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
13023 MachinePointerInfo PtrInfo =
13025
13026 MVT PositionVT = getVectorIdxTy(DAG.getDataLayout());
13027 SDValue Chain = DAG.getEntryNode();
// OutPos is the running index of the next output slot in the stack vector.
13028 SDValue OutPos = DAG.getConstant(0, DL, PositionVT);
13029
13030 bool HasPassthru = !Passthru.isUndef();
13031
13032 // If we have a passthru vector, store it on the stack, overwrite the matching
13033 // positions and then re-write the last element that was potentially
13034 // overwritten even though mask[i] = false.
13035 if (HasPassthru)
13036 Chain = DAG.getStore(Chain, DL, Passthru, StackPtr, PtrInfo);
13037
13038 SDValue LastWriteVal;
13039 APInt PassthruSplatVal;
13040 bool IsSplatPassthru =
13041 ISD::isConstantSplatVector(Passthru.getNode(), PassthruSplatVal);
13042
13043 if (IsSplatPassthru) {
13044 // As we do not know which position we wrote to last, we cannot simply
13045 // access that index from the passthru vector. So we first check if passthru
13046 // is a splat vector, to use any element ...
13047 LastWriteVal = DAG.getConstant(PassthruSplatVal, DL, ScalarVT);
13048 } else if (HasPassthru) {
13049 // ... if it is not a splat vector, we need to get the passthru value at
13050 // position = popcount(mask) and re-load it from the stack before it is
13051 // overwritten in the loop below.
13052 EVT PopcountVT = ScalarVT.changeTypeToInteger();
13053 SDValue Popcount = DAG.getNode(
13055 MaskVT.changeVectorElementType(*DAG.getContext(), MVT::i1), Mask);
13056 Popcount = DAG.getNode(
13058 MaskVT.changeVectorElementType(*DAG.getContext(), PopcountVT),
13059 Popcount);
13060 Popcount = DAG.getNode(ISD::VECREDUCE_ADD, DL, PopcountVT, Popcount);
13061 SDValue LastElmtPtr =
13062 getVectorElementPointer(DAG, StackPtr, VecVT, Popcount);
13063 LastWriteVal = DAG.getLoad(
13064 ScalarVT, DL, Chain, LastElmtPtr,
13066 Chain = LastWriteVal.getValue(1);
13067 }
13068
13069 unsigned NumElms = VecVT.getVectorNumElements();
13070 for (unsigned I = 0; I < NumElms; I++) {
// Every element is stored at the current OutPos regardless of its mask bit;
// OutPos only advances for selected elements, so an unselected element is
// overwritten by the next store to the same slot.
13071 SDValue ValI = DAG.getExtractVectorElt(DL, ScalarVT, Vec, I);
13072 SDValue OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos);
13073 Chain = DAG.getStore(
13074 Chain, DL, ValI, OutPtr,
13076
13077 // Get the mask value and add it to the current output position. This
13078 // either increments by 1 if MaskI is true or adds 0 otherwise.
13079 // Freeze in case we have poison/undef mask entries.
13080 SDValue MaskI = DAG.getExtractVectorElt(DL, MaskScalarVT, Mask, I);
13081 MaskI = DAG.getFreeze(MaskI);
13082 MaskI = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, MaskI);
13083 MaskI = DAG.getNode(ISD::ZERO_EXTEND, DL, PositionVT, MaskI);
13084 OutPos = DAG.getNode(ISD::ADD, DL, PositionVT, OutPos, MaskI);
13085
// Only on the final iteration, and only with a passthru: repair the slot
// that the last unconditional store may have clobbered.
13086 if (HasPassthru && I == NumElms - 1) {
13087 SDValue EndOfVector =
13088 DAG.getConstant(VecVT.getVectorNumElements() - 1, DL, PositionVT);
13089 SDValue AllLanesSelected =
13090 DAG.getSetCC(DL, MVT::i1, OutPos, EndOfVector, ISD::CondCode::SETUGT);
// Clamp OutPos to the last valid index before forming the pointer.
13091 OutPos = DAG.getNode(ISD::UMIN, DL, PositionVT, OutPos, EndOfVector);
13092 OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos);
13093
13094 // Re-write the last ValI if all lanes were selected. Otherwise,
13095 // overwrite the last write with the passthru value.
13096 LastWriteVal = DAG.getSelect(DL, ScalarVT, AllLanesSelected, ValI,
13097 LastWriteVal, SDNodeFlags::Unpredictable);
13098 Chain = DAG.getStore(
13099 Chain, DL, LastWriteVal, OutPtr,
13101 }
13102 }
13103
// Reload the complete compressed vector from the stack slot.
13104 return DAG.getLoad(VecVT, DL, Chain, StackPtr, PtrInfo);
13105}
13106
// Expands a count-trailing-zero-elements node (the poison-on-zero variant is
// detected via CTTZ_ELTS_ZERO_POISON). Lanes where Mask is set receive
// (VL - step), all other lanes receive 0, and the result is VL minus `Max`,
// zero-extended or truncated to the node's result type.
// NOTE(review): the signature line, the condition guarding the StepVT
// promotion and the statement defining `Max` are absent from this listing;
// `Max` is presumably a max-reduction of `Select` -- confirm upstream.
13108 SDLoc DL(Node);
13109 EVT VT = Node->getValueType(0);
13110
13111 bool ZeroIsPoison = Node->getOpcode() == ISD::CTTZ_ELTS_ZERO_POISON;
13112 auto [Mask, StepVec] =
13113 getLegalMaskAndStepVector(Node->getOperand(0), ZeroIsPoison, DL, DAG);
13114 EVT StepVecVT = StepVec.getValueType();
13115 EVT StepVT = StepVecVT.getVectorElementType();
13116
13117 // Promote the scalar result type early to avoid redundant zexts.
13119 StepVT = getTypeToTransformTo(*DAG.getContext(), StepVT);
13120
// VL is the element count of the step vector, as a scalar of type StepVT.
13121 SDValue VL =
13122 DAG.getElementCount(DL, StepVT, StepVecVT.getVectorElementCount());
13123 SDValue SplatVL = DAG.getSplat(StepVecVT, DL, VL);
// Turn the step vector into (VL - step) per lane.
13124 StepVec = DAG.getNode(ISD::SUB, DL, StepVecVT, SplatVL, StepVec);
13125 SDValue Zeroes = DAG.getConstant(0, DL, StepVecVT);
13126 SDValue Select = DAG.getSelect(DL, StepVecVT, Mask, StepVec, Zeroes);
13128 StepVecVT.getVectorElementType(), Select);
13129 SDValue Sub = DAG.getNode(ISD::SUB, DL, StepVT, VL,
13130 DAG.getZExtOrTrunc(Max, DL, StepVT));
13131
13132 return DAG.getZExtOrTrunc(Sub, DL, VT);
13133}
13134
13136 SelectionDAG &DAG) const {
// Expands a partial-reduction multiply-accumulate node. Operands 1 and 2 are
// extended to ExtMulOpVT when that differs from their type, multiplied unless
// the right-hand side is one (or a splat of one), and the product is cut into
// AccVT-sized subvectors that are summed together with the accumulator
// (operand 0).
// NOTE(review): the first signature line, the start of the ExtMulOpVT
// initializer and the three `case` labels of the switch are absent from this
// listing.
13137 SDLoc DL(N);
13138 SDValue Acc = N->getOperand(0);
13139 SDValue MulLHS = N->getOperand(1);
13140 SDValue MulRHS = N->getOperand(2);
13141 EVT AccVT = Acc.getValueType();
13142 EVT MulOpVT = MulLHS.getValueType();
13143
13144 EVT ExtMulOpVT =
13146 MulOpVT.getVectorElementCount());
13147
// Pick the extension opcode for the multiplicands based on the node opcode.
13148 unsigned ExtOpcLHS, ExtOpcRHS;
13149 switch (N->getOpcode()) {
13150 default:
13151 llvm_unreachable("Unexpected opcode");
13153 ExtOpcLHS = ExtOpcRHS = ISD::ZERO_EXTEND;
13154 break;
13156 ExtOpcLHS = ExtOpcRHS = ISD::SIGN_EXTEND;
13157 break;
13159 ExtOpcLHS = ExtOpcRHS = ISD::FP_EXTEND;
13160 break;
13161 }
13162
13163 if (ExtMulOpVT != MulOpVT) {
13164 MulLHS = DAG.getNode(ExtOpcLHS, DL, ExtMulOpVT, MulLHS);
13165 MulRHS = DAG.getNode(ExtOpcRHS, DL, ExtMulOpVT, MulRHS);
13166 }
// Skip the multiply when the right-hand side is one or a splat of one; the
// floating-point opcode uses FMUL, the others use MUL.
13167 SDValue Input = MulLHS;
13168 if (N->getOpcode() == ISD::PARTIAL_REDUCE_FMLA) {
13169 if (!llvm::isOneOrOneSplatFP(MulRHS))
13170 Input = DAG.getNode(ISD::FMUL, DL, ExtMulOpVT, MulLHS, MulRHS);
13171 } else if (!llvm::isOneOrOneSplat(MulRHS)) {
13172 Input = DAG.getNode(ISD::MUL, DL, ExtMulOpVT, MulLHS, MulRHS);
13173 }
13174
// Stride is the minimum element count of the accumulator type; ScaleFactor
// is how many such slices the multiplicand type contains.
13175 unsigned Stride = AccVT.getVectorMinNumElements();
13176 unsigned ScaleFactor = MulOpVT.getVectorMinNumElements() / Stride;
13177
13178 // Collect all of the subvectors
13179 std::deque<SDValue> Subvectors = {Acc};
13180 for (unsigned I = 0; I < ScaleFactor; I++)
13181 Subvectors.push_back(DAG.getExtractSubvector(DL, AccVT, Input, I * Stride));
13182
13183 unsigned FlatNode =
13184 N->getOpcode() == ISD::PARTIAL_REDUCE_FMLA ? ISD::FADD : ISD::ADD;
13185
13186 // Flatten the subvector tree
// Each round pushes the sum of the two front entries to the back and pops
// them, until a single value remains.
13187 while (Subvectors.size() > 1) {
13188 Subvectors.push_back(
13189 DAG.getNode(FlatNode, DL, AccVT, {Subvectors[0], Subvectors[1]}));
13190 Subvectors.pop_front();
13191 Subvectors.pop_front();
13192 }
13193
13194 assert(Subvectors.size() == 1 &&
13195 "There should only be one subvector after tree flattening");
13196
13197 return Subvectors[0];
13198}
13199
13200/// Given a store node \p StoreNode, return true if it is safe to fold that node
13201/// into \p FPNode, which expands to a library call with output pointers.
13203 SDNode *FPNode) {
// Walks the operands of StoreNode (other than FPNode itself) transitively.
// The fold is refused when the walk reaches FPNode or a CALLSEQ_START, or when
// MaxSteps is nonzero and the visited set has grown to MaxSteps entries.
// NOTE(review): the first signature line and the declarations of `Visited`
// and `Worklist` are absent from this listing.
13205 SmallVector<const SDNode *, 8> DeferredNodes;
13207
13208 // Skip FPNode use by StoreNode (that's the use we want to fold into FPNode).
13209 for (SDValue Op : StoreNode->ops())
13210 if (Op.getNode() != FPNode)
13211 Worklist.push_back(Op.getNode());
13212
13214 while (!Worklist.empty()) {
13215 const SDNode *Node = Worklist.pop_back_val();
// Nodes already in Visited are not expanded a second time.
13216 auto [_, Inserted] = Visited.insert(Node);
13217 if (!Inserted)
13218 continue;
13219
// Give up (answer "not safe") once the step budget is exhausted.
13220 if (MaxSteps > 0 && Visited.size() >= MaxSteps)
13221 return false;
13222
13223 // Reached the FPNode (would result in a cycle).
13224 // OR Reached CALLSEQ_START (would result in nested call sequences).
13225 if (Node == FPNode || Node->getOpcode() == ISD::CALLSEQ_START)
13226 return false;
13227
13228 if (Node->getOpcode() == ISD::CALLSEQ_END) {
13229 // Defer looking into call sequences (so we can check we're outside one).
13230 // We still need to look through these for the predecessor check.
13231 DeferredNodes.push_back(Node);
13232 continue;
13233 }
13234
13235 for (SDValue Op : Node->ops())
13236 Worklist.push_back(Op.getNode());
13237 }
13238
13239 // True if we're outside a call sequence and don't have the FPNode as a
13240 // predecessor. No cycles or nested call sequences possible.
13241 return !SDNode::hasPredecessorHelper(FPNode, Visited, DeferredNodes,
13242 MaxSteps);
13243}
13244
13246 SelectionDAG &DAG, RTLIB::Libcall LC, SDNode *Node,
13248 std::optional<unsigned> CallRetResNo) const {
// Lowers Node to a call of libcall LC in which every result other than
// CallRetResNo is produced through an output pointer. Returns false when LC is
// unknown or has no supported implementation; otherwise appends one value per
// result of Node to Results and returns true.
// NOTE(review): several lines are absent from this listing (the first
// signature line, the `Results` parameter line, the guards before two
// `continue` statements, the PointerTy and CLI declarations, the condition
// opening the vector-mask block and the start of one PtrInfo assignment).
13249 if (LC == RTLIB::UNKNOWN_LIBCALL)
13250 return false;
13251
13252 RTLIB::LibcallImpl LibcallImpl = getLibcallImpl(LC);
13253 if (LibcallImpl == RTLIB::Unsupported)
13254 return false;
13255
13256 LLVMContext &Ctx = *DAG.getContext();
13257 EVT VT = Node->getValueType(0);
13258 unsigned NumResults = Node->getNumValues();
13259
13260 // Find users of the node that store the results (and share input chains). The
13261 // destination pointers can be used instead of creating stack allocations.
13262 SDValue StoresInChain;
13263 SmallVector<StoreSDNode *, 2> ResultStores(NumResults);
13264 for (SDNode *User : Node->users()) {
13266 continue;
13267 auto *ST = cast<StoreSDNode>(User);
13268 SDValue StoreValue = ST->getValue();
13269 unsigned ResNo = StoreValue.getResNo();
13270 // Ensure the store corresponds to an output pointer.
13271 if (CallRetResNo == ResNo)
13272 continue;
13273 // Ensure the store is to the default address space and is not atomic or
// volatile.
13274 if (!ST->isSimple() || ST->getAddressSpace() != 0)
13275 continue;
13276 // Ensure all store chains are the same (so they don't alias).
13277 if (StoresInChain && ST->getChain() != StoresInChain)
13278 continue;
13279 // Ensure the store is properly aligned.
13280 Type *StoreType = StoreValue.getValueType().getTypeForEVT(Ctx);
13281 if (ST->getAlign() <
13282 DAG.getDataLayout().getABITypeAlign(StoreType->getScalarType()))
13283 continue;
13284 // Avoid:
13285 // 1. Creating cyclic dependencies.
13286 // 2. Expanding the node to a call within a call sequence.
13288 continue;
// Remember the store for this result; its chain becomes the call's chain.
13289 ResultStores[ResNo] = ST;
13290 StoresInChain = ST->getChain();
13291 }
13292
13293 ArgListTy Args;
13294
13295 // Pass the arguments.
13296 for (const SDValue &Op : Node->op_values()) {
13297 EVT ArgVT = Op.getValueType();
13298 Type *ArgTy = ArgVT.getTypeForEVT(Ctx);
13299 Args.emplace_back(Op, ArgTy);
13300 }
13301
13302 // Pass the output pointers.
// A result with a recorded store reuses that store's base pointer; any other
// result gets a fresh stack temporary.
13303 SmallVector<SDValue, 2> ResultPtrs(NumResults);
13305 for (auto [ResNo, ST] : llvm::enumerate(ResultStores)) {
13306 if (ResNo == CallRetResNo)
13307 continue;
13308 EVT ResVT = Node->getValueType(ResNo);
13309 SDValue ResultPtr = ST ? ST->getBasePtr() : DAG.CreateStackTemporary(ResVT);
13310 ResultPtrs[ResNo] = ResultPtr;
13311 Args.emplace_back(ResultPtr, PointerTy);
13312 }
13313
13314 SDLoc DL(Node);
13315
13317 // Pass the vector mask (if required).
13318 EVT MaskVT = getSetCCResultType(DAG.getDataLayout(), Ctx, VT);
13319 SDValue Mask = DAG.getBoolConstant(true, DL, MaskVT, VT);
13320 Args.emplace_back(Mask, MaskVT.getTypeForEVT(Ctx));
13321 }
13322
// The call returns void unless one result is designated as the direct return.
13323 Type *RetType = CallRetResNo.has_value()
13324 ? Node->getValueType(*CallRetResNo).getTypeForEVT(Ctx)
13325 : Type::getVoidTy(Ctx);
13326 SDValue InChain = StoresInChain ? StoresInChain : DAG.getEntryNode();
13327 SDValue Callee =
13328 DAG.getExternalSymbol(LibcallImpl, getPointerTy(DAG.getDataLayout()));
13330 CLI.setDebugLoc(DL).setChain(InChain).setLibCallee(
13331 getLibcallImplCallingConv(LibcallImpl), RetType, Callee, std::move(Args));
13332
13333 auto [Call, CallChain] = LowerCallTo(CLI);
13334
// Produce the results in result-number order: the direct return value comes
// from the call itself, every other result is loaded back from its pointer.
13335 for (auto [ResNo, ResultPtr] : llvm::enumerate(ResultPtrs)) {
13336 if (ResNo == CallRetResNo) {
13337 Results.push_back(Call);
13338 continue;
13339 }
// NOTE(review): PtrInfo is still default-constructed when it is passed to
// getLoad here; the assignments in the branches below happen afterwards and
// are not read again in this loop body -- confirm whether that is intended.
13340 MachinePointerInfo PtrInfo;
13341 SDValue LoadResult = DAG.getLoad(Node->getValueType(ResNo), DL, CallChain,
13342 ResultPtr, PtrInfo);
13343 SDValue OutChain = LoadResult.getValue(1);
13344
13345 if (StoreSDNode *ST = ResultStores[ResNo]) {
13346 // Replace store with the library call.
13347 DAG.ReplaceAllUsesOfValueWith(SDValue(ST, 0), OutChain);
13348 PtrInfo = ST->getPointerInfo();
13349 } else {
13351 DAG.getMachineFunction(),
13352 cast<FrameIndexSDNode>(ResultPtr)->getIndex());
13353 }
13354
13355 Results.push_back(LoadResult);
13356 }
13357
13358 return true;
13359}
13360
13362 SDValue &LHS, SDValue &RHS,
13363 SDValue &CC, SDValue Mask,
13364 SDValue EVL, bool &NeedInvert,
13365 const SDLoc &dl, SDValue &Chain,
13366 bool IsSignaling) const {
// Rewrites the comparison (LHS, RHS, CC) into a form whose condition code is
// legal or custom for the operand type. Returns true when something was
// rewritten and false otherwise. NeedInvert is set when the caller must invert
// the produced result. When RHS and CC come back as empty SDValues, LHS holds
// the fully computed comparison result.
// Mask and EVL must be both set (VP form) or both unset (see the assert).
// NOTE(review): the first signature line, two `case` labels of the outer
// switch, the InvCC and CC1/CC2 declarations and the heads of two asserts
// are absent from this listing.
13367 MVT OpVT = LHS.getSimpleValueType();
13368 ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
13369 NeedInvert = false;
13370 assert(!EVL == !Mask && "VP Mask and EVL must either both be set or unset");
13371 bool IsNonVP = !EVL;
13372 switch (getCondCodeAction(CCCode, OpVT)) {
13373 default:
13374 llvm_unreachable("Unknown condition code action!");
13376 // Nothing to do.
13377 break;
// First attempt: a usable condition code obtained by swapping the operands.
13380 if (isCondCodeLegalOrCustom(InvCC, OpVT)) {
13381 std::swap(LHS, RHS);
13382 CC = DAG.getCondCode(InvCC);
13383 return true;
13384 }
13385 // Swapping operands didn't work. Try inverting the condition.
13386 bool NeedSwap = false;
13387 InvCC = getSetCCInverse(CCCode, OpVT);
13388 if (!isCondCodeLegalOrCustom(InvCC, OpVT)) {
13389 // If inverting the condition is not enough, try swapping operands
13390 // on top of it.
13391 InvCC = ISD::getSetCCSwappedOperands(InvCC);
13392 NeedSwap = true;
13393 }
13394 if (isCondCodeLegalOrCustom(InvCC, OpVT)) {
13395 CC = DAG.getCondCode(InvCC);
13396 NeedInvert = true;
13397 if (NeedSwap)
13398 std::swap(LHS, RHS);
13399 return true;
13400 }
13401
13402 // Special case: expand i1 comparisons using logical operations.
13403 if (OpVT == MVT::i1) {
13404 SDValue Ret;
13405 switch (CCCode) {
13406 default:
13407 llvm_unreachable("Unknown integer setcc!");
13408 case ISD::SETEQ: // X == Y --> ~(X ^ Y)
13409 Ret = DAG.getNOT(dl, DAG.getNode(ISD::XOR, dl, MVT::i1, LHS, RHS),
13410 MVT::i1);
13411 break;
13412 case ISD::SETNE: // X != Y --> (X ^ Y)
13413 Ret = DAG.getNode(ISD::XOR, dl, MVT::i1, LHS, RHS);
13414 break;
13415 case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
13416 case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
13417 Ret = DAG.getNode(ISD::AND, dl, MVT::i1, RHS,
13418 DAG.getNOT(dl, LHS, MVT::i1));
13419 break;
13420 case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
13421 case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
13422 Ret = DAG.getNode(ISD::AND, dl, MVT::i1, LHS,
13423 DAG.getNOT(dl, RHS, MVT::i1));
13424 break;
13425 case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
13426 case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
13427 Ret = DAG.getNode(ISD::OR, dl, MVT::i1, RHS,
13428 DAG.getNOT(dl, LHS, MVT::i1));
13429 break;
13430 case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
13431 case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
13432 Ret = DAG.getNode(ISD::OR, dl, MVT::i1, LHS,
13433 DAG.getNOT(dl, RHS, MVT::i1));
13434 break;
13435 }
13436
// The i1 result is complete: hand it back in LHS and clear RHS and CC.
13437 LHS = DAG.getZExtOrTrunc(Ret, dl, VT);
13438 RHS = SDValue();
13439 CC = SDValue();
13440 return true;
13441 }
13442
// Otherwise split the comparison into two comparisons (CC1, CC2) that are
// combined with the logical opcode Opc.
13444 unsigned Opc = 0;
13445 switch (CCCode) {
13446 default:
13447 llvm_unreachable("Don't know how to expand this condition!");
13448 case ISD::SETUO:
13449 if (isCondCodeLegal(ISD::SETUNE, OpVT)) {
13450 CC1 = ISD::SETUNE;
13451 CC2 = ISD::SETUNE;
13452 Opc = ISD::OR;
13453 break;
13454 }
13456 "If SETUE is expanded, SETOEQ or SETUNE must be legal!");
13457 NeedInvert = true;
13458 [[fallthrough]];
13459 case ISD::SETO:
13461 "If SETO is expanded, SETOEQ must be legal!");
13462 CC1 = ISD::SETOEQ;
13463 CC2 = ISD::SETOEQ;
13464 Opc = ISD::AND;
13465 break;
13466 case ISD::SETONE:
13467 case ISD::SETUEQ:
13468 // If the SETUO or SETO CC isn't legal, we might be able to use
13469 // SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
13470 // of SETOGT/SETOLT to be legal, the other can be emulated by swapping
13471 // the operands.
13472 CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
13473 if (!isCondCodeLegal(CC2, OpVT) && (isCondCodeLegal(ISD::SETOGT, OpVT) ||
13474 isCondCodeLegal(ISD::SETOLT, OpVT))) {
13475 CC1 = ISD::SETOGT;
13476 CC2 = ISD::SETOLT;
13477 Opc = ISD::OR;
13478 NeedInvert = ((unsigned)CCCode & 0x8U);
13479 break;
13480 }
13481 [[fallthrough]];
13482 case ISD::SETOEQ:
13483 case ISD::SETOGT:
13484 case ISD::SETOGE:
13485 case ISD::SETOLT:
13486 case ISD::SETOLE:
13487 case ISD::SETUNE:
13488 case ISD::SETUGT:
13489 case ISD::SETUGE:
13490 case ISD::SETULT:
13491 case ISD::SETULE:
13492 // If we are floating point, assign and break, otherwise fall through.
13493 if (!OpVT.isInteger()) {
13494 // We can use the 4th bit to tell if we are the unordered
13495 // or ordered version of the opcode.
13496 CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
13497 Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
// CC1 keeps the low three bits of the original code and sets bit 0x10.
13498 CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
13499 break;
13500 }
13501 // Fallthrough if we are unsigned integer.
13502 [[fallthrough]];
13503 case ISD::SETLE:
13504 case ISD::SETGT:
13505 case ISD::SETGE:
13506 case ISD::SETLT:
13507 case ISD::SETNE:
13508 case ISD::SETEQ:
13509 // If all combinations of inverting the condition and swapping operands
13510 // didn't work then we have no means to expand the condition.
13511 llvm_unreachable("Don't know how to expand this condition!");
13512 }
13513
13514 SDValue SetCC1, SetCC2;
13515 if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
13516 // If we aren't the ordered or unordered operation,
13517 // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
13518 if (IsNonVP) {
13519 SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling);
13520 SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling);
13521 } else {
13522 SetCC1 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC1, Mask, EVL);
13523 SetCC2 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC2, Mask, EVL);
13524 }
13525 } else {
13526 // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
13527 if (IsNonVP) {
13528 SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling);
13529 SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling);
13530 } else {
13531 SetCC1 = DAG.getSetCCVP(dl, VT, LHS, LHS, CC1, Mask, EVL);
13532 SetCC2 = DAG.getSetCCVP(dl, VT, RHS, RHS, CC2, Mask, EVL);
13533 }
13534 }
// When a chain was supplied, merge the chain results of both comparisons.
13535 if (Chain)
13536 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1),
13537 SetCC2.getValue(1));
13538 if (IsNonVP)
13539 LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
13540 else {
13541 // Transform the binary opcode to the VP equivalent.
13542 assert((Opc == ISD::OR || Opc == ISD::AND) && "Unexpected opcode");
13543 Opc = Opc == ISD::OR ? ISD::VP_OR : ISD::VP_AND;
13544 LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2, Mask, EVL);
13545 }
13546 RHS = SDValue();
13547 CC = SDValue();
13548 return true;
13549 }
13550 }
// Reached when the outer switch was left via `break`: nothing was changed.
13551 return false;
13552}
13553
13555 SelectionDAG &DAG) const {
// Tries to expand a vector operation by splitting every operand into a low
// and a high half, applying the same opcode (with the node's flags) to each
// half and concatenating the two results. Returns an empty SDValue when the
// type cannot be split evenly, the half type is not legal, or the opcode is
// not legal/custom/promote on the half type.
// NOTE(review): the first signature line is absent from this listing.
13556 EVT VT = Node->getValueType(0);
13557 // Despite its documentation, GetSplitDestVTs will assert if VT cannot be
13558 // split into two equal parts.
13559 if (!VT.isVector() || !VT.getVectorElementCount().isKnownMultipleOf(2))
13560 return SDValue();
13561
13562 // Restrict expansion to cases where both parts can be concatenated.
13563 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VT);
13564 if (LoVT != HiVT || !isTypeLegal(LoVT))
13565 return SDValue();
13566
13567 SDLoc DL(Node);
13568 unsigned Opcode = Node->getOpcode();
13569
13570 // Don't expand if the result is likely to be unrolled anyway.
13571 if (!isOperationLegalOrCustomOrPromote(Opcode, LoVT))
13572 return SDValue();
13573
// Split every operand of the node with the same low/high types.
13574 SmallVector<SDValue, 4> LoOps, HiOps;
13575 for (const SDValue &V : Node->op_values()) {
13576 auto [Lo, Hi] = DAG.SplitVector(V, DL, LoVT, HiVT);
13577 LoOps.push_back(Lo);
13578 HiOps.push_back(Hi);
13579 }
13580
13581 SDValue SplitOpLo = DAG.getNode(Opcode, DL, LoVT, LoOps, Node->getFlags());
13582 SDValue SplitOpHi = DAG.getNode(Opcode, DL, HiVT, HiOps, Node->getFlags());
13583 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, SplitOpLo, SplitOpHi);
13584}
13585
13587 const SDLoc &DL,
13588 EVT InVecVT, SDValue EltNo,
13589 LoadSDNode *OriginalLoad,
13590 SelectionDAG &DAG) const {
// Replaces the extraction of element EltNo from the simple vector load
// OriginalLoad with a scalar load of just that element, producing a value of
// type ResultVT. Returns an empty SDValue whenever one of the checks below
// rejects the transformation.
// NOTE(review): the first signature line and the statements declaring `MPI`
// and `NewPtr` are absent from this listing, as is one line of the ExtType
// conditional.
13591 assert(OriginalLoad->isSimple());
13592
13593 EVT VecEltVT = InVecVT.getVectorElementType();
13594
13595 // If the vector element type is not a multiple of a byte then we are unable
13596 // to correctly compute an address to load only the extracted element as a
13597 // scalar.
13598 if (!VecEltVT.isByteSized())
13599 return SDValue();
13600
13601 ISD::LoadExtType ExtTy =
13602 ResultVT.bitsGT(VecEltVT) ? ISD::EXTLOAD : ISD::NON_EXTLOAD;
13603 if (!isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
13604 return SDValue();
13605
// ByteOffset stays empty when the element index is not a constant.
13606 std::optional<unsigned> ByteOffset;
13607 Align Alignment = OriginalLoad->getAlign();
13609 if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
13610 int Elt = ConstEltNo->getZExtValue();
13611 ByteOffset = VecEltVT.getSizeInBits() * Elt / 8;
13612 MPI = OriginalLoad->getPointerInfo().getWithOffset(*ByteOffset);
13613 Alignment = commonAlignment(Alignment, *ByteOffset);
13614 } else {
13615 // Discard the pointer info except the address space because the memory
13616 // operand can't represent this new access since the offset is variable.
13617 MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
13618 Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8);
13619 }
13620
13621 if (!shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT, ByteOffset))
13622 return SDValue();
13623
// Require the narrowed access to be both allowed and reported as fast.
13624 unsigned IsFast = 0;
13625 if (!allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT,
13626 OriginalLoad->getAddressSpace(), Alignment,
13627 OriginalLoad->getMemOperand()->getFlags(), &IsFast) ||
13628 !IsFast)
13629 return SDValue();
13630
13631 // The original DAG loaded the entire vector from memory, so arithmetic
13632 // within it must be inbounds.
13634 DAG, OriginalLoad->getBasePtr(), InVecVT, EltNo);
13635
13636 // We are replacing a vector load with a scalar load. The new load must have
13637 // identical memory op ordering to the original.
13638 SDValue Load;
13639 if (ResultVT.bitsGT(VecEltVT)) {
13640 // If the result type of vextract is wider than the load, then issue an
13641 // extending load instead.
13642 ISD::LoadExtType ExtType =
13643 isLoadLegal(ResultVT, VecEltVT, Alignment,
13644 OriginalLoad->getAddressSpace(), ISD::ZEXTLOAD, false)
13646 : ISD::EXTLOAD;
13647 Load = DAG.getExtLoad(ExtType, DL, ResultVT, OriginalLoad->getChain(),
13648 NewPtr, MPI, VecEltVT, Alignment,
13649 OriginalLoad->getMemOperand()->getFlags(),
13650 OriginalLoad->getAAInfo());
13651 DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
13652 } else {
13653 // The result type is narrower or the same width as the vector element
13654 Load = DAG.getLoad(VecEltVT, DL, OriginalLoad->getChain(), NewPtr, MPI,
13655 Alignment, OriginalLoad->getMemOperand()->getFlags(),
13656 OriginalLoad->getAAInfo());
13657 DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
// Narrower result: truncate. Same width: reinterpret the bits.
13658 if (ResultVT.bitsLT(VecEltVT))
13659 Load = DAG.getNode(ISD::TRUNCATE, DL, ResultVT, Load);
13660 else
13661 Load = DAG.getBitcast(ResultVT, Load);
13662 }
13663
13664 return Load;
13665}
13666
13667// Set type id for call site info and metadata 'call_target'.
13668// We are filtering for:
13669// a) The call-graph-section use case that wants to know about indirect
13670// calls, or
13671// b) We want to annotate indirect calls.
// NOTE(review): the first signature line and the remainder of the `if`
// condition are absent from this listing. What is visible: CSInfo is
// overwritten with a CallSiteInfo built from *CB only when CB is non-null and
// is an indirect call (plus the conditions on the missing lines).
13673 const CallBase *CB, MachineFunction &MF,
13674 MachineFunction::CallSiteInfo &CSInfo) const {
13675 if (CB && CB->isIndirectCall() &&
13678 CSInfo = MachineFunction::CallSiteInfo(*CB);
13679}
return SDValue()
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
constexpr LLT F32
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis Results
#define X(NUM, ENUM, NAME)
Definition ELF.h:853
block Block Frequency Analysis
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
static bool isSigned(unsigned Opcode)
#define _
static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, const APInt &Demanded)
Check to see if the specified operand of the specified instruction is a constant integer.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
lazy value info
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition Lint.cpp:539
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define G(x, y, z)
Definition MD5.cpp:55
static bool isUndef(const MachineInstr &MI)
Register const TargetRegisterInfo * TRI
#define T
#define T1
uint64_t High
#define P(N)
Function const char * Passes
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
R600 Clause Merge
const SmallVectorImpl< MachineOperand > & Cond
Contains matchers for matching SelectionDAG nodes and values.
This file contains some templates that are useful if you are working with the STL at all.
static cl::opt< unsigned > MaxSteps("has-predecessor-max-steps", cl::Hidden, cl::init(8192), cl::desc("DAG combiner limit number of steps when searching DAG " "for predecessor nodes"))
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static std::pair< SDValue, SDValue > getLegalMaskAndStepVector(SDValue Mask, bool ZeroIsPoison, SDLoc DL, SelectionDAG &DAG)
Returns a type-legalized version of Mask as the first item in the pair.
static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static bool lowerImmediateIfPossible(TargetLowering::ConstraintPair &P, SDValue Op, SelectionDAG *DAG, const TargetLowering &TLI)
If we have an immediate, see if we can lower it.
static SDValue expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG)
static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG, const APInt &UndefOp0, const APInt &UndefOp1)
Given a vector binary operation and known undefined elements for each input operand,...
static SDValue BuildExactUDIV(const TargetLowering &TLI, SDNode *N, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created)
Given an exact UDIV by a constant, create a multiplication with the multiplicative inverse of the con...
static bool canNarrowCLMULToLegal(const TargetLowering &TLI, LLVMContext &Ctx, EVT VT, unsigned HalveDepth=0, unsigned TotalDepth=0)
Check if CLMUL on VT can eventually reach a type with legal CLMUL through a chain of halving decompos...
static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx, EVT VecVT, const SDLoc &dl, ElementCount SubEC)
static unsigned getConstraintPiority(TargetLowering::ConstraintType CT)
Return a number indicating our preference for choosing a type of constraint over another,...
static std::optional< bool > isFCmpEqualZero(FPClassTest Test, const fltSemantics &Semantics, const MachineFunction &MF)
Returns a true value if this FPClassTest can be performed with an ordered fcmp to 0,...
static bool canFoldStoreIntoLibCallOutputPointers(StoreSDNode *StoreNode, SDNode *FPNode)
Given a store node StoreNode, return true if it is safe to fold that node into FPNode,...
static void turnVectorIntoSplatVector(MutableArrayRef< SDValue > Values, std::function< bool(SDValue)> Predicate, SDValue AlternativeReplacement=SDValue())
If all values in Values that don't match the predicate are same 'splat' value, then replace all value...
static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT)
static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created)
Given an exact SDIV by a constant, create a multiplication with the multiplicative inverse of the con...
static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT, SDValue N0, const APInt &C1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static SDValue combineShiftToAVG(SDValue Op, TargetLowering::TargetLoweringOpt &TLO, const TargetLowering &TLI, const APInt &DemandedBits, const APInt &DemandedElts, unsigned Depth)
This file describes how to lower LLVM code to machine code.
static int Lookup(ArrayRef< TableEntry > Table, unsigned Opcode)
static SDValue scalarizeVectorStore(StoreSDNode *Store, MVT StoreVT, SelectionDAG &DAG)
Scalarize a vector store, bitcasting to TargetVT to determine the scalar type.
Value * RHS
Value * LHS
The Input class is used to parse a yaml document into in-memory structs and vectors.
static LLVM_ABI const llvm::fltSemantics & EnumToSemantics(Semantics S)
Definition APFloat.cpp:98
static constexpr roundingMode rmTowardZero
Definition APFloat.h:348
static LLVM_ABI ExponentType semanticsMinExponent(const fltSemantics &)
Definition APFloat.cpp:222
static LLVM_ABI unsigned getSizeInBits(const fltSemantics &Sem)
Returns the size of the floating point number (in bits) in the given semantics.
Definition APFloat.cpp:278
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static LLVM_ABI unsigned int semanticsPrecision(const fltSemantics &)
Definition APFloat.cpp:214
static LLVM_ABI bool isIEEELikeFP(const fltSemantics &)
Definition APFloat.cpp:255
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:360
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition APFloat.h:1406
static APFloat getSmallestNormalized(const fltSemantics &Sem, bool Negative=false)
Returns the smallest (by magnitude) normalized finite number in the given semantics.
Definition APFloat.h:1217
APInt bitcastToAPInt() const
Definition APFloat.h:1430
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
Definition APFloat.h:1197
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Definition APFloat.h:1157
void changeSign()
Definition APFloat.h:1356
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Definition APFloat.h:1168
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt udiv(const APInt &RHS) const
Unsigned division operation.
Definition APInt.cpp:1616
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
static LLVM_ABI void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
Definition APInt.cpp:1810
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition APInt.h:1429
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition APInt.h:450
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1055
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230
bool isMinSignedValue() const
Determine if this is the smallest signed value.
Definition APInt.h:424
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1563
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
Definition APInt.h:1414
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition APInt.h:1408
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1076
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1535
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:968
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition APInt.h:207
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1353
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1189
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition APInt.h:259
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1709
void setSignBit()
Set the sign bit to 1.
Definition APInt.h:1363
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1511
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:210
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
Definition APInt.h:217
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:330
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition APInt.h:1256
void clearAllBits()
Set every bit to 0.
Definition APInt.h:1419
void ashrInPlace(unsigned ShiftAmt)
Arithmetic right-shift this APInt by ShiftAmt in place.
Definition APInt.h:841
void negate()
Negate this APInt in place.
Definition APInt.h:1491
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1662
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition APInt.h:1621
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:652
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:220
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition APInt.h:1554
unsigned countLeadingZeros() const
Definition APInt.h:1629
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition APInt.h:357
LLVM_ABI void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition APInt.cpp:398
void clearLowBits(unsigned loBits)
Set bottom loBits bits to 0.
Definition APInt.h:1458
unsigned logBase2() const
Definition APInt.h:1784
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition APInt.h:476
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
Definition APInt.h:834
void setAllBits()
Set every bit to 1.
Definition APInt.h:1342
LLVM_ABI APInt multiplicativeInverse() const
Definition APInt.cpp:1317
bool isMaxSignedValue() const
Determine if this is the largest signed value.
Definition APInt.h:406
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition APInt.h:335
bool ule(const APInt &RHS) const
Unsigned less or equal comparison.
Definition APInt.h:1157
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:1028
void setBits(unsigned loBit, unsigned hiBit)
Set the bits from loBit (inclusive) to hiBit (exclusive) to 1.
Definition APInt.h:1390
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition APInt.h:880
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1264
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:441
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
void clearBits(unsigned LoBit, unsigned HiBit)
Clear the bits from LoBit (inclusive) to HiBit (exclusive) to 0.
Definition APInt.h:1440
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:297
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition APInt.h:1411
LLVM_ABI APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition APInt.cpp:483
bool isOne() const
Determine if this is a value of 1.
Definition APInt.h:390
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:287
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:240
void clearHighBits(unsigned hiBits)
Set top hiBits bits to 0.
Definition APInt.h:1465
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1585
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:865
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:858
unsigned countr_one() const
Count the number of trailing one bits.
Definition APInt.h:1679
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1228
void setBitVal(unsigned BitPosition, bool BitValue)
Set a given bit to a given value.
Definition APInt.h:1366
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
Get the array size.
Definition ArrayRef.h:141
A "pseudo-class" with methods for operating on BUILD_VECTORs.
LLVM_ABI ConstantSDNode * getConstantSplatNode(const APInt &DemandedElts, BitVector *UndefElements=nullptr) const
Returns the demanded splatted constant or null if this is not a constant splat.
CCValAssign - Represent assignment of one arg/retval to a location.
Register getLocReg() const
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
LLVM_ABI bool isIndirectCall() const
Return true if the callsite is an indirect call.
This class represents a function call, abstracting a target machine's calling convention.
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
Definition Constants.h:868
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:420
This class represents a range of values.
const APInt & getAPIntValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string, and methods for querying it.
Definition DataLayout.h:64
bool isLittleEndian() const
Layout endianness...
Definition DataLayout.h:217
LLVM_ABI Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
Definition TypeSize.h:315
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:354
const GlobalValue * getGlobal() const
Module * getParent()
Get the module that this global value is contained inside of...
std::vector< std::string > ConstraintCodeVector
Definition InlineAsm.h:104
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:354
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
Context object for machine code objects.
Definition MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:34
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:214
Machine Value Type.
SimpleValueType SimpleTy
bool isInteger() const
Return true if this is an integer or a vector integer type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static MVT getIntegerVT(unsigned BitWidth)
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
DenormalMode getDenormalMode(const fltSemantics &FPType) const
Returns the denormal handling type for the default rounding mode of the function.
MCSymbol * getJTISymbol(unsigned JTI, MCContext &Ctx, bool isLinkerPrivate=false) const
getJTISymbol - Return the MCSymbol for the specified non-empty jump table.
Function & getFunction()
Return the LLVM function that this machine code represents.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
@ EK_BlockAddress
EK_BlockAddress - Each entry is a plain address of block, e.g.: .word LBB123.
Flags getFlags() const
Return the raw flags of the source value.
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI MCRegister getLiveInPhysReg(Register VReg) const
getLiveInPhysReg - If VReg is a live-in virtual register, return the corresponding live-in physical r...
unsigned getAddressSpace() const
Return the address space for the associated pointer.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
MachineMemOperand * getMemOperand() const
Return the unique MachineMemOperand object describing the memory reference performed by operation.
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
const GlobalVariable * getNamedGlobal(StringRef Name) const
Return the global variable in the module with the specified name, of arbitrary type.
Definition Module.h:447
Represent a mutable reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:294
iterator end() const
Definition ArrayRef.h:339
iterator begin() const
Definition ArrayRef.h:338
Class to represent pointers.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
SDNodeFlags getFlags() const
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
bool use_empty() const
Return true if there are no nodes using value ResNo of Node.
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getElementCount(const SDLoc &DL, EVT VT, ElementCount EC)
bool willNotOverflowAdd(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the addition of 2 nodes can never overflow.
LLVM_ABI Align getReducedAlign(EVT VT, bool UseABI)
In most cases this function returns the ABI alignment for a given type, except for illegal vector typ...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI bool isKnownNeverLogicalZero(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
Test whether the given floating point SDValue (or all elements of it, if it is a vector) is known to ...
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT, unsigned Opcode)
Convert Op, which must be of integer type, to the integer type VT, by either any/sign/zero-extending ...
SDValue getExtractVectorElt(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Extract element at Idx from Vec.
LLVM_ABI unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
LLVM_ABI SDValue FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond, const SDLoc &dl, SDNodeFlags Flags={})
Constant fold a setcc to true or false.
bool isKnownNeverSNaN(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI void ExtractVectorElements(SDValue Op, SmallVectorImpl< SDValue > &Args, unsigned Start=0, unsigned Count=0, EVT EltVT=EVT())
Append the extracted elements from Start to Count out of the vector Op in Args.
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
LLVM_ABI SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
LLVM_ABI SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
LLVM_ABI bool isConstantIntBuildVectorOrConstantInt(SDValue N, bool AllowOpaques=true) const
Test whether the given value is a constant int or similar node.
LLVM_ABI SDValue getJumpTableDebugInfo(int JTI, SDValue Chain, const SDLoc &DL)
LLVM_ABI std::optional< unsigned > getValidMaximumShiftAmount(SDValue V, const APInt &DemandedElts, unsigned Depth=0) const
If a SHL/SRA/SRL node V has shift amounts that are all less than the element bit-width of the shift n...
LLVM_ABI SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
LLVM_ABI SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
static LLVM_ABI unsigned getHasPredecessorMaxSteps()
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getInsertSubvector(const SDLoc &DL, SDValue Vec, SDValue SubVec, unsigned Idx)
Insert SubVec at the Idx element of Vec.
LLVM_ABI SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false, SDNodeFlags Flags={})
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
bool willNotOverflowSub(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the sub of 2 nodes can never overflow.
LLVM_ABI bool shouldOptForSize() const
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
LLVM_ABI std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
LLVM_ABI std::optional< unsigned > getValidShiftAmount(SDValue V, const APInt &DemandedElts, unsigned Depth=0) const
If a SHL/SRA/SRL node V has a uniform shift amount that is less than the element bit-width of the shi...
LLVM_ABI SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
LLVM_ABI bool doesNodeExist(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops)
Check if a node exists without modifying its flags.
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
LLVM_ABI SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
LLVM_ABI SDValue getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, bool isTargetGA=false, unsigned TargetFlags=0)
LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI SDValue getTypeSize(const SDLoc &DL, EVT VT, TypeSize TS)
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
LLVM_ABI bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
LLVM_ABI void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
LLVM_ABI bool isIdentityElement(unsigned Opc, SDNodeFlags Flags, SDValue V, unsigned OperandNo, unsigned Depth=0) const
Returns true if V is an identity element of Opc with Flags.
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(SDValue Op, UndefPoisonKind Kind=UndefPoisonKind::UndefOrPoison, unsigned Depth=0) const
Return true if this function can prove that Op is never poison and, Kind can be used to track poison ...
LLVM_ABI bool isKnownNeverZero(SDValue Op, unsigned Depth=0) const
Test whether the given SDValue is known to contain non-zero value(s).
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
LLVM_ABI SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
LLVM_ABI SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
LLVM_ABI bool isKnownNeverNaN(SDValue Op, const APInt &DemandedElts, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN in...
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
LLVM_ABI SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getPOISON(EVT VT)
Return a POISON node. POISON does not have a useful SDLoc.
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
LLVM_ABI bool isKnownToBeAPowerOfTwo(SDValue Val, bool OrZero=false, unsigned Depth=0) const
Test if the given value is known to have exactly one bit set.
SDValue getSetCCVP(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Mask, SDValue EVL)
Helper function to make it easier to build VP_SETCCs if you just have an ISD::CondCode instead of an ...
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
static void commuteMask(MutableArrayRef< int > Mask)
Change values in a shuffle permute mask assuming the two vector operands have swapped position.
size_type size() const
Definition SmallPtrSet.h:99
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class is used to represent ISD::STORE nodes.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition StringRef.h:591
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:258
constexpr size_t size() const
Get the string size.
Definition StringRef.h:144
constexpr const char * data() const
Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:138
iterator end() const
Definition StringRef.h:116
Class to represent struct types.
LLVM_ABI void setAttributes(const CallBase *Call, unsigned ArgIdx)
Set CallLoweringInfo attribute flags based on a call instruction and called function attributes.
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
unsigned getBitWidthForCttzElements(EVT RetVT, ElementCount EC, bool ZeroIsPoison, const ConstantRange *VScaleRange) const
Return the minimum number of bits required to hold the maximum possible number of trailing zero vecto...
virtual bool isShuffleMaskLegal(ArrayRef< int >, EVT) const
Targets can use this to indicate that they only support some VECTOR_SHUFFLE operations,...
virtual bool shouldRemoveRedundantExtend(SDValue Op) const
Return true (the default) if it is profitable to remove a sext_inreg(x) where the sext is redundant,...
virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT, std::optional< unsigned > ByteOffset=std::nullopt) const
Return true if it is profitable to reduce a load to a smaller type.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool preferSelectsOverBooleanArithmetic(EVT VT) const
Should we prefer selects to doing arithmetic on boolean types.
virtual bool isLegalICmpImmediate(int64_t) const
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
virtual MVT::SimpleValueType getCmpLibcallReturnType() const
Return the ValueType for comparison libcalls.
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
virtual bool isSafeMemOpType(MVT) const
Returns true if it's safe to use load / store of the specified type to expand memcpy / memset inline.
const TargetMachine & getTargetMachine() const
virtual bool isCtpopFast(EVT VT) const
Return true if ctpop instruction is fast.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
bool isPaddedAtMostSignificantBitsWhenStored(EVT VT) const
Indicates if any padding is guaranteed to go at the most significant bits when storing the type to me...
LegalizeTypeAction
This enum indicates whether types are legal for a target, and if not, what action should be used to...
virtual bool hasBitTest(SDValue X, SDValue Y) const
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0. This knowledge can be us...
EVT getLegalTypeToTransformTo(LLVMContext &Context, EVT VT) const
Perform getTypeToTransformTo repeatedly until a legal type is obtained.
LegalizeAction getCondCodeAction(ISD::CondCode CC, MVT VT) const
Return how the condition code should be treated: either it is legal, needs to be expanded to some oth...
CallingConv::ID getLibcallImplCallingConv(RTLIB::LibcallImpl Call) const
Get the CallingConv that should be used for the specified libcall implementation.
virtual bool isCommutativeBinOp(unsigned Opcode) const
Returns true if the opcode is a commutative binary operation.
virtual bool isFPImmLegal(const APFloat &, EVT, bool ForCodeSize=false) const
Returns true if the target can instruction select the specified FP immediate natively.
virtual bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const
Should we transform the IR-optimal check for whether given truncation down into KeptBits would be trun...
bool isLegalRC(const TargetRegisterInfo &TRI, const TargetRegisterClass &RC) const
Return true if the value types that can be represented by the specified register class are all legal.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const
Returns the type for the shift amount of a shift opcode.
virtual bool shouldExtendTypeInLibCall(EVT Type) const
Returns true if arguments should be extended in lib calls.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool shouldAvoidTransformToShift(EVT VT, unsigned Amount) const
Return true if creating a shift of the type by the given amount is not profitable.
virtual bool isFPExtFree(EVT DestVT, EVT SrcVT) const
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const
Return the ValueType of the result of SETCC operations.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
BooleanContent getBooleanContents(bool isVec, bool isFloat) const
For targets without i1 registers, this gives the nature of the high-bits of boolean values held in ty...
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal for a comparison of the specified types on this ...
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
ISD::CondCode getSoftFloatCmpLibcallPredicate(RTLIB::LibcallImpl Call) const
Get the comparison predicate that's to be used to test the result of the comparison libcall against z...
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
TargetLoweringBase(const TargetMachine &TM, const TargetSubtargetInfo &STI)
NOTE: The TargetMachine owns TLOF.
virtual unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const
Return the maximum number of "x & (x - 1)" operations that can be done instead of deferring to a cust...
virtual bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
BooleanContent
Enum that describes how the target represents true/false values.
virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const
Return true if integer divide is usually cheaper than a sequence of several shifts,...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
virtual bool hasAndNotCompare(SDValue Y) const
Return true if the target should transform: (X & Y) == Y ---> (~X & Y) == 0 (X & Y) !...
virtual bool isNarrowingProfitable(SDNode *N, EVT SrcVT, EVT DestVT) const
Return true if it's profitable to narrow operations of type SrcVT to DestVT.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
RTLIB::LibcallImpl getLibcallImpl(RTLIB::Libcall Call) const
Get the libcall impl routine name for the specified libcall.
virtual bool isCtlzFast() const
Return true if ctlz instruction is fast.
virtual bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT, bool IsSigned) const
Return true if it is more correct/profitable to use strict FP_TO_INT conversion operations - canonica...
NegatibleCost
Enum that specifies when a float negation is beneficial.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
std::vector< ArgListEntry > ArgListTy
virtual EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op, const AttributeList &) const
Returns the target specific optimal type for load and store operations as a result of memset,...
virtual EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
bool isCondCodeLegalOrCustom(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal or custom for a comparison of the specified type...
bool isLoadLegal(EVT ValVT, EVT MemVT, Align Alignment, unsigned AddrSpace, unsigned ExtType, bool Atomic) const
Return true if the specified load with extension is legal on this target.
MVT getRegisterType(MVT VT) const
Return the type of registers that this ValueType will eventually require.
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
MulExpansionKind
Enum that specifies when a multiplication should be expanded.
static ISD::NodeType getExtendForContent(BooleanContent Content)
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT.
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions. Ref: "Hacker's Delight" by Henry Warren 1...
virtual ConstraintWeight getMultipleConstraintMatchWeight(AsmOperandInfo &info, int maIndex) const
Examine constraint type and operand type and determine a weight value.
bool expandMultipleResultFPLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, SDNode *Node, SmallVectorImpl< SDValue > &Results, std::optional< unsigned > CallRetResNo={}) const
Expands a node with multiple results to an FP or vector libcall.
SDValue expandVPCTLZ(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTLZ/VP_CTLZ_ZERO_POISON nodes.
bool expandMULO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]MULO.
bool expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, SelectionDAG &DAG, MulExpansionKind Kind, SDValue LL=SDValue(), SDValue LH=SDValue(), SDValue RL=SDValue(), SDValue RH=SDValue()) const
Expand a MUL into two nodes.
SmallVector< ConstraintPair > ConstraintGroup
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
virtual Align computeKnownAlignForTargetInstr(GISelValueTracking &Analysis, Register R, const MachineRegisterInfo &MRI, unsigned Depth=0) const
Determine the known alignment for the pointer value R.
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
virtual bool isUsedByReturnOnly(SDNode *, SDValue &) const
Return true if result of the specified node is used by a return node only.
virtual void computeKnownBitsForFrameIndex(int FIOp, KnownBits &Known, const MachineFunction &MF) const
Determine which of the bits of FrameIndex FIOp are known to be 0.
SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const
virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const
This method can be implemented by targets that want to expose additional information about sign bits ...
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
SDValue expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const
Expand VP_BSWAP nodes.
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
void forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl, bool Signed, const SDValue LHS, const SDValue RHS, SDValue &Lo, SDValue &Hi) const
Calculate full product of LHS and RHS either via a libcall or through brute force expansion of the mu...
SDValue expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const
Expand a VECREDUCE_SEQ_* into an explicit ordered calculation.
SDValue expandFCANONICALIZE(SDNode *Node, SelectionDAG &DAG) const
Expand FCANONICALIZE to FMUL with 1.
SDValue expandCTLZ(SDNode *N, SelectionDAG &DAG) const
Expand CTLZ/CTLZ_ZERO_POISON nodes.
SDValue expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const
Expand BITREVERSE nodes.
SDValue expandCTTZ(SDNode *N, SelectionDAG &DAG) const
Expand CTTZ/CTTZ_ZERO_POISON nodes.
virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const
Expands target specific indirect branch for the case of JumpTable expansion.
SDValue expandABD(SDNode *N, SelectionDAG &DAG) const
Expand ABDS/ABDU nodes.
virtual bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const
std::vector< AsmOperandInfo > AsmOperandInfoVector
SDValue expandCLMUL(SDNode *N, SelectionDAG &DAG) const
Expand carryless multiply.
SDValue expandShlSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]SHLSAT.
SDValue expandIS_FPCLASS(EVT ResultVT, SDValue Op, FPClassTest Test, SDNodeFlags Flags, const SDLoc &DL, SelectionDAG &DAG) const
Expand check for floating point class.
virtual bool isTargetCanonicalConstantNode(SDValue Op) const
Returns true if the given Opc is considered a canonical constant for the target, which should not be ...
SDValue expandFP_TO_INT_SAT(SDNode *N, SelectionDAG &DAG) const
Expand FP_TO_[US]INT_SAT into FP_TO_[US]INT and selects or min/max.
SDValue expandCttzElts(SDNode *Node, SelectionDAG &DAG) const
Expand a CTTZ_ELTS or CTTZ_ELTS_ZERO_POISON by calculating (VL - i) for each active lane (i),...
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
virtual unsigned computeNumSignBitsForTargetInstr(GISelValueTracking &Analysis, Register R, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
This method can be implemented by targets that want to expose additional information about sign bits ...
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "lookthrough" ops that don't contrib...
SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const
Expands an unaligned store to 2 half-size stores for integer values, and possibly more for vectors.
SDValue SimplifyMultipleUseDemandedVectorElts(SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
Helper wrapper around SimplifyMultipleUseDemandedBits, demanding all bits from only some vector eleme...
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual bool findOptimalMemOpLowering(LLVMContext &Context, std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes, EVT *LargestVT=nullptr) const
Determines the optimal series of memory ops to replace the memset / memcpy.
virtual SDValue unwrapAddress(SDValue N) const
void expandSADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::S(ADD|SUB)O.
SDValue expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const
Expand VP_BITREVERSE nodes.
SDValue expandABS(SDNode *N, SelectionDAG &DAG, bool IsNegative=false) const
Expand ABS nodes.
SDValue expandVecReduce(SDNode *Node, SelectionDAG &DAG) const
Expand a VECREDUCE_* into an explicit calculation.
bool ShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const
Check to see if the specified operand of the specified instruction is a constant integer.
virtual bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, UndefPoisonKind Kind, unsigned Depth) const
Return true if this function can prove that Op is never poison and, Kind can be used to track poison ...
SDValue expandVPCTTZElements(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTTZ_ELTS/VP_CTTZ_ELTS_ZERO_POISON nodes.
SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::SDIV node expressing a divide by constant, return a DAG expression to select that will ...
virtual const char * getTargetNodeName(unsigned Opcode) const
This method returns the name of a target specific DAG node.
bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const
Expand float to UINT conversion.
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
virtual bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded vector elements, returning true on success...
bool expandREM(SDNode *Node, SDValue &Result, SelectionDAG &DAG) const
Expand an SREM or UREM using SDIV/UDIV or SDIVREM/UDIVREM, if legal.
std::pair< SDValue, SDValue > expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Expands an unaligned load to 2 half-size loads for an integer, and possibly more for vectors.
SDValue expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *N, SelectionDAG &DAG) const
Expand fminimumnum/fmaximumnum into multiple comparison with selects.
void forceExpandMultiply(SelectionDAG &DAG, const SDLoc &dl, bool Signed, SDValue &Lo, SDValue &Hi, SDValue LHS, SDValue RHS, SDValue HiLHS=SDValue(), SDValue HiRHS=SDValue()) const
Calculate the product twice the width of LHS and RHS.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
virtual bool isTypeDesirableForOp(unsigned, EVT VT) const
Return true if the target has native support for the specified value type and it is 'desirable' to us...
SDValue expandVectorSplice(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::VECTOR_SPLICE.
SDValue getVectorSubVecPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, EVT SubVecVT, SDValue Index, const SDNodeFlags PtrArithFlags=SDNodeFlags()) const
Get a pointer to a sub-vector of type SubVecVT at index Idx located in memory for a vector of type Ve...
SDValue expandLoopDependenceMask(SDNode *N, SelectionDAG &DAG) const
Expand LOOP_DEPENDENCE_MASK nodes.
virtual const char * LowerXConstraint(EVT ConstraintVT) const
Try to replace an X constraint, which matches anything, with another that has more specific requireme...
SDValue expandCTPOP(SDNode *N, SelectionDAG &DAG) const
Expand CTPOP nodes.
virtual void computeKnownBitsForTargetInstr(GISelValueTracking &Analysis, Register R, KnownBits &Known, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::UDIV node expressing a divide by constant, return a DAG expression to select that will ...
SDValue expandVectorNaryOpBySplitting(SDNode *Node, SelectionDAG &DAG) const
~TargetLowering() override
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
SDValue expandBSWAP(SDNode *N, SelectionDAG &DAG) const
Expand BSWAP nodes.
SDValue expandFMINIMUM_FMAXIMUM(SDNode *N, SelectionDAG &DAG) const
Expand fminimum/fmaximum into multiple comparison with selects.
SDValue CTTZTableLookup(SDNode *N, SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op, unsigned NumBitsPerElt) const
Expand CTTZ via Table Lookup.
bool expandDIVREMByConstant(SDNode *N, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, SDValue LL=SDValue(), SDValue LH=SDValue()) const
Attempt to expand an n-bit div/rem/divrem by constant using an n/2-bit algorithm.
virtual void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool isPositionIndependent() const
std::pair< StringRef, TargetLowering::ConstraintType > ConstraintPair
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
ConstraintGroup getConstraintPreferences(AsmOperandInfo &OpInfo) const
Given an OpInfo with list of constraints codes as strings, return a sorted Vector of pairs of constra...
bool expandFP_TO_SINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const
Expand float(f32) to SINT(i64) conversion.
virtual SDValue SimplifyMultipleUseDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth) const
More limited version of SimplifyDemandedBits that can be used to "lookthrough" ops that don't contrib...
virtual SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Glue, const SDLoc &DL, const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const
SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, SDValue N1, MutableArrayRef< int > Mask, SelectionDAG &DAG) const
Tries to build a legal vector shuffle using the provided parameters or equivalent variations.
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
std::pair< SDValue, SDValue > scalarizeVectorLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Turn load of vector type into a load of the individual elements.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
virtual bool isDesirableToCommuteXorWithShift(const SDNode *N) const
Return true if it is profitable to combine an XOR of a logical shift to create a logical shift of NOT...
TargetLowering(const TargetLowering &)=delete
virtual bool shouldSimplifyDemandedVectorElts(SDValue Op, const TargetLoweringOpt &TLO) const
Return true if the target supports simplifying demanded vector elements by converting them to undefs.
bool isConstFalseVal(SDValue N) const
Return true if N is a constant or constant vector equal to the false value from getBooleanContents().
SDValue IncrementMemoryAddress(SDValue Addr, SDValue Mask, const SDLoc &DL, EVT DataVT, SelectionDAG &DAG, bool IsCompressedMemory) const
Increments memory address Addr according to the type of the value DataVT that should be stored.
bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const
Check whether a given call node is in tail position within its function.
virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL, const TargetRegisterInfo *TRI, const CallBase &Call) const
Split up the constraint string from the inline assembly value into the specific constraints and their...
virtual bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts, APInt &UndefElts, const SelectionDAG &DAG, unsigned Depth=0) const
Return true if vector Op has the same value across all DemandedElts, indicating any elements which ma...
SDValue expandRoundInexactToOdd(EVT ResultVT, SDValue Op, const SDLoc &DL, SelectionDAG &DAG) const
Truncate Op to ResultVT.
virtual bool shouldSplitFunctionArgumentsAsLittleEndian(const DataLayout &DL) const
For most targets, an LLVM type must be broken down into multiple smaller types.
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, DAGCombinerInfo &DCI, const SDLoc &dl) const
Try to simplify a setcc built with the specified operands and cc.
SDValue expandFunnelShift(SDNode *N, SelectionDAG &DAG) const
Expand funnel shift.
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const
Return true if folding a constant offset with the given GlobalAddress is legal.
bool LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, SDValue Mask, SDValue EVL, bool &NeedInvert, const SDLoc &dl, SDValue &Chain, bool IsSignaling=false) const
Legalize a SETCC or VP_SETCC with given LHS and RHS and condition code CC on the current target.
bool isExtendedTrueVal(const ConstantSDNode *N, EVT VT, bool SExt) const
Return true if N is a true value when extended to VT.
bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth, const APInt &DemandedBits, TargetLoweringOpt &TLO) const
Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
bool isConstTrueVal(SDValue N) const
Return true if N is a constant or constant vector equal to the true value from getBooleanContents().
SDValue expandVPCTPOP(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTPOP nodes.
SDValue expandFixedPointDiv(unsigned Opcode, const SDLoc &dl, SDValue LHS, SDValue RHS, unsigned Scale, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]DIVFIX[SAT].
virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo, SDValue Op, SelectionDAG *DAG=nullptr) const
Determines the constraint code and constraint type to use for the specific AsmOperandInfo,...
virtual void CollectTargetIntrinsicOperands(const CallInst &I, SmallVectorImpl< SDValue > &Ops, SelectionDAG &DAG) const
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, UndefPoisonKind Kind, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
SDValue expandVPCTTZ(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTTZ/VP_CTTZ_ZERO_POISON nodes.
SDValue expandVECTOR_COMPRESS(SDNode *Node, SelectionDAG &DAG) const
Expand a vector VECTOR_COMPRESS into a sequence of extract element, store temporarily,...
virtual const Constant * getTargetConstantFromLoad(LoadSDNode *LD) const
This method returns the constant pool value that will be loaded by LD.
SDValue expandFP_ROUND(SDNode *Node, SelectionDAG &DAG) const
Expand round(fp) to fp conversion.
SDValue createSelectForFMINNUM_FMAXNUM(SDNode *Node, SelectionDAG &DAG) const
Try to convert the fminnum/fmaxnum to a compare/select sequence.
SDValue expandCONVERT_FROM_ARBITRARY_FP(SDNode *Node, SelectionDAG &DAG) const
Expand CONVERT_FROM_ARBITRARY_FP using bit manipulation.
SDValue expandROT(SDNode *N, bool AllowVectorOps, SelectionDAG &DAG) const
Expand rotations.
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode, SDNodeFlags Flags={}) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index, const SDNodeFlags PtrArithFlags=SDNodeFlags()) const
Get a pointer to vector element Idx located in memory for a vector of type VecVT starting at a base a...
SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const
Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs.
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset.
virtual void computeKnownFPClassForTargetNode(const SDValue Op, KnownFPClass &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const
Determine floating-point class information for a target node.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
virtual void computeKnownFPClassForTargetInstr(GISelValueTracking &Analysis, Register R, KnownFPClass &Known, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
SDValue expandCMP(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]CMP.
void expandShiftParts(SDNode *N, SDValue &Lo, SDValue &Hi, SelectionDAG &DAG) const
Expand shift-by-parts.
virtual bool isKnownNeverNaNForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool SNaN=false, unsigned Depth=0) const
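If SNaN is false, returns true if Op is known to never be any NaN; if SNaN is true, returns true if Op is known to never be a signaling NaN.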
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[U|S]MULFIX[SAT].
SDValue getInboundsVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index) const
Get a pointer to vector element Idx located in memory for a vector of type VecVT starting at a base a...
SDValue expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][MIN|MAX].
SDValue expandVectorFindLastActive(SDNode *N, SelectionDAG &DAG) const
Expand VECTOR_FIND_LAST_ACTIVE nodes.
SDValue expandPartialReduceMLA(SDNode *Node, SelectionDAG &DAG) const
Expands PARTIAL_REDUCE_S/UMLA nodes to a series of simpler operations, consisting of zext/sext,...
void expandUADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::U(ADD|SUB)O.
virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
SDValue scalarizeExtractedVectorLoad(EVT ResultVT, const SDLoc &DL, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad, SelectionDAG &DAG) const
Replace an extraction of a load with a narrowed load.
virtual SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SREM lowering for power-of-2 denominators.
bool expandUINT_TO_FP(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const
Expand UINT(i64) to double(f64) conversion.
bool expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl, SDValue LHS, SDValue RHS, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, MulExpansionKind Kind, SDValue LL=SDValue(), SDValue LH=SDValue(), SDValue RL=SDValue(), SDValue RH=SDValue()) const
Expand a MUL or [US]MUL_LOHI of n-bit values into two or four nodes, respectively,...
SDValue expandAVG(SDNode *N, SelectionDAG &DAG) const
Expand vector/scalar AVGCEILS/AVGCEILU/AVGFLOORS/AVGFLOORU nodes.
SDValue expandCTLS(SDNode *N, SelectionDAG &DAG) const
Expand CTLS (count leading sign bits) nodes.
void setTypeIdForCallsiteInfo(const CallBase *CB, MachineFunction &MF, MachineFunction::CallSiteInfo &CSInfo) const
Primary interface to the complete machine description for the target machine.
bool isPositionIndependent() const
const Triple & getTargetTriple() const
TargetOptions Options
unsigned EmitCallSiteInfo
The flag enables call site info production.
unsigned EmitCallGraphSection
Emit section containing call graph metadata.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
iterator_range< regclass_iterator > regclasses() const
virtual StringRef getRegAsmName(MCRegister Reg) const
Return the assembly name for Reg.
bool isTypeLegalForClass(const TargetRegisterClass &RC, MVT T) const
Return true if the given TargetRegisterClass has the ValueType T.
TargetSubtargetInfo - Generic base class for all target subtargets.
bool isOSBinFormatCOFF() const
Tests whether the OS uses the COFF binary format.
Definition Triple.h:785
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
bool isSingleValueType() const
Return true if the type is a valid type for a register in codegen.
Definition Type.h:313
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:286
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:370
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition Type.h:328
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition Type.h:130
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
LLVM_ABI const fltSemantics & getFltSemantics() const
Definition Type.cpp:110
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
LLVM_ABI const Value * stripPointerCastsAndAliases() const
Strip off pointer casts, all-zero GEPs, address space casts, and aliases.
Definition Value.cpp:713
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:318
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition TypeSize.h:180
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_ABI APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth, bool MatchAllBits=false)
Splat/Merge neighboring bits to widen/narrow the bitmask represented by A to NewBitWidth bits.
Definition APInt.cpp:3061
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:823
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:261
@ PTRADD
PTRADD represents pointer arithmetic semantics, for targets that opt in using shouldPreservePtrArith(...
@ PARTIAL_REDUCE_SMLA
PARTIAL_REDUCE_[U|S]MLA(Accumulator, Input1, Input2) The partial reduction nodes sign or zero extend ...
@ LOOP_DEPENDENCE_RAW_MASK
@ FGETSIGN
INT = FGETSIGN(FP) - Return the sign bit of the specified floating point value as an integer 0/1 valu...
Definition ISDOpcodes.h:538
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:275
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:600
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:783
@ SMULFIX
RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on 2 integers with the same...
Definition ISDOpcodes.h:394
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:294
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition ISDOpcodes.h:522
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ SMULFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition ISDOpcodes.h:400
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:857
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:518
@ VECTOR_FIND_LAST_ACTIVE
Finds the index of the last active mask element Operands: Mask.
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:884
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:584
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:417
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:747
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition ISDOpcodes.h:914
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:280
@ FMULADD
FMULADD - Performs a * b + c, with, or without, intermediate rounding.
Definition ISDOpcodes.h:528
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:997
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:254
@ CLMUL
Carry-less multiplication operations.
Definition ISDOpcodes.h:778
@ SDIVFIX
RESULT = [US]DIVFIX(LHS, RHS, SCALE) - Perform fixed point division on 2 integers with the same width...
Definition ISDOpcodes.h:407
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ CTLZ_ZERO_POISON
Definition ISDOpcodes.h:792
@ PARTIAL_REDUCE_UMLA
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:848
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition ISDOpcodes.h:715
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:665
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
@ PARTIAL_REDUCE_FMLA
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ SSUBO
Same for subtraction.
Definition ISDOpcodes.h:352
@ BRIND
BRIND - Indirect branch.
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:541
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:548
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:374
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:800
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:247
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:672
@ GET_ACTIVE_LANE_MASK
GET_ACTIVE_LANE_MASK - this corresponds to the llvm.get.active.lane.mask intrinsic.
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition ISDOpcodes.h:230
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition ISDOpcodes.h:348
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:704
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:769
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:649
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:614
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:854
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:815
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values, following IEEE-754 definition...
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
Definition ISDOpcodes.h:386
@ SMULO
Same for multiplication.
Definition ISDOpcodes.h:356
@ VECTOR_SPLICE_LEFT
VECTOR_SPLICE_LEFT(VEC1, VEC2, OFFSET) - Shifts CONCAT_VECTORS(VEC1, VEC2) left by OFFSET elements an...
Definition ISDOpcodes.h:653
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition ISDOpcodes.h:903
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:892
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:727
@ SDIVFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition ISDOpcodes.h:413
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:982
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:809
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:328
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:477
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:930
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition ISDOpcodes.h:179
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition ISDOpcodes.h:710
@ VECTOR_SPLICE_RIGHT
VECTOR_SPLICE_RIGHT(VEC1, VEC2, OFFSET) - Shifts CONCAT_VECTORS(VEC1,VEC2) right by OFFSET elements a...
Definition ISDOpcodes.h:657
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:304
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:241
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:565
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ CTTZ_ZERO_POISON
Bit counting operators with a poisoned result for zero inputs.
Definition ISDOpcodes.h:791
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:963
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition ISDOpcodes.h:925
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition ISDOpcodes.h:949
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:860
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:837
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:534
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:365
@ CALLSEQ_START
CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end of a call sequence,...
@ CTTZ_ELTS_ZERO_POISON
@ FMINIMUMNUM
FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum that is the same as FMINNUM_IEEE and FMAXNUM_IEEE besid...
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:722
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:213
@ ABS_MIN_POISON
ABS with a poison result for INT_MIN.
Definition ISDOpcodes.h:751
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:556
LLVM_ABI NodeType getOppositeSignednessMinMaxOpcode(unsigned MinMaxOpc)
Given a MinMaxOpc of ISD::(U|S)MIN or ISD::(U|S)MAX, returns the corresponding opcode with the opposi...
LLVM_ABI bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
LLVM_ABI NodeType getExtForLoadExtType(bool IsFP, LoadExtType)
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isTrueWhenEqual(CondCode Cond)
Return true if the specified condition returns true if the two operands to the condition are equal.
unsigned getUnorderedFlavor(CondCode Cond)
This function returns 0 if the condition is always false if an operand is a NaN, 1 if the condition i...
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
LLVM_ABI bool matchBinaryPredicate(SDValue LHS, SDValue RHS, std::function< bool(ConstantSDNode *, ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTypeMismatch=false)
Attempt to match a binary predicate against a pair of scalar/splat constants or every element of a pa...
bool matchUnaryPredicate(SDValue Op, std::function< bool(ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTruncation=false)
Hook for matching ConstantSDNode predicate.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LLVM_ABI NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
match_deferred< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
auto m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
Or< Preds... > m_AnyOf(const Preds &...preds)
bool sd_match(SDNode *N, const SelectionDAG *DAG, Pattern &&P)
NUses_match< 1, Value_match > m_OneUse()
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:558
void stable_sort(R &&Range)
Definition STLExtras.h:2115
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
InstructionCost Cost
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI bool isAllOnesOrAllOnesSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant -1 integer or a splatted vector of a constant -1 integer (with...
Definition Utils.cpp:1569
@ Undef
Value of the register doesn't matter.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2553
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
FPClassTest invertFPClassTestIfSimpler(FPClassTest Test, bool UseFCmp)
Evaluates if the specified FP class test is better performed as the inverse (i.e. fewer instructions should be required to lower it).
LLVM_ABI bool isOneOrOneSplatFP(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant floating-point value, or a splatted vector of a constant float...
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value that is congruent to Skew modulo Align.
Definition MathExtras.h:546
void * PointerTy
LLVM_ABI bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition Utils.cpp:1551
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition bit.h:362
LLVM_ABI void reportFatalInternalError(Error Err)
Report a fatal error that indicates a bug in LLVM.
Definition Error.cpp:173
LLVM_ABI ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:149
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
LLVM_ABI bool getShuffleDemandedElts(int SrcWidth, ArrayRef< int > Mask, const APInt &DemandedElts, APInt &DemandedLHS, APInt &DemandedRHS, bool AllowUndefElts=false)
Transform a shuffle mask's output demanded element mask into demanded element masks for the 2 operand...
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
LLVM_ABI bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
auto find_if_not(R &&Range, UnaryPredicate P)
Definition STLExtras.h:1776
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
LLVM_ABI ConstantRange getVScaleRange(const Function *F, unsigned BitWidth)
Determine the possible constant range of vscale with the given bit width, based on the vscale_range f...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
@ Mod
The access may modify the value stored in memory.
Definition ModRef.h:34
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:394
@ Other
Any other memory.
Definition ModRef.h:68
@ AfterLegalizeTypes
Definition DAGCombine.h:17
To bit_cast(const From &from) noexcept
Definition bit.h:90
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
fltNonfiniteBehavior
Definition APFloat.h:952
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isZeroOrZeroSplat(SDValue N, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
UndefPoisonKind
Enumeration to track whether we are interested in Undef, Poison, or both.
Definition UndefPoison.h:20
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
LLVM_ABI bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
APFloat neg(APFloat X)
Returns the negated value of the argument.
Definition APFloat.h:1666
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:197
@ Increment
Incrementally increasing token ID.
Definition AllocToken.h:26
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:373
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:876
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Represent subnormal handling kind for floating point instruction inputs and outputs.
DenormalModeKind Input
Denormal treatment kind for floating point instruction inputs in the default floating-point environme...
@ PreserveSign
The sign of a flushed-to-zero number is preserved in the sign of 0.
@ PositiveZero
Denormals are flushed to positive zero.
@ IEEE
IEEE-754 denormal numbers preserved.
constexpr bool inputsAreZero() const
Return true if input denormals must be implicitly treated as 0.
Extended Value Type.
Definition ValueTypes.h:35
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:403
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:145
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:70
EVT changeTypeToInteger() const
Return the type converted to an equivalently sized integer or vector with integer element type.
Definition ValueTypes.h:129
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:292
bool bitsLT(EVT VT) const
Return true if this has fewer bits than VT.
Definition ValueTypes.h:308
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:155
ElementCount getVectorElementCount() const
Definition ValueTypes.h:358
EVT getDoubleNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:479
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:381
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition ValueTypes.h:251
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition ValueTypes.h:367
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:393
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
Definition ValueTypes.h:438
bool isPow2VectorType() const
Returns true if the given vector type has a power-of-2 number of elements.
Definition ValueTypes.h:486
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
Definition ValueTypes.h:420
EVT changeVectorElementType(LLVMContext &Context, EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition ValueTypes.h:98
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:324
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:61
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:389
bool isScalableVT() const
Return true if the type is a scalable type.
Definition ValueTypes.h:195
bool isFixedLengthVector() const
Definition ValueTypes.h:189
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:176
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:331
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
EVT widenIntegerElementType(LLVMContext &Context) const
Return a VT for an integer element type with doubled bit width.
Definition ValueTypes.h:452
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition ValueTypes.h:182
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:336
EVT changeElementType(LLVMContext &Context, EVT EltVT) const
Return a VT for a type whose attributes match ourselves with the exception of the element type that i...
Definition ValueTypes.h:121
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:165
LLVM_ABI const fltSemantics & getFltSemantics() const
Returns an APFloat semantics tag appropriate for the value type.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:344
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition ValueTypes.h:316
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:469
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:160
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
Definition KnownBits.h:315
KnownBits anyextOrTrunc(unsigned BitWidth) const
Return known bits for an "any" extension or truncation of the value we're tracking.
Definition KnownBits.h:190
unsigned countMinSignBits() const
Returns the number of times the sign bit is replicated into the other bits.
Definition KnownBits.h:269
static LLVM_ABI KnownBits smax(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for smax(LHS, RHS).
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition KnownBits.h:106
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition KnownBits.h:256
bool isUnknown() const
Returns true if we don't know any bits.
Definition KnownBits.h:64
void setAllConflict()
Make all bits known to be both zero and one.
Definition KnownBits.h:97
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
Definition KnownBits.h:165
KnownBits byteSwap() const
Definition KnownBits.h:553
static LLVM_ABI std::optional< bool > sge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGE result.
unsigned countMaxPopulation() const
Returns the maximum number of bits that could be one.
Definition KnownBits.h:303
KnownBits reverseBits() const
Definition KnownBits.h:557
KnownBits concat(const KnownBits &Lo) const
Concatenate the bits from Lo onto the bottom of *this.
Definition KnownBits.h:247
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
static LLVM_ABI KnownBits umax(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for umax(LHS, RHS).
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition KnownBits.h:176
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:72
KnownBits unionWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for either this or RHS or both.
Definition KnownBits.h:335
bool isSignUnknown() const
Returns true if we don't know the sign bit.
Definition KnownBits.h:67
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition KnownBits.h:325
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition KnownBits.h:184
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition KnownBits.h:262
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition KnownBits.h:146
static LLVM_ABI KnownBits smin(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for smin(LHS, RHS).
static LLVM_ABI std::optional< bool > ugt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGT result.
static LLVM_ABI std::optional< bool > slt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLT result.
static LLVM_ABI KnownBits computeForAddSub(bool Add, bool NSW, bool NUW, const KnownBits &LHS, const KnownBits &RHS)
Compute known bits resulting from adding LHS and RHS.
Definition KnownBits.cpp:61
static LLVM_ABI std::optional< bool > ult(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULT result.
static LLVM_ABI std::optional< bool > ule(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULE result.
bool isNegative() const
Returns true if this value is known to be negative.
Definition KnownBits.h:103
static LLVM_ABI KnownBits mul(const KnownBits &LHS, const KnownBits &RHS, bool NoUndefSelfMultiply=false)
Compute known bits resulting from multiplying LHS and RHS.
KnownBits anyext(unsigned BitWidth) const
Return known bits for an "any" extension of the value we're tracking, where we don't know anything about the extended bits.
Definition KnownBits.h:171
static LLVM_ABI std::optional< bool > sle(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLE result.
static LLVM_ABI std::optional< bool > sgt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGT result.
unsigned countMinPopulation() const
Returns the number of bits known to be one.
Definition KnownBits.h:300
static LLVM_ABI std::optional< bool > uge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGE result.
static LLVM_ABI KnownBits umin(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for umin(LHS, RHS).
Matching combinators.
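This class contains a discriminated union of information about pointers in memory operands, relating them back to LLVM IR or to virtual locations (such as frame indices) that are exposed during codegen.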
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
static bool hasVectorMaskArgument(RTLIB::LibcallImpl Impl)
Returns true if the function has a vector mask argument, which is assumed to be the last argument.
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasNoUnsignedWrap() const
bool hasNoSignedWrap() const
void setNoSignedWrap(bool b)
This represents a list of ValueTypes that has been interned by a SelectionDAG.
Magic data for optimising signed division by a constant.
static LLVM_ABI SignedDivisionByConstantInfo get(const APInt &D)
Calculate the magic numbers required to implement a signed integer division by a constant as a sequence of multiplies, adds and shifts.
This contains information for each constraint that we are lowering.
std::string ConstraintCode
This contains the actual string for the code, like "m".
LLVM_ABI unsigned getMatchedOperand() const
If this is an input matching constraint, this method returns the output operand it matches.
LLVM_ABI bool isMatchingInputConstraint() const
Return true if this is an input operand that is a matching constraint like "4".
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setIsPostTypeLegalization(bool Value=true)
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
CallLoweringInfo & setDiscardResult(bool Value=true)
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
CallLoweringInfo & setNoReturn(bool Value=true)
CallLoweringInfo & setChain(SDValue InChain)
LLVM_ABI void AddToWorklist(SDNode *N)
LLVM_ABI void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setIsPostTypeLegalization(bool Value=true)
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT)
MakeLibCallOptions & setIsSigned(bool Value=true)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetLowering to its clients that want to combine.
Magic data for optimising unsigned division by a constant.
static LLVM_ABI UnsignedDivisionByConstantInfo get(const APInt &D, unsigned LeadingZeros=0, bool AllowEvenDivisorOptimization=true, bool AllowWidenOptimization=false)
Calculate the magic numbers required to implement an unsigned integer division by a constant as a sequence of multiplies, adds and shifts.
fltNonfiniteBehavior nonFiniteBehavior
Definition APFloat.h:1013
fltNanEncoding nanEncoding
Definition APFloat.h:1015