//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This implements the TargetLowering class.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include <cctype>
#include <deque>

using namespace llvm;
using namespace llvm::SDPatternMatch;

/// NOTE: The TargetMachine owns TLOF.
TargetLowering::TargetLowering(const TargetMachine &tm)
    : TargetLoweringBase(tm) {}

// Define the virtual destructor out-of-line for build efficiency.
TargetLowering::~TargetLowering() = default;

const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
  return nullptr;
}

bool TargetLowering::isPositionIndependent() const {
  return getTargetMachine().isPositionIndependent();
}

/// Check whether a given call node is in tail position within its function. If
/// so, it sets Chain to the input chain of the tail call.
bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
                                          SDValue &Chain) const {
  const Function &F = DAG.getMachineFunction().getFunction();

  // First, check if tail calls have been disabled in this function.
  if (F.getFnAttribute("disable-tail-calls").getValueAsBool())
    return false;

  // Conservatively require the attributes of the call to match those of
  // the return. Ignore the following attributes because they don't affect the
  // call sequence.
  AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs());
  for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
                           Attribute::DereferenceableOrNull, Attribute::NoAlias,
                           Attribute::NonNull, Attribute::NoUndef,
                           Attribute::Range, Attribute::NoFPClass})
    CallerAttrs.removeAttribute(Attr);

  if (CallerAttrs.hasAttributes())
    return false;

  // It's not safe to eliminate the sign / zero extension of the return value.
  if (CallerAttrs.contains(Attribute::ZExt) ||
      CallerAttrs.contains(Attribute::SExt))
    return false;

  // Check if the only use is a function return node.
  return isUsedByReturnOnly(Node, Chain);
}

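// Check that the parameters to a call, when passed in callee-saved registers,
// still hold the exact values the caller received in those registers, i.e.
// they are unmodified function live-ins.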
bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
                                          const uint32_t *CallerPreservedMask,
                                          const SmallVectorImpl<CCValAssign> &ArgLocs,
                                          const SmallVectorImpl<SDValue> &OutVals) const {
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    const CCValAssign &ArgLoc = ArgLocs[I];
    if (!ArgLoc.isRegLoc())
      continue;
    MCRegister Reg = ArgLoc.getLocReg();
    // Only look at callee-saved registers.
    if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
      continue;
    // Check that we pass the value used for the caller.
    // (We look for a CopyFromReg reading a virtual register that is used
    // for the function live-in value of register Reg)
    SDValue Value = OutVals[I];
    if (Value->getOpcode() == ISD::AssertZext)
      Value = Value.getOperand(0);
    if (Value->getOpcode() != ISD::CopyFromReg)
      return false;
    Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
    if (MRI.getLiveInPhysReg(ArgReg) != Reg)
      return false;
  }
  return true;
}

/// Set CallLoweringInfo attribute flags based on a call instruction
/// and called function attributes.
void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
                                                     unsigned ArgIdx) {
  IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
  IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
  IsNoExt = Call->paramHasAttr(ArgIdx, Attribute::NoExt);
  IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
  IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
  IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
  IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
  IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
  IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
  IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
  IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
  IsSwiftAsync = Call->paramHasAttr(ArgIdx, Attribute::SwiftAsync);
  IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
  Alignment = Call->getParamStackAlign(ArgIdx);
  IndirectType = nullptr;
  assert(IsByVal + IsPreallocated + IsInAlloca + IsSRet <= 1 &&
         "multiple ABI attributes?");
  if (IsByVal) {
    IndirectType = Call->getParamByValType(ArgIdx);
    if (!Alignment)
      Alignment = Call->getParamAlign(ArgIdx);
  }
  if (IsPreallocated)
    IndirectType = Call->getParamPreallocatedType(ArgIdx);
  if (IsInAlloca)
    IndirectType = Call->getParamInAllocaType(ArgIdx);
  if (IsSRet)
    IndirectType = Call->getParamStructRetType(ArgIdx);
}

/// Generate a libcall taking the given operands as arguments and returning a
/// result of type RetVT.
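/// Returns a {result, chain} pair, where the second value is the output chain
/// of the generated call.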
std::pair<SDValue, SDValue>
TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
                            ArrayRef<SDValue> Ops,
                            MakeLibCallOptions CallOptions,
                            const SDLoc &dl,
                            SDValue InChain) const {
  if (!InChain)
    InChain = DAG.getEntryNode();

  TargetLowering::ArgListTy Args;
  Args.reserve(Ops.size());

  ArrayRef<Type *> OpsTypeOverrides = CallOptions.OpsTypeOverrides;
  for (unsigned i = 0; i < Ops.size(); ++i) {
    SDValue NewOp = Ops[i];
    Type *Ty = i < OpsTypeOverrides.size() && OpsTypeOverrides[i]
                   ? OpsTypeOverrides[i]
                   : NewOp.getValueType().getTypeForEVT(*DAG.getContext());
    TargetLowering::ArgListEntry Entry(NewOp, Ty);
    if (CallOptions.IsSoften)
      Entry.OrigTy =
          CallOptions.OpsVTBeforeSoften[i].getTypeForEVT(*DAG.getContext());

    Entry.IsSExt =
        shouldSignExtendTypeInLibCall(Entry.Ty, CallOptions.IsSigned);
    Entry.IsZExt = !Entry.IsSExt;

    if (CallOptions.IsSoften &&
        !shouldExtendTypeInLibCall(CallOptions.OpsVTBeforeSoften[i])) {
      Entry.IsSExt = Entry.IsZExt = false;
    }
    Args.push_back(Entry);
  }

  const char *LibcallName = getLibcallName(LC);
  if (LC == RTLIB::UNKNOWN_LIBCALL || !LibcallName)
    reportFatalInternalError("unsupported library call operation");

  SDValue Callee =
      DAG.getExternalSymbol(LibcallName, getPointerTy(DAG.getDataLayout()));

  Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
  Type *OrigRetTy = RetTy;
  TargetLowering::CallLoweringInfo CLI(DAG);
  bool signExtend = shouldSignExtendTypeInLibCall(RetTy, CallOptions.IsSigned);
  bool zeroExtend = !signExtend;

  if (CallOptions.IsSoften) {
    OrigRetTy = CallOptions.RetVTBeforeSoften.getTypeForEVT(*DAG.getContext());
    if (!shouldExtendTypeInLibCall(CallOptions.RetVTBeforeSoften))
      signExtend = zeroExtend = false;
  }

  CLI.setDebugLoc(dl)
      .setChain(InChain)
      .setLibCallee(getLibcallCallingConv(LC), RetTy, OrigRetTy, Callee,
                    std::move(Args))
      .setNoReturn(CallOptions.DoesNotReturn)
      .setDiscardResult(!CallOptions.IsReturnValueUsed)
      .setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization)
      .setSExtResult(signExtend)
      .setZExtResult(zeroExtend);
  return LowerCallTo(CLI);
}

bool TargetLowering::findOptimalMemOpLowering(
    LLVMContext &Context, std::vector<EVT> &MemOps, unsigned Limit,
    const MemOp &Op, unsigned DstAS, unsigned SrcAS,
    const AttributeList &FuncAttributes) const {
  if (Limit != ~unsigned(0) && Op.isMemcpyWithFixedDstAlign() &&
      Op.getSrcAlign() < Op.getDstAlign())
    return false;

  EVT VT = getOptimalMemOpType(Context, Op, FuncAttributes);

  if (VT == MVT::Other) {
    // Use the largest integer type whose alignment constraints are satisfied.
    // We only need to check DstAlign here as SrcAlign is always greater or
    // equal to DstAlign (or zero).
    VT = MVT::LAST_INTEGER_VALUETYPE;
    if (Op.isFixedDstAlign())
      while (Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
             !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign()))
        VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
    assert(VT.isInteger());

    // Find the largest legal integer type.
    MVT LVT = MVT::LAST_INTEGER_VALUETYPE;
    while (!isTypeLegal(LVT))
      LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
    assert(LVT.isInteger());

    // If the type we've chosen is larger than the largest legal integer type
    // then use that instead.
    if (VT.bitsGT(LVT))
      VT = LVT;
  }

  unsigned NumMemOps = 0;
  uint64_t Size = Op.size();
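  // Greedily cover the remaining bytes with the widest usable type, shrinking
  // VT whenever it no longer fits in what is left of the operation.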
  while (Size) {
    unsigned VTSize = VT.getSizeInBits() / 8;
    while (VTSize > Size) {
      // For now, only use non-vector loads / stores for the left-over pieces.
      EVT NewVT = VT;
      unsigned NewVTSize;

      bool Found = false;
      if (VT.isVector() || VT.isFloatingPoint()) {
        NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
        if (isOperationLegalOrCustom(ISD::STORE, NewVT) &&
            isSafeMemOpType(NewVT.getSimpleVT()))
          Found = true;
        else if (NewVT == MVT::i64 &&
                 isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
                 isSafeMemOpType(MVT::f64)) {
          // i64 is usually not legal on 32-bit targets, but f64 may be.
          NewVT = MVT::f64;
          Found = true;
        }
      }

      if (!Found) {
        do {
          NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
          if (NewVT == MVT::i8)
            break;
        } while (!isSafeMemOpType(NewVT.getSimpleVT()));
      }
      NewVTSize = NewVT.getSizeInBits() / 8;

      // If the new VT cannot cover all of the remaining bits, then consider
      // issuing a (or a pair of) unaligned and overlapping load / store.
      unsigned Fast;
      if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
          allowsMisalignedMemoryAccesses(
              VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
              MachineMemOperand::MONone, &Fast) &&
          Fast)
        VTSize = Size;
      else {
        VT = NewVT;
        VTSize = NewVTSize;
      }
    }

    if (++NumMemOps > Limit)
      return false;

    MemOps.push_back(VT);
    Size -= VTSize;
  }

  return true;
}

/// Soften the operands of a comparison. This code is shared among BR_CC,
/// SELECT_CC, and SETCC handlers.
void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
                                         SDValue &NewLHS, SDValue &NewRHS,
                                         ISD::CondCode &CCCode,
                                         const SDLoc &dl, const SDValue OldLHS,
                                         const SDValue OldRHS) const {
  SDValue Chain;
  return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
                             OldRHS, Chain);
}

void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
                                         SDValue &NewLHS, SDValue &NewRHS,
                                         ISD::CondCode &CCCode,
                                         const SDLoc &dl, const SDValue OldLHS,
                                         const SDValue OldRHS,
                                         SDValue &Chain,
                                         bool IsSignaling) const {
  // FIXME: Currently we cannot really respect all IEEE predicates due to
  // libgcc not supporting them. We can update this code when libgcc provides
  // such functions.

  assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 ||
          VT == MVT::ppcf128) &&
         "Unsupported setcc type!");

  // Expand into one or more soft-fp libcall(s).
  RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
  bool ShouldInvertCC = false;
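  // Predicates with no single matching libcall (e.g. SETUEQ, SETONE) are
  // decomposed into two libcalls, LC1 and LC2, whose boolean results are
  // combined below.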
  switch (CCCode) {
  case ISD::SETEQ:
  case ISD::SETOEQ:
    LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  case ISD::SETNE:
  case ISD::SETUNE:
    LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
          (VT == MVT::f64) ? RTLIB::UNE_F64 :
          (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
    break;
  case ISD::SETGE:
  case ISD::SETOGE:
    LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
          (VT == MVT::f64) ? RTLIB::OGE_F64 :
          (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
    break;
  case ISD::SETLT:
  case ISD::SETOLT:
    LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
          (VT == MVT::f64) ? RTLIB::OLT_F64 :
          (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
    break;
  case ISD::SETLE:
  case ISD::SETOLE:
    LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
          (VT == MVT::f64) ? RTLIB::OLE_F64 :
          (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
    break;
  case ISD::SETGT:
  case ISD::SETOGT:
    LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
          (VT == MVT::f64) ? RTLIB::OGT_F64 :
          (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
    break;
  case ISD::SETO:
    ShouldInvertCC = true;
    [[fallthrough]];
  case ISD::SETUO:
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    break;
  case ISD::SETONE:
    // SETONE = O && UNE
    ShouldInvertCC = true;
    [[fallthrough]];
  case ISD::SETUEQ:
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  default:
    // Invert CC for unordered comparisons
    ShouldInvertCC = true;
    switch (CCCode) {
    case ISD::SETULT:
      LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
            (VT == MVT::f64) ? RTLIB::OGE_F64 :
            (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
      break;
    case ISD::SETULE:
      LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
            (VT == MVT::f64) ? RTLIB::OGT_F64 :
            (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
      break;
    case ISD::SETUGT:
      LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
            (VT == MVT::f64) ? RTLIB::OLE_F64 :
            (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
      break;
    case ISD::SETUGE:
      LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
            (VT == MVT::f64) ? RTLIB::OLT_F64 :
            (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
      break;
    default: llvm_unreachable("Do not know how to soften this setcc!");
    }
  }

  // Use the target specific return value for comparison lib calls.
  EVT RetVT = getCmpLibcallReturnType();
  SDValue Ops[2] = {NewLHS, NewRHS};
  TargetLowering::MakeLibCallOptions CallOptions;
  EVT OpsVT[2] = { OldLHS.getValueType(),
                   OldRHS.getValueType() };
  CallOptions.setTypeListBeforeSoften(OpsVT, RetVT);
  auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
  NewLHS = Call.first;
  NewRHS = DAG.getConstant(0, dl, RetVT);

  RTLIB::LibcallImpl LC1Impl = getLibcallImpl(LC1);
  if (LC1Impl == RTLIB::Unsupported) {
    DAG.getContext()->emitError(
        "no libcall available to soften floating-point compare");
  }

  CCCode = getSoftFloatCmpLibcallPredicate(LC1Impl);
  if (ShouldInvertCC) {
    assert(RetVT.isInteger());
    CCCode = getSetCCInverse(CCCode, RetVT);
  }

  if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
    // Update Chain.
    Chain = Call.second;
  } else {
    RTLIB::LibcallImpl LC2Impl = getLibcallImpl(LC2);
    if (LC2Impl == RTLIB::Unsupported) {
      DAG.getContext()->emitError(
          "no libcall available to soften floating-point compare");
    }

    assert(CCCode == (ShouldInvertCC ? ISD::SETEQ : ISD::SETNE) &&
           "unordered call should be simple boolean");

    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
    if (getBooleanContents(RetVT) == ZeroOrOneBooleanContent) {
      NewLHS = DAG.getNode(ISD::AssertZext, dl, RetVT, Call.first,
                           DAG.getValueType(MVT::i1));
    }

    SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
    auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
    CCCode = getSoftFloatCmpLibcallPredicate(LC2Impl);
    if (ShouldInvertCC)
      CCCode = getSetCCInverse(CCCode, RetVT);
    NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
    if (Chain)
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
                          Call2.second);
    NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
                         Tmp.getValueType(), Tmp, NewLHS);
    NewRHS = SDValue();
  }
}

/// Return the entry encoding for a jump table in the current function. The
/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
unsigned TargetLowering::getJumpTableEncoding() const {
  // In non-pic modes, just use the address of a block.
  if (!isPositionIndependent())
    return MachineJumpTableInfo::EK_BlockAddress;

  // Otherwise, use a label difference.
  return MachineJumpTableInfo::EK_LabelDifference32;
}

SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
                                                 SelectionDAG &DAG) const {
  return Table;
}

/// This returns the relocation base for the given PIC jumptable, the same as
/// getPICJumpTableRelocBase, but as an MCExpr.
const MCExpr *
TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                                             unsigned JTI, MCContext &Ctx) const {
  // The normal PIC reloc base is the label at the start of the jump table.
  return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
}

SDValue TargetLowering::expandIndirectJTBranch(const SDLoc &dl, SDValue Value,
                                               SDValue Addr, int JTI,
                                               SelectionDAG &DAG) const {
  SDValue Chain = Value;
  // Jump table debug info is only needed if CodeView is enabled.
  if (DAG.getTarget().getTargetTriple().isOSBinFormatCOFF()) {
    Chain = DAG.getJumpTableDebugInfo(JTI, Chain, dl);
  }
  return DAG.getNode(ISD::BRIND, dl, MVT::Other, Chain, Addr);
}

bool
TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  const TargetMachine &TM = getTargetMachine();
  const GlobalValue *GV = GA->getGlobal();

  // If the address is not even local to this DSO we will have to load it from
  // a GOT and then add the offset.
  if (!TM.shouldAssumeDSOLocal(GV))
    return false;

  // If the code is position independent we will have to add a base register.
  if (isPositionIndependent())
    return false;

  // Otherwise we can do it.
  return true;
}

//===----------------------------------------------------------------------===//
// Optimization Methods
//===----------------------------------------------------------------------===//

/// If the specified instruction has a constant integer operand and there are
/// bits set in that constant that are not demanded, then clear those bits and
/// return true.
bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
                                            const APInt &DemandedBits,
                                            const APInt &DemandedElts,
                                            TargetLoweringOpt &TLO) const {
  SDLoc DL(Op);
  unsigned Opcode = Op.getOpcode();

  // Early-out if we've ended up calling an undemanded node, leave this to
  // constant folding.
  if (DemandedBits.isZero() || DemandedElts.isZero())
    return false;

  // Do target-specific constant optimization.
  if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
    return TLO.New.getNode();

  // FIXME: ISD::SELECT, ISD::SELECT_CC
  switch (Opcode) {
  default:
    break;
  case ISD::XOR:
  case ISD::AND:
  case ISD::OR: {
    auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
    if (!Op1C || Op1C->isOpaque())
      return false;

    // If this is a 'not' op, don't touch it because that's a canonical form.
    const APInt &C = Op1C->getAPIntValue();
    if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(C))
      return false;

    if (!C.isSubsetOf(DemandedBits)) {
      EVT VT = Op.getValueType();
      SDValue NewC = TLO.DAG.getConstant(DemandedBits & C, DL, VT);
      SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC,
                                      Op->getFlags());
      return TLO.CombineTo(Op, NewOp);
    }

    break;
  }
  }

  return false;
}

bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
                                            const APInt &DemandedBits,
                                            TargetLoweringOpt &TLO) const {
  EVT VT = Op.getValueType();
  APInt DemandedElts = VT.isVector()
                           ? APInt::getAllOnes(VT.getVectorNumElements())
                           : APInt(1, 1);
  return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
}

/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
/// This uses isTruncateFree/isZExtFree and ANY_EXTEND for the widening cast,
/// but it could be generalized for targets with other types of implicit
/// widening casts.
bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
                                      const APInt &DemandedBits,
                                      TargetLoweringOpt &TLO) const {
  assert(Op.getNumOperands() == 2 &&
         "ShrinkDemandedOp only supports binary operators!");
  assert(Op.getNode()->getNumValues() == 1 &&
         "ShrinkDemandedOp only supports nodes with one result!");

  EVT VT = Op.getValueType();
  SelectionDAG &DAG = TLO.DAG;
  SDLoc dl(Op);

  // Early return, as this function cannot handle vector types.
  if (VT.isVector())
    return false;

  assert(Op.getOperand(0).getValueType().getScalarSizeInBits() == BitWidth &&
         Op.getOperand(1).getValueType().getScalarSizeInBits() == BitWidth &&
         "ShrinkDemandedOp only supports operands that have the same size!");

  // Don't do this if the node has another user, which may require the
  // full value.
  if (!Op.getNode()->hasOneUse())
    return false;

  // Search for the smallest integer type with free casts to and from
  // Op's type. For expedience, just check power-of-2 integer types.
  unsigned DemandedSize = DemandedBits.getActiveBits();
  for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
       SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
    EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
    if (isTruncateFree(VT, SmallVT) && isZExtFree(SmallVT, VT)) {
      // We found a type with free casts.

      // If the operation has the 'disjoint' flag, then the
      // operands on the new node are also disjoint.
      SDNodeFlags Flags(Op->getFlags().hasDisjoint() ? SDNodeFlags::Disjoint
                                                     : SDNodeFlags::None);
      SDValue X = DAG.getNode(
          Op.getOpcode(), dl, SmallVT,
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)), Flags);
      assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
      SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, VT, X);
      return TLO.CombineTo(Op, Z);
    }
  }
  return false;
}

bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                        !DCI.isBeforeLegalizeOps());
  KnownBits Known;

  bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
  if (Simplified) {
    DCI.AddToWorklist(Op.getNode());
    DCI.CommitTargetLoweringOpt(TLO);
  }
  return Simplified;
}

bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          const APInt &DemandedElts,
                                          DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                        !DCI.isBeforeLegalizeOps());
  KnownBits Known;

  bool Simplified =
      SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO);
  if (Simplified) {
    DCI.AddToWorklist(Op.getNode());
    DCI.CommitTargetLoweringOpt(TLO);
  }
  return Simplified;
}

bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          KnownBits &Known,
                                          TargetLoweringOpt &TLO,
                                          unsigned Depth,
                                          bool AssumeSingleUse) const {
  EVT VT = Op.getValueType();

  // Since the number of lanes in a scalable vector is unknown at compile time,
  // we track one bit which is implicitly broadcast to all lanes. This means
  // that all lanes in a scalable vector are considered demanded.
  APInt DemandedElts = VT.isFixedLengthVector()
                           ? APInt::getAllOnes(VT.getVectorNumElements())
                           : APInt(1, 1);
  return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
                              AssumeSingleUse);
}

// TODO: Under what circumstances can we create nodes? Constant folding?
SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
    SelectionDAG &DAG, unsigned Depth) const {
  EVT VT = Op.getValueType();

  // Limit search depth.
  if (Depth >= SelectionDAG::MaxRecursionDepth)
    return SDValue();

  // Ignore UNDEFs.
  if (Op.isUndef())
    return SDValue();

  // Not demanding any bits/elts from Op.
  if (DemandedBits == 0 || DemandedElts == 0)
    return DAG.getUNDEF(VT);

  bool IsLE = DAG.getDataLayout().isLittleEndian();
  unsigned NumElts = DemandedElts.getBitWidth();
  unsigned BitWidth = DemandedBits.getBitWidth();
  KnownBits LHSKnown, RHSKnown;
  switch (Op.getOpcode()) {
  case ISD::BITCAST: {
    if (VT.isScalableVector())
      return SDValue();

    SDValue Src = peekThroughBitcasts(Op.getOperand(0));
    EVT SrcVT = Src.getValueType();
    EVT DstVT = Op.getValueType();
    if (SrcVT == DstVT)
      return Src;

    unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
    unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
    if (NumSrcEltBits == NumDstEltBits)
      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedBits, DemandedElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);

    if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) {
      unsigned Scale = NumDstEltBits / NumSrcEltBits;
      unsigned NumSrcElts = SrcVT.getVectorNumElements();
      APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
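      // Each wide destination element covers Scale narrower source elements;
      // map every demanded destination bit and element back onto the source
      // bits and elements it was bitcast from.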
      for (unsigned i = 0; i != Scale; ++i) {
        unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
        unsigned BitOffset = EltOffset * NumSrcEltBits;
        APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
        if (!Sub.isZero()) {
          DemandedSrcBits |= Sub;
          for (unsigned j = 0; j != NumElts; ++j)
            if (DemandedElts[j])
              DemandedSrcElts.setBit((j * Scale) + i);
        }
      }

      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);
    }

    // TODO - bigendian once we have test coverage.
    if (IsLE && (NumSrcEltBits % NumDstEltBits) == 0) {
      unsigned Scale = NumSrcEltBits / NumDstEltBits;
      unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
      APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
      for (unsigned i = 0; i != NumElts; ++i)
        if (DemandedElts[i]) {
          unsigned Offset = (i % Scale) * NumDstEltBits;
          DemandedSrcBits.insertBits(DemandedBits, Offset);
          DemandedSrcElts.setBit(i / Scale);
        }

      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);
    }

    break;
  }
  case ISD::AND: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known 1 on one side, return the other.
    // These bits cannot contribute to the result of the 'and' in this
    // context.
    if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
      return Op.getOperand(1);
    break;
  }
  case ISD::OR: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known zero on one side, return the
    // other. These bits cannot contribute to the result of the 'or' in this
    // context.
    if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
      return Op.getOperand(1);
    break;
  }
  case ISD::XOR: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known zero on one side, return the
    // other.
    if (DemandedBits.isSubsetOf(RHSKnown.Zero))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(LHSKnown.Zero))
      return Op.getOperand(1);
    break;
  }
  case ISD::ADD: {
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
    if (RHSKnown.isZero())
      return Op.getOperand(0);

    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    if (LHSKnown.isZero())
      return Op.getOperand(1);
    break;
  }
  case ISD::SHL: {
    // If we are only demanding sign bits then we can use the shift source
    // directly.
    if (std::optional<unsigned> MaxSA =
            DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
      SDValue Op0 = Op.getOperand(0);
      unsigned ShAmt = *MaxSA;
      unsigned NumSignBits =
          DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
      unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
      if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
        return Op0;
    }
    break;
  }
  case ISD::SRL: {
    // If we are only demanding sign bits then we can use the shift source
    // directly.
    if (std::optional<unsigned> MaxSA =
            DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
      SDValue Op0 = Op.getOperand(0);
      unsigned ShAmt = *MaxSA;
      // Must already be signbits in DemandedBits bounds, and can't demand any
      // shifted in zeroes.
      if (DemandedBits.countl_zero() >= ShAmt) {
        unsigned NumSignBits =
            DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
        if (DemandedBits.countr_zero() >= (BitWidth - NumSignBits))
          return Op0;
      }
    }
    break;
  }
  case ISD::SETCC: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
    // If (1) we only need the sign-bit, (2) the setcc operands are the same
    // width as the setcc result, and (3) the result of a setcc conforms to 0 or
    // -1, we may be able to bypass the setcc.
    if (DemandedBits.isSignMask() &&
        Op0.getScalarValueSizeInBits() == BitWidth &&
        getBooleanContents(Op0.getValueType()) ==
            ZeroOrNegativeOneBooleanContent) {
      // If we're testing X < 0, then this compare isn't needed - just use X!
      // FIXME: We're limiting to integer types here, but this should also work
      // if we don't care about FP signed-zero. The use of SETLT with FP means
      // that we don't care about NaNs.
      if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
          (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
        return Op0;
    }
    break;
  }
  case ISD::SIGN_EXTEND_INREG: {
    // If none of the extended bits are demanded, eliminate the sextinreg.
    SDValue Op0 = Op.getOperand(0);
    EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
    unsigned ExBits = ExVT.getScalarSizeInBits();
    if (DemandedBits.getActiveBits() <= ExBits &&
        shouldRemoveRedundantExtend(Op))
      return Op0;
    // If the input is already sign extended, just drop the extension.
    unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
    if (NumSignBits >= (BitWidth - ExBits + 1))
      return Op0;
    break;
  }
  case ISD::ANY_EXTEND_VECTOR_INREG:
  case ISD::SIGN_EXTEND_VECTOR_INREG:
  case ISD::ZERO_EXTEND_VECTOR_INREG: {
    if (VT.isScalableVector())
      return SDValue();

    // If we only want the lowest element and none of extended bits, then we can
    // return the bitcasted source vector.
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    EVT DstVT = Op.getValueType();
    if (IsLE && DemandedElts == 1 &&
        DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
        DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
      return DAG.getBitcast(DstVT, Src);
    }
    break;
  }
  case ISD::INSERT_VECTOR_ELT: {
    if (VT.isScalableVector())
      return SDValue();

    // If we don't demand the inserted element, return the base vector.
    SDValue Vec = Op.getOperand(0);
    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
    EVT VecVT = Vec.getValueType();
    if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
        !DemandedElts[CIdx->getZExtValue()])
      return Vec;
    break;
  }
  case ISD::INSERT_SUBVECTOR: {
    if (VT.isScalableVector())
      return SDValue();

    SDValue Vec = Op.getOperand(0);
    SDValue Sub = Op.getOperand(1);
    uint64_t Idx = Op.getConstantOperandVal(2);
    unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
    APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
    // If we don't demand the inserted subvector, return the base vector.
    if (DemandedSubElts == 0)
      return Vec;
    break;
  }
  case ISD::VECTOR_SHUFFLE: {
    assert(!VT.isScalableVector());
    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();

    // If all the demanded elts are from one operand and are inline,
    // then we can use the operand directly.
    bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
    for (unsigned i = 0; i != NumElts; ++i) {
      int M = ShuffleMask[i];
      if (M < 0 || !DemandedElts[i])
        continue;
      AllUndef = false;
      IdentityLHS &= (M == (int)i);
      IdentityRHS &= ((M - NumElts) == i);
    }

    if (AllUndef)
      return DAG.getUNDEF(Op.getValueType());
    if (IdentityLHS)
      return Op.getOperand(0);
    if (IdentityRHS)
      return Op.getOperand(1);
    break;
  }
  default:
    // TODO: Probably okay to remove after audit; here to reduce change size
    // in initial enablement patch for scalable vectors
    if (VT.isScalableVector())
      return SDValue();

    if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
      if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
              Op, DemandedBits, DemandedElts, DAG, Depth))
        return V;
    break;
  }
  return SDValue();
}

SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
    SDValue Op, const APInt &DemandedBits, SelectionDAG &DAG,
    unsigned Depth) const {
  EVT VT = Op.getValueType();
  // Since the number of lanes in a scalable vector is unknown at compile time,
  // we track one bit which is implicitly broadcast to all lanes. This means
  // that all lanes in a scalable vector are considered demanded.
  APInt DemandedElts = VT.isFixedLengthVector()
                           ? APInt::getAllOnes(VT.getVectorNumElements())
                           : APInt(1, 1);
  return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
                                         Depth);
}

SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
    SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
    unsigned Depth) const {
  APInt DemandedBits = APInt::getAllOnes(Op.getScalarValueSizeInBits());
  return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
                                         Depth);
}

// Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1),
// or to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1).
static SDValue combineShiftToAVG(SDValue Op,
                                 TargetLowering::TargetLoweringOpt &TLO,
                                 const TargetLowering &TLI,
                                 const APInt &DemandedBits,
                                 const APInt &DemandedElts, unsigned Depth) {
  assert((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) &&
         "SRL or SRA node is required here!");
  // Is the right shift using an immediate value of 1?
  ConstantSDNode *N1C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
  if (!N1C || !N1C->isOne())
    return SDValue();

  // We are looking for an avgfloor
  // add(ext, ext)
  // or one of these as an avgceil
  // add(add(ext, ext), 1)
  // add(add(ext, 1), ext)
  // add(ext, add(ext, 1))
  SDValue Add = Op.getOperand(0);
  if (Add.getOpcode() != ISD::ADD)
    return SDValue();

  SDValue ExtOpA = Add.getOperand(0);
  SDValue ExtOpB = Add.getOperand(1);
  SDValue Add2;
  auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3, SDValue A) {
    ConstantSDNode *ConstOp;
    if ((ConstOp = isConstOrConstSplat(Op2, DemandedElts)) &&
        ConstOp->isOne()) {
      ExtOpA = Op1;
      ExtOpB = Op3;
      Add2 = A;
      return true;
    }
    if ((ConstOp = isConstOrConstSplat(Op3, DemandedElts)) &&
        ConstOp->isOne()) {
      ExtOpA = Op1;
      ExtOpB = Op2;
      Add2 = A;
      return true;
    }
    return false;
  };
  bool IsCeil =
      (ExtOpA.getOpcode() == ISD::ADD &&
       MatchOperands(ExtOpA.getOperand(0), ExtOpA.getOperand(1), ExtOpB, ExtOpA)) ||
      (ExtOpB.getOpcode() == ISD::ADD &&
       MatchOperands(ExtOpB.getOperand(0), ExtOpB.getOperand(1), ExtOpA, ExtOpB));
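  // After matching, ExtOpA/ExtOpB hold the two averaged operands, and Add2 is
  // the inner add when an avgceil pattern (with the extra +1) was recognized.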

  // If the shift is signed (sra):
  // - Needs >= 2 sign bits for both operands.
  // - Needs >= 2 zero bits.
  // If the shift is unsigned (srl):
  // - Needs >= 1 zero bit for both operands.
  // - Needs 1 demanded bit zero and >= 2 sign bits.
  SelectionDAG &DAG = TLO.DAG;
  unsigned ShiftOpc = Op.getOpcode();
  bool IsSigned = false;
  unsigned KnownBits;
  unsigned NumSignedA = DAG.ComputeNumSignBits(ExtOpA, DemandedElts, Depth);
  unsigned NumSignedB = DAG.ComputeNumSignBits(ExtOpB, DemandedElts, Depth);
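  // The addition of the two operands can consume one known sign bit, so
  // discount one from the shared count.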
  unsigned NumSigned = std::min(NumSignedA, NumSignedB) - 1;
  unsigned NumZeroA =
      DAG.computeKnownBits(ExtOpA, DemandedElts, Depth).countMinLeadingZeros();
  unsigned NumZeroB =
      DAG.computeKnownBits(ExtOpB, DemandedElts, Depth).countMinLeadingZeros();
  unsigned NumZero = std::min(NumZeroA, NumZeroB);

  switch (ShiftOpc) {
  default:
    llvm_unreachable("Unexpected ShiftOpc in combineShiftToAVG");
  case ISD::SRA: {
    if (NumZero >= 2 && NumSigned < NumZero) {
      IsSigned = false;
      KnownBits = NumZero;
      break;
    }
    if (NumSigned >= 1) {
      IsSigned = true;
      KnownBits = NumSigned;
      break;
    }
    return SDValue();
  }
  case ISD::SRL: {
    if (NumZero >= 1 && NumSigned < NumZero) {
      IsSigned = false;
      KnownBits = NumZero;
      break;
    }
    if (NumSigned >= 1 && DemandedBits.isSignBitClear()) {
      IsSigned = true;
      KnownBits = NumSigned;
      break;
    }
    return SDValue();
  }
  }

  unsigned AVGOpc = IsCeil ? (IsSigned ? ISD::AVGCEILS : ISD::AVGCEILU)
                           : (IsSigned ? ISD::AVGFLOORS : ISD::AVGFLOORU);

  // Find the smallest power-2 type that is legal for this vector size and
  // operation, given the original type size and the number of known sign/zero
  // bits.
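  // The width is clamped to a minimum of 8 bits so a sub-byte integer type is
  // never created.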
  EVT VT = Op.getValueType();
  unsigned MinWidth =
      std::max<unsigned>(VT.getScalarSizeInBits() - KnownBits, 8);
  EVT NVT = EVT::getIntegerVT(*DAG.getContext(), llvm::bit_ceil(MinWidth));
  if (NVT.getScalarSizeInBits() > VT.getScalarSizeInBits())
    return SDValue();
  if (VT.isVector())
    NVT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount());
  if (TLO.LegalTypes() && !TLI.isOperationLegal(AVGOpc, NVT)) {
    // If we could not transform, and (both) adds are nuw/nsw, we can use the
    // larger type size to do the transform.
    if (TLO.LegalOperations() && !TLI.isOperationLegal(AVGOpc, VT))
      return SDValue();
    if (DAG.willNotOverflowAdd(IsSigned, Add.getOperand(0),
                               Add.getOperand(1)) &&
        (!Add2 || DAG.willNotOverflowAdd(IsSigned, Add2.getOperand(0),
                                         Add2.getOperand(1))))
      NVT = VT;
    else
      return SDValue();
  }

  // Don't create an AVGFLOOR node with a scalar constant unless it's legal, as
  // this is likely to stop other folds (reassociation, value tracking etc.)
  if (!IsCeil && !TLI.isOperationLegal(AVGOpc, NVT) &&
      (isa<ConstantSDNode>(ExtOpA) || isa<ConstantSDNode>(ExtOpB)))
    return SDValue();

  SDLoc DL(Op);
  SDValue ResultAVG =
      DAG.getNode(AVGOpc, DL, NVT, DAG.getExtOrTrunc(IsSigned, ExtOpA, DL, NVT),
                  DAG.getExtOrTrunc(IsSigned, ExtOpB, DL, NVT));
  return DAG.getExtOrTrunc(IsSigned, ResultAVG, DL, VT);
}

/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
/// result of Op are ever used downstream. If we can use this information to
/// simplify Op, create a new simplified DAG node and return true, returning the
/// original and new nodes in Old and New. Otherwise, analyze the expression and
/// return a mask of Known bits for the expression (used to simplify the
/// caller). The Known bits may only be accurate for those bits in the
/// OriginalDemandedBits and OriginalDemandedElts.
bool TargetLowering::SimplifyDemandedBits(
    SDValue Op, const APInt &OriginalDemandedBits,
    const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
    unsigned Depth, bool AssumeSingleUse) const {
  unsigned BitWidth = OriginalDemandedBits.getBitWidth();
  assert(Op.getScalarValueSizeInBits() == BitWidth &&
         "Mask size mismatches value type size!");

  // Don't know anything.
  Known = KnownBits(BitWidth);

  EVT VT = Op.getValueType();
  bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
  unsigned NumElts = OriginalDemandedElts.getBitWidth();
  assert((!VT.isFixedLengthVector() || NumElts == VT.getVectorNumElements()) &&
         "Unexpected vector size");

  APInt DemandedBits = OriginalDemandedBits;
  APInt DemandedElts = OriginalDemandedElts;
  SDLoc dl(Op);

  // Undef operand.
  if (Op.isUndef())
    return false;

  // We can't simplify target constants.
  if (Op.getOpcode() == ISD::TargetConstant)
    return false;

  if (Op.getOpcode() == ISD::Constant) {
    // We know all of the bits for a constant!
    Known = KnownBits::makeConstant(Op->getAsAPIntVal());
    return false;
  }

  if (Op.getOpcode() == ISD::ConstantFP) {
    // We know all of the bits for a floating point constant!
    Known = KnownBits::makeConstant(
        cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt());
    return false;
  }

  // Other users may use these bits.
  bool HasMultiUse = false;
  if (!AssumeSingleUse && !Op.getNode()->hasOneUse()) {
    if (Depth >= SelectionDAG::MaxRecursionDepth) {
      // Limit search depth.
      return false;
    }
    // Allow multiple uses, just set the DemandedBits/Elts to all bits.
    DemandedBits = APInt::getAllOnes(BitWidth);
    DemandedElts = APInt::getAllOnes(NumElts);
    HasMultiUse = true;
  } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
    // Not demanding any bits/elts from Op.
    return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
  } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
    // Limit search depth.
    return false;
  }

  KnownBits Known2;
  switch (Op.getOpcode()) {
  case ISD::SCALAR_TO_VECTOR: {
    if (VT.isScalableVector())
      return false;
    if (!DemandedElts[0])
      return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));

    KnownBits SrcKnown;
    SDValue Src = Op.getOperand(0);
    unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
    APInt SrcDemandedBits = DemandedBits.zext(SrcBitWidth);
    if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
      return true;

    // Upper elements are undef, so only get the knownbits if we just demand
    // the bottom element.
    if (DemandedElts == 1)
      Known = SrcKnown.anyextOrTrunc(BitWidth);
    break;
  }
  case ISD::BUILD_VECTOR:
    // Collect the known bits that are shared by every demanded element.
    // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
    Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
    return false; // Don't fall through, will infinitely loop.
  case ISD::SPLAT_VECTOR: {
    SDValue Scl = Op.getOperand(0);
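    // The splatted scalar operand may be wider than the vector element type,
    // so only its low element-sized bits are demanded.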
    APInt DemandedSclBits = DemandedBits.zextOrTrunc(Scl.getValueSizeInBits());
    KnownBits KnownScl;
    if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
      return true;

    // Implicitly truncate the bits to match the official semantics of
    // SPLAT_VECTOR.
    Known = KnownScl.trunc(BitWidth);
    break;
  }
  case ISD::LOAD: {
    auto *LD = cast<LoadSDNode>(Op);
    if (getTargetConstantFromLoad(LD)) {
      Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
      return false; // Don't fall through, will infinitely loop.
    }
    if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
      // If this is a ZEXTLoad and we are looking at the loaded value.
      EVT MemVT = LD->getMemoryVT();
      unsigned MemBits = MemVT.getScalarSizeInBits();
      Known.Zero.setBitsFrom(MemBits);
      return false; // Don't fall through, will infinitely loop.
    }
    break;
  }
  case ISD::INSERT_VECTOR_ELT: {
    if (VT.isScalableVector())
      return false;
    SDValue Vec = Op.getOperand(0);
    SDValue Scl = Op.getOperand(1);
    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
    EVT VecVT = Vec.getValueType();

    // If index isn't constant, assume we need all vector elements AND the
    // inserted element.
    APInt DemandedVecElts(DemandedElts);
    if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
      unsigned Idx = CIdx->getZExtValue();
      DemandedVecElts.clearBit(Idx);

      // Inserted element is not required.
      if (!DemandedElts[Idx])
        return TLO.CombineTo(Op, Vec);
    }

    KnownBits KnownScl;
    unsigned NumSclBits = Scl.getScalarValueSizeInBits();
    APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
    if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
      return true;

    Known = KnownScl.anyextOrTrunc(BitWidth);

    KnownBits KnownVec;
    if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
                             Depth + 1))
      return true;

    if (!!DemandedVecElts)
      Known = Known.intersectWith(KnownVec);

    return false;
  }
  case ISD::INSERT_SUBVECTOR: {
    if (VT.isScalableVector())
      return false;
    // Demand any elements from the subvector and the remainder from the src
    // it's inserted into.
    SDValue Src = Op.getOperand(0);
    SDValue Sub = Op.getOperand(1);
    uint64_t Idx = Op.getConstantOperandVal(2);
    unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
    APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
    APInt DemandedSrcElts = DemandedElts;
    DemandedSrcElts.clearBits(Idx, Idx + NumSubElts);

    KnownBits KnownSub, KnownSrc;
    if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
                             Depth + 1))
      return true;
    if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, KnownSrc, TLO,
                             Depth + 1))
      return true;

    Known.Zero.setAllBits();
    Known.One.setAllBits();
    if (!!DemandedSubElts)
      Known = Known.intersectWith(KnownSub);
    if (!!DemandedSrcElts)
      Known = Known.intersectWith(KnownSrc);

    // Attempt to avoid multi-use src if we don't need anything from it.
    if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() ||
        !DemandedSrcElts.isAllOnes()) {
      SDValue NewSub = SimplifyMultipleUseDemandedBits(
          Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
      SDValue NewSrc = SimplifyMultipleUseDemandedBits(
          Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
      if (NewSub || NewSrc) {
        NewSub = NewSub ? NewSub : Sub;
        NewSrc = NewSrc ? NewSrc : Src;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub,
                                        Op.getOperand(2));
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
  case ISD::EXTRACT_SUBVECTOR: {
    if (VT.isScalableVector())
      return false;
    // Offset the demanded elts by the subvector index.
    SDValue Src = Op.getOperand(0);
    if (Src.getValueType().isScalableVector())
      break;
    uint64_t Idx = Op.getConstantOperandVal(1);
    unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
    APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);

    if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO,
                             Depth + 1))
      return true;

    // Attempt to avoid multi-use src if we don't need anything from it.
    if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
      SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
          Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
      if (DemandedSrc) {
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc,
                                        Op.getOperand(1));
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
  case ISD::CONCAT_VECTORS: {
    if (VT.isScalableVector())
      return false;
    Known.Zero.setAllBits();
    Known.One.setAllBits();
    EVT SubVT = Op.getOperand(0).getValueType();
    unsigned NumSubVecs = Op.getNumOperands();
    unsigned NumSubElts = SubVT.getVectorNumElements();
    for (unsigned i = 0; i != NumSubVecs; ++i) {
      APInt DemandedSubElts =
          DemandedElts.extractBits(NumSubElts, i * NumSubElts);
      if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
                               Known2, TLO, Depth + 1))
        return true;
      // Known bits are shared by every demanded subvector element.
      if (!!DemandedSubElts)
        Known = Known.intersectWith(Known2);
    }
    break;
  }
  case ISD::VECTOR_SHUFFLE: {
    assert(!VT.isScalableVector());
    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();

    // Collect demanded elements from shuffle operands.
    APInt DemandedLHS, DemandedRHS;
    if (!getShuffleDemandedElts(NumElts, ShuffleMask, DemandedElts, DemandedLHS,
                                DemandedRHS))
      break;

    if (!!DemandedLHS || !!DemandedRHS) {
      SDValue Op0 = Op.getOperand(0);
      SDValue Op1 = Op.getOperand(1);

      Known.Zero.setAllBits();
      Known.One.setAllBits();
      if (!!DemandedLHS) {
        if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
                                 Depth + 1))
          return true;
        Known = Known.intersectWith(Known2);
      }
      if (!!DemandedRHS) {
        if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
                                 Depth + 1))
          return true;
        Known = Known.intersectWith(Known2);
      }

      // Attempt to avoid multi-use ops if we don't need anything from them.
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
  case ISD::AND: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    // If the RHS is a constant, check to see if the LHS would be zero without
    // using the bits from the RHS. Below, we use knowledge about the RHS to
    // simplify the LHS, here we're using information from the LHS to simplify
    // the RHS.
    if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1, DemandedElts)) {
      // Do not increment Depth here; that can cause an infinite loop.
      KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
      // If the LHS already has zeros where RHSC does, this 'and' is dead.
      if ((LHSKnown.Zero & DemandedBits) ==
          (~RHSC->getAPIntValue() & DemandedBits))
        return TLO.CombineTo(Op, Op0);

      // If any of the set bits in the RHS are known zero on the LHS, shrink
      // the constant.
      if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits,
                                 DemandedElts, TLO))
        return true;

      // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
      // constant, but if this 'and' is only clearing bits that were just set by
      // the xor, then this 'and' can be eliminated by shrinking the mask of
      // the xor. For example, for a 32-bit X:
      // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
      if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
          LHSKnown.One == ~RHSC->getAPIntValue()) {
        SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
        return TLO.CombineTo(Op, Xor);
      }
    }

    // AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I)
    // iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits).
    if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR && !VT.isScalableVector() &&
        (Op0.getOperand(0).isUndef() ||
         ISD::isBuildVectorOfConstantSDNodes(Op0.getOperand(0).getNode())) &&
        Op0->hasOneUse()) {
      unsigned NumSubElts =
          Op0.getOperand(1).getValueType().getVectorNumElements();
      unsigned SubIdx = Op0.getConstantOperandVal(2);
      APInt DemandedSub =
          APInt::getBitsSet(NumElts, SubIdx, SubIdx + NumSubElts);
      KnownBits KnownSubMask =
          TLO.DAG.computeKnownBits(Op1, DemandedSub & DemandedElts, Depth + 1);
      if (DemandedBits.isSubsetOf(KnownSubMask.One)) {
        SDValue NewAnd =
            TLO.DAG.getNode(ISD::AND, dl, VT, Op0.getOperand(0), Op1);
        SDValue NewInsert =
            TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, NewAnd,
                            Op0.getOperand(1), Op0.getOperand(2));
        return TLO.CombineTo(Op, NewInsert);
      }
    }

    if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
                             Known2, TLO, Depth + 1))
      return true;

    // If all of the demanded bits are known one on one side, return the other.
    // These bits cannot contribute to the result of the 'and'.
    if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
      return TLO.CombineTo(Op, Op0);
    if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
      return TLO.CombineTo(Op, Op1);
    // If all of the demanded bits in the inputs are known zeros, return zero.
    if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
      return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
    // If the RHS is a constant, see if we can simplify it.
    if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, DemandedElts,
                               TLO))
      return true;
    // If the operation can be done in a smaller type, do so.
    if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
      return true;

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
        return TLO.CombineTo(Op, NewOp);
      }
    }

    Known &= Known2;
    break;
  }
  case ISD::OR: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
                             Depth + 1)) {
      Op->dropFlags(SDNodeFlags::Disjoint);
      return true;
    }

    if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
                             Known2, TLO, Depth + 1)) {
      Op->dropFlags(SDNodeFlags::Disjoint);
      return true;
    }

    // If all of the demanded bits are known zero on one side, return the
    // other. These bits cannot contribute to the result of the 'or'.
    if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
      return TLO.CombineTo(Op, Op0);
    if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
      return TLO.CombineTo(Op, Op1);
    // If the RHS is a constant, see if we can simplify it.
    if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
      return true;
    // If the operation can be done in a smaller type, do so.
    if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
      return true;

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
        return TLO.CombineTo(Op, NewOp);
      }
    }

    // (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1|C2), (and Y, C2))
    // TODO: Use SimplifyMultipleUseDemandedBits to peek through masks.
    if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::AND &&
        Op0->hasOneUse() && Op1->hasOneUse()) {
      // Attempt to match all commutations - m_c_Or would've been useful!
      for (int I = 0; I != 2; ++I) {
        SDValue X = Op.getOperand(I).getOperand(0);
        SDValue C1 = Op.getOperand(I).getOperand(1);
        SDValue Alt = Op.getOperand(1 - I).getOperand(0);
        SDValue C2 = Op.getOperand(1 - I).getOperand(1);
        if (Alt.getOpcode() == ISD::OR) {
          for (int J = 0; J != 2; ++J) {
            if (X == Alt.getOperand(J)) {
              SDValue Y = Alt.getOperand(1 - J);
              if (SDValue C12 = TLO.DAG.FoldConstantArithmetic(ISD::OR, dl, VT,
                                                               {C1, C2})) {
                SDValue MaskX = TLO.DAG.getNode(ISD::AND, dl, VT, X, C12);
                SDValue MaskY = TLO.DAG.getNode(ISD::AND, dl, VT, Y, C2);
                return TLO.CombineTo(
                    Op, TLO.DAG.getNode(ISD::OR, dl, VT, MaskX, MaskY));
              }
            }
          }
        }
      }
    }

    Known |= Known2;
    break;
  }
1612 case ISD::XOR: {
1613 SDValue Op0 = Op.getOperand(0);
1614 SDValue Op1 = Op.getOperand(1);
1615
1616 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1617 Depth + 1))
1618 return true;
1619 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
1620 Depth + 1))
1621 return true;
1622
1623 // If all of the demanded bits are known zero on one side, return the other.
1624 // These bits cannot contribute to the result of the 'xor'.
1625 if (DemandedBits.isSubsetOf(Known.Zero))
1626 return TLO.CombineTo(Op, Op0);
1627 if (DemandedBits.isSubsetOf(Known2.Zero))
1628 return TLO.CombineTo(Op, Op1);
1629 // If the operation can be done in a smaller type, do so.
1631 return true;
1632
1633 // If all of the unknown bits are known to be zero on one side or the other
1634 // turn this into an *inclusive* or.
1635 // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
1636 if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1637 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
1638
1639 ConstantSDNode *C = isConstOrConstSplat(Op1, DemandedElts);
1640 if (C) {
1641 // If one side is a constant, and all of the set bits in the constant are
1642 // also known set on the other side, turn this into an AND, as we know
1643 // the bits will be cleared.
1644 // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
1645 // NB: it is okay if more bits are known than are requested
1646 if (C->getAPIntValue() == Known2.One) {
1647 SDValue ANDC =
1648 TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
1649 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
1650 }
1651
1652 // If the RHS is a constant, see if we can change it. Don't alter a -1
1653 // constant because that's a 'not' op, and that is better for combining
1654 // and codegen.
1655 if (!C->isAllOnes() && DemandedBits.isSubsetOf(C->getAPIntValue())) {
1656 // We're flipping all demanded bits. Flip the undemanded bits too.
1657 SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
1658 return TLO.CombineTo(Op, New);
1659 }
1660
1661 unsigned Op0Opcode = Op0.getOpcode();
1662 if ((Op0Opcode == ISD::SRL || Op0Opcode == ISD::SHL) && Op0.hasOneUse()) {
1663 if (ConstantSDNode *ShiftC =
1664 isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
1665 // Don't crash on an oversized shift. We can not guarantee that a
1666 // bogus shift has been simplified to undef.
1667 if (ShiftC->getAPIntValue().ult(BitWidth)) {
1668 uint64_t ShiftAmt = ShiftC->getZExtValue();
1670 Ones = Op0Opcode == ISD::SHL ? Ones.shl(ShiftAmt)
1671 : Ones.lshr(ShiftAmt);
1672 if ((DemandedBits & C->getAPIntValue()) == (DemandedBits & Ones) &&
1673 isDesirableToCommuteXorWithShift(Op.getNode())) {
1674 // If the xor constant is a demanded mask, do a 'not' before the
1675 // shift:
1676 // xor (X << ShiftC), XorC --> (not X) << ShiftC
1677 // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
1678 SDValue Not = TLO.DAG.getNOT(dl, Op0.getOperand(0), VT);
1679 return TLO.CombineTo(Op, TLO.DAG.getNode(Op0Opcode, dl, VT, Not,
1680 Op0.getOperand(1)));
1681 }
1682 }
1683 }
1684 }
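// Illustrative example (editorial note, not in the source): for i8 with
// DemandedBits = 0xF0, (X << 4) ^ 0xF0 becomes (~X) << 4: the shift fills
// 0xF0 with bits of X and the xor constant covers exactly those demanded
// bits, so the 'not' can be hoisted before the shift.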
1685 }
1686
1687 // If we can't turn this into a 'not', try to shrink the constant.
1688 if (!C || !C->isAllOnes())
1689 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1690 return true;
1691
1692 // Attempt to avoid multi-use ops if we don't need anything from them.
1693 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1694 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1695 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1696 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1697 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1698 if (DemandedOp0 || DemandedOp1) {
1699 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1700 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1701 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1702 return TLO.CombineTo(Op, NewOp);
1703 }
1704 }
1705
1706 Known ^= Known2;
1707 break;
1708 }
1709 case ISD::SELECT:
1710 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1711 Known, TLO, Depth + 1))
1712 return true;
1713 if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1714 Known2, TLO, Depth + 1))
1715 return true;
1716
1717 // If the operands are constants, see if we can simplify them.
1718 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1719 return true;
1720
1721 // Only known if known in both the LHS and RHS.
1722 Known = Known.intersectWith(Known2);
1723 break;
1724 case ISD::VSELECT:
1725 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1726 Known, TLO, Depth + 1))
1727 return true;
1728 if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1729 Known2, TLO, Depth + 1))
1730 return true;
1731
1732 // Only known if known in both the LHS and RHS.
1733 Known = Known.intersectWith(Known2);
1734 break;
1735 case ISD::SELECT_CC:
1736 if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, DemandedElts,
1737 Known, TLO, Depth + 1))
1738 return true;
1739 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1740 Known2, TLO, Depth + 1))
1741 return true;
1742
1743 // If the operands are constants, see if we can simplify them.
1744 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1745 return true;
1746
1747 // Only known if known in both the LHS and RHS.
1748 Known = Known.intersectWith(Known2);
1749 break;
1750 case ISD::SETCC: {
1751 SDValue Op0 = Op.getOperand(0);
1752 SDValue Op1 = Op.getOperand(1);
1753 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
1754 // If (1) we only need the sign-bit, (2) the setcc operands are the same
1755 // width as the setcc result, and (3) the result of a setcc conforms to 0 or
1756 // -1, we may be able to bypass the setcc.
1757 if (DemandedBits.isSignMask() &&
1758 Op0.getScalarValueSizeInBits() == BitWidth &&
1759 getBooleanContents(Op0.getValueType()) ==
1760 BooleanContent::ZeroOrNegativeOneBooleanContent) {
1761 // If we're testing X < 0, then this compare isn't needed - just use X!
1762 // FIXME: We're limiting to integer types here, but this should also work
1763 // if we don't care about FP signed-zero. The use of SETLT with FP means
1764 // that we don't care about NaNs.
1765 if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
1766 (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
1767 return TLO.CombineTo(Op, Op0);
1768
1769 // TODO: Should we check for other forms of sign-bit comparisons?
1770 // Examples: X <= -1, X >= 0
1771 }
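// Illustrative example (editorial note, not in the source): with 0/-1
// boolean contents, (setlt X, 0) yields -1 exactly when X is negative, so
// its sign bit equals X's sign bit and X itself can stand in for the
// setcc when only the sign bit is demanded.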
1772 if (getBooleanContents(Op0.getValueType()) ==
1773 TargetLowering::ZeroOrOneBooleanContent &&
1774 BitWidth > 1)
1775 Known.Zero.setBitsFrom(1);
1776 break;
1777 }
1778 case ISD::SHL: {
1779 SDValue Op0 = Op.getOperand(0);
1780 SDValue Op1 = Op.getOperand(1);
1781 EVT ShiftVT = Op1.getValueType();
1782
1783 if (std::optional<unsigned> KnownSA =
1784 TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
1785 unsigned ShAmt = *KnownSA;
1786 if (ShAmt == 0)
1787 return TLO.CombineTo(Op, Op0);
1788
1789 // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
1790 // single shift. We can do this if the bottom bits (which are shifted
1791 // out) are never demanded.
1792 // TODO - support non-uniform vector amounts.
1793 if (Op0.getOpcode() == ISD::SRL) {
1794 if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
1795 if (std::optional<unsigned> InnerSA =
1796 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
1797 unsigned C1 = *InnerSA;
1798 unsigned Opc = ISD::SHL;
1799 int Diff = ShAmt - C1;
1800 if (Diff < 0) {
1801 Diff = -Diff;
1802 Opc = ISD::SRL;
1803 }
1804 SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1805 return TLO.CombineTo(
1806 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1807 }
1808 }
1809 }
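// Illustrative example (editorial note, not in the source): for i32 with
// the low 5 bits undemanded, ((x >>u 3) << 5) folds to (x << 2), and
// ((x >>u 5) << 3) folds to (x >>u 2), matching the Diff sign handling
// above.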
1810
1811 // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
1812 // are not demanded. This will likely allow the anyext to be folded away.
1813 // TODO - support non-uniform vector amounts.
1814 if (Op0.getOpcode() == ISD::ANY_EXTEND) {
1815 SDValue InnerOp = Op0.getOperand(0);
1816 EVT InnerVT = InnerOp.getValueType();
1817 unsigned InnerBits = InnerVT.getScalarSizeInBits();
1818 if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
1819 isTypeDesirableForOp(ISD::SHL, InnerVT)) {
1820 SDValue NarrowShl = TLO.DAG.getNode(
1821 ISD::SHL, dl, InnerVT, InnerOp,
1822 TLO.DAG.getShiftAmountConstant(ShAmt, InnerVT, dl));
1823 return TLO.CombineTo(
1824 Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1825 }
1826
1827 // Repeat the SHL optimization above in cases where an extension
1828 // intervenes: (shl (anyext (shr x, c1)), c2) to
1829 // (shl (anyext x), c2-c1). This requires that the bottom c1 bits
1830 // aren't demanded (as above) and that the shifted upper c1 bits of
1831 // x aren't demanded.
1832 // TODO - support non-uniform vector amounts.
1833 if (InnerOp.getOpcode() == ISD::SRL && Op0.hasOneUse() &&
1834 InnerOp.hasOneUse()) {
1835 if (std::optional<unsigned> SA2 = TLO.DAG.getValidShiftAmount(
1836 InnerOp, DemandedElts, Depth + 2)) {
1837 unsigned InnerShAmt = *SA2;
1838 if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
1839 DemandedBits.getActiveBits() <=
1840 (InnerBits - InnerShAmt + ShAmt) &&
1841 DemandedBits.countr_zero() >= ShAmt) {
1842 SDValue NewSA =
1843 TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, ShiftVT);
1844 SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
1845 InnerOp.getOperand(0));
1846 return TLO.CombineTo(
1847 Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
1848 }
1849 }
1850 }
1851 }
1852
1853 APInt InDemandedMask = DemandedBits.lshr(ShAmt);
1854 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1855 Depth + 1)) {
1856 // Disable the nsw and nuw flags. We can no longer guarantee that we
1857 // won't wrap after simplification.
1858 Op->dropFlags(SDNodeFlags::NoWrap);
1859 return true;
1860 }
1861 Known <<= ShAmt;
1862 // Low bits known zero.
1863 Known.Zero.setLowBits(ShAmt);
1864
1865 // Attempt to avoid multi-use ops if we don't need anything from them.
1866 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1867 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1868 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
1869 if (DemandedOp0) {
1870 SDValue NewOp = TLO.DAG.getNode(ISD::SHL, dl, VT, DemandedOp0, Op1);
1871 return TLO.CombineTo(Op, NewOp);
1872 }
1873 }
1874
1875 // TODO: Can we merge this fold with the one below?
1876 // Try shrinking the operation as long as the shift amount will still be
1877 // in range.
1878 if (ShAmt < DemandedBits.getActiveBits() && !VT.isVector() &&
1879 Op.getNode()->hasOneUse()) {
1880 // Search for the smallest integer type with free casts to and from
1881 // Op's type. For expedience, just check power-of-2 integer types.
1882 unsigned DemandedSize = DemandedBits.getActiveBits();
1883 for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
1884 SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
1885 EVT SmallVT = EVT::getIntegerVT(*TLO.DAG.getContext(), SmallVTBits);
1886 if (isNarrowingProfitable(Op.getNode(), VT, SmallVT) &&
1887 isTypeDesirableForOp(ISD::SHL, SmallVT) &&
1888 isTruncateFree(VT, SmallVT) && isZExtFree(SmallVT, VT) &&
1889 (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, SmallVT))) {
1890 assert(DemandedSize <= SmallVTBits &&
1891 "Narrowed below demanded bits?");
1892 // We found a type with free casts.
1893 SDValue NarrowShl = TLO.DAG.getNode(
1894 ISD::SHL, dl, SmallVT,
1895 TLO.DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
1896 TLO.DAG.getShiftAmountConstant(ShAmt, SmallVT, dl));
1897 return TLO.CombineTo(
1898 Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1899 }
1900 }
1901 }
1902
1903 // Narrow shift to lower half - similar to ShrinkDemandedOp.
1904 // (shl i64:x, K) -> (i64 zero_extend (shl (i32 (trunc i64:x)), K))
1905 // Only do this if we demand the upper half so the knownbits are correct.
1906 unsigned HalfWidth = BitWidth / 2;
1907 if ((BitWidth % 2) == 0 && !VT.isVector() && ShAmt < HalfWidth &&
1908 DemandedBits.countLeadingOnes() >= HalfWidth) {
1909 EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), HalfWidth);
1910 if (isNarrowingProfitable(Op.getNode(), VT, HalfVT) &&
1911 isTypeDesirableForOp(ISD::SHL, HalfVT) &&
1912 isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
1913 (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, HalfVT))) {
1914 // If we're demanding the upper bits at all, we must ensure
1915 // that the upper bits of the shift result are known to be zero,
1916 // which is equivalent to the narrow shift being NUW.
1917 if (bool IsNUW = (Known.countMinLeadingZeros() >= HalfWidth)) {
1918 bool IsNSW = Known.countMinSignBits() > HalfWidth;
1919 SDNodeFlags Flags;
1920 Flags.setNoSignedWrap(IsNSW);
1921 Flags.setNoUnsignedWrap(IsNUW);
1922 SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
1923 SDValue NewShiftAmt =
1924 TLO.DAG.getShiftAmountConstant(ShAmt, HalfVT, dl);
1925 SDValue NewShift = TLO.DAG.getNode(ISD::SHL, dl, HalfVT, NewOp,
1926 NewShiftAmt, Flags);
1927 SDValue NewExt =
1928 TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift);
1929 return TLO.CombineTo(Op, NewExt);
1930 }
1931 }
1932 }
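// Editorial sketch (not in the source): with BitWidth = 64 and the upper
// 32 bits all demanded, a (shl i64:x, K) whose result is known to fit in
// 32 bits (the IsNUW check) can be rebuilt as
// (zero_extend (shl nuw (trunc i64:x to i32), K)).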
1933 } else {
1934 // This is a variable shift, so we can't shift the demand mask by a known
1935 // amount. But if we are not demanding high bits, then we are not
1936 // demanding those bits from the pre-shifted operand either.
1937 if (unsigned CTLZ = DemandedBits.countl_zero()) {
1938 APInt DemandedFromOp(APInt::getLowBitsSet(BitWidth, BitWidth - CTLZ));
1939 if (SimplifyDemandedBits(Op0, DemandedFromOp, DemandedElts, Known, TLO,
1940 Depth + 1)) {
1941 // Disable the nsw and nuw flags. We can no longer guarantee that we
1942 // won't wrap after simplification.
1943 Op->dropFlags(SDNodeFlags::NoWrap);
1944 return true;
1945 }
1946 Known.resetAll();
1947 }
1948 }
1949
1950 // If we are only demanding sign bits then we can use the shift source
1951 // directly.
1952 if (std::optional<unsigned> MaxSA =
1953 TLO.DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
1954 unsigned ShAmt = *MaxSA;
1955 unsigned NumSignBits =
1956 TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
1957 unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
1958 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
1959 return TLO.CombineTo(Op, Op0);
1960 }
1961 break;
1962 }
1963 case ISD::SRL: {
1964 SDValue Op0 = Op.getOperand(0);
1965 SDValue Op1 = Op.getOperand(1);
1966 EVT ShiftVT = Op1.getValueType();
1967
1968 if (std::optional<unsigned> KnownSA =
1969 TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
1970 unsigned ShAmt = *KnownSA;
1971 if (ShAmt == 0)
1972 return TLO.CombineTo(Op, Op0);
1973
1974 // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
1975 // single shift. We can do this if the top bits (which are shifted out)
1976 // are never demanded.
1977 // TODO - support non-uniform vector amounts.
1978 if (Op0.getOpcode() == ISD::SHL) {
1979 if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
1980 if (std::optional<unsigned> InnerSA =
1981 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
1982 unsigned C1 = *InnerSA;
1983 unsigned Opc = ISD::SRL;
1984 int Diff = ShAmt - C1;
1985 if (Diff < 0) {
1986 Diff = -Diff;
1987 Opc = ISD::SHL;
1988 }
1989 SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1990 return TLO.CombineTo(
1991 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1992 }
1993 }
1994 }
1995
1996 // If this is (srl (sra X, C1), ShAmt), see if we can combine this into a
1997 // single sra. We can do this if the top bits are never demanded.
1998 if (Op0.getOpcode() == ISD::SRA && Op0.hasOneUse()) {
1999 if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
2000 if (std::optional<unsigned> InnerSA =
2001 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
2002 unsigned C1 = *InnerSA;
2003 // Clamp the combined shift amount if it exceeds the bit width.
2004 unsigned Combined = std::min(C1 + ShAmt, BitWidth - 1);
2005 SDValue NewSA = TLO.DAG.getConstant(Combined, dl, ShiftVT);
2006 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRA, dl, VT,
2007 Op0.getOperand(0), NewSA));
2008 }
2009 }
2010 }
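// Illustrative example (editorial note, not in the source): for i32,
// (srl (sra X, 8), 4) with the top 4 bits undemanded folds to
// (sra X, 12), while (srl (sra X, 30), 8) clamps to (sra X, 31) rather
// than shifting past the sign bit.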
2011
2012 APInt InDemandedMask = (DemandedBits << ShAmt);
2013
2014 // If the shift is exact, then it does demand the low bits (and knows that
2015 // they are zero).
2016 if (Op->getFlags().hasExact())
2017 InDemandedMask.setLowBits(ShAmt);
2018
2019 // Narrow shift to lower half - similar to ShrinkDemandedOp.
2020 // (srl i64:x, K) -> (i64 zero_extend (srl (i32 (trunc i64:x)), K))
2021 if ((BitWidth % 2) == 0 && !VT.isVector()) {
2022 APInt HiBits = APInt::getHighBitsSet(BitWidth, BitWidth / 2);
2023 EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), BitWidth / 2);
2024 if (isNarrowingProfitable(Op.getNode(), VT, HalfVT) &&
2025 isTypeDesirableForOp(ISD::SRL, HalfVT) &&
2026 isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
2027 (!TLO.LegalOperations() || isOperationLegal(ISD::SRL, HalfVT)) &&
2028 ((InDemandedMask.countLeadingZeros() >= (BitWidth / 2)) ||
2029 TLO.DAG.MaskedValueIsZero(Op0, HiBits))) {
2030 SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
2031 SDValue NewShiftAmt =
2032 TLO.DAG.getShiftAmountConstant(ShAmt, HalfVT, dl);
2033 SDValue NewShift =
2034 TLO.DAG.getNode(ISD::SRL, dl, HalfVT, NewOp, NewShiftAmt);
2035 return TLO.CombineTo(
2036 Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift));
2037 }
2038 }
2039
2040 // Compute the new bits that are at the top now.
2041 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
2042 Depth + 1))
2043 return true;
2044 Known >>= ShAmt;
2045 // High bits known zero.
2046 Known.Zero.setHighBits(ShAmt);
2047
2048 // Attempt to avoid multi-use ops if we don't need anything from them.
2049 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2050 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2051 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
2052 if (DemandedOp0) {
2053 SDValue NewOp = TLO.DAG.getNode(ISD::SRL, dl, VT, DemandedOp0, Op1);
2054 return TLO.CombineTo(Op, NewOp);
2055 }
2056 }
2057 } else {
2058 // Use generic knownbits computation as it has support for non-uniform
2059 // shift amounts.
2060 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2061 }
2062
2063 // If we are only demanding sign bits then we can use the shift source
2064 // directly.
2065 if (std::optional<unsigned> MaxSA =
2066 TLO.DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
2067 unsigned ShAmt = *MaxSA;
2068 // Must already be signbits in DemandedBits bounds, and can't demand any
2069 // shifted in zeroes.
2070 if (DemandedBits.countl_zero() >= ShAmt) {
2071 unsigned NumSignBits =
2072 TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
2073 if (DemandedBits.countr_zero() >= (BitWidth - NumSignBits))
2074 return TLO.CombineTo(Op, Op0);
2075 }
2076 }
2077
2078 // Try to match AVG patterns (after shift simplification).
2079 if (SDValue AVG = combineShiftToAVG(Op, TLO, *this, DemandedBits,
2080 DemandedElts, Depth + 1))
2081 return TLO.CombineTo(Op, AVG);
2082
2083 break;
2084 }
2085 case ISD::SRA: {
2086 SDValue Op0 = Op.getOperand(0);
2087 SDValue Op1 = Op.getOperand(1);
2088 EVT ShiftVT = Op1.getValueType();
2089
2090 // If we only want bits that already match the signbit then we don't need
2091 // to shift.
2092 unsigned NumHiDemandedBits = BitWidth - DemandedBits.countr_zero();
2093 if (TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1) >=
2094 NumHiDemandedBits)
2095 return TLO.CombineTo(Op, Op0);
2096
2097 // If this is an arithmetic shift right and only the low-bit is set, we can
2098 // always convert this into a logical shr, even if the shift amount is
2099 // variable. The low bit of the shift cannot be an input sign bit unless
2100 // the shift amount is >= the size of the datatype, which is undefined.
2101 if (DemandedBits.isOne())
2102 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2103
2104 if (std::optional<unsigned> KnownSA =
2105 TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
2106 unsigned ShAmt = *KnownSA;
2107 if (ShAmt == 0)
2108 return TLO.CombineTo(Op, Op0);
2109
2110 // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target
2111 // supports sext_inreg.
2112 if (Op0.getOpcode() == ISD::SHL) {
2113 if (std::optional<unsigned> InnerSA =
2114 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
2115 unsigned LowBits = BitWidth - ShAmt;
2116 EVT ExtVT = EVT::getIntegerVT(*TLO.DAG.getContext(), LowBits);
2117 if (VT.isVector())
2118 ExtVT = EVT::getVectorVT(*TLO.DAG.getContext(), ExtVT,
2119 VT.getVectorElementCount());
2120
2121 if (*InnerSA == ShAmt) {
2122 if (!TLO.LegalOperations() ||
2123 isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT))
2124 return TLO.CombineTo(
2125 Op, TLO.DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT,
2126 Op0.getOperand(0),
2127 TLO.DAG.getValueType(ExtVT)));
2128
2129 // Even if we can't convert to sext_inreg, we might be able to
2130 // remove this shift pair if the input is already sign extended.
2131 unsigned NumSignBits =
2132 TLO.DAG.ComputeNumSignBits(Op0.getOperand(0), DemandedElts);
2133 if (NumSignBits > ShAmt)
2134 return TLO.CombineTo(Op, Op0.getOperand(0));
2135 }
2136 }
2137 }
2138
2139 APInt InDemandedMask = (DemandedBits << ShAmt);
2140
2141 // If the shift is exact, then it does demand the low bits (and knows that
2142 // they are zero).
2143 if (Op->getFlags().hasExact())
2144 InDemandedMask.setLowBits(ShAmt);
2145
2146 // If any of the demanded bits are produced by the sign extension, we also
2147 // demand the input sign bit.
2148 if (DemandedBits.countl_zero() < ShAmt)
2149 InDemandedMask.setSignBit();
2150
2151 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
2152 Depth + 1))
2153 return true;
2154 Known >>= ShAmt;
2155
2156 // If the input sign bit is known to be zero, or if none of the top bits
2157 // are demanded, turn this into an unsigned shift right.
2158 if (Known.Zero[BitWidth - ShAmt - 1] ||
2159 DemandedBits.countl_zero() >= ShAmt) {
2160 SDNodeFlags Flags;
2161 Flags.setExact(Op->getFlags().hasExact());
2162 return TLO.CombineTo(
2163 Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
2164 }
2165
2166 int Log2 = DemandedBits.exactLogBase2();
2167 if (Log2 >= 0) {
2168 // The bit must come from the sign.
2169 SDValue NewSA = TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, ShiftVT);
2170 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
2171 }
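// Illustrative example (editorial note, not in the source): for i32
// (sra X, 24) where only bit 28 is demanded, that bit is a copy of the
// sign bit, so (srl X, 31 - 28) = (srl X, 3) reproduces it without the
// arithmetic shift.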
2172
2173 if (Known.One[BitWidth - ShAmt - 1])
2174 // New bits are known one.
2175 Known.One.setHighBits(ShAmt);
2176
2177 // Attempt to avoid multi-use ops if we don't need anything from them.
2178 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2179 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2180 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
2181 if (DemandedOp0) {
2182 SDValue NewOp = TLO.DAG.getNode(ISD::SRA, dl, VT, DemandedOp0, Op1);
2183 return TLO.CombineTo(Op, NewOp);
2184 }
2185 }
2186 }
2187
2188 // Try to match AVG patterns (after shift simplification).
2189 if (SDValue AVG = combineShiftToAVG(Op, TLO, *this, DemandedBits,
2190 DemandedElts, Depth + 1))
2191 return TLO.CombineTo(Op, AVG);
2192
2193 break;
2194 }
2195 case ISD::FSHL:
2196 case ISD::FSHR: {
2197 SDValue Op0 = Op.getOperand(0);
2198 SDValue Op1 = Op.getOperand(1);
2199 SDValue Op2 = Op.getOperand(2);
2200 bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
2201
2202 if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
2203 unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2204
2205 // For fshl, 0-shift returns the 1st arg.
2206 // For fshr, 0-shift returns the 2nd arg.
2207 if (Amt == 0) {
2208 if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
2209 Known, TLO, Depth + 1))
2210 return true;
2211 break;
2212 }
2213
2214 // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
2215 // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
2216 APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
2217 APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
2218 if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2219 Depth + 1))
2220 return true;
2221 if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
2222 Depth + 1))
2223 return true;
2224
2225 Known2 <<= (IsFSHL ? Amt : (BitWidth - Amt));
2226 Known >>= (IsFSHL ? (BitWidth - Amt) : Amt);
2227 Known = Known.unionWith(Known2);
2228
2229 // Attempt to avoid multi-use ops if we don't need anything from them.
2230 if (!Demanded0.isAllOnes() || !Demanded1.isAllOnes() ||
2231 !DemandedElts.isAllOnes()) {
2232 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2233 Op0, Demanded0, DemandedElts, TLO.DAG, Depth + 1);
2234 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2235 Op1, Demanded1, DemandedElts, TLO.DAG, Depth + 1);
2236 if (DemandedOp0 || DemandedOp1) {
2237 DemandedOp0 = DemandedOp0 ? DemandedOp0 : Op0;
2238 DemandedOp1 = DemandedOp1 ? DemandedOp1 : Op1;
2239 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedOp0,
2240 DemandedOp1, Op2);
2241 return TLO.CombineTo(Op, NewOp);
2242 }
2243 }
2244 }
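// Worked example (editorial sketch, not in the source): fshl i8 X, Y, 3
// computes (X << 3) | (Y >> 5), so DemandedBits = 0xF8 demands
// 0xF8 >> 3 = 0x1F from X and (0xF8 << 5) = 0x00, i.e. nothing, from Y.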
2245
2246 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2247 if (isPowerOf2_32(BitWidth)) {
2248 APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
2249 if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts,
2250 Known2, TLO, Depth + 1))
2251 return true;
2252 }
2253 break;
2254 }
2255 case ISD::ROTL:
2256 case ISD::ROTR: {
2257 SDValue Op0 = Op.getOperand(0);
2258 SDValue Op1 = Op.getOperand(1);
2259 bool IsROTL = (Op.getOpcode() == ISD::ROTL);
2260
2261 // If we're rotating a 0/-1 value, then it stays a 0/-1 value.
2262 if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1))
2263 return TLO.CombineTo(Op, Op0);
2264
2265 if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
2266 unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2267 unsigned RevAmt = BitWidth - Amt;
2268
2269 // rotl: (Op0 << Amt) | (Op0 >> (BW - Amt))
2270 // rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt)
2271 APInt Demanded0 = DemandedBits.rotr(IsROTL ? Amt : RevAmt);
2272 if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2273 Depth + 1))
2274 return true;
2275
2276 // rot*(x, 0) --> x
2277 if (Amt == 0)
2278 return TLO.CombineTo(Op, Op0);
2279
2280 // See if we don't demand either half of the rotated bits.
2281 if ((!TLO.LegalOperations() || isOperationLegal(ISD::SHL, VT)) &&
2282 DemandedBits.countr_zero() >= (IsROTL ? Amt : RevAmt)) {
2283 Op1 = TLO.DAG.getConstant(IsROTL ? Amt : RevAmt, dl, Op1.getValueType());
2284 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, Op1));
2285 }
2286 if ((!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT)) &&
2287 DemandedBits.countl_zero() >= (IsROTL ? RevAmt : Amt)) {
2288 Op1 = TLO.DAG.getConstant(IsROTL ? RevAmt : Amt, dl, Op1.getValueType());
2289 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2290 }
2291 }
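// Illustrative example (editorial note, not in the source): rotl i8 X, 3
// is (X << 3) | (X >> 5); if the low 3 result bits are undemanded it is
// just (X << 3), and if the high 5 result bits are undemanded it is just
// (X >> 5).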
2292
2293 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2294 if (isPowerOf2_32(BitWidth)) {
2295 APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
2296 if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO,
2297 Depth + 1))
2298 return true;
2299 }
2300 break;
2301 }
2302 case ISD::SMIN:
2303 case ISD::SMAX:
2304 case ISD::UMIN:
2305 case ISD::UMAX: {
2306 unsigned Opc = Op.getOpcode();
2307 SDValue Op0 = Op.getOperand(0);
2308 SDValue Op1 = Op.getOperand(1);
2309
2310 // If we're only demanding signbits, then we can simplify to OR/AND node.
2311 unsigned BitOp =
2312 (Opc == ISD::SMIN || Opc == ISD::UMAX) ? ISD::OR : ISD::AND;
2313 unsigned NumSignBits =
2314 std::min(TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1),
2315 TLO.DAG.ComputeNumSignBits(Op1, DemandedElts, Depth + 1));
2316 unsigned NumDemandedUpperBits = BitWidth - DemandedBits.countr_zero();
2317 if (NumSignBits >= NumDemandedUpperBits)
2318 return TLO.CombineTo(Op, TLO.DAG.getNode(BitOp, SDLoc(Op), VT, Op0, Op1));
2319
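// Editorial check (not in the source): for all-sign-bit lanes (each 0 or
// -1), smin/umax pick -1 whenever either input is -1, matching OR, while
// smax/umin pick -1 only when both inputs are -1, matching AND.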
2320 // Check if one arg is always less/greater than (or equal) to the other arg.
2321 KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
2322 KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
2323 switch (Opc) {
2324 case ISD::SMIN:
2325 if (std::optional<bool> IsSLE = KnownBits::sle(Known0, Known1))
2326 return TLO.CombineTo(Op, *IsSLE ? Op0 : Op1);
2327 if (std::optional<bool> IsSLT = KnownBits::slt(Known0, Known1))
2328 return TLO.CombineTo(Op, *IsSLT ? Op0 : Op1);
2329 Known = KnownBits::smin(Known0, Known1);
2330 break;
2331 case ISD::SMAX:
2332 if (std::optional<bool> IsSGE = KnownBits::sge(Known0, Known1))
2333 return TLO.CombineTo(Op, *IsSGE ? Op0 : Op1);
2334 if (std::optional<bool> IsSGT = KnownBits::sgt(Known0, Known1))
2335 return TLO.CombineTo(Op, *IsSGT ? Op0 : Op1);
2336 Known = KnownBits::smax(Known0, Known1);
2337 break;
2338 case ISD::UMIN:
2339 if (std::optional<bool> IsULE = KnownBits::ule(Known0, Known1))
2340 return TLO.CombineTo(Op, *IsULE ? Op0 : Op1);
2341 if (std::optional<bool> IsULT = KnownBits::ult(Known0, Known1))
2342 return TLO.CombineTo(Op, *IsULT ? Op0 : Op1);
2343 Known = KnownBits::umin(Known0, Known1);
2344 break;
2345 case ISD::UMAX:
2346 if (std::optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
2347 return TLO.CombineTo(Op, *IsUGE ? Op0 : Op1);
2348 if (std::optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
2349 return TLO.CombineTo(Op, *IsUGT ? Op0 : Op1);
2350 Known = KnownBits::umax(Known0, Known1);
2351 break;
2352 }
2353 break;
2354 }
2355 case ISD::BITREVERSE: {
2356 SDValue Src = Op.getOperand(0);
2357 APInt DemandedSrcBits = DemandedBits.reverseBits();
2358 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2359 Depth + 1))
2360 return true;
2361 Known = Known2.reverseBits();
2362 break;
2363 }
2364 case ISD::BSWAP: {
2365 SDValue Src = Op.getOperand(0);
2366
2367 // If the only bits demanded come from one byte of the bswap result,
2368 // just shift the input byte into position to eliminate the bswap.
2369 unsigned NLZ = DemandedBits.countl_zero();
2370 unsigned NTZ = DemandedBits.countr_zero();
2371
2372 // Round NTZ down to the next byte. If we have 11 trailing zeros, then
2373 // we need all the bits down to bit 8. Likewise, round NLZ. If we
2374 // have 14 leading zeros, round to 8.
2375 NLZ = alignDown(NLZ, 8);
2376 NTZ = alignDown(NTZ, 8);
2377 // If we need exactly one byte, we can do this transformation.
2378 if (BitWidth - NLZ - NTZ == 8) {
2379 // Replace this with either a left or right shift to get the byte into
2380 // the right place.
2381 unsigned ShiftOpcode = NLZ > NTZ ? ISD::SRL : ISD::SHL;
2382 if (!TLO.LegalOperations() || isOperationLegal(ShiftOpcode, VT)) {
2383 unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ;
2384 SDValue ShAmt = TLO.DAG.getShiftAmountConstant(ShiftAmount, VT, dl);
2385 SDValue NewOp = TLO.DAG.getNode(ShiftOpcode, dl, VT, Src, ShAmt);
2386 return TLO.CombineTo(Op, NewOp);
2387 }
2388 }
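// Worked example (editorial sketch, not in the source): i32 bswap with
// DemandedBits = 0x0000FF00 has NLZ = 16 and NTZ = 8; the single needed
// byte is source bits 23:16, and (srl Src, NLZ - NTZ) = (srl Src, 8)
// moves it into bits 15:8.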
2389
2390 APInt DemandedSrcBits = DemandedBits.byteSwap();
2391 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2392 Depth + 1))
2393 return true;
2394 Known = Known2.byteSwap();
2395 break;
2396 }
2397 case ISD::CTPOP: {
2398 // If only 1 bit is demanded, replace with PARITY as long as we're before
2399 // op legalization.
2400 // FIXME: Limit to scalars for now.
2401 if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
2402 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
2403 Op.getOperand(0)));
2404
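// Editorial note (not in the source): demanding only bit 0 asks for the
// low bit of the population count, which is by definition the parity of
// the input, e.g. ctpop(0b1011) = 3 and parity(0b1011) = 1.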
2405 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2406 break;
2407 }
2408 case ISD::SIGN_EXTEND_INREG: {
2409 SDValue Op0 = Op.getOperand(0);
2410 EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2411 unsigned ExVTBits = ExVT.getScalarSizeInBits();
2412
2413 // If we only care about the highest bit, don't bother shifting right.
2414 if (DemandedBits.isSignMask()) {
2415 unsigned MinSignedBits =
2416 TLO.DAG.ComputeMaxSignificantBits(Op0, DemandedElts, Depth + 1);
2417 bool AlreadySignExtended = ExVTBits >= MinSignedBits;
2418 // However if the input is already sign extended we expect the sign
2419 // extension to be dropped altogether later and do not simplify.
2420 if (!AlreadySignExtended) {
2421 // Compute the correct shift amount type, which must be getShiftAmountTy
2422 // for scalar types after legalization.
2423 SDValue ShiftAmt =
2424 TLO.DAG.getShiftAmountConstant(BitWidth - ExVTBits, VT, dl);
2425 return TLO.CombineTo(Op,
2426 TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
2427 }
2428 }
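// Illustrative example (editorial note, not in the source):
// sign_extend_inreg from i8 within i32 with only bit 31 demanded: bit 31
// of the result is a copy of bit 7 of the input, so (shl Op0, 32 - 8) =
// (shl Op0, 24) places it there directly.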
2429
2430 // If none of the extended bits are demanded, eliminate the sextinreg.
2431 if (DemandedBits.getActiveBits() <= ExVTBits)
2432 return TLO.CombineTo(Op, Op0);
2433
2434 APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);
2435
2436 // Since the sign extended bits are demanded, we know that the sign
2437 // bit is demanded.
2438 InputDemandedBits.setBit(ExVTBits - 1);
2439
2440 if (SimplifyDemandedBits(Op0, InputDemandedBits, DemandedElts, Known, TLO,
2441 Depth + 1))
2442 return true;
2443
2444 // If the sign bit of the input is known set or clear, then we know the
2445 // top bits of the result.
2446
2447 // If the input sign bit is known zero, convert this into a zero extension.
2448 if (Known.Zero[ExVTBits - 1])
2449 return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT));
2450
2451 APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
2452 if (Known.One[ExVTBits - 1]) { // Input sign bit known set
2453 Known.One.setBitsFrom(ExVTBits);
2454 Known.Zero &= Mask;
2455 } else { // Input sign bit unknown
2456 Known.Zero &= Mask;
2457 Known.One &= Mask;
2458 }
2459 break;
2460 }
2461 case ISD::BUILD_PAIR: {
2462 EVT HalfVT = Op.getOperand(0).getValueType();
2463 unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
2464
2465 APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
2466 APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
2467
2468 KnownBits KnownLo, KnownHi;
2469
2470 if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
2471 return true;
2472
2473 if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
2474 return true;
2475
2476 Known = KnownHi.concat(KnownLo);
2477 break;
2478 }
2479 case ISD::ZERO_EXTEND_VECTOR_INREG:
2480 if (VT.isScalableVector())
2481 return false;
2482 [[fallthrough]];
2483 case ISD::ZERO_EXTEND: {
2484 SDValue Src = Op.getOperand(0);
2485 EVT SrcVT = Src.getValueType();
2486 unsigned InBits = SrcVT.getScalarSizeInBits();
2487 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2488 bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
2489
2490 // If none of the top bits are demanded, convert this into an any_extend.
2491 if (DemandedBits.getActiveBits() <= InBits) {
2492 // If we only need the non-extended bits of the bottom element
2493 // then we can just bitcast to the result.
2494 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2495 VT.getSizeInBits() == SrcVT.getSizeInBits())
2496 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2497
2498 unsigned Opc =
2499 IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2500 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2501 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2502 }
2503
2504 APInt InDemandedBits = DemandedBits.trunc(InBits);
2505 APInt InDemandedElts = DemandedElts.zext(InElts);
2506 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2507 Depth + 1)) {
2508 Op->dropFlags(SDNodeFlags::NonNeg);
2509 return true;
2510 }
2511 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2512 Known = Known.zext(BitWidth);
2513
2514 // Attempt to avoid multi-use ops if we don't need anything from them.
2515 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2516 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2517 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2518 break;
2519 }
2520 case ISD::SIGN_EXTEND_VECTOR_INREG:
2521 if (VT.isScalableVector())
2522 return false;
2523 [[fallthrough]];
2524 case ISD::SIGN_EXTEND: {
2525 SDValue Src = Op.getOperand(0);
2526 EVT SrcVT = Src.getValueType();
2527 unsigned InBits = SrcVT.getScalarSizeInBits();
2528 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2529 bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
2530
2531 APInt InDemandedElts = DemandedElts.zext(InElts);
2532 APInt InDemandedBits = DemandedBits.trunc(InBits);
2533
2534 // Since some of the sign extended bits are demanded, we know that the sign
2535 // bit is demanded.
2536 InDemandedBits.setBit(InBits - 1);
2537
2538 // If none of the top bits are demanded, convert this into an any_extend.
2539 if (DemandedBits.getActiveBits() <= InBits) {
2540 // If we only need the non-extended bits of the bottom element
2541 // then we can just bitcast to the result.
2542 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2543 VT.getSizeInBits() == SrcVT.getSizeInBits())
2544 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2545
2546 // Don't lose an all signbits 0/-1 splat on targets with 0/-1 booleans.
2547 if (getBooleanContents(VT) != ZeroOrNegativeOneBooleanContent ||
2548 TLO.DAG.ComputeNumSignBits(Src, InDemandedElts, Depth + 1) !=
2549 InBits) {
2550 unsigned Opc =
2551 IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2552 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2553 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2554 }
2555 }
2556
2557 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2558 Depth + 1))
2559 return true;
2560 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2561
2562 // If the sign bit is known one, the top bits match.
2563 Known = Known.sext(BitWidth);
2564
2565 // If the sign bit is known zero, convert this to a zero extend.
2566 if (Known.isNonNegative()) {
2567 unsigned Opc =
2568 IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND;
2569 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT)) {
2570 SDNodeFlags Flags;
2571 if (!IsVecInReg)
2572 Flags |= SDNodeFlags::NonNeg;
2573 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src, Flags));
2574 }
2575 }
2576
2577 // Attempt to avoid multi-use ops if we don't need anything from them.
2578 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2579 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2580 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2581 break;
2582 }
2583 case ISD::ANY_EXTEND_VECTOR_INREG:
2584 if (VT.isScalableVector())
2585 return false;
2586 [[fallthrough]];
2587 case ISD::ANY_EXTEND: {
2588 SDValue Src = Op.getOperand(0);
2589 EVT SrcVT = Src.getValueType();
2590 unsigned InBits = SrcVT.getScalarSizeInBits();
2591 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2592 bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
2593
2594 // If we only need the bottom element then we can just bitcast.
2595 // TODO: Handle ANY_EXTEND?
2596 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2597 VT.getSizeInBits() == SrcVT.getSizeInBits())
2598 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2599
2600 APInt InDemandedBits = DemandedBits.trunc(InBits);
2601 APInt InDemandedElts = DemandedElts.zext(InElts);
2602 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2603 Depth + 1))
2604 return true;
2605 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2606 Known = Known.anyext(BitWidth);
2607
2608 // Attempt to avoid multi-use ops if we don't need anything from them.
2609 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2610 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2611 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2612 break;
2613 }
2614 case ISD::TRUNCATE: {
2615 SDValue Src = Op.getOperand(0);
2616
2617 // Simplify the input, using demanded bit information, and compute the known
2618 // zero/one bits live out.
2619 unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
2620 APInt TruncMask = DemandedBits.zext(OperandBitWidth);
2621 if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
2622 Depth + 1)) {
2623 // Disable the nsw and nuw flags. We can no longer guarantee that we
2624 // won't wrap after simplification.
2625 Op->dropFlags(SDNodeFlags::NoWrap);
2626 return true;
2627 }
2628 Known = Known.trunc(BitWidth);
2629
2630 // Attempt to avoid multi-use ops if we don't need anything from them.
2631 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2632 Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
2633 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));
2634
2635 // If the input is only used by this truncate, see if we can shrink it based
2636 // on the known demanded bits.
2637 switch (Src.getOpcode()) {
2638 default:
2639 break;
2640 case ISD::SRL:
2641 // Shrink SRL by a constant if none of the high bits shifted in are
2642 // demanded.
2643 if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
2644 // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
2645 // undesirable.
2646 break;
2647
2648 if (Src.getNode()->hasOneUse()) {
2649 if (isTruncateFree(Src, VT) &&
2650 !isTruncateFree(Src.getValueType(), VT)) {
2651 // If truncate is only free at trunc(srl), do not turn it into
2652 // srl(trunc). The check first verifies that the truncate is free at
2653 // Src's opcode (srl), then verifies that the truncate is not done by
2654 // referencing a sub-register. In testing, if both trunc(srl)'s and
2655 // srl(trunc)'s truncates are free, srl(trunc) performs better; if only
2656 // trunc(srl)'s truncate is free, trunc(srl) is better.
2657 break;
2658 }
2659
2660 std::optional<unsigned> ShAmtC =
2661 TLO.DAG.getValidShiftAmount(Src, DemandedElts, Depth + 2);
2662 if (!ShAmtC || *ShAmtC >= BitWidth)
2663 break;
2664 unsigned ShVal = *ShAmtC;
2665
2666 APInt HighBits =
2667 APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
2668 HighBits.lshrInPlace(ShVal);
2669 HighBits = HighBits.trunc(BitWidth);
2670 if (!(HighBits & DemandedBits)) {
2671 // None of the shifted in bits are needed. Add a truncate of the
2672 // shift input, then shift it.
2673 SDValue NewShAmt = TLO.DAG.getShiftAmountConstant(ShVal, VT, dl);
2674 SDValue NewTrunc =
2675 TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
2676 return TLO.CombineTo(
2677 Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, NewShAmt));
2678 }
2679 }
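// Worked example (editorial sketch, not in the source): for
// (i16 truncate (srl i32:x, 4)), the shifted-in upper bits of x land in
// the top 4 bits of the i16 result (HighBits truncates to 0xF000); if
// those bits are undemanded the node folds to (srl (trunc x), 4).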
2680 break;
2681 }
2682
2683 break;
2684 }
2685 case ISD::AssertZext: {
2686 // AssertZext demands all of the high bits, plus any of the low bits
2687 // demanded by its users.
2688 EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2689 APInt InMask = APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits());
2690 if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
2691 TLO, Depth + 1))
2692 return true;
2693
2694 Known.Zero |= ~InMask;
2695 Known.One &= (~Known.Zero);
2696 break;
2697 }
2698 case ISD::EXTRACT_VECTOR_ELT: {
2699 SDValue Src = Op.getOperand(0);
2700 SDValue Idx = Op.getOperand(1);
2701 ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
2702 unsigned EltBitWidth = Src.getScalarValueSizeInBits();
2703
2704 if (SrcEltCnt.isScalable())
2705 return false;
2706
2707 // Demand the bits from every vector element without a constant index.
2708 unsigned NumSrcElts = SrcEltCnt.getFixedValue();
2709 APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
2710 if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
2711 if (CIdx->getAPIntValue().ult(NumSrcElts))
2712 DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
2713
2714 // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know
2715 // anything about the extended bits.
2716 APInt DemandedSrcBits = DemandedBits;
2717 if (BitWidth > EltBitWidth)
2718 DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);
2719
2720 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
2721 Depth + 1))
2722 return true;
2723
2724 // Attempt to avoid multi-use ops if we don't need anything from them.
2725 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2726 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2727 Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2728 SDValue NewOp =
2729 TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx);
2730 return TLO.CombineTo(Op, NewOp);
2731 }
2732 }
2733
2734 Known = Known2;
2735 if (BitWidth > EltBitWidth)
2736 Known = Known.anyext(BitWidth);
2737 break;
2738 }
2739 case ISD::BITCAST: {
2740 if (VT.isScalableVector())
2741 return false;
2742 SDValue Src = Op.getOperand(0);
2743 EVT SrcVT = Src.getValueType();
2744 unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
2745
2746 // If this is an FP->Int bitcast and if the sign bit is the only
2747 // thing demanded, turn this into a FGETSIGN.
2748 if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
2749 DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
2750 SrcVT.isFloatingPoint()) {
2751 bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
2752 bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
2753 if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
2754 SrcVT != MVT::f128) {
2755 // Cannot eliminate/lower SHL for f128 yet.
2756 EVT Ty = OpVTLegal ? VT : MVT::i32;
2757 // Make a FGETSIGN + SHL to move the sign bit into the appropriate
2758 // place. We expect the SHL to be eliminated by other optimizations.
2759 SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src);
2760 unsigned OpVTSizeInBits = Op.getValueSizeInBits();
2761 if (!OpVTLegal && OpVTSizeInBits > 32)
2762 Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
2763 unsigned ShVal = Op.getValueSizeInBits() - 1;
2764 SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
2765 return TLO.CombineTo(Op,
2766 TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
2767 }
2768 }
2769
2770 // Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
2771 // Demand the elt/bit if any of the original elts/bits are demanded.
2772 if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) {
2773 unsigned Scale = BitWidth / NumSrcEltBits;
2774 unsigned NumSrcElts = SrcVT.getVectorNumElements();
2775 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2776 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2777 for (unsigned i = 0; i != Scale; ++i) {
2778 unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
2779 unsigned BitOffset = EltOffset * NumSrcEltBits;
2780 APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
2781 if (!Sub.isZero()) {
2782 DemandedSrcBits |= Sub;
2783 for (unsigned j = 0; j != NumElts; ++j)
2784 if (DemandedElts[j])
2785 DemandedSrcElts.setBit((j * Scale) + i);
2786 }
2787 }
2788
2789 APInt KnownSrcUndef, KnownSrcZero;
2790 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2791 KnownSrcZero, TLO, Depth + 1))
2792 return true;
2793
2794 KnownBits KnownSrcBits;
2795 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2796 KnownSrcBits, TLO, Depth + 1))
2797 return true;
2798 } else if (IsLE && (NumSrcEltBits % BitWidth) == 0) {
2799 // TODO - bigendian once we have test coverage.
2800 unsigned Scale = NumSrcEltBits / BitWidth;
2801 unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
2802 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2803 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2804 for (unsigned i = 0; i != NumElts; ++i)
2805 if (DemandedElts[i]) {
2806 unsigned Offset = (i % Scale) * BitWidth;
2807 DemandedSrcBits.insertBits(DemandedBits, Offset);
2808 DemandedSrcElts.setBit(i / Scale);
2809 }
2810
2811 if (SrcVT.isVector()) {
2812 APInt KnownSrcUndef, KnownSrcZero;
2813 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2814 KnownSrcZero, TLO, Depth + 1))
2815 return true;
2816 }
2817
2818 KnownBits KnownSrcBits;
2819 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2820 KnownSrcBits, TLO, Depth + 1))
2821 return true;
2822
2823 // Attempt to avoid multi-use ops if we don't need anything from them.
2824 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2825 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2826 Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2827 SDValue NewOp = TLO.DAG.getBitcast(VT, DemandedSrc);
2828 return TLO.CombineTo(Op, NewOp);
2829 }
2830 }
2831 }
2832
2833 // If this is a bitcast, let computeKnownBits handle it. Only do this on a
2834 // recursive call where Known may be useful to the caller.
2835 if (Depth > 0) {
2836 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2837 return false;
2838 }
2839 break;
2840 }
2841 case ISD::MUL:
2842 if (DemandedBits.isPowerOf2()) {
2843 // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
2844 // If we demand exactly one bit N and we have "X * (C' << N)" where C' is
2845 // odd (has LSB set), then the left-shifted low bit of X is the answer.
2846 unsigned CTZ = DemandedBits.countr_zero();
2847 ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
2848 if (C && C->getAPIntValue().countr_zero() == CTZ) {
2849 SDValue AmtC = TLO.DAG.getShiftAmountConstant(CTZ, VT, dl);
2850 SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, Op.getOperand(0), AmtC);
2851 return TLO.CombineTo(Op, Shl);
2852 }
2853 }
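// Worked example (editorial sketch, not in the source): demanding only
// bit 3 of i8 (X * 24): 24 = 0b11000 has exactly CTZ = 3 trailing zeros,
// and since 24 >> 3 = 3 is odd, the demanded bit equals bit 0 of X
// shifted left by 3, so the multiply folds to (shl X, 3).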
2854 // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
2855 // X * X is odd iff X is odd.
2856 // 'Quadratic Reciprocity': X * X -> 0 for bit[1]
2857 if (Op.getOperand(0) == Op.getOperand(1) && DemandedBits.ult(4)) {
2858 SDValue One = TLO.DAG.getConstant(1, dl, VT);
2859 SDValue And1 = TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), One);
2860 return TLO.CombineTo(Op, And1);
2861 }
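// Editorial check (not in the source): 3 * 3 = 9 = 0b1001 and
// 2 * 2 = 4 = 0b100, so bit 0 of X*X equals X[0] and bit 1 is always
// zero; demanding only bits 0..1 therefore reduces X*X to (X & 1).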
2862 [[fallthrough]];
2863 case ISD::ADD:
2864 case ISD::SUB: {
2865 // Add, Sub, and Mul don't demand any bits in positions beyond that
2866 // of the highest bit demanded of them.
2867 SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
2868 SDNodeFlags Flags = Op.getNode()->getFlags();
2869 unsigned DemandedBitsLZ = DemandedBits.countl_zero();
2870 APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
2871 KnownBits KnownOp0, KnownOp1;
2872 auto GetDemandedBitsLHSMask = [&](APInt Demanded,
2873 const KnownBits &KnownRHS) {
2874 if (Op.getOpcode() == ISD::MUL)
2875 Demanded.clearHighBits(KnownRHS.countMinTrailingZeros());
2876 return Demanded;
2877 };
2878 if (SimplifyDemandedBits(Op1, LoMask, DemandedElts, KnownOp1, TLO,
2879 Depth + 1) ||
2880 SimplifyDemandedBits(Op0, GetDemandedBitsLHSMask(LoMask, KnownOp1),
2881 DemandedElts, KnownOp0, TLO, Depth + 1) ||
2882 // See if the operation should be performed at a smaller bit width.
2883 ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
2884 // Disable the nsw and nuw flags. We can no longer guarantee that we
2885 // won't wrap after simplification.
2886 Op->dropFlags(SDNodeFlags::NoWrap);
2887 return true;
2888 }
2889
2890 // neg x with only low bit demanded is simply x.
2891 if (Op.getOpcode() == ISD::SUB && DemandedBits.isOne() &&
2892 isNullConstant(Op0))
2893 return TLO.CombineTo(Op, Op1);
2894
2895 // Attempt to avoid multi-use ops if we don't need anything from them.
2896 if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2897 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2898 Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2899 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2900 Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2901 if (DemandedOp0 || DemandedOp1) {
2902 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
2903 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
2904 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1,
2905 Flags & ~SDNodeFlags::NoWrap);
2906 return TLO.CombineTo(Op, NewOp);
2907 }
2908 }
2909
2910 // If we have a constant operand, we may be able to turn it into -1 if we
2911 // do not demand the high bits. This can make the constant smaller to
2912 // encode, allow more general folding, or match specialized instruction
2913 // patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
2914 // is probably not useful (and could be detrimental).
2915 ConstantSDNode *C = isConstOrConstSplat(Op1);
2916 APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
2917 if (C && !C->isAllOnes() && !C->isOne() &&
2918 (C->getAPIntValue() | HighMask).isAllOnes()) {
2919 SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
2920 // Disable the nsw and nuw flags. We can no longer guarantee that we
2921 // won't wrap after simplification.
2922 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1,
2923 Flags & ~SDNodeFlags::NoWrap);
2924 return TLO.CombineTo(Op, NewOp);
2925 }
2926
2927 // Match a multiply with a disguised negated-power-of-2 and convert to
2928 // an equivalent shift-left amount.
2929 // Example: (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2930 auto getShiftLeftAmt = [&HighMask](SDValue Mul) -> unsigned {
2931 if (Mul.getOpcode() != ISD::MUL || !Mul.hasOneUse())
2932 return 0;
2933
2934 // Don't touch opaque constants. Also, ignore zero and power-of-2
2935 // multiplies. Those will get folded later.
2936 ConstantSDNode *MulC = isConstOrConstSplat(Mul.getOperand(1));
2937 if (MulC && !MulC->isOpaque() && !MulC->isZero() &&
2938 !MulC->getAPIntValue().isPowerOf2()) {
2939 APInt UnmaskedC = MulC->getAPIntValue() | HighMask;
2940 if (UnmaskedC.isNegatedPowerOf2())
2941 return (-UnmaskedC).logBase2();
2942 }
2943 return 0;
2944 };
2945
2946 auto foldMul = [&](ISD::NodeType NT, SDValue X, SDValue Y,
2947 unsigned ShlAmt) {
2948 SDValue ShlAmtC = TLO.DAG.getShiftAmountConstant(ShlAmt, VT, dl);
2949 SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, X, ShlAmtC);
2950 SDValue Res = TLO.DAG.getNode(NT, dl, VT, Y, Shl);
2951 return TLO.CombineTo(Op, Res);
2952 };
2953
2954 if (isOperationLegalOrCustom(ISD::SHL, VT)) {
2955 if (Op.getOpcode() == ISD::ADD) {
2956 // (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2957 if (unsigned ShAmt = getShiftLeftAmt(Op0))
2958 return foldMul(ISD::SUB, Op0.getOperand(0), Op1, ShAmt);
2959 // Op0 + (X * MulC) --> Op0 - (X << log2(-MulC))
2960 if (unsigned ShAmt = getShiftLeftAmt(Op1))
2961 return foldMul(ISD::SUB, Op1.getOperand(0), Op0, ShAmt);
2962 }
2963 if (Op.getOpcode() == ISD::SUB) {
2964 // Op0 - (X * MulC) --> Op0 + (X << log2(-MulC))
2965 if (unsigned ShAmt = getShiftLeftAmt(Op1))
2966 return foldMul(ISD::ADD, Op1.getOperand(0), Op0, ShAmt);
2967 }
2968 }
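// Illustrative example (editorial note, not in the source): with
// MulC = -8, (X * -8) + Y is rebuilt as Y - (X << 3), and Y - (X * -8)
// as Y + (X << 3), trading the multiply for a shift.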
2969
2970 if (Op.getOpcode() == ISD::MUL) {
2971 Known = KnownBits::mul(KnownOp0, KnownOp1);
2972 } else { // Op.getOpcode() is either ISD::ADD or ISD::SUB.
2973 Known = KnownBits::computeForAddSub(
2974 Op.getOpcode() == ISD::ADD, Flags.hasNoSignedWrap(),
2975 Flags.hasNoUnsignedWrap(), KnownOp0, KnownOp1);
2976 }
2977 break;
2978 }
2979 case ISD::FABS: {
2980 SDValue Op0 = Op.getOperand(0);
2981 APInt SignMask = APInt::getSignMask(BitWidth);
2982
2983 if (!DemandedBits.intersects(SignMask))
2984 return TLO.CombineTo(Op, Op0);
2985
2986 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known, TLO,
2987 Depth + 1))
2988 return true;
2989
2990 if (Known.isNonNegative())
2991 return TLO.CombineTo(Op, Op0);
2992 if (Known.isNegative())
2993 return TLO.CombineTo(
2994 Op, TLO.DAG.getNode(ISD::FNEG, dl, VT, Op0, Op->getFlags()));
2995
2996 Known.Zero |= SignMask;
2997 Known.One &= ~SignMask;
2998
2999 break;
3000 }
3001 case ISD::FCOPYSIGN: {
3002 SDValue Op0 = Op.getOperand(0);
3003 SDValue Op1 = Op.getOperand(1);
3004
3005 unsigned BitWidth0 = Op0.getScalarValueSizeInBits();
3006 unsigned BitWidth1 = Op1.getScalarValueSizeInBits();
3007 APInt SignMask0 = APInt::getSignMask(BitWidth0);
3008 APInt SignMask1 = APInt::getSignMask(BitWidth1);
3009
3010 if (!DemandedBits.intersects(SignMask0))
3011 return TLO.CombineTo(Op, Op0);
3012
3013 if (SimplifyDemandedBits(Op0, ~SignMask0 & DemandedBits, DemandedElts,
3014 Known, TLO, Depth + 1) ||
3015 SimplifyDemandedBits(Op1, SignMask1, DemandedElts, Known2, TLO,
3016 Depth + 1))
3017 return true;
3018
3019 if (Known2.isNonNegative())
3020 return TLO.CombineTo(
3021 Op, TLO.DAG.getNode(ISD::FABS, dl, VT, Op0, Op->getFlags()));
3022
3023 if (Known2.isNegative())
3024 return TLO.CombineTo(
3025 Op, TLO.DAG.getNode(ISD::FNEG, dl, VT,
3026 TLO.DAG.getNode(ISD::FABS, SDLoc(Op0), VT, Op0)));
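// Editorial note (not in the source): e.g. copysign(X, +2.0) has a sign
// operand known non-negative and reduces to fabs(X), while a known
// negative sign operand yields fneg(fabs(X)).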
3027
3028 Known.Zero &= ~SignMask0;
3029 Known.One &= ~SignMask0;
3030 break;
3031 }
3032 case ISD::FNEG: {
3033 SDValue Op0 = Op.getOperand(0);
3034 APInt SignMask = APInt::getSignMask(BitWidth);
3035
3036 if (!DemandedBits.intersects(SignMask))
3037 return TLO.CombineTo(Op, Op0);
3038
3039 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known, TLO,
3040 Depth + 1))
3041 return true;
3042
3043 if (!Known.isSignUnknown()) {
3044 Known.Zero ^= SignMask;
3045 Known.One ^= SignMask;
3046 }
3047
3048 break;
3049 }
3050 default:
3051 // We also ask the target about intrinsics (which could be specific to it).
3052 if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3053 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
3054 // TODO: Probably okay to remove after audit; here to reduce change size
3055 // in initial enablement patch for scalable vectors
3056 if (Op.getValueType().isScalableVector())
3057 break;
3058 if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
3059 Known, TLO, Depth))
3060 return true;
3061 break;
3062 }
3063
3064 // Just use computeKnownBits to compute output bits.
3065 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
3066 break;
3067 }
3068
3069 // If we know the value of all of the demanded bits, return this as a
3070 // constant.
3071 if (!isTargetCanonicalConstantNode(Op) &&
3072 DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
3073 // Avoid folding to a constant if any OpaqueConstant is involved.
3074 if (llvm::any_of(Op->ops(), [](SDValue V) {
3075 auto *C = dyn_cast<ConstantSDNode>(V);
3076 return C && C->isOpaque();
3077 }))
3078 return false;
3079 if (VT.isInteger())
3080 return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
3081 if (VT.isFloatingPoint())
3082 return TLO.CombineTo(
3083 Op, TLO.DAG.getConstantFP(APFloat(VT.getFltSemantics(), Known.One),
3084 dl, VT));
3085 }
3086
3087 // A multi-use 'all demanded elts' simplify failed to find any knownbits.
3088 // Try again just for the original demanded elts.
3089 // Ensure we do this AFTER constant folding above.
3090 if (HasMultiUse && Known.isUnknown() && !OriginalDemandedElts.isAllOnes())
3091 Known = TLO.DAG.computeKnownBits(Op, OriginalDemandedElts, Depth);
3092
3093 return false;
3094}
3095
3096 bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
3097 const APInt &DemandedElts,
3098 DAGCombinerInfo &DCI) const {
3099 SelectionDAG &DAG = DCI.DAG;
3100 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
3101 !DCI.isBeforeLegalizeOps());
3102
3103 APInt KnownUndef, KnownZero;
3104 bool Simplified =
3105 SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
3106 if (Simplified) {
3107 DCI.AddToWorklist(Op.getNode());
3108 DCI.CommitTargetLoweringOpt(TLO);
3109 }
3110
3111 return Simplified;
3112}
3113
3114/// Given a vector binary operation and known undefined elements for each input
3115/// operand, compute whether each element of the output is undefined.
3116 static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
3117 const APInt &UndefOp0,
3118 const APInt &UndefOp1) {
3119 EVT VT = BO.getValueType();
3120 assert(DAG.getTargetLoweringInfo().isBinOp(BO.getOpcode()) && VT.isVector() &&
3121 "Vector binop only");
3122
3123 EVT EltVT = VT.getVectorElementType();
3124 unsigned NumElts = VT.isFixedLengthVector() ? VT.getVectorNumElements() : 1;
3125 assert(UndefOp0.getBitWidth() == NumElts &&
3126 UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
3127
3128 auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
3129 const APInt &UndefVals) {
3130 if (UndefVals[Index])
3131 return DAG.getUNDEF(EltVT);
3132
3133 if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
3134 // Try hard to make sure that the getNode() call is not creating temporary
3135 // nodes. Ignore opaque integers because they do not constant fold.
3136 SDValue Elt = BV->getOperand(Index);
3137 auto *C = dyn_cast<ConstantSDNode>(Elt);
3138 if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
3139 return Elt;
3140 }
3141
3142 return SDValue();
3143 };
3144
3145 APInt KnownUndef = APInt::getZero(NumElts);
3146 for (unsigned i = 0; i != NumElts; ++i) {
3147 // If both inputs for this element are either constant or undef and match
3148 // the element type, compute the constant/undef result for this element of
3149 // the vector.
3150 // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
3151 // not handle FP constants. The code within getNode() should be refactored
3152 // to avoid the danger of creating a bogus temporary node here.
3153 SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
3154 SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
3155 if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
3156 if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
3157 KnownUndef.setBit(i);
3158 }
3159 return KnownUndef;
3160}
3161
3162 bool TargetLowering::SimplifyDemandedVectorElts(
3163 SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
3164 APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
3165 bool AssumeSingleUse) const {
3166 EVT VT = Op.getValueType();
3167 unsigned Opcode = Op.getOpcode();
3168 APInt DemandedElts = OriginalDemandedElts;
3169 unsigned NumElts = DemandedElts.getBitWidth();
3170 assert(VT.isVector() && "Expected vector op");
3171
3172 KnownUndef = KnownZero = APInt::getZero(NumElts);
3173
3174 if (!shouldSimplifyDemandedVectorElts(Op, TLO))
3175 return false;
3176
3177 // TODO: For now we assume we know nothing about scalable vectors.
3178 if (VT.isScalableVector())
3179 return false;
3180
3181 assert(VT.getVectorNumElements() == NumElts &&
3182 "Mask size mismatches value type element count!");
3183
3184 // Undef operand.
3185 if (Op.isUndef()) {
3186 KnownUndef.setAllBits();
3187 return false;
3188 }
3189
3190 // If Op has other users, assume that all elements are needed.
3191 if (!AssumeSingleUse && !Op.getNode()->hasOneUse())
3192 DemandedElts.setAllBits();
3193
3194 // Not demanding any elements from Op.
3195 if (DemandedElts == 0) {
3196 KnownUndef.setAllBits();
3197 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3198 }
3199
3200 // Limit search depth.
3201 if (Depth >= SelectionDAG::MaxRecursionDepth)
3202 return false;
3203
3204 SDLoc DL(Op);
3205 unsigned EltSizeInBits = VT.getScalarSizeInBits();
3206 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
3207
3208 // Helper for demanding the specified elements and all the bits of both binary
3209 // operands.
3210 auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
3211 SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op0, DemandedElts,
3212 TLO.DAG, Depth + 1);
3213 SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op1, DemandedElts,
3214 TLO.DAG, Depth + 1);
3215 if (NewOp0 || NewOp1) {
3216 SDValue NewOp =
3217 TLO.DAG.getNode(Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0,
3218 NewOp1 ? NewOp1 : Op1, Op->getFlags());
3219 return TLO.CombineTo(Op, NewOp);
3220 }
3221 return false;
3222 };
3223
3224 switch (Opcode) {
3225 case ISD::SCALAR_TO_VECTOR: {
3226 if (!DemandedElts[0]) {
3227 KnownUndef.setAllBits();
3228 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3229 }
3230 KnownUndef.setHighBits(NumElts - 1);
3231 break;
3232 }
3233 case ISD::BITCAST: {
3234 SDValue Src = Op.getOperand(0);
3235 EVT SrcVT = Src.getValueType();
3236
3237 if (!SrcVT.isVector()) {
3238 // TODO - bigendian once we have test coverage.
3239 if (IsLE) {
3240 APInt DemandedSrcBits = APInt::getZero(SrcVT.getSizeInBits());
3241 unsigned EltSize = VT.getScalarSizeInBits();
3242 for (unsigned I = 0; I != NumElts; ++I) {
3243 if (DemandedElts[I]) {
3244 unsigned Offset = I * EltSize;
3245 DemandedSrcBits.setBits(Offset, Offset + EltSize);
3246 }
3247 }
3248 KnownBits Known;
3249 if (SimplifyDemandedBits(Src, DemandedSrcBits, Known, TLO, Depth + 1))
3250 return true;
3251 }
3252 break;
3253 }
3254
3255 // Fast handling of 'identity' bitcasts.
3256 unsigned NumSrcElts = SrcVT.getVectorNumElements();
3257 if (NumSrcElts == NumElts)
3258 return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
3259 KnownZero, TLO, Depth + 1);
3260
3261 APInt SrcDemandedElts, SrcZero, SrcUndef;
3262
3263 // When bitcasting from a 'large element' src vector to a 'small element'
3264 // vector, we must demand a source element if any DemandedElt maps to it.
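// (Illustrative example: for a v2i64 -> v4i32 bitcast, Scale == 2 and
// demanding v4i32 elements {2,3} demands v2i64 source element 1.)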
3265 if ((NumElts % NumSrcElts) == 0) {
3266 unsigned Scale = NumElts / NumSrcElts;
3267 SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3268 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3269 TLO, Depth + 1))
3270 return true;
3271
3272 // Try calling SimplifyDemandedBits, converting demanded elts to the bits
3273 // of the large element.
3274 // TODO - bigendian once we have test coverage.
3275 if (IsLE) {
3276 unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
3277 APInt SrcDemandedBits = APInt::getZero(SrcEltSizeInBits);
3278 for (unsigned i = 0; i != NumElts; ++i)
3279 if (DemandedElts[i]) {
3280 unsigned Ofs = (i % Scale) * EltSizeInBits;
3281 SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
3282 }
3283
3284 KnownBits Known;
3285 if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known,
3286 TLO, Depth + 1))
3287 return true;
3288
3289 // The bitcast has split each wide element into a number of
3290 // narrow subelements. We have just computed the Known bits
3291 // for wide elements. See if element splitting results in
3292 // some subelements being zero. Only for demanded elements!
3293 for (unsigned SubElt = 0; SubElt != Scale; ++SubElt) {
3294 if (!Known.Zero.extractBits(EltSizeInBits, SubElt * EltSizeInBits)
3295 .isAllOnes())
3296 continue;
3297 for (unsigned SrcElt = 0; SrcElt != NumSrcElts; ++SrcElt) {
3298 unsigned Elt = Scale * SrcElt + SubElt;
3299 if (DemandedElts[Elt])
3300 KnownZero.setBit(Elt);
3301 }
3302 }
3303 }
3304
3305 // If a src element is zero/undef then all the output elements it covers
3306 // will be as well - only demanded elements are guaranteed to be correct.
3307 for (unsigned i = 0; i != NumSrcElts; ++i) {
3308 if (SrcDemandedElts[i]) {
3309 if (SrcZero[i])
3310 KnownZero.setBits(i * Scale, (i + 1) * Scale);
3311 if (SrcUndef[i])
3312 KnownUndef.setBits(i * Scale, (i + 1) * Scale);
3313 }
3314 }
3315 }
3316
3317 // When bitcasting from a 'small element' src vector to a 'large element'
3318 // vector, we must demand all the smaller source elements covered by each
3319 // larger demanded element of this vector.
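// (Illustrative example: for a v4i32 -> v2i64 bitcast, Scale == 2 and
// demanding v2i64 element 0 demands v4i32 source elements {0,1}.)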
3320 if ((NumSrcElts % NumElts) == 0) {
3321 unsigned Scale = NumSrcElts / NumElts;
3322 SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3323 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3324 TLO, Depth + 1))
3325 return true;
3326
3327 // If all the src elements covering an output element are zero/undef, then
3328 // the output element will be as well, assuming it was demanded.
3329 for (unsigned i = 0; i != NumElts; ++i) {
3330 if (DemandedElts[i]) {
3331 if (SrcZero.extractBits(Scale, i * Scale).isAllOnes())
3332 KnownZero.setBit(i);
3333 if (SrcUndef.extractBits(Scale, i * Scale).isAllOnes())
3334 KnownUndef.setBit(i);
3335 }
3336 }
3337 }
3338 break;
3339 }
3340 case ISD::FREEZE: {
3341 SDValue N0 = Op.getOperand(0);
3342 if (TLO.DAG.isGuaranteedNotToBeUndefOrPoison(N0, DemandedElts,
3343 /*PoisonOnly=*/false,
3344 Depth + 1))
3345 return TLO.CombineTo(Op, N0);
3346
3347 // TODO: Replace this with the general fold from DAGCombiner::visitFREEZE
3348 // freeze(op(x, ...)) -> op(freeze(x), ...).
3349 if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && DemandedElts == 1)
3350 return TLO.CombineTo(
3351 Op, TLO.DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT,
3352 TLO.DAG.getFreeze(N0.getOperand(0))));
3353 break;
3354 }
3355 case ISD::BUILD_VECTOR: {
3356 // Check all elements and simplify any unused elements with UNDEF.
3357 if (!DemandedElts.isAllOnes()) {
3358 // Don't simplify BROADCASTS.
3359 if (llvm::any_of(Op->op_values(),
3360 [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
3361 SmallVector<SDValue, 32> Ops(Op->ops());
3362 bool Updated = false;
3363 for (unsigned i = 0; i != NumElts; ++i) {
3364 if (!DemandedElts[i] && !Ops[i].isUndef()) {
3365 Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
3366 KnownUndef.setBit(i);
3367 Updated = true;
3368 }
3369 }
3370 if (Updated)
3371 return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
3372 }
3373 }
3374 for (unsigned i = 0; i != NumElts; ++i) {
3375 SDValue SrcOp = Op.getOperand(i);
3376 if (SrcOp.isUndef()) {
3377 KnownUndef.setBit(i);
3378 } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
3379 (isNullConstant(SrcOp) || isNullFPConstant(SrcOp))) {
3380 KnownZero.setBit(i);
3381 }
3382 }
3383 break;
3384 }
3385 case ISD::CONCAT_VECTORS: {
3386 EVT SubVT = Op.getOperand(0).getValueType();
3387 unsigned NumSubVecs = Op.getNumOperands();
3388 unsigned NumSubElts = SubVT.getVectorNumElements();
3389 for (unsigned i = 0; i != NumSubVecs; ++i) {
3390 SDValue SubOp = Op.getOperand(i);
3391 APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3392 APInt SubUndef, SubZero;
3393 if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
3394 Depth + 1))
3395 return true;
3396 KnownUndef.insertBits(SubUndef, i * NumSubElts);
3397 KnownZero.insertBits(SubZero, i * NumSubElts);
3398 }
3399
3400 // Attempt to avoid multi-use ops if we don't need anything from them.
3401 if (!DemandedElts.isAllOnes()) {
3402 bool FoundNewSub = false;
3403 SmallVector<SDValue, 2> DemandedSubOps;
3404 for (unsigned i = 0; i != NumSubVecs; ++i) {
3405 SDValue SubOp = Op.getOperand(i);
3406 APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3407 SDValue NewSubOp = SimplifyMultipleUseDemandedVectorElts(
3408 SubOp, SubElts, TLO.DAG, Depth + 1);
3409 DemandedSubOps.push_back(NewSubOp ? NewSubOp : SubOp);
3410 FoundNewSub = NewSubOp ? true : FoundNewSub;
3411 }
3412 if (FoundNewSub) {
3413 SDValue NewOp =
3414 TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, DemandedSubOps);
3415 return TLO.CombineTo(Op, NewOp);
3416 }
3417 }
3418 break;
3419 }
3420 case ISD::INSERT_SUBVECTOR: {
3421 // Demand any elements from the subvector and the remainder from the src it
3422 // is inserted into.
3423 SDValue Src = Op.getOperand(0);
3424 SDValue Sub = Op.getOperand(1);
3425 uint64_t Idx = Op.getConstantOperandVal(2);
3426 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
3427 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
3428 APInt DemandedSrcElts = DemandedElts;
3429 DemandedSrcElts.clearBits(Idx, Idx + NumSubElts);
3430
3431 // If none of the sub operand elements are demanded, bypass the insert.
3432 if (!DemandedSubElts)
3433 return TLO.CombineTo(Op, Src);
3434
3435 APInt SubUndef, SubZero;
3436 if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
3437 Depth + 1))
3438 return true;
3439
3440 // If none of the src operand elements are demanded, replace it with undef.
3441 if (!DemandedSrcElts && !Src.isUndef())
3442 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
3443 TLO.DAG.getUNDEF(VT), Sub,
3444 Op.getOperand(2)));
3445
3446 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownUndef, KnownZero,
3447 TLO, Depth + 1))
3448 return true;
3449 KnownUndef.insertBits(SubUndef, Idx);
3450 KnownZero.insertBits(SubZero, Idx);
3451
3452 // Attempt to avoid multi-use ops if we don't need anything from them.
3453 if (!DemandedSrcElts.isAllOnes() || !DemandedSubElts.isAllOnes()) {
3454 SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3455 Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3456 SDValue NewSub = SimplifyMultipleUseDemandedVectorElts(
3457 Sub, DemandedSubElts, TLO.DAG, Depth + 1);
3458 if (NewSrc || NewSub) {
3459 NewSrc = NewSrc ? NewSrc : Src;
3460 NewSub = NewSub ? NewSub : Sub;
3461 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3462 NewSub, Op.getOperand(2));
3463 return TLO.CombineTo(Op, NewOp);
3464 }
3465 }
3466 break;
3467 }
3468 case ISD::EXTRACT_SUBVECTOR: {
3469 // Offset the demanded elts by the subvector index.
3470 SDValue Src = Op.getOperand(0);
3471 if (Src.getValueType().isScalableVector())
3472 break;
3473 uint64_t Idx = Op.getConstantOperandVal(1);
3474 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3475 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
3476
3477 APInt SrcUndef, SrcZero;
3478 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3479 Depth + 1))
3480 return true;
3481 KnownUndef = SrcUndef.extractBits(NumElts, Idx);
3482 KnownZero = SrcZero.extractBits(NumElts, Idx);
3483
3484 // Attempt to avoid multi-use ops if we don't need anything from them.
3485 if (!DemandedElts.isAllOnes()) {
3486 SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3487 Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3488 if (NewSrc) {
3489 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3490 Op.getOperand(1));
3491 return TLO.CombineTo(Op, NewOp);
3492 }
3493 }
3494 break;
3495 }
3496 case ISD::INSERT_VECTOR_ELT: {
3497 SDValue Vec = Op.getOperand(0);
3498 SDValue Scl = Op.getOperand(1);
3499 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
3500
3501 // For a legal, constant insertion index, if we don't need this insertion
3502 // then strip it, else remove it from the demanded elts.
3503 if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
3504 unsigned Idx = CIdx->getZExtValue();
3505 if (!DemandedElts[Idx])
3506 return TLO.CombineTo(Op, Vec);
3507
3508 APInt DemandedVecElts(DemandedElts);
3509 DemandedVecElts.clearBit(Idx);
3510 if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
3511 KnownZero, TLO, Depth + 1))
3512 return true;
3513
3514 KnownUndef.setBitVal(Idx, Scl.isUndef());
3515
3516 KnownZero.setBitVal(Idx, isNullConstant(Scl) || isNullFPConstant(Scl));
3517 break;
3518 }
3519
3520 APInt VecUndef, VecZero;
3521 if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
3522 Depth + 1))
3523 return true;
3524 // Without knowing the insertion index we can't set KnownUndef/KnownZero.
3525 break;
3526 }
3527 case ISD::VSELECT: {
3528 SDValue Sel = Op.getOperand(0);
3529 SDValue LHS = Op.getOperand(1);
3530 SDValue RHS = Op.getOperand(2);
3531
3532 // Try to transform the select condition based on the current demanded
3533 // elements.
3534 APInt UndefSel, ZeroSel;
3535 if (SimplifyDemandedVectorElts(Sel, DemandedElts, UndefSel, ZeroSel, TLO,
3536 Depth + 1))
3537 return true;
3538
3539 // See if we can simplify either vselect operand.
3540 APInt DemandedLHS(DemandedElts);
3541 APInt DemandedRHS(DemandedElts);
3542 APInt UndefLHS, ZeroLHS;
3543 APInt UndefRHS, ZeroRHS;
3544 if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3545 Depth + 1))
3546 return true;
3547 if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3548 Depth + 1))
3549 return true;
3550
3551 KnownUndef = UndefLHS & UndefRHS;
3552 KnownZero = ZeroLHS & ZeroRHS;
3553
3554 // If we know that the selected element is always zero, we don't need the
3555 // select value element.
3556 APInt DemandedSel = DemandedElts & ~KnownZero;
3557 if (DemandedSel != DemandedElts)
3558 if (SimplifyDemandedVectorElts(Sel, DemandedSel, UndefSel, ZeroSel, TLO,
3559 Depth + 1))
3560 return true;
3561
3562 break;
3563 }
3564 case ISD::VECTOR_SHUFFLE: {
3565 SDValue LHS = Op.getOperand(0);
3566 SDValue RHS = Op.getOperand(1);
3567 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
3568
3569 // Collect demanded elements from the shuffle operands.
3570 APInt DemandedLHS(NumElts, 0);
3571 APInt DemandedRHS(NumElts, 0);
3572 for (unsigned i = 0; i != NumElts; ++i) {
3573 int M = ShuffleMask[i];
3574 if (M < 0 || !DemandedElts[i])
3575 continue;
3576 assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
3577 if (M < (int)NumElts)
3578 DemandedLHS.setBit(M);
3579 else
3580 DemandedRHS.setBit(M - NumElts);
3581 }
3582
3583 // If either side isn't demanded, replace it by UNDEF. We handle this
3584 // explicitly here to also simplify in the case of multiple uses (in
3585 // contrast to the SimplifyDemandedVectorElts calls below).
3586 bool FoldLHS = !DemandedLHS && !LHS.isUndef();
3587 bool FoldRHS = !DemandedRHS && !RHS.isUndef();
3588 if (FoldLHS || FoldRHS) {
3589 LHS = FoldLHS ? TLO.DAG.getUNDEF(LHS.getValueType()) : LHS;
3590 RHS = FoldRHS ? TLO.DAG.getUNDEF(RHS.getValueType()) : RHS;
3591 SDValue NewOp =
3592 TLO.DAG.getVectorShuffle(VT, SDLoc(Op), LHS, RHS, ShuffleMask);
3593 return TLO.CombineTo(Op, NewOp);
3594 }
3595
3596 // See if we can simplify either shuffle operand.
3597 APInt UndefLHS, ZeroLHS;
3598 APInt UndefRHS, ZeroRHS;
3599 if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3600 Depth + 1))
3601 return true;
3602 if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3603 Depth + 1))
3604 return true;
3605
3606 // Simplify mask using undef elements from LHS/RHS.
3607 bool Updated = false;
3608 bool IdentityLHS = true, IdentityRHS = true;
3609 SmallVector<int, 32> NewMask(ShuffleMask);
3610 for (unsigned i = 0; i != NumElts; ++i) {
3611 int &M = NewMask[i];
3612 if (M < 0)
3613 continue;
3614 if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
3615 (M >= (int)NumElts && UndefRHS[M - NumElts])) {
3616 Updated = true;
3617 M = -1;
3618 }
3619 IdentityLHS &= (M < 0) || (M == (int)i);
3620 IdentityRHS &= (M < 0) || ((M - NumElts) == i);
3621 }
3622
3623 // Update legal shuffle masks based on demanded elements if doing so won't
3624 // reduce to an identity mask, which could cause premature removal of the shuffle.
3625 if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
3626 SDValue LegalShuffle =
3627 buildLegalVectorShuffle(VT, DL, LHS, RHS, NewMask, TLO.DAG);
3628 if (LegalShuffle)
3629 return TLO.CombineTo(Op, LegalShuffle);
3630 }
3631
3632 // Propagate undef/zero elements from LHS/RHS.
3633 for (unsigned i = 0; i != NumElts; ++i) {
3634 int M = ShuffleMask[i];
3635 if (M < 0) {
3636 KnownUndef.setBit(i);
3637 } else if (M < (int)NumElts) {
3638 if (UndefLHS[M])
3639 KnownUndef.setBit(i);
3640 if (ZeroLHS[M])
3641 KnownZero.setBit(i);
3642 } else {
3643 if (UndefRHS[M - NumElts])
3644 KnownUndef.setBit(i);
3645 if (ZeroRHS[M - NumElts])
3646 KnownZero.setBit(i);
3647 }
3648 }
3649 break;
3650 }
3651 case ISD::ANY_EXTEND_VECTOR_INREG:
3652 case ISD::SIGN_EXTEND_VECTOR_INREG:
3653 case ISD::ZERO_EXTEND_VECTOR_INREG: {
3654 APInt SrcUndef, SrcZero;
3655 SDValue Src = Op.getOperand(0);
3656 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3657 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts);
3658 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3659 Depth + 1))
3660 return true;
3661 KnownZero = SrcZero.zextOrTrunc(NumElts);
3662 KnownUndef = SrcUndef.zextOrTrunc(NumElts);
3663
3664 if (IsLE && Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
3665 Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
3666 DemandedSrcElts == 1) {
3667 // aext - if we just need the bottom element then we can bitcast.
3668 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
3669 }
3670
3671 if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
3672 // zext(undef) upper bits are guaranteed to be zero.
3673 if (DemandedElts.isSubsetOf(KnownUndef))
3674 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3675 KnownUndef.clearAllBits();
3676
3677 // zext - if we just need the bottom element then we can mask:
3678 // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
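// (Illustrative example, little-endian: (v2i64 zero_extend_vector_inreg
// (v4i32 and X, C)) with only element 0 demanded folds the mask into the
// 'and': C & <-1,0,0,0> keeps lane 0 and zeroes the lane supplying the
// extended upper bits, so the result can simply be bitcast to v2i64.)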
3679 if (IsLE && DemandedSrcElts == 1 && Src.getOpcode() == ISD::AND &&
3680 Op->isOnlyUserOf(Src.getNode()) &&
3681 Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
3682 SDLoc DL(Op);
3683 EVT SrcVT = Src.getValueType();
3684 EVT SrcSVT = SrcVT.getScalarType();
3685 SmallVector<SDValue> MaskElts;
3686 MaskElts.push_back(TLO.DAG.getAllOnesConstant(DL, SrcSVT));
3687 MaskElts.append(NumSrcElts - 1, TLO.DAG.getConstant(0, DL, SrcSVT));
3688 SDValue Mask = TLO.DAG.getBuildVector(SrcVT, DL, MaskElts);
3689 if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(
3690 ISD::AND, DL, SrcVT, {Src.getOperand(1), Mask})) {
3691 Fold = TLO.DAG.getNode(ISD::AND, DL, SrcVT, Src.getOperand(0), Fold);
3692 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Fold));
3693 }
3694 }
3695 }
3696 break;
3697 }
3698
3699 // TODO: There are more binop opcodes that could be handled here - MIN,
3700 // MAX, saturated math, etc.
3701 case ISD::ADD: {
3702 SDValue Op0 = Op.getOperand(0);
3703 SDValue Op1 = Op.getOperand(1);
3704 if (Op0 == Op1 && Op->isOnlyUserOf(Op0.getNode())) {
3705 APInt UndefLHS, ZeroLHS;
3706 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3707 Depth + 1, /*AssumeSingleUse*/ true))
3708 return true;
3709 }
3710 [[fallthrough]];
3711 }
3712 case ISD::AVGCEILS:
3713 case ISD::AVGCEILU:
3714 case ISD::AVGFLOORS:
3715 case ISD::AVGFLOORU:
3716 case ISD::OR:
3717 case ISD::XOR:
3718 case ISD::SUB:
3719 case ISD::FADD:
3720 case ISD::FSUB:
3721 case ISD::FMUL:
3722 case ISD::FDIV:
3723 case ISD::FREM: {
3724 SDValue Op0 = Op.getOperand(0);
3725 SDValue Op1 = Op.getOperand(1);
3726
3727 APInt UndefRHS, ZeroRHS;
3728 if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3729 Depth + 1))
3730 return true;
3731 APInt UndefLHS, ZeroLHS;
3732 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3733 Depth + 1))
3734 return true;
3735
3736 KnownZero = ZeroLHS & ZeroRHS;
3737 KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
3738
3739 // Attempt to avoid multi-use ops if we don't need anything from them.
3740 // TODO - use KnownUndef to relax the demandedelts?
3741 if (!DemandedElts.isAllOnes())
3742 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3743 return true;
3744 break;
3745 }
3746 case ISD::SHL:
3747 case ISD::SRL:
3748 case ISD::SRA:
3749 case ISD::ROTL:
3750 case ISD::ROTR: {
3751 SDValue Op0 = Op.getOperand(0);
3752 SDValue Op1 = Op.getOperand(1);
3753
3754 APInt UndefRHS, ZeroRHS;
3755 if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3756 Depth + 1))
3757 return true;
3758 APInt UndefLHS, ZeroLHS;
3759 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3760 Depth + 1))
3761 return true;
3762
3763 KnownZero = ZeroLHS;
3764 KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
3765
3766 // Attempt to avoid multi-use ops if we don't need anything from them.
3767 // TODO - use KnownUndef to relax the demandedelts?
3768 if (!DemandedElts.isAllOnes())
3769 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3770 return true;
3771 break;
3772 }
3773 case ISD::MUL:
3774 case ISD::MULHU:
3775 case ISD::MULHS:
3776 case ISD::AND: {
3777 SDValue Op0 = Op.getOperand(0);
3778 SDValue Op1 = Op.getOperand(1);
3779
3780 APInt SrcUndef, SrcZero;
3781 if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO,
3782 Depth + 1))
3783 return true;
3784 // If we know that a demanded element was zero in Op1 we don't need to
3785 // demand it in Op0 - it's guaranteed to be zero.
3786 APInt DemandedElts0 = DemandedElts & ~SrcZero;
3787 if (SimplifyDemandedVectorElts(Op0, DemandedElts0, KnownUndef, KnownZero,
3788 TLO, Depth + 1))
3789 return true;
3790
3791 KnownUndef &= DemandedElts0;
3792 KnownZero &= DemandedElts0;
3793
3794 // If every element pair has a zero/undef then just fold to zero.
3795 // fold (and x, undef) -> 0 / (and x, 0) -> 0
3796 // fold (mul x, undef) -> 0 / (mul x, 0) -> 0
3797 if (DemandedElts.isSubsetOf(SrcZero | KnownZero | SrcUndef | KnownUndef))
3798 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3799
3800 // If either side has a zero element, then the result element is zero, even
3801 // if the other is an UNDEF.
3802 // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
3803 // and then handle 'and' nodes with the rest of the binop opcodes.
3804 KnownZero |= SrcZero;
3805 KnownUndef &= SrcUndef;
3806 KnownUndef &= ~KnownZero;
3807
3808 // Attempt to avoid multi-use ops if we don't need anything from them.
3809 if (!DemandedElts.isAllOnes())
3810 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3811 return true;
3812 break;
3813 }
3814 case ISD::TRUNCATE:
3815 case ISD::SIGN_EXTEND:
3816 case ISD::ZERO_EXTEND:
3817 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3818 KnownZero, TLO, Depth + 1))
3819 return true;
3820
3821 if (!DemandedElts.isAllOnes())
3822 if (SDValue NewOp = SimplifyMultipleUseDemandedVectorElts(
3823 Op.getOperand(0), DemandedElts, TLO.DAG, Depth + 1))
3824 return TLO.CombineTo(Op, TLO.DAG.getNode(Opcode, SDLoc(Op), VT, NewOp));
3825
3826 if (Op.getOpcode() == ISD::ZERO_EXTEND) {
3827 // zext(undef) upper bits are guaranteed to be zero.
3828 if (DemandedElts.isSubsetOf(KnownUndef))
3829 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3830 KnownUndef.clearAllBits();
3831 }
3832 break;
3833 case ISD::SINT_TO_FP:
3834 case ISD::UINT_TO_FP:
3835 case ISD::FP_TO_SINT:
3836 case ISD::FP_TO_UINT:
3837 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3838 KnownZero, TLO, Depth + 1))
3839 return true;
3840 // Don't fall through to generic undef -> undef handling.
3841 return false;
3842 default: {
3843 if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
3844 if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
3845 KnownZero, TLO, Depth))
3846 return true;
3847 } else {
3848 KnownBits Known;
3849 APInt DemandedBits = APInt::getAllOnes(EltSizeInBits);
3850 if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
3851 TLO, Depth, AssumeSingleUse))
3852 return true;
3853 }
3854 break;
3855 }
3856 }
3857 assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
3858
3859 // Constant fold all undef cases.
3860 // TODO: Handle zero cases as well.
3861 if (DemandedElts.isSubsetOf(KnownUndef))
3862 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3863
3864 return false;
3865}
3866
3867 /// Determine which of the bits specified in Mask are known to be either zero
3868 /// or one and return them in Known.
3869 void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
3870 KnownBits &Known,
3871 const APInt &DemandedElts,
3872 const SelectionDAG &DAG,
3873 unsigned Depth) const {
3874 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3875 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3876 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3877 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3878 "Should use MaskedValueIsZero if you don't know whether Op"
3879 " is a target node!");
3880 Known.resetAll();
3881}
3882
3883 void TargetLowering::computeKnownBitsForTargetInstr(
3884 GISelValueTracking &Analysis, Register R, KnownBits &Known,
3885 const APInt &DemandedElts, const MachineRegisterInfo &MRI,
3886 unsigned Depth) const {
3887 Known.resetAll();
3888}
3889
3890 void TargetLowering::computeKnownFPClassForTargetInstr(
3891 GISelValueTracking &Analysis, Register R, KnownFPClass &Known,
3892 const APInt &DemandedElts, const MachineRegisterInfo &MRI,
3893 unsigned Depth) const {
3894 Known.resetAll();
3895}
3896
3897 void TargetLowering::computeKnownBitsForFrameIndex(
3898 const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
3899 // The low bits are known zero if the pointer is aligned.
3900 Known.Zero.setLowBits(Log2(MF.getFrameInfo().getObjectAlign(FrameIdx)));
3901}
3902
3903 Align TargetLowering::computeKnownAlignForTargetInstr(
3904 GISelValueTracking &Analysis, Register R, const MachineRegisterInfo &MRI,
3905 unsigned Depth) const {
3906 return Align(1);
3907 }
3908
3909/// This method can be implemented by targets that want to expose additional
3910/// information about sign bits to the DAG Combiner.
3911 unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
3912 const APInt &,
3913 const SelectionDAG &,
3914 unsigned Depth) const {
3915 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3916 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3917 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3918 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3919 "Should use ComputeNumSignBits if you don't know whether Op"
3920 " is a target node!");
3921 return 1;
3922}
3923
3924 unsigned TargetLowering::computeNumSignBitsForTargetInstr(
3925 GISelValueTracking &Analysis, Register R, const APInt &DemandedElts,
3926 const MachineRegisterInfo &MRI, unsigned Depth) const {
3927 return 1;
3928}
3929
3930 bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
3931 SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
3932 TargetLoweringOpt &TLO, unsigned Depth) const {
3933 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3934 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3935 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3936 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3937 "Should use SimplifyDemandedVectorElts if you don't know whether Op"
3938 " is a target node!");
3939 return false;
3940}
3941
3942 bool TargetLowering::SimplifyDemandedBitsForTargetNode(
3943 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3944 KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
3945 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3946 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3947 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3948 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3949 "Should use SimplifyDemandedBits if you don't know whether Op"
3950 " is a target node!");
3951 computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
3952 return false;
3953}
3954
3955 SDValue TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
3956 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3957 SelectionDAG &DAG, unsigned Depth) const {
3958 assert(
3959 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3960 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3961 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3962 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3963 "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
3964 " is a target node!");
3965 return SDValue();
3966}
3967
3968SDValue
3969 TargetLowering::buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
3970 SDValue N1, MutableArrayRef<int> Mask,
3971 SelectionDAG &DAG) const {
3972 bool LegalMask = isShuffleMaskLegal(Mask, VT);
3973 if (!LegalMask) {
3974 std::swap(N0, N1);
3975 ShuffleVectorSDNode::commuteMask(Mask);
3976 LegalMask = isShuffleMaskLegal(Mask, VT);
3977 }
3978
3979 if (!LegalMask)
3980 return SDValue();
3981
3982 return DAG.getVectorShuffle(VT, DL, N0, N1, Mask);
3983}
3984
3985 const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode *) const {
3986 return nullptr;
3987}
3988
3989 bool TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
3990 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
3991 bool PoisonOnly, unsigned Depth) const {
3992 assert(
3993 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3994 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3995 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3996 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3997 "Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op"
3998 " is a target node!");
3999
4000 // If Op can't create undef/poison and none of its operands are undef/poison
4001 // then Op is never undef/poison.
4002 return !canCreateUndefOrPoisonForTargetNode(Op, DemandedElts, DAG, PoisonOnly,
4003 /*ConsiderFlags*/ true, Depth) &&
4004 all_of(Op->ops(), [&](SDValue V) {
4005 return DAG.isGuaranteedNotToBeUndefOrPoison(V, PoisonOnly,
4006 Depth + 1);
4007 });
4008}
4009
4010 bool TargetLowering::canCreateUndefOrPoisonForTargetNode(
4011 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
4012 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
4013 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4014 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4015 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4016 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4017 "Should use canCreateUndefOrPoison if you don't know whether Op"
4018 " is a target node!");
4019 // Be conservative and return true.
4020 return true;
4021}
4022
4023 bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
4024 const APInt &DemandedElts,
4025 const SelectionDAG &DAG,
4026 bool SNaN,
4027 unsigned Depth) const {
4028 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4029 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4030 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4031 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4032 "Should use isKnownNeverNaN if you don't know whether Op"
4033 " is a target node!");
4034 return false;
4035}
4036
4037 bool TargetLowering::isSplatValueForTargetNode(SDValue Op,
4038 const APInt &DemandedElts,
4039 APInt &UndefElts,
4040 const SelectionDAG &DAG,
4041 unsigned Depth) const {
4042 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4043 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4044 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4045 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4046 "Should use isSplatValue if you don't know whether Op"
4047 " is a target node!");
4048 return false;
4049}
4050
4051// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
4052// work with truncating build vectors and vectors with elements of less than
4053// 8 bits.
4054 bool TargetLowering::isConstTrueVal(SDValue N) const {
4055 if (!N)
4056 return false;
4057
4058 unsigned EltWidth;
4059 APInt CVal;
4060 if (ConstantSDNode *CN = isConstOrConstSplat(N, /*AllowUndefs=*/false,
4061 /*AllowTruncation=*/true)) {
4062 CVal = CN->getAPIntValue();
4063 EltWidth = N.getValueType().getScalarSizeInBits();
4064 } else
4065 return false;
4066
4067 // If this is a truncating splat, truncate the splat value.
4068 // Otherwise, we may fail to match the expected values below.
4069 if (EltWidth < CVal.getBitWidth())
4070 CVal = CVal.trunc(EltWidth);
4071
4072 switch (getBooleanContents(N.getValueType())) {
4073 case UndefinedBooleanContent:
4074 return CVal[0];
4075 case ZeroOrOneBooleanContent:
4076 return CVal.isOne();
4077 case ZeroOrNegativeOneBooleanContent:
4078 return CVal.isAllOnes();
4079 }
4080
4081 llvm_unreachable("Invalid boolean contents");
4082}
4083
4084 bool TargetLowering::isConstFalseVal(SDValue N) const {
4085 if (!N)
4086 return false;
4087
4088 const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
4089 if (!CN) {
4090 const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
4091 if (!BV)
4092 return false;
4093
4094 // Only interested in constant splats; we don't care about undef elements
4095 // when identifying boolean constants, and getConstantSplatNode returns
4096 // null if all ops are undef.
4097 CN = BV->getConstantSplatNode();
4098 if (!CN)
4099 return false;
4100 }
4101
4102 if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
4103 return !CN->getAPIntValue()[0];
4104
4105 return CN->isZero();
4106}
4107
4108 bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
4109 bool SExt) const {
4110 if (VT == MVT::i1)
4111 return N->isOne();
4112
4113 TargetLowering::BooleanContent Cnt = getBooleanContents(VT);
4114 switch (Cnt) {
4115 case TargetLowering::ZeroOrOneBooleanContent:
4116 // An extended value of 1 is always true, unless its original type is i1,
4117 // in which case it will be sign extended to -1.
4118 return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
4119 case TargetLowering::UndefinedBooleanContent:
4120 case TargetLowering::ZeroOrNegativeOneBooleanContent:
4121 return N->isAllOnes() && SExt;
4122 }
4123 llvm_unreachable("Unexpected enumeration.");
4124}
4125
4126/// This helper function of SimplifySetCC tries to optimize the comparison when
4127/// either operand of the SetCC node is a bitwise-and instruction.
4128SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
4129 ISD::CondCode Cond, const SDLoc &DL,
4130 DAGCombinerInfo &DCI) const {
4131 if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
4132 std::swap(N0, N1);
4133
4134 SelectionDAG &DAG = DCI.DAG;
4135 EVT OpVT = N0.getValueType();
4136 if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
4137 (Cond != ISD::SETEQ && Cond != ISD::SETNE))
4138 return SDValue();
4139
4140 // (X & Y) != 0 --> zextOrTrunc(X & Y)
4141 // iff everything but LSB is known zero:
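// (Illustrative example: for i32 (and X, 1) setne 0 on a
// ZeroOrOneBooleanContent target, the upper 31 bits of the 'and' are known
// zero, so the setcc folds to a zext/trunc of the 'and' itself.)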
4142 if (Cond == ISD::SETNE && isNullConstant(N1) &&
4143 (getBooleanContents(OpVT) == TargetLowering::UndefinedBooleanContent ||
4144 getBooleanContents(OpVT) == TargetLowering::ZeroOrOneBooleanContent)) {
4145 unsigned NumEltBits = OpVT.getScalarSizeInBits();
4146 APInt UpperBits = APInt::getHighBitsSet(NumEltBits, NumEltBits - 1);
4147 if (DAG.MaskedValueIsZero(N0, UpperBits))
4148 return DAG.getBoolExtOrTrunc(N0, DL, VT, OpVT);
4149 }
4150
4151 // Try to eliminate a power-of-2 mask constant by converting to a signbit
4152 // test in a narrow type that we can truncate to with no cost. Examples:
4153 // (i32 X & 32768) == 0 --> (trunc X to i16) >= 0
4154 // (i32 X & 32768) != 0 --> (trunc X to i16) < 0
4155 // TODO: This conservatively checks for type legality on the source and
4156 // destination types. That may inhibit optimizations, but it also
4157 // allows setcc->shift transforms that may be more beneficial.
4158 auto *AndC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4159 if (AndC && isNullConstant(N1) && AndC->getAPIntValue().isPowerOf2() &&
4160 isTypeLegal(OpVT) && N0.hasOneUse()) {
4161 EVT NarrowVT = EVT::getIntegerVT(*DAG.getContext(),
4162 AndC->getAPIntValue().getActiveBits());
4163 if (isTruncateFree(OpVT, NarrowVT) && isTypeLegal(NarrowVT)) {
4164 SDValue Trunc = DAG.getZExtOrTrunc(N0.getOperand(0), DL, NarrowVT);
4165 SDValue Zero = DAG.getConstant(0, DL, NarrowVT);
4166 return DAG.getSetCC(DL, VT, Trunc, Zero,
4167 Cond == ISD::SETEQ ? ISD::SETGE : ISD::SETLT);
4168 }
4169 }
4170
4171 // Match these patterns in any of their permutations:
4172 // (X & Y) == Y
4173 // (X & Y) != Y
4174 SDValue X, Y;
4175 if (N0.getOperand(0) == N1) {
4176 X = N0.getOperand(1);
4177 Y = N0.getOperand(0);
4178 } else if (N0.getOperand(1) == N1) {
4179 X = N0.getOperand(0);
4180 Y = N0.getOperand(1);
4181 } else {
4182 return SDValue();
4183 }
4184
4185 // TODO: We should invert (X & Y) eq/ne 0 -> (X & Y) ne/eq Y if
4186 // `isXAndYEqZeroPreferableToXAndYEqY` is false. This is a bit difficult as
4187 // it's liable to create an infinite loop.
4188 SDValue Zero = DAG.getConstant(0, DL, OpVT);
4189 if (isXAndYEqZeroPreferableToXAndYEqY(Cond, OpVT) &&
4190 DAG.isKnownToBeAPowerOfTwo(Y)) {
4191 // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
4192 // Note that where Y is variable and is known to have at most one bit set
4193 // (for example, if it is Z & 1) we cannot do this; the expressions are not
4194 // equivalent when Y == 0.
4195 assert(OpVT.isInteger());
4196 Cond = ISD::getSetCCInverse(Cond, OpVT);
4197 if (DCI.isBeforeLegalizeOps() ||
4198 isCondCodeLegal(Cond, N0.getSimpleValueType()))
4199 return DAG.getSetCC(DL, VT, N0, Zero, Cond);
4200 } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
4201 // If the target supports an 'and-not' or 'and-complement' logic operation,
4202 // try to use that to make a comparison operation more efficient.
4203 // But don't do this transform if the mask is a single bit because there are
4204 // more efficient ways to deal with that case (for example, 'bt' on x86 or
4205 // 'rlwinm' on PPC).
4206
4207 // Bail out if the compare operand that we want to turn into a zero is
4208 // already a zero (otherwise, infinite loop).
4209 if (isNullConstant(Y))
4210 return SDValue();
4211
4212 // Transform this into: ~X & Y == 0.
4213 SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
4214 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
4215 return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
4216 }
4217
4218 return SDValue();
4219}
4220
4221/// This helper function of SimplifySetCC tries to optimize the comparison when
4222/// either operand of the SetCC node is a bitwise-or instruction.
4223/// For now, this just transforms (X | Y) ==/!= Y into X & ~Y ==/!= 0.
4224SDValue TargetLowering::foldSetCCWithOr(EVT VT, SDValue N0, SDValue N1,
4225 ISD::CondCode Cond, const SDLoc &DL,
4226 DAGCombinerInfo &DCI) const {
4227 if (N1.getOpcode() == ISD::OR && N0.getOpcode() != ISD::OR)
4228 std::swap(N0, N1);
4229
4230 SelectionDAG &DAG = DCI.DAG;
4231 EVT OpVT = N0.getValueType();
4232 if (!N0.hasOneUse() || !OpVT.isInteger() ||
4233 (Cond != ISD::SETEQ && Cond != ISD::SETNE))
4234 return SDValue();
4235
4236 // (X | Y) == Y
4237 // (X | Y) != Y
4238 SDValue X;
4239 if (sd_match(N0, m_Or(m_Value(X), m_Specific(N1))) && hasAndNotCompare(X)) {
4240 // If the target supports an 'and-not' or 'and-complement' logic operation,
4241 // try to use that to make a comparison operation more efficient.
4242
4243 // Bail out if the compare operand that we want to turn into a zero is
4244 // already a zero (otherwise, infinite loop).
4245 if (isNullConstant(N1))
4246 return SDValue();
4247
4248 // Transform this into: X & ~Y ==/!= 0.
4249 SDValue NotY = DAG.getNOT(SDLoc(N1), N1, OpVT);
4250 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, X, NotY);
4251 return DAG.getSetCC(DL, VT, NewAnd, DAG.getConstant(0, DL, OpVT), Cond);
4252 }
4253
4254 return SDValue();
4255}
4256
4257 /// There are multiple IR patterns that could be checking whether a certain
4258 /// truncation of a signed number would be lossy or not. The pattern that is
4259 /// best at the IR level may not lower optimally, so we want to unfold it.
4260/// We are looking for the following pattern: (KeptBits is a constant)
4261/// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
4262/// KeptBits won't be bitwidth(x), that will be constant-folded to true/false.
4263/// KeptBits also can't be 1, that would have been folded to %x dstcond 0
4264/// We will unfold it into the natural trunc+sext pattern:
4265/// ((%x << C) a>> C) dstcond %x
4266/// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x)
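/// (Illustrative example: for i16 %x with KeptBits == 8,
///    (add %x, 128) ult 256  -->  ((%x << 8) a>> 8) eq %x
/// i.e. "does %x fit in i8" becomes a sign-extension round-trip test.)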
4267SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
4268 EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
4269 const SDLoc &DL) const {
4270 // We must be comparing with a constant.
4271 ConstantSDNode *C1;
4272 if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
4273 return SDValue();
4274
4275 // N0 should be: add %x, (1 << (KeptBits-1))
4276 if (N0->getOpcode() != ISD::ADD)
4277 return SDValue();
4278
4279 // And we must be 'add'ing a constant.
4280 ConstantSDNode *C01;
4281 if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
4282 return SDValue();
4283
4284 SDValue X = N0->getOperand(0);
4285 EVT XVT = X.getValueType();
4286
4287 // Validate constants ...
4288
4289 APInt I1 = C1->getAPIntValue();
4290
4291 ISD::CondCode NewCond;
4292 if (Cond == ISD::CondCode::SETULT) {
4293 NewCond = ISD::CondCode::SETEQ;
4294 } else if (Cond == ISD::CondCode::SETULE) {
4295 NewCond = ISD::CondCode::SETEQ;
4296 // But need to 'canonicalize' the constant.
4297 I1 += 1;
4298 } else if (Cond == ISD::CondCode::SETUGT) {
4299 NewCond = ISD::CondCode::SETNE;
4300 // But need to 'canonicalize' the constant.
4301 I1 += 1;
4302 } else if (Cond == ISD::CondCode::SETUGE) {
4303 NewCond = ISD::CondCode::SETNE;
4304 } else
4305 return SDValue();
4306
4307 APInt I01 = C01->getAPIntValue();
4308
4309 auto checkConstants = [&I1, &I01]() -> bool {
4310 // Both must be powers of two, and the constant from the setcc must be the bigger one.
4311 return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
4312 };
4313
4314 if (checkConstants()) {
4315 // Great, e.g. got icmp ult i16 (add i16 %x, 128), 256
4316 } else {
4317 // What if we invert constants? (and the target predicate)
4318 I1.negate();
4319 I01.negate();
4320 assert(XVT.isInteger());
4321 NewCond = getSetCCInverse(NewCond, XVT);
4322 if (!checkConstants())
4323 return SDValue();
4324 // Great, e.g. got icmp uge i16 (add i16 %x, -128), -256
4325 }
4326
4327 // They are power-of-two, so which bit is set?
4328 const unsigned KeptBits = I1.logBase2();
4329 const unsigned KeptBitsMinusOne = I01.logBase2();
4330
4331 // Magic!
4332 if (KeptBits != (KeptBitsMinusOne + 1))
4333 return SDValue();
4334 assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");
4335
4336 // We don't want to do this in every single case.
4337 SelectionDAG &DAG = DCI.DAG;
4338 if (!shouldTransformSignedTruncationCheck(XVT, KeptBits))
4339 return SDValue();
4340
4341 // Unfold into: sext_inreg(%x) cond %x
4342 // Where 'cond' will be either 'eq' or 'ne'.
4343 SDValue SExtInReg = DAG.getNode(
4344 ISD::SIGN_EXTEND_INREG, DL, XVT, X,
4345 DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), KeptBits)));
4346 return DAG.getSetCC(DL, SCCVT, SExtInReg, X, NewCond);
4347}
4348
4349// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
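// (Illustrative example: (X & (0x80 l>> Y)) != 0 --> ((X << Y) & 0x80) != 0;
// profitability is decided by the
// shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd hook used below.)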
4350SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
4351 EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
4352 DAGCombinerInfo &DCI, const SDLoc &DL) const {
4354 "Should be a comparison with 0.");
4355 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4356 "Valid only for [in]equality comparisons.");
4357
4358 unsigned NewShiftOpcode;
4359 SDValue X, C, Y;
4360
4361 SelectionDAG &DAG = DCI.DAG;
4362
4363 // Look for '(C l>>/<< Y)'.
4364 auto Match = [&NewShiftOpcode, &X, &C, &Y, &DAG, this](SDValue V) {
4365 // The shift should be one-use.
4366 if (!V.hasOneUse())
4367 return false;
4368 unsigned OldShiftOpcode = V.getOpcode();
4369 switch (OldShiftOpcode) {
4370 case ISD::SHL:
4371 NewShiftOpcode = ISD::SRL;
4372 break;
4373 case ISD::SRL:
4374 NewShiftOpcode = ISD::SHL;
4375 break;
4376 default:
4377 return false; // must be a logical shift.
4378 }
4379 // We should be shifting a constant.
4380 // FIXME: best to use isConstantOrConstantVector().
4381 C = V.getOperand(0);
4382 ConstantSDNode *CC =
4383 isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
4384 if (!CC)
4385 return false;
4386 Y = V.getOperand(1);
4387
4388 ConstantSDNode *XC =
4389 isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
4390 return shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
4391 X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
4392 };
4393
4394 // The LHS of the comparison should be a one-use 'and'.
4395 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
4396 return SDValue();
4397
4398 X = N0.getOperand(0);
4399 SDValue Mask = N0.getOperand(1);
4400
4401 // 'and' is commutative!
4402 if (!Match(Mask)) {
4403 std::swap(X, Mask);
4404 if (!Match(Mask))
4405 return SDValue();
4406 }
4407
4408 EVT VT = X.getValueType();
4409
4410 // Produce:
4411 // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
4412 SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
4413 SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
4414 SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
4415 return T2;
4416}
4417
4418 /// Try to fold an equality comparison with an {add/sub/xor} binary operation as
4419/// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
4420/// handle the commuted versions of these patterns.
4421SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
4422 ISD::CondCode Cond, const SDLoc &DL,
4423 DAGCombinerInfo &DCI) const {
4424 unsigned BOpcode = N0.getOpcode();
4425 assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
4426 "Unexpected binop");
4427 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
4428
4429 // (X + Y) == X --> Y == 0
4430 // (X - Y) == X --> Y == 0
4431 // (X ^ Y) == X --> Y == 0
4432 SelectionDAG &DAG = DCI.DAG;
4433 EVT OpVT = N0.getValueType();
4434 SDValue X = N0.getOperand(0);
4435 SDValue Y = N0.getOperand(1);
4436 if (X == N1)
4437 return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);
4438
4439 if (Y != N1)
4440 return SDValue();
4441
4442 // (X + Y) == Y --> X == 0
4443 // (X ^ Y) == Y --> X == 0
4444 if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
4445 return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);
4446
4447 // The shift would not be valid if the operands are boolean (i1).
4448 if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
4449 return SDValue();
4450
4451 // (X - Y) == Y --> X == Y << 1
4452 SDValue One = DAG.getShiftAmountConstant(1, OpVT, DL);
4453 SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
4454 if (!DCI.isCalledByLegalizer())
4455 DCI.AddToWorklist(YShl1.getNode());
4456 return DAG.getSetCC(DL, VT, X, YShl1, Cond);
4457}
4458
4459 static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT,
4460 SDValue N0, const APInt &C1,
4461 ISD::CondCode Cond, const SDLoc &dl,
4462 SelectionDAG &DAG) {
4463 // Look through truncs that don't change the value of a ctpop.
4464 // FIXME: Add vector support? Need to be careful with setcc result type below.
4465 SDValue CTPOP = N0;
4466 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
4468 CTPOP = N0.getOperand(0);
4469
4470 if (CTPOP.getOpcode() != ISD::CTPOP || !CTPOP.hasOneUse())
4471 return SDValue();
4472
4473 EVT CTVT = CTPOP.getValueType();
4474 SDValue CTOp = CTPOP.getOperand(0);
4475
4476 // Expand a power-of-2-or-zero comparison based on ctpop:
4477 // (ctpop x) u< 2 -> (x & x-1) == 0
4478 // (ctpop x) u> 1 -> (x & x-1) != 0
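// More generally, (ctpop x) u< C holds iff clearing the lowest set bit C-1
// times yields zero, e.g. (ctpop x) u< 3 -> (x & (x-1) & ((x & (x-1)) - 1)) == 0.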
4479 if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
4480 // Keep the CTPOP if it is a cheap vector op.
4481 if (CTVT.isVector() && TLI.isCtpopFast(CTVT))
4482 return SDValue();
4483
4484 unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond);
4485 if (C1.ugt(CostLimit + (Cond == ISD::SETULT)))
4486 return SDValue();
4487 if (C1 == 0 && (Cond == ISD::SETULT))
4488 return SDValue(); // This is handled elsewhere.
4489
4490 unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT);
4491
4492 SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4493 SDValue Result = CTOp;
4494 for (unsigned i = 0; i < Passes; i++) {
4495 SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, Result, NegOne);
4496 Result = DAG.getNode(ISD::AND, dl, CTVT, Result, Add);
4497 }
4498 ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
4499 return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC);
4500 }
4501
4502 // Expand a power-of-2 comparison based on ctpop
4503 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
4504 // Keep the CTPOP if it is cheap.
4505 if (TLI.isCtpopFast(CTVT))
4506 return SDValue();
4507
4508 SDValue Zero = DAG.getConstant(0, dl, CTVT);
4509 SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4510 assert(CTVT.isInteger());
4511 SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
4512
4513 // It's not uncommon for known-never-zero X to exist in (ctpop X) eq/ne 1, so
4514 // check before emitting a potentially unnecessary op.
4515 if (DAG.isKnownNeverZero(CTOp)) {
4516 // (ctpop x) == 1 --> (x & x-1) == 0
4517 // (ctpop x) != 1 --> (x & x-1) != 0
4518 SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
4519 SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
4520 return RHS;
4521 }
4522
4523 // (ctpop x) == 1 --> (x ^ x-1) > x-1
4524 // (ctpop x) != 1 --> (x ^ x-1) <= x-1
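// (Illustrative check: for i8 x = 0x10, x ^ (x-1) = 0x1f u> 0x0f, so exactly
// one bit is set; for x = 0x14, x ^ (x-1) = 0x07, which is not u> 0x13.)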
4525 SDValue Xor = DAG.getNode(ISD::XOR, dl, CTVT, CTOp, Add);
4526 ISD::CondCode CmpCond = Cond == ISD::SETEQ ? ISD::SETUGT : ISD::SETULE;
4527 return DAG.getSetCC(dl, VT, Xor, Add, CmpCond);
4528 }
4529
4530 return SDValue();
4531}
4532
4533 static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1,
4534 ISD::CondCode Cond, const SDLoc &dl,
4535 SelectionDAG &DAG) {
4536 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4537 return SDValue();
4538
4539 auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4540 if (!C1 || !(C1->isZero() || C1->isAllOnes()))
4541 return SDValue();
4542
4543 auto getRotateSource = [](SDValue X) {
4544 if (X.getOpcode() == ISD::ROTL || X.getOpcode() == ISD::ROTR)
4545 return X.getOperand(0);
4546 return SDValue();
4547 };
4548
4549 // Peek through a rotated value compared against 0 or -1:
4550 // (rot X, Y) == 0/-1 --> X == 0/-1
4551 // (rot X, Y) != 0/-1 --> X != 0/-1
4552 if (SDValue R = getRotateSource(N0))
4553 return DAG.getSetCC(dl, VT, R, N1, Cond);
4554
4555 // Peek through an 'or' of a rotated value compared against 0:
4556 // or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0
4557 // or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0
4558 //
4559 // TODO: Add the 'and' with -1 sibling.
4560 // TODO: Recurse through a series of 'or' ops to find the rotate.
4561 EVT OpVT = N0.getValueType();
4562 if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) {
4563 if (SDValue R = getRotateSource(N0.getOperand(0))) {
4564 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(1));
4565 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4566 }
4567 if (SDValue R = getRotateSource(N0.getOperand(1))) {
4568 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(0));
4569 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4570 }
4571 }
4572
4573 return SDValue();
4574}
4575
4576 static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1,
4577 ISD::CondCode Cond, const SDLoc &dl,
4578 SelectionDAG &DAG) {
4579 // If we are testing for all-bits-clear, we might be able to do that with
4580 // less shifting since bit-order does not matter.
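// (Note: fshl(A, B, C) == (A << C) | (B u>> (BW - C)), so when A and B share
// an operand the two shifted pieces can be repacked with a single shift, as
// in the matchOr folds below.)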
4581 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4582 return SDValue();
4583
4584 auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4585 if (!C1 || !C1->isZero())
4586 return SDValue();
4587
4588 if (!N0.hasOneUse() ||
4589 (N0.getOpcode() != ISD::FSHL && N0.getOpcode() != ISD::FSHR))
4590 return SDValue();
4591
4592 unsigned BitWidth = N0.getScalarValueSizeInBits();
4593 auto *ShAmtC = isConstOrConstSplat(N0.getOperand(2));
4594 if (!ShAmtC)
4595 return SDValue();
4596
4597 uint64_t ShAmt = ShAmtC->getAPIntValue().urem(BitWidth);
4598 if (ShAmt == 0)
4599 return SDValue();
4600
4601 // Canonicalize fshr as fshl to reduce pattern-matching.
4602 if (N0.getOpcode() == ISD::FSHR)
4603 ShAmt = BitWidth - ShAmt;
4604
4605 // Match an 'or' with a specific operand 'Other' in either commuted variant.
4606 SDValue X, Y;
4607 auto matchOr = [&X, &Y](SDValue Or, SDValue Other) {
4608 if (Or.getOpcode() != ISD::OR || !Or.hasOneUse())
4609 return false;
4610 if (Or.getOperand(0) == Other) {
4611 X = Or.getOperand(0);
4612 Y = Or.getOperand(1);
4613 return true;
4614 }
4615 if (Or.getOperand(1) == Other) {
4616 X = Or.getOperand(1);
4617 Y = Or.getOperand(0);
4618 return true;
4619 }
4620 return false;
4621 };
4622
4623 EVT OpVT = N0.getValueType();
4624 EVT ShAmtVT = N0.getOperand(2).getValueType();
4625 SDValue F0 = N0.getOperand(0);
4626 SDValue F1 = N0.getOperand(1);
4627 if (matchOr(F0, F1)) {
4628 // fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0
4629 SDValue NewShAmt = DAG.getConstant(ShAmt, dl, ShAmtVT);
4630 SDValue Shift = DAG.getNode(ISD::SHL, dl, OpVT, Y, NewShAmt);
4631 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4632 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4633 }
4634 if (matchOr(F1, F0)) {
4635 // fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0
4636 SDValue NewShAmt = DAG.getConstant(BitWidth - ShAmt, dl, ShAmtVT);
4637 SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, Y, NewShAmt);
4638 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4639 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4640 }
4641
4642 return SDValue();
4643}
4644
4645/// Try to simplify a setcc built with the specified operands and cc. If it is
4646/// unable to simplify it, return a null SDValue.
4647 SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
4648 ISD::CondCode Cond, bool foldBooleans,
4649 DAGCombinerInfo &DCI,
4650 const SDLoc &dl) const {
4651 SelectionDAG &DAG = DCI.DAG;
4652 const DataLayout &Layout = DAG.getDataLayout();
4653 EVT OpVT = N0.getValueType();
4654 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4655
4656 // Constant fold or commute setcc.
4657 if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
4658 return Fold;
4659
4660 bool N0ConstOrSplat =
4661 isConstOrConstSplat(N0, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4662 bool N1ConstOrSplat =
4663 isConstOrConstSplat(N1, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4664
4665 // Canonicalize toward having the constant on the RHS.
4666 // TODO: Handle non-splat vector constants. All undef causes trouble.
4667 // FIXME: We can't yet fold constant scalable vector splats, so avoid an
4668 // infinite loop here when we encounter one.
4669 ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
4670 if (N0ConstOrSplat && !N1ConstOrSplat &&
4671 (DCI.isBeforeLegalizeOps() ||
4672 isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
4673 return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4674
4675 // If we have a subtract with the same 2 non-constant operands as this setcc
4676 // -- but in reverse order -- then try to commute the operands of this setcc
4677 // to match. A matching pair of setcc (cmp) and sub may be combined into 1
4678 // instruction on some targets.
4679 if (!N0ConstOrSplat && !N1ConstOrSplat &&
4680 (DCI.isBeforeLegalizeOps() ||
4681 isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
4682 DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N1, N0}) &&
4683 !DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N0, N1}))
4684 return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4685
4686 if (SDValue V = foldSetCCWithRotate(VT, N0, N1, Cond, dl, DAG))
4687 return V;
4688
4689 if (SDValue V = foldSetCCWithFunnelShift(VT, N0, N1, Cond, dl, DAG))
4690 return V;
4691
4692 if (auto *N1C = isConstOrConstSplat(N1)) {
4693 const APInt &C1 = N1C->getAPIntValue();
4694
4695 // Optimize some CTPOP cases.
4696 if (SDValue V = simplifySetCCWithCTPOP(*this, VT, N0, C1, Cond, dl, DAG))
4697 return V;
4698
4699 // For equality to 0 of a no-wrap multiply, decompose and test each op:
4700 // X * Y == 0 --> (X == 0) || (Y == 0)
4701 // X * Y != 0 --> (X != 0) && (Y != 0)
4702 // TODO: This bails out if minsize is set, but if the target doesn't have a
4703 // single instruction multiply for this type, it would likely be
4704 // smaller to decompose.
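// (The no-wrap flags are required for correctness: e.g. i8 (mul 16, 16)
// wraps to 0 even though both operands are nonzero.)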
4705 if (C1.isZero() && (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4706 N0.getOpcode() == ISD::MUL && N0.hasOneUse() &&
4707 (N0->getFlags().hasNoUnsignedWrap() ||
4708 N0->getFlags().hasNoSignedWrap()) &&
4709 !Attr.hasFnAttr(Attribute::MinSize)) {
4710 SDValue IsXZero = DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
4711 SDValue IsYZero = DAG.getSetCC(dl, VT, N0.getOperand(1), N1, Cond);
4712 unsigned LogicOp = Cond == ISD::SETEQ ? ISD::OR : ISD::AND;
4713 return DAG.getNode(LogicOp, dl, VT, IsXZero, IsYZero);
4714 }
4715
4716 // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
4717 // equality comparison, then we're just comparing whether X itself is
4718 // zero.
4719 if (N0.getOpcode() == ISD::SRL && (C1.isZero() || C1.isOne()) &&
4720 N0.getOperand(0).getOpcode() == ISD::CTLZ &&
4721 isPowerOf2_32(N0.getScalarValueSizeInBits())) {
4722 if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) {
4723 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4724 ShAmt->getAPIntValue() == Log2_32(N0.getScalarValueSizeInBits())) {
4725 if ((C1 == 0) == (Cond == ISD::SETEQ)) {
4726 // (srl (ctlz x), 5) == 0 -> X != 0
4727 // (srl (ctlz x), 5) != 1 -> X != 0
4728 Cond = ISD::SETNE;
4729 } else {
4730 // (srl (ctlz x), 5) != 0 -> X == 0
4731 // (srl (ctlz x), 5) == 1 -> X == 0
4732 Cond = ISD::SETEQ;
4733 }
4734 SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
4735 return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), Zero,
4736 Cond);
4737 }
4738 }
4739 }
4740 }
4741
4742 // FIXME: Support vectors.
4743 if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
4744 const APInt &C1 = N1C->getAPIntValue();
4745
4746 // (zext x) == C --> x == (trunc C)
4747 // (sext x) == C --> x == (trunc C)
4748 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4749 DCI.isBeforeLegalize() && N0->hasOneUse()) {
4750 unsigned MinBits = N0.getValueSizeInBits();
4751 SDValue PreExt;
4752 bool Signed = false;
4753 if (N0->getOpcode() == ISD::ZERO_EXTEND) {
4754 // ZExt
4755 MinBits = N0->getOperand(0).getValueSizeInBits();
4756 PreExt = N0->getOperand(0);
4757 } else if (N0->getOpcode() == ISD::AND) {
4758 // DAGCombine turns costly ZExts into ANDs
4759 if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
4760 if ((C->getAPIntValue()+1).isPowerOf2()) {
4761 MinBits = C->getAPIntValue().countr_one();
4762 PreExt = N0->getOperand(0);
4763 }
4764 } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
4765 // SExt
4766 MinBits = N0->getOperand(0).getValueSizeInBits();
4767 PreExt = N0->getOperand(0);
4768 Signed = true;
4769 } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
4770 // ZEXTLOAD / SEXTLOAD
4771 if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
4772 MinBits = LN0->getMemoryVT().getSizeInBits();
4773 PreExt = N0;
4774 } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
4775 Signed = true;
4776 MinBits = LN0->getMemoryVT().getSizeInBits();
4777 PreExt = N0;
4778 }
4779 }
4780
4781 // Figure out how many bits we need to preserve this constant.
4782 unsigned ReqdBits = Signed ? C1.getSignificantBits() : C1.getActiveBits();
4783
4784 // Make sure we're not losing bits from the constant.
4785 if (MinBits > 0 &&
4786 MinBits < C1.getBitWidth() &&
4787 MinBits >= ReqdBits) {
4788 EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
4789 if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
4790 // Will get folded away.
4791 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
4792 if (MinBits == 1 && C1 == 1)
4793 // Invert the condition.
4794 return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
4795 Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4796 SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
4797 return DAG.getSetCC(dl, VT, Trunc, C, Cond);
4798 }
4799
4800 // If truncating the setcc operands is not desirable, we can still
4801 // simplify the expression in some cases:
4802 // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
4803 // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
4804 // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
4805 // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
4806 // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
4807 // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
4808 SDValue TopSetCC = N0->getOperand(0);
4809 unsigned N0Opc = N0->getOpcode();
4810 bool SExt = (N0Opc == ISD::SIGN_EXTEND);
4811 if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
4812 TopSetCC.getOpcode() == ISD::SETCC &&
4813 (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
4814 (isConstFalseVal(N1) ||
4815 isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
4816
4817 bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) ||
4818 (!N1C->isZero() && Cond == ISD::SETNE);
4819
4820 if (!Inverse)
4821 return TopSetCC;
4822
4823 ISD::CondCode InvCond = ISD::getSetCCInverse(
4824 cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
4825 TopSetCC.getOperand(0).getValueType());
4826 return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
4827 TopSetCC.getOperand(1),
4828 InvCond);
4829 }
4830 }
4831 }
4832
4833 // If the LHS is '(and load, const)', the RHS is 0, the test is for
4834 // equality or unsigned, and all 1 bits of the const are in the same
4835 // partial word, see if we can shorten the load.
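 // For example, on a little-endian target, '(i32 load p) & 0xFF00 == 0'
 // only inspects the second byte, so it can be narrowed to
 // '(i8 load p+1) & 0xFF == 0'.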
4836 if (DCI.isBeforeLegalize() &&
4837 !ISD::isSignedIntSetCC(Cond) &&
4838 N0.getOpcode() == ISD::AND && C1 == 0 &&
4839 N0.getNode()->hasOneUse() &&
4840 isa<LoadSDNode>(N0.getOperand(0)) &&
4841 N0.getOperand(0).getNode()->hasOneUse() &&
4842 isa<ConstantSDNode>(N0.getOperand(1))) {
4843 auto *Lod = cast<LoadSDNode>(N0.getOperand(0));
4844 APInt bestMask;
4845 unsigned bestWidth = 0, bestOffset = 0;
4846 if (Lod->isSimple() && Lod->isUnindexed() &&
4847 (Lod->getMemoryVT().isByteSized() ||
4848 isPaddedAtMostSignificantBitsWhenStored(Lod->getMemoryVT()))) {
4849 unsigned memWidth = Lod->getMemoryVT().getStoreSizeInBits();
4850 unsigned origWidth = N0.getValueSizeInBits();
4851 unsigned maskWidth = origWidth;
4852 // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
4853 // 8 bits, but have to be careful...
4854 if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
4855 origWidth = Lod->getMemoryVT().getSizeInBits();
4856 const APInt &Mask = N0.getConstantOperandAPInt(1);
4857 // Only consider power-of-2 widths (and at least one byte) as candidates
4858 // for the narrowed load.
4859 for (unsigned width = 8; width < origWidth; width *= 2) {
4860 EVT newVT = EVT::getIntegerVT(*DAG.getContext(), width);
4861 APInt newMask = APInt::getLowBitsSet(maskWidth, width);
4862 // Avoid accessing any padding here for now (we could use memWidth
4863 // instead of origWidth here otherwise).
4864 unsigned maxOffset = origWidth - width;
4865 for (unsigned offset = 0; offset <= maxOffset; offset += 8) {
4866 if (Mask.isSubsetOf(newMask)) {
4867 unsigned ptrOffset =
4868 Layout.isLittleEndian() ? offset : memWidth - width - offset;
4869 unsigned IsFast = 0;
4870 assert((ptrOffset % 8) == 0 && "Non-Bytealigned pointer offset");
4871 Align NewAlign = commonAlignment(Lod->getAlign(), ptrOffset / 8);
4872 if (shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT,
4873 ptrOffset / 8) &&
4874 allowsMemoryAccess(
4875 *DAG.getContext(), Layout, newVT, Lod->getAddressSpace(),
4876 NewAlign, Lod->getMemOperand()->getFlags(), &IsFast) &&
4877 IsFast) {
4878 bestOffset = ptrOffset / 8;
4879 bestMask = Mask.lshr(offset);
4880 bestWidth = width;
4881 break;
4882 }
4883 }
4884 newMask <<= 8;
4885 }
4886 if (bestWidth)
4887 break;
4888 }
4889 }
4890 if (bestWidth) {
4891 EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
4892 SDValue Ptr = Lod->getBasePtr();
4893 if (bestOffset != 0)
4894 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(bestOffset));
4895 SDValue NewLoad =
4896 DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
4897 Lod->getPointerInfo().getWithOffset(bestOffset),
4898 Lod->getBaseAlign());
4899 SDValue And =
4900 DAG.getNode(ISD::AND, dl, newVT, NewLoad,
4901 DAG.getConstant(bestMask.trunc(bestWidth), dl, newVT));
4902 return DAG.getSetCC(dl, VT, And, DAG.getConstant(0LL, dl, newVT), Cond);
4903 }
4904 }
4905
4906 // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
4907 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
4908 unsigned InSize = N0.getOperand(0).getValueSizeInBits();
4909
4910 // If the comparison constant has bits in the upper part, the
4911 // zero-extended value could never match.
4912 if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(),
4913 C1.getBitWidth() - InSize))) {
4914 switch (Cond) {
4915 case ISD::SETUGT:
4916 case ISD::SETUGE:
4917 case ISD::SETEQ:
4918 return DAG.getConstant(0, dl, VT);
4919 case ISD::SETULT:
4920 case ISD::SETULE:
4921 case ISD::SETNE:
4922 return DAG.getConstant(1, dl, VT);
4923 case ISD::SETGT:
4924 case ISD::SETGE:
4925 // True if the sign bit of C1 is set.
4926 return DAG.getConstant(C1.isNegative(), dl, VT);
4927 case ISD::SETLT:
4928 case ISD::SETLE:
4929 // True if the sign bit of C1 isn't set.
4930 return DAG.getConstant(C1.isNonNegative(), dl, VT);
4931 default:
4932 break;
4933 }
4934 }
4935
4936 // Otherwise, we can perform the comparison with the low bits.
4937 switch (Cond) {
4938 case ISD::SETEQ:
4939 case ISD::SETNE:
4940 case ISD::SETUGT:
4941 case ISD::SETUGE:
4942 case ISD::SETULT:
4943 case ISD::SETULE: {
4944 EVT newVT = N0.getOperand(0).getValueType();
4945 // FIXME: Should use isNarrowingProfitable.
4946 if (DCI.isBeforeLegalizeOps() ||
4947 (isOperationLegal(ISD::SETCC, newVT) &&
4948 isCondCodeLegal(Cond, newVT.getSimpleVT()) &&
4949 isTypeDesirableForOp(ISD::SETCC, newVT))) {
4950 EVT NewSetCCVT = getSetCCResultType(Layout, *DAG.getContext(), newVT);
4951 SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);
4952
4953 SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
4954 NewConst, Cond);
4955 return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
4956 }
4957 break;
4958 }
4959 default:
4960 break; // todo, be more careful with signed comparisons
4961 }
4962 } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
4963 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4964 !isSExtCheaperThanZExt(cast<VTSDNode>(N0.getOperand(1))->getVT(),
4965 OpVT)) {
4966 EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
4967 unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
4968 EVT ExtDstTy = N0.getValueType();
4969 unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
4970
4971 // If the constant doesn't fit into the number of bits for the source of
4972 // the sign extension, it is impossible for both sides to be equal.
4973 if (C1.getSignificantBits() > ExtSrcTyBits)
4974 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
4975
4976 assert(ExtDstTy == N0.getOperand(0).getValueType() &&
4977 ExtDstTy != ExtSrcTy && "Unexpected types!");
4978 APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
4979 SDValue ZextOp = DAG.getNode(ISD::AND, dl, ExtDstTy, N0.getOperand(0),
4980 DAG.getConstant(Imm, dl, ExtDstTy));
4981 if (!DCI.isCalledByLegalizer())
4982 DCI.AddToWorklist(ZextOp.getNode());
4983 // Otherwise, make this a use of a zext.
4984 return DAG.getSetCC(dl, VT, ZextOp,
4985 DAG.getConstant(C1 & Imm, dl, ExtDstTy), Cond);
4986 } else if ((N1C->isZero() || N1C->isOne()) &&
4987 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
4988 // SETCC (X), [0|1], [EQ|NE] -> X if X is known 0/1. i1 types are
4989 // excluded as they are handled below whilst checking for foldBooleans.
4990 if ((N0.getOpcode() == ISD::SETCC || VT.getScalarType() != MVT::i1) &&
4991 isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
4992 (N0.getValueType() == MVT::i1 ||
4993 getBooleanContents(N0.getValueType()) == ZeroOrOneBooleanContent) &&
4994 DAG.MaskedValueIsZero(
4995 N0, APInt::getBitsSetFrom(N0.getValueSizeInBits(), 1))) {
4996 bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
4997 if (TrueWhenTrue)
4998 return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
4999 // Invert the condition.
5000 if (N0.getOpcode() == ISD::SETCC) {
5001 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
5002 CC = ISD::getSetCCInverse(CC, N0.getOperand(0).getValueType());
5003 if (DCI.isBeforeLegalizeOps() ||
5004 isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType()))
5005 return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
5006 }
5007 }
5008
5009 if ((N0.getOpcode() == ISD::XOR ||
5010 (N0.getOpcode() == ISD::AND &&
5011 N0.getOperand(0).getOpcode() == ISD::XOR &&
5012 N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
5013 isOneConstant(N0.getOperand(1))) {
5014 // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
5015 // can only do this if the top bits are known zero.
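 // (With the top bits known zero, X is effectively a 0/1 value, so X^1
 // merely flips it and the comparison can instead be made against the
 // flipped constant.)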
5016 unsigned BitWidth = N0.getValueSizeInBits();
5017 if (DAG.MaskedValueIsZero(N0,
5018 APInt::getHighBitsSet(BitWidth,
5019 BitWidth-1))) {
5020 // Okay, get the un-inverted input value.
5021 SDValue Val;
5022 if (N0.getOpcode() == ISD::XOR) {
5023 Val = N0.getOperand(0);
5024 } else {
5025 assert(N0.getOpcode() == ISD::AND &&
5026 N0.getOperand(0).getOpcode() == ISD::XOR);
5027 // ((X^1)&1)^1 -> X & 1
5028 Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
5029 N0.getOperand(0).getOperand(0),
5030 N0.getOperand(1));
5031 }
5032
5033 return DAG.getSetCC(dl, VT, Val, N1,
5034 Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
5035 }
5036 } else if (N1C->isOne()) {
5037 SDValue Op0 = N0;
5038 if (Op0.getOpcode() == ISD::TRUNCATE)
5039 Op0 = Op0.getOperand(0);
5040
5041 if ((Op0.getOpcode() == ISD::XOR) &&
5042 Op0.getOperand(0).getOpcode() == ISD::SETCC &&
5043 Op0.getOperand(1).getOpcode() == ISD::SETCC) {
5044 SDValue XorLHS = Op0.getOperand(0);
5045 SDValue XorRHS = Op0.getOperand(1);
5046 // Ensure that the input setccs return an i1 type or 0/1 value.
5047 if (Op0.getValueType() == MVT::i1 ||
5048 (getBooleanContents(XorLHS.getOperand(0).getValueType()) ==
5049 ZeroOrOneBooleanContent &&
5050 getBooleanContents(XorRHS.getOperand(0).getValueType()) ==
5051 ZeroOrOneBooleanContent)) {
5052 // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
5053 Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
5054 return DAG.getSetCC(dl, VT, XorLHS, XorRHS, Cond);
5055 }
5056 }
5057 if (Op0.getOpcode() == ISD::AND && isOneConstant(Op0.getOperand(1))) {
5058 // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
5059 if (Op0.getValueType().bitsGT(VT))
5060 Op0 = DAG.getNode(ISD::AND, dl, VT,
5061 DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
5062 DAG.getConstant(1, dl, VT));
5063 else if (Op0.getValueType().bitsLT(VT))
5064 Op0 = DAG.getNode(ISD::AND, dl, VT,
5065 DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
5066 DAG.getConstant(1, dl, VT));
5067
5068 return DAG.getSetCC(dl, VT, Op0,
5069 DAG.getConstant(0, dl, Op0.getValueType()),
5070 Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
5071 }
5072 if (Op0.getOpcode() == ISD::AssertZext &&
5073 cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
5074 return DAG.getSetCC(dl, VT, Op0,
5075 DAG.getConstant(0, dl, Op0.getValueType()),
5076 Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
5077 }
5078 }
5079
5080 // Given:
5081 // icmp eq/ne (urem %x, %y), 0
5082 // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
5083 // icmp eq/ne %x, 0
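 // The reasoning: every divisor of a power of two is itself a power of two,
 // so a %y with two or more set bits can never evenly divide a %x with at
 // most one set bit; the remainder is then 0 only when %x itself is 0.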
5084 if (N0.getOpcode() == ISD::UREM && N1C->isZero() &&
5085 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5086 KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));
5087 KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));
5088 if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
5089 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
5090 }
5091
5092 // Fold set_cc seteq (ashr X, BW-1), -1 -> set_cc setlt X, 0
5093 // and set_cc setne (ashr X, BW-1), -1 -> set_cc setge X, 0
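 // ((ashr X, BW-1) replicates the sign bit across the value, yielding -1
 // for negative X and 0 otherwise, so comparing it with -1 is a sign test.)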
5094 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5095 N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(N0.getOperand(1)) &&
5096 N0.getConstantOperandAPInt(1) == OpVT.getScalarSizeInBits() - 1 &&
5097 N1C->isAllOnes()) {
5098 return DAG.getSetCC(dl, VT, N0.getOperand(0),
5099 DAG.getConstant(0, dl, OpVT),
5100 Cond == ISD::SETEQ ? ISD::SETLT : ISD::SETGE);
5101 }
5102
5103 // fold (setcc (trunc x) c) -> (setcc x c)
5104 if (N0.getOpcode() == ISD::TRUNCATE &&
5105 ((N0->getFlags().hasNoUnsignedWrap() && !ISD::isSignedIntSetCC(Cond)) ||
5106 (N0->getFlags().hasNoSignedWrap() &&
5107 !ISD::isUnsignedIntSetCC(Cond))) &&
5108 isTypeDesirableForOp(ISD::SETCC, N0.getOperand(0).getValueType())) {
5109 EVT NewVT = N0.getOperand(0).getValueType();
5110 SDValue NewConst = DAG.getConstant(
5111 ISD::isSignedIntSetCC(Cond)
5112 ? C1.sext(NewVT.getSizeInBits())
5113 : C1.zext(NewVT.getSizeInBits()),
5114 dl, NewVT);
5115 return DAG.getSetCC(dl, VT, N0.getOperand(0), NewConst, Cond);
5116 }
5117
5118 if (SDValue V =
5119 optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
5120 return V;
5121 }
5122
5123 // These simplifications apply to splat vectors as well.
5124 // TODO: Handle more splat vector cases.
5125 if (auto *N1C = isConstOrConstSplat(N1)) {
5126 const APInt &C1 = N1C->getAPIntValue();
5127
5128 APInt MinVal, MaxVal;
5129 unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits();
5130 if (ISD::isSignedIntSetCC(Cond)) {
5131 MinVal = APInt::getSignedMinValue(OperandBitSize);
5132 MaxVal = APInt::getSignedMaxValue(OperandBitSize);
5133 } else {
5134 MinVal = APInt::getMinValue(OperandBitSize);
5135 MaxVal = APInt::getMaxValue(OperandBitSize);
5136 }
5137
5138 // Canonicalize GE/LE comparisons to use GT/LT comparisons.
5139 if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
5140 // X >= MIN --> true
5141 if (C1 == MinVal)
5142 return DAG.getBoolConstant(true, dl, VT, OpVT);
5143
5144 if (!VT.isVector()) { // TODO: Support this for vectors.
5145 // X >= C0 --> X > (C0 - 1)
5146 APInt C = C1 - 1;
5147 ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT;
5148 if ((DCI.isBeforeLegalizeOps() ||
5149 isCondCodeLegal(NewCC, OpVT.getSimpleVT())) &&
5150 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
5151 isLegalICmpImmediate(C.getSExtValue())))) {
5152 return DAG.getSetCC(dl, VT, N0,
5153 DAG.getConstant(C, dl, N1.getValueType()),
5154 NewCC);
5155 }
5156 }
5157 }
5158
5159 if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
5160 // X <= MAX --> true
5161 if (C1 == MaxVal)
5162 return DAG.getBoolConstant(true, dl, VT, OpVT);
5163
5164 // X <= C0 --> X < (C0 + 1)
5165 if (!VT.isVector()) { // TODO: Support this for vectors.
5166 APInt C = C1 + 1;
5167 ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT;
5168 if ((DCI.isBeforeLegalizeOps() ||
5169 isCondCodeLegal(NewCC, OpVT.getSimpleVT())) &&
5170 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
5171 isLegalICmpImmediate(C.getSExtValue())))) {
5172 return DAG.getSetCC(dl, VT, N0,
5173 DAG.getConstant(C, dl, N1.getValueType()),
5174 NewCC);
5175 }
5176 }
5177 }
5178
5179 if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
5180 if (C1 == MinVal)
5181 return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false
5182
5183 // TODO: Support this for vectors after legalize ops.
5184 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5185 // Canonicalize setlt X, Max --> setne X, Max
5186 if (C1 == MaxVal)
5187 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
5188
5189 // If we have setult X, 1, turn it into seteq X, 0
5190 if (C1 == MinVal+1)
5191 return DAG.getSetCC(dl, VT, N0,
5192 DAG.getConstant(MinVal, dl, N0.getValueType()),
5193 ISD::SETEQ);
5194 }
5195 }
5196
5197 if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
5198 if (C1 == MaxVal)
5199 return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false
5200
5201 // TODO: Support this for vectors after legalize ops.
5202 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5203 // Canonicalize setgt X, Min --> setne X, Min
5204 if (C1 == MinVal)
5205 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
5206
5207 // If we have setugt X, Max-1, turn it into seteq X, Max
5208 if (C1 == MaxVal-1)
5209 return DAG.getSetCC(dl, VT, N0,
5210 DAG.getConstant(MaxVal, dl, N0.getValueType()),
5211 ISD::SETEQ);
5212 }
5213 }
5214
5215 if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
5216 // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
5217 if (C1.isZero())
5218 if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
5219 VT, N0, N1, Cond, DCI, dl))
5220 return CC;
5221
5222 // For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y).
5223 // For example, when high 32-bits of i64 X are known clear:
5224 // all bits clear: (X | (Y<<32)) == 0 --> (X | Y) == 0
5225 // all bits set: (X | (Y<<32)) == -1 --> (X & Y) == -1
5226 bool CmpZero = N1C->isZero();
5227 bool CmpNegOne = N1C->isAllOnes();
5228 if ((CmpZero || CmpNegOne) && N0.hasOneUse()) {
5229 // Match or(lo,shl(hi,bw/2)) pattern.
5230 auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
5231 unsigned EltBits = V.getScalarValueSizeInBits();
5232 if (V.getOpcode() != ISD::OR || (EltBits % 2) != 0)
5233 return false;
5234 SDValue LHS = V.getOperand(0);
5235 SDValue RHS = V.getOperand(1);
5236 APInt HiBits = APInt::getHighBitsSet(EltBits, EltBits / 2);
5237 // Unshifted element must have zero upper bits.
5238 if (RHS.getOpcode() == ISD::SHL &&
5239 isa<ConstantSDNode>(RHS.getOperand(1)) &&
5240 RHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5241 DAG.MaskedValueIsZero(LHS, HiBits)) {
5242 Lo = LHS;
5243 Hi = RHS.getOperand(0);
5244 return true;
5245 }
5246 if (LHS.getOpcode() == ISD::SHL &&
5247 isa<ConstantSDNode>(LHS.getOperand(1)) &&
5248 LHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5249 DAG.MaskedValueIsZero(RHS, HiBits)) {
5250 Lo = RHS;
5251 Hi = LHS.getOperand(0);
5252 return true;
5253 }
5254 return false;
5255 };
5256
5257 auto MergeConcat = [&](SDValue Lo, SDValue Hi) {
5258 unsigned EltBits = N0.getScalarValueSizeInBits();
5259 unsigned HalfBits = EltBits / 2;
5260 APInt HiBits = APInt::getHighBitsSet(EltBits, HalfBits);
5261 SDValue LoBits = DAG.getConstant(~HiBits, dl, OpVT);
5262 SDValue HiMask = DAG.getNode(ISD::AND, dl, OpVT, Hi, LoBits);
5263 SDValue NewN0 =
5264 DAG.getNode(CmpZero ? ISD::OR : ISD::AND, dl, OpVT, Lo, HiMask);
5265 SDValue NewN1 = CmpZero ? DAG.getConstant(0, dl, OpVT) : LoBits;
5266 return DAG.getSetCC(dl, VT, NewN0, NewN1, Cond);
5267 };
5268
5269 SDValue Lo, Hi;
5270 if (IsConcat(N0, Lo, Hi))
5271 return MergeConcat(Lo, Hi);
5272
5273 if (N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR) {
5274 SDValue Lo0, Lo1, Hi0, Hi1;
5275 if (IsConcat(N0.getOperand(0), Lo0, Hi0) &&
5276 IsConcat(N0.getOperand(1), Lo1, Hi1)) {
5277 return MergeConcat(DAG.getNode(N0.getOpcode(), dl, OpVT, Lo0, Lo1),
5278 DAG.getNode(N0.getOpcode(), dl, OpVT, Hi0, Hi1));
5279 }
5280 }
5281 }
5282 }
5283
5284 // If we have "setcc X, C0", check to see if we can shrink the immediate
5285 // by changing cc.
5286 // TODO: Support this for vectors after legalize ops.
5287 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5288 // SETUGT X, SINTMAX -> SETLT X, 0
5289 // SETUGE X, SINTMIN -> SETLT X, 0
5290 if ((Cond == ISD::SETUGT && C1.isMaxSignedValue()) ||
5291 (Cond == ISD::SETUGE && C1.isMinSignedValue()))
5292 return DAG.getSetCC(dl, VT, N0,
5293 DAG.getConstant(0, dl, N1.getValueType()),
5294 ISD::SETLT);
5295
5296 // SETULT X, SINTMIN -> SETGT X, -1
5297 // SETULE X, SINTMAX -> SETGT X, -1
5298 if ((Cond == ISD::SETULT && C1.isMinSignedValue()) ||
5299 (Cond == ISD::SETULE && C1.isMaxSignedValue()))
5300 return DAG.getSetCC(dl, VT, N0,
5301 DAG.getAllOnesConstant(dl, N1.getValueType()),
5302 ISD::SETGT);
5303 }
5304 }
5305
5306 // Back to non-vector simplifications.
5307 // TODO: Can we do these for vector splats?
5308 if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
5309 const APInt &C1 = N1C->getAPIntValue();
5310 EVT ShValTy = N0.getValueType();
5311
5312 // Fold bit comparisons when we can. This will result in an
5313 // incorrect value when boolean false is negative one, unless
5314 // the bitsize is 1 in which case the false value is the same
5315 // in practice regardless of the representation.
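 // Example: lowering (X & 8) != 0 as (X & 8) >> 3 yields a 0/1 result,
 // which is a correct boolean only if the target encodes true as 1, or if
 // the result is a single bit (where 0/1 and 0/-1 coincide).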
5316 if ((VT.getSizeInBits() == 1 ||
5317 getBooleanContents(N0.getValueType()) == ZeroOrOneBooleanContent) &&
5318 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5319 (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) &&
5320 N0.getOpcode() == ISD::AND) {
5321 if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5322 if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3
5323 // Perform the xform if the AND RHS is a single bit.
5324 unsigned ShCt = AndRHS->getAPIntValue().logBase2();
5325 if (AndRHS->getAPIntValue().isPowerOf2() &&
5326 !shouldAvoidTransformToShift(ShValTy, ShCt)) {
5327 return DAG.getNode(
5328 ISD::TRUNCATE, dl, VT,
5329 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5330 DAG.getShiftAmountConstant(ShCt, ShValTy, dl)));
5331 }
5332 } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
5333 // (X & 8) == 8 --> (X & 8) >> 3
5334 // Perform the xform if C1 is a single bit.
5335 unsigned ShCt = C1.logBase2();
5336 if (C1.isPowerOf2() && !shouldAvoidTransformToShift(ShValTy, ShCt)) {
5337 return DAG.getNode(
5338 ISD::TRUNCATE, dl, VT,
5339 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5340 DAG.getShiftAmountConstant(ShCt, ShValTy, dl)));
5341 }
5342 }
5343 }
5344 }
5345
5346 if (C1.getSignificantBits() <= 64 &&
5347 !isLegalICmpImmediate(C1.getSExtValue())) {
5348 // (X & -256) == 256 -> (X >> 8) == 1
5349 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5350 N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
5351 if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5352 const APInt &AndRHSC = AndRHS->getAPIntValue();
5353 if (AndRHSC.isNegatedPowerOf2() && C1.isSubsetOf(AndRHSC)) {
5354 unsigned ShiftBits = AndRHSC.countr_zero();
5355 if (!shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5356 // If using an unsigned shift doesn't yield a legal compare
5357 // immediate, try using sra instead.
5358 APInt NewC = C1.lshr(ShiftBits);
5359 if (NewC.getSignificantBits() <= 64 &&
5360 !isLegalICmpImmediate(NewC.getSExtValue())) {
5361 APInt SignedC = C1.ashr(ShiftBits);
5362 if (SignedC.getSignificantBits() <= 64 &&
5363 isLegalICmpImmediate(SignedC.getSExtValue())) {
5364 SDValue Shift = DAG.getNode(
5365 ISD::SRA, dl, ShValTy, N0.getOperand(0),
5366 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5367 SDValue CmpRHS = DAG.getConstant(SignedC, dl, ShValTy);
5368 return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
5369 }
5370 }
5371 SDValue Shift = DAG.getNode(
5372 ISD::SRL, dl, ShValTy, N0.getOperand(0),
5373 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5374 SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
5375 return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
5376 }
5377 }
5378 }
5379 } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
5380 Cond == ISD::SETULE || Cond == ISD::SETUGT) {
5381 bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
5382 // X < 0x100000000 -> (X >> 32) < 1
5383 // X >= 0x100000000 -> (X >> 32) >= 1
5384 // X <= 0x0ffffffff -> (X >> 32) < 1
5385 // X > 0x0ffffffff -> (X >> 32) >= 1
5386 unsigned ShiftBits;
5387 APInt NewC = C1;
5388 ISD::CondCode NewCond = Cond;
5389 if (AdjOne) {
5390 ShiftBits = C1.countr_one();
5391 NewC = NewC + 1;
5392 NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
5393 } else {
5394 ShiftBits = C1.countr_zero();
5395 }
5396 NewC.lshrInPlace(ShiftBits);
5397 if (ShiftBits && NewC.getSignificantBits() <= 64 &&
5398 isLegalICmpImmediate(NewC.getSExtValue()) &&
5399 !shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5400 SDValue Shift =
5401 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5402 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5403 SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
5404 return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
5405 }
5406 }
5407 }
5408 }
5409
5410 if (isa<ConstantFPSDNode>(N1.getNode())) {
5411 auto *CFP = cast<ConstantFPSDNode>(N1);
5412 assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");
5413
5414 // Otherwise, we know the RHS is not a NaN. Simplify the node to drop the
5415 // constant if knowing that the operand is non-nan is enough. We prefer to
5416 // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
5417 // materialize 0.0.
5418 if (Cond == ISD::SETO || Cond == ISD::SETUO)
5419 return DAG.getSetCC(dl, VT, N0, N0, Cond);
5420
5421 // setcc (fneg x), C -> setcc swap(pred) x, -C
5422 if (N0.getOpcode() == ISD::FNEG) {
5423 ISD::CondCode SwapCond = ISD::getSetCCSwappedOperands(Cond);
5424 if (DCI.isBeforeLegalizeOps() ||
5425 isCondCodeLegal(SwapCond, N0.getSimpleValueType())) {
5426 SDValue NegN1 = DAG.getNode(ISD::FNEG, dl, N0.getValueType(), N1);
5427 return DAG.getSetCC(dl, VT, N0.getOperand(0), NegN1, SwapCond);
5428 }
5429 }
5430
5431 // setueq/setoeq X, (fabs Inf) -> is_fpclass X, fcInf
5432 if (isOperationLegalOrCustom(ISD::IS_FPCLASS, N0.getValueType()) &&
5433 !isFPImmLegal(CFP->getValueAPF(), CFP->getValueType(0))) {
5434 bool IsFabs = N0.getOpcode() == ISD::FABS;
5435 SDValue Op = IsFabs ? N0.getOperand(0) : N0;
5436 if ((Cond == ISD::SETOEQ || Cond == ISD::SETUEQ) && CFP->isInfinity()) {
5437 FPClassTest Flag = CFP->isNegative() ? (IsFabs ? fcNone : fcNegInf)
5438 : (IsFabs ? fcInf : fcPosInf);
5439 if (Cond == ISD::SETUEQ)
5440 Flag |= fcNan;
5441 return DAG.getNode(ISD::IS_FPCLASS, dl, VT, Op,
5442 DAG.getTargetConstant(Flag, dl, MVT::i32));
5443 }
5444 }
5445
5446 // If the condition is not legal, see if we can find an equivalent one
5447 // which is legal.
5448 if (!isCondCodeLegal(Cond, N0.getSimpleValueType())) {
5449 // If the comparison was an awkward floating-point == or != and one of
5450 // the comparison operands is infinity or negative infinity, convert the
5451 // condition to a less-awkward <= or >=.
5452 if (CFP->getValueAPF().isInfinity()) {
5453 bool IsNegInf = CFP->getValueAPF().isNegative();
5454 ISD::CondCode NewCond = ISD::SETCC_INVALID;
5455 switch (Cond) {
5456 case ISD::SETOEQ: NewCond = IsNegInf ? ISD::SETOLE : ISD::SETOGE; break;
5457 case ISD::SETUEQ: NewCond = IsNegInf ? ISD::SETULE : ISD::SETUGE; break;
5458 case ISD::SETUNE: NewCond = IsNegInf ? ISD::SETUGT : ISD::SETULT; break;
5459 case ISD::SETONE: NewCond = IsNegInf ? ISD::SETOGT : ISD::SETOLT; break;
5460 default: break;
5461 }
5462 if (NewCond != ISD::SETCC_INVALID &&
5463 isCondCodeLegal(NewCond, N0.getSimpleValueType()))
5464 return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5465 }
5466 }
5467 }
5468
5469 if (N0 == N1) {
5470 // The sext(setcc()) => setcc() optimization relies on the appropriate
5471 // constant being emitted.
5472 assert(!N0.getValueType().isInteger() &&
5473 "Integer types should be handled by FoldSetCC");
5474
5475 bool EqTrue = ISD::isTrueWhenEqual(Cond);
5476 unsigned UOF = ISD::getUnorderedFlavor(Cond);
5477 if (UOF == 2) // FP operators that are undefined on NaNs.
5478 return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5479 if (UOF == unsigned(EqTrue))
5480 return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5481 // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
5482 // if it is not already.
5483 ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
5484 if (NewCond != Cond &&
5485 (DCI.isBeforeLegalizeOps() ||
5486 isCondCodeLegal(NewCond, N0.getSimpleValueType())))
5487 return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5488 }
5489
5490 // ~X > ~Y --> Y > X
5491 // ~X < ~Y --> Y < X
5492 // ~X < C --> X > ~C
5493 // ~X > C --> X < ~C
5494 if ((isSignedIntSetCC(Cond) || isUnsignedIntSetCC(Cond)) &&
5495 N0.getValueType().isInteger()) {
5496 if (isBitwiseNot(N0)) {
5497 if (isBitwiseNot(N1))
5498 return DAG.getSetCC(dl, VT, N1.getOperand(0), N0.getOperand(0), Cond);
5499
5500 if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
5501 !DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(0))) {
5502 SDValue Not = DAG.getNOT(dl, N1, OpVT);
5503 return DAG.getSetCC(dl, VT, Not, N0.getOperand(0), Cond);
5504 }
5505 }
5506 }
5507
5508 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5509 N0.getValueType().isInteger()) {
5510 if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
5511 N0.getOpcode() == ISD::XOR) {
5512 // Simplify (X+Y) == (X+Z) --> Y == Z
5513 if (N0.getOpcode() == N1.getOpcode()) {
5514 if (N0.getOperand(0) == N1.getOperand(0))
5515 return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
5516 if (N0.getOperand(1) == N1.getOperand(1))
5517 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
5518 if (isCommutativeBinOp(N0.getOpcode())) {
5519 // If X op Y == Y op X, try other combinations.
5520 if (N0.getOperand(0) == N1.getOperand(1))
5521 return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
5522 Cond);
5523 if (N0.getOperand(1) == N1.getOperand(0))
5524 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
5525 Cond);
5526 }
5527 }
5528
5529 // If RHS is a legal immediate value for a compare instruction, we need
5530 // to be careful about increasing register pressure needlessly.
5531 bool LegalRHSImm = false;
5532
5533 if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
5534 if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5535 // Turn (X+C1) == C2 --> X == C2-C1
5536 if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse())
5537 return DAG.getSetCC(
5538 dl, VT, N0.getOperand(0),
5539 DAG.getConstant(RHSC->getAPIntValue() - LHSR->getAPIntValue(),
5540 dl, N0.getValueType()),
5541 Cond);
5542
5543 // Turn (X^C1) == C2 --> X == C1^C2
5544 if (N0.getOpcode() == ISD::XOR && N0.getNode()->hasOneUse())
5545 return DAG.getSetCC(
5546 dl, VT, N0.getOperand(0),
5547 DAG.getConstant(LHSR->getAPIntValue() ^ RHSC->getAPIntValue(),
5548 dl, N0.getValueType()),
5549 Cond);
5550 }
5551
5552 // Turn (C1-X) == C2 --> X == C1-C2
5553 if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
5554 if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse())
5555 return DAG.getSetCC(
5556 dl, VT, N0.getOperand(1),
5557 DAG.getConstant(SUBC->getAPIntValue() - RHSC->getAPIntValue(),
5558 dl, N0.getValueType()),
5559 Cond);
5560
5561 // Could RHSC fold directly into a compare?
5562 if (RHSC->getValueType(0).getSizeInBits() <= 64)
5563 LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
5564 }
5565
5566 // (X+Y) == X --> Y == 0 and similar folds.
5567 // Don't do this if X is an immediate that can fold into a cmp
5568 // instruction and X+Y has other uses. It could be an induction variable
5569 // chain, and the transform would increase register pressure.
5570 if (!LegalRHSImm || N0.hasOneUse())
5571 if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, dl, DCI))
5572 return V;
5573 }
5574
5575 if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
5576 N1.getOpcode() == ISD::XOR)
5577 if (SDValue V = foldSetCCWithBinOp(VT, N1, N0, Cond, dl, DCI))
5578 return V;
5579
5580 if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, dl, DCI))
5581 return V;
5582
5583 if (SDValue V = foldSetCCWithOr(VT, N0, N1, Cond, dl, DCI))
5584 return V;
5585 }
5586
5587 // Fold remainder of division by a constant.
5588 if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) &&
5589 N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5590 // When division is cheap or optimizing for minimum size,
5591 // fall through to DIVREM creation by skipping this fold.
5592 if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Attribute::MinSize)) {
5593 if (N0.getOpcode() == ISD::UREM) {
5594 if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
5595 return Folded;
5596 } else if (N0.getOpcode() == ISD::SREM) {
5597 if (SDValue Folded = buildSREMEqFold(VT, N0, N1, Cond, DCI, dl))
5598 return Folded;
5599 }
5600 }
5601 }
5602
5603 // Fold away ALL boolean setcc's.
5604 if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
5605 SDValue Temp;
5606 switch (Cond) {
5607 default: llvm_unreachable("Unknown integer setcc!");
5608 case ISD::SETEQ: // X == Y -> ~(X^Y)
5609 Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5610 N0 = DAG.getNOT(dl, Temp, OpVT);
5611 if (!DCI.isCalledByLegalizer())
5612 DCI.AddToWorklist(Temp.getNode());
5613 break;
5614 case ISD::SETNE: // X != Y --> (X^Y)
5615 N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5616 break;
5617 case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
5618 case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
5619 Temp = DAG.getNOT(dl, N0, OpVT);
5620 N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp);
5621 if (!DCI.isCalledByLegalizer())
5622 DCI.AddToWorklist(Temp.getNode());
5623 break;
5624 case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
5625 case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
5626 Temp = DAG.getNOT(dl, N1, OpVT);
5627 N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp);
5628 if (!DCI.isCalledByLegalizer())
5629 DCI.AddToWorklist(Temp.getNode());
5630 break;
5631 case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
5632 case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
5633 Temp = DAG.getNOT(dl, N0, OpVT);
5634 N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp);
5635 if (!DCI.isCalledByLegalizer())
5636 DCI.AddToWorklist(Temp.getNode());
5637 break;
5638 case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
5639 case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
5640 Temp = DAG.getNOT(dl, N1, OpVT);
5641 N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp);
5642 break;
5643 }
5644 if (VT.getScalarType() != MVT::i1) {
5645 if (!DCI.isCalledByLegalizer())
5646 DCI.AddToWorklist(N0.getNode());
5647 // FIXME: If running after legalize, we probably can't do this.
5648 ISD::NodeType ExtendCode = getExtendForContent(getBooleanContents(OpVT));
5649 N0 = DAG.getNode(ExtendCode, dl, VT, N0);
5650 }
5651 return N0;
5652 }
5653
5654 // Fold (setcc (trunc x) (trunc y)) -> (setcc x y)
5655 if (N0.getOpcode() == ISD::TRUNCATE && N1.getOpcode() == ISD::TRUNCATE &&
5656 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
5657 ((!ISD::isSignedIntSetCC(Cond) && N0->getFlags().hasNoUnsignedWrap() &&
5658 N1->getFlags().hasNoUnsignedWrap()) ||
5659 (!ISD::isUnsignedIntSetCC(Cond) && N0->getFlags().hasNoSignedWrap() &&
5660 N1->getFlags().hasNoSignedWrap())) &&
5661 isTypeDesirableForOp(ISD::SETCC, N0.getOperand(0).getValueType())) {
5662 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
5663 }
5664
5665 // Could not fold it.
5666 return SDValue();
5667}
5668
5669/// Returns true (and the GlobalValue and the offset) if the node is a
5670/// GlobalAddress + offset.
5671bool TargetLowering::isGAPlusOffset(SDNode *WN, const GlobalValue *&GA,
5672 int64_t &Offset) const {
5673
5674 SDNode *N = unwrapAddress(SDValue(WN, 0)).getNode();
5675
5676 if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
5677 GA = GASD->getGlobal();
5678 Offset += GASD->getOffset();
5679 return true;
5680 }
5681
5682 if (N->getOpcode() == ISD::ADD) {
5683 SDValue N1 = N->getOperand(0);
5684 SDValue N2 = N->getOperand(1);
5685 if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
5686 if (auto *V = dyn_cast<ConstantSDNode>(N2)) {
5687 Offset += V->getSExtValue();
5688 return true;
5689 }
5690 } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
5691 if (auto *V = dyn_cast<ConstantSDNode>(N1)) {
5692 Offset += V->getSExtValue();
5693 return true;
5694 }
5695 }
5696 }
5697
5698 return false;
5699}
5700
5701SDValue TargetLowering::PerformDAGCombine(SDNode *N,
5702 DAGCombinerInfo &DCI) const {
5703 // Default implementation: no optimization.
5704 return SDValue();
5705}
5706
5707//===----------------------------------------------------------------------===//
5708// Inline Assembler Implementation Methods
5709//===----------------------------------------------------------------------===//
5710
5711TargetLowering::ConstraintType
5712TargetLowering::getConstraintType(StringRef Constraint) const {
5713 unsigned S = Constraint.size();
5714
5715 if (S == 1) {
5716 switch (Constraint[0]) {
5717 default: break;
5718 case 'r':
5719 return C_RegisterClass;
5720 case 'm': // memory
5721 case 'o': // offsetable
5722 case 'V': // not offsetable
5723 return C_Memory;
5724 case 'p': // Address.
5725 return C_Address;
5726 case 'n': // Simple Integer
5727 case 'E': // Floating Point Constant
5728 case 'F': // Floating Point Constant
5729 return C_Immediate;
5730 case 'i': // Simple Integer or Relocatable Constant
5731 case 's': // Relocatable Constant
5732 case 'X': // Allow ANY value.
5733 case 'I': // Target registers.
5734 case 'J':
5735 case 'K':
5736 case 'L':
5737 case 'M':
5738 case 'N':
5739 case 'O':
5740 case 'P':
5741 case '<':
5742 case '>':
5743 return C_Other;
5744 }
5745 }
5746
5747 if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') {
5748 if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}"
5749 return C_Memory;
5750 return C_Register;
5751 }
5752 return C_Unknown;
5753}
5754
5755/// Try to replace an X constraint, which matches anything, with another that
5756/// has more specific requirements based on the type of the corresponding
5757/// operand.
5758const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
5759 if (ConstraintVT.isInteger())
5760 return "r";
5761 if (ConstraintVT.isFloatingPoint())
5762 return "f"; // works for many targets
5763 return nullptr;
5764}
5765
5766SDValue TargetLowering::LowerAsmOutputForConstraint(
5767 SDValue &Chain, SDValue &Glue, const SDLoc &DL,
5768 const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
5769 return SDValue();
5770}
5771
5772/// Lower the specified operand into the Ops vector.
5773/// If it is invalid, don't add anything to Ops.
5774void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
5775 StringRef Constraint,
5776 std::vector<SDValue> &Ops,
5777 SelectionDAG &DAG) const {
5778
5779 if (Constraint.size() > 1)
5780 return;
5781
5782 char ConstraintLetter = Constraint[0];
5783 switch (ConstraintLetter) {
5784 default: break;
5785 case 'X': // Allows any operand
5786 case 'i': // Simple Integer or Relocatable Constant
5787 case 'n': // Simple Integer
5788 case 's': { // Relocatable Constant
5789
5790 ConstantSDNode *C;
5791 uint64_t Offset = 0;
5792
5793 // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
5794 // etc., since getelementptr is variadic. We can't use
5795 // SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible
5796 // while in this case the GA may be furthest from the root node which is
5797 // likely an ISD::ADD.
5798 while (true) {
5799 if ((C = dyn_cast<ConstantSDNode>(Op)) && ConstraintLetter != 's') {
5800 // gcc prints these as sign extended. Sign extend value to 64 bits
5801 // now; without this it would get ZExt'd later in
5802 // ScheduleDAGSDNodes::EmitNode, which is very generic.
5803 bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
5804 BooleanContent BCont = getBooleanContents(MVT::i64);
5805 ISD::NodeType ExtOpc =
5806 IsBool ? getExtendForContent(BCont) : ISD::SIGN_EXTEND;
5807 int64_t ExtVal =
5808 ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue() : C->getSExtValue();
5809 Ops.push_back(
5810 DAG.getTargetConstant(Offset + ExtVal, SDLoc(C), MVT::i64));
5811 return;
5812 }
5813 if (ConstraintLetter != 'n') {
5814 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
5815 Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
5816 GA->getValueType(0),
5817 Offset + GA->getOffset()));
5818 return;
5819 }
5820 if (const auto *BA = dyn_cast<BlockAddressSDNode>(Op)) {
5821 Ops.push_back(DAG.getTargetBlockAddress(
5822 BA->getBlockAddress(), BA->getValueType(0),
5823 Offset + BA->getOffset(), BA->getTargetFlags()));
5824 return;
5825 }
5826 if (isa<BasicBlockSDNode>(Op)) {
5827 Ops.push_back(Op);
5828 return;
5829 }
5830 }
5831 const unsigned OpCode = Op.getOpcode();
5832 if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
5833 if ((C = dyn_cast<ConstantSDNode>(Op.getOperand(0))))
5834 Op = Op.getOperand(1);
5835 // Subtraction is not commutative.
5836 else if (OpCode == ISD::ADD &&
5837 (C = dyn_cast<ConstantSDNode>(Op.getOperand(1))))
5838 Op = Op.getOperand(0);
5839 else
5840 return;
5841 Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
5842 continue;
5843 }
5844 return;
5845 }
5846 break;
5847 }
5848 }
5849}
5850
5851void TargetLowering::CollectTargetIntrinsicOperands(
5852 const CallInst &I, SmallVectorImpl<SDValue> &Ops, SelectionDAG &DAG) const {
5853}
5854
5855std::pair<unsigned, const TargetRegisterClass *>
5856TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
5857 StringRef Constraint,
5858 MVT VT) const {
5859 if (!Constraint.starts_with("{"))
5860 return std::make_pair(0u, static_cast<TargetRegisterClass *>(nullptr));
5861 assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");
5862
5863 // Remove the braces from around the name.
5864 StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
5865
5866 std::pair<unsigned, const TargetRegisterClass *> R =
5867 std::make_pair(0u, static_cast<const TargetRegisterClass *>(nullptr));
5868
5869 // Figure out which register class contains this reg.
5870 for (const TargetRegisterClass *RC : RI->regclasses()) {
5871 // If none of the value types for this register class are valid, we
5872 // can't use it. For example, 64-bit reg classes on 32-bit targets.
5873 if (!isLegalRC(*RI, *RC))
5874 continue;
5875
5876 for (const MCPhysReg &PR : *RC) {
5877 if (RegName.equals_insensitive(RI->getRegAsmName(PR))) {
5878 std::pair<unsigned, const TargetRegisterClass *> S =
5879 std::make_pair(PR, RC);
5880
5881 // If this register class has the requested value type, return it,
5882 // otherwise keep searching and return the first class found
5883 // if no other is found which explicitly has the requested type.
5884 if (RI->isTypeLegalForClass(*RC, VT))
5885 return S;
5886 if (!R.second)
5887 R = S;
5888 }
5889 }
5890 }
5891
5892 return R;
5893}
5894
5895//===----------------------------------------------------------------------===//
5896// Constraint Selection.
5897
5898/// Return true if this is an input operand that is a matching constraint like
5899/// "4".
5900bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
5901 assert(!ConstraintCode.empty() && "No known constraint!");
5902 return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
5903}
5904
5905/// If this is an input matching constraint, this method returns the output
5906/// operand it matches.
5907unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
5908 assert(!ConstraintCode.empty() && "No known constraint!");
5909 return atoi(ConstraintCode.c_str());
5910}
5911
5912/// Split up the constraint string from the inline assembly value into the
5913/// specific constraints and their prefixes, and also tie in the associated
5914/// operand values.
5915/// If this returns an empty vector, and if the constraint string itself
5916/// isn't empty, there was an error parsing.
5917TargetLowering::AsmOperandInfoVector
5918TargetLowering::ParseConstraints(const DataLayout &DL,
5919 const TargetRegisterInfo *TRI,
5920 const CallBase &Call) const {
5921 /// Information about all of the constraints.
5922 AsmOperandInfoVector ConstraintOperands;
5923 const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
5924 unsigned maCount = 0; // Largest number of multiple alternative constraints.
5925
5926 // Do a prepass over the constraints, canonicalizing them, and building up the
5927 // ConstraintOperands list.
5928 unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
5929 unsigned ResNo = 0; // ResNo - The result number of the next output.
5930 unsigned LabelNo = 0; // LabelNo - CallBr indirect dest number.
5931
5932 for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
5933 ConstraintOperands.emplace_back(std::move(CI));
5934 AsmOperandInfo &OpInfo = ConstraintOperands.back();
5935
5936 // Update multiple alternative constraint count.
5937 if (OpInfo.multipleAlternatives.size() > maCount)
5938 maCount = OpInfo.multipleAlternatives.size();
5939
5940 OpInfo.ConstraintVT = MVT::Other;
5941
5942 // Compute the value type for each operand.
5943 switch (OpInfo.Type) {
5944 case InlineAsm::isOutput:
5945 // Indirect outputs just consume an argument.
5946 if (OpInfo.isIndirect) {
5947 OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
5948 break;
5949 }
5950
5951 // The return value of the call is this value. As such, there is no
5952 // corresponding argument.
5953 assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
5954 if (auto *STy = dyn_cast<StructType>(Call.getType())) {
5955 OpInfo.ConstraintVT =
5956 getAsmOperandValueType(DL, STy->getElementType(ResNo))
5957 .getSimpleVT();
5958 } else {
5959 assert(ResNo == 0 && "Asm only has one result!");
5960 OpInfo.ConstraintVT =
5961 getAsmOperandValueType(DL, Call.getType()).getSimpleVT();
5962 }
5963 ++ResNo;
5964 break;
5965 case InlineAsm::isInput:
5966 OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
5967 break;
5968 case InlineAsm::isLabel:
5969 OpInfo.CallOperandVal = cast<CallBrInst>(&Call)->getIndirectDest(LabelNo);
5970 ++LabelNo;
5971 continue;
5972 case InlineAsm::isClobber:
5973 // Nothing to do.
5974 break;
5975 }
5976
5977 if (OpInfo.CallOperandVal) {
5978 llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
5979 if (OpInfo.isIndirect) {
5980 OpTy = Call.getParamElementType(ArgNo);
5981 assert(OpTy && "Indirect operand must have elementtype attribute");
5982 }
5983
5984 // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
5985 if (StructType *STy = dyn_cast<StructType>(OpTy))
5986 if (STy->getNumElements() == 1)
5987 OpTy = STy->getElementType(0);
5988
5989 // If OpTy is not a single value, it may be a struct/union that we
5990 // can tile with integers.
5991 if (!OpTy->isSingleValueType() && OpTy->isSized()) {
5992 unsigned BitSize = DL.getTypeSizeInBits(OpTy);
5993 switch (BitSize) {
5994 default: break;
5995 case 1:
5996 case 8:
5997 case 16:
5998 case 32:
5999 case 64:
6000 case 128:
6001 OpTy = IntegerType::get(OpTy->getContext(), BitSize);
6002 break;
6003 }
6004 }
6005
6006 EVT VT = getAsmOperandValueType(DL, OpTy, true);
6007 OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
6008 ArgNo++;
6009 }
6010 }
6011
6012 // If we have multiple alternative constraints, select the best alternative.
6013 if (!ConstraintOperands.empty()) {
6014 if (maCount) {
6015 unsigned bestMAIndex = 0;
6016 int bestWeight = -1;
6017 // weight: -1 = invalid match, and 0 = so-so match to 5 = good match.
6018 int weight = -1;
6019 unsigned maIndex;
6020 // Compute the sums of the weights for each alternative, keeping track
6021 // of the best (highest weight) one so far.
6022 for (maIndex = 0; maIndex < maCount; ++maIndex) {
6023 int weightSum = 0;
6024 for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
6025 cIndex != eIndex; ++cIndex) {
6026 AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
6027 if (OpInfo.Type == InlineAsm::isClobber)
6028 continue;
6029
6030 // If this is an output operand with a matching input operand,
6031 // look up the matching input. If their types mismatch, e.g. one
6032 // is an integer, the other is floating point, or their sizes are
6033 // different, flag it as an maCantMatch.
6034 if (OpInfo.hasMatchingInput()) {
6035 AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
6036 if (OpInfo.ConstraintVT != Input.ConstraintVT) {
6037 if ((OpInfo.ConstraintVT.isInteger() !=
6038 Input.ConstraintVT.isInteger()) ||
6039 (OpInfo.ConstraintVT.getSizeInBits() !=
6040 Input.ConstraintVT.getSizeInBits())) {
6041 weightSum = -1; // Can't match.
6042 break;
6043 }
6044 }
6045 }
6046 weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
6047 if (weight == -1) {
6048 weightSum = -1;
6049 break;
6050 }
6051 weightSum += weight;
6052 }
6053 // Update best.
6054 if (weightSum > bestWeight) {
6055 bestWeight = weightSum;
6056 bestMAIndex = maIndex;
6057 }
6058 }
6059
6060 // Now select chosen alternative in each constraint.
6061 for (AsmOperandInfo &cInfo : ConstraintOperands)
6062 if (cInfo.Type != InlineAsm::isClobber)
6063 cInfo.selectAlternative(bestMAIndex);
6064 }
6065 }
6066
6067 // Check and hook up tied operands, choose constraint code to use.
6068 for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
6069 cIndex != eIndex; ++cIndex) {
6070 AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
6071
6072 // If this is an output operand with a matching input operand, look up the
6073 // matching input. If their types mismatch, e.g. one is an integer, the
6074 // other is floating point, or their sizes are different, flag it as an
6075 // error.
6076 if (OpInfo.hasMatchingInput()) {
6077 AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
6078
6079 if (OpInfo.ConstraintVT != Input.ConstraintVT) {
6080 std::pair<unsigned, const TargetRegisterClass *> MatchRC =
6081 getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
6082 OpInfo.ConstraintVT);
6083 std::pair<unsigned, const TargetRegisterClass *> InputRC =
6084 getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
6085 Input.ConstraintVT);
6086 const bool OutOpIsIntOrFP = OpInfo.ConstraintVT.isInteger() ||
6087 OpInfo.ConstraintVT.isFloatingPoint();
6088 const bool InOpIsIntOrFP = Input.ConstraintVT.isInteger() ||
6089 Input.ConstraintVT.isFloatingPoint();
6090 if ((OutOpIsIntOrFP != InOpIsIntOrFP) ||
6091 (MatchRC.second != InputRC.second)) {
6092 report_fatal_error("Unsupported asm: input constraint"
6093 " with a matching output constraint of"
6094 " incompatible type!");
6095 }
6096 }
6097 }
6098 }
6099
6100 return ConstraintOperands;
6101}
6102
6103/// Return a number indicating our preference for choosing a type of constraint
6104/// over another, for the purpose of sorting them. Immediates are almost always
6105/// preferable (when they can be emitted). A higher return value means a
6106/// stronger preference for one constraint type relative to another.
6107/// FIXME: We should prefer registers over memory but doing so may lead to
6108/// unrecoverable register exhaustion later.
6109/// https://github.com/llvm/llvm-project/issues/20571
6110static unsigned getConstraintPiority(TargetLowering::ConstraintType CT) {
6111 switch (CT) {
6112 case TargetLowering::C_Immediate:
6113 case TargetLowering::C_Other:
6114 return 4;
6115 case TargetLowering::C_Memory:
6116 case TargetLowering::C_Address:
6117 return 3;
6118 case TargetLowering::C_RegisterClass:
6119 return 2;
6120 case TargetLowering::C_Register:
6121 return 1;
6122 case TargetLowering::C_Unknown:
6123 return 0;
6124 }
6125 llvm_unreachable("Invalid constraint type");
6126}
6127
6128/// Examine constraint type and operand type and determine a weight value.
6129/// This object must already have been set up with the operand type
6130/// and the current alternative constraint selected.
6131TargetLowering::ConstraintWeight
6132TargetLowering::getMultipleConstraintMatchWeight(
6133 AsmOperandInfo &info, int maIndex) const {
6134 InlineAsm::ConstraintCodeVector *rCodes;
6135 if (maIndex >= (int)info.multipleAlternatives.size())
6136 rCodes = &info.Codes;
6137 else
6138 rCodes = &info.multipleAlternatives[maIndex].Codes;
6139 ConstraintWeight BestWeight = CW_Invalid;
6140
6141 // Loop over the options, keeping track of the most general one.
6142 for (const std::string &rCode : *rCodes) {
6143 ConstraintWeight weight =
6144 getSingleConstraintMatchWeight(info, rCode.c_str());
6145 if (weight > BestWeight)
6146 BestWeight = weight;
6147 }
6148
6149 return BestWeight;
6150}
6151
6152/// Examine constraint type and operand type and determine a weight value.
6153/// This object must already have been set up with the operand type
6154/// and the current alternative constraint selected.
6155TargetLowering::ConstraintWeight
6156TargetLowering::getSingleConstraintMatchWeight(
6157 AsmOperandInfo &info, const char *constraint) const {
6158 ConstraintWeight weight = CW_Invalid;
6159 Value *CallOperandVal = info.CallOperandVal;
6160 // If we don't have a value, we can't do a match,
6161 // but allow it at the lowest weight.
6162 if (!CallOperandVal)
6163 return CW_Default;
6164 // Look at the constraint type.
6165 switch (*constraint) {
6166 case 'i': // immediate integer.
6167 case 'n': // immediate integer with a known value.
6168 if (isa<ConstantInt>(CallOperandVal))
6169 weight = CW_Constant;
6170 break;
6171 case 's': // non-explicit integral immediate.
6172 if (isa<GlobalValue>(CallOperandVal))
6173 weight = CW_Constant;
6174 break;
6175 case 'E': // immediate float if host format.
6176 case 'F': // immediate float.
6177 if (isa<ConstantFP>(CallOperandVal))
6178 weight = CW_Constant;
6179 break;
6180 case '<': // memory operand with autodecrement.
6181 case '>': // memory operand with autoincrement.
6182 case 'm': // memory operand.
6183 case 'o': // offsettable memory operand
6184 case 'V': // non-offsettable memory operand
6185 weight = CW_Memory;
6186 break;
6187 case 'r': // general register.
6188 case 'g': // general register, memory operand or immediate integer.
6189 // note: Clang converts "g" to "imr".
6190 if (CallOperandVal->getType()->isIntegerTy())
6191 weight = CW_Register;
6192 break;
6193 case 'X': // any operand.
6194 default:
6195 weight = CW_Default;
6196 break;
6197 }
6198 return weight;
6199}
6200
6201/// If there are multiple different constraints that we could pick for this
6202/// operand (e.g. "imr") try to pick the 'best' one.
6203/// This is somewhat tricky: constraints (TargetLowering::ConstraintType) fall
6204/// into seven classes:
6205/// Register -> one specific register
6206/// RegisterClass -> a group of regs
6207/// Memory -> memory
6208/// Address -> a symbolic memory reference
6209/// Immediate -> immediate values
6210/// Other -> magic values (such as "Flag Output Operands")
6211/// Unknown -> something we don't recognize yet and can't handle
6212/// Ideally, we would pick the most specific constraint possible: if we have
6213/// something that fits into a register, we would pick it. The problem here
6214/// is that if we have something that could either be in a register or in
6215/// memory, use of the register could cause selection of *other*
6216/// operands to fail: they might only succeed if we pick memory. Because of
6217/// this the heuristic we use is:
6218///
6219/// 1) If there is an 'other' constraint, and if the operand is valid for
6220/// that constraint, use it. This makes us take advantage of 'i'
6221/// constraints when available.
6222/// 2) Otherwise, pick the most general constraint present. This prefers
6223/// 'm' over 'r', for example.
6224///
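/// For instance, given "imr" with a constant operand, rule 1 selects 'i';
/// with a non-constant operand the immediate cannot be lowered, and the
/// sorted preferences fall through to 'm'.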
6225TargetLowering::ConstraintGroup TargetLowering::getConstraintPreferences(
6226 TargetLowering::AsmOperandInfo &OpInfo) const {
6227 ConstraintGroup Ret;
6228
6229 Ret.reserve(OpInfo.Codes.size());
6230 for (StringRef Code : OpInfo.Codes) {
6231 TargetLowering::ConstraintType CType = getConstraintType(Code);
6232
6233 // Indirect 'other' or 'immediate' constraints are not allowed.
6234 if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory ||
6235 CType == TargetLowering::C_Register ||
6236 CType == TargetLowering::C_RegisterClass))
6237 continue;
6238
6239 // Things with matching constraints can only be registers, per gcc
6240 // documentation. This mainly affects "g" constraints.
6241 if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
6242 continue;
6243
6244 Ret.emplace_back(Code, CType);
6245 }
6246
6247 llvm::stable_sort(Ret, [](ConstraintPair a, ConstraintPair b) {
6248 return getConstraintPiority(a.second) > getConstraintPiority(b.second);
6249 });
6250
6251 return Ret;
6252}
6253
6254/// If we have an immediate, see if we can lower it. Return true if we can,
6255/// false otherwise.
6256static bool lowerImmediateIfPossible(TargetLowering::ConstraintPair &P,
6257 SDValue Op, SelectionDAG *DAG,
6258 const TargetLowering &TLI) {
6259
6260 assert((P.second == TargetLowering::C_Other ||
6261 P.second == TargetLowering::C_Immediate) &&
6262 "need immediate or other");
6263
6264 if (!Op.getNode())
6265 return false;
6266
6267 std::vector<SDValue> ResultOps;
6268 TLI.LowerAsmOperandForConstraint(Op, P.first, ResultOps, *DAG);
6269 return !ResultOps.empty();
6270}
6271
6272/// Determines the constraint code and constraint type to use for the specific
6273/// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
6274void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
6275 SDValue Op,
6276 SelectionDAG *DAG) const {
6277 assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
6278
6279 // Single-letter constraints ('r') are very common.
6280 if (OpInfo.Codes.size() == 1) {
6281 OpInfo.ConstraintCode = OpInfo.Codes[0];
6282 OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
6283 } else {
6284 ConstraintGroup G = getConstraintPreferences(OpInfo);
6285 if (G.empty())
6286 return;
6287
6288 unsigned BestIdx = 0;
6289 for (const unsigned E = G.size();
6290 BestIdx < E && (G[BestIdx].second == TargetLowering::C_Other ||
6291 G[BestIdx].second == TargetLowering::C_Immediate);
6292 ++BestIdx) {
6293 if (lowerImmediateIfPossible(G[BestIdx], Op, DAG, *this))
6294 break;
6295 // If we're out of constraints, just pick the first one.
6296 if (BestIdx + 1 == E) {
6297 BestIdx = 0;
6298 break;
6299 }
6300 }
6301
6302 OpInfo.ConstraintCode = G[BestIdx].first;
6303 OpInfo.ConstraintType = G[BestIdx].second;
6304 }
6305
6306 // 'X' matches anything.
6307 if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
6308 // Constants are handled elsewhere. For Functions, the type here is the
6309 // type of the result, which is not what we want to look at; leave them
6310 // alone.
6311 Value *v = OpInfo.CallOperandVal;
6312 if (isa<ConstantInt>(v) || isa<Function>(v)) {
6313 return;
6314 }
6315
6316 if (isa<BasicBlock>(v) || isa<BlockAddress>(v)) {
6317 OpInfo.ConstraintCode = "i";
6318 return;
6319 }
6320
6321 // Otherwise, try to resolve it to something we know about by looking at
6322 // the actual operand type.
6323 if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
6324 OpInfo.ConstraintCode = Repl;
6325 OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
6326 }
6327 }
6328}
6329
6330/// Given an exact SDIV by a constant, create a multiplication
6331/// with the multiplicative inverse of the constant.
6332/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
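/// For example, an exact 'sdiv X, 6' shifts right by 1 (the known-zero low
/// bit) and multiplies by the inverse of 3 modulo 2^BW, which for i32 is
/// 0xAAAAAAAB since 3 * 0xAAAAAAAB == 1 (mod 2^32).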
6333static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
6334 const SDLoc &dl, SelectionDAG &DAG,
6335 SmallVectorImpl<SDNode *> &Created) {
6336 SDValue Op0 = N->getOperand(0);
6337 SDValue Op1 = N->getOperand(1);
6338 EVT VT = N->getValueType(0);
6339 EVT SVT = VT.getScalarType();
6340 EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
6341 EVT ShSVT = ShVT.getScalarType();
6342
6343 bool UseSRA = false;
6344 SmallVector<SDValue, 16> Shifts, Factors;
6345
6346 auto BuildSDIVPattern = [&](ConstantSDNode *C) {
6347 if (C->isZero())
6348 return false;
6349 APInt Divisor = C->getAPIntValue();
6350 unsigned Shift = Divisor.countr_zero();
6351 if (Shift) {
6352 Divisor.ashrInPlace(Shift);
6353 UseSRA = true;
6354 }
6355 APInt Factor = Divisor.multiplicativeInverse();
6356 Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
6357 Factors.push_back(DAG.getConstant(Factor, dl, SVT));
6358 return true;
6359 };
6360
6361 // Collect all magic values from the build vector.
6362 if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern))
6363 return SDValue();
6364
6365 SDValue Shift, Factor;
6366 if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
6367 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6368 Factor = DAG.getBuildVector(VT, dl, Factors);
6369 } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
6370 assert(Shifts.size() == 1 && Factors.size() == 1 &&
6371 "Expected matchUnaryPredicate to return one element for scalable "
6372 "vectors");
6373 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6374 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6375 } else {
6376 assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
6377 Shift = Shifts[0];
6378 Factor = Factors[0];
6379 }
6380
6381 SDValue Res = Op0;
6382 if (UseSRA) {
6383 Res = DAG.getNode(ISD::SRA, dl, VT, Res, Shift, SDNodeFlags::Exact);
6384 Created.push_back(Res.getNode());
6385 }
6386
6387 return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
6388}
6389
6390/// Given an exact UDIV by a constant, create a multiplication
6391/// with the multiplicative inverse of the constant.
6392/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
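/// Mirrors BuildExactSDIV above but with a logical shift: e.g. an exact
/// 'udiv X, 10' shifts right by 1 and multiplies by the inverse of 5 modulo
/// 2^BW (0xCCCCCCCD for i32).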
6393static SDValue BuildExactUDIV(const TargetLowering &TLI, SDNode *N,
6394 const SDLoc &dl, SelectionDAG &DAG,
6395 SmallVectorImpl<SDNode *> &Created) {
6396 EVT VT = N->getValueType(0);
6397 EVT SVT = VT.getScalarType();
6398 EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
6399 EVT ShSVT = ShVT.getScalarType();
6400
6401 bool UseSRL = false;
6402 SmallVector<SDValue, 16> Shifts, Factors;
6403
6404 auto BuildUDIVPattern = [&](ConstantSDNode *C) {
6405 if (C->isZero())
6406 return false;
6407 APInt Divisor = C->getAPIntValue();
6408 unsigned Shift = Divisor.countr_zero();
6409 if (Shift) {
6410 Divisor.lshrInPlace(Shift);
6411 UseSRL = true;
6412 }
6413 // Calculate the multiplicative inverse modulo BW.
6414 APInt Factor = Divisor.multiplicativeInverse();
6415 Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
6416 Factors.push_back(DAG.getConstant(Factor, dl, SVT));
6417 return true;
6418 };
6419
6420 SDValue Op1 = N->getOperand(1);
6421
6422 // Collect all magic values from the build vector.
6423 if (!ISD::matchUnaryPredicate(Op1, BuildUDIVPattern))
6424 return SDValue();
6425
6426 SDValue Shift, Factor;
6427 if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
6428 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6429 Factor = DAG.getBuildVector(VT, dl, Factors);
6430 } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
6431 assert(Shifts.size() == 1 && Factors.size() == 1 &&
6432 "Expected matchUnaryPredicate to return one element for scalable "
6433 "vectors");
6434 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6435 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6436 } else {
6437 assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
6438 Shift = Shifts[0];
6439 Factor = Factors[0];
6440 }
6441
6442 SDValue Res = N->getOperand(0);
6443 if (UseSRL) {
6444 Res = DAG.getNode(ISD::SRL, dl, VT, Res, Shift, SDNodeFlags::Exact);
6445 Created.push_back(Res.getNode());
6446 }
6447
6448 return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
6449}
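// Worked example (illustrative, not from the source): for an exact
// "udiv i32 %x, 10", 10 = 5 << 1, so Shift = 1 and Factor = 0xCCCCCCCD,
// the inverse of 5 modulo 2^32 (5 * 0xCCCCCCCD == 1 (mod 2^32)). With
// x = 30: (30 srl 1) * 0xCCCCCCCD == 15 * 0xCCCCCCCD == 3 (mod 2^32).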
6450
6451SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
6452 SelectionDAG &DAG,
6453 SmallVectorImpl<SDNode *> &Created) const {
6454 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6455 if (isIntDivCheap(N->getValueType(0), Attr))
6456 return SDValue(N, 0); // Lower SDIV as SDIV
6457 return SDValue();
6458}
6459
6460SDValue
6461TargetLowering::BuildSREMPow2(SDNode *N, const APInt &Divisor,
6462 SelectionDAG &DAG,
6463 SmallVectorImpl<SDNode *> &Created) const {
6464 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6465 if (isIntDivCheap(N->getValueType(0), Attr))
6466 return SDValue(N, 0); // Lower SREM as SREM
6467 return SDValue();
6468}
6469
6470/// Build sdiv by power-of-2 with conditional move instructions
6471/// Ref: "Hacker's Delight" by Henry Warren 10-1
6472/// If conditional move/branch is preferred, we lower sdiv x, +/-2**k into:
6473/// bgez x, label
6474/// add x, x, 2**k-1
6475/// label:
6476/// sra res, x, k
6477/// neg res, res (when the divisor is negative)
6478SDValue TargetLowering::buildSDIVPow2WithCMov(
6479 SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
6480 SmallVectorImpl<SDNode *> &Created) const {
6481 unsigned Lg2 = Divisor.countr_zero();
6482 EVT VT = N->getValueType(0);
6483
6484 SDLoc DL(N);
6485 SDValue N0 = N->getOperand(0);
6486 SDValue Zero = DAG.getConstant(0, DL, VT);
6487 APInt Lg2Mask = APInt::getLowBitsSet(VT.getSizeInBits(), Lg2);
6488 SDValue Pow2MinusOne = DAG.getConstant(Lg2Mask, DL, VT);
6489
6490 // If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
6491 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6492 SDValue Cmp = DAG.getSetCC(DL, CCVT, N0, Zero, ISD::SETLT);
6493 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
6494 SDValue CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);
6495
6496 Created.push_back(Cmp.getNode());
6497 Created.push_back(Add.getNode());
6498 Created.push_back(CMov.getNode());
6499
6500 // Divide by pow2.
6501 SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, CMov,
6502 DAG.getShiftAmountConstant(Lg2, VT, DL));
6503
6504 // If we're dividing by a positive value, we're done. Otherwise, we must
6505 // negate the result.
6506 if (Divisor.isNonNegative())
6507 return SRA;
6508
6509 Created.push_back(SRA.getNode());
6510 return DAG.getNode(ISD::SUB, DL, VT, Zero, SRA);
6511}
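// Worked example (illustrative, not from the source): for "sdiv i32 %x, 8",
// Lg2 = 3 and the sequence is: cmp = x < 0; cmov = cmp ? x + 7 : x;
// res = cmov sra 3. With x = -9: cmov = -2 and -2 sra 3 = -1, matching
// truncating division, whereas a plain "x sra 3" alone would yield -2.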
6512
6513/// Given an ISD::SDIV node expressing a divide by constant,
6514/// return a DAG expression to select that will generate the same value by
6515/// multiplying by a magic number.
6516/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
6517SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
6518 bool IsAfterLegalization,
6519 bool IsAfterLegalTypes,
6520 SmallVectorImpl<SDNode *> &Created) const {
6521 SDLoc dl(N);
6522 EVT VT = N->getValueType(0);
6523 EVT SVT = VT.getScalarType();
6524 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6525 EVT ShSVT = ShVT.getScalarType();
6526 unsigned EltBits = VT.getScalarSizeInBits();
6527 EVT MulVT;
6528
6529 // Check to see if we can do this.
6530 // FIXME: We should be more aggressive here.
6531 if (!isTypeLegal(VT)) {
6532 // Limit this to simple scalars for now.
6533 if (VT.isVector() || !VT.isSimple())
6534 return SDValue();
6535
6536 // If this type will be promoted to a large enough type with a legal
6537 // multiply operation, we can go ahead and do this transform.
6538 if (getTypeAction(VT.getSimpleVT()) != TypePromoteInteger)
6539 return SDValue();
6540
6541 MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
6542 if (MulVT.getSizeInBits() < (2 * EltBits) ||
6543 !isOperationLegal(ISD::MUL, MulVT))
6544 return SDValue();
6545 }
6546
6547 // If the sdiv has an 'exact' bit we can use a simpler lowering.
6548 if (N->getFlags().hasExact())
6549 return BuildExactSDIV(*this, N, dl, DAG, Created);
6550
6551 SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;
6552
6553 auto BuildSDIVPattern = [&](ConstantSDNode *C) {
6554 if (C->isZero())
6555 return false;
6556
6557 const APInt &Divisor = C->getAPIntValue();
6558 SignedDivisionByConstantInfo magics = SignedDivisionByConstantInfo::get(Divisor);
6559 int NumeratorFactor = 0;
6560 int ShiftMask = -1;
6561
6562 if (Divisor.isOne() || Divisor.isAllOnes()) {
6563 // If d is +1/-1, we just multiply the numerator by +1/-1.
6564 NumeratorFactor = Divisor.getSExtValue();
6565 magics.Magic = 0;
6566 magics.ShiftAmount = 0;
6567 ShiftMask = 0;
6568 } else if (Divisor.isStrictlyPositive() && magics.Magic.isNegative()) {
6569 // If d > 0 and m < 0, add the numerator.
6570 NumeratorFactor = 1;
6571 } else if (Divisor.isNegative() && magics.Magic.isStrictlyPositive()) {
6572 // If d < 0 and m > 0, subtract the numerator.
6573 NumeratorFactor = -1;
6574 }
6575
6576 MagicFactors.push_back(DAG.getConstant(magics.Magic, dl, SVT));
6577 Factors.push_back(DAG.getSignedConstant(NumeratorFactor, dl, SVT));
6578 Shifts.push_back(DAG.getConstant(magics.ShiftAmount, dl, ShSVT));
6579 ShiftMasks.push_back(DAG.getSignedConstant(ShiftMask, dl, SVT));
6580 return true;
6581 };
6582
6583 SDValue N0 = N->getOperand(0);
6584 SDValue N1 = N->getOperand(1);
6585
6586 // Collect the shifts / magic values from each element.
6587 if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern))
6588 return SDValue();
6589
6590 SDValue MagicFactor, Factor, Shift, ShiftMask;
6591 if (N1.getOpcode() == ISD::BUILD_VECTOR) {
6592 MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
6593 Factor = DAG.getBuildVector(VT, dl, Factors);
6594 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6595 ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks);
6596 } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
6597 assert(MagicFactors.size() == 1 && Factors.size() == 1 &&
6598 Shifts.size() == 1 && ShiftMasks.size() == 1 &&
6599 "Expected matchUnaryPredicate to return one element for scalable "
6600 "vectors");
6601 MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
6602 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6603 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6604 ShiftMask = DAG.getSplatVector(VT, dl, ShiftMasks[0]);
6605 } else {
6606 assert(isa<ConstantSDNode>(N1) && "Expected a constant");
6607 MagicFactor = MagicFactors[0];
6608 Factor = Factors[0];
6609 Shift = Shifts[0];
6610 ShiftMask = ShiftMasks[0];
6611 }
6612
6613 // Multiply the numerator (operand 0) by the magic value.
6614 // FIXME: We should support doing a MUL in a wider type.
6615 auto GetMULHS = [&](SDValue X, SDValue Y) {
6616 // If the type isn't legal, use a wider mul of the type calculated
6617 // earlier.
6618 if (!isTypeLegal(VT)) {
6619 X = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, X);
6620 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, Y);
6621 Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
6622 Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
6623 DAG.getShiftAmountConstant(EltBits, MulVT, dl));
6624 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6625 }
6626
6627 if (isOperationLegalOrCustom(ISD::MULHS, VT, IsAfterLegalization))
6628 return DAG.getNode(ISD::MULHS, dl, VT, X, Y);
6629 if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT, IsAfterLegalization)) {
6630 SDValue LoHi =
6631 DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
6632 return SDValue(LoHi.getNode(), 1);
6633 }
6634 // If a type twice as wide is legal, widen and use a mul plus a shift.
6635 unsigned Size = VT.getScalarSizeInBits();
6636 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), Size * 2);
6637 if (VT.isVector())
6638 WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
6639 VT.getVectorElementCount());
6640 // Some targets like AMDGPU try to go from SDIV to SDIVREM which is then
6641 // custom lowered. This is very expensive so avoid it at all costs for
6642 // constant divisors.
6643 if ((!IsAfterLegalTypes && isOperationExpand(ISD::SDIV, VT) &&
6644 isOperationCustom(ISD::SDIVREM, VT.getScalarType())) ||
6645 isOperationLegalOrCustom(ISD::MUL, WideVT)) {
6646 X = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, X);
6647 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, Y);
6648 Y = DAG.getNode(ISD::MUL, dl, WideVT, X, Y);
6649 Y = DAG.getNode(ISD::SRL, dl, WideVT, Y,
6650 DAG.getShiftAmountConstant(EltBits, WideVT, dl));
6651 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6652 }
6653 return SDValue();
6654 };
6655
6656 SDValue Q = GetMULHS(N0, MagicFactor);
6657 if (!Q)
6658 return SDValue();
6659
6660 Created.push_back(Q.getNode());
6661
6662 // (Optionally) Add/subtract the numerator using Factor.
6663 Factor = DAG.getNode(ISD::MUL, dl, VT, N0, Factor);
6664 Created.push_back(Factor.getNode());
6665 Q = DAG.getNode(ISD::ADD, dl, VT, Q, Factor);
6666 Created.push_back(Q.getNode());
6667
6668 // Shift right algebraic by shift value.
6669 Q = DAG.getNode(ISD::SRA, dl, VT, Q, Shift);
6670 Created.push_back(Q.getNode());
6671
6672 // Extract the sign bit, mask it and add it to the quotient.
6673 SDValue SignShift = DAG.getConstant(EltBits - 1, dl, ShVT);
6674 SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, SignShift);
6675 Created.push_back(T.getNode());
6676 T = DAG.getNode(ISD::AND, dl, VT, T, ShiftMask);
6677 Created.push_back(T.getNode());
6678 return DAG.getNode(ISD::ADD, dl, VT, Q, T);
6679}
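// Worked example (illustrative, not from the source): for "sdiv i32 %x, 7"
// the magic constant is 0x92492493 with ShiftAmount = 2; since d > 0 and the
// magic is negative, NumeratorFactor = 1. The emitted sequence is:
//   q = mulhs(x, 0x92492493) + x;  q = q sra 2;  q += q srl 31;
// With x = 21: mulhs yields -9, q = -9 + 21 = 12, 12 sra 2 = 3, and the
// sign-bit term is 0, giving 3.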
6680
6681/// Given an ISD::UDIV node expressing a divide by constant,
6682/// return a DAG expression to select that will generate the same value by
6683/// multiplying by a magic number.
6684/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
6685SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
6686 bool IsAfterLegalization,
6687 bool IsAfterLegalTypes,
6688 SmallVectorImpl<SDNode *> &Created) const {
6689 SDLoc dl(N);
6690 EVT VT = N->getValueType(0);
6691 EVT SVT = VT.getScalarType();
6692 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6693 EVT ShSVT = ShVT.getScalarType();
6694 unsigned EltBits = VT.getScalarSizeInBits();
6695 EVT MulVT;
6696
6697 // Check to see if we can do this.
6698 // FIXME: We should be more aggressive here.
6699 if (!isTypeLegal(VT)) {
6700 // Limit this to simple scalars for now.
6701 if (VT.isVector() || !VT.isSimple())
6702 return SDValue();
6703
6704 // If this type will be promoted to a large enough type with a legal
6705 // multiply operation, we can go ahead and do this transform.
6706 if (getTypeAction(VT.getSimpleVT()) != TypePromoteInteger)
6707 return SDValue();
6708
6709 MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
6710 if (MulVT.getSizeInBits() < (2 * EltBits) ||
6711 !isOperationLegal(ISD::MUL, MulVT))
6712 return SDValue();
6713 }
6714
6715 // If the udiv has an 'exact' bit we can use a simpler lowering.
6716 if (N->getFlags().hasExact())
6717 return BuildExactUDIV(*this, N, dl, DAG, Created);
6718
6719 SDValue N0 = N->getOperand(0);
6720 SDValue N1 = N->getOperand(1);
6721
6722 // Try to use leading zeros of the dividend to reduce the multiplier and
6723 // avoid expensive fixups.
6724 unsigned KnownLeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros();
6725
6726 bool UseNPQ = false, UsePreShift = false, UsePostShift = false;
6727 SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
6728
6729 auto BuildUDIVPattern = [&](ConstantSDNode *C) {
6730 if (C->isZero())
6731 return false;
6732 const APInt& Divisor = C->getAPIntValue();
6733
6734 SDValue PreShift, MagicFactor, NPQFactor, PostShift;
6735
6736 // Magic algorithm doesn't work for division by 1. We need to emit a select
6737 // at the end.
6738 if (Divisor.isOne()) {
6739 PreShift = PostShift = DAG.getUNDEF(ShSVT);
6740 MagicFactor = NPQFactor = DAG.getUNDEF(SVT);
6741 } else {
6742 UnsignedDivisionByConstantInfo magics =
6743 UnsignedDivisionByConstantInfo::get(
6744 Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()));
6745
6746 MagicFactor = DAG.getConstant(magics.Magic, dl, SVT);
6747
6748 assert(magics.PreShift < Divisor.getBitWidth() &&
6749 "We shouldn't generate an undefined shift!");
6750 assert(magics.PostShift < Divisor.getBitWidth() &&
6751 "We shouldn't generate an undefined shift!");
6752 assert((!magics.IsAdd || magics.PreShift == 0) &&
6753 "Unexpected pre-shift");
6754 PreShift = DAG.getConstant(magics.PreShift, dl, ShSVT);
6755 PostShift = DAG.getConstant(magics.PostShift, dl, ShSVT);
6756 NPQFactor = DAG.getConstant(
6757 magics.IsAdd ? APInt::getOneBitSet(EltBits, EltBits - 1)
6758 : APInt::getZero(EltBits),
6759 dl, SVT);
6760 UseNPQ |= magics.IsAdd;
6761 UsePreShift |= magics.PreShift != 0;
6762 UsePostShift |= magics.PostShift != 0;
6763 }
6764
6765 PreShifts.push_back(PreShift);
6766 MagicFactors.push_back(MagicFactor);
6767 NPQFactors.push_back(NPQFactor);
6768 PostShifts.push_back(PostShift);
6769 return true;
6770 };
6771
6772 // Collect the shifts/magic values from each element.
6773 if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern))
6774 return SDValue();
6775
6776 SDValue PreShift, PostShift, MagicFactor, NPQFactor;
6777 if (N1.getOpcode() == ISD::BUILD_VECTOR) {
6778 PreShift = DAG.getBuildVector(ShVT, dl, PreShifts);
6779 MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
6780 NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors);
6781 PostShift = DAG.getBuildVector(ShVT, dl, PostShifts);
6782 } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
6783 assert(PreShifts.size() == 1 && MagicFactors.size() == 1 &&
6784 NPQFactors.size() == 1 && PostShifts.size() == 1 &&
6785 "Expected matchUnaryPredicate to return one for scalable vectors");
6786 PreShift = DAG.getSplatVector(ShVT, dl, PreShifts[0]);
6787 MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
6788 NPQFactor = DAG.getSplatVector(VT, dl, NPQFactors[0]);
6789 PostShift = DAG.getSplatVector(ShVT, dl, PostShifts[0]);
6790 } else {
6791 assert(isa<ConstantSDNode>(N1) && "Expected a constant");
6792 PreShift = PreShifts[0];
6793 MagicFactor = MagicFactors[0];
6794 PostShift = PostShifts[0];
6795 }
6796
6797 SDValue Q = N0;
6798 if (UsePreShift) {
6799 Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
6800 Created.push_back(Q.getNode());
6801 }
6802
6803 // FIXME: We should support doing a MUL in a wider type.
6804 auto GetMULHU = [&](SDValue X, SDValue Y) {
6805 // If the type isn't legal, use a wider mul of the type calculated
6806 // earlier.
6807 if (!isTypeLegal(VT)) {
6808 X = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, X);
6809 Y = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, Y);
6810 Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
6811 Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
6812 DAG.getShiftAmountConstant(EltBits, MulVT, dl));
6813 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6814 }
6815
6816 if (isOperationLegalOrCustom(ISD::MULHU, VT, IsAfterLegalization))
6817 return DAG.getNode(ISD::MULHU, dl, VT, X, Y);
6818 if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT, IsAfterLegalization)) {
6819 SDValue LoHi =
6820 DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
6821 return SDValue(LoHi.getNode(), 1);
6822 }
6823 // If a type twice as wide is legal, widen and use a mul plus a shift.
6824 unsigned Size = VT.getScalarSizeInBits();
6825 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), Size * 2);
6826 if (VT.isVector())
6827 WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
6828 VT.getVectorElementCount());
6829 // Some targets like AMDGPU try to go from UDIV to UDIVREM which is then
6830 // custom lowered. This is very expensive so avoid it at all costs for
6831 // constant divisors.
6832 if ((!IsAfterLegalTypes && isOperationExpand(ISD::UDIV, VT) &&
6833 isOperationCustom(ISD::UDIVREM, VT.getScalarType())) ||
6834 isOperationLegalOrCustom(ISD::MUL, WideVT)) {
6835 X = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, X);
6836 Y = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, Y);
6837 Y = DAG.getNode(ISD::MUL, dl, WideVT, X, Y);
6838 Y = DAG.getNode(ISD::SRL, dl, WideVT, Y,
6839 DAG.getShiftAmountConstant(EltBits, WideVT, dl));
6840 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6841 }
6842 return SDValue(); // No mulhu or equivalent
6843 };
6844
6845 // Multiply the numerator (operand 0) by the magic value.
6846 Q = GetMULHU(Q, MagicFactor);
6847 if (!Q)
6848 return SDValue();
6849
6850 Created.push_back(Q.getNode());
6851
6852 if (UseNPQ) {
6853 SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N0, Q);
6854 Created.push_back(NPQ.getNode());
6855
6856 // For vectors we might have a mix of non-NPQ/NPQ paths, so use
6857 // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
6858 if (VT.isVector())
6859 NPQ = GetMULHU(NPQ, NPQFactor);
6860 else
6861 NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, dl, ShVT));
6862
6863 Created.push_back(NPQ.getNode());
6864
6865 Q = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
6866 Created.push_back(Q.getNode());
6867 }
6868
6869 if (UsePostShift) {
6870 Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
6871 Created.push_back(Q.getNode());
6872 }
6873
6874 EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6875
6876 SDValue One = DAG.getConstant(1, dl, VT);
6877 SDValue IsOne = DAG.getSetCC(dl, SetCCVT, N1, One, ISD::SETEQ);
6878 return DAG.getSelect(dl, VT, IsOne, N0, Q);
6879}
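// Worked example (illustrative, not from the source): for "udiv i32 %x, 7"
// the magic constant is 0x24924925 with IsAdd set (the NPQ path) and
// PostShift = 2:
//   q = mulhu(x, 0x24924925);  npq = (x - q) srl 1;  q = (q + npq) srl 2;
// With x = 21: q = 3, npq = (21 - 3) srl 1 = 9, and (3 + 9) srl 2 = 3.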
6880
6881/// If all values in Values that *don't* match the predicate are the same 'splat'
6882/// value, then replace all values with that splat value.
6883/// Else, if AlternativeReplacement was provided, then replace all values that
6884/// do match predicate with AlternativeReplacement value.
6885static void
6886turnVectorIntoSplatVector(MutableArrayRef<SDValue> Values,
6887 std::function<bool(SDValue)> Predicate,
6888 SDValue AlternativeReplacement = SDValue()) {
6889 SDValue Replacement;
6890 // Is there a value for which the Predicate does *NOT* match? What is it?
6891 auto SplatValue = llvm::find_if_not(Values, Predicate);
6892 if (SplatValue != Values.end()) {
6893 // Does Values consist only of SplatValue's and values matching Predicate?
6894 if (llvm::all_of(Values, [Predicate, SplatValue](SDValue Value) {
6895 return Value == *SplatValue || Predicate(Value);
6896 })) // Then we shall replace values matching predicate with SplatValue.
6897 Replacement = *SplatValue;
6898 }
6899 if (!Replacement) {
6900 // Oops, we did not find the "baseline" splat value.
6901 if (!AlternativeReplacement)
6902 return; // Nothing to do.
6903 // Let's replace with provided value then.
6904 Replacement = AlternativeReplacement;
6905 }
6906 std::replace_if(Values.begin(), Values.end(), Predicate, Replacement);
6907}
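// Usage sketch (illustrative): given Values = {X, 0, X, 0} and the
// isNullConstant predicate, the non-matching values all equal X, so the
// zeros are replaced and Values becomes {X, X, X, X}. If the non-matching
// values differ, Values is left untouched unless AlternativeReplacement
// is provided.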
6908
6909/// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
6910/// where the divisor is constant and the comparison target is zero,
6911/// return a DAG expression that will generate the same comparison result
6912/// using only multiplications, additions and shifts/rotations.
6913/// Ref: "Hacker's Delight" 10-17.
6914SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
6915 SDValue CompTargetNode,
6916 ISD::CondCode Cond,
6917 DAGCombinerInfo &DCI,
6918 const SDLoc &DL) const {
6919 SmallVector<SDNode *, 5> Built;
6920 if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
6921 DCI, DL, Built)) {
6922 for (SDNode *N : Built)
6923 DCI.AddToWorklist(N);
6924 return Folded;
6925 }
6926
6927 return SDValue();
6928}
6929
6930SDValue
6931TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
6932 SDValue CompTargetNode, ISD::CondCode Cond,
6933 DAGCombinerInfo &DCI, const SDLoc &DL,
6934 SmallVectorImpl<SDNode *> &Created) const {
6935 // fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q)
6936 // - D must be constant, with D = D0 * 2^K where D0 is odd
6937 // - P is the multiplicative inverse of D0 modulo 2^W
6938 // - Q = floor(((2^W) - 1) / D)
6939 // where W is the width of the common type of N and D.
6940 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
6941 "Only applicable for (in)equality comparisons.");
6942
6943 SelectionDAG &DAG = DCI.DAG;
6944
6945 EVT VT = REMNode.getValueType();
6946 EVT SVT = VT.getScalarType();
6947 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6948 EVT ShSVT = ShVT.getScalarType();
6949
6950 // If MUL is unavailable, we cannot proceed in any case.
6951 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
6952 return SDValue();
6953
6954 bool ComparingWithAllZeros = true;
6955 bool AllComparisonsWithNonZerosAreTautological = true;
6956 bool HadTautologicalLanes = false;
6957 bool AllLanesAreTautological = true;
6958 bool HadEvenDivisor = false;
6959 bool AllDivisorsArePowerOfTwo = true;
6960 bool HadTautologicalInvertedLanes = false;
6961 SmallVector<SDValue, 16> PAmts, KAmts, QAmts;
6962
6963 auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
6964 // Division by 0 is UB. Leave it to be constant-folded elsewhere.
6965 if (CDiv->isZero())
6966 return false;
6967
6968 const APInt &D = CDiv->getAPIntValue();
6969 const APInt &Cmp = CCmp->getAPIntValue();
6970
6971 ComparingWithAllZeros &= Cmp.isZero();
6972
6973 // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
6974 // if C2 is not less than C1, the comparison is always false.
6975 // But we will only be able to produce the comparison that will give the
6976 // opposite tautological answer. So this lane would need to be fixed up.
6977 bool TautologicalInvertedLane = D.ule(Cmp);
6978 HadTautologicalInvertedLanes |= TautologicalInvertedLane;
6979
6980 // If all lanes are tautological (either all divisors are ones, or divisor
6981 // is not greater than the constant we are comparing with),
6982 // we will prefer to avoid the fold.
6983 bool TautologicalLane = D.isOne() || TautologicalInvertedLane;
6984 HadTautologicalLanes |= TautologicalLane;
6985 AllLanesAreTautological &= TautologicalLane;
6986
6987 // If we are comparing with non-zero, we'll need to subtract said
6988 // comparison value from the LHS. But there is no point in doing that if
6989 // every lane where we are comparing with non-zero is tautological.
6990 if (!Cmp.isZero())
6991 AllComparisonsWithNonZerosAreTautological &= TautologicalLane;
6992
6993 // Decompose D into D0 * 2^K
6994 unsigned K = D.countr_zero();
6995 assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
6996 APInt D0 = D.lshr(K);
6997
6998 // D is even if it has trailing zeros.
6999 HadEvenDivisor |= (K != 0);
7000 // D is a power-of-two if D0 is one.
7001 // If all divisors are power-of-two, we will prefer to avoid the fold.
7002 AllDivisorsArePowerOfTwo &= D0.isOne();
7003
7004 // P = inv(D0, 2^W)
7005 // 2^W requires W + 1 bits, so we have to extend and then truncate.
7006 unsigned W = D.getBitWidth();
7007 APInt P = D0.multiplicativeInverse();
7008 assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
7009
7010 // Q = floor((2^W - 1) u/ D)
7011 // R = ((2^W - 1) u% D)
7012 APInt Q, R;
7013 APInt::udivrem(APInt::getAllOnes(W), D, Q, R);
7014
7015 // If we are comparing with zero, then that comparison constant is okay,
7016 // else it may need to be one less than that.
7017 if (Cmp.ugt(R))
7018 Q -= 1;
7019
7021 "We are expecting that K is always less than all-ones for ShSVT");
7022
7023 // If the lane is tautological the result can be constant-folded.
7024 if (TautologicalLane) {
7025 // Set P and K to bogus values so we can try to splat them.
7026 P = 0;
7027 K = -1;
7028 // And ensure that comparison constant is tautological,
7029 // it will always compare true/false.
7030 Q = -1;
7031 }
7032
7033 PAmts.push_back(DAG.getConstant(P, DL, SVT));
7034 KAmts.push_back(
7035 DAG.getConstant(APInt(ShSVT.getSizeInBits(), K, /*isSigned=*/false,
7036 /*implicitTrunc=*/true),
7037 DL, ShSVT));
7038 QAmts.push_back(DAG.getConstant(Q, DL, SVT));
7039 return true;
7040 };
7041
7042 SDValue N = REMNode.getOperand(0);
7043 SDValue D = REMNode.getOperand(1);
7044
7045 // Collect the values from each element.
7046 if (!ISD::matchBinaryPredicate(D, CompTargetNode, BuildUREMPattern))
7047 return SDValue();
7048
7049 // If all lanes are tautological, the result can be constant-folded.
7050 if (AllLanesAreTautological)
7051 return SDValue();
7052
7053 // If this is a urem by a power-of-two, avoid the fold since it can be
7054 // best implemented as a bit test.
7055 if (AllDivisorsArePowerOfTwo)
7056 return SDValue();
7057
7058 SDValue PVal, KVal, QVal;
7059 if (D.getOpcode() == ISD::BUILD_VECTOR) {
7060 if (HadTautologicalLanes) {
7061 // Try to turn PAmts into a splat, since we don't care about the values
7062 // that are currently '0'. If we can't, just keep '0'`s.
7063 turnVectorIntoSplatVector(PAmts, isNullConstant);
7064 // Try to turn KAmts into a splat, since we don't care about the values
7065 // that are currently '-1'. If we can't, change them to '0'`s.
7066 turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
7067 DAG.getConstant(0, DL, ShSVT));
7068 }
7069
7070 PVal = DAG.getBuildVector(VT, DL, PAmts);
7071 KVal = DAG.getBuildVector(ShVT, DL, KAmts);
7072 QVal = DAG.getBuildVector(VT, DL, QAmts);
7073 } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
7074 assert(PAmts.size() == 1 && KAmts.size() == 1 && QAmts.size() == 1 &&
7075 "Expected matchBinaryPredicate to return one element for "
7076 "SPLAT_VECTORs");
7077 PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
7078 KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
7079 QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
7080 } else {
7081 PVal = PAmts[0];
7082 KVal = KAmts[0];
7083 QVal = QAmts[0];
7084 }
7085
7086 if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
7087 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::SUB, VT))
7088 return SDValue(); // FIXME: Could/should use `ISD::ADD`?
7089 assert(CompTargetNode.getValueType() == N.getValueType() &&
7090 "Expecting that the types on LHS and RHS of comparisons match.");
7091 N = DAG.getNode(ISD::SUB, DL, VT, N, CompTargetNode);
7092 }
7093
7094 // (mul N, P)
7095 SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
7096 Created.push_back(Op0.getNode());
7097
7098 // Rotate right only if any divisor was even. We avoid rotates for all-odd
7099 // divisors as a performance improvement, since rotating by 0 is a no-op.
7100 if (HadEvenDivisor) {
7101 // We need ROTR to do this.
7102 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
7103 return SDValue();
7104 // UREM: (rotr (mul N, P), K)
7105 Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
7106 Created.push_back(Op0.getNode());
7107 }
7108
7109 // UREM: (setule/setugt (rotr (mul N, P), K), Q)
7110 SDValue NewCC =
7111 DAG.getSetCC(DL, SETCCVT, Op0, QVal,
7112 ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
7113 if (!HadTautologicalInvertedLanes)
7114 return NewCC;
7115
7116 // If any lanes previously compared always-false, the NewCC will give
7117 // always-true result for them, so we need to fixup those lanes.
7118 // Or the other way around for inequality predicate.
7119 assert(VT.isVector() && "Can/should only get here for vectors.");
7120 Created.push_back(NewCC.getNode());
7121
7122 // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
7123 // if C2 is not less than C1, the comparison is always false.
7124 // But we have produced the comparison that will give the
7125 // opposite tautological answer. So these lanes would need to be fixed up.
7126 SDValue TautologicalInvertedChannels =
7127 DAG.getSetCC(DL, SETCCVT, D, CompTargetNode, ISD::SETULE);
7128 Created.push_back(TautologicalInvertedChannels.getNode());
7129
7130 // NOTE: we avoid letting illegal types through even if we're before legalize
7131 // ops – legalization has a hard time producing good code for this.
7132 if (isOperationLegalOrCustom(ISD::VSELECT, SETCCVT)) {
7133 // If we have a vector select, let's replace the comparison results in the
7134 // affected lanes with the correct tautological result.
7135 SDValue Replacement = DAG.getBoolConstant(Cond == ISD::SETEQ ? false : true,
7136 DL, SETCCVT, SETCCVT);
7137 return DAG.getNode(ISD::VSELECT, DL, SETCCVT, TautologicalInvertedChannels,
7138 Replacement, NewCC);
7139 }
7140
7141 // Else, we can just invert the comparison result in the appropriate lanes.
7142 //
7143 // NOTE: see the note above VSELECT above.
7144 if (isOperationLegalOrCustom(ISD::XOR, SETCCVT))
7145 return DAG.getNode(ISD::XOR, DL, SETCCVT, NewCC,
7146 TautologicalInvertedChannels);
7147
7148 return SDValue(); // Don't know how to lower.
7149}
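// Worked example (illustrative, not from the source): "(x u% 6) == 0" on i32
// gives D0 = 3, K = 1, P = inv(3, 2^32) = 0xAAAAAAAB and
// Q = floor((2^32 - 1) / 6) = 0x2AAAAAAA, so the fold emits
// "rotr(x * 0xAAAAAAAB, 1) u<= 0x2AAAAAAA". With x = 12 the rotated product
// is 2 (true); with x = 8 it is 0x2AAAAAAC (false).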
7150
7151/// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
7152/// where the divisor is constant and the comparison target is zero,
7153/// return a DAG expression that will generate the same comparison result
7154/// using only multiplications, additions and shifts/rotations.
7155/// Ref: "Hacker's Delight" 10-17.
7156SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
7157 SDValue CompTargetNode,
7158 ISD::CondCode Cond,
7159 DAGCombinerInfo &DCI,
7160 const SDLoc &DL) const {
7161 SmallVector<SDNode *, 7> Built;
7162 if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
7163 DCI, DL, Built)) {
7164 assert(Built.size() <= 7 && "Max size prediction failed.");
7165 for (SDNode *N : Built)
7166 DCI.AddToWorklist(N);
7167 return Folded;
7168 }
7169
7170 return SDValue();
7171}
7172
7173SDValue
7174TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
7175 SDValue CompTargetNode, ISD::CondCode Cond,
7176 DAGCombinerInfo &DCI, const SDLoc &DL,
7177 SmallVectorImpl<SDNode *> &Created) const {
7178 // Derived from Hacker's Delight, 2nd Edition, by Hank Warren. Section 10-17.
7179 // Fold:
7180 // (seteq/ne (srem N, D), 0)
7181 // To:
7182 // (setule/ugt (rotr (add (mul N, P), A), K), Q)
7183 //
7184 // - D must be constant, with D = D0 * 2^K where D0 is odd
7185 // - P is the multiplicative inverse of D0 modulo 2^W
7186 // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^k)))
7187 // - Q = floor((2 * A) / (2^K))
7188 // where W is the width of the common type of N and D.
7189 //
7190 // When D is a power of two (and thus D0 is 1), the normal
7191 // formula for A and Q don't apply, because the derivation
7192 // depends on D not dividing 2^(W-1), and thus theorem ZRS
7193 // does not apply. This specifically fails when N = INT_MIN.
7194 //
7195 // Instead, for power-of-two D, we use:
7196 // - A = 2^(W-1)
7197 // |-> Order-preserving map from [-2^(W-1), 2^(W-1) - 1] to [0,2^W - 1])
7198 // - Q = 2^(W-K) - 1
7199 // |-> Test that the top K bits are zero after rotation
7200 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
7201 "Only applicable for (in)equality comparisons.");
7202
7203 SelectionDAG &DAG = DCI.DAG;
7204
7205 EVT VT = REMNode.getValueType();
7206 EVT SVT = VT.getScalarType();
7207 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
7208 EVT ShSVT = ShVT.getScalarType();
7209
7210 // If we are after ops legalization, and MUL is unavailable, we cannot
7211 // proceed.
7212 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
7213 return SDValue();
7214
7215 // TODO: Could support comparing with non-zero too.
7216 ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
7217 if (!CompTarget || !CompTarget->isZero())
7218 return SDValue();
7219
7220 bool HadIntMinDivisor = false;
7221 bool HadOneDivisor = false;
7222 bool AllDivisorsAreOnes = true;
7223 bool HadEvenDivisor = false;
7224 bool NeedToApplyOffset = false;
7225 bool AllDivisorsArePowerOfTwo = true;
7226 SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts;
7227
7228 auto BuildSREMPattern = [&](ConstantSDNode *C) {
7229 // Division by 0 is UB. Leave it to be constant-folded elsewhere.
7230 if (C->isZero())
7231 return false;
7232
7233 // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.
7234
7235 // WARNING: this fold is only valid for positive divisors!
7236 APInt D = C->getAPIntValue();
7237 if (D.isNegative())
7238 D.negate(); // `rem %X, -C` is equivalent to `rem %X, C`
7239
7240 HadIntMinDivisor |= D.isMinSignedValue();
7241
7242 // If all divisors are ones, we will prefer to avoid the fold.
7243 HadOneDivisor |= D.isOne();
7244 AllDivisorsAreOnes &= D.isOne();
7245
7246 // Decompose D into D0 * 2^K
7247 unsigned K = D.countr_zero();
7248 assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
7249 APInt D0 = D.lshr(K);
7250
7251 if (!D.isMinSignedValue()) {
7252 // D is even if it has trailing zeros; unless it's INT_MIN, in which case
7253 // we don't care about this lane in this fold, we'll special-handle it.
7254 HadEvenDivisor |= (K != 0);
7255 }
7256
7257 // D is a power-of-two if D0 is one. This includes INT_MIN.
7258 // If all divisors are power-of-two, we will prefer to avoid the fold.
7259 AllDivisorsArePowerOfTwo &= D0.isOne();
7260
7261 // P = inv(D0, 2^W)
7262 // 2^W requires W + 1 bits, so we have to extend and then truncate.
7263 unsigned W = D.getBitWidth();
7264 APInt P = D0.multiplicativeInverse();
7265 assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
7266
7267 // A = floor((2^(W - 1) - 1) / D0) & -2^K
7268 APInt A = APInt::getSignedMaxValue(W).udiv(D0);
7269 A.clearLowBits(K);
7270
7271 if (!D.isMinSignedValue()) {
7272 // If divisor INT_MIN, then we don't care about this lane in this fold,
7273 // we'll special-handle it.
7274 NeedToApplyOffset |= A != 0;
7275 }
7276
7277 // Q = floor((2 * A) / (2^K))
7278 APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K));
7279
7281 "We are expecting that A is always less than all-ones for SVT");
7283 "We are expecting that K is always less than all-ones for ShSVT");
7284
7285 // If D was a power of two, apply the alternate constant derivation.
7286 if (D0.isOne()) {
7287 // A = 2^(W-1)
7288 A = APInt::getSignedMinValue(W);
7289 // - Q = 2^(W-K) - 1
7290 Q = APInt::getAllOnes(W - K).zext(W);
7291 }
7292
7293 // If the divisor is 1 the result can be constant-folded. Likewise, we
7294 // don't care about INT_MIN lanes, those can be set to undef if appropriate.
7295 if (D.isOne()) {
7296 // Set P, A and K to bogus values so we can try to splat them.
7297 P = 0;
7298 A = -1;
7299 K = -1;
7300
7301 // x ?% 1 == 0 <--> true <--> x u<= -1
7302 Q = -1;
7303 }
7304
7305 PAmts.push_back(DAG.getConstant(P, DL, SVT));
7306 AAmts.push_back(DAG.getConstant(A, DL, SVT));
7307 KAmts.push_back(
7308 DAG.getConstant(APInt(ShSVT.getSizeInBits(), K, /*isSigned=*/false,
7309 /*implicitTrunc=*/true),
7310 DL, ShSVT));
7311 QAmts.push_back(DAG.getConstant(Q, DL, SVT));
7312 return true;
7313 };
7314
7315 SDValue N = REMNode.getOperand(0);
7316 SDValue D = REMNode.getOperand(1);
7317
7318 // Collect the values from each element.
7319 if (!ISD::matchUnaryPredicate(D, BuildSREMPattern))
7320 return SDValue();
7321
7322 // If this is a srem by one, avoid the fold since it can be constant-folded.
7323 if (AllDivisorsAreOnes)
7324 return SDValue();
7325
7326 // If this is a srem by a power-of-two (including INT_MIN), avoid the fold
7327 // since it can be best implemented as a bit test.
7328 if (AllDivisorsArePowerOfTwo)
7329 return SDValue();
7330
7331 SDValue PVal, AVal, KVal, QVal;
7332 if (D.getOpcode() == ISD::BUILD_VECTOR) {
7333 if (HadOneDivisor) {
7334 // Try to turn PAmts into a splat, since we don't care about the values
7335 // that are currently '0'. If we can't, just keep '0'`s.
7336 turnVectorIntoSplatVector(PAmts, isNullConstant);
7337 // Try to turn AAmts into a splat, since we don't care about the
7338 // values that are currently '-1'. If we can't, change them to '0'`s.
7339 turnVectorIntoSplatVector(AAmts, isAllOnesConstant,
7340 DAG.getConstant(0, DL, SVT));
7341 // Try to turn KAmts into a splat, since we don't care about the values
7342 // that are currently '-1'. If we can't, change them to '0'`s.
7343 turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
7344 DAG.getConstant(0, DL, ShSVT));
7345 }
7346
7347 PVal = DAG.getBuildVector(VT, DL, PAmts);
7348 AVal = DAG.getBuildVector(VT, DL, AAmts);
7349 KVal = DAG.getBuildVector(ShVT, DL, KAmts);
7350 QVal = DAG.getBuildVector(VT, DL, QAmts);
7351 } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
7352 assert(PAmts.size() == 1 && AAmts.size() == 1 && KAmts.size() == 1 &&
7353 QAmts.size() == 1 &&
7354 "Expected matchUnaryPredicate to return one element for scalable "
7355 "vectors");
7356 PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
7357 AVal = DAG.getSplatVector(VT, DL, AAmts[0]);
7358 KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
7359 QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
7360 } else {
7361 assert(isa<ConstantSDNode>(D) && "Expected a constant");
7362 PVal = PAmts[0];
7363 AVal = AAmts[0];
7364 KVal = KAmts[0];
7365 QVal = QAmts[0];
7366 }
7367
7368 // (mul N, P)
7369 SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
7370 Created.push_back(Op0.getNode());
7371
7372 if (NeedToApplyOffset) {
7373 // We need ADD to do this.
7374 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ADD, VT))
7375 return SDValue();
7376
7377 // (add (mul N, P), A)
7378 Op0 = DAG.getNode(ISD::ADD, DL, VT, Op0, AVal);
7379 Created.push_back(Op0.getNode());
7380 }
7381
7382 // Rotate right only if any divisor was even. We avoid rotates for all-odd
7383 // divisors as a performance improvement, since rotating by 0 is a no-op.
7384 if (HadEvenDivisor) {
7385 // We need ROTR to do this.
7386 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
7387 return SDValue();
7388 // SREM: (rotr (add (mul N, P), A), K)
7389 Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
7390 Created.push_back(Op0.getNode());
7391 }
7392
7393 // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
7394 SDValue Fold =
7395 DAG.getSetCC(DL, SETCCVT, Op0, QVal,
7396 ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
7397
7398 // If we didn't have lanes with INT_MIN divisor, then we're done.
7399 if (!HadIntMinDivisor)
7400 return Fold;
7401
7402 // That fold is only valid for positive divisors. Which effectively means,
7403 // it is invalid for INT_MIN divisors. So if we have such a lane,
7404 // we must fix-up results for said lanes.
7405 assert(VT.isVector() && "Can/should only get here for vectors.");
7406
7407 // NOTE: we avoid letting illegal types through even if we're before legalize
7408 // ops – legalization has a hard time producing good code for the code that
7409 // follows.
7410 if (!isOperationLegalOrCustom(ISD::SETCC, SETCCVT) ||
7411 !isOperationLegalOrCustom(ISD::AND, VT) ||
7412 !isCondCodeLegalOrCustom(Cond, VT.getSimpleVT()) ||
7413 !isOperationLegalOrCustom(ISD::VSELECT, SETCCVT))
7414 return SDValue();
7415
7416 Created.push_back(Fold.getNode());
7417
7418 SDValue IntMin = DAG.getConstant(
7419 APInt::getSignedMinValue(SVT.getScalarSizeInBits()), DL, VT);
7420 SDValue IntMax = DAG.getConstant(
7421 APInt::getSignedMaxValue(SVT.getScalarSizeInBits()), DL, VT);
7422 SDValue Zero =
7423 DAG.getConstant(APInt::getZero(SVT.getScalarSizeInBits()), DL, VT);
7424
7425 // Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.
7426 SDValue DivisorIsIntMin = DAG.getSetCC(DL, SETCCVT, D, IntMin, ISD::SETEQ);
7427 Created.push_back(DivisorIsIntMin.getNode());
7428
7429 // (N s% INT_MIN) ==/!= 0 <--> (N & INT_MAX) ==/!= 0
7430 SDValue Masked = DAG.getNode(ISD::AND, DL, VT, N, IntMax);
7431 Created.push_back(Masked.getNode());
7432 SDValue MaskedIsZero = DAG.getSetCC(DL, SETCCVT, Masked, Zero, Cond);
7433 Created.push_back(MaskedIsZero.getNode());
7434
7435 // To produce final result we need to blend 2 vectors: 'SetCC' and
7436 // 'MaskedIsZero'. If the divisor for channel was *NOT* INT_MIN, we pick
7437 // from 'Fold', else pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is
7438 // constant-folded, select can get lowered to a shuffle with constant mask.
7439 SDValue Blended = DAG.getNode(ISD::VSELECT, DL, SETCCVT, DivisorIsIntMin,
7440 MaskedIsZero, Fold);
7441
7442 return Blended;
7443}
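// Worked example (illustrative, not from the source): "(x s% 6) == 0" on i32
// also uses P = 0xAAAAAAAB and K = 1, with
// A = floor((2^31 - 1) / 3) & ~1 = 0x2AAAAAAA and Q = (2 * A) / 2 =
// 0x2AAAAAAA, so the fold emits
// "rotr(x * 0xAAAAAAAB + 0x2AAAAAAA, 1) u<= 0x2AAAAAAA". With x = -6 the
// rotated value is 0x15555554 (true); with x = 4 it is 0x6AAAAAAB (false).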
7444
7445SDValue TargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
7446 const DenormalMode &Mode) const {
7447 SDLoc DL(Op);
7448 EVT VT = Op.getValueType();
7449 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7450 SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
7451
7452 // This is specifically a check for the handling of denormal inputs, not the
7453 // result.
7454 if (Mode.Input == DenormalMode::PreserveSign ||
7455 Mode.Input == DenormalMode::PositiveZero) {
7456 // Test = X == 0.0
7457 return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
7458 }
7459
7460 // Test with denormal inputs to avoid a wrong estimate.
7461 //
7462 // Test = fabs(X) < SmallestNormal
7463 const fltSemantics &FltSem = VT.getFltSemantics();
7464 APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
7465 SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
7466 SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
7467 return DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
7468}
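// Example (illustrative, not from the source): for f32 with IEEE denormal
// inputs this emits "setcc (fabs x), 1.17549435e-38, setlt", i.e. a compare
// against 2^-126, the smallest normalized float; under PreserveSign or
// PositiveZero input modes the cheaper "setcc x, 0.0, seteq" suffices.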
7469
7470SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
7471 bool LegalOps, bool OptForSize,
7472 NegatibleCost &Cost,
7473 unsigned Depth) const {
7474 // fneg is removable even if it has multiple uses.
7475 if (Op.getOpcode() == ISD::FNEG || Op.getOpcode() == ISD::VP_FNEG) {
7476 Cost = NegatibleCost::Cheaper;
7477 return Op.getOperand(0);
7478 }
7479
7480 // Don't recurse exponentially.
7481 if (Depth > SelectionDAG::MaxRecursionDepth)
7482 return SDValue();
7483
7484 // Pre-increment recursion depth for use in recursive calls.
7485 ++Depth;
7486 const SDNodeFlags Flags = Op->getFlags();
7487 const TargetOptions &Options = DAG.getTarget().Options;
7488 EVT VT = Op.getValueType();
7489 unsigned Opcode = Op.getOpcode();
7490
7491 // Don't allow anything with multiple uses unless we know it is free.
7492 if (!Op.hasOneUse() && Opcode != ISD::ConstantFP) {
7493 bool IsFreeExtend = Opcode == ISD::FP_EXTEND &&
7494 isFPExtFree(VT, Op.getOperand(0).getValueType());
7495 if (!IsFreeExtend)
7496 return SDValue();
7497 }
7498
7499 auto RemoveDeadNode = [&](SDValue N) {
7500 if (N && N.getNode()->use_empty())
7501 DAG.RemoveDeadNode(N.getNode());
7502 };
7503
7504 SDLoc DL(Op);
7505
7506 // Because getNegatedExpression can delete nodes we need a handle to keep
7507 // temporary nodes alive in case the recursion manages to create an identical
7508 // node.
7509 std::list<HandleSDNode> Handles;
7510
7511 switch (Opcode) {
7512 case ISD::ConstantFP: {
7513 // Don't invert constant FP values after legalization unless the target says
7514 // the negated constant is legal.
7515 bool IsOpLegal =
7516 isOperationLegal(ISD::ConstantFP, VT) ||
7517 isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
7518 OptForSize);
7519
7520 if (LegalOps && !IsOpLegal)
7521 break;
7522
7523 APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
7524 V.changeSign();
7525 SDValue CFP = DAG.getConstantFP(V, DL, VT);
7526
7527 // If we already have the use of the negated floating constant, it is free
7528 // to negate it even if it has multiple uses.
7529 if (!Op.hasOneUse() && CFP.use_empty())
7530 break;
7531 Cost = NegatibleCost::Neutral;
7532 return CFP;
7533 }
7534 case ISD::BUILD_VECTOR: {
7535 // Only permit BUILD_VECTOR of constants.
7536 if (llvm::any_of(Op->op_values(), [&](SDValue N) {
7537 return !N.isUndef() && !isa<ConstantFPSDNode>(N);
7538 }))
7539 break;
7540
7541 bool IsOpLegal =
7542 (isOperationLegal(ISD::ConstantFP, VT) &&
7543 isOperationLegal(ISD::BUILD_VECTOR, VT)) ||
7544 llvm::all_of(Op->op_values(), [&](SDValue N) {
7545 return N.isUndef() ||
7546 isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
7547 OptForSize);
7548 });
7549
7550 if (LegalOps && !IsOpLegal)
7551 break;
7552
7553 SmallVector<SDValue, 4> Ops;
7554 for (SDValue C : Op->op_values()) {
7555 if (C.isUndef()) {
7556 Ops.push_back(C);
7557 continue;
7558 }
7559 APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
7560 V.changeSign();
7561 Ops.push_back(DAG.getConstantFP(V, DL, C.getValueType()));
7562 }
7563 Cost = NegatibleCost::Neutral;
7564 return DAG.getBuildVector(VT, DL, Ops);
7565 }
7566 case ISD::FADD: {
7567 if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
7568 break;
7569
7570 // After operation legalization, it might not be legal to create new FSUBs.
7571 if (LegalOps && !isOperationLegalOrCustom(ISD::FSUB, VT))
7572 break;
7573 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7574
7575 // fold (fneg (fadd X, Y)) -> (fsub (fneg X), Y)
7576 NegatibleCost CostX = NegatibleCost::Expensive;
7577 SDValue NegX =
7578 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7579 // Prevent this node from being deleted by the next call.
7580 if (NegX)
7581 Handles.emplace_back(NegX);
7582
7583 // fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X)
7584 NegatibleCost CostY = NegatibleCost::Expensive;
7585 SDValue NegY =
7586 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7587
7588 // We're done with the handles.
7589 Handles.clear();
7590
7591 // Negate X if its cost is less than or equal to the cost of negating Y.
7592 if (NegX && (CostX <= CostY)) {
7593 Cost = CostX;
7594 SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegX, Y, Flags);
7595 if (NegY != N)
7596 RemoveDeadNode(NegY);
7597 return N;
7598 }
7599
7600 // Negate the Y if it is not expensive.
7601 if (NegY) {
7602 Cost = CostY;
7603 SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegY, X, Flags);
7604 if (NegX != N)
7605 RemoveDeadNode(NegX);
7606 return N;
7607 }
7608 break;
7609 }
7610 case ISD::FSUB: {
7611 // We can't turn -(A-B) into B-A when we honor signed zeros.
7612 if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
7613 break;
7614
7615 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7616 // fold (fneg (fsub 0, Y)) -> Y
7617 if (ConstantFPSDNode *C = isConstOrConstSplatFP(X, /*AllowUndefs*/ true))
7618 if (C->isZero()) {
7619 Cost = NegatibleCost::Cheaper;
7620 return Y;
7621 }
7622
7623 // fold (fneg (fsub X, Y)) -> (fsub Y, X)
7624 Cost = NegatibleCost::Neutral;
7625 return DAG.getNode(ISD::FSUB, DL, VT, Y, X, Flags);
7626 }
7627 case ISD::FMUL:
7628 case ISD::FDIV: {
7629 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7630
7631 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
7632 NegatibleCost CostX = NegatibleCost::Expensive;
7633 SDValue NegX =
7634 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7635 // Prevent this node from being deleted by the next call.
7636 if (NegX)
7637 Handles.emplace_back(NegX);
7638
7639 // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
7640 NegatibleCost CostY = NegatibleCost::Expensive;
7641 SDValue NegY =
7642 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7643
7644 // We're done with the handles.
7645 Handles.clear();
7646
7647 // Negate X if its cost is less than or equal to the cost of negating Y.
7648 if (NegX && (CostX <= CostY)) {
7649 Cost = CostX;
7650 SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, Flags);
7651 if (NegY != N)
7652 RemoveDeadNode(NegY);
7653 return N;
7654 }
7655
7656 // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
7657 if (auto *C = isConstOrConstSplatFP(Op.getOperand(1)))
7658 if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL)
7659 break;
7660
7661 // Negate the Y if it is not expensive.
7662 if (NegY) {
7663 Cost = CostY;
7664 SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, Flags);
7665 if (NegX != N)
7666 RemoveDeadNode(NegX);
7667 return N;
7668 }
7669 break;
7670 }
7671 case ISD::FMA:
7672 case ISD::FMAD: {
7673 if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
7674 break;
7675
7676 SDValue X = Op.getOperand(0), Y = Op.getOperand(1), Z = Op.getOperand(2);
7677 NegatibleCost CostZ = NegatibleCost::Expensive;
7678 SDValue NegZ =
7679 getNegatedExpression(Z, DAG, LegalOps, OptForSize, CostZ, Depth);
7680 // Give up if we fail to negate Z.
7681 if (!NegZ)
7682 break;
7683
7684 // Prevent this node from being deleted by the next two calls.
7685 Handles.emplace_back(NegZ);
7686
7687 // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
7688 NegatibleCost CostX = NegatibleCost::Expensive;
7689 SDValue NegX =
7690 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7691 // Prevent this node from being deleted by the next call.
7692 if (NegX)
7693 Handles.emplace_back(NegX);
7694
7695 // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
7696 NegatibleCost CostY = NegatibleCost::Expensive;
7697 SDValue NegY =
7698 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7699
7700 // We're done with the handles.
7701 Handles.clear();
7702
7703 // Negate X if its cost is less than or equal to the cost of negating Y.
7704 if (NegX && (CostX <= CostY)) {
7705 Cost = std::min(CostX, CostZ);
7706 SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, NegZ, Flags);
7707 if (NegY != N)
7708 RemoveDeadNode(NegY);
7709 return N;
7710 }
7711
7712 // Negate the Y if it is not expensive.
7713 if (NegY) {
7714 Cost = std::min(CostY, CostZ);
7715 SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, NegZ, Flags);
7716 if (NegX != N)
7717 RemoveDeadNode(NegX);
7718 return N;
7719 }
7720 break;
7721 }
7722
7723 case ISD::FP_EXTEND:
7724 case ISD::FSIN:
7725 if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
7726 OptForSize, Cost, Depth))
7727 return DAG.getNode(Opcode, DL, VT, NegV);
7728 break;
7729 case ISD::FP_ROUND:
7730 if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
7731 OptForSize, Cost, Depth))
7732 return DAG.getNode(ISD::FP_ROUND, DL, VT, NegV, Op.getOperand(1));
7733 break;
7734 case ISD::SELECT:
7735 case ISD::VSELECT: {
7736 // fold (fneg (select C, LHS, RHS)) -> (select C, (fneg LHS), (fneg RHS))
7737 // iff at least one cost is cheaper and the other is neutral/cheaper
7738 SDValue LHS = Op.getOperand(1);
7739 NegatibleCost CostLHS = NegatibleCost::Expensive;
7740 SDValue NegLHS =
7741 getNegatedExpression(LHS, DAG, LegalOps, OptForSize, CostLHS, Depth);
7742 if (!NegLHS || CostLHS > NegatibleCost::Neutral) {
7743 RemoveDeadNode(NegLHS);
7744 break;
7745 }
7746
7747 // Prevent this node from being deleted by the next call.
7748 Handles.emplace_back(NegLHS);
7749
7750 SDValue RHS = Op.getOperand(2);
7751 NegatibleCost CostRHS = NegatibleCost::Expensive;
7752 SDValue NegRHS =
7753 getNegatedExpression(RHS, DAG, LegalOps, OptForSize, CostRHS, Depth);
7754
7755 // We're done with the handles.
7756 Handles.clear();
7757
7758 if (!NegRHS || CostRHS > NegatibleCost::Neutral ||
7759 (CostLHS != NegatibleCost::Cheaper &&
7760 CostRHS != NegatibleCost::Cheaper)) {
7761 RemoveDeadNode(NegLHS);
7762 RemoveDeadNode(NegRHS);
7763 break;
7764 }
7765
7766 Cost = std::min(CostLHS, CostRHS);
7767 return DAG.getSelect(DL, VT, Op.getOperand(0), NegLHS, NegRHS);
7768 }
7769 }
7770
7771 return SDValue();
7772}
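// Usage sketch (illustrative, not from the source): negating "(fadd X, Y)"
// under no-signed-zeros where X is a ConstantFP returns "(fsub (fneg X), Y)"
// with the reported Cost taken from the cheaper operand, since the constant
// negates for free; if neither operand is cheaply negatable, an empty
// SDValue is returned and the caller keeps the original FNEG.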
7773
7774//===----------------------------------------------------------------------===//
7775// Legalization Utilities
7776//===----------------------------------------------------------------------===//
7777
7778bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl,
7779 SDValue LHS, SDValue RHS,
7780 SmallVectorImpl<SDValue> &Result,
7781 EVT HiLoVT, SelectionDAG &DAG,
7782 MulExpansionKind Kind, SDValue LL,
7783 SDValue LH, SDValue RL, SDValue RH) const {
7784 assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
7785 Opcode == ISD::SMUL_LOHI);
7786
7787 bool HasMULHS = (Kind == MulExpansionKind::Always) ||
7788 isOperationLegalOrCustom(ISD::MULHS, HiLoVT);
7789 bool HasMULHU = (Kind == MulExpansionKind::Always) ||
7790 isOperationLegalOrCustom(ISD::MULHU, HiLoVT);
7791 bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
7792 isOperationLegalOrCustom(ISD::SMUL_LOHI, HiLoVT);
7793 bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
7794 isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT);
7795
7796 if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
7797 return false;
7798
7799 unsigned OuterBitSize = VT.getScalarSizeInBits();
7800 unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();
7801
7802 // LL, LH, RL, and RH must be either all NULL or all set to a value.
7803 assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
7804 (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));
7805
7806 SDVTList VTs = DAG.getVTList(HiLoVT, HiLoVT);
7807 auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
7808 bool Signed) -> bool {
7809 if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
7810 Lo = DAG.getNode(Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, dl, VTs, L, R);
7811 Hi = SDValue(Lo.getNode(), 1);
7812 return true;
7813 }
7814 if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
7815 Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, L, R);
7816 Hi = DAG.getNode(Signed ? ISD::MULHS : ISD::MULHU, dl, HiLoVT, L, R);
7817 return true;
7818 }
7819 return false;
7820 };
7821
7822 SDValue Lo, Hi;
7823
7824 if (!LL.getNode() && !RL.getNode() &&
7825 isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
7826 LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LHS);
7827 RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RHS);
7828 }
7829
7830 if (!LL.getNode())
7831 return false;
7832
7833 APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
7834 if (DAG.MaskedValueIsZero(LHS, HighMask) &&
7835 DAG.MaskedValueIsZero(RHS, HighMask)) {
7836 // The inputs are both zero-extended.
7837 if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
7838 Result.push_back(Lo);
7839 Result.push_back(Hi);
7840 if (Opcode != ISD::MUL) {
7841 SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
7842 Result.push_back(Zero);
7843 Result.push_back(Zero);
7844 }
7845 return true;
7846 }
7847 }
7848
7849 if (!VT.isVector() && Opcode == ISD::MUL &&
7850 DAG.ComputeMaxSignificantBits(LHS) <= InnerBitSize &&
7851 DAG.ComputeMaxSignificantBits(RHS) <= InnerBitSize) {
7852 // The input values are both sign-extended.
7853 // TODO non-MUL case?
7854 if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
7855 Result.push_back(Lo);
7856 Result.push_back(Hi);
7857 return true;
7858 }
7859 }
7860
7861 unsigned ShiftAmount = OuterBitSize - InnerBitSize;
7862 SDValue Shift = DAG.getShiftAmountConstant(ShiftAmount, VT, dl);
7863
7864 if (!LH.getNode() && !RH.getNode() &&
7865 isOperationLegalOrCustom(ISD::SRL, VT) &&
7866 isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
7867 LH = DAG.getNode(ISD::SRL, dl, VT, LHS, Shift);
7868 LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
7869 RH = DAG.getNode(ISD::SRL, dl, VT, RHS, Shift);
7870 RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
7871 }
7872
7873 if (!LH.getNode())
7874 return false;
7875
7876 if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false))
7877 return false;
7878
7879 Result.push_back(Lo);
7880
7881 if (Opcode == ISD::MUL) {
7882 RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
7883 LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
7884 Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
7885 Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
7886 Result.push_back(Hi);
7887 return true;
7888 }
7889
7890 // Compute the full width result.
7891 auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
7892 Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
7893 Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
7894 Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
7895 return DAG.getNode(ISD::OR, dl, VT, Lo, Hi);
7896 };
7897
7898 SDValue Next = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
7899 if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false))
7900 return false;
7901
7902 // This is effectively the add part of a multiply-add of half-sized operands,
7903 // so it cannot overflow.
7904 Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
7905
7906 if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
7907 return false;
7908
7909 SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
7910 EVT BoolType = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7911
7912 bool UseGlue = (isOperationLegalOrCustom(ISD::ADDC, VT) &&
7913 isOperationLegalOrCustom(ISD::ADDE, VT));
7914 if (UseGlue)
7915 Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
7916 Merge(Lo, Hi));
7917 else
7918 Next = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(VT, BoolType), Next,
7919 Merge(Lo, Hi), DAG.getConstant(0, dl, BoolType));
7920
7921 SDValue Carry = Next.getValue(1);
7922 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
7923 Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
7924
7925 if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
7926 return false;
7927
7928 if (UseGlue)
7929 Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
7930 Carry);
7931 else
7932 Hi = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(HiLoVT, BoolType), Hi,
7933 Zero, Carry);
7934
7935 Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
7936
7937 if (Opcode == ISD::SMUL_LOHI) {
7938 SDValue NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
7939 DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RL));
7940 Next = DAG.getSelectCC(dl, LH, Zero, NextSub, Next, ISD::SETLT);
7941
7942 NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
7943 DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LL));
7944 Next = DAG.getSelectCC(dl, RH, Zero, NextSub, Next, ISD::SETLT);
7945 }
7946
7947 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
7948 Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
7949 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
7950 return true;
7951}
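// The decomposition above is schoolbook multiplication on half-width digits:
// with n = InnerBitSize, LHS = LH*2^n + LL and RHS = RH*2^n + RL, so
//   LHS*RHS = LH*RH*2^(2n) + (LH*RL + LL*RH)*2^n + LL*RL,
// and the carries out of the middle partial products are what ADDC/ADDE or
// UADDO_CARRY propagate into the high half.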
7952
7953bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
7954 SelectionDAG &DAG, MulExpansionKind Kind,
7955 SDValue LL, SDValue LH, SDValue RL,
7956 SDValue RH) const {
7957 SmallVector<SDValue, 2> Result;
7958 bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), SDLoc(N),
7959 N->getOperand(0), N->getOperand(1), Result, HiLoVT,
7960 DAG, Kind, LL, LH, RL, RH);
7961 if (Ok) {
7962 assert(Result.size() == 2);
7963 Lo = Result[0];
7964 Hi = Result[1];
7965 }
7966 return Ok;
7967}
7968
7969// Optimize unsigned division or remainder by constants for types twice as large
7970// as a legal VT.
7971//
7972// If (1 << (BitWidth / 2)) % Constant == 1, then the remainder
7973// can be computed
7974// as:
7975// Sum += __builtin_uadd_overflow(Lo, High, &Sum);
7976// Remainder = Sum % Constant
7977// This is based on "Remainder by Summing Digits" from Hacker's Delight.
7978//
7979// For division, we can compute the remainder using the algorithm described
7980// above, subtract it from the dividend to get an exact multiple of Constant.
7981 // Then multiply that exact multiple by the multiplicative inverse modulo
7982// (1 << (BitWidth / 2)) to get the quotient.
7983
7984// If Constant is even, we can shift right the dividend and the divisor by the
7985// number of trailing zeros in Constant before applying the remainder algorithm.
7986// If we're after the quotient, we can subtract this value from the shifted
7987// dividend and multiply by the multiplicative inverse of the shifted divisor.
7988// If we want the remainder, we shift the value left by the number of trailing
7989// zeros and add the bits that were shifted out of the dividend.
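// Worked example: for an i16 dividend split into i8 halves and Constant = 5,
// (1 << 8) % 5 == 1, so N % 5 == (Hi + Lo + carry) % 5. With N = 0x1234:
// Hi = 0x12 = 18, Lo = 0x34 = 52, Sum = 70, and 70 % 5 == 0 == 4660 % 5.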
7990bool TargetLowering::expandDIVREMByConstant(SDNode *N,
7991 SmallVectorImpl<SDValue> &Result,
7992 EVT HiLoVT, SelectionDAG &DAG,
7993 SDValue LL, SDValue LH) const {
7994 unsigned Opcode = N->getOpcode();
7995 EVT VT = N->getValueType(0);
7996
7997 // TODO: Support signed division/remainder.
7998 if (Opcode == ISD::SREM || Opcode == ISD::SDIV || Opcode == ISD::SDIVREM)
7999 return false;
8000 assert(
8001 (Opcode == ISD::UREM || Opcode == ISD::UDIV || Opcode == ISD::UDIVREM) &&
8002 "Unexpected opcode");
8003
8004 auto *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
8005 if (!CN)
8006 return false;
8007
8008 APInt Divisor = CN->getAPIntValue();
8009 unsigned BitWidth = Divisor.getBitWidth();
8010 unsigned HBitWidth = BitWidth / 2;
8011 assert(VT.getScalarSizeInBits() == BitWidth &&
8012 HiLoVT.getScalarSizeInBits() == HBitWidth && "Unexpected VTs");
8013
8014 // Divisor needs to be less than (1 << HBitWidth).
8015 APInt HalfMaxPlus1 = APInt::getOneBitSet(BitWidth, HBitWidth);
8016 if (Divisor.uge(HalfMaxPlus1))
8017 return false;
8018
8019 // We depend on the UREM by constant optimization in DAGCombiner that requires
8020 // a high multiply.
8021 if (!isOperationLegalOrCustom(ISD::MULHU, HiLoVT) &&
8022 !isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT))
8023 return false;
8024
8025 // Don't expand if optimizing for size.
8026 if (DAG.shouldOptForSize())
8027 return false;
8028
8029 // Early out for 0 or 1 divisors.
8030 if (Divisor.ule(1))
8031 return false;
8032
8033 // If the divisor is even, shift it until it becomes odd.
8034 unsigned TrailingZeros = 0;
8035 if (!Divisor[0]) {
8036 TrailingZeros = Divisor.countr_zero();
8037 Divisor.lshrInPlace(TrailingZeros);
8038 }
8039
8040 SDLoc dl(N);
8041 SDValue Sum;
8042 SDValue PartialRem;
8043
8044 // If (1 << HBitWidth) % divisor == 1, we can add the two halves together and
8045 // then add in the carry.
8046 // TODO: If we can't split it in half, we might be able to split into 3 or
8047 // more pieces using a smaller bit width.
8048 if (HalfMaxPlus1.urem(Divisor).isOne()) {
8049 assert(!LL == !LH && "Expected both input halves or no input halves!");
8050 if (!LL)
8051 std::tie(LL, LH) = DAG.SplitScalar(N->getOperand(0), dl, HiLoVT, HiLoVT);
8052
8053 // Shift the input by the number of TrailingZeros in the divisor. The
8054 // shifted out bits will be added to the remainder later.
8055 if (TrailingZeros) {
8056 // Save the shifted off bits if we need the remainder.
8057 if (Opcode != ISD::UDIV) {
8058 APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
8059 PartialRem = DAG.getNode(ISD::AND, dl, HiLoVT, LL,
8060 DAG.getConstant(Mask, dl, HiLoVT));
8061 }
8062
8063 LL = DAG.getNode(
8064 ISD::OR, dl, HiLoVT,
8065 DAG.getNode(ISD::SRL, dl, HiLoVT, LL,
8066 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl)),
8067 DAG.getNode(ISD::SHL, dl, HiLoVT, LH,
8068 DAG.getShiftAmountConstant(HBitWidth - TrailingZeros,
8069 HiLoVT, dl)));
8070 LH = DAG.getNode(ISD::SRL, dl, HiLoVT, LH,
8071 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
8072 }
8073
8074 // Use uaddo_carry if we can, otherwise use a compare to detect overflow.
8075 EVT SetCCType =
8076 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), HiLoVT);
8077 if (isOperationLegalOrCustom(ISD::UADDO_CARRY, HiLoVT)) {
8078 SDVTList VTList = DAG.getVTList(HiLoVT, SetCCType);
8079 Sum = DAG.getNode(ISD::UADDO, dl, VTList, LL, LH);
8080 Sum = DAG.getNode(ISD::UADDO_CARRY, dl, VTList, Sum,
8081 DAG.getConstant(0, dl, HiLoVT), Sum.getValue(1));
8082 } else {
8083 Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, LL, LH);
8084 SDValue Carry = DAG.getSetCC(dl, SetCCType, Sum, LL, ISD::SETULT);
8085 // If the boolean for the target is 0 or 1, we can add the setcc result
8086 // directly.
8087 if (getBooleanContents(HiLoVT) ==
8088 TargetLoweringBase::ZeroOrOneBooleanContent)
8089 Carry = DAG.getZExtOrTrunc(Carry, dl, HiLoVT);
8090 else
8091 Carry = DAG.getSelect(dl, HiLoVT, Carry, DAG.getConstant(1, dl, HiLoVT),
8092 DAG.getConstant(0, dl, HiLoVT));
8093 Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, Sum, Carry);
8094 }
8095 }
8096
8097 // If we didn't find a sum, we can't do the expansion.
8098 if (!Sum)
8099 return false;
8100
8101 // Perform a HiLoVT urem on the Sum using truncated divisor.
8102 SDValue RemL =
8103 DAG.getNode(ISD::UREM, dl, HiLoVT, Sum,
8104 DAG.getConstant(Divisor.trunc(HBitWidth), dl, HiLoVT));
8105 SDValue RemH = DAG.getConstant(0, dl, HiLoVT);
8106
8107 if (Opcode != ISD::UREM) {
8108 // Subtract the remainder from the shifted dividend.
8109 SDValue Dividend = DAG.getNode(ISD::BUILD_PAIR, dl, VT, LL, LH);
8110 SDValue Rem = DAG.getNode(ISD::BUILD_PAIR, dl, VT, RemL, RemH);
8111
8112 Dividend = DAG.getNode(ISD::SUB, dl, VT, Dividend, Rem);
8113
8114 // Multiply by the multiplicative inverse of the divisor modulo
8115 // (1 << BitWidth).
8116 APInt MulFactor = Divisor.multiplicativeInverse();
8117
8118 SDValue Quotient = DAG.getNode(ISD::MUL, dl, VT, Dividend,
8119 DAG.getConstant(MulFactor, dl, VT));
8120
8121 // Split the quotient into low and high parts.
8122 SDValue QuotL, QuotH;
8123 std::tie(QuotL, QuotH) = DAG.SplitScalar(Quotient, dl, HiLoVT, HiLoVT);
8124 Result.push_back(QuotL);
8125 Result.push_back(QuotH);
8126 }
8127
8128 if (Opcode != ISD::UDIV) {
8129 // If we shifted the input, shift the remainder left and add the bits we
8130 // shifted off the input.
8131 if (TrailingZeros) {
8132 RemL = DAG.getNode(ISD::SHL, dl, HiLoVT, RemL,
8133 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
8134 RemL = DAG.getNode(ISD::ADD, dl, HiLoVT, RemL, PartialRem);
8135 }
8136 Result.push_back(RemL);
8137 Result.push_back(DAG.getConstant(0, dl, HiLoVT));
8138 }
8139
8140 return true;
8141}
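// Continuing the worked example above for the quotient: 4660 - (4660 % 5) =
// 4660 is an exact multiple of 5; the multiplicative inverse of 5 mod 2^16 is
// 52429 (5 * 52429 == 4 * 65536 + 1), and 4660 * 52429 (mod 2^16) == 932,
// which is indeed 4660 / 5.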
8142
8143// Check that (every element of) Z is undef or not an exact multiple of BW.
8144static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) {
8145 return ISD::matchUnaryPredicate(
8146 Z,
8147 [=](ConstantSDNode *C) { return !C || C->getAPIntValue().urem(BW) != 0; },
8148 /*AllowUndefs=*/true, /*AllowTruncation=*/true);
8149}
8150
8151SDValue TargetLowering::expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG) const {
8152 EVT VT = Node->getValueType(0);
8153 SDValue ShX, ShY;
8154 SDValue ShAmt, InvShAmt;
8155 SDValue X = Node->getOperand(0);
8156 SDValue Y = Node->getOperand(1);
8157 SDValue Z = Node->getOperand(2);
8158 SDValue Mask = Node->getOperand(3);
8159 SDValue VL = Node->getOperand(4);
8160
8161 unsigned BW = VT.getScalarSizeInBits();
8162 bool IsFSHL = Node->getOpcode() == ISD::VP_FSHL;
8163 SDLoc DL(SDValue(Node, 0));
8164
8165 EVT ShVT = Z.getValueType();
8166 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8167 // fshl: X << C | Y >> (BW - C)
8168 // fshr: X << (BW - C) | Y >> C
8169 // where C = Z % BW is not zero
8170 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8171 ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
8172 InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitWidthC, ShAmt, Mask, VL);
8173 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt, Mask,
8174 VL);
8175 ShY = DAG.getNode(ISD::VP_SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt, Mask,
8176 VL);
8177 } else {
8178 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
8179 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
8180 SDValue BitMask = DAG.getConstant(BW - 1, DL, ShVT);
8181 if (isPowerOf2_32(BW)) {
8182 // Z % BW -> Z & (BW - 1)
8183 ShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, Z, BitMask, Mask, VL);
8184 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
8185 SDValue NotZ = DAG.getNode(ISD::VP_XOR, DL, ShVT, Z,
8186 DAG.getAllOnesConstant(DL, ShVT), Mask, VL);
8187 InvShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, NotZ, BitMask, Mask, VL);
8188 } else {
8189 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8190 ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
8191 InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitMask, ShAmt, Mask, VL);
8192 }
8193
8194 SDValue One = DAG.getConstant(1, DL, ShVT);
8195 if (IsFSHL) {
8196 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, ShAmt, Mask, VL);
8197 SDValue ShY1 = DAG.getNode(ISD::VP_SRL, DL, VT, Y, One, Mask, VL);
8198 ShY = DAG.getNode(ISD::VP_SRL, DL, VT, ShY1, InvShAmt, Mask, VL);
8199 } else {
8200 SDValue ShX1 = DAG.getNode(ISD::VP_SHL, DL, VT, X, One, Mask, VL);
8201 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, ShX1, InvShAmt, Mask, VL);
8202 ShY = DAG.getNode(ISD::VP_SRL, DL, VT, Y, ShAmt, Mask, VL);
8203 }
8204 }
8205 return DAG.getNode(ISD::VP_OR, DL, VT, ShX, ShY, Mask, VL);
8206}
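// Note on the expansion above: pre-shifting Y right by one (or X left by one
// for fshr) keeps every emitted shift amount within [0, BW - 1], so no
// out-of-range shift is created when Z % BW == 0.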
8207
8208SDValue TargetLowering::expandFunnelShift(SDNode *Node,
8209 SelectionDAG &DAG) const {
8210 if (Node->isVPOpcode())
8211 return expandVPFunnelShift(Node, DAG);
8212
8213 EVT VT = Node->getValueType(0);
8214
8215 if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
8216 !isOperationLegalOrCustom(ISD::SRL, VT) ||
8217 !isOperationLegalOrCustom(ISD::SUB, VT) ||
8218 !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
8219 return SDValue();
8220
8221 SDValue X = Node->getOperand(0);
8222 SDValue Y = Node->getOperand(1);
8223 SDValue Z = Node->getOperand(2);
8224
8225 unsigned BW = VT.getScalarSizeInBits();
8226 bool IsFSHL = Node->getOpcode() == ISD::FSHL;
8227 SDLoc DL(SDValue(Node, 0));
8228
8229 EVT ShVT = Z.getValueType();
8230
8231 // If a funnel shift in the other direction is more supported, use it.
8232 unsigned RevOpcode = IsFSHL ? ISD::FSHR : ISD::FSHL;
8233 if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
8234 isOperationLegalOrCustom(RevOpcode, VT) && isPowerOf2_32(BW)) {
8235 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8236 // fshl X, Y, Z -> fshr X, Y, -Z
8237 // fshr X, Y, Z -> fshl X, Y, -Z
8238 SDValue Zero = DAG.getConstant(0, DL, ShVT);
8239 Z = DAG.getNode(ISD::SUB, DL, VT, Zero, Z);
8240 } else {
8241 // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
8242 // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
8243 SDValue One = DAG.getConstant(1, DL, ShVT);
8244 if (IsFSHL) {
8245 Y = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
8246 X = DAG.getNode(ISD::SRL, DL, VT, X, One);
8247 } else {
8248 X = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
8249 Y = DAG.getNode(ISD::SHL, DL, VT, Y, One);
8250 }
8251 Z = DAG.getNOT(DL, Z, ShVT);
8252 }
8253 return DAG.getNode(RevOpcode, DL, VT, X, Y, Z);
8254 }
8255
8256 SDValue ShX, ShY;
8257 SDValue ShAmt, InvShAmt;
8258 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8259 // fshl: X << C | Y >> (BW - C)
8260 // fshr: X << (BW - C) | Y >> C
8261 // where C = Z % BW is not zero
8262 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8263 ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
8264 InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt);
8265 ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt);
8266 ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt);
8267 } else {
8268 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
8269 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
8270 SDValue Mask = DAG.getConstant(BW - 1, DL, ShVT);
8271 if (isPowerOf2_32(BW)) {
8272 // Z % BW -> Z & (BW - 1)
8273 ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask);
8274 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
8275 InvShAmt = DAG.getNode(ISD::AND, DL, ShVT, DAG.getNOT(DL, Z, ShVT), Mask);
8276 } else {
8277 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8278 ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
8279 InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, Mask, ShAmt);
8280 }
8281
8282 SDValue One = DAG.getConstant(1, DL, ShVT);
8283 if (IsFSHL) {
8284 ShX = DAG.getNode(ISD::SHL, DL, VT, X, ShAmt);
8285 SDValue ShY1 = DAG.getNode(ISD::SRL, DL, VT, Y, One);
8286 ShY = DAG.getNode(ISD::SRL, DL, VT, ShY1, InvShAmt);
8287 } else {
8288 SDValue ShX1 = DAG.getNode(ISD::SHL, DL, VT, X, One);
8289 ShX = DAG.getNode(ISD::SHL, DL, VT, ShX1, InvShAmt);
8290 ShY = DAG.getNode(ISD::SRL, DL, VT, Y, ShAmt);
8291 }
8292 }
8293 return DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
8294}
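// Worked example on i8: fshl(X = 0xAB, Y = 0xCD, Z = 4) selects the high byte
// of the 4-bit-left-shifted concatenation X:Y = 0xABCD, i.e.
// (0xAB << 4) | (0xCD >> 4) = 0xB0 | 0x0C = 0xBC.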
8295
8296// TODO: Merge with expandFunnelShift.
8297SDValue TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps,
8298 SelectionDAG &DAG) const {
8299 EVT VT = Node->getValueType(0);
8300 unsigned EltSizeInBits = VT.getScalarSizeInBits();
8301 bool IsLeft = Node->getOpcode() == ISD::ROTL;
8302 SDValue Op0 = Node->getOperand(0);
8303 SDValue Op1 = Node->getOperand(1);
8304 SDLoc DL(SDValue(Node, 0));
8305
8306 EVT ShVT = Op1.getValueType();
8307 SDValue Zero = DAG.getConstant(0, DL, ShVT);
8308
8309 // If a rotate in the other direction is more supported, use it.
8310 unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
8311 if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
8312 isOperationLegalOrCustom(RevRot, VT) && isPowerOf2_32(EltSizeInBits)) {
8313 SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
8314 return DAG.getNode(RevRot, DL, VT, Op0, Sub);
8315 }
8316
8317 if (!AllowVectorOps && VT.isVector() &&
8318 (!isOperationLegalOrCustom(ISD::SHL, VT) ||
8319 !isOperationLegalOrCustom(ISD::SRL, VT) ||
8320 !isOperationLegalOrCustom(ISD::SUB, VT) ||
8321 !isOperationLegalOrCustomOrPromote(ISD::OR, VT) ||
8322 !isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
8323 return SDValue();
8324
8325 unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
8326 unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
8327 SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
8328 SDValue ShVal;
8329 SDValue HsVal;
8330 if (isPowerOf2_32(EltSizeInBits)) {
8331 // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
8332 // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
8333 SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
8334 SDValue ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC);
8335 ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
8336 SDValue HsAmt = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC);
8337 HsVal = DAG.getNode(HsOpc, DL, VT, Op0, HsAmt);
8338 } else {
8339 // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
8340 // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
8341 SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
8342 SDValue ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Op1, BitWidthC);
8343 ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
8344 SDValue HsAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthMinusOneC, ShAmt);
8345 SDValue One = DAG.getConstant(1, DL, ShVT);
8346 HsVal =
8347 DAG.getNode(HsOpc, DL, VT, DAG.getNode(HsOpc, DL, VT, Op0, One), HsAmt);
8348 }
8349 return DAG.getNode(ISD::OR, DL, VT, ShVal, HsVal);
8350}
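// Worked example on i8: rotl(0xB3, 3) = (0xB3 << 3) | (0xB3 >> 5)
// = 0x98 | 0x05 = 0x9D, i.e. 0b10110011 rotated left by 3 is 0b10011101.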
8351
8352void TargetLowering::expandShiftParts(SDNode *Node, SDValue &Lo, SDValue &Hi,
8353 SelectionDAG &DAG) const {
8354 assert(Node->getNumOperands() == 3 && "Not a double-shift!");
8355 EVT VT = Node->getValueType(0);
8356 unsigned VTBits = VT.getScalarSizeInBits();
8357 assert(isPowerOf2_32(VTBits) && "Power-of-two integer type expected");
8358
8359 bool IsSHL = Node->getOpcode() == ISD::SHL_PARTS;
8360 bool IsSRA = Node->getOpcode() == ISD::SRA_PARTS;
8361 SDValue ShOpLo = Node->getOperand(0);
8362 SDValue ShOpHi = Node->getOperand(1);
8363 SDValue ShAmt = Node->getOperand(2);
8364 EVT ShAmtVT = ShAmt.getValueType();
8365 EVT ShAmtCCVT =
8366 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ShAmtVT);
8367 SDLoc dl(Node);
8368
8369 // ISD::FSHL and ISD::FSHR have defined overflow behavior but ISD::SHL and
8370 // ISD::SRA/L nodes haven't. Insert an AND to be safe, it's usually optimized
8371 // away during isel.
8372 SDValue SafeShAmt = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
8373 DAG.getConstant(VTBits - 1, dl, ShAmtVT));
8374 SDValue Tmp1 = IsSRA ? DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
8375 DAG.getConstant(VTBits - 1, dl, ShAmtVT))
8376 : DAG.getConstant(0, dl, VT);
8377
8378 SDValue Tmp2, Tmp3;
8379 if (IsSHL) {
8380 Tmp2 = DAG.getNode(ISD::FSHL, dl, VT, ShOpHi, ShOpLo, ShAmt);
8381 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, SafeShAmt);
8382 } else {
8383 Tmp2 = DAG.getNode(ISD::FSHR, dl, VT, ShOpHi, ShOpLo, ShAmt);
8384 Tmp3 = DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, SafeShAmt);
8385 }
8386
8387 // If the shift amount is greater than or equal to the width of a part, we
8388 // don't use the result from the FSHL/FSHR. Insert a test and select the
8389 // appropriate values for large shift amounts.
8390 SDValue AndNode = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
8391 DAG.getConstant(VTBits, dl, ShAmtVT));
8392 SDValue Cond = DAG.getSetCC(dl, ShAmtCCVT, AndNode,
8393 DAG.getConstant(0, dl, ShAmtVT), ISD::SETNE);
8394
8395 if (IsSHL) {
8396 Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
8397 Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
8398 } else {
8399 Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
8400 Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
8401 }
8402}
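// Worked example: SHL_PARTS of two i32 parts by ShAmt = 40. SafeShAmt =
// 40 & 31 = 8, and 40 & 32 != 0 selects the large-shift operands, giving
// Hi = ShOpLo << 8 and Lo = 0, which matches a 64-bit left shift by 40.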
8403
8404bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
8405 SelectionDAG &DAG) const {
8406 unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
8407 SDValue Src = Node->getOperand(OpNo);
8408 EVT SrcVT = Src.getValueType();
8409 EVT DstVT = Node->getValueType(0);
8410 SDLoc dl(SDValue(Node, 0));
8411
8412 // FIXME: Only f32 to i64 conversions are supported.
8413 if (SrcVT != MVT::f32 || DstVT != MVT::i64)
8414 return false;
8415
8416 if (Node->isStrictFPOpcode())
8417 // When a NaN is converted to an integer a trap is allowed. We can't
8418 // use this expansion here because it would eliminate that trap. Other
8419 // traps are also allowed and cannot be eliminated. See
8420 // IEEE 754-2008 sec 5.8.
8421 return false;
8422
8423 // Expand f32 -> i64 conversion
8424 // This algorithm comes from compiler-rt's implementation of fixsfdi:
8425 // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
8426 unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
8427 EVT IntVT = SrcVT.changeTypeToInteger();
8428 EVT IntShVT = getShiftAmountTy(IntVT, DAG.getDataLayout());
8429
8430 SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT);
8431 SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT);
8432 SDValue Bias = DAG.getConstant(127, dl, IntVT);
8433 SDValue SignMask = DAG.getConstant(APInt::getSignMask(SrcEltBits), dl, IntVT);
8434 SDValue SignLowBit = DAG.getConstant(SrcEltBits - 1, dl, IntVT);
8435 SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT);
8436
8437 SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Src);
8438
8439 SDValue ExponentBits = DAG.getNode(
8440 ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
8441 DAG.getZExtOrTrunc(ExponentLoBit, dl, IntShVT));
8442 SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);
8443
8444 SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT,
8445 DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
8446 DAG.getZExtOrTrunc(SignLowBit, dl, IntShVT));
8447 Sign = DAG.getSExtOrTrunc(Sign, dl, DstVT);
8448
8449 SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
8450 DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
8451 DAG.getConstant(0x00800000, dl, IntVT));
8452
8453 R = DAG.getZExtOrTrunc(R, dl, DstVT);
8454
8455 R = DAG.getSelectCC(
8456 dl, Exponent, ExponentLoBit,
8457 DAG.getNode(ISD::SHL, dl, DstVT, R,
8458 DAG.getZExtOrTrunc(
8459 DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
8460 dl, IntShVT)),
8461 DAG.getNode(ISD::SRL, dl, DstVT, R,
8462 DAG.getZExtOrTrunc(
8463 DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
8464 dl, IntShVT)),
8465 ISD::SETGT);
8466
8467 SDValue Ret = DAG.getNode(ISD::SUB, dl, DstVT,
8468 DAG.getNode(ISD::XOR, dl, DstVT, R, Sign), Sign);
8469
8470 Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT),
8471 DAG.getConstant(0, dl, DstVT), Ret, ISD::SETLT);
8472 return true;
8473}
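// Worked example: Src = 1.0f has bits 0x3F800000, so ExponentBits = 127,
// Exponent = 0, Sign = 0 and R = 0x00800000 (mantissa plus implicit bit).
// Since Exponent <= ExponentLoBit (23), R >> (23 - 0) = 1, and the final
// select keeps 1 because Exponent >= 0.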
8474
8475bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
8476 SDValue &Chain,
8477 SelectionDAG &DAG) const {
8478 SDLoc dl(SDValue(Node, 0));
8479 unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
8480 SDValue Src = Node->getOperand(OpNo);
8481
8482 EVT SrcVT = Src.getValueType();
8483 EVT DstVT = Node->getValueType(0);
8484 EVT SetCCVT =
8485 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
8486 EVT DstSetCCVT =
8487 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
8488
8489 // Only expand vector types if we have the appropriate vector bit operations.
8490 unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
8491 ISD::FP_TO_SINT;
8492 if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) ||
8493 !isOperationLegalOrCustomOrPromote(ISD::XOR, SrcVT)))
8494 return false;
8495
8496 // If the maximum float value is smaller than the signed integer range,
8497 // the destination signmask can't be represented by the float, so we can
8498 // just use FP_TO_SINT directly.
8499 const fltSemantics &APFSem = SrcVT.getFltSemantics();
8500 APFloat APF(APFSem, APInt::getZero(SrcVT.getScalarSizeInBits()));
8501 APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());
8502 if (APFloat::opOverflow &
8503 APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
8504 if (Node->isStrictFPOpcode()) {
8505 Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
8506 { Node->getOperand(0), Src });
8507 Chain = Result.getValue(1);
8508 } else
8509 Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
8510 return true;
8511 }
8512
8513 // Don't expand it if there isn't a cheap fsub instruction.
8514 if (!isOperationLegalOrCustom(
8515 Node->isStrictFPOpcode() ? ISD::STRICT_FSUB : ISD::FSUB, SrcVT))
8516 return false;
8517
8518 SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
8519 SDValue Sel;
8520
8521 if (Node->isStrictFPOpcode()) {
8522 Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
8523 Node->getOperand(0), /*IsSignaling*/ true);
8524 Chain = Sel.getValue(1);
8525 } else {
8526 Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
8527 }
8528
8529 bool Strict = Node->isStrictFPOpcode() ||
8530 shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);
8531
8532 if (Strict) {
8533 // Expand based on maximum range of FP_TO_SINT, if the value exceeds the
8534 // signmask then offset (the result of which should be fully representable).
8535 // Sel = Src < 0x8000000000000000
8536 // FltOfs = select Sel, 0, 0x8000000000000000
8537 // IntOfs = select Sel, 0, 0x8000000000000000
8538 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
8539
8540 // TODO: Should any fast-math-flags be set for the FSUB?
8541 SDValue FltOfs = DAG.getSelect(dl, SrcVT, Sel,
8542 DAG.getConstantFP(0.0, dl, SrcVT), Cst);
8543 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8544 SDValue IntOfs = DAG.getSelect(dl, DstVT, Sel,
8545 DAG.getConstant(0, dl, DstVT),
8546 DAG.getConstant(SignMask, dl, DstVT));
8547 SDValue SInt;
8548 if (Node->isStrictFPOpcode()) {
8549 SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
8550 { Chain, Src, FltOfs });
8551 SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
8552 { Val.getValue(1), Val });
8553 Chain = SInt.getValue(1);
8554 } else {
8555 SDValue Val = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, FltOfs);
8556 SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val);
8557 }
8558 Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
8559 } else {
8560 // Expand based on maximum range of FP_TO_SINT:
8561 // True = fp_to_sint(Src)
8562 // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
8563 // Result = select (Src < 0x8000000000000000), True, False
8564
8565 SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
8566 // TODO: Should any fast-math-flags be set for the FSUB?
8567 SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT,
8568 DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
8569 False = DAG.getNode(ISD::XOR, dl, DstVT, False,
8570 DAG.getConstant(SignMask, dl, DstVT));
8571 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8572 Result = DAG.getSelect(dl, DstVT, Sel, True, False);
8573 }
8574 return true;
8575}
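// E.g. for f64 -> u64: Src < 2^63 converts directly via FP_TO_SINT (both
// offsets select to 0), while Src >= 2^63 computes
// fp_to_sint(Src - 2^63) ^ 0x8000000000000000, re-adding 2^63 in the integer
// domain where it is always representable.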
8576
8577bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
8578 SDValue &Chain, SelectionDAG &DAG) const {
8579 // This transform is not correct for converting 0 when rounding mode is set
8580 // to round toward negative infinity which will produce -0.0. So disable
8581 // under strictfp.
8582 if (Node->isStrictFPOpcode())
8583 return false;
8584
8585 SDValue Src = Node->getOperand(0);
8586 EVT SrcVT = Src.getValueType();
8587 EVT DstVT = Node->getValueType(0);
8588
8589 // If the input is known to be non-negative and SINT_TO_FP is legal then use
8590 // it.
8591 if (Node->getFlags().hasNonNeg() &&
8592 isOperationLegalOrCustom(ISD::SINT_TO_FP, SrcVT)) {
8593 Result =
8594 DAG.getNode(ISD::SINT_TO_FP, SDLoc(Node), DstVT, Node->getOperand(0));
8595 return true;
8596 }
8597
8598 if (SrcVT.getScalarType() != MVT::i64 || DstVT.getScalarType() != MVT::f64)
8599 return false;
8600
8601 // Only expand vector types if we have the appropriate vector bit
8602 // operations.
8603 if (SrcVT.isVector() && (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
8604 !isOperationLegalOrCustom(ISD::FADD, DstVT) ||
8605 !isOperationLegalOrCustom(ISD::FSUB, DstVT) ||
8606 !isOperationLegalOrCustomOrPromote(ISD::OR, SrcVT) ||
8607 !isOperationLegalOrCustomOrPromote(ISD::AND, SrcVT)))
8608 return false;
8609
8610 SDLoc dl(SDValue(Node, 0));
8611
8612 // Implementation of unsigned i64 to f64 following the algorithm in
8613 // __floatundidf in compiler_rt. This implementation performs rounding
8614 // correctly in all rounding modes with the exception of converting 0
8615 // when rounding toward negative infinity. In that case the fsub will
8616 // produce -0.0. This will be added to +0.0 and produce -0.0 which is
8617 // incorrect.
8618 SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
8619 SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
8620 llvm::bit_cast<double>(UINT64_C(0x4530000000100000)), dl, DstVT);
8621 SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT);
8622 SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT);
8623 SDValue HiShift = DAG.getShiftAmountConstant(32, SrcVT, dl);
8624
8625 SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask);
8626 SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift);
8627 SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52);
8628 SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
8629 SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
8630 SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
8631 SDValue HiSub = DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
8632 Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
8633 return true;
8634}
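// The magic constants above, spelled out: bitcasting (Lo | 0x4330000000000000)
// yields the exact double 2^52 + Lo, and bitcasting (Hi | 0x4530000000000000)
// yields 2^84 + Hi*2^32. Subtracting TwoP84PlusTwoP52 = 2^84 + 2^52 gives
// Hi*2^32 - 2^52 exactly, so the final fadd computes
// (Hi*2^32 - 2^52) + (2^52 + Lo) = Hi*2^32 + Lo with one rounding step.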
8635
8636SDValue
8637TargetLowering::createSelectForFMINNUM_FMAXNUM(SDNode *Node,
8638 SelectionDAG &DAG) const {
8639 unsigned Opcode = Node->getOpcode();
8640 assert((Opcode == ISD::FMINNUM || Opcode == ISD::FMAXNUM ||
8641 Opcode == ISD::STRICT_FMINNUM || Opcode == ISD::STRICT_FMAXNUM) &&
8642 "Wrong opcode");
8643
8644 if (Node->getFlags().hasNoNaNs()) {
8645 ISD::CondCode Pred = Opcode == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
8646 EVT VT = Node->getValueType(0);
8647 if ((!isCondCodeLegal(Pred, VT.getSimpleVT()) ||
8648 !isOperationLegalOrCustom(ISD::SELECT_CC, VT)) &&
8649 VT.isVector())
8650 return SDValue();
8651 SDValue Op1 = Node->getOperand(0);
8652 SDValue Op2 = Node->getOperand(1);
8653 return DAG.getSelectCC(SDLoc(Node), Op1, Op2, Op1, Op2, Pred,
8654 Node->getFlags());
8655 }
8656
8657 return SDValue();
8658}
8659
8660SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
8661 SelectionDAG &DAG) const {
8662 if (SDValue Expanded = expandVectorNaryOpBySplitting(Node, DAG))
8663 return Expanded;
8664
8665 EVT VT = Node->getValueType(0);
8666 if (VT.isScalableVector())
8668 "Expanding fminnum/fmaxnum for scalable vectors is undefined.");
8669
8670 SDLoc dl(Node);
8671 unsigned NewOp =
8672 Node->getOpcode() == ISD::FMINNUM ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
8673
8674 if (isOperationLegalOrCustom(NewOp, VT)) {
8675 SDValue Quiet0 = Node->getOperand(0);
8676 SDValue Quiet1 = Node->getOperand(1);
8677
8678 if (!Node->getFlags().hasNoNaNs()) {
8679 // Insert canonicalizes if it's possible we need to quiet to get correct
8680 // sNaN behavior.
8681 if (!DAG.isKnownNeverSNaN(Quiet0)) {
8682 Quiet0 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet0,
8683 Node->getFlags());
8684 }
8685 if (!DAG.isKnownNeverSNaN(Quiet1)) {
8686 Quiet1 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet1,
8687 Node->getFlags());
8688 }
8689 }
8690
8691 return DAG.getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags());
8692 }
8693
8694 // If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that
8695 // instead if there are no NaNs and there can't be an incompatible zero
8696 // compare: at least one operand isn't +/-0, or there are no signed-zeros.
8697 if ((Node->getFlags().hasNoNaNs() ||
8698 (DAG.isKnownNeverNaN(Node->getOperand(0)) &&
8699 DAG.isKnownNeverNaN(Node->getOperand(1)))) &&
8700 (Node->getFlags().hasNoSignedZeros() ||
8701 DAG.isKnownNeverZeroFloat(Node->getOperand(0)) ||
8702 DAG.isKnownNeverZeroFloat(Node->getOperand(1)))) {
8703 unsigned IEEE2018Op =
8704 Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
8705 if (isOperationLegalOrCustom(IEEE2018Op, VT))
8706 return DAG.getNode(IEEE2018Op, dl, VT, Node->getOperand(0),
8707 Node->getOperand(1), Node->getFlags());
8708 }
8709
8710 if (SDValue SelCC = createSelectForFMINNUM_FMAXNUM(Node, DAG))
8711 return SelCC;
8712
8713 return SDValue();
8714}
8715
8716SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N,
8717 SelectionDAG &DAG) const {
8718 if (SDValue Expanded = expandVectorNaryOpBySplitting(N, DAG))
8719 return Expanded;
8720
8721 SDLoc DL(N);
8722 SDValue LHS = N->getOperand(0);
8723 SDValue RHS = N->getOperand(1);
8724 unsigned Opc = N->getOpcode();
8725 EVT VT = N->getValueType(0);
8726 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
8727 bool IsMax = Opc == ISD::FMAXIMUM;
8728 SDNodeFlags Flags = N->getFlags();
8729
8730 // First, implement comparison not propagating NaN. If no native fmin or fmax
8731 // is available, use a plain select with setcc instead.
8732 SDValue MinMax;
8733 unsigned CompOpcIeee = IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
8734 unsigned CompOpc = IsMax ? ISD::FMAXNUM : ISD::FMINNUM;
8735
8736 // FIXME: We should probably define fminnum/fmaxnum variants with correct
8737 // signed zero behavior.
8738 bool MinMaxMustRespectOrderedZero = false;
8739
8740 if (isOperationLegalOrCustom(CompOpcIeee, VT)) {
8741 MinMax = DAG.getNode(CompOpcIeee, DL, VT, LHS, RHS, Flags);
8742 MinMaxMustRespectOrderedZero = true;
8743 } else if (isOperationLegalOrCustom(CompOpc, VT)) {
8744 MinMax = DAG.getNode(CompOpc, DL, VT, LHS, RHS, Flags);
8745 } else {
8746 if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
8747 return DAG.UnrollVectorOp(N);
8748
8749 // NaN (if it exists) will be propagated later, so orderedness doesn't matter.
8750 SDValue Compare =
8751 DAG.getSetCC(DL, CCVT, LHS, RHS, IsMax ? ISD::SETOGT : ISD::SETOLT);
8752 MinMax = DAG.getSelect(DL, VT, Compare, LHS, RHS, Flags);
8753 }
8754
8755 // Propagate any NaN of both operands
8756 if (!N->getFlags().hasNoNaNs() &&
8757 (!DAG.isKnownNeverNaN(RHS) || !DAG.isKnownNeverNaN(LHS))) {
8758 ConstantFP *FPNaN = ConstantFP::get(*DAG.getContext(),
8759 APFloat::getNaN(VT.getFltSemantics()));
8760 MinMax = DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, LHS, RHS, ISD::SETUO),
8761 DAG.getConstantFP(*FPNaN, DL, VT), MinMax, Flags);
8762 }
8763
8764 // fminimum/fmaximum requires -0.0 less than +0.0
8765 if (!MinMaxMustRespectOrderedZero && !N->getFlags().hasNoSignedZeros() &&
8766 !DAG.isKnownNeverZeroFloat(RHS) && !DAG.isKnownNeverZeroFloat(LHS)) {
8767 SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
8768 DAG.getConstantFP(0.0, DL, VT), ISD::SETOEQ);
8769 SDValue TestZero =
8770 DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
8771 SDValue LCmp = DAG.getSelect(
8772 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHS, TestZero), LHS,
8773 MinMax, Flags);
8774 SDValue RCmp = DAG.getSelect(
8775 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, RHS, TestZero), RHS,
8776 LCmp, Flags);
8777 MinMax = DAG.getSelect(DL, VT, IsZero, RCmp, MinMax, Flags);
8778 }
8779
8780 return MinMax;
8781}
8782
8783SDValue TargetLowering::expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *Node,
8784 SelectionDAG &DAG) const {
8785 SDLoc DL(Node);
8786 SDValue LHS = Node->getOperand(0);
8787 SDValue RHS = Node->getOperand(1);
8788 unsigned Opc = Node->getOpcode();
8789 EVT VT = Node->getValueType(0);
8790 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
8791 bool IsMax = Opc == ISD::FMAXIMUMNUM;
8792 const TargetOptions &Options = DAG.getTarget().Options;
8793 SDNodeFlags Flags = Node->getFlags();
8794
8795 unsigned NewOp =
8796 Opc == ISD::FMINIMUMNUM ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
8797
8798 if (isOperationLegalOrCustom(NewOp, VT)) {
8799 if (!Flags.hasNoNaNs()) {
8800 // Insert canonicalizes if it's possible we need to quiet to get correct
8801 // sNaN behavior.
8802 if (!DAG.isKnownNeverSNaN(LHS)) {
8803 LHS = DAG.getNode(ISD::FCANONICALIZE, DL, VT, LHS, Flags);
8804 }
8805 if (!DAG.isKnownNeverSNaN(RHS)) {
8806 RHS = DAG.getNode(ISD::FCANONICALIZE, DL, VT, RHS, Flags);
8807 }
8808 }
8809
8810 return DAG.getNode(NewOp, DL, VT, LHS, RHS, Flags);
8811 }
8812
8813 // We can use FMINIMUM/FMAXIMUM if there is no NaN, since they have the
8814 // same behavior in all other cases, +0.0 vs -0.0 included.
8815 if (Flags.hasNoNaNs() ||
8816 (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS))) {
8817 unsigned IEEE2019Op =
8818 Opc == ISD::FMINIMUMNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
8819 if (isOperationLegalOrCustom(IEEE2019Op, VT))
8820 return DAG.getNode(IEEE2019Op, DL, VT, LHS, RHS, Flags);
8821 }
8822
8823 // FMINNUM/FMAXNUM returns qNaN if either operand is sNaN, and it may return
8824 // either one for +0.0 vs -0.0.
8825 if ((Flags.hasNoNaNs() ||
8826 (DAG.isKnownNeverSNaN(LHS) && DAG.isKnownNeverSNaN(RHS))) &&
8827 (Flags.hasNoSignedZeros() || DAG.isKnownNeverZeroFloat(LHS) ||
8828 DAG.isKnownNeverZeroFloat(RHS))) {
8829 unsigned IEEE2008Op = Opc == ISD::FMINIMUMNUM ? ISD::FMINNUM : ISD::FMAXNUM;
8830 if (isOperationLegalOrCustom(IEEE2008Op, VT))
8831 return DAG.getNode(IEEE2008Op, DL, VT, LHS, RHS, Flags);
8832 }
8833
8834 if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
8835 return DAG.UnrollVectorOp(Node);
8836
8837 // If only one operand is NaN, override it with the other operand.
8838 if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(LHS)) {
8839 LHS = DAG.getSelectCC(DL, LHS, LHS, RHS, LHS, ISD::SETUO);
8840 }
8841 if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(RHS)) {
8842 RHS = DAG.getSelectCC(DL, RHS, RHS, LHS, RHS, ISD::SETUO);
8843 }
8844
8845 SDValue MinMax =
8846 DAG.getSelectCC(DL, LHS, RHS, LHS, RHS, IsMax ? ISD::SETGT : ISD::SETLT);
8847
8848 // TODO: We need to quiet sNaN under strictfp.
8849
8850 // Fixup signed zero behavior.
8851 if (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros() ||
8852 DAG.isKnownNeverZeroFloat(LHS) || DAG.isKnownNeverZeroFloat(RHS)) {
8853 return MinMax;
8854 }
8855 SDValue TestZero =
8856 DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
8857 SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
8858 DAG.getConstantFP(0.0, DL, VT), ISD::SETEQ);
8859 SDValue LCmp = DAG.getSelect(
8860 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHS, TestZero), LHS,
8861 MinMax, Flags);
8862 SDValue RCmp = DAG.getSelect(
8863 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, RHS, TestZero), RHS, LCmp,
8864 Flags);
8865 return DAG.getSelect(DL, VT, IsZero, RCmp, MinMax, Flags);
8866}
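// Rough summary of the min/max families juggled above, in IEEE 754 terms:
//   FMINNUM/FMAXNUM (2008 minNum/maxNum): a quiet NaN loses to a number.
//   FMINIMUM/FMAXIMUM (2019 minimum/maximum): any NaN propagates;
//   -0.0 orders below +0.0.
//   FMINIMUMNUM/FMAXIMUMNUM (2019 minimumNumber/maximumNumber): NaNs,
//   including sNaN, lose to a number; -0.0 orders below +0.0.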
8867
8868/// Returns a true value if this FPClassTest can be performed with an ordered
8869/// fcmp to 0, and a false value if it's an unordered fcmp to 0. Returns
8870/// std::nullopt if it cannot be performed as a compare with 0.
8871static std::optional<bool> isFCmpEqualZero(FPClassTest Test,
8872 const fltSemantics &Semantics,
8873 const MachineFunction &MF) {
8874 FPClassTest OrderedMask = Test & ~fcNan;
8875 FPClassTest NanTest = Test & fcNan;
8876 bool IsOrdered = NanTest == fcNone;
8877 bool IsUnordered = NanTest == fcNan;
8878
8879 // Skip cases that are testing for only a qnan or snan.
8880 if (!IsOrdered && !IsUnordered)
8881 return std::nullopt;
8882
8883 if (OrderedMask == fcZero &&
8884 MF.getDenormalMode(Semantics).Input == DenormalMode::IEEE)
8885 return IsOrdered;
8886 if (OrderedMask == (fcZero | fcSubnormal) &&
8887 MF.getDenormalMode(Semantics).inputsAreZero())
8888 return IsOrdered;
8889 return std::nullopt;
8890}
8891
8892SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
8893 const FPClassTest OrigTestMask,
8894 SDNodeFlags Flags, const SDLoc &DL,
8895 SelectionDAG &DAG) const {
8896 EVT OperandVT = Op.getValueType();
8897 assert(OperandVT.isFloatingPoint());
8898 FPClassTest Test = OrigTestMask;
8899
8900 // Degenerate cases.
8901 if (Test == fcNone)
8902 return DAG.getBoolConstant(false, DL, ResultVT, OperandVT);
8903 if (Test == fcAllFlags)
8904 return DAG.getBoolConstant(true, DL, ResultVT, OperandVT);
8905
8906 // PPC double double is a pair of doubles, of which the higher part determines
8907 // the value class.
8908 if (OperandVT == MVT::ppcf128) {
8909 Op = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::f64, Op,
8910 DAG.getConstant(1, DL, MVT::i32));
8911 OperandVT = MVT::f64;
8912 }
8913
8914 // Floating-point type properties.
8915 EVT ScalarFloatVT = OperandVT.getScalarType();
8916 const Type *FloatTy = ScalarFloatVT.getTypeForEVT(*DAG.getContext());
8917 const llvm::fltSemantics &Semantics = FloatTy->getFltSemantics();
8918 bool IsF80 = (ScalarFloatVT == MVT::f80);
8919
8920 // Some checks can be implemented using float comparisons, if floating point
8921 // exceptions are ignored.
8922 if (Flags.hasNoFPExcept() &&
8923 isOperationLegalOrCustom(ISD::SETCC, OperandVT.getScalarType())) {
8924 FPClassTest FPTestMask = Test;
8925 bool IsInvertedFP = false;
8926
8927 if (FPClassTest InvertedFPCheck =
8928 invertFPClassTestIfSimpler(FPTestMask, true)) {
8929 FPTestMask = InvertedFPCheck;
8930 IsInvertedFP = true;
8931 }
8932
8933 ISD::CondCode OrderedCmpOpcode = IsInvertedFP ? ISD::SETUNE : ISD::SETOEQ;
8934 ISD::CondCode UnorderedCmpOpcode = IsInvertedFP ? ISD::SETONE : ISD::SETUEQ;
8935
8936 // See if we can fold an | fcNan into an unordered compare.
8937 FPClassTest OrderedFPTestMask = FPTestMask & ~fcNan;
8938
8939 // Can't fold the ordered check if we're only testing for snan or qnan
8940 // individually.
8941 if ((FPTestMask & fcNan) != fcNan)
8942 OrderedFPTestMask = FPTestMask;
8943
8944 const bool IsOrdered = FPTestMask == OrderedFPTestMask;
8945
8946 if (std::optional<bool> IsCmp0 =
8947 isFCmpEqualZero(FPTestMask, Semantics, DAG.getMachineFunction());
8948 IsCmp0 && (isCondCodeLegalOrCustom(
8949 *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode,
8950 OperandVT.getScalarType().getSimpleVT()))) {
8951
8952 // If denormals could be implicitly treated as 0, this is not equivalent
8953 // to a compare with 0 since it will also be true for denormals.
8954 return DAG.getSetCC(DL, ResultVT, Op,
8955 DAG.getConstantFP(0.0, DL, OperandVT),
8956 *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode);
8957 }
8958
8959 if (FPTestMask == fcNan &&
8960 isCondCodeLegalOrCustom(IsInvertedFP ? ISD::SETO : ISD::SETUO,
8961 OperandVT.getScalarType().getSimpleVT()))
8962 return DAG.getSetCC(DL, ResultVT, Op, Op,
8963 IsInvertedFP ? ISD::SETO : ISD::SETUO);
8964
8965 bool IsOrderedInf = FPTestMask == fcInf;
8966 if ((FPTestMask == fcInf || FPTestMask == (fcInf | fcNan)) &&
8967 isCondCodeLegalOrCustom(IsOrderedInf ? OrderedCmpOpcode
8968 : UnorderedCmpOpcode,
8969 OperandVT.getScalarType().getSimpleVT()) &&
8970 isOperationLegalOrCustom(ISD::FABS, OperandVT.getScalarType()) &&
8971 (!OperandVT.isVector() ||
8972 (OperandVT.isVector() &&
8973 isOperationLegalOrCustom(ISD::FABS, OperandVT)))) {
8974 // isinf(x) --> fabs(x) == inf
8975 SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
8976 SDValue Inf =
8977 DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
8978 return DAG.getSetCC(DL, ResultVT, Abs, Inf,
8979 IsOrderedInf ? OrderedCmpOpcode : UnorderedCmpOpcode);
8980 }
8981
8982 if ((OrderedFPTestMask == fcPosInf || OrderedFPTestMask == fcNegInf) &&
8983 isCondCodeLegalOrCustom(IsOrdered ? OrderedCmpOpcode
8984 : UnorderedCmpOpcode,
8985 OperandVT.getSimpleVT())) {
8986 // isposinf(x) --> x == inf
8987 // isneginf(x) --> x == -inf
8988 // isposinf(x) || nan --> x u== inf
8989 // isneginf(x) || nan --> x u== -inf
8990
8991 SDValue Inf = DAG.getConstantFP(
8992 APFloat::getInf(Semantics, OrderedFPTestMask == fcNegInf), DL,
8993 OperandVT);
8994 return DAG.getSetCC(DL, ResultVT, Op, Inf,
8995 IsOrdered ? OrderedCmpOpcode : UnorderedCmpOpcode);
8996 }
8997
8998 if (OrderedFPTestMask == (fcSubnormal | fcZero) && !IsOrdered) {
8999 // TODO: Could handle ordered case, but it produces worse code for
9000 // x86. Maybe handle ordered if fabs is free?
9001
9002 ISD::CondCode OrderedOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
9003 ISD::CondCode UnorderedOp = IsInvertedFP ? ISD::SETOGE : ISD::SETULT;
9004
9005 if (isCondCodeLegalOrCustom(IsOrdered ? OrderedOp : UnorderedOp,
9006 OperandVT.getScalarType().getSimpleVT())) {
9007 // (issubnormal(x) || iszero(x)) --> fabs(x) < smallest_normal
9008
9009 // TODO: Maybe only makes sense if fabs is free. Integer test of
9010 // exponent bits seems better for x86.
9011 SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
9012 SDValue SmallestNormal = DAG.getConstantFP(
9013 APFloat::getSmallestNormalized(Semantics), DL, OperandVT);
9014 return DAG.getSetCC(DL, ResultVT, Abs, SmallestNormal,
9015 IsOrdered ? OrderedOp : UnorderedOp);
9016 }
9017 }
9018
9019 if (FPTestMask == fcNormal) {
9020 // TODO: Handle unordered
9021 ISD::CondCode IsFiniteOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
9022 ISD::CondCode IsNormalOp = IsInvertedFP ? ISD::SETOLT : ISD::SETUGE;
9023
9024 if (isCondCodeLegalOrCustom(IsFiniteOp,
9025 OperandVT.getScalarType().getSimpleVT()) &&
9026 isCondCodeLegalOrCustom(IsNormalOp,
9027 OperandVT.getScalarType().getSimpleVT()) &&
9028 isFAbsFree(OperandVT)) {
9029 // isnormal(x) --> fabs(x) < infinity && !(fabs(x) < smallest_normal)
9030 SDValue Inf =
9031 DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
9032 SDValue SmallestNormal = DAG.getConstantFP(
9033 APFloat::getSmallestNormalized(Semantics), DL, OperandVT);
9034
9035 SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
9036 SDValue IsFinite = DAG.getSetCC(DL, ResultVT, Abs, Inf, IsFiniteOp);
9037 SDValue IsNormal =
9038 DAG.getSetCC(DL, ResultVT, Abs, SmallestNormal, IsNormalOp);
9039 unsigned LogicOp = IsInvertedFP ? ISD::OR : ISD::AND;
9040 return DAG.getNode(LogicOp, DL, ResultVT, IsFinite, IsNormal);
9041 }
9042 }
9043 }
9044
9045 // Some checks may be represented as the inversion of a simpler check, for
9046 // example "inf|normal|subnormal|zero" => !"nan".
9046 // "inf|normal|subnormal|zero" => !"nan".
9047 bool IsInverted = false;
9048
9049 if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test, false)) {
9050 Test = InvertedCheck;
9051 IsInverted = true;
9052 }
9053
9054 // In the general case use integer operations.
9055 unsigned BitSize = OperandVT.getScalarSizeInBits();
9056 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), BitSize);
9057 if (OperandVT.isVector())
9058 IntVT = EVT::getVectorVT(*DAG.getContext(), IntVT,
9059 OperandVT.getVectorElementCount());
9060 SDValue OpAsInt = DAG.getBitcast(IntVT, Op);
9061
9062 // Various masks.
9063 APInt SignBit = APInt::getSignMask(BitSize);
9064 APInt ValueMask = APInt::getSignedMaxValue(BitSize); // All bits but sign.
9065 APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
9066 const unsigned ExplicitIntBitInF80 = 63;
9067 APInt ExpMask = Inf;
9068 if (IsF80)
9069 ExpMask.clearBit(ExplicitIntBitInF80);
9070 APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
9071 APInt QNaNBitMask =
9072 APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
9073 APInt InversionMask = APInt::getAllOnes(ResultVT.getScalarSizeInBits());
9074
9075 SDValue ValueMaskV = DAG.getConstant(ValueMask, DL, IntVT);
9076 SDValue SignBitV = DAG.getConstant(SignBit, DL, IntVT);
9077 SDValue ExpMaskV = DAG.getConstant(ExpMask, DL, IntVT);
9078 SDValue ZeroV = DAG.getConstant(0, DL, IntVT);
9079 SDValue InfV = DAG.getConstant(Inf, DL, IntVT);
9080 SDValue ResultInversionMask = DAG.getConstant(InversionMask, DL, ResultVT);
9081
9082 SDValue Res;
9083 const auto appendResult = [&](SDValue PartialRes) {
9084 if (PartialRes) {
9085 if (Res)
9086 Res = DAG.getNode(ISD::OR, DL, ResultVT, Res, PartialRes);
9087 else
9088 Res = PartialRes;
9089 }
9090 };
9091
9092 SDValue IntBitIsSetV; // Explicit integer bit in f80 mantissa is set.
9093 const auto getIntBitIsSet = [&]() -> SDValue {
9094 if (!IntBitIsSetV) {
9095 APInt IntBitMask(BitSize, 0);
9096 IntBitMask.setBit(ExplicitIntBitInF80);
9097 SDValue IntBitMaskV = DAG.getConstant(IntBitMask, DL, IntVT);
9098 SDValue IntBitV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, IntBitMaskV);
9099 IntBitIsSetV = DAG.getSetCC(DL, ResultVT, IntBitV, ZeroV, ISD::SETNE);
9100 }
9101 return IntBitIsSetV;
9102 };
9103
9104 // Split the value into sign bit and absolute value.
9105 SDValue AbsV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ValueMaskV);
9106 SDValue SignV = DAG.getSetCC(DL, ResultVT, OpAsInt,
9107 DAG.getConstant(0, DL, IntVT), ISD::SETLT);
9108
9109 // Tests that involve more than one class should be processed first.
9110 SDValue PartialRes;
9111
9112 if (IsF80)
9113 ; // Detect finite numbers of f80 by checking individual classes because
9114 // they have different settings of the explicit integer bit.
9115 else if ((Test & fcFinite) == fcFinite) {
9116 // finite(V) ==> abs(V) < exp_mask
9117 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
9118 Test &= ~fcFinite;
9119 } else if ((Test & fcFinite) == fcPosFinite) {
9120 // finite(V) && V > 0 ==> V < exp_mask
9121 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ExpMaskV, ISD::SETULT);
9122 Test &= ~fcPosFinite;
9123 } else if ((Test & fcFinite) == fcNegFinite) {
9124 // finite(V) && V < 0 ==> abs(V) < exp_mask && signbit == 1
9125 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
9126 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
9127 Test &= ~fcNegFinite;
9128 }
9129 appendResult(PartialRes);
9130
9131 if (FPClassTest PartialCheck = Test & (fcZero | fcSubnormal)) {
9132 // fcZero | fcSubnormal => test all exponent bits are 0
9133 // TODO: Handle sign bit specific cases
9134 if (PartialCheck == (fcZero | fcSubnormal)) {
9135 SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ExpMaskV);
9136 SDValue ExpIsZero =
9137 DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
9138 appendResult(ExpIsZero);
9139 Test &= ~PartialCheck & fcAllFlags;
9140 }
9141 }
9142
9143 // Check for individual classes.
9144
9145 if (unsigned PartialCheck = Test & fcZero) {
9146 if (PartialCheck == fcPosZero)
9147 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ZeroV, ISD::SETEQ);
9148 else if (PartialCheck == fcZero)
9149 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ZeroV, ISD::SETEQ);
9150 else // ISD::fcNegZero
9151 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, SignBitV, ISD::SETEQ);
9152 appendResult(PartialRes);
9153 }
9154
9155 if (unsigned PartialCheck = Test & fcSubnormal) {
9156 // issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set)
9157 // issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set)
9158 SDValue V = (PartialCheck == fcPosSubnormal) ? OpAsInt : AbsV;
9159 SDValue MantissaV = DAG.getConstant(AllOneMantissa, DL, IntVT);
9160 SDValue VMinusOneV =
9161 DAG.getNode(ISD::SUB, DL, IntVT, V, DAG.getConstant(1, DL, IntVT));
9162 PartialRes = DAG.getSetCC(DL, ResultVT, VMinusOneV, MantissaV, ISD::SETULT);
9163 if (PartialCheck == fcNegSubnormal)
9164 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
9165 appendResult(PartialRes);
9166 }
9167
9168 if (unsigned PartialCheck = Test & fcInf) {
9169 if (PartialCheck == fcPosInf)
9170 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, InfV, ISD::SETEQ);
9171 else if (PartialCheck == fcInf)
9172 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETEQ);
9173 else { // ISD::fcNegInf
9174 APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
9175 SDValue NegInfV = DAG.getConstant(NegInf, DL, IntVT);
9176 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, NegInfV, ISD::SETEQ);
9177 }
9178 appendResult(PartialRes);
9179 }
9180
9181 if (unsigned PartialCheck = Test & fcNan) {
9182 APInt InfWithQnanBit = Inf | QNaNBitMask;
9183 SDValue InfWithQnanBitV = DAG.getConstant(InfWithQnanBit, DL, IntVT);
9184 if (PartialCheck == fcNan) {
9185 // isnan(V) ==> abs(V) > int(inf)
9186 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
9187 if (IsF80) {
9188 // Recognize unsupported values as NaNs for compatibility with glibc.
9189 // For such values, (exp(V) == 0) == int_bit.
9190 SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, AbsV, ExpMaskV);
9191 SDValue ExpIsZero =
9192 DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
9193 SDValue IsPseudo =
9194 DAG.getSetCC(DL, ResultVT, getIntBitIsSet(), ExpIsZero, ISD::SETEQ);
9195 PartialRes = DAG.getNode(ISD::OR, DL, ResultVT, PartialRes, IsPseudo);
9196 }
9197 } else if (PartialCheck == fcQNan) {
9198 // isquiet(V) ==> abs(V) >= (unsigned(Inf) | quiet_bit)
9199 PartialRes =
9200 DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETGE);
9201 } else { // ISD::fcSNan
9202 // issignaling(V) ==> abs(V) > unsigned(Inf) &&
9203 // abs(V) < (unsigned(Inf) | quiet_bit)
9204 SDValue IsNan = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
9205 SDValue IsNotQnan =
9206 DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETLT);
9207 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, IsNan, IsNotQnan);
9208 }
9209 appendResult(PartialRes);
9210 }
9211
9212 if (unsigned PartialCheck = Test & fcNormal) {
9213 // isnormal(V) ==> (0 < exp < max_exp) ==> (unsigned(exp-1) < (max_exp-1))
9214 APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
9215 SDValue ExpLSBV = DAG.getConstant(ExpLSB, DL, IntVT);
9216 SDValue ExpMinus1 = DAG.getNode(ISD::SUB, DL, IntVT, AbsV, ExpLSBV);
9217 APInt ExpLimit = ExpMask - ExpLSB;
9218 SDValue ExpLimitV = DAG.getConstant(ExpLimit, DL, IntVT);
9219 PartialRes = DAG.getSetCC(DL, ResultVT, ExpMinus1, ExpLimitV, ISD::SETULT);
9220 if (PartialCheck == fcNegNormal)
9221 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
9222 else if (PartialCheck == fcPosNormal) {
9223 SDValue PosSignV =
9224 DAG.getNode(ISD::XOR, DL, ResultVT, SignV, ResultInversionMask);
9225 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, PosSignV);
9226 }
9227 if (IsF80)
9228 PartialRes =
9229 DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, getIntBitIsSet());
9230 appendResult(PartialRes);
9231 }
9232
9233 if (!Res)
9234 return DAG.getConstant(IsInverted, DL, ResultVT);
9235 if (IsInverted)
9236 Res = DAG.getNode(ISD::XOR, DL, ResultVT, Res, ResultInversionMask);
9237 return Res;
9238}
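// Worked example for f32: Inf has the integer pattern 0x7F800000, so with
// Test == fcInf the generic path reduces to abs(V) == 0x7F800000, and with
// Test == fcNan to abs(V) > 0x7F800000 (all-ones exponent, nonzero mantissa).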
9239
9240// Only expand vector types if we have the appropriate vector bit operations.
9241static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) {
9242 assert(VT.isVector() && "Expected vector type");
9243 unsigned Len = VT.getScalarSizeInBits();
9244 return TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
9245 TLI.isOperationLegalOrCustom(ISD::SUB, VT) &&
9246 TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
9247 (Len == 8 || TLI.isOperationLegalOrCustom(ISD::MUL, VT)) &&
9248 TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT);
9249}
9250
9251SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const {
9252 SDLoc dl(Node);
9253 EVT VT = Node->getValueType(0);
9254 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9255 SDValue Op = Node->getOperand(0);
9256 unsigned Len = VT.getScalarSizeInBits();
9257 assert(VT.isInteger() && "CTPOP not implemented for this type.");
9258
9259 // TODO: Add support for irregular type lengths.
9260 if (!(Len <= 128 && Len % 8 == 0))
9261 return SDValue();
9262
9263 // Only expand vector types if we have the appropriate vector bit operations.
9264 if (VT.isVector() && !canExpandVectorCTPOP(*this, VT))
9265 return SDValue();
9266
9267 // This is the "best" algorithm from
9268 // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
9269 SDValue Mask55 =
9270 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
9271 SDValue Mask33 =
9272 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
9273 SDValue Mask0F =
9274 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
9275
9276 // v = v - ((v >> 1) & 0x55555555...)
9277 Op = DAG.getNode(ISD::SUB, dl, VT, Op,
9278 DAG.getNode(ISD::AND, dl, VT,
9279 DAG.getNode(ISD::SRL, dl, VT, Op,
9280 DAG.getConstant(1, dl, ShVT)),
9281 Mask55));
9282 // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
9283 Op = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
9284 DAG.getNode(ISD::AND, dl, VT,
9285 DAG.getNode(ISD::SRL, dl, VT, Op,
9286 DAG.getConstant(2, dl, ShVT)),
9287 Mask33));
9288 // v = (v + (v >> 4)) & 0x0F0F0F0F...
9289 Op = DAG.getNode(ISD::AND, dl, VT,
9290 DAG.getNode(ISD::ADD, dl, VT, Op,
9291 DAG.getNode(ISD::SRL, dl, VT, Op,
9292 DAG.getConstant(4, dl, ShVT))),
9293 Mask0F);
9294
9295 if (Len <= 8)
9296 return Op;
9297
9298 // Avoid the multiply if we only have 2 bytes to add.
9299 // TODO: Only doing this for scalars because vectors weren't as obviously
9300 // improved.
9301 if (Len == 16 && !VT.isVector()) {
9302 // v = (v + (v >> 8)) & 0x00FF;
9303 return DAG.getNode(ISD::AND, dl, VT,
9304 DAG.getNode(ISD::ADD, dl, VT, Op,
9305 DAG.getNode(ISD::SRL, dl, VT, Op,
9306 DAG.getConstant(8, dl, ShVT))),
9307 DAG.getConstant(0xFF, dl, VT));
9308 }
9309
9310 // v = (v * 0x01010101...) >> (Len - 8)
9311 SDValue V;
9312 if (isOperationLegalOrCustom(
9313 ISD::MUL, getTypeToTransformTo(*DAG.getContext(), VT))) {
9314 SDValue Mask01 =
9315 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
9316 V = DAG.getNode(ISD::MUL, dl, VT, Op, Mask01);
9317 } else {
9318 V = Op;
9319 for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
9320 SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
9321 V = DAG.getNode(ISD::ADD, dl, VT, V,
9322 DAG.getNode(ISD::SHL, dl, VT, V, ShiftC));
9323 }
9324 }
9325 return DAG.getNode(ISD::SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT));
9326}
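// Worked example on i8: v = 0xB3 (0b10110011, popcount 5):
//   v - ((v >> 1) & 0x55)          = 0x62 (2-bit field counts 01,10,00,10)
//   (v & 0x33) + ((v >> 2) & 0x33) = 0x32 (nibble counts 3 and 2)
//   (v + (v >> 4)) & 0x0F          = 0x05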
9327
9328SDValue TargetLowering::expandVPCTPOP(SDNode *Node, SelectionDAG &DAG) const {
9329 SDLoc dl(Node);
9330 EVT VT = Node->getValueType(0);
9331 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9332 SDValue Op = Node->getOperand(0);
9333 SDValue Mask = Node->getOperand(1);
9334 SDValue VL = Node->getOperand(2);
9335 unsigned Len = VT.getScalarSizeInBits();
9336 assert(VT.isInteger() && "VP_CTPOP not implemented for this type.");
9337
9338 // TODO: Add support for irregular type lengths.
9339 if (!(Len <= 128 && Len % 8 == 0))
9340 return SDValue();
9341
9342 // This is the same algorithm as expandCTPOP, from
9343 // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
9344 SDValue Mask55 =
9345 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
9346 SDValue Mask33 =
9347 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
9348 SDValue Mask0F =
9349 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
9350
9351 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5;
9352
9353 // v = v - ((v >> 1) & 0x55555555...)
9354 Tmp1 = DAG.getNode(ISD::VP_AND, dl, VT,
9355 DAG.getNode(ISD::VP_SRL, dl, VT, Op,
9356 DAG.getConstant(1, dl, ShVT), Mask, VL),
9357 Mask55, Mask, VL);
9358 Op = DAG.getNode(ISD::VP_SUB, dl, VT, Op, Tmp1, Mask, VL);
9359
9360 // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
9361 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Op, Mask33, Mask, VL);
9362 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT,
9363 DAG.getNode(ISD::VP_SRL, dl, VT, Op,
9364 DAG.getConstant(2, dl, ShVT), Mask, VL),
9365 Mask33, Mask, VL);
9366 Op = DAG.getNode(ISD::VP_ADD, dl, VT, Tmp2, Tmp3, Mask, VL);
9367
9368 // v = (v + (v >> 4)) & 0x0F0F0F0F...
9369 Tmp4 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(4, dl, ShVT),
9370 Mask, VL),
9371 Tmp5 = DAG.getNode(ISD::VP_ADD, dl, VT, Op, Tmp4, Mask, VL);
9372 Op = DAG.getNode(ISD::VP_AND, dl, VT, Tmp5, Mask0F, Mask, VL);
9373
9374 if (Len <= 8)
9375 return Op;
9376
9377 // v = (v * 0x01010101...) >> (Len - 8)
9378 SDValue V;
9379 if (isOperationLegalOrCustom(
9380 ISD::VP_MUL, getTypeToTransformTo(*DAG.getContext(), VT))) {
9381 SDValue Mask01 =
9382 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
9383 V = DAG.getNode(ISD::VP_MUL, dl, VT, Op, Mask01, Mask, VL);
9384 } else {
9385 V = Op;
9386 for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
9387 SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
9388 V = DAG.getNode(ISD::VP_ADD, dl, VT, V,
9389 DAG.getNode(ISD::VP_SHL, dl, VT, V, ShiftC, Mask, VL),
9390 Mask, VL);
9391 }
9392 }
9393 return DAG.getNode(ISD::VP_SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT),
9394 Mask, VL);
9395}
9396
9397SDValue TargetLowering::expandCTLZ(SDNode *Node, SelectionDAG &DAG) const {
9398 SDLoc dl(Node);
9399 EVT VT = Node->getValueType(0);
9400 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9401 SDValue Op = Node->getOperand(0);
9402 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9403
9404 // If the non-ZERO_UNDEF version is supported we can use that instead.
9405 if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
9406 isOperationLegalOrCustom(ISD::CTLZ, VT))
9407 return DAG.getNode(ISD::CTLZ, dl, VT, Op);
9408
9409 // If the ZERO_UNDEF version is supported use that and handle the zero case.
9410 if (isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) {
9411 EVT SetCCVT =
9412 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9413 SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op);
9414 SDValue Zero = DAG.getConstant(0, dl, VT);
9415 SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
9416 return DAG.getSelect(dl, VT, SrcIsZero,
9417 DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ);
9418 }
9419
9420 // Only expand vector types if we have the appropriate vector bit operations.
9421 // This includes the operations needed to expand CTPOP if it isn't supported.
9422 if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
9423 (!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
9424 !canExpandVectorCTPOP(*this, VT)) ||
9425 !isOperationLegalOrCustom(ISD::SRL, VT) ||
9426 !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
9427 return SDValue();
9428
9429 // for now, we do this:
9430 // x = x | (x >> 1);
9431 // x = x | (x >> 2);
9432 // ...
9433 // x = x | (x >>16);
9434 // x = x | (x >>32); // for 64-bit input
9435 // return popcount(~x);
9436 //
9437 // Ref: "Hacker's Delight" by Henry Warren
9438 for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
9439 SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
9440 Op = DAG.getNode(ISD::OR, dl, VT, Op,
9441 DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));
9442 }
9443 Op = DAG.getNOT(dl, Op, VT);
9444 return DAG.getNode(ISD::CTPOP, dl, VT, Op);
9445}
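// Worked example on i16: Op = 0x00F0 smears to 0x00FF after the OR-shift
// chain, so ~Op = 0xFF00 and popcount(0xFF00) = 8 = ctlz(0x00F0).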
9446
9447SDValue TargetLowering::expandVPCTLZ(SDNode *Node, SelectionDAG &DAG) const {
9448 SDLoc dl(Node);
9449 EVT VT = Node->getValueType(0);
9450 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9451 SDValue Op = Node->getOperand(0);
9452 SDValue Mask = Node->getOperand(1);
9453 SDValue VL = Node->getOperand(2);
9454 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9455
9456 // do this:
9457 // x = x | (x >> 1);
9458 // x = x | (x >> 2);
9459 // ...
9460 // x = x | (x >>16);
9461 // x = x | (x >>32); // for 64-bit input
9462 // return popcount(~x);
9463 for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
9464 SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
9465 Op = DAG.getNode(ISD::VP_OR, dl, VT, Op,
9466 DAG.getNode(ISD::VP_SRL, dl, VT, Op, Tmp, Mask, VL), Mask,
9467 VL);
9468 }
9469 Op = DAG.getNode(ISD::VP_XOR, dl, VT, Op, DAG.getAllOnesConstant(dl, VT),
9470 Mask, VL);
9471 return DAG.getNode(ISD::VP_CTPOP, dl, VT, Op, Mask, VL);
9472}
9473
9474SDValue TargetLowering::CTTZTableLookup(SDNode *Node, SelectionDAG &DAG,
9475 const SDLoc &DL, EVT VT, SDValue Op,
9476 unsigned BitWidth) const {
9477 if (BitWidth != 32 && BitWidth != 64)
9478 return SDValue();
9479 APInt DeBruijn = BitWidth == 32 ? APInt(32, 0x077CB531U)
9480 : APInt(64, 0x0218A392CD3D5DBFULL);
9481 const DataLayout &TD = DAG.getDataLayout();
9482 MachinePointerInfo PtrInfo =
9483 MachinePointerInfo::getConstantPool(DAG.getMachineFunction());
9484 unsigned ShiftAmt = BitWidth - Log2_32(BitWidth);
9485 SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
9486 SDValue Lookup = DAG.getNode(
9487 ISD::SRL, DL, VT,
9488 DAG.getNode(ISD::MUL, DL, VT, DAG.getNode(ISD::AND, DL, VT, Op, Neg),
9489 DAG.getConstant(DeBruijn, DL, VT)),
9490 DAG.getShiftAmountConstant(ShiftAmt, VT, DL));
9491 Lookup = DAG.getSExtOrTrunc(Lookup, DL, getPointerTy(TD));
9492
9493 SmallVector<uint8_t> Table(BitWidth, 0);
9494 for (unsigned i = 0; i < BitWidth; i++) {
9495 APInt Shl = DeBruijn.shl(i);
9496 APInt Lshr = Shl.lshr(ShiftAmt);
9497 Table[Lshr.getZExtValue()] = i;
9498 }
9499
9500 // Create a ConstantArray in Constant Pool
9501 auto *CA = ConstantDataArray::get(*DAG.getContext(), Table);
9502 SDValue CPIdx = DAG.getConstantPool(CA, getPointerTy(TD),
9503 TD.getPrefTypeAlign(CA->getType()));
9504 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getEntryNode(),
9505 DAG.getMemBasePlusOffset(CPIdx, Lookup, DL),
9506 PtrInfo, MVT::i8);
9507 if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF)
9508 return ExtLoad;
9509
9510 EVT SetCCVT =
9511 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9512 SDValue Zero = DAG.getConstant(0, DL, VT);
9513 SDValue SrcIsZero = DAG.getSetCC(DL, SetCCVT, Op, Zero, ISD::SETEQ);
9514 return DAG.getSelect(DL, VT, SrcIsZero,
9515 DAG.getConstant(BitWidth, DL, VT), ExtLoad);
9516}
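// [Illustrative sketch, editorial addition.] The de Bruijn lookup above,
// specialized to 32 bits: x & -x isolates the lowest set bit, the multiply
// moves a unique 5-bit pattern into the top bits, and the shifted product
// indexes the table. Input 0 lands on table slot 0, which is why only the
// CTTZ_ZERO_UNDEF opcode may return the loaded value directly.
static unsigned cttz32_debruijn(uint32_t v) {
  static const uint8_t Table[32] = {
      0,  1,  28, 2,  29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4,  8,
      31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6,  11, 5,  10, 9};
  return Table[((v & (0u - v)) * 0x077CB531u) >> 27]; // 27 == 32 - Log2_32(32)
}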
9517
9518SDValue TargetLowering::expandCTTZ(SDNode *Node, SelectionDAG &DAG) const {
9519 SDLoc dl(Node);
9520 EVT VT = Node->getValueType(0);
9521 SDValue Op = Node->getOperand(0);
9522 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9523
9524 // If the non-ZERO_UNDEF version is supported we can use that instead.
9525 if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
9526 isOperationLegalOrCustom(ISD::CTTZ, VT))
9527 return DAG.getNode(ISD::CTTZ, dl, VT, Op);
9528
9529 // If the ZERO_UNDEF version is supported use that and handle the zero case.
9530 if (isOperationLegalOrCustom(ISD::CTTZ_ZERO_UNDEF, VT)) {
9531 EVT SetCCVT =
9532 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9533 SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op);
9534 SDValue Zero = DAG.getConstant(0, dl, VT);
9535 SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
9536 return DAG.getSelect(dl, VT, SrcIsZero,
9537 DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ);
9538 }
9539
9540 // Only expand vector types if we have the appropriate vector bit operations.
9541 // This includes the operations needed to expand CTPOP if it isn't supported.
9542 if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
9543 (!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
9544 !isOperationLegalOrCustom(ISD::CTLZ, VT) &&
9545 !canExpandVectorCTPOP(*this, VT)) ||
9546 !isOperationLegalOrCustom(ISD::SUB, VT) ||
9547 !isOperationLegalOrCustomOrPromote(ISD::AND, VT) ||
9548 !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
9549 return SDValue();
9550
9551 // Emit Table Lookup if ISD::CTPOP used in the fallback path below is going
9552 // to be expanded or converted to a libcall.
9553 if (!VT.isVector() && !isOperationLegalOrCustomOrPromote(ISD::CTPOP, VT) &&
9554 !isOperationLegal(ISD::CTLZ, VT))
9555 if (SDValue V = CTTZTableLookup(Node, DAG, dl, VT, Op, NumBitsPerElt))
9556 return V;
9557
9558 // for now, we use: { return popcount(~x & (x - 1)); }
9559 // unless the target has ctlz but not ctpop, in which case we use:
9560 // { return 32 - nlz(~x & (x-1)); }
9561 // Ref: "Hacker's Delight" by Henry Warren
9562 SDValue Tmp = DAG.getNode(
9563 ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT),
9564 DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getConstant(1, dl, VT)));
9565
9566 // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
9567 if (isOperationLegal(ISD::CTLZ, VT) && !isOperationLegal(ISD::CTPOP, VT)) {
9568 return DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
9569 DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
9570 }
9571
9572 return DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
9573}
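// [Illustrative sketch, editorial addition; popcount32 and ctlz32 are the
// hypothetical helpers sketched earlier.] Both fallbacks rest on the same
// identity: ~x & (x - 1) is a mask of exactly cttz(x) ones.
static unsigned cttz32(uint32_t x) {
  return popcount32(~x & (x - 1));  // 32 for x == 0
}
static unsigned cttz32_via_ctlz(uint32_t x) {
  return 32 - ctlz32(~x & (x - 1)); // the CTLZ-based variant
}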
9574
9575SDValue TargetLowering::expandVPCTTZ(SDNode *Node, SelectionDAG &DAG) const {
9576 SDValue Op = Node->getOperand(0);
9577 SDValue Mask = Node->getOperand(1);
9578 SDValue VL = Node->getOperand(2);
9579 SDLoc dl(Node);
9580 EVT VT = Node->getValueType(0);
9581
9582 // Same as the vector part of expandCTTZ, use: popcount(~x & (x - 1))
9583 SDValue Not = DAG.getNode(ISD::VP_XOR, dl, VT, Op,
9584 DAG.getAllOnesConstant(dl, VT), Mask, VL);
9585 SDValue MinusOne = DAG.getNode(ISD::VP_SUB, dl, VT, Op,
9586 DAG.getConstant(1, dl, VT), Mask, VL);
9587 SDValue Tmp = DAG.getNode(ISD::VP_AND, dl, VT, Not, MinusOne, Mask, VL);
9588 return DAG.getNode(ISD::VP_CTPOP, dl, VT, Tmp, Mask, VL);
9589}
9590
9591SDValue TargetLowering::expandVPCTTZElements(SDNode *N,
9592 SelectionDAG &DAG) const {
9593 // %cond = to_bool_vec %source
9594 // %splat = splat /*val=*/VL
9595 // %tz = step_vector
9596 // %v = vp.select %cond, /*true=*/tz, /*false=*/%splat
9597 // %r = vp.reduce.umin %v
9598 SDLoc DL(N);
9599 SDValue Source = N->getOperand(0);
9600 SDValue Mask = N->getOperand(1);
9601 SDValue EVL = N->getOperand(2);
9602 EVT SrcVT = Source.getValueType();
9603 EVT ResVT = N->getValueType(0);
9604 EVT ResVecVT =
9605 EVT::getVectorVT(*DAG.getContext(), ResVT, SrcVT.getVectorElementCount());
9606
9607 // Convert to boolean vector.
9608 if (SrcVT.getScalarType() != MVT::i1) {
9609 SDValue AllZero = DAG.getConstant(0, DL, SrcVT);
9610 SrcVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
9611 SrcVT.getVectorElementCount());
9612 Source = DAG.getNode(ISD::VP_SETCC, DL, SrcVT, Source, AllZero,
9613 DAG.getCondCode(ISD::SETNE), Mask, EVL);
9614 }
9615
9616 SDValue ExtEVL = DAG.getZExtOrTrunc(EVL, DL, ResVT);
9617 SDValue Splat = DAG.getSplat(ResVecVT, DL, ExtEVL);
9618 SDValue StepVec = DAG.getStepVector(DL, ResVecVT);
9619 SDValue Select =
9620 DAG.getNode(ISD::VP_SELECT, DL, ResVecVT, Source, StepVec, Splat, EVL);
9621 return DAG.getNode(ISD::VP_REDUCE_UMIN, DL, ResVT, ExtEVL, Select, Mask, EVL);
9622}
9623
9624SDValue TargetLowering::expandVectorFindLastActive(SDNode *N,
9625 SelectionDAG &DAG) const {
9626 SDLoc DL(N);
9627 SDValue Mask = N->getOperand(0);
9628 EVT MaskVT = Mask.getValueType();
9629 EVT BoolVT = MaskVT.getScalarType();
9630
9631 // Find a suitable type for a stepvector.
9632 ConstantRange VScaleRange(1, /*isFullSet=*/true); // Fixed length default.
9633 if (MaskVT.isScalableVector())
9634 VScaleRange = getVScaleRange(&DAG.getMachineFunction().getFunction(), 64);
9635 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9636 unsigned EltWidth = TLI.getBitWidthForCttzElements(
9637 BoolVT.getTypeForEVT(*DAG.getContext()), MaskVT.getVectorElementCount(),
9638 /*ZeroIsPoison=*/true, &VScaleRange);
9639 EVT StepVT = MVT::getIntegerVT(EltWidth);
9640 EVT StepVecVT = MaskVT.changeVectorElementType(StepVT);
9641
9642 // If promotion is required to make the type legal, do it here; promotion
9643 // of integers within LegalizeVectorOps is looking for types of the same
9644 // size but with a smaller number of larger elements, not the usual larger
9645 // size with the same number of larger elements.
9646 if (TLI.getTypeAction(StepVecVT.getSimpleVT()) ==
9647 TargetLowering::TypePromoteInteger) {
9648 StepVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), StepVecVT);
9649 StepVT = StepVecVT.getVectorElementType();
9650 }
9651
9652 // Zero out lanes with inactive elements, then find the highest remaining
9653 // value from the stepvector.
9654 SDValue Zeroes = DAG.getConstant(0, DL, StepVecVT);
9655 SDValue StepVec = DAG.getStepVector(DL, StepVecVT);
9656 SDValue ActiveElts = DAG.getSelect(DL, StepVecVT, Mask, StepVec, Zeroes);
9657 SDValue HighestIdx = DAG.getNode(ISD::VECREDUCE_UMAX, DL, StepVT, ActiveElts);
9658 return DAG.getZExtOrTrunc(HighestIdx, DL, N->getValueType(0));
9659}
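// [Illustrative sketch, editorial addition.] A scalar model of the
// select + VECREDUCE_UMAX sequence above; inactive lanes contribute 0, so
// the reduction returns the index of the last active lane (meaningful only
// when at least one lane is active, per ZeroIsPoison above).
static unsigned findLastActive(const bool *Mask, unsigned NumElts) {
  unsigned Highest = 0;
  for (unsigned I = 0; I != NumElts; ++I) // step_vector: 0, 1, 2, ...
    if (Mask[I])                          // select(Mask, step, 0)
      Highest = I;                        // VECREDUCE_UMAX
  return Highest;
}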
9660
9661SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,
9662 bool IsNegative) const {
9663 SDLoc dl(N);
9664 EVT VT = N->getValueType(0);
9665 SDValue Op = N->getOperand(0);
9666
9667 // abs(x) -> smax(x,sub(0,x))
9668 if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
9669 isOperationLegal(ISD::SMAX, VT)) {
9670 SDValue Zero = DAG.getConstant(0, dl, VT);
9671 Op = DAG.getFreeze(Op);
9672 return DAG.getNode(ISD::SMAX, dl, VT, Op,
9673 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
9674 }
9675
9676 // abs(x) -> umin(x,sub(0,x))
9677 if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
9678 isOperationLegal(ISD::UMIN, VT)) {
9679 SDValue Zero = DAG.getConstant(0, dl, VT);
9680 Op = DAG.getFreeze(Op);
9681 return DAG.getNode(ISD::UMIN, dl, VT, Op,
9682 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
9683 }
9684
9685 // 0 - abs(x) -> smin(x, sub(0,x))
9686 if (IsNegative && isOperationLegal(ISD::SUB, VT) &&
9687 isOperationLegal(ISD::SMIN, VT)) {
9688 SDValue Zero = DAG.getConstant(0, dl, VT);
9689 Op = DAG.getFreeze(Op);
9690 return DAG.getNode(ISD::SMIN, dl, VT, Op,
9691 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
9692 }
9693
9694 // Only expand vector types if we have the appropriate vector operations.
9695 if (VT.isVector() &&
9696 (!isOperationLegalOrCustom(ISD::SRA, VT) ||
9697 (!IsNegative && !isOperationLegalOrCustom(ISD::ADD, VT)) ||
9698 (IsNegative && !isOperationLegalOrCustom(ISD::SUB, VT)) ||
9699 !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
9700 return SDValue();
9701
9702 Op = DAG.getFreeze(Op);
9703 SDValue Shift = DAG.getNode(
9704 ISD::SRA, dl, VT, Op,
9705 DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, dl));
9706 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift);
9707
9708 // abs(x) -> Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
9709 if (!IsNegative)
9710 return DAG.getNode(ISD::SUB, dl, VT, Xor, Shift);
9711
9712 // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
9713 return DAG.getNode(ISD::SUB, dl, VT, Shift, Xor);
9714}
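// [Illustrative sketch, editorial addition.] The generic expansion above on
// a 32-bit scalar, assuming an arithmetic right shift: s is all-ones for
// negative x and zero otherwise, so (x ^ s) - s conditionally negates x,
// and s - (x ^ s) gives the IsNegative form 0 - abs(x).
static int32_t abs32(int32_t x) {
  int32_t s = x >> 31; // sra by size(x) - 1
  return (x ^ s) - s;  // sub(xor(x, s), s)
}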
9715
9716SDValue TargetLowering::expandABD(SDNode *N, SelectionDAG &DAG) const {
9717 SDLoc dl(N);
9718 EVT VT = N->getValueType(0);
9719 SDValue LHS = N->getOperand(0);
9720 SDValue RHS = N->getOperand(1);
9721 bool IsSigned = N->getOpcode() == ISD::ABDS;
9722
9723 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
9724 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
9725 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
9726 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
9727 if (isOperationLegal(MaxOpc, VT) && isOperationLegal(MinOpc, VT)) {
9728 LHS = DAG.getFreeze(LHS);
9729 RHS = DAG.getFreeze(RHS);
9730 SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
9731 SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
9732 return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
9733 }
9734
9735 // abdu(lhs, rhs) -> or(usubsat(lhs,rhs), usubsat(rhs,lhs))
9736 if (!IsSigned && isOperationLegal(ISD::USUBSAT, VT)) {
9737 LHS = DAG.getFreeze(LHS);
9738 RHS = DAG.getFreeze(RHS);
9739 return DAG.getNode(ISD::OR, dl, VT,
9740 DAG.getNode(ISD::USUBSAT, dl, VT, LHS, RHS),
9741 DAG.getNode(ISD::USUBSAT, dl, VT, RHS, LHS));
9742 }
9743
9744 // If the subtract doesn't overflow then just use abs(sub())
9745 bool IsNonNegative = DAG.SignBitIsZero(LHS) && DAG.SignBitIsZero(RHS);
9746
9747 if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, LHS, RHS))
9748 return DAG.getNode(ISD::ABS, dl, VT,
9749 DAG.getNode(ISD::SUB, dl, VT, LHS, RHS));
9750
9751 if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, RHS, LHS))
9752 return DAG.getNode(ISD::ABS, dl, VT,
9753 DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));
9754
9755 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9756 ISD::CondCode CC = IsSigned ? ISD::SETGT : ISD::SETUGT;
9757 LHS = DAG.getFreeze(LHS);
9758 RHS = DAG.getFreeze(RHS);
9759 SDValue Cmp = DAG.getSetCC(dl, CCVT, LHS, RHS, CC);
9760
9761 // Branchless expansion iff cmp result is allbits:
9762 // abds(lhs, rhs) -> sub(sgt(lhs, rhs), xor(sgt(lhs, rhs), sub(lhs, rhs)))
9763 // abdu(lhs, rhs) -> sub(ugt(lhs, rhs), xor(ugt(lhs, rhs), sub(lhs, rhs)))
9764 if (CCVT == VT && getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
9765 SDValue Diff = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
9766 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Diff, Cmp);
9767 return DAG.getNode(ISD::SUB, dl, VT, Cmp, Xor);
9768 }
9769
9770 // Similar to the branchless expansion, use the (sign-extended) usubo overflow
9771 // flag if the (scalar) type is illegal as this is more likely to legalize
9772 // cleanly:
9773 // abdu(lhs, rhs) -> sub(xor(sub(lhs, rhs), uof(lhs, rhs)), uof(lhs, rhs))
9774 if (!IsSigned && VT.isScalarInteger() && !isTypeLegal(VT)) {
9775 SDValue USubO =
9776 DAG.getNode(ISD::USUBO, dl, DAG.getVTList(VT, MVT::i1), {LHS, RHS});
9777 SDValue Cmp = DAG.getNode(ISD::SIGN_EXTEND, dl, VT, USubO.getValue(1));
9778 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, USubO.getValue(0), Cmp);
9779 return DAG.getNode(ISD::SUB, dl, VT, Xor, Cmp);
9780 }
9781
9782 // FIXME: Should really try to split the vector in case it's legal on a
9783 // subvector.
9784 if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
9785 return DAG.UnrollVectorOp(N);
9786
9787 // abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
9788 // abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
9789 return DAG.getSelect(dl, VT, Cmp, DAG.getNode(ISD::SUB, dl, VT, LHS, RHS),
9790 DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));
9791}
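// [Illustrative sketch, editorial addition.] Two of the unsigned expansions
// above, on 32-bit scalars:
static uint32_t abdu32(uint32_t a, uint32_t b) {
  uint32_t Max = a > b ? a : b, Min = a > b ? b : a;
  return Max - Min;                // sub(umax, umin)
}
static uint32_t abdu32_branchless(uint32_t a, uint32_t b) {
  uint32_t Cmp = a > b ? ~0u : 0u; // setcc with all-bits booleans
  return Cmp - ((a - b) ^ Cmp);    // sub(cmp, xor(sub(a, b), cmp))
}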
9792
9793SDValue TargetLowering::expandAVG(SDNode *N, SelectionDAG &DAG) const {
9794 SDLoc dl(N);
9795 EVT VT = N->getValueType(0);
9796 SDValue LHS = N->getOperand(0);
9797 SDValue RHS = N->getOperand(1);
9798
9799 unsigned Opc = N->getOpcode();
9800 bool IsFloor = Opc == ISD::AVGFLOORS || Opc == ISD::AVGFLOORU;
9801 bool IsSigned = Opc == ISD::AVGCEILS || Opc == ISD::AVGFLOORS;
9802 unsigned SumOpc = IsFloor ? ISD::ADD : ISD::SUB;
9803 unsigned SignOpc = IsFloor ? ISD::AND : ISD::OR;
9804 unsigned ShiftOpc = IsSigned ? ISD::SRA : ISD::SRL;
9805 unsigned ExtOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
9806 assert((Opc == ISD::AVGFLOORS || Opc == ISD::AVGCEILS ||
9807 Opc == ISD::AVGFLOORU || Opc == ISD::AVGCEILU) &&
9808 "Unknown AVG node");
9809
9810 // If the operands are already extended, we can add+shift.
9811 bool IsExt =
9812 (IsSigned && DAG.ComputeNumSignBits(LHS) >= 2 &&
9813 DAG.ComputeNumSignBits(RHS) >= 2) ||
9814 (!IsSigned && DAG.computeKnownBits(LHS).countMinLeadingZeros() >= 1 &&
9815 DAG.computeKnownBits(RHS).countMinLeadingZeros() >= 1);
9816 if (IsExt) {
9817 SDValue Sum = DAG.getNode(ISD::ADD, dl, VT, LHS, RHS);
9818 if (!IsFloor)
9819 Sum = DAG.getNode(ISD::ADD, dl, VT, Sum, DAG.getConstant(1, dl, VT));
9820 return DAG.getNode(ShiftOpc, dl, VT, Sum,
9821 DAG.getShiftAmountConstant(1, VT, dl));
9822 }
9823
9824 // For scalars, see if we can efficiently extend/truncate to use add+shift.
9825 if (VT.isScalarInteger()) {
9826 unsigned BW = VT.getScalarSizeInBits();
9827 EVT ExtVT = VT.getIntegerVT(*DAG.getContext(), 2 * BW);
9828 if (isTypeLegal(ExtVT) && isTruncateFree(ExtVT, VT)) {
9829 LHS = DAG.getNode(ExtOpc, dl, ExtVT, LHS);
9830 RHS = DAG.getNode(ExtOpc, dl, ExtVT, RHS);
9831 SDValue Avg = DAG.getNode(ISD::ADD, dl, ExtVT, LHS, RHS);
9832 if (!IsFloor)
9833 Avg = DAG.getNode(ISD::ADD, dl, ExtVT, Avg,
9834 DAG.getConstant(1, dl, ExtVT));
9835 // Just use SRL as we will be truncating away the extended sign bits.
9836 Avg = DAG.getNode(ISD::SRL, dl, ExtVT, Avg,
9837 DAG.getShiftAmountConstant(1, ExtVT, dl));
9838 return DAG.getNode(ISD::TRUNCATE, dl, VT, Avg);
9839 }
9840 }
9841
9842 // avgflooru(lhs, rhs) -> or(lshr(add(lhs, rhs),1),shl(overflow, typesize-1))
9843 if (Opc == ISD::AVGFLOORU && VT.isScalarInteger() && !isTypeLegal(VT)) {
9844 SDValue UAddWithOverflow =
9845 DAG.getNode(ISD::UADDO, dl, DAG.getVTList(VT, MVT::i1), {RHS, LHS});
9846
9847 SDValue Sum = UAddWithOverflow.getValue(0);
9848 SDValue Overflow = UAddWithOverflow.getValue(1);
9849
9850 // Right shift the sum by 1
9851 SDValue LShrVal = DAG.getNode(ISD::SRL, dl, VT, Sum,
9852 DAG.getShiftAmountConstant(1, VT, dl));
9853
9854 SDValue ZeroExtOverflow = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Overflow);
9855 SDValue OverflowShl = DAG.getNode(
9856 ISD::SHL, dl, VT, ZeroExtOverflow,
9857 DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, dl));
9858
9859 return DAG.getNode(ISD::OR, dl, VT, LShrVal, OverflowShl);
9860 }
9861
9862 // avgceils(lhs, rhs) -> sub(or(lhs,rhs),ashr(xor(lhs,rhs),1))
9863 // avgceilu(lhs, rhs) -> sub(or(lhs,rhs),lshr(xor(lhs,rhs),1))
9864 // avgfloors(lhs, rhs) -> add(and(lhs,rhs),ashr(xor(lhs,rhs),1))
9865 // avgflooru(lhs, rhs) -> add(and(lhs,rhs),lshr(xor(lhs,rhs),1))
9866 LHS = DAG.getFreeze(LHS);
9867 RHS = DAG.getFreeze(RHS);
9868 SDValue Sign = DAG.getNode(SignOpc, dl, VT, LHS, RHS);
9869 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
9870 SDValue Shift =
9871 DAG.getNode(ShiftOpc, dl, VT, Xor, DAG.getShiftAmountConstant(1, VT, dl));
9872 return DAG.getNode(SumOpc, dl, VT, Sign, Shift);
9873}
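// [Illustrative sketch, editorial addition.] The final expansion above
// avoids the intermediate overflow of (a + b) / 2 by adding the shared bits
// once and the differing bits at half weight:
static uint32_t avgflooru32(uint32_t a, uint32_t b) {
  return (a & b) + ((a ^ b) >> 1); // add(and(a, b), lshr(xor(a, b), 1))
}
static uint32_t avgceilu32(uint32_t a, uint32_t b) {
  return (a | b) - ((a ^ b) >> 1); // sub(or(a, b), lshr(xor(a, b), 1))
}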
9874
9875SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const {
9876 SDLoc dl(N);
9877 EVT VT = N->getValueType(0);
9878 SDValue Op = N->getOperand(0);
9879
9880 if (!VT.isSimple())
9881 return SDValue();
9882
9883 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
9884 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
9885 switch (VT.getSimpleVT().getScalarType().SimpleTy) {
9886 default:
9887 return SDValue();
9888 case MVT::i16:
9889 // Use a rotate by 8. This can be further expanded if necessary.
9890 return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
9891 case MVT::i32:
9892 Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
9893 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Op,
9894 DAG.getConstant(0xFF00, dl, VT));
9895 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT));
9896 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
9897 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, dl, VT));
9898 Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
9899 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
9900 Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
9901 return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
9902 case MVT::i64:
9903 Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
9904 Tmp7 = DAG.getNode(ISD::AND, dl, VT, Op,
9905 DAG.getConstant(255ULL<<8, dl, VT));
9906 Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT));
9907 Tmp6 = DAG.getNode(ISD::AND, dl, VT, Op,
9908 DAG.getConstant(255ULL<<16, dl, VT));
9909 Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT));
9910 Tmp5 = DAG.getNode(ISD::AND, dl, VT, Op,
9911 DAG.getConstant(255ULL<<24, dl, VT));
9912 Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT));
9913 Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
9914 Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4,
9915 DAG.getConstant(255ULL<<24, dl, VT));
9916 Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
9917 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
9918 DAG.getConstant(255ULL<<16, dl, VT));
9919 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
9920 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2,
9921 DAG.getConstant(255ULL<<8, dl, VT));
9922 Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
9923 Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7);
9924 Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5);
9925 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
9926 Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
9927 Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6);
9928 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
9929 return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4);
9930 }
9931}
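// [Illustrative sketch, editorial addition.] The i32 arm above in scalar
// form; each byte is masked into place and the four pieces are OR'd:
static uint32_t bswap32(uint32_t v) {
  return (v << 24) |            // byte 0 to byte 3
         ((v & 0xFF00u) << 8) | // byte 1 to byte 2
         ((v >> 8) & 0xFF00u) | // byte 2 to byte 1
         (v >> 24);             // byte 3 to byte 0
}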
9932
9933SDValue TargetLowering::expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const {
9934 SDLoc dl(N);
9935 EVT VT = N->getValueType(0);
9936 SDValue Op = N->getOperand(0);
9937 SDValue Mask = N->getOperand(1);
9938 SDValue EVL = N->getOperand(2);
9939
9940 if (!VT.isSimple())
9941 return SDValue();
9942
9943 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
9944 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
9945 switch (VT.getSimpleVT().getScalarType().SimpleTy) {
9946 default:
9947 return SDValue();
9948 case MVT::i16:
9949 Tmp1 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
9950 Mask, EVL);
9951 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
9952 Mask, EVL);
9953 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp1, Tmp2, Mask, EVL);
9954 case MVT::i32:
9955 Tmp4 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
9956 Mask, EVL);
9957 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Op, DAG.getConstant(0xFF00, dl, VT),
9958 Mask, EVL);
9959 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT),
9960 Mask, EVL);
9961 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
9962 Mask, EVL);
9963 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
9964 DAG.getConstant(0xFF00, dl, VT), Mask, EVL);
9965 Tmp1 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
9966 Mask, EVL);
9967 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
9968 Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
9969 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
9970 case MVT::i64:
9971 Tmp8 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
9972 Mask, EVL);
9973 Tmp7 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
9974 DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
9975 Tmp7 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT),
9976 Mask, EVL);
9977 Tmp6 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
9978 DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
9979 Tmp6 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT),
9980 Mask, EVL);
9981 Tmp5 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
9982 DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
9983 Tmp5 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT),
9984 Mask, EVL);
9985 Tmp4 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
9986 Mask, EVL);
9987 Tmp4 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp4,
9988 DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
9989 Tmp3 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
9990 Mask, EVL);
9991 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp3,
9992 DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
9993 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT),
9994 Mask, EVL);
9995 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
9996 DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
9997 Tmp1 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
9998 Mask, EVL);
9999 Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp7, Mask, EVL);
10000 Tmp6 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp6, Tmp5, Mask, EVL);
10001 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
10002 Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
10003 Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp6, Mask, EVL);
10004 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
10005 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp4, Mask, EVL);
10006 }
10007}
10008
10009SDValue TargetLowering::expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
10010 SDLoc dl(N);
10011 EVT VT = N->getValueType(0);
10012 SDValue Op = N->getOperand(0);
10013 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
10014 unsigned Sz = VT.getScalarSizeInBits();
10015
10016 SDValue Tmp, Tmp2, Tmp3;
10017
10018 // If we can, perform BSWAP first and then the mask+swap the i4, then i2
10019 // and finally the i1 pairs.
10020 // TODO: We can easily support i4/i2 legal types if any target ever does.
10021 if (Sz >= 8 && isPowerOf2_32(Sz)) {
10022 // Create the masks - repeating the pattern every byte.
10023 APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
10024 APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
10025 APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
10026
10027 // BSWAP if the type is wider than a single byte.
10028 Tmp = (Sz > 8 ? DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op);
10029
10030 // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
10031 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT));
10032 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask4, dl, VT));
10033 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT));
10034 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT));
10035 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
10036
10037 // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
10038 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT));
10039 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask2, dl, VT));
10040 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT));
10041 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT));
10042 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
10043
10044 // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
10045 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT));
10046 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask1, dl, VT));
10047 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT));
10048 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT));
10049 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
10050 return Tmp;
10051 }
10052
10053 Tmp = DAG.getConstant(0, dl, VT);
10054 for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) {
10055 if (I < J)
10056 Tmp2 =
10057 DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(J - I, dl, SHVT));
10058 else
10059 Tmp2 =
10060 DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT));
10061
10062 APInt Shift = APInt::getOneBitSet(Sz, J);
10063 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT));
10064 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2);
10065 }
10066
10067 return Tmp;
10068}
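// [Illustrative sketch, editorial addition; bswap32 is the hypothetical
// helper sketched after expandBSWAP.] The power-of-two path above on 32
// bits: swap bytes first, then the i4, i2, and i1 pieces within each byte.
static uint32_t bitreverse32(uint32_t v) {
  v = bswap32(v);
  v = ((v >> 4) & 0x0F0F0F0Fu) | ((v & 0x0F0F0F0Fu) << 4); // swap i4
  v = ((v >> 2) & 0x33333333u) | ((v & 0x33333333u) << 2); // swap i2
  v = ((v >> 1) & 0x55555555u) | ((v & 0x55555555u) << 1); // swap i1
  return v;
}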
10069
10070SDValue TargetLowering::expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
10071 assert(N->getOpcode() == ISD::VP_BITREVERSE);
10072
10073 SDLoc dl(N);
10074 EVT VT = N->getValueType(0);
10075 SDValue Op = N->getOperand(0);
10076 SDValue Mask = N->getOperand(1);
10077 SDValue EVL = N->getOperand(2);
10078 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
10079 unsigned Sz = VT.getScalarSizeInBits();
10080
10081 SDValue Tmp, Tmp2, Tmp3;
10082
10083 // If we can, perform BSWAP first and then the mask+swap the i4, then i2
10084 // and finally the i1 pairs.
10085 // TODO: We can easily support i4/i2 legal types if any target ever does.
10086 if (Sz >= 8 && isPowerOf2_32(Sz)) {
10087 // Create the masks - repeating the pattern every byte.
10088 APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
10089 APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
10090 APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
10091
10092 // BSWAP if the type is wider than a single byte.
10093 Tmp = (Sz > 8 ? DAG.getNode(ISD::VP_BSWAP, dl, VT, Op, Mask, EVL) : Op);
10094
10095 // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
10096 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT),
10097 Mask, EVL);
10098 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
10099 DAG.getConstant(Mask4, dl, VT), Mask, EVL);
10100 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT),
10101 Mask, EVL);
10102 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT),
10103 Mask, EVL);
10104 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
10105
10106 // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
10107 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT),
10108 Mask, EVL);
10109 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
10110 DAG.getConstant(Mask2, dl, VT), Mask, EVL);
10111 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT),
10112 Mask, EVL);
10113 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT),
10114 Mask, EVL);
10115 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
10116
10117 // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
10118 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT),
10119 Mask, EVL);
10120 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
10121 DAG.getConstant(Mask1, dl, VT), Mask, EVL);
10122 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT),
10123 Mask, EVL);
10124 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT),
10125 Mask, EVL);
10126 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
10127 return Tmp;
10128 }
10129 return SDValue();
10130}
10131
10132std::pair<SDValue, SDValue>
10133TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
10134 SelectionDAG &DAG) const {
10135 SDLoc SL(LD);
10136 SDValue Chain = LD->getChain();
10137 SDValue BasePTR = LD->getBasePtr();
10138 EVT SrcVT = LD->getMemoryVT();
10139 EVT DstVT = LD->getValueType(0);
10140 ISD::LoadExtType ExtType = LD->getExtensionType();
10141
10142 if (SrcVT.isScalableVector())
10143 report_fatal_error("Cannot scalarize scalable vector loads");
10144
10145 unsigned NumElem = SrcVT.getVectorNumElements();
10146
10147 EVT SrcEltVT = SrcVT.getScalarType();
10148 EVT DstEltVT = DstVT.getScalarType();
10149
10150 // A vector must always be stored in memory as-is, i.e. without any padding
10151 // between the elements, since various code depends on it, e.g. in the
10152 // handling of a bitcast of a vector type to int, which may be done with a
10153 // vector store followed by an integer load. A vector that does not have
10154 // elements that are byte-sized must therefore be stored as an integer
10155 // built out of the extracted vector elements.
10156 if (!SrcEltVT.isByteSized()) {
10157 unsigned NumLoadBits = SrcVT.getStoreSizeInBits();
10158 EVT LoadVT = EVT::getIntegerVT(*DAG.getContext(), NumLoadBits);
10159
10160 unsigned NumSrcBits = SrcVT.getSizeInBits();
10161 EVT SrcIntVT = EVT::getIntegerVT(*DAG.getContext(), NumSrcBits);
10162
10163 unsigned SrcEltBits = SrcEltVT.getSizeInBits();
10164 SDValue SrcEltBitMask = DAG.getConstant(
10165 APInt::getLowBitsSet(NumLoadBits, SrcEltBits), SL, LoadVT);
10166
10167 // Load the whole vector and avoid masking off the top bits as it makes
10168 // the codegen worse.
10169 SDValue Load =
10170 DAG.getExtLoad(ISD::EXTLOAD, SL, LoadVT, Chain, BasePTR,
10171 LD->getPointerInfo(), SrcIntVT, LD->getBaseAlign(),
10172 LD->getMemOperand()->getFlags(), LD->getAAInfo());
10173
10174 SmallVector<SDValue, 8> Vals;
10175 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
10176 unsigned ShiftIntoIdx =
10177 (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
10178 SDValue ShiftAmount = DAG.getShiftAmountConstant(
10179 ShiftIntoIdx * SrcEltVT.getSizeInBits(), LoadVT, SL);
10180 SDValue ShiftedElt = DAG.getNode(ISD::SRL, SL, LoadVT, Load, ShiftAmount);
10181 SDValue Elt =
10182 DAG.getNode(ISD::AND, SL, LoadVT, ShiftedElt, SrcEltBitMask);
10183 SDValue Scalar = DAG.getNode(ISD::TRUNCATE, SL, SrcEltVT, Elt);
10184
10185 if (ExtType != ISD::NON_EXTLOAD) {
10186 unsigned ExtendOp = ISD::getExtForLoadExtType(false, ExtType);
10187 Scalar = DAG.getNode(ExtendOp, SL, DstEltVT, Scalar);
10188 }
10189
10190 Vals.push_back(Scalar);
10191 }
10192
10193 SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
10194 return std::make_pair(Value, Load.getValue(1));
10195 }
10196
10197 unsigned Stride = SrcEltVT.getSizeInBits() / 8;
10198 assert(SrcEltVT.isByteSized());
10199
10199
10200 SmallVector<SDValue, 8> Vals;
10201 SmallVector<SDValue, 8> LoadChains;
10202
10203 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
10204 SDValue ScalarLoad = DAG.getExtLoad(
10205 ExtType, SL, DstEltVT, Chain, BasePTR,
10206 LD->getPointerInfo().getWithOffset(Idx * Stride), SrcEltVT,
10207 LD->getBaseAlign(), LD->getMemOperand()->getFlags(), LD->getAAInfo());
10208
10209 BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, TypeSize::getFixed(Stride));
10210
10211 Vals.push_back(ScalarLoad.getValue(0));
10212 LoadChains.push_back(ScalarLoad.getValue(1));
10213 }
10214
10215 SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
10216 SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
10217
10218 return std::make_pair(Value, NewChain);
10219}
10220
10221SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
10222 SelectionDAG &DAG) const {
10223 SDLoc SL(ST);
10224
10225 SDValue Chain = ST->getChain();
10226 SDValue BasePtr = ST->getBasePtr();
10227 SDValue Value = ST->getValue();
10228 EVT StVT = ST->getMemoryVT();
10229
10230 if (StVT.isScalableVector())
10231 report_fatal_error("Cannot scalarize scalable vector stores");
10232
10233 // The type of the data we want to save
10234 EVT RegVT = Value.getValueType();
10235 EVT RegSclVT = RegVT.getScalarType();
10236
10237 // The type of data as saved in memory.
10238 EVT MemSclVT = StVT.getScalarType();
10239
10240 unsigned NumElem = StVT.getVectorNumElements();
10241
10242 // A vector must always be stored in memory as-is, i.e. without any padding
10243 // between the elements, since various code depends on it, e.g. in the
10244 // handling of a bitcast of a vector type to int, which may be done with a
10245 // vector store followed by an integer load. A vector that does not have
10246 // elements that are byte-sized must therefore be stored as an integer
10247 // built out of the extracted vector elements.
10248 if (!MemSclVT.isByteSized()) {
10249 unsigned NumBits = StVT.getSizeInBits();
10250 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);
10251
10252 SDValue CurrVal = DAG.getConstant(0, SL, IntVT);
10253
10254 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
10255 SDValue Elt = DAG.getExtractVectorElt(SL, RegSclVT, Value, Idx);
10256 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt);
10257 SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc);
10258 unsigned ShiftIntoIdx =
10259 (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
10260 SDValue ShiftAmount =
10261 DAG.getConstant(ShiftIntoIdx * MemSclVT.getSizeInBits(), SL, IntVT);
10262 SDValue ShiftedElt =
10263 DAG.getNode(ISD::SHL, SL, IntVT, ExtElt, ShiftAmount);
10264 CurrVal = DAG.getNode(ISD::OR, SL, IntVT, CurrVal, ShiftedElt);
10265 }
10266
10267 return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(),
10268 ST->getBaseAlign(), ST->getMemOperand()->getFlags(),
10269 ST->getAAInfo());
10270 }
10271
10272 // Store Stride in bytes
10273 unsigned Stride = MemSclVT.getSizeInBits() / 8;
10274 assert(Stride && "Zero stride!");
10275 // Extract each of the elements from the original vector and save them into
10276 // memory individually.
10277 SmallVector<SDValue, 8> Stores;
10278 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
10279 SDValue Elt = DAG.getExtractVectorElt(SL, RegSclVT, Value, Idx);
10280
10281 SDValue Ptr =
10282 DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::getFixed(Idx * Stride));
10283
10284 // This scalar TruncStore may be illegal, but we legalize it later.
10285 SDValue Store = DAG.getTruncStore(
10286 Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride),
10287 MemSclVT, ST->getBaseAlign(), ST->getMemOperand()->getFlags(),
10288 ST->getAAInfo());
10289
10290 Stores.push_back(Store);
10291 }
10292
10293 return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores);
10294}
10295
10296std::pair<SDValue, SDValue>
10297TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
10298 assert(LD->getAddressingMode() == ISD::UNINDEXED &&
10299 "unaligned indexed loads not implemented!");
10300 SDValue Chain = LD->getChain();
10301 SDValue Ptr = LD->getBasePtr();
10302 EVT VT = LD->getValueType(0);
10303 EVT LoadedVT = LD->getMemoryVT();
10304 SDLoc dl(LD);
10305 auto &MF = DAG.getMachineFunction();
10306
10307 if (VT.isFloatingPoint() || VT.isVector()) {
10308 EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
10309 if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
10310 if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
10311 LoadedVT.isVector()) {
10312 // Scalarize the load and let the individual components be handled.
10313 return scalarizeVectorLoad(LD, DAG);
10314 }
10315
10316 // Expand to a (misaligned) integer load of the same size,
10317 // then bitconvert to floating point or vector.
10318 SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr,
10319 LD->getMemOperand());
10320 SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
10321 if (LoadedVT != VT)
10322 Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
10323 ISD::ANY_EXTEND, dl, VT, Result);
10324
10325 return std::make_pair(Result, newLoad.getValue(1));
10326 }
10327
10328 // Copy the value to an (aligned) stack slot using (unaligned) integer
10329 // loads and stores, then do an (aligned) load from the stack slot.
10330 MVT RegVT = getRegisterType(*DAG.getContext(), intVT);
10331 unsigned LoadedBytes = LoadedVT.getStoreSize();
10332 unsigned RegBytes = RegVT.getSizeInBits() / 8;
10333 unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
10334
10335 // Make sure the stack slot is also aligned for the register type.
10336 SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
10337 auto FrameIndex = cast<FrameIndexSDNode>(StackBase.getNode())->getIndex();
10338 SmallVector<SDValue, 8> Stores;
10339 SDValue StackPtr = StackBase;
10340 unsigned Offset = 0;
10341
10342 EVT PtrVT = Ptr.getValueType();
10343 EVT StackPtrVT = StackPtr.getValueType();
10344
10345 SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
10346 SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
10347
10348 // Do all but one copies using the full register width.
10349 for (unsigned i = 1; i < NumRegs; i++) {
10350 // Load one integer register's worth from the original location.
10351 SDValue Load = DAG.getLoad(
10352 RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset),
10353 LD->getBaseAlign(), LD->getMemOperand()->getFlags(), LD->getAAInfo());
10354 // Follow the load with a store to the stack slot. Remember the store.
10355 Stores.push_back(DAG.getStore(
10356 Load.getValue(1), dl, Load, StackPtr,
10357 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset)));
10358 // Increment the pointers.
10359 Offset += RegBytes;
10360
10361 Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
10362 StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
10363 }
10364
10365 // The last copy may be partial. Do an extending load.
10366 EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
10367 8 * (LoadedBytes - Offset));
10368 SDValue Load = DAG.getExtLoad(
10369 ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
10370 LD->getPointerInfo().getWithOffset(Offset), MemVT, LD->getBaseAlign(),
10371 LD->getMemOperand()->getFlags(), LD->getAAInfo());
10372 // Follow the load with a store to the stack slot. Remember the store.
10373 // On big-endian machines this requires a truncating store to ensure
10374 // that the bits end up in the right place.
10375 Stores.push_back(DAG.getTruncStore(
10376 Load.getValue(1), dl, Load, StackPtr,
10377 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT));
10378
10379 // The order of the stores doesn't matter - say it with a TokenFactor.
10380 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
10381
10382 // Finally, perform the original load only redirected to the stack slot.
10383 Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
10384 MachinePointerInfo::getFixedStack(MF, FrameIndex, 0),
10385 LoadedVT);
10386
10387 // Callers expect a MERGE_VALUES node.
10388 return std::make_pair(Load, TF);
10389 }
10390
10391 assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
10392 "Unaligned load of unsupported type.");
10393
10394 // Compute the new VT that is half the size of the old one. This is an
10395 // integer MVT.
10396 unsigned NumBits = LoadedVT.getSizeInBits();
10397 EVT NewLoadedVT;
10398 NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
10399 NumBits >>= 1;
10400
10401 Align Alignment = LD->getBaseAlign();
10402 unsigned IncrementSize = NumBits / 8;
10403 ISD::LoadExtType HiExtType = LD->getExtensionType();
10404
10405 // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
10406 if (HiExtType == ISD::NON_EXTLOAD)
10407 HiExtType = ISD::ZEXTLOAD;
10408
10409 // Load the value in two parts
10410 SDValue Lo, Hi;
10411 if (DAG.getDataLayout().isLittleEndian()) {
10412 Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
10413 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
10414 LD->getAAInfo());
10415
10416 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
10417 Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
10418 LD->getPointerInfo().getWithOffset(IncrementSize),
10419 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
10420 LD->getAAInfo());
10421 } else {
10422 Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
10423 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
10424 LD->getAAInfo());
10425
10426 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
10427 Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
10428 LD->getPointerInfo().getWithOffset(IncrementSize),
10429 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
10430 LD->getAAInfo());
10431 }
10432
10433 // aggregate the two parts
10434 SDValue ShiftAmount = DAG.getShiftAmountConstant(NumBits, VT, dl);
10435 SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
10436 Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);
10437
10438 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
10439 Hi.getValue(1));
10440
10441 return std::make_pair(Result, TF);
10442}
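// [Illustrative sketch, editorial addition.] The integer tail of the
// expansion above, modeled for a little-endian i32 load with only 2-byte
// alignment: load both halves at the smaller natural alignment, then
// recombine with shl + or.
static uint32_t load32ViaHalves(const uint16_t *P) { // P is 2-byte aligned
  uint32_t Lo = P[0];     // zextload i16 (low half)
  uint32_t Hi = P[1];     // high half; zext too, since the load was NON_EXTLOAD
  return (Hi << 16) | Lo; // or(shl(hi, 16), lo)
}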
10443
10444SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
10445 SelectionDAG &DAG) const {
10446 assert(ST->getAddressingMode() == ISD::UNINDEXED &&
10447 "unaligned indexed stores not implemented!");
10448 SDValue Chain = ST->getChain();
10449 SDValue Ptr = ST->getBasePtr();
10450 SDValue Val = ST->getValue();
10451 EVT VT = Val.getValueType();
10452 Align Alignment = ST->getBaseAlign();
10453 auto &MF = DAG.getMachineFunction();
10454 EVT StoreMemVT = ST->getMemoryVT();
10455
10456 SDLoc dl(ST);
10457 if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) {
10458 EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
10459 if (isTypeLegal(intVT)) {
10460 if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
10461 StoreMemVT.isVector()) {
10462 // Scalarize the store and let the individual components be handled.
10463 SDValue Result = scalarizeVectorStore(ST, DAG);
10464 return Result;
10465 }
10466 // Expand to a bitconvert of the value to the integer type of the
10467 // same size, then a (misaligned) int store.
10468 // FIXME: Does not handle truncating floating point stores!
10469 SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
10470 Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
10471 Alignment, ST->getMemOperand()->getFlags());
10472 return Result;
10473 }
10474 // Do an (aligned) store to a stack slot, then copy from the stack slot
10475 // to the final destination using (unaligned) integer loads and stores.
10476 MVT RegVT = getRegisterType(
10477 *DAG.getContext(),
10478 EVT::getIntegerVT(*DAG.getContext(), StoreMemVT.getSizeInBits()));
10479 EVT PtrVT = Ptr.getValueType();
10480 unsigned StoredBytes = StoreMemVT.getStoreSize();
10481 unsigned RegBytes = RegVT.getSizeInBits() / 8;
10482 unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
10483
10484 // Make sure the stack slot is also aligned for the register type.
10485 SDValue StackPtr = DAG.CreateStackTemporary(StoreMemVT, RegVT);
10486 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
10487
10488 // Perform the original store, only redirected to the stack slot.
10489 SDValue Store = DAG.getTruncStore(
10490 Chain, dl, Val, StackPtr,
10491 MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoreMemVT);
10492
10493 EVT StackPtrVT = StackPtr.getValueType();
10494
10495 SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
10496 SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
10497 SmallVector<SDValue, 8> Stores;
10498 unsigned Offset = 0;
10499
10500 // Do all but one copies using the full register width.
10501 for (unsigned i = 1; i < NumRegs; i++) {
10502 // Load one integer register's worth from the stack slot.
10503 SDValue Load = DAG.getLoad(
10504 RegVT, dl, Store, StackPtr,
10505 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset));
10506 // Store it to the final location. Remember the store.
10507 Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
10508 ST->getPointerInfo().getWithOffset(Offset),
10509 ST->getBaseAlign(),
10510 ST->getMemOperand()->getFlags()));
10511 // Increment the pointers.
10512 Offset += RegBytes;
10513 StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
10514 Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
10515 }
10516
10517 // The last store may be partial. Do a truncating store. On big-endian
10518 // machines this requires an extending load from the stack slot to ensure
10519 // that the bits are in the right place.
10520 EVT LoadMemVT =
10521 EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset));
10522
10523 // Load from the stack slot.
10524 SDValue Load = DAG.getExtLoad(
10525 ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
10526 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), LoadMemVT);
10527
10528 Stores.push_back(DAG.getTruncStore(
10529 Load.getValue(1), dl, Load, Ptr,
10530 ST->getPointerInfo().getWithOffset(Offset), LoadMemVT,
10531 ST->getBaseAlign(), ST->getMemOperand()->getFlags(), ST->getAAInfo()));
10532 // The order of the stores doesn't matter - say it with a TokenFactor.
10533 SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
10534 return Result;
10535 }
10536
10537 assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
10538 "Unaligned store of unknown type.");
10539 // Get the half-size VT
10540 EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(*DAG.getContext());
10541 unsigned NumBits = NewStoredVT.getFixedSizeInBits();
10542 unsigned IncrementSize = NumBits / 8;
10543
10544 // Divide the stored value in two parts.
10545 SDValue ShiftAmount =
10546 DAG.getShiftAmountConstant(NumBits, Val.getValueType(), dl);
10547 SDValue Lo = Val;
10548 // If Val is a constant, replace the upper bits with 0. The SRL will constant
10549 // fold and not use the upper bits. A smaller constant may be easier to
10550 // materialize.
10551 if (auto *C = dyn_cast<ConstantSDNode>(Lo); C && !C->isOpaque())
10552 Lo = DAG.getNode(
10553 ISD::AND, dl, VT, Lo,
10554 DAG.getConstant(APInt::getLowBitsSet(VT.getSizeInBits(), NumBits), dl,
10555 VT));
10556 SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);
10557
10558 // Store the two parts
10559 SDValue Store1, Store2;
10560 Store1 = DAG.getTruncStore(Chain, dl,
10561 DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
10562 Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
10563 ST->getMemOperand()->getFlags());
10564
10565 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
10566 Store2 = DAG.getTruncStore(
10567 Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
10568 ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
10569 ST->getMemOperand()->getFlags(), ST->getAAInfo());
10570
10571 SDValue Result =
10572 DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
10573 return Result;
10574}
10575
10576SDValue
10577TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
10578 const SDLoc &DL, EVT DataVT,
10579 SelectionDAG &DAG,
10580 bool IsCompressedMemory) const {
10581 SDValue Increment;
10582 EVT AddrVT = Addr.getValueType();
10583 EVT MaskVT = Mask.getValueType();
10584 assert(DataVT.getVectorElementCount() == MaskVT.getVectorElementCount() &&
10585 "Incompatible types of Data and Mask");
10586 if (IsCompressedMemory) {
10587 if (DataVT.isScalableVector())
10589 "Cannot currently handle compressed memory with scalable vectors");
10590 // Incrementing the pointer according to the number of '1's in the mask.
10591 EVT MaskIntVT = EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits());
10592 SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask);
10593 if (MaskIntVT.getSizeInBits() < 32) {
10594 MaskInIntReg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg);
10595 MaskIntVT = MVT::i32;
10596 }
10597
10598 // Count '1's with POPCNT.
10599 Increment = DAG.getNode(ISD::CTPOP, DL, MaskIntVT, MaskInIntReg);
10600 Increment = DAG.getZExtOrTrunc(Increment, DL, AddrVT);
10601 // Scale is an element size in bytes.
10602 SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL,
10603 AddrVT);
10604 Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale);
10605 } else if (DataVT.isScalableVector()) {
10606 Increment = DAG.getVScale(DL, AddrVT,
10607 APInt(AddrVT.getFixedSizeInBits(),
10608 DataVT.getStoreSize().getKnownMinValue()));
10609 } else
10610 Increment = DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT);
10611
10612 return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
10613}
10614
10615static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx,
10616 EVT VecVT, const SDLoc &dl,
10617 ElementCount SubEC) {
10618 assert(!(SubEC.isScalable() && VecVT.isFixedLengthVector()) &&
10619 "Cannot index a scalable vector within a fixed-width vector");
10620
10621 unsigned NElts = VecVT.getVectorMinNumElements();
10622 unsigned NumSubElts = SubEC.getKnownMinValue();
10623 EVT IdxVT = Idx.getValueType();
10624
10625 if (VecVT.isScalableVector() && !SubEC.isScalable()) {
10626 // If this is a constant index and we know the value plus the number of
10627 // elements in the subvector minus one is less than the minimum number of
10628 // elements, then it's safe to return Idx.
10629 if (auto *IdxCst = dyn_cast<ConstantSDNode>(Idx))
10630 if (IdxCst->getZExtValue() + (NumSubElts - 1) < NElts)
10631 return Idx;
10632 SDValue VS =
10633 DAG.getVScale(dl, IdxVT, APInt(IdxVT.getFixedSizeInBits(), NElts));
10634 unsigned SubOpcode = NumSubElts <= NElts ? ISD::SUB : ISD::USUBSAT;
10635 SDValue Sub = DAG.getNode(SubOpcode, dl, IdxVT, VS,
10636 DAG.getConstant(NumSubElts, dl, IdxVT));
10637 return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx, Sub);
10638 }
10639 if (isPowerOf2_32(NElts) && NumSubElts == 1) {
10640 APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(), Log2_32(NElts));
10641 return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
10642 DAG.getConstant(Imm, dl, IdxVT));
10643 }
10644 unsigned MaxIndex = NumSubElts < NElts ? NElts - NumSubElts : 0;
10645 return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx,
10646 DAG.getConstant(MaxIndex, dl, IdxVT));
10647}
10648
10649SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG,
10650 SDValue VecPtr, EVT VecVT,
10651 SDValue Index) const {
10652 return getVectorSubVecPointer(
10653 DAG, VecPtr, VecVT,
10654 EVT::getVectorVT(*DAG.getContext(), VecVT.getVectorElementType(), 1),
10655 Index);
10656}
10657
10658SDValue TargetLowering::getVectorSubVecPointer(SelectionDAG &DAG,
10659 SDValue VecPtr, EVT VecVT,
10660 EVT SubVecVT,
10661 SDValue Index) const {
10662 SDLoc dl(Index);
10663 // Make sure the index type is big enough to compute in.
10664 Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType());
10665
10666 EVT EltVT = VecVT.getVectorElementType();
10667
10668 // Calculate the element offset and add it to the pointer.
10669 unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size.
10670 assert(EltSize * 8 == EltVT.getFixedSizeInBits() &&
10671 "Converting bits to bytes lost precision");
10672 assert(SubVecVT.getVectorElementType() == EltVT &&
10673 "Sub-vector must be a vector with matching element type");
10674 Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl,
10675 SubVecVT.getVectorElementCount());
10676
10677 EVT IdxVT = Index.getValueType();
10678 if (SubVecVT.isScalableVector())
10679 Index =
10680 DAG.getNode(ISD::MUL, dl, IdxVT, Index,
10681 DAG.getVScale(dl, IdxVT, APInt(IdxVT.getSizeInBits(), 1)));
10682
10683 Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
10684 DAG.getConstant(EltSize, dl, IdxVT));
10685 return DAG.getMemBasePlusOffset(VecPtr, Index, dl);
10686}
10687
10688//===----------------------------------------------------------------------===//
10689// Implementation of Emulated TLS Model
10690//===----------------------------------------------------------------------===//
10691
10692SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
10693 SelectionDAG &DAG) const {
10694 // Access to the address of TLS variable xyz is lowered to a function call:
10695 // __emutls_get_address( address of global variable named "__emutls_v.xyz" )
10696 EVT PtrVT = getPointerTy(DAG.getDataLayout());
10697 PointerType *VoidPtrType = PointerType::get(*DAG.getContext(), 0);
10698 SDLoc dl(GA);
10699
10700 ArgListTy Args;
10701 const GlobalValue *GV =
10702 cast<GlobalValue>(cast<GlobalAddressSDNode>(GA)->getGlobal());
10703 SmallString<32> NameString("__emutls_v.");
10704 NameString += GV->getName();
10705 StringRef EmuTlsVarName(NameString);
10706 const GlobalVariable *EmuTlsVar =
10707 GV->getParent()->getNamedGlobal(EmuTlsVarName);
10708 assert(EmuTlsVar && "Cannot find EmuTlsVar ");
10709 Args.emplace_back(DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT), VoidPtrType);
10710
10711 SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT);
10712
10714 CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
10715 CLI.setLibCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args));
10716 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
10717
10718 // TLSADDR will be codegen'ed as a call. Inform MFI that the function has calls.
10719 // At least for X86 targets, maybe good for other targets too?
10720 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
10721 MFI.setAdjustsStack(true); // Is this only for X86 target?
10722 MFI.setHasCalls(true);
10723
10724 assert((GA->getOffset() == 0) &&
10725 "Emulated TLS must have zero offset in GlobalAddressSDNode");
10726 return CallResult.first;
10727}
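// [Illustrative sketch, editorial addition.] Roughly what the emitted call
// amounts to at the source level. The control variable is really named
// "__emutls_v.xyz"; an asm label is used below because C++ identifiers
// cannot contain '.', and the void* prototype is a simplification of the
// runtime's actual __emutls_control* parameter.
extern "C" void *__emutls_get_address(void *);
extern char EmuTlsControl[] asm("__emutls_v.xyz"); // hypothetical variable xyz
static int *addressOfXyz() {
  return static_cast<int *>(__emutls_get_address(EmuTlsControl));
}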
10728
10729SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
10730 SelectionDAG &DAG) const {
10731 assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
10732 if (!isCtlzFast())
10733 return SDValue();
10734 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
10735 SDLoc dl(Op);
10736 if (isNullConstant(Op.getOperand(1)) && CC == ISD::SETEQ) {
10737 EVT VT = Op.getOperand(0).getValueType();
10738 SDValue Zext = Op.getOperand(0);
10739 if (VT.bitsLT(MVT::i32)) {
10740 VT = MVT::i32;
10741 Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
10742 }
10743 unsigned Log2b = Log2_32(VT.getSizeInBits());
10744 SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
10745 SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
10746 DAG.getConstant(Log2b, dl, MVT::i32));
10747 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
10748 }
10749 return SDValue();
10750}
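// [Illustrative sketch, editorial addition; ctlz32 is the hypothetical
// helper sketched after expandCTLZ.] Only x == 0 makes CTLZ return the full
// bit width, so shifting right by Log2_32(32) == 5 yields the compare result:
static uint32_t isZero32(uint32_t x) {
  return ctlz32(x) >> 5; // 1 iff ctlz32(x) == 32, i.e. iff x == 0
}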
10751
10752SDValue TargetLowering::expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const {
10753 SDValue Op0 = Node->getOperand(0);
10754 SDValue Op1 = Node->getOperand(1);
10755 EVT VT = Op0.getValueType();
10756 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10757 unsigned Opcode = Node->getOpcode();
10758 SDLoc DL(Node);
10759
10760 // umax(x,1) --> sub(x,cmpeq(x,0)) iff cmp result is allbits
10761 if (Opcode == ISD::UMAX && llvm::isOneOrOneSplat(Op1, true) && BoolVT == VT &&
10762 getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
10763 Op0 = DAG.getFreeze(Op0);
10764 SDValue Zero = DAG.getConstant(0, DL, VT);
10765 return DAG.getNode(ISD::SUB, DL, VT, Op0,
10766 DAG.getSetCC(DL, VT, Op0, Zero, ISD::SETEQ));
10767 }
10768
10769 // umin(x,y) -> sub(x,usubsat(x,y))
10770 // TODO: Missing freeze(Op0)?
10771 if (Opcode == ISD::UMIN && isOperationLegal(ISD::SUB, VT) &&
10772 isOperationLegal(ISD::USUBSAT, VT)) {
10773 return DAG.getNode(ISD::SUB, DL, VT, Op0,
10774 DAG.getNode(ISD::USUBSAT, DL, VT, Op0, Op1));
10775 }
10776
10777 // umax(x,y) -> add(x,usubsat(y,x))
10778 // TODO: Missing freeze(Op0)?
10779 if (Opcode == ISD::UMAX && isOperationLegal(ISD::ADD, VT) &&
10780 isOperationLegal(ISD::USUBSAT, VT)) {
10781 return DAG.getNode(ISD::ADD, DL, VT, Op0,
10782 DAG.getNode(ISD::USUBSAT, DL, VT, Op1, Op0));
10783 }
10784
10785 // FIXME: Should really try to split the vector in case it's legal on a
10786 // subvector.
10787 if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
10788 return DAG.UnrollVectorOp(Node);
10789
10790 // Attempt to find an existing SETCC node that we can reuse.
10791 // TODO: Do we need a generic doesSETCCNodeExist?
10792 // TODO: Missing freeze(Op0)/freeze(Op1)?
10793 auto buildMinMax = [&](ISD::CondCode PrefCC, ISD::CondCode AltCC,
10794 ISD::CondCode PrefCommuteCC,
10795 ISD::CondCode AltCommuteCC) {
10796 SDVTList BoolVTList = DAG.getVTList(BoolVT);
10797 for (ISD::CondCode CC : {PrefCC, AltCC}) {
10798 if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
10799 {Op0, Op1, DAG.getCondCode(CC)})) {
10800 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
10801 return DAG.getSelect(DL, VT, Cond, Op0, Op1);
10802 }
10803 }
10804 for (ISD::CondCode CC : {PrefCommuteCC, AltCommuteCC}) {
10805 if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
10806 {Op0, Op1, DAG.getCondCode(CC)})) {
10807 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
10808 return DAG.getSelect(DL, VT, Cond, Op1, Op0);
10809 }
10810 }
10811 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, PrefCC);
10812 return DAG.getSelect(DL, VT, Cond, Op0, Op1);
10813 };
10814
10815 // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
10816 // -> Y = (A < B) ? B : A
10817 // -> Y = (A >= B) ? A : B
10818 // -> Y = (A <= B) ? B : A
10819 switch (Opcode) {
10820 case ISD::SMAX:
10821 return buildMinMax(ISD::SETGT, ISD::SETGE, ISD::SETLT, ISD::SETLE);
10822 case ISD::SMIN:
10823 return buildMinMax(ISD::SETLT, ISD::SETLE, ISD::SETGT, ISD::SETGE);
10824 case ISD::UMAX:
10825 return buildMinMax(ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE);
10826 case ISD::UMIN:
10827 return buildMinMax(ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE);
10828 }
10829
10830 llvm_unreachable("How did we get here?");
10831}
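// [Illustrative sketch, editorial addition.] The usubsat identities used
// above, on 32-bit scalars: usubsat(a, b) is max(a - b, 0), so subtracting
// it clamps x down to y and adding it raises x up to y.
static uint32_t usubsat32(uint32_t a, uint32_t b) { return a > b ? a - b : 0; }
static uint32_t umin32(uint32_t x, uint32_t y) { return x - usubsat32(x, y); }
static uint32_t umax32(uint32_t x, uint32_t y) { return x + usubsat32(y, x); }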
10832
10833SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
10834 unsigned Opcode = Node->getOpcode();
10835 SDValue LHS = Node->getOperand(0);
10836 SDValue RHS = Node->getOperand(1);
10837 EVT VT = LHS.getValueType();
10838 SDLoc dl(Node);
10839
10840 assert(VT == RHS.getValueType() && "Expected operands to be the same type");
10841 assert(VT.isInteger() && "Expected operands to be integers");
10842
10843 // usub.sat(a, b) -> umax(a, b) - b
10844 if (Opcode == ISD::USUBSAT && isOperationLegal(ISD::UMAX, VT)) {
10845 SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS);
10846 return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
10847 }
10848
10849 // uadd.sat(a, b) -> umin(a, ~b) + b
10850 if (Opcode == ISD::UADDSAT && isOperationLegal(ISD::UMIN, VT)) {
10851 SDValue InvRHS = DAG.getNOT(dl, RHS, VT);
10852 SDValue Min = DAG.getNode(ISD::UMIN, dl, VT, LHS, InvRHS);
10853 return DAG.getNode(ISD::ADD, dl, VT, Min, RHS);
10854 }
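// Worked i8 examples (illustrative only): usubsat(3, 7) == umax(3, 7) - 7 ==
// 0, while usubsat(9, 7) == 9 - 7 == 2. For uaddsat(200, 100),
// umin(200, ~100) == umin(200, 155) == 155 and 155 + 100 == 255, i.e. the
// sum clamps to UINT8_MAX exactly when LHS > ~RHS.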
10855
10856 unsigned OverflowOp;
10857 switch (Opcode) {
10858 case ISD::SADDSAT:
10859 OverflowOp = ISD::SADDO;
10860 break;
10861 case ISD::UADDSAT:
10862 OverflowOp = ISD::UADDO;
10863 break;
10864 case ISD::SSUBSAT:
10865 OverflowOp = ISD::SSUBO;
10866 break;
10867 case ISD::USUBSAT:
10868 OverflowOp = ISD::USUBO;
10869 break;
10870 default:
10871 llvm_unreachable("Expected method to receive signed or unsigned saturation "
10872 "addition or subtraction node.");
10873 }
10874
10875 // FIXME: Should really try to split the vector in case it's legal on a
10876 // subvector.
10877 if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
10878 return DAG.UnrollVectorOp(Node);
10879
10880 unsigned BitWidth = LHS.getScalarValueSizeInBits();
10881 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10882 SDValue Result = DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
10883 SDValue SumDiff = Result.getValue(0);
10884 SDValue Overflow = Result.getValue(1);
10885 SDValue Zero = DAG.getConstant(0, dl, VT);
10886 SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);
10887
10888 if (Opcode == ISD::UADDSAT) {
10889 if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
10890 // (LHS + RHS) | OverflowMask
10891 SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
10892 return DAG.getNode(ISD::OR, dl, VT, SumDiff, OverflowMask);
10893 }
10894 // Overflow ? 0xffff.... : (LHS + RHS)
10895 return DAG.getSelect(dl, VT, Overflow, AllOnes, SumDiff);
10896 }
10897
10898 if (Opcode == ISD::USUBSAT) {
10899 if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
10900 // (LHS - RHS) & ~OverflowMask
10901 SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
10902 SDValue Not = DAG.getNOT(dl, OverflowMask, VT);
10903 return DAG.getNode(ISD::AND, dl, VT, SumDiff, Not);
10904 }
10905 // Overflow ? 0 : (LHS - RHS)
10906 return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
10907 }
10908
10909 if (Opcode == ISD::SADDSAT || Opcode == ISD::SSUBSAT) {
10910 APInt MinVal = APInt::getSignedMinValue(BitWidth);
10911 APInt MaxVal = APInt::getSignedMaxValue(BitWidth);
10912
10913 KnownBits KnownLHS = DAG.computeKnownBits(LHS);
10914 KnownBits KnownRHS = DAG.computeKnownBits(RHS);
10915
10916 // If either of the operand signs are known, then they are guaranteed to
10917 // only saturate in one direction. If non-negative they will saturate
10918 // towards SIGNED_MAX, if negative they will saturate towards SIGNED_MIN.
10919 //
10920 // In the case of ISD::SSUBSAT, 'x - y' is equivalent to 'x + (-y)', so the
10921 // sign of 'y' has to be flipped.
10922
10923 bool LHSIsNonNegative = KnownLHS.isNonNegative();
10924 bool RHSIsNonNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNonNegative()
10925 : KnownRHS.isNegative();
10926 if (LHSIsNonNegative || RHSIsNonNegative) {
10927 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
10928 return DAG.getSelect(dl, VT, Overflow, SatMax, SumDiff);
10929 }
10930
10931 bool LHSIsNegative = KnownLHS.isNegative();
10932 bool RHSIsNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNegative()
10933 : KnownRHS.isNonNegative();
10934 if (LHSIsNegative || RHSIsNegative) {
10935 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
10936 return DAG.getSelect(dl, VT, Overflow, SatMin, SumDiff);
10937 }
10938 }
10939
10940 // Overflow ? (SumDiff >> BW) ^ MinVal : SumDiff
10941 APInt MinVal = APInt::getSignedMinValue(BitWidth);
10942 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
10943 SDValue Shift = DAG.getNode(ISD::SRA, dl, VT, SumDiff,
10944 DAG.getConstant(BitWidth - 1, dl, VT));
10945 Result = DAG.getNode(ISD::XOR, dl, VT, Shift, SatMin);
10946 return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
10947}
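// Worked i8 example of the final select (illustrative only): for
// saddsat(100, 50) the wrapped sum is -106 (0x96); -106 >> 7 == -1 (all
// ones) and 0xFF ^ 0x80 == 0x7F == 127, the positive saturation value. For
// ssubsat(-100, 50) the wrapped result 106 shifts to 0 and 0x00 ^ 0x80 ==
// 0x80 == -128, the negative saturation value.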
10948
10949SDValue TargetLowering::expandCMP(SDNode *Node, SelectionDAG &DAG) const {
10950 unsigned Opcode = Node->getOpcode();
10951 SDValue LHS = Node->getOperand(0);
10952 SDValue RHS = Node->getOperand(1);
10953 EVT VT = LHS.getValueType();
10954 EVT ResVT = Node->getValueType(0);
10955 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10956 SDLoc dl(Node);
10957
10958 auto LTPredicate = (Opcode == ISD::UCMP ? ISD::SETULT : ISD::SETLT);
10959 auto GTPredicate = (Opcode == ISD::UCMP ? ISD::SETUGT : ISD::SETGT);
10960 SDValue IsLT = DAG.getSetCC(dl, BoolVT, LHS, RHS, LTPredicate);
10961 SDValue IsGT = DAG.getSetCC(dl, BoolVT, LHS, RHS, GTPredicate);
10962
10963 // We can't perform arithmetic on i1 values. Extending them would
10964 // probably result in worse codegen, so let's just use two selects instead.
10965 // Some targets are also just better off using selects rather than subtraction
10966 // because one of the conditions can be merged with one of the selects.
10967 // And finally, if we don't know the contents of the high bits of a boolean
10968 // value, we can't perform any arithmetic either.
10969 if (shouldExpandCmpUsingSelects(VT) || BoolVT.getScalarSizeInBits() == 1 ||
10970 getBooleanContents(BoolVT) == UndefinedBooleanContent) {
10971 SDValue SelectZeroOrOne =
10972 DAG.getSelect(dl, ResVT, IsGT, DAG.getConstant(1, dl, ResVT),
10973 DAG.getConstant(0, dl, ResVT));
10974 return DAG.getSelect(dl, ResVT, IsLT, DAG.getAllOnesConstant(dl, ResVT),
10975 SelectZeroOrOne);
10976 }
10977
10978 if (getBooleanContents(BoolVT) == ZeroOrNegativeOneBooleanContent)
10979 std::swap(IsGT, IsLT);
10980 return DAG.getSExtOrTrunc(DAG.getNode(ISD::SUB, dl, BoolVT, IsGT, IsLT), dl,
10981 ResVT);
10982}
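// Illustrative example (not from the source): with zero-or-one booleans,
// scmp(3, 7) yields IsLT == 1, IsGT == 0 and sub(IsGT, IsLT) == -1, while
// scmp(7, 3) yields 1 - 0 == 1. With all-ones booleans the two flags are
// swapped first, so sub(-1, 0) == -1 and sub(0, -1) == 1 still hold.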
10983
10984SDValue TargetLowering::expandShlSat(SDNode *Node, SelectionDAG &DAG) const {
10985 unsigned Opcode = Node->getOpcode();
10986 bool IsSigned = Opcode == ISD::SSHLSAT;
10987 SDValue LHS = Node->getOperand(0);
10988 SDValue RHS = Node->getOperand(1);
10989 EVT VT = LHS.getValueType();
10990 SDLoc dl(Node);
10991
10992 assert((Node->getOpcode() == ISD::SSHLSAT ||
10993 Node->getOpcode() == ISD::USHLSAT) &&
10994 "Expected a SHLSAT opcode");
10995 assert(VT == RHS.getValueType() && "Expected operands to be the same type");
10996 assert(VT.isInteger() && "Expected operands to be integers");
10997
10998 if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
10999 return DAG.UnrollVectorOp(Node);
11000
11001 // If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate.
11002
11003 unsigned BW = VT.getScalarSizeInBits();
11004 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11005 SDValue Result = DAG.getNode(ISD::SHL, dl, VT, LHS, RHS);
11006 SDValue Orig =
11007 DAG.getNode(IsSigned ? ISD::SRA : ISD::SRL, dl, VT, Result, RHS);
11008
11009 SDValue SatVal;
11010 if (IsSigned) {
11011 SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(BW), dl, VT);
11012 SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(BW), dl, VT);
11013 SDValue Cond =
11014 DAG.getSetCC(dl, BoolVT, LHS, DAG.getConstant(0, dl, VT), ISD::SETLT);
11015 SatVal = DAG.getSelect(dl, VT, Cond, SatMin, SatMax);
11016 } else {
11017 SatVal = DAG.getConstant(APInt::getMaxValue(BW), dl, VT);
11018 }
11019 SDValue Cond = DAG.getSetCC(dl, BoolVT, LHS, Orig, ISD::SETNE);
11020 return DAG.getSelect(dl, VT, Cond, SatVal, Result);
11021}
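// Worked i8 example (illustrative only): ushlsat(0x50, 2) computes
// 0x50 << 2 == 0x40 after truncation; shifting back gives 0x10 != 0x50, so
// the result saturates to 0xFF. sshlsat(48, 2) wraps to -64, and because
// LHS is non-negative the saturation value chosen is SatMax == 127.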
11022
11023void TargetLowering::forceExpandMultiply(SelectionDAG &DAG, const SDLoc &dl,
11024 bool Signed, SDValue &Lo, SDValue &Hi,
11025 SDValue LHS, SDValue RHS,
11026 SDValue HiLHS, SDValue HiRHS) const {
11027 EVT VT = LHS.getValueType();
11028 assert(RHS.getValueType() == VT && "Mismatching operand types");
11029
11030 assert((HiLHS && HiRHS) || (!HiLHS && !HiRHS));
11031 assert((!Signed || !HiLHS) &&
11032 "Signed flag should only be set when HiLHS and HiRHS are null");
11033
11034 // We'll expand the multiplication by brute force because we have no other
11035 // options. This is a trivially-generalized version of the code from
11036 // Hacker's Delight (itself derived from Knuth's Algorithm M from section
11037 // 4.3.1). If Signed is set, we can use arithmetic right shifts to propagate
11038 // sign bits while calculating the Hi half.
11039 unsigned Bits = VT.getSizeInBits();
11040 unsigned HalfBits = Bits / 2;
11041 SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(Bits, HalfBits), dl, VT);
11042 SDValue LL = DAG.getNode(ISD::AND, dl, VT, LHS, Mask);
11043 SDValue RL = DAG.getNode(ISD::AND, dl, VT, RHS, Mask);
11044
11045 SDValue T = DAG.getNode(ISD::MUL, dl, VT, LL, RL);
11046 SDValue TL = DAG.getNode(ISD::AND, dl, VT, T, Mask);
11047
11048 SDValue Shift = DAG.getShiftAmountConstant(HalfBits, VT, dl);
11049 // This is always an unsigned shift.
11050 SDValue TH = DAG.getNode(ISD::SRL, dl, VT, T, Shift);
11051
11052 unsigned ShiftOpc = Signed ? ISD::SRA : ISD::SRL;
11053 SDValue LH = DAG.getNode(ShiftOpc, dl, VT, LHS, Shift);
11054 SDValue RH = DAG.getNode(ShiftOpc, dl, VT, RHS, Shift);
11055
11056 SDValue U =
11057 DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LH, RL), TH);
11058 SDValue UL = DAG.getNode(ISD::AND, dl, VT, U, Mask);
11059 SDValue UH = DAG.getNode(ShiftOpc, dl, VT, U, Shift);
11060
11061 SDValue V =
11062 DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LL, RH), UL);
11063 SDValue VH = DAG.getNode(ShiftOpc, dl, VT, V, Shift);
11064
11065 Lo = DAG.getNode(ISD::ADD, dl, VT, TL,
11066 DAG.getNode(ISD::SHL, dl, VT, V, Shift));
11067
11068 Hi = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LH, RH),
11069 DAG.getNode(ISD::ADD, dl, VT, UH, VH));
11070
11071 // If HiLHS and HiRHS are set, multiply them by the opposite low part and add
11072 // the products to Hi.
11073 if (HiLHS) {
11074 Hi = DAG.getNode(ISD::ADD, dl, VT, Hi,
11075 DAG.getNode(ISD::ADD, dl, VT,
11076 DAG.getNode(ISD::MUL, dl, VT, HiRHS, LHS),
11077 DAG.getNode(ISD::MUL, dl, VT, RHS, HiLHS)));
11078 }
11079}
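// Illustrative 8-bit trace (values added for exposition): for the unsigned
// product 0xFF * 0xFF, the half-width pieces are LL = RL = LH = RH = 0xF,
// giving T = 0xE1 (TL = 1, TH = 0xE), U = 0xEF, V = 0xF0, and finally
// Lo = 0x01, Hi = 0xFE, i.e. the full product 0xFE01 == 65025.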
11080
11081void TargetLowering::forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl,
11082 bool Signed, const SDValue LHS,
11083 const SDValue RHS, SDValue &Lo,
11084 SDValue &Hi) const {
11085 EVT VT = LHS.getValueType();
11086 assert(RHS.getValueType() == VT && "Mismatching operand types");
11087 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);
11088 // We can fall back to a libcall with an illegal type for the MUL if we
11089 // have a libcall big enough.
11090 RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
11091 if (WideVT == MVT::i16)
11092 LC = RTLIB::MUL_I16;
11093 else if (WideVT == MVT::i32)
11094 LC = RTLIB::MUL_I32;
11095 else if (WideVT == MVT::i64)
11096 LC = RTLIB::MUL_I64;
11097 else if (WideVT == MVT::i128)
11098 LC = RTLIB::MUL_I128;
11099
11100 if (LC == RTLIB::UNKNOWN_LIBCALL || !getLibcallName(LC)) {
11101 forceExpandMultiply(DAG, dl, Signed, Lo, Hi, LHS, RHS);
11102 return;
11103 }
11104
11105 SDValue HiLHS, HiRHS;
11106 if (Signed) {
11107 // The high part is obtained by SRA'ing all but one of the bits of the low
11108 // part.
11109 unsigned LoSize = VT.getFixedSizeInBits();
11110 SDValue Shift = DAG.getShiftAmountConstant(LoSize - 1, VT, dl);
11111 HiLHS = DAG.getNode(ISD::SRA, dl, VT, LHS, Shift);
11112 HiRHS = DAG.getNode(ISD::SRA, dl, VT, RHS, Shift);
11113 } else {
11114 HiLHS = DAG.getConstant(0, dl, VT);
11115 HiRHS = DAG.getConstant(0, dl, VT);
11116 }
11117
11118 // Attempt a libcall.
11119 SDValue Ret;
11120 TargetLowering::MakeLibCallOptions CallOptions;
11121 CallOptions.setIsSigned(Signed);
11122 CallOptions.setIsPostTypeLegalization(true);
11123 if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) {
11124 // Halves of WideVT are packed into registers in different order
11125 // depending on platform endianness. This is usually handled by
11126 // the C calling convention, but we can't defer to it in
11127 // the legalizer.
11128 SDValue Args[] = {LHS, HiLHS, RHS, HiRHS};
11129 Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
11130 } else {
11131 SDValue Args[] = {HiLHS, LHS, HiRHS, RHS};
11132 Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
11133 }
11134 assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
11135 "Ret value is a collection of constituent nodes holding result.");
11136 if (DAG.getDataLayout().isLittleEndian()) {
11137 // Same as above.
11138 Lo = Ret.getOperand(0);
11139 Hi = Ret.getOperand(1);
11140 } else {
11141 Lo = Ret.getOperand(1);
11142 Hi = Ret.getOperand(0);
11143 }
11144}
11145
11146SDValue
11147TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
11148 assert((Node->getOpcode() == ISD::SMULFIX ||
11149 Node->getOpcode() == ISD::UMULFIX ||
11150 Node->getOpcode() == ISD::SMULFIXSAT ||
11151 Node->getOpcode() == ISD::UMULFIXSAT) &&
11152 "Expected a fixed point multiplication opcode");
11153
11154 SDLoc dl(Node);
11155 SDValue LHS = Node->getOperand(0);
11156 SDValue RHS = Node->getOperand(1);
11157 EVT VT = LHS.getValueType();
11158 unsigned Scale = Node->getConstantOperandVal(2);
11159 bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
11160 Node->getOpcode() == ISD::UMULFIXSAT);
11161 bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
11162 Node->getOpcode() == ISD::SMULFIXSAT);
11163 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11164 unsigned VTSize = VT.getScalarSizeInBits();
11165
11166 if (!Scale) {
11167 // [us]mul.fix(a, b, 0) -> mul(a, b)
11168 if (!Saturating) {
11169 if (isOperationLegalOrCustom(ISD::MUL, VT))
11170 return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
11171 } else if (Signed && isOperationLegalOrCustom(ISD::SMULO, VT)) {
11172 SDValue Result =
11173 DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
11174 SDValue Product = Result.getValue(0);
11175 SDValue Overflow = Result.getValue(1);
11176 SDValue Zero = DAG.getConstant(0, dl, VT);
11177
11178 APInt MinVal = APInt::getSignedMinValue(VTSize);
11179 APInt MaxVal = APInt::getSignedMaxValue(VTSize);
11180 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
11181 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
11182 // Xor the inputs, if resulting sign bit is 0 the product will be
11183 // positive, else negative.
11184 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
11185 SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Xor, Zero, ISD::SETLT);
11186 Result = DAG.getSelect(dl, VT, ProdNeg, SatMin, SatMax);
11187 return DAG.getSelect(dl, VT, Overflow, Result, Product);
11188 } else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) {
11189 SDValue Result =
11190 DAG.getNode(ISD::UMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
11191 SDValue Product = Result.getValue(0);
11192 SDValue Overflow = Result.getValue(1);
11193
11194 APInt MaxVal = APInt::getMaxValue(VTSize);
11195 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
11196 return DAG.getSelect(dl, VT, Overflow, SatMax, Product);
11197 }
11198 }
11199
11200 assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
11201 "Expected scale to be less than the number of bits if signed or at "
11202 "most the number of bits if unsigned.");
11203 assert(LHS.getValueType() == RHS.getValueType() &&
11204 "Expected both operands to be the same type");
11205
11206 // Get the upper and lower bits of the result.
11207 SDValue Lo, Hi;
11208 unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
11209 unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
11210 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VTSize * 2);
11211 if (VT.isVector())
11212 WideVT =
11213 EVT::getVectorVT(*DAG.getContext(), WideVT, VT.getVectorElementCount());
11214 if (isOperationLegalOrCustom(LoHiOp, VT)) {
11215 SDValue Result = DAG.getNode(LoHiOp, dl, DAG.getVTList(VT, VT), LHS, RHS);
11216 Lo = Result.getValue(0);
11217 Hi = Result.getValue(1);
11218 } else if (isOperationLegalOrCustom(HiOp, VT)) {
11219 Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
11220 Hi = DAG.getNode(HiOp, dl, VT, LHS, RHS);
11221 } else if (isOperationLegalOrCustom(ISD::MUL, WideVT)) {
11222 // Try for a multiplication using a wider type.
11223 unsigned Ext = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
11224 SDValue LHSExt = DAG.getNode(Ext, dl, WideVT, LHS);
11225 SDValue RHSExt = DAG.getNode(Ext, dl, WideVT, RHS);
11226 SDValue Res = DAG.getNode(ISD::MUL, dl, WideVT, LHSExt, RHSExt);
11227 Lo = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
11228 SDValue Shifted =
11229 DAG.getNode(ISD::SRA, dl, WideVT, Res,
11230 DAG.getShiftAmountConstant(VTSize, WideVT, dl));
11231 Hi = DAG.getNode(ISD::TRUNCATE, dl, VT, Shifted);
11232 } else if (VT.isVector()) {
11233 return SDValue();
11234 } else {
11235 forceExpandWideMUL(DAG, dl, Signed, LHS, RHS, Lo, Hi);
11236 }
11237
11238 if (Scale == VTSize)
11239 // Result is just the top half since we'd be shifting by the width of the
11240 // operand. Overflow is impossible, so this works for both UMULFIX and
11241 // UMULFIXSAT.
11242 return Hi;
11243
11244 // The result will need to be shifted right by the scale since both operands
11245 // are scaled. The result is given to us in 2 halves, so we only want part of
11246 // both in the result.
11247 SDValue Result = DAG.getNode(ISD::FSHR, dl, VT, Hi, Lo,
11248 DAG.getShiftAmountConstant(Scale, VT, dl));
11249 if (!Saturating)
11250 return Result;
11251
11252 if (!Signed) {
11253 // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
11254 // widened multiplication) aren't all zeroes.
11255
11256 // Saturate to max if ((Hi >> Scale) != 0),
11257 // which is the same as if (Hi > ((1 << Scale) - 1))
11258 APInt MaxVal = APInt::getMaxValue(VTSize);
11259 SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale),
11260 dl, VT);
11261 Result = DAG.getSelectCC(dl, Hi, LowMask,
11262 DAG.getConstant(MaxVal, dl, VT), Result,
11263 ISD::SETUGT);
11264
11265 return Result;
11266 }
11267
11268 // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
11269 // widened multiplication) aren't all ones or all zeroes.
11270
11271 SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(VTSize), dl, VT);
11272 SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(VTSize), dl, VT);
11273
11274 if (Scale == 0) {
11275 SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, Lo,
11276 DAG.getShiftAmountConstant(VTSize - 1, VT, dl));
11277 SDValue Overflow = DAG.getSetCC(dl, BoolVT, Hi, Sign, ISD::SETNE);
11278 // Saturated to SatMin if wide product is negative, and SatMax if wide
11279 // product is positive ...
11280 SDValue Zero = DAG.getConstant(0, dl, VT);
11281 SDValue ResultIfOverflow = DAG.getSelectCC(dl, Hi, Zero, SatMin, SatMax,
11282 ISD::SETLT);
11283 // ... but only if we overflowed.
11284 return DAG.getSelect(dl, VT, Overflow, ResultIfOverflow, Result);
11285 }
11286
11287 // We handled Scale==0 above, so all the bits to examine are in Hi.
11288
11289 // Saturate to max if ((Hi >> (Scale - 1)) > 0),
11290 // which is the same as if (Hi > (1 << (Scale - 1)) - 1)
11291 SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale - 1),
11292 dl, VT);
11293 Result = DAG.getSelectCC(dl, Hi, LowMask, SatMax, Result, ISD::SETGT);
11294 // Saturate to min if ((Hi >> (Scale - 1)) < -1),
11295 // which is the same as if (Hi < (-1 << (Scale - 1)))
11296 SDValue HighMask =
11297 DAG.getConstant(APInt::getHighBitsSet(VTSize, VTSize - Scale + 1),
11298 dl, VT);
11299 Result = DAG.getSelectCC(dl, Hi, HighMask, SatMin, Result, ISD::SETLT);
11300 return Result;
11301}
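// Worked example (illustrative only): smul.fix(40, 24, 4) encodes
// 2.5 * 1.5 with four fractional bits. The double-width product is
// 960 == 0x3C0, split as Hi = 0x03, Lo = 0xC0; fshr(Hi, Lo, 4) selects
// bits [11:4] == 60, which encodes the exact result 3.75.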
11302
11303SDValue
11304TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,
11305 SDValue LHS, SDValue RHS,
11306 unsigned Scale, SelectionDAG &DAG) const {
11307 assert((Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT ||
11308 Opcode == ISD::UDIVFIX || Opcode == ISD::UDIVFIXSAT) &&
11309 "Expected a fixed point division opcode");
11310
11311 EVT VT = LHS.getValueType();
11312 bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
11313 bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
11314 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11315
11316 // If there is enough room in the type to upscale the LHS or downscale the
11317 // RHS before the division, we can perform it in this type without having to
11318 // resize. For signed operations, the LHS headroom is the number of
11319 // redundant sign bits, and for unsigned ones it is the number of zeroes.
11320 // The headroom for the RHS is the number of trailing zeroes.
11321 unsigned LHSLead = Signed ? DAG.ComputeNumSignBits(LHS) - 1
11322 : DAG.computeKnownBits(LHS).countMinLeadingZeros();
11323 unsigned RHSTrail = DAG.computeKnownBits(RHS).countMinTrailingZeros();
11324
11325 // For signed saturating operations, we need to be able to detect true integer
11326 // division overflow; that is, when you have MIN / -EPS. However, this
11327 // is undefined behavior and if we emit divisions that could take such
11328 // values it may cause undesired behavior (arithmetic exceptions on x86, for
11329 // example).
11330 // Avoid this by requiring an extra bit so that we never get this case.
11331 // FIXME: This is a bit unfortunate as it means that for an 8-bit 7-scale
11332 // signed saturating division, we need to emit a whopping 32-bit division.
11333 if (LHSLead + RHSTrail < Scale + (unsigned)(Saturating && Signed))
11334 return SDValue();
11335
11336 unsigned LHSShift = std::min(LHSLead, Scale);
11337 unsigned RHSShift = Scale - LHSShift;
11338
11339 // At this point, we know that if we shift the LHS up by LHSShift and the
11340 // RHS down by RHSShift, we can emit a regular division with a final scaling
11341 // factor of Scale.
11342
11343 if (LHSShift)
11344 LHS = DAG.getNode(ISD::SHL, dl, VT, LHS,
11345 DAG.getShiftAmountConstant(LHSShift, VT, dl));
11346 if (RHSShift)
11347 RHS = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, VT, RHS,
11348 DAG.getShiftAmountConstant(RHSShift, VT, dl));
11349
11350 SDValue Quot;
11351 if (Signed) {
11352 // For signed operations, if the resulting quotient is negative and the
11353 // remainder is nonzero, subtract 1 from the quotient to round towards
11354 // negative infinity.
11355 SDValue Rem;
11356 // FIXME: Ideally we would always produce an SDIVREM here, but if the
11357 // type isn't legal, SDIVREM cannot be expanded. There is no reason why
11358 // we couldn't just form a libcall, but the type legalizer doesn't do it.
11359 if (isTypeLegal(VT) &&
11360 isOperationLegalOrCustom(ISD::SDIVREM, VT)) {
11361 Quot = DAG.getNode(ISD::SDIVREM, dl,
11362 DAG.getVTList(VT, VT),
11363 LHS, RHS);
11364 Rem = Quot.getValue(1);
11365 Quot = Quot.getValue(0);
11366 } else {
11367 Quot = DAG.getNode(ISD::SDIV, dl, VT,
11368 LHS, RHS);
11369 Rem = DAG.getNode(ISD::SREM, dl, VT,
11370 LHS, RHS);
11371 }
11372 SDValue Zero = DAG.getConstant(0, dl, VT);
11373 SDValue RemNonZero = DAG.getSetCC(dl, BoolVT, Rem, Zero, ISD::SETNE);
11374 SDValue LHSNeg = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETLT);
11375 SDValue RHSNeg = DAG.getSetCC(dl, BoolVT, RHS, Zero, ISD::SETLT);
11376 SDValue QuotNeg = DAG.getNode(ISD::XOR, dl, BoolVT, LHSNeg, RHSNeg);
11377 SDValue Sub1 = DAG.getNode(ISD::SUB, dl, VT, Quot,
11378 DAG.getConstant(1, dl, VT));
11379 Quot = DAG.getSelect(dl, VT,
11380 DAG.getNode(ISD::AND, dl, BoolVT, RemNonZero, QuotNeg),
11381 Sub1, Quot);
11382 } else
11383 Quot = DAG.getNode(ISD::UDIV, dl, VT,
11384 LHS, RHS);
11385
11386 return Quot;
11387}
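// Worked example (illustrative only): udiv.fix(16, 32, 4) on i8 encodes
// 1.0 / 2.0 with four fractional bits. LHS has three leading zeroes and
// RHS five trailing zeroes, so LHSShift = 3 and RHSShift = 1, giving
// udiv(128, 16) == 8, the Q4 encoding of 0.5.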
11388
11389void TargetLowering::expandUADDSUBO(
11390 SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
11391 SDLoc dl(Node);
11392 SDValue LHS = Node->getOperand(0);
11393 SDValue RHS = Node->getOperand(1);
11394 bool IsAdd = Node->getOpcode() == ISD::UADDO;
11395
11396 // If UADDO_CARRY/SUBO_CARRY is legal, use that instead.
11397 unsigned OpcCarry = IsAdd ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
11398 if (isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) {
11399 SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1));
11400 SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(),
11401 { LHS, RHS, CarryIn });
11402 Result = SDValue(NodeCarry.getNode(), 0);
11403 Overflow = SDValue(NodeCarry.getNode(), 1);
11404 return;
11405 }
11406
11407 Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
11408 LHS.getValueType(), LHS, RHS);
11409
11410 EVT ResultType = Node->getValueType(1);
11411 EVT SetCCType = getSetCCResultType(
11412 DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
11413 SDValue SetCC;
11414 if (IsAdd && isOneConstant(RHS)) {
11415 // Special case: uaddo X, 1 overflows if X+1 is 0. This potentially reduces
11416 // the live range of X. We assume comparing with 0 is cheap.
11417 // The general case (X + C) < C is not necessarily beneficial. Although we
11418 // reduce the live range of X, we may introduce the materialization of
11419 // constant C.
11420 SetCC =
11421 DAG.getSetCC(dl, SetCCType, Result,
11422 DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETEQ);
11423 } else if (IsAdd && isAllOnesConstant(RHS)) {
11424 // Special case: uaddo X, -1 overflows if X != 0.
11425 SetCC =
11426 DAG.getSetCC(dl, SetCCType, LHS,
11427 DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETNE);
11428 } else {
11429 ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
11430 SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC);
11431 }
11432 Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
11433}
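// Illustrative i8 example (not in the source): uaddo(250, 10) wraps to 4,
// and 4 <u 250 flags the carry. The uaddo(X, 1) special case instead tests
// Result == 0, e.g. 255 + 1 wraps to 0 and overflows.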
11434
11435void TargetLowering::expandSADDSUBO(
11436 SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
11437 SDLoc dl(Node);
11438 SDValue LHS = Node->getOperand(0);
11439 SDValue RHS = Node->getOperand(1);
11440 bool IsAdd = Node->getOpcode() == ISD::SADDO;
11441
11442 Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
11443 LHS.getValueType(), LHS, RHS);
11444
11445 EVT ResultType = Node->getValueType(1);
11446 EVT OType = getSetCCResultType(
11447 DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
11448
11449 // If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
11450 unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
11451 if (isOperationLegal(OpcSat, LHS.getValueType())) {
11452 SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
11453 SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
11454 Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
11455 return;
11456 }
11457
11458 SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
11459
11460 // For an addition, the result should be less than one of the operands (LHS)
11461 // if and only if the other operand (RHS) is negative, otherwise there will
11462 // be overflow.
11463 // For a subtraction, the result should be less than one of the operands
11464 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
11465 // otherwise there will be overflow.
11466 SDValue ResultLowerThanLHS = DAG.getSetCC(dl, OType, Result, LHS, ISD::SETLT);
11467 SDValue ConditionRHS =
11468 DAG.getSetCC(dl, OType, RHS, Zero, IsAdd ? ISD::SETLT : ISD::SETGT);
11469
11470 Overflow = DAG.getBoolExtOrTrunc(
11471 DAG.getNode(ISD::XOR, dl, OType, ConditionRHS, ResultLowerThanLHS), dl,
11472 ResultType, ResultType);
11473}
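// Illustrative i8 examples (added for exposition): saddo(100, 50) wraps to
// -106; Result < LHS is true while RHS < 0 is false, so the XOR flags
// overflow. For saddo(100, -50), Result == 50 < 100 and RHS < 0 are both
// true, so the XOR correctly reports no overflow.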
11474
11475bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
11476 SDValue &Overflow, SelectionDAG &DAG) const {
11477 SDLoc dl(Node);
11478 EVT VT = Node->getValueType(0);
11479 EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11480 SDValue LHS = Node->getOperand(0);
11481 SDValue RHS = Node->getOperand(1);
11482 bool isSigned = Node->getOpcode() == ISD::SMULO;
11483
11484 // For power-of-two multiplications we can use a simpler shift expansion.
11485 if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) {
11486 const APInt &C = RHSC->getAPIntValue();
11487 // mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
11488 if (C.isPowerOf2()) {
11489 // smulo(x, signed_min) is same as umulo(x, signed_min).
11490 bool UseArithShift = isSigned && !C.isMinSignedValue();
11491 SDValue ShiftAmt = DAG.getShiftAmountConstant(C.logBase2(), VT, dl);
11492 Result = DAG.getNode(ISD::SHL, dl, VT, LHS, ShiftAmt);
11493 Overflow = DAG.getSetCC(dl, SetCCVT,
11494 DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
11495 dl, VT, Result, ShiftAmt),
11496 LHS, ISD::SETNE);
11497 return true;
11498 }
11499 }
11500
11501 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
11502 if (VT.isVector())
11503 WideVT =
11504 EVT::getVectorVT(*DAG.getContext(), WideVT, VT.getVectorElementCount());
11505
11506 SDValue BottomHalf;
11507 SDValue TopHalf;
11508 static const unsigned Ops[2][3] =
11509 { { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND },
11510 { ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }};
11511 if (isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
11512 BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
11513 TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
11514 } else if (isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
11515 BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
11516 RHS);
11517 TopHalf = BottomHalf.getValue(1);
11518 } else if (isTypeLegal(WideVT)) {
11519 LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
11520 RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
11521 SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
11522 BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
11523 SDValue ShiftAmt =
11524 DAG.getShiftAmountConstant(VT.getScalarSizeInBits(), WideVT, dl);
11525 TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT,
11526 DAG.getNode(ISD::SRL, dl, WideVT, Mul, ShiftAmt));
11527 } else {
11528 if (VT.isVector())
11529 return false;
11530
11531 forceExpandWideMUL(DAG, dl, isSigned, LHS, RHS, BottomHalf, TopHalf);
11532 }
11533
11534 Result = BottomHalf;
11535 if (isSigned) {
11536 SDValue ShiftAmt = DAG.getShiftAmountConstant(
11537 VT.getScalarSizeInBits() - 1, BottomHalf.getValueType(), dl);
11538 SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
11539 Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE);
11540 } else {
11541 Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf,
11542 DAG.getConstant(0, dl, VT), ISD::SETNE);
11543 }
11544
11545 // Truncate the result if SetCC returns a larger type than needed.
11546 EVT RType = Node->getValueType(1);
11547 if (RType.bitsLT(Overflow.getValueType()))
11548 Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow);
11549
11550 assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
11551 "Unexpected result type for S/UMULO legalization");
11552 return true;
11553}
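// Illustrative i8 example (not from the source): smulo(20, 8) becomes
// 20 << 3, which wraps to -96; the arithmetic shift back yields -12 != 20,
// so overflow is set. smulo(10, 8) round-trips to 10 and does not overflow.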
11554
11555SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
11556 SDLoc dl(Node);
11557 unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
11558 SDValue Op = Node->getOperand(0);
11559 EVT VT = Op.getValueType();
11560
11561 // Try to use a shuffle reduction for power of two vectors.
11562 if (VT.isPow2VectorType()) {
11563 while (VT.getVectorNumElements() > 1) {
11564 EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
11565 if (!isOperationLegalOrCustom(BaseOpcode, HalfVT))
11566 break;
11567
11568 SDValue Lo, Hi;
11569 std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
11570 Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi, Node->getFlags());
11571 VT = HalfVT;
11572
11573 // Stop if splitting is enough to make the reduction legal.
11574 if (isOperationLegalOrCustom(Node->getOpcode(), HalfVT))
11575 return DAG.getNode(Node->getOpcode(), dl, Node->getValueType(0), Op,
11576 Node->getFlags());
11577 }
11578 }
11579
11580 if (VT.isScalableVector())
11581 report_fatal_error(
11582 "Expanding reductions for scalable vectors is undefined.");
11583
11584 EVT EltVT = VT.getVectorElementType();
11585 unsigned NumElts = VT.getVectorNumElements();
11586
11587 SmallVector<SDValue, 8> Ops;
11588 DAG.ExtractVectorElements(Op, Ops, 0, NumElts);
11589
11590 SDValue Res = Ops[0];
11591 for (unsigned i = 1; i < NumElts; i++)
11592 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Node->getFlags());
11593
11594 // Result type may be wider than element type.
11595 if (EltVT != Node->getValueType(0))
11596 Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
11597 return Res;
11598}
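// Illustrative example (added for exposition): vecreduce_add on <8 x i32>
// first splits into two <4 x i32> halves combined with a vector ADD, then
// <2 x i32>, and so on while the half-width operation stays legal; whatever
// remains is scalarized by the extract-and-accumulate loop above.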
11599
11600SDValue TargetLowering::expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const {
11601 SDLoc dl(Node);
11602 SDValue AccOp = Node->getOperand(0);
11603 SDValue VecOp = Node->getOperand(1);
11604 SDNodeFlags Flags = Node->getFlags();
11605
11606 EVT VT = VecOp.getValueType();
11607 EVT EltVT = VT.getVectorElementType();
11608
11609 if (VT.isScalableVector())
11610 report_fatal_error(
11611 "Expanding reductions for scalable vectors is undefined.");
11612
11613 unsigned NumElts = VT.getVectorNumElements();
11614
11615 SmallVector<SDValue, 8> Ops;
11616 DAG.ExtractVectorElements(VecOp, Ops, 0, NumElts);
11617
11618 unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
11619
11620 SDValue Res = AccOp;
11621 for (unsigned i = 0; i < NumElts; i++)
11622 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Flags);
11623
11624 return Res;
11625}
11626
11627bool TargetLowering::expandREM(SDNode *Node, SDValue &Result,
11628 SelectionDAG &DAG) const {
11629 EVT VT = Node->getValueType(0);
11630 SDLoc dl(Node);
11631 bool isSigned = Node->getOpcode() == ISD::SREM;
11632 unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
11633 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
11634 SDValue Dividend = Node->getOperand(0);
11635 SDValue Divisor = Node->getOperand(1);
11636 if (isOperationLegalOrCustom(DivRemOpc, VT)) {
11637 SDVTList VTs = DAG.getVTList(VT, VT);
11638 Result = DAG.getNode(DivRemOpc, dl, VTs, Dividend, Divisor).getValue(1);
11639 return true;
11640 }
11641 if (isOperationLegalOrCustom(DivOpc, VT)) {
11642 // X % Y -> X-X/Y*Y
11643 SDValue Divide = DAG.getNode(DivOpc, dl, VT, Dividend, Divisor);
11644 SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Divide, Divisor);
11645 Result = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
11646 return true;
11647 }
11648 return false;
11649}
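// Illustrative example (not in the source): expanding 23 % 5 with the
// second pattern computes 23 - (23 / 5) * 5 == 23 - 20 == 3.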
11650
11651SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node,
11652 SelectionDAG &DAG) const {
11653 bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT;
11654 SDLoc dl(SDValue(Node, 0));
11655 SDValue Src = Node->getOperand(0);
11656
11657 // DstVT is the result type, while SatVT is the size to which we saturate
11658 EVT SrcVT = Src.getValueType();
11659 EVT DstVT = Node->getValueType(0);
11660
11661 EVT SatVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
11662 unsigned SatWidth = SatVT.getScalarSizeInBits();
11663 unsigned DstWidth = DstVT.getScalarSizeInBits();
11664 assert(SatWidth <= DstWidth &&
11665 "Expected saturation width smaller than result width");
11666
11667 // Determine minimum and maximum integer values and their corresponding
11668 // floating-point values.
11669 APInt MinInt, MaxInt;
11670 if (IsSigned) {
11671 MinInt = APInt::getSignedMinValue(SatWidth).sext(DstWidth);
11672 MaxInt = APInt::getSignedMaxValue(SatWidth).sext(DstWidth);
11673 } else {
11674 MinInt = APInt::getMinValue(SatWidth).zext(DstWidth);
11675 MaxInt = APInt::getMaxValue(SatWidth).zext(DstWidth);
11676 }
11677
11678 // We cannot risk emitting FP_TO_XINT nodes with a source VT of [b]f16, as
11679 // libcall emission cannot handle this. Large result types will fail.
11680 if (SrcVT == MVT::f16 || SrcVT == MVT::bf16) {
11681 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Src);
11682 SrcVT = Src.getValueType();
11683 }
11684
11685 const fltSemantics &Sem = SrcVT.getFltSemantics();
11686 APFloat MinFloat(Sem);
11687 APFloat MaxFloat(Sem);
11688
11689 APFloat::opStatus MinStatus =
11690 MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
11691 APFloat::opStatus MaxStatus =
11692 MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
11693 bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
11694 !(MaxStatus & APFloat::opStatus::opInexact);
11695
11696 SDValue MinFloatNode = DAG.getConstantFP(MinFloat, dl, SrcVT);
11697 SDValue MaxFloatNode = DAG.getConstantFP(MaxFloat, dl, SrcVT);
11698
11699 // If the integer bounds are exactly representable as floats and min/max are
11700 // legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
11701 // of comparisons and selects.
11702 bool MinMaxLegal = isOperationLegal(ISD::FMINNUM, SrcVT) &&
11703 isOperationLegal(ISD::FMAXNUM, SrcVT);
11704 if (AreExactFloatBounds && MinMaxLegal) {
11705 SDValue Clamped = Src;
11706
11707 // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
11708 Clamped = DAG.getNode(ISD::FMAXNUM, dl, SrcVT, Clamped, MinFloatNode);
11709 // Clamp by MaxFloat from above. NaN cannot occur.
11710 Clamped = DAG.getNode(ISD::FMINNUM, dl, SrcVT, Clamped, MaxFloatNode);
11711 // Convert clamped value to integer.
11712 SDValue FpToInt = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT,
11713 dl, DstVT, Clamped);
11714
11715 // In the unsigned case we're done, because we mapped NaN to MinFloat,
11716 // which will cast to zero.
11717 if (!IsSigned)
11718 return FpToInt;
11719
11720 // Otherwise, select 0 if Src is NaN.
11721 SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
11722 EVT SetCCVT =
11723 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
11724 SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
11725 return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, FpToInt);
11726 }
11727
11728 SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT);
11729 SDValue MaxIntNode = DAG.getConstant(MaxInt, dl, DstVT);
11730
11731 // Result of direct conversion. The assumption here is that the operation is
11732 // non-trapping and it's fine to apply it to an out-of-range value if we
11733 // select it away later.
11734 SDValue FpToInt =
11735 DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl, DstVT, Src);
11736
11737 SDValue Select = FpToInt;
11738
11739 EVT SetCCVT =
11740 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
11741
11742 // If Src ULT MinFloat, select MinInt. In particular, this also selects
11743 // MinInt if Src is NaN.
11744 SDValue ULT = DAG.getSetCC(dl, SetCCVT, Src, MinFloatNode, ISD::SETULT);
11745 Select = DAG.getSelect(dl, DstVT, ULT, MinIntNode, Select);
11746 // If Src OGT MaxFloat, select MaxInt.
11747 SDValue OGT = DAG.getSetCC(dl, SetCCVT, Src, MaxFloatNode, ISD::SETOGT);
11748 Select = DAG.getSelect(dl, DstVT, OGT, MaxIntNode, Select);
11749
11750 // In the unsigned case we are done, because we mapped NaN to MinInt, which
11751 // is already zero.
11752 if (!IsSigned)
11753 return Select;
11754
11755 // Otherwise, select 0 if Src is NaN.
11756 SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
11757 SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
11758 return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, Select);
11759}
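// Illustrative f32 -> i8 examples (added for exposition):
// fptosi.sat(300.0) clamps to 127, fptosi.sat(-1.0e9) clamps to -128, and
// fptosi.sat(NaN) selects 0 via the final unordered-compare select.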
11760
11761SDValue TargetLowering::expandRoundInexactToOdd(EVT ResultVT, SDValue Op,
11762 const SDLoc &dl,
11763 SelectionDAG &DAG) const {
11764 EVT OperandVT = Op.getValueType();
11765 if (OperandVT.getScalarType() == ResultVT.getScalarType())
11766 return Op;
11767 EVT ResultIntVT = ResultVT.changeTypeToInteger();
11768 // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
11769 // can induce double-rounding which may alter the results. We can
11770 // correct for this using a trick explained in: Boldo, Sylvie, and
11771 // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
11772 // World Congress. 2005.
11773 SDValue Narrow = DAG.getFPExtendOrRound(Op, dl, ResultVT);
11774 SDValue NarrowAsWide = DAG.getFPExtendOrRound(Narrow, dl, OperandVT);
11775
11776 // We can keep the narrow value as-is if narrowing was exact (no
11777 // rounding error), the wide value was NaN (the narrow value is also
11778 // NaN and should be preserved) or if we rounded to the odd value.
11779 SDValue NarrowBits = DAG.getNode(ISD::BITCAST, dl, ResultIntVT, Narrow);
11780 SDValue One = DAG.getConstant(1, dl, ResultIntVT);
11781 SDValue NegativeOne = DAG.getAllOnesConstant(dl, ResultIntVT);
11782 SDValue And = DAG.getNode(ISD::AND, dl, ResultIntVT, NarrowBits, One);
11783 EVT ResultIntVTCCVT = getSetCCResultType(
11784 DAG.getDataLayout(), *DAG.getContext(), And.getValueType());
11785 SDValue Zero = DAG.getConstant(0, dl, ResultIntVT);
11786 // The result is already odd so we don't need to do anything.
11787 SDValue AlreadyOdd = DAG.getSetCC(dl, ResultIntVTCCVT, And, Zero, ISD::SETNE);
11788
11789 EVT WideSetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
11790 Op.getValueType());
11791 // We keep results which are exact, odd or NaN.
11792 SDValue KeepNarrow =
11793 DAG.getSetCC(dl, WideSetCCVT, Op, NarrowAsWide, ISD::SETUEQ);
11794 KeepNarrow = DAG.getNode(ISD::OR, dl, WideSetCCVT, KeepNarrow, AlreadyOdd);
11795 // We morally performed a round-down if AbsNarrow is smaller than
11796 // AbsWide.
11797 SDValue AbsWide = DAG.getNode(ISD::FABS, dl, OperandVT, Op);
11798 SDValue AbsNarrowAsWide = DAG.getNode(ISD::FABS, dl, OperandVT, NarrowAsWide);
11799 SDValue NarrowIsRd =
11800 DAG.getSetCC(dl, WideSetCCVT, AbsWide, AbsNarrowAsWide, ISD::SETOGT);
11801 // If the narrow value is odd or exact, pick it.
11802 // Otherwise, narrow is even and corresponds to either the rounded-up
11803 // or rounded-down value. If narrow is the rounded-down value, we want
11804 // the rounded-up value as it will be odd.
11805 SDValue Adjust = DAG.getSelect(dl, ResultIntVT, NarrowIsRd, One, NegativeOne);
11806 SDValue Adjusted = DAG.getNode(ISD::ADD, dl, ResultIntVT, NarrowBits, Adjust);
11807 Op = DAG.getSelect(dl, ResultIntVT, KeepNarrow, NarrowBits, Adjusted);
11808 return DAG.getNode(ISD::BITCAST, dl, ResultVT, Op);
11809}
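// Rationale (editorial note): rounding the intermediate result to odd keeps
// the "sticky" information in the least significant bit, so the subsequent
// round-to-nearest-even step cannot round twice in the same direction; this
// is what makes the two-step narrowing match a single direct rounding.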
11810
11811SDValue TargetLowering::expandFP_ROUND(SDNode *Node, SelectionDAG &DAG) const {
11812 assert(Node->getOpcode() == ISD::FP_ROUND && "Unexpected opcode!");
11813 SDValue Op = Node->getOperand(0);
11814 EVT VT = Node->getValueType(0);
11815 SDLoc dl(Node);
11816 if (VT.getScalarType() == MVT::bf16) {
11817 if (Node->getConstantOperandVal(1) == 1) {
11818 return DAG.getNode(ISD::FP_TO_BF16, dl, VT, Node->getOperand(0));
11819 }
11820 EVT OperandVT = Op.getValueType();
11821 SDValue IsNaN = DAG.getSetCC(
11822 dl,
11823 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), OperandVT),
11824 Op, Op, ISD::SETUO);
11825
11826 // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
11827 // can induce double-rounding which may alter the results. We can
11828 // correct for this using a trick explained in: Boldo, Sylvie, and
11829 // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
11830 // World Congress. 2005.
11831 EVT F32 = VT.isVector() ? VT.changeVectorElementType(MVT::f32) : MVT::f32;
11832 EVT I32 = F32.changeTypeToInteger();
11833 Op = expandRoundInexactToOdd(F32, Op, dl, DAG);
11834 Op = DAG.getNode(ISD::BITCAST, dl, I32, Op);
11835
11836 // Conversions should set NaN's quiet bit. This also prevents NaNs from
11837 // turning into infinities.
11838 SDValue NaN =
11839 DAG.getNode(ISD::OR, dl, I32, Op, DAG.getConstant(0x400000, dl, I32));
11840
11841 // Factor in the contribution of the low 16 bits.
11842 SDValue One = DAG.getConstant(1, dl, I32);
11843 SDValue Lsb = DAG.getNode(ISD::SRL, dl, I32, Op,
11844 DAG.getShiftAmountConstant(16, I32, dl));
11845 Lsb = DAG.getNode(ISD::AND, dl, I32, Lsb, One);
11846 SDValue RoundingBias =
11847 DAG.getNode(ISD::ADD, dl, I32, DAG.getConstant(0x7fff, dl, I32), Lsb);
11848 SDValue Add = DAG.getNode(ISD::ADD, dl, I32, Op, RoundingBias);
11849
11850 // Don't round if we had a NaN; we don't want to turn 0x7fffffff into
11851 // 0x80000000.
11852 Op = DAG.getSelect(dl, I32, IsNaN, NaN, Add);
11853
11854 // Now that we have rounded, shift the bits into position.
11855 Op = DAG.getNode(ISD::SRL, dl, I32, Op,
11856 DAG.getShiftAmountConstant(16, I32, dl));
11857 Op = DAG.getNode(ISD::BITCAST, dl, I32, Op);
11858 EVT I16 = I32.isVector() ? I32.changeVectorElementType(MVT::i16) : MVT::i16;
11859 Op = DAG.getNode(ISD::TRUNCATE, dl, I16, Op);
11860 return DAG.getNode(ISD::BITCAST, dl, VT, Op);
11861 }
11862 return SDValue();
11863}
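// Worked example (illustrative only): float pi has bits 0x40490FDB, so
// Lsb == (0x4049 & 1) == 1 and RoundingBias == 0x8000. The biased value
// 0x40498FDB shifts down to the bf16 pattern 0x4049 (~3.140625), i.e.
// round-to-nearest-even applied to the low 16 bits.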
11864
11865SDValue TargetLowering::expandVectorSplice(SDNode *Node,
11866 SelectionDAG &DAG) const {
11867 assert(Node->getOpcode() == ISD::VECTOR_SPLICE && "Unexpected opcode!");
11868 assert(Node->getValueType(0).isScalableVector() &&
11869 "Fixed length vector types expected to use SHUFFLE_VECTOR!");
11870
11871 EVT VT = Node->getValueType(0);
11872 SDValue V1 = Node->getOperand(0);
11873 SDValue V2 = Node->getOperand(1);
11874 int64_t Imm = cast<ConstantSDNode>(Node->getOperand(2))->getSExtValue();
11875 SDLoc DL(Node);
11876
11877 // Expand through memory thusly:
11878 // Alloca CONCAT_VECTORS_TYPES(V1, V2) Ptr
11879 // Store V1, Ptr
11880 // Store V2, Ptr + sizeof(V1)
11881 // If (Imm < 0)
11882 // TrailingElts = -Imm
11883 // Ptr = Ptr + sizeof(V1) - (TrailingElts * sizeof(VT.Elt))
11884 // else
11885 // Ptr = Ptr + (Imm * sizeof(VT.Elt))
11886 // Res = Load Ptr
11887
11888 Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);
11889
11890 EVT MemVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
11891 VT.getVectorElementCount() * 2);
11892 SDValue StackPtr = DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
11893 EVT PtrVT = StackPtr.getValueType();
11894 auto &MF = DAG.getMachineFunction();
11895 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
11896 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
11897
11898 // Store the lo part of CONCAT_VECTORS(V1, V2)
11899 SDValue StoreV1 = DAG.getStore(DAG.getEntryNode(), DL, V1, StackPtr, PtrInfo);
11900 // Store the hi part of CONCAT_VECTORS(V1, V2)
11901 SDValue OffsetToV2 = DAG.getVScale(
11902 DL, PtrVT,
11903 VT.getStoreSize().getKnownMinValue()));
11904 SDValue StackPtr2 = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, OffsetToV2);
11905 SDValue StoreV2 = DAG.getStore(StoreV1, DL, V2, StackPtr2, PtrInfo);
11906
11907 if (Imm >= 0) {
11908 // Load back the required element. getVectorElementPointer takes care of
11909 // clamping the index if it's out-of-bounds.
11910 StackPtr = getVectorElementPointer(DAG, StackPtr, VT, Node->getOperand(2));
11911 // Load the spliced result
11912 return DAG.getLoad(VT, DL, StoreV2, StackPtr,
11913 MachinePointerInfo::getUnknownStack(MF));
11914 }
11915
11916 uint64_t TrailingElts = -Imm;
11917
11918 // NOTE: TrailingElts must be clamped so as not to read outside of V1:V2.
11919 TypeSize EltByteSize = VT.getVectorElementType().getStoreSize();
11920 SDValue TrailingBytes =
11921 DAG.getConstant(TrailingElts * EltByteSize, DL, PtrVT);
11922
11923 if (TrailingElts > VT.getVectorMinNumElements()) {
11924 SDValue VLBytes =
11925 DAG.getVScale(DL, PtrVT,
11926 APInt(PtrVT.getFixedSizeInBits(),
11927 VT.getStoreSize().getKnownMinValue()));
11928 TrailingBytes = DAG.getNode(ISD::UMIN, DL, PtrVT, TrailingBytes, VLBytes);
11929 }
11930
11931 // Calculate the start address of the spliced result.
11932 StackPtr2 = DAG.getNode(ISD::SUB, DL, PtrVT, StackPtr2, TrailingBytes);
11933
11934 // Load the spliced result
11935 return DAG.getLoad(VT, DL, StoreV2, StackPtr2,
11936 MachinePointerInfo::getUnknownStack(MF));
11937}
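// Illustrative example (added for exposition): for splice(V1, V2, -2) the
// negative immediate selects the last two elements of V1 followed by the
// leading elements of V2, so the load starts sizeof(V1) minus two element
// widths into the stack slot, with the UMIN clamp keeping the offset
// inside V1 for small runtime vector lengths.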
11938
11939SDValue TargetLowering::expandVECTOR_COMPRESS(SDNode *Node,
11940 SelectionDAG &DAG) const {
11941 SDLoc DL(Node);
11942 SDValue Vec = Node->getOperand(0);
11943 SDValue Mask = Node->getOperand(1);
11944 SDValue Passthru = Node->getOperand(2);
11945
11946 EVT VecVT = Vec.getValueType();
11947 EVT ScalarVT = VecVT.getScalarType();
11948 EVT MaskVT = Mask.getValueType();
11949 EVT MaskScalarVT = MaskVT.getScalarType();
11950
11951 // Needs to be handled by targets that have scalable vector types.
11952 if (VecVT.isScalableVector())
11953 report_fatal_error("Cannot expand masked_compress for scalable vectors.");
11954
11955 SDValue StackPtr = DAG.CreateStackTemporary(
11956 VecVT.getStoreSize(), DAG.getReducedAlign(VecVT, /*UseABI=*/false));
11957 int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
11958 MachinePointerInfo PtrInfo =
11959 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
11960
11961 MVT PositionVT = getVectorIdxTy(DAG.getDataLayout());
11962 SDValue Chain = DAG.getEntryNode();
11963 SDValue OutPos = DAG.getConstant(0, DL, PositionVT);
11964
11965 bool HasPassthru = !Passthru.isUndef();
11966
11967 // If we have a passthru vector, store it on the stack, overwrite the matching
11968 // positions and then re-write the last element that was potentially
11969 // overwritten even though mask[i] = false.
11970 if (HasPassthru)
11971 Chain = DAG.getStore(Chain, DL, Passthru, StackPtr, PtrInfo);
11972
11973 SDValue LastWriteVal;
11974 APInt PassthruSplatVal;
11975 bool IsSplatPassthru =
11976 ISD::isConstantSplatVector(Passthru.getNode(), PassthruSplatVal);
11977
11978 if (IsSplatPassthru) {
11979 // As we do not know which position we wrote to last, we cannot simply
11980 // access that index from the passthru vector. So we first check if passthru
11981 // is a splat vector, to use any element ...
11982 LastWriteVal = DAG.getConstant(PassthruSplatVal, DL, ScalarVT);
11983 } else if (HasPassthru) {
11984 // ... if it is not a splat vector, we need to get the passthru value at
11985 // position = popcount(mask) and re-load it from the stack before it is
11986 // overwritten in the loop below.
11987 EVT PopcountVT = ScalarVT.changeTypeToInteger();
11988 SDValue Popcount = DAG.getNode(
11989 ISD::TRUNCATE, DL, MaskVT.changeVectorElementType(MVT::i1), Mask);
11990 Popcount =
11991 DAG.getNode(ISD::ZERO_EXTEND, DL,
11992 MaskVT.changeVectorElementType(PopcountVT), Popcount);
11993 Popcount = DAG.getNode(ISD::VECREDUCE_ADD, DL, PopcountVT, Popcount);
11994 SDValue LastElmtPtr =
11995 getVectorElementPointer(DAG, StackPtr, VecVT, Popcount);
11996 LastWriteVal = DAG.getLoad(
11997 ScalarVT, DL, Chain, LastElmtPtr,
11998 MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
11999 Chain = LastWriteVal.getValue(1);
12000 }
12001
12002 unsigned NumElms = VecVT.getVectorNumElements();
12003 for (unsigned I = 0; I < NumElms; I++) {
12004 SDValue ValI = DAG.getExtractVectorElt(DL, ScalarVT, Vec, I);
12005 SDValue OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos);
12006 Chain = DAG.getStore(
12007 Chain, DL, ValI, OutPtr,
12008 MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
12009
12010 // Get the mask value and add it to the current output position. This
12011 // either increments by 1 if MaskI is true or adds 0 otherwise.
12012 // Freeze in case we have poison/undef mask entries.
12013 SDValue MaskI = DAG.getExtractVectorElt(DL, MaskScalarVT, Mask, I);
12014 MaskI = DAG.getFreeze(MaskI);
12015 MaskI = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, MaskI);
12016 MaskI = DAG.getNode(ISD::ZERO_EXTEND, DL, PositionVT, MaskI);
12017 OutPos = DAG.getNode(ISD::ADD, DL, PositionVT, OutPos, MaskI);
12018
12019 if (HasPassthru && I == NumElms - 1) {
12020 SDValue EndOfVector =
12021 DAG.getConstant(VecVT.getVectorNumElements() - 1, DL, PositionVT);
12022 SDValue AllLanesSelected =
12023 DAG.getSetCC(DL, MVT::i1, OutPos, EndOfVector, ISD::CondCode::SETUGT);
12024 OutPos = DAG.getNode(ISD::UMIN, DL, PositionVT, OutPos, EndOfVector);
12025 OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos);
12026
12027 // Re-write the last ValI if all lanes were selected. Otherwise,
12028 // overwrite that last write with the passthru value.
12029 LastWriteVal = DAG.getSelect(DL, ScalarVT, AllLanesSelected, ValI,
12030 LastWriteVal, SDNodeFlags::Unpredictable);
12031 Chain = DAG.getStore(
12032 Chain, DL, LastWriteVal, OutPtr,
12033 MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
12034 }
12035 }
12036
12037 return DAG.getLoad(VecVT, DL, Chain, StackPtr, PtrInfo);
12038}
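// Illustrative trace (not from the source): compressing {10,20,30,40} with
// mask {1,0,1,0} stores 10@0, 20@1, 30@1 (overwriting 20), 40@2; OutPos
// ends at 2, so the final fix-up re-writes slot 2 with the passthru
// element at index popcount(mask) == 2, yielding
// {10, 30, passthru[2], passthru[3]}.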
12039
12040SDValue TargetLowering::expandPartialReduceMLA(SDNode *N,
12041 SelectionDAG &DAG) const {
12042 SDLoc DL(N);
12043 SDValue Acc = N->getOperand(0);
12044 SDValue MulLHS = N->getOperand(1);
12045 SDValue MulRHS = N->getOperand(2);
12046 EVT AccVT = Acc.getValueType();
12047 EVT MulOpVT = MulLHS.getValueType();
12048
12049 EVT ExtMulOpVT =
12050 EVT::getVectorVT(*DAG.getContext(), AccVT.getVectorElementType(),
12051 MulOpVT.getVectorElementCount());
12052
12053 unsigned ExtOpcLHS = N->getOpcode() == ISD::PARTIAL_REDUCE_UMLA
12054 ? ISD::ZERO_EXTEND
12055 : ISD::SIGN_EXTEND;
12056 unsigned ExtOpcRHS = N->getOpcode() == ISD::PARTIAL_REDUCE_SMLA
12057 ? ISD::SIGN_EXTEND
12058 : ISD::ZERO_EXTEND;
12059
12060 if (ExtMulOpVT != MulOpVT) {
12061 MulLHS = DAG.getNode(ExtOpcLHS, DL, ExtMulOpVT, MulLHS);
12062 MulRHS = DAG.getNode(ExtOpcRHS, DL, ExtMulOpVT, MulRHS);
12063 }
12064 SDValue Input = MulLHS;
12065 APInt ConstantOne;
12066 if (!ISD::isConstantSplatVector(MulRHS.getNode(), ConstantOne) ||
12067 !ConstantOne.isOne())
12068 Input = DAG.getNode(ISD::MUL, DL, ExtMulOpVT, MulLHS, MulRHS);
12069
12070 unsigned Stride = AccVT.getVectorMinNumElements();
12071 unsigned ScaleFactor = MulOpVT.getVectorMinNumElements() / Stride;
12072
12073 // Collect all of the subvectors
12074 std::deque<SDValue> Subvectors = {Acc};
12075 for (unsigned I = 0; I < ScaleFactor; I++)
12076 Subvectors.push_back(DAG.getExtractSubvector(DL, AccVT, Input, I * Stride));
12077
12078 // Flatten the subvector tree
12079 while (Subvectors.size() > 1) {
12080 Subvectors.push_back(
12081 DAG.getNode(ISD::ADD, DL, AccVT, {Subvectors[0], Subvectors[1]}));
12082 Subvectors.pop_front();
12083 Subvectors.pop_front();
12084 }
12085
12086 assert(Subvectors.size() == 1 &&
12087 "There should only be one subvector after tree flattening");
12088
12089 return Subvectors[0];
12090}
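// Illustrative example (added for exposition): with Acc == <2 x i32> and
// <8 x i8> inputs, the operands are extended and multiplied in <8 x i32>,
// then the four <2 x i32> subvectors are queued behind Acc and pairwise
// ADDed until a single <2 x i32> partial sum remains.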
12091
12092bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT,
12093 SDValue &LHS, SDValue &RHS,
12094 SDValue &CC, SDValue Mask,
12095 SDValue EVL, bool &NeedInvert,
12096 const SDLoc &dl, SDValue &Chain,
12097 bool IsSignaling) const {
12098 MVT OpVT = LHS.getSimpleValueType();
12099 ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
12100 NeedInvert = false;
12101 assert(!EVL == !Mask && "VP Mask and EVL must either both be set or unset");
12102 bool IsNonVP = !EVL;
12103 switch (getCondCodeAction(CCCode, OpVT)) {
12104 default:
12105 llvm_unreachable("Unknown condition code action!");
12106 case TargetLowering::Legal:
12107 // Nothing to do.
12108 break;
12109 case TargetLowering::Expand: {
12110 ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(CCCode);
12111 if (isCondCodeLegalOrCustom(InvCC, OpVT)) {
12112 std::swap(LHS, RHS);
12113 CC = DAG.getCondCode(InvCC);
12114 return true;
12115 }
12116 // Swapping operands didn't work. Try inverting the condition.
12117 bool NeedSwap = false;
12118 InvCC = getSetCCInverse(CCCode, OpVT);
12119 if (!isCondCodeLegalOrCustom(InvCC, OpVT)) {
12120 // If inverting the condition is not enough, try swapping operands
12121 // on top of it.
12122 InvCC = ISD::getSetCCSwappedOperands(InvCC);
12123 NeedSwap = true;
12124 }
12125 if (isCondCodeLegalOrCustom(InvCC, OpVT)) {
12126 CC = DAG.getCondCode(InvCC);
12127 NeedInvert = true;
12128 if (NeedSwap)
12129 std::swap(LHS, RHS);
12130 return true;
12131 }
12132
12133 // Special case: expand i1 comparisons using logical operations.
12134 if (OpVT == MVT::i1) {
12135 SDValue Ret;
12136 switch (CCCode) {
12137 default:
12138 llvm_unreachable("Unknown integer setcc!");
12139 case ISD::SETEQ: // X == Y --> ~(X ^ Y)
12140 Ret = DAG.getNOT(dl, DAG.getNode(ISD::XOR, dl, MVT::i1, LHS, RHS),
12141 MVT::i1);
12142 break;
12143 case ISD::SETNE: // X != Y --> (X ^ Y)
12144 Ret = DAG.getNode(ISD::XOR, dl, MVT::i1, LHS, RHS);
12145 break;
12146 case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
12147 case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
12148 Ret = DAG.getNode(ISD::AND, dl, MVT::i1, RHS,
12149 DAG.getNOT(dl, LHS, MVT::i1));
12150 break;
12151 case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
12152 case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
12153 Ret = DAG.getNode(ISD::AND, dl, MVT::i1, LHS,
12154 DAG.getNOT(dl, RHS, MVT::i1));
12155 break;
12156 case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
12157 case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
12158 Ret = DAG.getNode(ISD::OR, dl, MVT::i1, RHS,
12159 DAG.getNOT(dl, LHS, MVT::i1));
12160 break;
12161 case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
12162 case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
12163 Ret = DAG.getNode(ISD::OR, dl, MVT::i1, LHS,
12164 DAG.getNOT(dl, RHS, MVT::i1));
12165 break;
12166 }
12167
12168 LHS = DAG.getZExtOrTrunc(Ret, dl, VT);
12169 RHS = SDValue();
12170 CC = SDValue();
12171 return true;
12172 }
12173
12174 ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
12175 unsigned Opc = 0;
12176 switch (CCCode) {
12177 default:
12178 llvm_unreachable("Don't know how to expand this condition!");
12179 case ISD::SETUO:
12180 if (isCondCodeLegal(ISD::SETUNE, OpVT)) {
12181 CC1 = ISD::SETUNE;
12182 CC2 = ISD::SETUNE;
12183 Opc = ISD::OR;
12184 break;
12185 }
12186 assert(isCondCodeLegal(ISD::SETOEQ, OpVT) &&
12187 "If SETUE is expanded, SETOEQ or SETUNE must be legal!");
12188 NeedInvert = true;
12189 [[fallthrough]];
12190 case ISD::SETO:
12191 assert(isCondCodeLegal(ISD::SETOEQ, OpVT) &&
12192 "If SETO is expanded, SETOEQ must be legal!");
12193 CC1 = ISD::SETOEQ;
12194 CC2 = ISD::SETOEQ;
12195 Opc = ISD::AND;
12196 break;
12197 case ISD::SETONE:
12198 case ISD::SETUEQ:
12199 // If the SETUO or SETO CC isn't legal, we might be able to use
12200 // SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
12201 // of SETOGT/SETOLT to be legal, the other can be emulated by swapping
12202 // the operands.
12203 CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
12204 if (!isCondCodeLegal(CC2, OpVT) && (isCondCodeLegal(ISD::SETOGT, OpVT) ||
12205 isCondCodeLegal(ISD::SETOLT, OpVT))) {
12206 CC1 = ISD::SETOGT;
12207 CC2 = ISD::SETOLT;
12208 Opc = ISD::OR;
12209 NeedInvert = ((unsigned)CCCode & 0x8U);
12210 break;
12211 }
12212 [[fallthrough]];
12213 case ISD::SETOEQ:
12214 case ISD::SETOGT:
12215 case ISD::SETOGE:
12216 case ISD::SETOLT:
12217 case ISD::SETOLE:
12218 case ISD::SETUNE:
12219 case ISD::SETUGT:
12220 case ISD::SETUGE:
12221 case ISD::SETULT:
12222 case ISD::SETULE:
12223 // If we are floating point, assign and break, otherwise fall through.
12224 if (!OpVT.isInteger()) {
12225 // We can use the 4th bit to tell if we are the unordered
12226 // or ordered version of the opcode.
12227 CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
12228 Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
12229 CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
12230 break;
12231 }
12232 // Fall through if we are an unsigned integer.
12233 [[fallthrough]];
12234 case ISD::SETLE:
12235 case ISD::SETGT:
12236 case ISD::SETGE:
12237 case ISD::SETLT:
12238 case ISD::SETNE:
12239 case ISD::SETEQ:
12240 // If all combinations of inverting the condition and swapping operands
12241 // didn't work then we have no means to expand the condition.
12242 llvm_unreachable("Don't know how to expand this condition!");
12243 }
12244
12245 SDValue SetCC1, SetCC2;
12246 if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
12247 // If we aren't the ordered or unordered operation,
12248 // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
12249 if (IsNonVP) {
12250 SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling);
12251 SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling);
12252 } else {
12253 SetCC1 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC1, Mask, EVL);
12254 SetCC2 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC2, Mask, EVL);
12255 }
12256 } else {
12257 // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
12258 if (IsNonVP) {
12259 SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling);
12260 SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling);
12261 } else {
12262 SetCC1 = DAG.getSetCCVP(dl, VT, LHS, LHS, CC1, Mask, EVL);
12263 SetCC2 = DAG.getSetCCVP(dl, VT, RHS, RHS, CC2, Mask, EVL);
12264 }
12265 }
12266 if (Chain)
12267 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1),
12268 SetCC2.getValue(1));
12269 if (IsNonVP)
12270 LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
12271 else {
12272 // Transform the binary opcode to the VP equivalent.
12273 assert((Opc == ISD::OR || Opc == ISD::AND) && "Unexpected opcode");
12274 Opc = Opc == ISD::OR ? ISD::VP_OR : ISD::VP_AND;
12275 LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2, Mask, EVL);
12276 }
12277 RHS = SDValue();
12278 CC = SDValue();
12279 return true;
12280 }
12281 }
12282 return false;
12283}
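// Editorial sketch (hypothetical target, not from the upstream source):
// assuming SETUEQ is Expand while SETOGT/SETOLT are legal, the switch above
// selects CC1 = SETOGT, CC2 = SETOLT, Opc = ISD::OR and sets NeedInvert, so
//   (setcc a, b, setueq)
// is legalized to the inverse of
//   (or (setcc a, b, setogt), (setcc a, b, setolt))
// which is correct because !(a < b || a > b) holds exactly when a == b or
// either operand is NaN.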
12284
12285SDValue TargetLowering::expandVectorNaryOpBySplitting(SDNode *Node,
12286 SelectionDAG &DAG) const {
12287 EVT VT = Node->getValueType(0);
12288 // Despite its documentation, GetSplitDestVTs will assert if VT cannot be
12289 // split into two equal parts.
12290 if (!VT.isVector() || !VT.getVectorElementCount().isKnownMultipleOf(2))
12291 return SDValue();
12292
12293 // Restrict expansion to cases where both parts can be concatenated.
12294 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VT);
12295 if (LoVT != HiVT || !isTypeLegal(LoVT))
12296 return SDValue();
12297
12298 SDLoc DL(Node);
12299 unsigned Opcode = Node->getOpcode();
12300
12301 // Don't expand if the result is likely to be unrolled anyway.
12302 if (!isOperationLegalOrCustomOrPromote(Opcode, LoVT))
12303 return SDValue();
12304
12305 SmallVector<SDValue, 4> LoOps, HiOps;
12306 for (const SDValue &V : Node->op_values()) {
12307 auto [Lo, Hi] = DAG.SplitVector(V, DL, LoVT, HiVT);
12308 LoOps.push_back(Lo);
12309 HiOps.push_back(Hi);
12310 }
12311
12312 SDValue SplitOpLo = DAG.getNode(Opcode, DL, LoVT, LoOps);
12313 SDValue SplitOpHi = DAG.getNode(Opcode, DL, HiVT, HiOps);
12314 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, SplitOpLo, SplitOpHi);
12315}
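// Editorial sketch (assumed types, not from the upstream source): for a
// binary node this expansion rewrites
//   (v8i16 (opc x, y))
// into
//   (v8i16 (concat_vectors (opc x_lo, y_lo), (opc x_hi, y_hi)))
// where x_lo/x_hi are the halves returned by SplitVector. A target's
// LowerOperation hook that wants this behaviour might try, roughly:
//   if (SDValue Split = expandVectorNaryOpBySplitting(Op.getNode(), DAG))
//     return Split;
//   return SDValue(); // otherwise fall back to the default expansion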
12316
12317SDValue TargetLowering::scalarizeExtractedVectorLoad(EVT ResultVT,
12318 const SDLoc &DL,
12319 EVT InVecVT, SDValue EltNo,
12320 LoadSDNode *OriginalLoad,
12321 SelectionDAG &DAG) const {
12322 assert(OriginalLoad->isSimple());
12323
12324 EVT VecEltVT = InVecVT.getVectorElementType();
12325
12326 // If the vector element type is not a multiple of a byte then we are unable
12327 // to correctly compute an address to load only the extracted element as a
12328 // scalar.
12329 if (!VecEltVT.isByteSized())
12330 return SDValue();
12331
12332 ISD::LoadExtType ExtTy =
12333 ResultVT.bitsGT(VecEltVT) ? ISD::EXTLOAD : ISD::NON_EXTLOAD;
12334 if (!isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
12335 return SDValue();
12336
12337 std::optional<unsigned> ByteOffset;
12338 Align Alignment = OriginalLoad->getAlign();
12339 MachinePointerInfo MPI;
12340 if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
12341 int Elt = ConstEltNo->getZExtValue();
12342 ByteOffset = VecEltVT.getSizeInBits() * Elt / 8;
12343 MPI = OriginalLoad->getPointerInfo().getWithOffset(*ByteOffset);
12344 Alignment = commonAlignment(Alignment, *ByteOffset);
12345 } else {
12346 // Discard the pointer info except the address space because the memory
12347 // operand can't represent this new access since the offset is variable.
12348 MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
12349 Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8);
12350 }
12351
12352 if (!shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT, ByteOffset))
12353 return SDValue();
12354
12355 unsigned IsFast = 0;
12356 if (!allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT,
12357 OriginalLoad->getAddressSpace(), Alignment,
12358 OriginalLoad->getMemOperand()->getFlags(), &IsFast) ||
12359 !IsFast)
12360 return SDValue();
12361
12362 SDValue NewPtr =
12363 getVectorElementPointer(DAG, OriginalLoad->getBasePtr(), InVecVT, EltNo);
12364
12365 // We are replacing a vector load with a scalar load. The new load must have
12366 // identical memory op ordering to the original.
12367 SDValue Load;
12368 if (ResultVT.bitsGT(VecEltVT)) {
12369 // If the result type of vextract is wider than the load, then issue an
12370 // extending load instead.
12371 ISD::LoadExtType ExtType = isLoadExtLegal(ISD::ZEXTLOAD, ResultVT, VecEltVT)
12372 ? ISD::ZEXTLOAD
12373 : ISD::EXTLOAD;
12374 Load = DAG.getExtLoad(ExtType, DL, ResultVT, OriginalLoad->getChain(),
12375 NewPtr, MPI, VecEltVT, Alignment,
12376 OriginalLoad->getMemOperand()->getFlags(),
12377 OriginalLoad->getAAInfo());
12378 DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
12379 } else {
12380 // The result type is narrower or the same width as the vector element
12381 Load = DAG.getLoad(VecEltVT, DL, OriginalLoad->getChain(), NewPtr, MPI,
12382 Alignment, OriginalLoad->getMemOperand()->getFlags(),
12383 OriginalLoad->getAAInfo());
12384 DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
12385 if (ResultVT.bitsLT(VecEltVT))
12386 Load = DAG.getNode(ISD::TRUNCATE, DL, ResultVT, Load);
12387 else
12388 Load = DAG.getBitcast(ResultVT, Load);
12389 }
12390
12391 return Load;
12392}
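// Editorial sketch (assumed constants, not from the upstream source): with a
// v4i32 load at pointer p and constant index 2, the code above produces
//   (i32 (load p+8))
// with MPI set to the original pointer info at offset 8 and alignment
// commonAlignment(OrigAlign, 8). With a variable index, NewPtr comes from
// getVectorElementPointer and only the address space of the original pointer
// info is kept, since the exact offset is unknown at compile time.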
unsigned const MachineRegisterInfo * MRI
return SDValue()
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
constexpr LLT F32
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
block Block Frequency Analysis
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
static bool isSigned(unsigned int Opcode)
static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, const APInt &Demanded)
Check to see if the specified operand of the specified instruction is a constant integer.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
static LVOptions Options
Definition LVOptions.cpp:25
lazy value info
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition Lint.cpp:539
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
#define G(x, y, z)
Definition MD5.cpp:56
static bool isUndef(const MachineInstr &MI)
Register const TargetRegisterInfo * TRI
#define T
#define T1
#define P(N)
Function const char * Passes
R600 Clause Merge
const SmallVectorImpl< MachineOperand > & Cond
Contains matchers for matching SelectionDAG nodes and values.
This file contains some templates that are useful if you are working with the STL at all.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static bool lowerImmediateIfPossible(TargetLowering::ConstraintPair &P, SDValue Op, SelectionDAG *DAG, const TargetLowering &TLI)
If we have an immediate, see if we can lower it.
static SDValue expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG)
static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG, const APInt &UndefOp0, const APInt &UndefOp1)
Given a vector binary operation and known undefined elements for each input operand,...
static SDValue BuildExactUDIV(const TargetLowering &TLI, SDNode *N, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created)
Given an exact UDIV by a constant, create a multiplication with the multiplicative inverse of the con...
static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx, EVT VecVT, const SDLoc &dl, ElementCount SubEC)
static unsigned getConstraintPiority(TargetLowering::ConstraintType CT)
Return a number indicating our preference for chosing a type of constraint over another,...
static std::optional< bool > isFCmpEqualZero(FPClassTest Test, const fltSemantics &Semantics, const MachineFunction &MF)
Returns a true value if if this FPClassTest can be performed with an ordered fcmp to 0,...
static void turnVectorIntoSplatVector(MutableArrayRef< SDValue > Values, std::function< bool(SDValue)> Predicate, SDValue AlternativeReplacement=SDValue())
If all values in Values that don't match the predicate are same 'splat' value, then replace all value...
static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT)
static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created)
Given an exact SDIV by a constant, create a multiplication with the multiplicative inverse of the con...
static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT, SDValue N0, const APInt &C1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static SDValue combineShiftToAVG(SDValue Op, TargetLowering::TargetLoweringOpt &TLO, const TargetLowering &TLI, const APInt &DemandedBits, const APInt &DemandedElts, unsigned Depth)
This file describes how to lower LLVM code to machine code.
static int Lookup(ArrayRef< TableEntry > Table, unsigned Opcode)
static SDValue scalarizeVectorStore(StoreSDNode *Store, MVT StoreVT, SelectionDAG &DAG)
Scalarize a vector store, bitcasting to TargetVT to determine the scalar type.
Value * RHS
The Input class is used to parse a yaml document into in-memory structs and vectors.
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition APFloat.h:1347
static APFloat getSmallestNormalized(const fltSemantics &Sem, bool Negative=false)
Returns the smallest (by magnitude) normalized finite number in the given semantics.
Definition APFloat.h:1158
APInt bitcastToAPInt() const
Definition APFloat.h:1353
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
Definition APFloat.h:1138
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Definition APFloat.h:1098
void changeSign()
Definition APFloat.h:1297
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Definition APFloat.h:1109
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt udiv(const APInt &RHS) const
Unsigned division operation.
Definition APInt.cpp:1573
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:234
static LLVM_ABI void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
Definition APInt.cpp:1758
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition APInt.h:1406
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition APInt.h:449
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1012
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:229
bool isMinSignedValue() const
Determine if this is the smallest signed value.
Definition APInt.h:423
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1540
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
Definition APInt.h:1391
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition APInt.h:1385
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1033
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1512
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition APInt.h:206
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1330
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:371
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1182
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition APInt.h:258
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:380
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1666
void setSignBit()
Set the sign bit to 1.
Definition APInt.h:1340
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1488
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:209
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
Definition APInt.h:216
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:329
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition APInt.h:1249
void clearAllBits()
Set every bit to 0.
Definition APInt.h:1396
void ashrInPlace(unsigned ShiftAmt)
Arithmetic right-shift this APInt by ShiftAmt in place.
Definition APInt.h:834
void negate()
Negate this APInt in place.
Definition APInt.h:1468
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1639
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition APInt.h:1598
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:651
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:219
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition APInt.h:1531
unsigned countLeadingZeros() const
Definition APInt.h:1606
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition APInt.h:356
LLVM_ABI void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition APInt.cpp:397
void clearLowBits(unsigned loBits)
Set bottom loBits bits to 0.
Definition APInt.h:1435
unsigned logBase2() const
Definition APInt.h:1761
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition APInt.h:475
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
Definition APInt.h:827
void setAllBits()
Set every bit to 1.
Definition APInt.h:1319
LLVM_ABI APInt multiplicativeInverse() const
Definition APInt.cpp:1274
bool isMaxSignedValue() const
Determine if this is the largest signed value.
Definition APInt.h:405
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition APInt.h:334
bool ule(const APInt &RHS) const
Unsigned less or equal comparison.
Definition APInt.h:1150
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:985
void setBits(unsigned loBit, unsigned hiBit)
Set the bits from loBit (inclusive) to hiBit (exclusive) to 1.
Definition APInt.h:1367
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition APInt.h:873
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1257
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:440
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:306
void clearBits(unsigned LoBit, unsigned HiBit)
Clear the bits from LoBit (inclusive) to HiBit (exclusive) to 0.
Definition APInt.h:1417
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:296
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:200
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition APInt.h:1388
LLVM_ABI APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition APInt.cpp:482
bool isOne() const
Determine if this is a value of 1.
Definition APInt.h:389
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:286
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:239
void clearHighBits(unsigned hiBits)
Set top hiBits bits to 0.
Definition APInt.h:1442
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1562
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:858
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:851
unsigned countr_one() const
Count the number of trailing one bits.
Definition APInt.h:1656
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1221
void setBitVal(unsigned BitPosition, bool BitValue)
Set a given bit to a given value.
Definition APInt.h:1343
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
A "pseudo-class" with methods for operating on BUILD_VECTORs.
LLVM_ABI ConstantSDNode * getConstantSplatNode(const APInt &DemandedElts, BitVector *UndefElements=nullptr) const
Returns the demanded splatted constant or null if this is not a constant splat.
CCValAssign - Represent assignment of one arg/retval to a location.
Register getLocReg() const
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
This class represents a function call, abstracting a target machine's calling convention.
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
Definition Constants.h:715
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:277
This class represents a range of values.
const APInt & getAPIntValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
bool isLittleEndian() const
Layout endianness...
Definition DataLayout.h:198
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:352
const GlobalValue * getGlobal() const
Module * getParent()
Get the module that this global value is contained inside of...
std::vector< std::string > ConstraintCodeVector
Definition InlineAsm.h:104
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:319
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
Context object for machine code objects.
Definition MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:34
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:33
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:214
Machine Value Type.
SimpleValueType SimpleTy
bool isInteger() const
Return true if this is an integer or a vector integer type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static MVT getIntegerVT(unsigned BitWidth)
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
DenormalMode getDenormalMode(const fltSemantics &FPType) const
Returns the denormal handling type for the default rounding mode of the function.
MCSymbol * getJTISymbol(unsigned JTI, MCContext &Ctx, bool isLinkerPrivate=false) const
getJTISymbol - Return the MCSymbol for the specified non-empty jump table.
Function & getFunction()
Return the LLVM function that this machine code represents.
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
@ EK_BlockAddress
EK_BlockAddress - Each entry is a plain address of block, e.g.: .word LBB123.
Flags getFlags() const
Return the raw flags of the source value,.
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
unsigned getAddressSpace() const
Return the address space for the associated pointer.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
bool isSimple() const
Returns true if the memory operation is neither atomic or volatile.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
const GlobalVariable * getNamedGlobal(StringRef Name) const
Return the global variable in the module with the specified name, of arbitrary type.
Definition Module.h:445
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:303
iterator end() const
Definition ArrayRef.h:348
iterator begin() const
Definition ArrayRef.h:347
Class to represent pointers.
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
Definition Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
SDNodeFlags getFlags() const
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
bool use_empty() const
Return true if there are no nodes using value ResNo of Node.
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
bool willNotOverflowAdd(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the addition of 2 nodes can never overflow.
LLVM_ABI Align getReducedAlign(EVT VT, bool UseABI)
In most cases this function returns the ABI alignment for a given type, except for illegal vector typ...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT, unsigned Opcode)
Convert Op, which must be of integer type, to the integer type VT, by either any/sign/zero-extending ...
SDValue getExtractVectorElt(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Extract element at Idx from Vec.
LLVM_ABI unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
bool isKnownNeverSNaN(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond, const SDLoc &dl)
Constant fold a setcc to true or false.
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI void ExtractVectorElements(SDValue Op, SmallVectorImpl< SDValue > &Args, unsigned Start=0, unsigned Count=0, EVT EltVT=EVT())
Append the extracted elements from Start to Count out of the vector Op in Args.
LLVM_ABI SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
LLVM_ABI SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
LLVM_ABI SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
LLVM_ABI bool isConstantIntBuildVectorOrConstantInt(SDValue N, bool AllowOpaques=true) const
Test whether the given value is a constant int or similar node.
LLVM_ABI SDValue getJumpTableDebugInfo(int JTI, SDValue Chain, const SDLoc &DL)
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
LLVM_ABI std::optional< unsigned > getValidMaximumShiftAmount(SDValue V, const APInt &DemandedElts, unsigned Depth=0) const
If a SHL/SRA/SRL node V has shift amounts that are all less than the element bit-width of the shift n...
LLVM_ABI SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
LLVM_ABI SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
bool willNotOverflowSub(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the sub of 2 nodes can never overflow.
LLVM_ABI bool shouldOptForSize() const
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
LLVM_ABI std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI std::optional< unsigned > getValidShiftAmount(SDValue V, const APInt &DemandedElts, unsigned Depth=0) const
If a SHL/SRA/SRL node V has a uniform shift amount that is less than the element bit-width of the shi...
LLVM_ABI SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
LLVM_ABI bool doesNodeExist(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops)
Check if a node exists without modifying its flags.
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
LLVM_ABI SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
LLVM_ABI SDValue getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, bool isTargetGA=false, unsigned TargetFlags=0)
LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly=false, unsigned Depth=0) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
LLVM_ABI bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
LLVM_ABI void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
LLVM_ABI bool isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth=0) const
Test if the given value is known to have exactly one bit set.
LLVM_ABI bool isKnownNeverZero(SDValue Op, unsigned Depth=0) const
Test whether the given SDValue is known to contain non-zero value(s).
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
LLVM_ABI SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI bool isKnownNeverZeroFloat(SDValue Op) const
Test whether the given floating point SDValue is known to never be positive or negative zero.
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
LLVM_ABI SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
LLVM_ABI bool isKnownNeverNaN(SDValue Op, const APInt &DemandedElts, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN in...
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
LLVM_ABI SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
MachineFunction & getMachineFunction() const
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
SDValue getSetCCVP(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Mask, SDValue EVL)
Helper function to make it easier to build VP_SETCCs if you just have an ISD::CondCode instead of an ...
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
static void commuteMask(MutableArrayRef< int > Mask)
Change values in a shuffle permute mask assuming the two vector operands have swapped position.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class is used to represent ISD::STORE nodes.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition StringRef.h:581
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:269
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:154
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:148
iterator end() const
Definition StringRef.h:122
Class to represent struct types.
LLVM_ABI void setAttributes(const CallBase *Call, unsigned ArgIdx)
Set CallLoweringInfo attribute flags based on a call instruction and called function attributes.
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
virtual bool isShuffleMaskLegal(ArrayRef< int >, EVT) const
Targets can use this to indicate that they only support some VECTOR_SHUFFLE operations,...
virtual bool shouldRemoveRedundantExtend(SDValue Op) const
Return true (the default) if it is profitable to remove a sext_inreg(x) where the sext is redundant,...
virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT, std::optional< unsigned > ByteOffset=std::nullopt) const
Return true if it is profitable to reduce a load to a smaller type.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const
Get the CallingConv that should be used for the specified libcall.
virtual bool isLegalICmpImmediate(int64_t) const
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
virtual bool isSafeMemOpType(MVT) const
Returns true if it's safe to use load / store of the specified type to expand memcpy / memset inline.
const TargetMachine & getTargetMachine() const
virtual bool isCtpopFast(EVT VT) const
Return true if ctpop instruction is fast.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
bool isPaddedAtMostSignificantBitsWhenStored(EVT VT) const
Indicates if any padding is guaranteed to go at the most significant bits when storing the type to me...
LegalizeAction getCondCodeAction(ISD::CondCode CC, MVT VT) const
Return how the condition code should be treated: either it is legal, needs to be expanded to some oth...
virtual bool isCommutativeBinOp(unsigned Opcode) const
Returns true if the opcode is a commutative binary operation.
virtual bool isFPImmLegal(const APFloat &, EVT, bool ForCodeSize=false) const
Returns true if the target can instruction select the specified FP immediate natively.
virtual MVT::SimpleValueType getCmpLibcallReturnType() const
Return the ValueType for comparison libcalls.
unsigned getBitWidthForCttzElements(Type *RetTy, ElementCount EC, bool ZeroIsPoison, const ConstantRange *VScaleRange) const
Return the minimum number of bits required to hold the maximum possible number of trailing zero vecto...
virtual bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const
Should we tranform the IR-optimal check for whether given truncation down into KeptBits would be trun...
bool isLegalRC(const TargetRegisterInfo &TRI, const TargetRegisterClass &RC) const
Return true if the value types that can be represented by the specified register class are all legal.
virtual bool shouldExpandCmpUsingSelects(EVT VT) const
Should we expand [US]CMP nodes using two selects and two compares, or by doing arithmetic on boolean ...
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const
Returns the type for the shift amount of a shift opcode.
virtual bool shouldExtendTypeInLibCall(EVT Type) const
Returns true if arguments should be extended in lib calls.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool shouldAvoidTransformToShift(EVT VT, unsigned Amount) const
Return true if creating a shift of the type by the given amount is not profitable.
virtual bool isFPExtFree(EVT DestVT, EVT SrcVT) const
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const
Return the ValueType of the result of SETCC operations.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
BooleanContent getBooleanContents(bool isVec, bool isFloat) const
For targets without i1 registers, this gives the nature of the high-bits of boolean values held in ty...
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal for a comparison of the specified types on this ...
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
ISD::CondCode getSoftFloatCmpLibcallPredicate(RTLIB::LibcallImpl Call) const
Get the comparison predicate that's to be used to test the result of the comparison libcall against z...
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
virtual unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const
Return the maximum number of "x & (x - 1)" operations that can be done instead of deferring to a cust...
virtual bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
BooleanContent
Enum that describes how the target represents true/false values.
virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const
Return true if integer divide is usually cheaper than a sequence of several shifts,...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
virtual bool hasAndNotCompare(SDValue Y) const
Return true if the target should transform: (X & Y) == Y ---> (~X & Y) == 0 (X & Y) !...
virtual bool isNarrowingProfitable(SDNode *N, EVT SrcVT, EVT DestVT) const
Return true if it's profitable to narrow operations of type SrcVT to DestVT.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal on this target.
RTLIB::LibcallImpl getLibcallImpl(RTLIB::Libcall Call) const
Get the libcall impl routine name for the specified libcall.
TargetLoweringBase(const TargetMachine &TM)
NOTE: The TargetMachine owns TLOF.
virtual bool isCtlzFast() const
Return true if ctlz instruction is fast.
virtual bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT, bool IsSigned) const
Return true if it is more correct/profitable to use strict FP_TO_INT conversion operations - canonica...
NegatibleCost
Enum that specifies when a float negation is beneficial.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
std::vector< ArgListEntry > ArgListTy
virtual EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op, const AttributeList &) const
Returns the target specific optimal type for load and store operations as a result of memset,...
virtual EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
bool isCondCodeLegalOrCustom(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal or custom for a comparison of the specified type...
MVT getRegisterType(MVT VT) const
Return the type of registers that this ValueType will eventually require.
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
MulExpansionKind
Enum that specifies when a multiplication should be expanded.
static ISD::NodeType getExtendForContent(BooleanContent Content)
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT.
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions Ref: "Hacker's Delight" by Henry Warren 1...
virtual ConstraintWeight getMultipleConstraintMatchWeight(AsmOperandInfo &info, int maIndex) const
Examine constraint type and operand type and determine a weight value.
SDValue expandVPCTLZ(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTLZ/VP_CTLZ_ZERO_UNDEF nodes.
bool expandMULO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]MULO.
bool expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, SelectionDAG &DAG, MulExpansionKind Kind, SDValue LL=SDValue(), SDValue LH=SDValue(), SDValue RL=SDValue(), SDValue RH=SDValue()) const
Expand a MUL into two nodes.
SmallVector< ConstraintPair > ConstraintGroup
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
virtual Align computeKnownAlignForTargetInstr(GISelValueTracking &Analysis, Register R, const MachineRegisterInfo &MRI, unsigned Depth=0) const
Determine the known alignment for the pointer value R.
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
virtual bool isUsedByReturnOnly(SDNode *, SDValue &) const
Return true if result of the specified node is used by a return node only.
virtual void computeKnownBitsForFrameIndex(int FIOp, KnownBits &Known, const MachineFunction &MF) const
Determine which of the bits of FrameIndex FIOp are known to be 0.
virtual bool findOptimalMemOpLowering(LLVMContext &Context, std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes) const
Determines the optimal series of memory ops to replace the memset / memcpy.
SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const
virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const
This method can be implemented by targets that want to expose additional information about sign bits ...
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
SDValue expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const
Expand VP_BSWAP nodes.
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
void forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl, bool Signed, const SDValue LHS, const SDValue RHS, SDValue &Lo, SDValue &Hi) const
Calculate full product of LHS and RHS either via a libcall or through brute force expansion of the mu...
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
SDValue expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const
Expand a VECREDUCE_SEQ_* into an explicit ordered calculation.
SDValue expandCTLZ(SDNode *N, SelectionDAG &DAG) const
Expand CTLZ/CTLZ_ZERO_UNDEF nodes.
SDValue expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const
Expand BITREVERSE nodes.
SDValue expandCTTZ(SDNode *N, SelectionDAG &DAG) const
Expand CTTZ/CTTZ_ZERO_UNDEF nodes.
virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const
Expands target specific indirect branch for the case of JumpTable expansion.
SDValue expandABD(SDNode *N, SelectionDAG &DAG) const
Expand ABDS/ABDU nodes.
virtual bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const
std::vector< AsmOperandInfo > AsmOperandInfoVector
SDValue expandShlSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]SHLSAT.
SDValue expandIS_FPCLASS(EVT ResultVT, SDValue Op, FPClassTest Test, SDNodeFlags Flags, const SDLoc &DL, SelectionDAG &DAG) const
Expand check for floating point class.
virtual bool isTargetCanonicalConstantNode(SDValue Op) const
Returns true if the given Opc is considered a canonical constant for the target, which should not be ...
SDValue expandFP_TO_INT_SAT(SDNode *N, SelectionDAG &DAG) const
Expand FP_TO_[US]INT_SAT into FP_TO_[US]INT and selects or min/max.
virtual unsigned computeNumSignBitsForTargetInstr(GISelValueTracking &Analysis, Register R, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
This method can be implemented by targets that want to expose additional information about sign bits ...
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "lookthrough" ops that don't contrib...
SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const
Expands an unaligned store to 2 half-size stores for integer values, and possibly more for vectors.
SDValue SimplifyMultipleUseDemandedVectorElts(SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
Helper wrapper around SimplifyMultipleUseDemandedBits, demanding all bits from only some vector eleme...
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue unwrapAddress(SDValue N) const
void expandSADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::S(ADD|SUB)O.
SDValue expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const
Expand VP_BITREVERSE nodes.
SDValue expandABS(SDNode *N, SelectionDAG &DAG, bool IsNegative=false) const
Expand ABS nodes.
SDValue expandVecReduce(SDNode *Node, SelectionDAG &DAG) const
Expand a VECREDUCE_* into an explicit calculation.
bool ShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const
Check to see if the specified operand of the specified instruction is a constant integer.
SDValue expandVPCTTZElements(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTTZ_ELTS/VP_CTTZ_ELTS_ZERO_UNDEF nodes.
SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::SDIV node expressing a divide by constant, return a DAG expression to select that will ...
virtual const char * getTargetNodeName(unsigned Opcode) const
This method returns the name of a target specific DAG node.
bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const
Expand float to UINT conversion.
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
virtual bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded vector elements, returning true on success...
bool expandREM(SDNode *Node, SDValue &Result, SelectionDAG &DAG) const
Expand an SREM or UREM using SDIV/UDIV or SDIVREM/UDIVREM, if legal.
std::pair< SDValue, SDValue > expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Expands an unaligned load to 2 half-size loads for an integer, and possibly more for vectors.
SDValue expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *N, SelectionDAG &DAG) const
Expand fminimumnum/fmaximumnum into multiple comparison with selects.
void forceExpandMultiply(SelectionDAG &DAG, const SDLoc &dl, bool Signed, SDValue &Lo, SDValue &Hi, SDValue LHS, SDValue RHS, SDValue HiLHS=SDValue(), SDValue HiRHS=SDValue()) const
Calculate the product twice the width of LHS and RHS.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
virtual bool isTypeDesirableForOp(unsigned, EVT VT) const
Return true if the target has native support for the specified value type and it is 'desirable' to us...
SDValue expandVectorSplice(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::VECTOR_SPLICE.
virtual const char * LowerXConstraint(EVT ConstraintVT) const
Try to replace an X constraint, which matches anything, with another that has more specific requireme...
SDValue expandCTPOP(SDNode *N, SelectionDAG &DAG) const
Expand CTPOP nodes.
virtual void computeKnownBitsForTargetInstr(GISelValueTracking &Analysis, Register R, KnownBits &Known, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::UDIV node expressing a divide by constant, return a DAG expression to select that will ...
SDValue expandVectorNaryOpBySplitting(SDNode *Node, SelectionDAG &DAG) const
~TargetLowering() override
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
SDValue expandBSWAP(SDNode *N, SelectionDAG &DAG) const
Expand BSWAP nodes.
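The shift-and-mask form this expansion produces for an i32 BSWAP, modeled on plain integers (name illustrative).

#include <cstdint>

uint32_t bswapExpansionSketch(uint32_t V) {
  return (V << 24) | ((V & 0xFF00u) << 8) |
         ((V >> 8) & 0xFF00u) | (V >> 24);
}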
SDValue expandFMINIMUM_FMAXIMUM(SDNode *N, SelectionDAG &DAG) const
Expand fminimum/fmaximum into multiple comparisons with selects.
SDValue CTTZTableLookup(SDNode *N, SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op, unsigned NumBitsPerElt) const
Expand CTTZ via Table Lookup.
bool expandDIVREMByConstant(SDNode *N, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, SDValue LL=SDValue(), SDValue LH=SDValue()) const
Attempt to expand an n-bit div/rem/divrem by constant using an n/2-bit urem by constant and other arit...
SDValue getVectorSubVecPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, EVT SubVecVT, SDValue Index) const
Get a pointer to a sub-vector of type SubVecVT at index Idx located in memory for a vector of type Ve...
virtual void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool isPositionIndependent() const
std::pair< StringRef, TargetLowering::ConstraintType > ConstraintPair
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
ConstraintGroup getConstraintPreferences(AsmOperandInfo &OpInfo) const
Given an OpInfo with list of constraints codes as strings, return a sorted Vector of pairs of constra...
bool expandFP_TO_SINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const
Expand float(f32) to SINT(i64) conversion.
virtual SDValue SimplifyMultipleUseDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth) const
More limited version of SimplifyDemandedBits that can be used to "lookthrough" ops that don't contrib...
virtual SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Glue, const SDLoc &DL, const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const
SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, SDValue N1, MutableArrayRef< int > Mask, SelectionDAG &DAG) const
Tries to build a legal vector shuffle using the provided parameters or equivalent variations.
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
std::pair< SDValue, SDValue > scalarizeVectorLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Turn load of vector type into a load of the individual elements.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g. {edx}), return the register number and the register class for the register.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op; at this point, we know that only the DemandedBits bits of the result of Op are ever used downstream.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
virtual bool isDesirableToCommuteXorWithShift(const SDNode *N) const
Return true if it is profitable to combine an XOR of a logical shift to create a logical shift of NOT...
TargetLowering(const TargetLowering &)=delete
virtual bool shouldSimplifyDemandedVectorElts(SDValue Op, const TargetLoweringOpt &TLO) const
Return true if the target supports simplifying demanded vector elements by converting them to undefs.
bool isConstFalseVal(SDValue N) const
Return if the N is a constant or constant vector equal to the false value from getBooleanContents().
SDValue IncrementMemoryAddress(SDValue Addr, SDValue Mask, const SDLoc &DL, EVT DataVT, SelectionDAG &DAG, bool IsCompressedMemory) const
Increments memory address Addr according to the type of the value DataVT that should be stored.
bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const
Check whether a given call node is in tail position within its function.
virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL, const TargetRegisterInfo *TRI, const CallBase &Call) const
Split up the constraint string from the inline assembly value into the specific constraints and their...
virtual bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts, APInt &UndefElts, const SelectionDAG &DAG, unsigned Depth=0) const
Return true if vector Op has the same value across all DemandedElts, indicating any elements which ma...
SDValue expandRoundInexactToOdd(EVT ResultVT, SDValue Op, const SDLoc &DL, SelectionDAG &DAG) const
Truncate Op to ResultVT.
virtual bool shouldSplitFunctionArgumentsAsLittleEndian(const DataLayout &DL) const
For most targets, an LLVM type must be broken down into multiple smaller types.
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, DAGCombinerInfo &DCI, const SDLoc &dl) const
Try to simplify a setcc built with the specified operands and cc.
SDValue expandFunnelShift(SDNode *N, SelectionDAG &DAG) const
Expand funnel shift.
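A scalar model of the FSHL case: the second shift is split in two so both shift amounts stay in range even when the amount is a multiple of the width (sketch; name illustrative).

#include <cstdint>

// fshl(a, b, s) = high 32 bits of the 64-bit value (a:b) << (s % 32).
uint32_t fshlExpansionSketch(uint32_t A, uint32_t B, uint32_t S) {
  S &= 31;
  return (A << S) | (B >> 1 >> (31 - S)); // S == 0 yields A exactly
}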
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const
Return true if folding a constant offset with the given GlobalAddress is legal.
bool LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, SDValue Mask, SDValue EVL, bool &NeedInvert, const SDLoc &dl, SDValue &Chain, bool IsSignaling=false) const
Legalize a SETCC or VP_SETCC with given LHS and RHS and condition code CC on the current target.
bool isExtendedTrueVal(const ConstantSDNode *N, EVT VT, bool SExt) const
Return if N is a True value when extended to VT.
bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth, const APInt &DemandedBits, TargetLoweringOpt &TLO) const
Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
bool isConstTrueVal(SDValue N) const
Return if the N is a constant or constant vector equal to the true value from getBooleanContents().
SDValue expandVPCTPOP(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTPOP nodes.
SDValue expandFixedPointDiv(unsigned Opcode, const SDLoc &dl, SDValue LHS, SDValue RHS, unsigned Scale, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]DIVFIX[SAT].
SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index) const
Get a pointer to vector element Idx located in memory for a vector of type VecVT starting at a base a...
virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo, SDValue Op, SelectionDAG *DAG=nullptr) const
Determines the constraint code and constraint type to use for the specific AsmOperandInfo,...
virtual void CollectTargetIntrinsicOperands(const CallInst &I, SmallVectorImpl< SDValue > &Ops, SelectionDAG &DAG) const
SDValue expandVPCTTZ(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTTZ/VP_CTTZ_ZERO_UNDEF nodes.
SDValue expandVECTOR_COMPRESS(SDNode *Node, SelectionDAG &DAG) const
Expand a vector VECTOR_COMPRESS into a sequence of extract element, store temporarily,...
virtual const Constant * getTargetConstantFromLoad(LoadSDNode *LD) const
This method returns the constant pool value that will be loaded by LD.
SDValue expandFP_ROUND(SDNode *Node, SelectionDAG &DAG) const
Expand round(fp) to fp conversion.
SDValue createSelectForFMINNUM_FMAXNUM(SDNode *Node, SelectionDAG &DAG) const
Try to convert the fminnum/fmaxnum to a compare/select sequence.
SDValue expandROT(SDNode *N, bool AllowVectorOps, SelectionDAG &DAG) const
Expand rotations.
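The masked-shift form of a rotate-left, as emitted when ROTL is not legal; a scalar sketch with an illustrative name.

#include <cstdint>

uint32_t rotlExpansionSketch(uint32_t V, uint32_t S) {
  S &= 31;
  return (V << S) | (V >> ((32 - S) & 31)); // masks avoid UB at S == 0
}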
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const
Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs.
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset.
virtual bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, unsigned Depth) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
virtual void computeKnownFPClassForTargetInstr(GISelValueTracking &Analysis, Register R, KnownFPClass &Known, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
SDValue expandCMP(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]CMP.
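A scalar model of the SCMP case: the three-way result is the difference of two setcc values (sketch; name illustrative).

#include <cstdint>

// Returns -1, 0, or 1 for A < B, A == B, A > B respectively.
int32_t scmpExpansionSketch(int32_t A, int32_t B) {
  return (int32_t)(A > B) - (int32_t)(A < B);
}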
void expandShiftParts(SDNode *N, SDValue &Lo, SDValue &Hi, SelectionDAG &DAG) const
Expand shift-by-parts.
virtual bool isKnownNeverNaNForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool SNaN=false, unsigned Depth=0) const
If SNaN is false, returns true if Op is known never to be any NaN; if SNaN is true, returns true if Op is known never to be a signaling NaN.
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
This method will be invoked for all target nodes and for any target-independent nodes that the target...
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
SDValue expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[U|S]MULFIX[SAT].
SDValue expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][MIN|MAX].
SDValue expandVectorFindLastActive(SDNode *N, SelectionDAG &DAG) const
Expand VECTOR_FIND_LAST_ACTIVE nodes.
SDValue expandPartialReduceMLA(SDNode *Node, SelectionDAG &DAG) const
Expands PARTIAL_REDUCE_S/UMLA nodes to a series of simpler operations, consisting of zext/sext,...
void expandUADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::U(ADD|SUB)O.
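Scalar models of the two overflow flags this expansion computes (sketches; names illustrative).

#include <cstdint>

void uaddoSketch(uint32_t L, uint32_t R, uint32_t &Res, bool &Ovf) {
  Res = L + R;
  Ovf = Res < L; // UADDO: carry out iff the sum wrapped
}
void usuboSketch(uint32_t L, uint32_t R, uint32_t &Res, bool &Ovf) {
  Res = L - R;
  Ovf = L < R;   // USUBO: borrow iff the subtrahend is larger
}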
virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
SDValue scalarizeExtractedVectorLoad(EVT ResultVT, const SDLoc &DL, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad, SelectionDAG &DAG) const
Replace an extraction of a load with a narrowed load.
virtual SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SREM lowering for power-of-2 denominators.
bool expandUINT_TO_FP(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const
Expand UINT(i64) to double(f64) conversion.
bool expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl, SDValue LHS, SDValue RHS, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, MulExpansionKind Kind, SDValue LL=SDValue(), SDValue LH=SDValue(), SDValue RL=SDValue(), SDValue RH=SDValue()) const
Expand a MUL or [US]MUL_LOHI of n-bit values into two or four nodes, respectively,...
SDValue expandAVG(SDNode *N, SelectionDAG &DAG) const
Expand vector/scalar AVGCEILS/AVGCEILU/AVGFLOORS/AVGFLOORU nodes.
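The overflow-free averaging identities behind this expansion, on plain unsigned integers (sketch; names illustrative).

#include <cstdint>

uint32_t avgFloorUSketch(uint32_t A, uint32_t B) {
  return (A & B) + ((A ^ B) >> 1); // AVGFLOORU without a wide add
}
uint32_t avgCeilUSketch(uint32_t A, uint32_t B) {
  return (A | B) - ((A ^ B) >> 1); // AVGCEILU
}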
Primary interface to the complete machine description for the target machine.
bool isPositionIndependent() const
const Triple & getTargetTriple() const
TargetOptions Options
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
iterator_range< regclass_iterator > regclasses() const
virtual StringRef getRegAsmName(MCRegister Reg) const
Return the assembly name for Reg.
bool isTypeLegalForClass(const TargetRegisterClass &RC, MVT T) const
Return true if the given TargetRegisterClass has the ValueType T.
bool isOSBinFormatCOFF() const
Tests whether the OS uses the COFF binary format.
Definition Triple.h:774
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
bool isSingleValueType() const
Return true if the type is a valid type for a register in codegen.
Definition Type.h:296
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition Type.h:311
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition Type.h:128
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
LLVM_ABI const fltSemantics & getFltSemantics() const
Definition Type.cpp:107
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI const Value * stripPointerCastsAndAliases() const
Strip off pointer casts, all-zero GEPs, address space casts, and aliases.
Definition Value.cpp:705
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition TypeSize.h:181
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:169
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:166
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_ABI APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth, bool MatchAllBits=false)
Splat/Merge neighboring bits to widen/narrow the bitmask represented by A to NewBitWidth.
Definition APInt.cpp:3009
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:801
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:256
@ CTLZ_ZERO_UNDEF
Definition ISDOpcodes.h:774
@ FGETSIGN
INT = FGETSIGN(FP) - Return the sign bit of the specified floating point value as an integer 0/1 valu...
Definition ISDOpcodes.h:525
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:270
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:587
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:765
@ SMULFIX
RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on 2 integers with the same...
Definition ISDOpcodes.h:387
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:289
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition ISDOpcodes.h:515
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ SMULFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition ISDOpcodes.h:393
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:835
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:511
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:215
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:862
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:571
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:410
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:738
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition ISDOpcodes.h:892
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:275
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:249
@ SDIVFIX
RESULT = [US]DIVFIX(LHS, RHS, SCALE) - Perform fixed point division on 2 integers with the same width...
Definition ISDOpcodes.h:400
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:826
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition ISDOpcodes.h:706
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:656
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition ISDOpcodes.h:773
@ SSUBO
Same for subtraction.
Definition ISDOpcodes.h:347
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:528
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:535
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:369
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:778
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:242
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:663
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition ISDOpcodes.h:225
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition ISDOpcodes.h:343
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:695
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:756
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:636
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:601
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:563
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:832
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:793
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
Definition ISDOpcodes.h:379
@ SMULO
Same for multiplication.
Definition ISDOpcodes.h:351
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition ISDOpcodes.h:881
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:870
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:718
@ SDIVFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition ISDOpcodes.h:406
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:787
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:323
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:470
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:908
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition ISDOpcodes.h:174
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:730
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:200
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition ISDOpcodes.h:701
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:299
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:236
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:552
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ VECTOR_SPLICE
VECTOR_SPLICE(VEC1, VEC2, IMM) - Returns a subvector of the same type as VEC1/VEC2 from CONCAT_VECTOR...
Definition ISDOpcodes.h:648
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:941
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition ISDOpcodes.h:903
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition ISDOpcodes.h:927
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:838
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:815
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:521
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:360
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:713
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:208
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:543
LLVM_ABI bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
LLVM_ABI NodeType getExtForLoadExtType(bool IsFP, LoadExtType)
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isTrueWhenEqual(CondCode Cond)
Return true if the specified condition returns true if the two operands to the condition are equal.
unsigned getUnorderedFlavor(CondCode Cond)
This function returns 0 if the condition is always false if an operand is a NaN, 1 if the condition i...
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
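These predicate utilities compose; a minimal sketch, assuming the declarations live in the usual ISDOpcodes.h/ValueTypes.h headers (the wrapper name is made up).

#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
using namespace llvm;

// Predicate for !(Y op X), given the one for (X op Y).
ISD::CondCode swappedInverse(ISD::CondCode CC, EVT VT) {
  return ISD::getSetCCInverse(ISD::getSetCCSwappedOperands(CC), VT);
}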
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
LLVM_ABI bool matchBinaryPredicate(SDValue LHS, SDValue RHS, std::function< bool(ConstantSDNode *, ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTypeMismatch=false)
Attempt to match a binary predicate against a pair of scalar/splat constants or every element of a pa...
bool matchUnaryPredicate(SDValue Op, std::function< bool(ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTruncation=false)
Hook for matching ConstantSDNode predicate.
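A typical guard built on this hook, matching every element of a constant or constant splat (a sketch assuming the SelectionDAGNodes.h header; the helper name is made up).

#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

// True iff Op is a constant (or constant splat/build_vector) whose
// every element is a power of two.
static bool isPow2ConstOrSplat(SDValue Op) {
  return ISD::matchUnaryPredicate(Op, [](ConstantSDNode *C) {
    return C->getAPIntValue().isPowerOf2();
  });
}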
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LLVM_ABI NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
specificval_ty m_Specific(const Value *V)
Match only the specified value V.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
bool sd_match(SDNode *N, const SelectionDAG *DAG, Pattern &&P)
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:477
void stable_sort(R &&Range)
Definition STLExtras.h:2038
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1705
InstructionCost Cost
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
FPClassTest invertFPClassTestIfSimpler(FPClassTest Test, bool UseFCmp)
Evaluates if the specified FP class test is better performed as the inverse (i.e. fewer instructions should be required to lower it).
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value that is Skew modulo Align.
Definition MathExtras.h:557
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition bit.h:314
LLVM_ABI void reportFatalInternalError(Error Err)
Report a fatal error that indicates a bug in LLVM.
Definition Error.cpp:177
LLVM_ABI ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:147
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1712
LLVM_ABI bool getShuffleDemandedElts(int SrcWidth, ArrayRef< int > Mask, const APInt &DemandedElts, APInt &DemandedLHS, APInt &DemandedRHS, bool AllowUndefElts=false)
Transform a shuffle mask's output demanded element mask into demanded element masks for the 2 operand...
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:342
LLVM_ABI bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:288
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
auto find_if_not(R &&Range, UnaryPredicate P)
Definition STLExtras.h:1743
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
LLVM_ABI ConstantRange getVScaleRange(const Function *F, unsigned BitWidth)
Determine the possible constant range of vscale with the given bit width, based on the vscale_range f...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
LLVM_ABI bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
@ Other
Any other memory.
Definition ModRef.h:68
To bit_cast(const From &from) noexcept
Definition bit.h:90
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:212
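A one-line example of what this computes: a pointer known 16-byte aligned, advanced by 8 bytes, is only guaranteed 8-byte aligned.

#include "llvm/Support/Alignment.h"
using namespace llvm;

Align offsetAlign() { return commonAlignment(Align(16), 8); } // Align(8)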
LLVM_ABI bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
APFloat neg(APFloat X)
Returns the negated value of the argument.
Definition APFloat.h:1569
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:208
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:384
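Quick sanity checks for the MathExtras helpers listed above (Log2_32, isPowerOf2_32, alignDown, NextPowerOf2), runnable as a standalone tool linked against LLVMSupport.

#include "llvm/Support/MathExtras.h"
#include <cassert>
using namespace llvm;

int main() {
  assert(Log2_32(64) == 6);
  assert(isPowerOf2_32(64) && !isPowerOf2_32(48));
  assert(alignDown(77, 16) == 64);  // largest multiple of 16 <= 77
  assert(NextPowerOf2(64) == 128);  // strictly greater than the input
  return 0;
}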
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:180
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:853
#define N
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:304
static constexpr roundingMode rmTowardZero
Definition APFloat.h:308
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:320
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Represent subnormal handling kind for floating point instruction inputs and outputs.
DenormalModeKind Input
Denormal treatment kind for floating point instruction inputs in the default floating-point environme...
@ PreserveSign
The sign of a flushed-to-zero number is preserved in the sign of 0.
@ PositiveZero
Denormals are flushed to positive zero.
@ IEEE
IEEE-754 denormal numbers preserved.
constexpr bool inputsAreZero() const
Return true if input denormals must be implicitly treated as 0.
Extended Value Type.
Definition ValueTypes.h:35
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:395
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:74
EVT changeTypeToInteger() const
Return the type converted to an equivalently sized integer or vector with integer element type.
Definition ValueTypes.h:121
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:284
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition ValueTypes.h:300
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:147
ElementCount getVectorElementCount() const
Definition ValueTypes.h:350
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition ValueTypes.h:243
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition ValueTypes.h:359
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:385
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
Definition ValueTypes.h:430
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
Definition ValueTypes.h:470
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
Definition ValueTypes.h:412
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:381
bool isFixedLengthVector() const
Definition ValueTypes.h:181
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:323
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition ValueTypes.h:174
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:157
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition ValueTypes.h:102
LLVM_ABI const fltSemantics & getFltSemantics() const
Returns an APFloat semantics tag appropriate for the value type.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition ValueTypes.h:308
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:453
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:152
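A short sketch of how the expand* helpers above typically build and split EVTs (illustrative function name).

#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/LLVMContext.h"
using namespace llvm;

void evtSketch() {
  LLVMContext Ctx;
  EVT I64 = EVT::getIntegerVT(Ctx, 64);        // scalar i64
  EVT V4I64 = EVT::getVectorVT(Ctx, I64, 4);   // v4i64
  EVT Half = I64.getHalfSizedIntegerVT(Ctx);   // i32
  (void)V4I64.getVectorNumElements();          // 4
  (void)Half;
}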
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
Definition KnownBits.h:294
KnownBits anyextOrTrunc(unsigned BitWidth) const
Return known bits for an "any" extension or truncation of the value we're tracking.
Definition KnownBits.h:179
unsigned countMinSignBits() const
Returns the number of times the sign bit is replicated into the other bits.
Definition KnownBits.h:248
static LLVM_ABI KnownBits smax(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for smax(LHS, RHS).
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition KnownBits.h:101
bool isZero() const
Returns true if value is all zero.
Definition KnownBits.h:80
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition KnownBits.h:235
bool isUnknown() const
Returns true if we don't know any bits.
Definition KnownBits.h:66
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
Definition KnownBits.h:154
KnownBits byteSwap() const
Definition KnownBits.h:507
static LLVM_ABI std::optional< bool > sge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGE result.
unsigned countMaxPopulation() const
Returns the maximum number of bits that could be one.
Definition KnownBits.h:282
KnownBits reverseBits() const
Definition KnownBits.h:511
KnownBits concat(const KnownBits &Lo) const
Concatenate the bits from Lo onto the bottom of *this.
Definition KnownBits.h:226
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
static LLVM_ABI KnownBits umax(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for umax(LHS, RHS).
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition KnownBits.h:165
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:74
KnownBits unionWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for either this or RHS or both.
Definition KnownBits.h:314
bool isSignUnknown() const
Returns true if we don't know the sign bit.
Definition KnownBits.h:69
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition KnownBits.h:304
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition KnownBits.h:173
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition KnownBits.h:241
static LLVM_ABI KnownBits smin(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for smin(LHS, RHS).
static LLVM_ABI std::optional< bool > ugt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGT result.
static LLVM_ABI std::optional< bool > slt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLT result.
static LLVM_ABI KnownBits computeForAddSub(bool Add, bool NSW, bool NUW, const KnownBits &LHS, const KnownBits &RHS)
Compute known bits resulting from adding LHS and RHS.
Definition KnownBits.cpp:60
static LLVM_ABI std::optional< bool > ult(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULT result.
static LLVM_ABI std::optional< bool > ule(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULE result.
bool isNegative() const
Returns true if this value is known to be negative.
Definition KnownBits.h:98
static LLVM_ABI KnownBits mul(const KnownBits &LHS, const KnownBits &RHS, bool NoUndefSelfMultiply=false)
Compute known bits resulting from multiplying LHS and RHS.
KnownBits anyext(unsigned BitWidth) const
Return known bits for an "any" extension of the value we're tracking, where we don't know anything ab...
Definition KnownBits.h:160
static LLVM_ABI std::optional< bool > sle(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLE result.
static LLVM_ABI std::optional< bool > sgt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGT result.
unsigned countMinPopulation() const
Returns the number of bits known to be one.
Definition KnownBits.h:279
static LLVM_ABI std::optional< bool > uge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGE result.
static LLVM_ABI KnownBits umin(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for umin(LHS, RHS).
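A small example of merging KnownBits facts the way SimplifyDemandedBits does across two select arms (illustrative name).

#include "llvm/ADT/APInt.h"
#include "llvm/Support/KnownBits.h"
using namespace llvm;

void knownBitsSketch() {
  KnownBits A = KnownBits::makeConstant(APInt(8, 0x0C)); // 0b00001100
  KnownBits B = KnownBits::makeConstant(APInt(8, 0x04)); // 0b00000100
  KnownBits Common = A.intersectWith(B); // facts true for either value
  (void)Common.countMinTrailingZeros();  // == 2: both end in 0b100
}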
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasNoUnsignedWrap() const
bool hasNoSignedWrap() const
void setNoSignedWrap(bool b)
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
Magic data for optimising signed division by a constant.
static LLVM_ABI SignedDivisionByConstantInfo get(const APInt &D)
Calculate the magic numbers required to implement a signed integer division by a constant as a sequen...
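A hedged usage sketch: the struct lives in llvm/Support/DivisionByConstantInfo.h, and the Magic/ShiftAmount member names below are assumptions from that header, not confirmed by this page. BuildSDIV consumes the same data to turn an sdiv-by-7 into a MULHS plus shift/add fixups.

#include "llvm/ADT/APInt.h"
#include "llvm/Support/DivisionByConstantInfo.h"
using namespace llvm;

void sdivMagicSketch() {
  APInt D(32, 7);
  auto Info = SignedDivisionByConstantInfo::get(D);
  // Info.Magic: the multiplier; Info.ShiftAmount: the post-multiply
  // arithmetic shift (assumed field names).
  (void)Info;
}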
This contains information for each constraint that we are lowering.
std::string ConstraintCode
This contains the actual string for the code, like "m".
LLVM_ABI unsigned getMatchedOperand() const
If this is an input matching constraint, this method returns the output operand it matches.
LLVM_ABI bool isMatchingInputConstraint() const
Return true of this is an input operand that is a matching constraint like "4".
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setIsPostTypeLegalization(bool Value=true)
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
CallLoweringInfo & setDiscardResult(bool Value=true)
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
CallLoweringInfo & setNoReturn(bool Value=true)
CallLoweringInfo & setChain(SDValue InChain)
LLVM_ABI void AddToWorklist(SDNode *N)
LLVM_ABI void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setIsPostTypeLegalization(bool Value=true)
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT)
MakeLibCallOptions & setIsSigned(bool Value=true)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
Magic data for optimising unsigned division by a constant.
static LLVM_ABI UnsignedDivisionByConstantInfo get(const APInt &D, unsigned LeadingZeros=0, bool AllowEvenDivisorOptimization=true)
Calculate the magic numbers required to implement an unsigned integer division by a constant as a seq...