LLVM 19.0.0git
TargetLowering.cpp
Go to the documentation of this file.
1//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This implements the TargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
14#include "llvm/ADT/STLExtras.h"
25#include "llvm/IR/DataLayout.h"
28#include "llvm/IR/LLVMContext.h"
29#include "llvm/MC/MCAsmInfo.h"
30#include "llvm/MC/MCExpr.h"
36#include <cctype>
37using namespace llvm;
38
39/// NOTE: The TargetMachine owns TLOF.
41 : TargetLoweringBase(tm) {}
42
43const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
44 return nullptr;
45}
46
49}
50
51/// Check whether a given call node is in tail position within its function. If
52/// so, it sets Chain to the input chain of the tail call.
54 SDValue &Chain) const {
56
57 // First, check if tail calls have been disabled in this function.
58 if (F.getFnAttribute("disable-tail-calls").getValueAsBool())
59 return false;
60
61 // Conservatively require the attributes of the call to match those of
62 // the return. Ignore following attributes because they don't affect the
63 // call sequence.
64 AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs());
65 for (const auto &Attr :
66 {Attribute::Alignment, Attribute::Dereferenceable,
67 Attribute::DereferenceableOrNull, Attribute::NoAlias,
68 Attribute::NonNull, Attribute::NoUndef, Attribute::Range})
69 CallerAttrs.removeAttribute(Attr);
70
71 if (CallerAttrs.hasAttributes())
72 return false;
73
74 // It's not safe to eliminate the sign / zero extension of the return value.
75 if (CallerAttrs.contains(Attribute::ZExt) ||
76 CallerAttrs.contains(Attribute::SExt))
77 return false;
78
79 // Check if the only use is a function return node.
80 return isUsedByReturnOnly(Node, Chain);
81}
82
84 const uint32_t *CallerPreservedMask,
85 const SmallVectorImpl<CCValAssign> &ArgLocs,
86 const SmallVectorImpl<SDValue> &OutVals) const {
87 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
88 const CCValAssign &ArgLoc = ArgLocs[I];
89 if (!ArgLoc.isRegLoc())
90 continue;
91 MCRegister Reg = ArgLoc.getLocReg();
92 // Only look at callee saved registers.
93 if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
94 continue;
95 // Check that we pass the value used for the caller.
96 // (We look for a CopyFromReg reading a virtual register that is used
97 // for the function live-in value of register Reg)
98 SDValue Value = OutVals[I];
99 if (Value->getOpcode() == ISD::AssertZext)
100 Value = Value.getOperand(0);
101 if (Value->getOpcode() != ISD::CopyFromReg)
102 return false;
103 Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
104 if (MRI.getLiveInPhysReg(ArgReg) != Reg)
105 return false;
106 }
107 return true;
108}
109
110/// Set CallLoweringInfo attribute flags based on a call instruction
111/// and called function attributes.
113 unsigned ArgIdx) {
114 IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
115 IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
116 IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
117 IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
118 IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
119 IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
120 IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
121 IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
122 IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
123 IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
124 IsSwiftAsync = Call->paramHasAttr(ArgIdx, Attribute::SwiftAsync);
125 IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
126 Alignment = Call->getParamStackAlign(ArgIdx);
127 IndirectType = nullptr;
129 "multiple ABI attributes?");
130 if (IsByVal) {
131 IndirectType = Call->getParamByValType(ArgIdx);
132 if (!Alignment)
133 Alignment = Call->getParamAlign(ArgIdx);
134 }
135 if (IsPreallocated)
136 IndirectType = Call->getParamPreallocatedType(ArgIdx);
137 if (IsInAlloca)
138 IndirectType = Call->getParamInAllocaType(ArgIdx);
139 if (IsSRet)
140 IndirectType = Call->getParamStructRetType(ArgIdx);
141}
142
143/// Generate a libcall taking the given operands as arguments and returning a
144/// result of type RetVT.
145std::pair<SDValue, SDValue>
148 MakeLibCallOptions CallOptions,
149 const SDLoc &dl,
150 SDValue InChain) const {
151 if (!InChain)
152 InChain = DAG.getEntryNode();
153
155 Args.reserve(Ops.size());
156
158 for (unsigned i = 0; i < Ops.size(); ++i) {
159 SDValue NewOp = Ops[i];
160 Entry.Node = NewOp;
161 Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
162 Entry.IsSExt = shouldSignExtendTypeInLibCall(NewOp.getValueType(),
163 CallOptions.IsSExt);
164 Entry.IsZExt = !Entry.IsSExt;
165
166 if (CallOptions.IsSoften &&
168 Entry.IsSExt = Entry.IsZExt = false;
169 }
170 Args.push_back(Entry);
171 }
172
173 if (LC == RTLIB::UNKNOWN_LIBCALL)
174 report_fatal_error("Unsupported library call operation!");
177
178 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
180 bool signExtend = shouldSignExtendTypeInLibCall(RetVT, CallOptions.IsSExt);
181 bool zeroExtend = !signExtend;
182
183 if (CallOptions.IsSoften &&
185 signExtend = zeroExtend = false;
186 }
187
188 CLI.setDebugLoc(dl)
189 .setChain(InChain)
190 .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
191 .setNoReturn(CallOptions.DoesNotReturn)
194 .setSExtResult(signExtend)
195 .setZExtResult(zeroExtend);
196 return LowerCallTo(CLI);
197}
198
200 std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
201 unsigned SrcAS, const AttributeList &FuncAttributes) const {
202 if (Limit != ~unsigned(0) && Op.isMemcpyWithFixedDstAlign() &&
203 Op.getSrcAlign() < Op.getDstAlign())
204 return false;
205
206 EVT VT = getOptimalMemOpType(Op, FuncAttributes);
207
208 if (VT == MVT::Other) {
209 // Use the largest integer type whose alignment constraints are satisfied.
210 // We only need to check DstAlign here as SrcAlign is always greater or
211 // equal to DstAlign (or zero).
212 VT = MVT::i64;
213 if (Op.isFixedDstAlign())
214 while (Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
215 !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign()))
217 assert(VT.isInteger());
218
219 // Find the largest legal integer type.
220 MVT LVT = MVT::i64;
221 while (!isTypeLegal(LVT))
222 LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
223 assert(LVT.isInteger());
224
225 // If the type we've chosen is larger than the largest legal integer type
226 // then use that instead.
227 if (VT.bitsGT(LVT))
228 VT = LVT;
229 }
230
231 unsigned NumMemOps = 0;
232 uint64_t Size = Op.size();
233 while (Size) {
234 unsigned VTSize = VT.getSizeInBits() / 8;
235 while (VTSize > Size) {
236 // For now, only use non-vector load / store's for the left-over pieces.
237 EVT NewVT = VT;
238 unsigned NewVTSize;
239
240 bool Found = false;
241 if (VT.isVector() || VT.isFloatingPoint()) {
242 NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
245 Found = true;
246 else if (NewVT == MVT::i64 &&
248 isSafeMemOpType(MVT::f64)) {
249 // i64 is usually not legal on 32-bit targets, but f64 may be.
250 NewVT = MVT::f64;
251 Found = true;
252 }
253 }
254
255 if (!Found) {
256 do {
257 NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
258 if (NewVT == MVT::i8)
259 break;
260 } while (!isSafeMemOpType(NewVT.getSimpleVT()));
261 }
262 NewVTSize = NewVT.getSizeInBits() / 8;
263
264 // If the new VT cannot cover all of the remaining bits, then consider
265 // issuing a (or a pair of) unaligned and overlapping load / store.
266 unsigned Fast;
267 if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
269 VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
271 Fast)
272 VTSize = Size;
273 else {
274 VT = NewVT;
275 VTSize = NewVTSize;
276 }
277 }
278
279 if (++NumMemOps > Limit)
280 return false;
281
282 MemOps.push_back(VT);
283 Size -= VTSize;
284 }
285
286 return true;
287}
288
289/// Soften the operands of a comparison. This code is shared among BR_CC,
290/// SELECT_CC, and SETCC handlers.
292 SDValue &NewLHS, SDValue &NewRHS,
293 ISD::CondCode &CCCode,
294 const SDLoc &dl, const SDValue OldLHS,
295 const SDValue OldRHS) const {
296 SDValue Chain;
297 return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
298 OldRHS, Chain);
299}
300
302 SDValue &NewLHS, SDValue &NewRHS,
303 ISD::CondCode &CCCode,
304 const SDLoc &dl, const SDValue OldLHS,
305 const SDValue OldRHS,
306 SDValue &Chain,
307 bool IsSignaling) const {
308 // FIXME: Currently we cannot really respect all IEEE predicates due to libgcc
309 // not supporting it. We can update this code when libgcc provides such
310 // functions.
311
312 assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
313 && "Unsupported setcc type!");
314
315 // Expand into one or more soft-fp libcall(s).
316 RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
317 bool ShouldInvertCC = false;
318 switch (CCCode) {
319 case ISD::SETEQ:
320 case ISD::SETOEQ:
321 LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
322 (VT == MVT::f64) ? RTLIB::OEQ_F64 :
323 (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
324 break;
325 case ISD::SETNE:
326 case ISD::SETUNE:
327 LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
328 (VT == MVT::f64) ? RTLIB::UNE_F64 :
329 (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
330 break;
331 case ISD::SETGE:
332 case ISD::SETOGE:
333 LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
334 (VT == MVT::f64) ? RTLIB::OGE_F64 :
335 (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
336 break;
337 case ISD::SETLT:
338 case ISD::SETOLT:
339 LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
340 (VT == MVT::f64) ? RTLIB::OLT_F64 :
341 (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
342 break;
343 case ISD::SETLE:
344 case ISD::SETOLE:
345 LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
346 (VT == MVT::f64) ? RTLIB::OLE_F64 :
347 (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
348 break;
349 case ISD::SETGT:
350 case ISD::SETOGT:
351 LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
352 (VT == MVT::f64) ? RTLIB::OGT_F64 :
353 (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
354 break;
355 case ISD::SETO:
356 ShouldInvertCC = true;
357 [[fallthrough]];
358 case ISD::SETUO:
359 LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
360 (VT == MVT::f64) ? RTLIB::UO_F64 :
361 (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
362 break;
363 case ISD::SETONE:
364 // SETONE = O && UNE
365 ShouldInvertCC = true;
366 [[fallthrough]];
367 case ISD::SETUEQ:
368 LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
369 (VT == MVT::f64) ? RTLIB::UO_F64 :
370 (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
371 LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
372 (VT == MVT::f64) ? RTLIB::OEQ_F64 :
373 (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
374 break;
375 default:
376 // Invert CC for unordered comparisons
377 ShouldInvertCC = true;
378 switch (CCCode) {
379 case ISD::SETULT:
380 LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
381 (VT == MVT::f64) ? RTLIB::OGE_F64 :
382 (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
383 break;
384 case ISD::SETULE:
385 LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
386 (VT == MVT::f64) ? RTLIB::OGT_F64 :
387 (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
388 break;
389 case ISD::SETUGT:
390 LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
391 (VT == MVT::f64) ? RTLIB::OLE_F64 :
392 (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
393 break;
394 case ISD::SETUGE:
395 LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
396 (VT == MVT::f64) ? RTLIB::OLT_F64 :
397 (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
398 break;
399 default: llvm_unreachable("Do not know how to soften this setcc!");
400 }
401 }
402
403 // Use the target specific return value for comparison lib calls.
405 SDValue Ops[2] = {NewLHS, NewRHS};
407 EVT OpsVT[2] = { OldLHS.getValueType(),
408 OldRHS.getValueType() };
409 CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true);
410 auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
411 NewLHS = Call.first;
412 NewRHS = DAG.getConstant(0, dl, RetVT);
413
414 CCCode = getCmpLibcallCC(LC1);
415 if (ShouldInvertCC) {
416 assert(RetVT.isInteger());
417 CCCode = getSetCCInverse(CCCode, RetVT);
418 }
419
420 if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
421 // Update Chain.
422 Chain = Call.second;
423 } else {
424 EVT SetCCVT =
425 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
426 SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
427 auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
428 CCCode = getCmpLibcallCC(LC2);
429 if (ShouldInvertCC)
430 CCCode = getSetCCInverse(CCCode, RetVT);
431 NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
432 if (Chain)
433 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
434 Call2.second);
435 NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
436 Tmp.getValueType(), Tmp, NewLHS);
437 NewRHS = SDValue();
438 }
439}
440
441/// Return the entry encoding for a jump table in the current function. The
442/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
444 // In non-pic modes, just use the address of a block.
445 if (!isPositionIndependent())
447
448 // In PIC mode, if the target supports a GPRel32 directive, use it.
449 if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr)
451
452 // Otherwise, use a label difference.
454}
455
457 SelectionDAG &DAG) const {
458 // If our PIC model is GP relative, use the global offset table as the base.
459 unsigned JTEncoding = getJumpTableEncoding();
460
464
465 return Table;
466}
467
468/// This returns the relocation base for the given PIC jumptable, the same as
469/// getPICJumpTableRelocBase, but as an MCExpr.
470const MCExpr *
472 unsigned JTI,MCContext &Ctx) const{
473 // The normal PIC reloc base is the label at the start of the jump table.
474 return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
475}
476
478 SDValue Addr, int JTI,
479 SelectionDAG &DAG) const {
480 SDValue Chain = Value;
481 // Jump table debug info is only needed if CodeView is enabled.
483 Chain = DAG.getJumpTableDebugInfo(JTI, Chain, dl);
484 }
485 return DAG.getNode(ISD::BRIND, dl, MVT::Other, Chain, Addr);
486}
487
488bool
490 const TargetMachine &TM = getTargetMachine();
491 const GlobalValue *GV = GA->getGlobal();
492
493 // If the address is not even local to this DSO we will have to load it from
494 // a got and then add the offset.
495 if (!TM.shouldAssumeDSOLocal(GV))
496 return false;
497
498 // If the code is position independent we will have to add a base register.
499 if (isPositionIndependent())
500 return false;
501
502 // Otherwise we can do it.
503 return true;
504}
505
506//===----------------------------------------------------------------------===//
507// Optimization Methods
508//===----------------------------------------------------------------------===//
509
510/// If the specified instruction has a constant integer operand and there are
511/// bits set in that constant that are not demanded, then clear those bits and
512/// return true.
514 const APInt &DemandedBits,
515 const APInt &DemandedElts,
516 TargetLoweringOpt &TLO) const {
517 SDLoc DL(Op);
518 unsigned Opcode = Op.getOpcode();
519
520 // Early-out if we've ended up calling an undemanded node, leave this to
521 // constant folding.
522 if (DemandedBits.isZero() || DemandedElts.isZero())
523 return false;
524
525 // Do target-specific constant optimization.
526 if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
527 return TLO.New.getNode();
528
529 // FIXME: ISD::SELECT, ISD::SELECT_CC
530 switch (Opcode) {
531 default:
532 break;
533 case ISD::XOR:
534 case ISD::AND:
535 case ISD::OR: {
536 auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
537 if (!Op1C || Op1C->isOpaque())
538 return false;
539
540 // If this is a 'not' op, don't touch it because that's a canonical form.
541 const APInt &C = Op1C->getAPIntValue();
542 if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(C))
543 return false;
544
545 if (!C.isSubsetOf(DemandedBits)) {
546 EVT VT = Op.getValueType();
547 SDValue NewC = TLO.DAG.getConstant(DemandedBits & C, DL, VT);
548 SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC,
549 Op->getFlags());
550 return TLO.CombineTo(Op, NewOp);
551 }
552
553 break;
554 }
555 }
556
557 return false;
558}
559
561 const APInt &DemandedBits,
562 TargetLoweringOpt &TLO) const {
563 EVT VT = Op.getValueType();
564 APInt DemandedElts = VT.isVector()
566 : APInt(1, 1);
567 return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
568}
569
570/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
571/// This uses isTruncateFree/isZExtFree and ANY_EXTEND for the widening cast,
572/// but it could be generalized for targets with other types of implicit
573/// widening casts.
575 const APInt &DemandedBits,
576 TargetLoweringOpt &TLO) const {
577 assert(Op.getNumOperands() == 2 &&
578 "ShrinkDemandedOp only supports binary operators!");
579 assert(Op.getNode()->getNumValues() == 1 &&
580 "ShrinkDemandedOp only supports nodes with one result!");
581
582 EVT VT = Op.getValueType();
583 SelectionDAG &DAG = TLO.DAG;
584 SDLoc dl(Op);
585
586 // Early return, as this function cannot handle vector types.
587 if (VT.isVector())
588 return false;
589
590 assert(Op.getOperand(0).getValueType().getScalarSizeInBits() == BitWidth &&
591 Op.getOperand(1).getValueType().getScalarSizeInBits() == BitWidth &&
592 "ShrinkDemandedOp only supports operands that have the same size!");
593
594 // Don't do this if the node has another user, which may require the
595 // full value.
596 if (!Op.getNode()->hasOneUse())
597 return false;
598
599 // Search for the smallest integer type with free casts to and from
600 // Op's type. For expedience, just check power-of-2 integer types.
601 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
602 unsigned DemandedSize = DemandedBits.getActiveBits();
603 for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
604 SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
605 EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
606 if (TLI.isTruncateFree(VT, SmallVT) && TLI.isZExtFree(SmallVT, VT)) {
607 // We found a type with free casts.
608 SDValue X = DAG.getNode(
609 Op.getOpcode(), dl, SmallVT,
610 DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
611 DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)));
612 assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
613 SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, VT, X);
614 return TLO.CombineTo(Op, Z);
615 }
616 }
617 return false;
618}
619
621 DAGCombinerInfo &DCI) const {
622 SelectionDAG &DAG = DCI.DAG;
623 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
624 !DCI.isBeforeLegalizeOps());
625 KnownBits Known;
626
627 bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
628 if (Simplified) {
629 DCI.AddToWorklist(Op.getNode());
631 }
632 return Simplified;
633}
634
636 const APInt &DemandedElts,
637 DAGCombinerInfo &DCI) const {
638 SelectionDAG &DAG = DCI.DAG;
639 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
640 !DCI.isBeforeLegalizeOps());
641 KnownBits Known;
642
643 bool Simplified =
644 SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO);
645 if (Simplified) {
646 DCI.AddToWorklist(Op.getNode());
648 }
649 return Simplified;
650}
651
653 KnownBits &Known,
655 unsigned Depth,
656 bool AssumeSingleUse) const {
657 EVT VT = Op.getValueType();
658
659 // Since the number of lanes in a scalable vector is unknown at compile time,
660 // we track one bit which is implicitly broadcast to all lanes. This means
661 // that all lanes in a scalable vector are considered demanded.
662 APInt DemandedElts = VT.isFixedLengthVector()
664 : APInt(1, 1);
665 return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
666 AssumeSingleUse);
667}
668
669// TODO: Under what circumstances can we create nodes? Constant folding?
671 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
672 SelectionDAG &DAG, unsigned Depth) const {
673 EVT VT = Op.getValueType();
674
675 // Limit search depth.
677 return SDValue();
678
679 // Ignore UNDEFs.
680 if (Op.isUndef())
681 return SDValue();
682
683 // Not demanding any bits/elts from Op.
684 if (DemandedBits == 0 || DemandedElts == 0)
685 return DAG.getUNDEF(VT);
686
687 bool IsLE = DAG.getDataLayout().isLittleEndian();
688 unsigned NumElts = DemandedElts.getBitWidth();
689 unsigned BitWidth = DemandedBits.getBitWidth();
690 KnownBits LHSKnown, RHSKnown;
691 switch (Op.getOpcode()) {
692 case ISD::BITCAST: {
693 if (VT.isScalableVector())
694 return SDValue();
695
696 SDValue Src = peekThroughBitcasts(Op.getOperand(0));
697 EVT SrcVT = Src.getValueType();
698 EVT DstVT = Op.getValueType();
699 if (SrcVT == DstVT)
700 return Src;
701
702 unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
703 unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
704 if (NumSrcEltBits == NumDstEltBits)
705 if (SDValue V = SimplifyMultipleUseDemandedBits(
706 Src, DemandedBits, DemandedElts, DAG, Depth + 1))
707 return DAG.getBitcast(DstVT, V);
708
709 if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) {
710 unsigned Scale = NumDstEltBits / NumSrcEltBits;
711 unsigned NumSrcElts = SrcVT.getVectorNumElements();
712 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
713 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
714 for (unsigned i = 0; i != Scale; ++i) {
715 unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
716 unsigned BitOffset = EltOffset * NumSrcEltBits;
717 APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
718 if (!Sub.isZero()) {
719 DemandedSrcBits |= Sub;
720 for (unsigned j = 0; j != NumElts; ++j)
721 if (DemandedElts[j])
722 DemandedSrcElts.setBit((j * Scale) + i);
723 }
724 }
725
726 if (SDValue V = SimplifyMultipleUseDemandedBits(
727 Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
728 return DAG.getBitcast(DstVT, V);
729 }
730
731 // TODO - bigendian once we have test coverage.
732 if (IsLE && (NumSrcEltBits % NumDstEltBits) == 0) {
733 unsigned Scale = NumSrcEltBits / NumDstEltBits;
734 unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
735 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
736 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
737 for (unsigned i = 0; i != NumElts; ++i)
738 if (DemandedElts[i]) {
739 unsigned Offset = (i % Scale) * NumDstEltBits;
740 DemandedSrcBits.insertBits(DemandedBits, Offset);
741 DemandedSrcElts.setBit(i / Scale);
742 }
743
744 if (SDValue V = SimplifyMultipleUseDemandedBits(
745 Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
746 return DAG.getBitcast(DstVT, V);
747 }
748
749 break;
750 }
751 case ISD::FREEZE: {
752 SDValue N0 = Op.getOperand(0);
753 if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, DemandedElts,
754 /*PoisonOnly=*/false))
755 return N0;
756 break;
757 }
758 case ISD::AND: {
759 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
760 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
761
762 // If all of the demanded bits are known 1 on one side, return the other.
763 // These bits cannot contribute to the result of the 'and' in this
764 // context.
765 if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
766 return Op.getOperand(0);
767 if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
768 return Op.getOperand(1);
769 break;
770 }
771 case ISD::OR: {
772 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
773 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
774
775 // If all of the demanded bits are known zero on one side, return the
776 // other. These bits cannot contribute to the result of the 'or' in this
777 // context.
778 if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
779 return Op.getOperand(0);
780 if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
781 return Op.getOperand(1);
782 break;
783 }
784 case ISD::XOR: {
785 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
786 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
787
788 // If all of the demanded bits are known zero on one side, return the
789 // other.
790 if (DemandedBits.isSubsetOf(RHSKnown.Zero))
791 return Op.getOperand(0);
792 if (DemandedBits.isSubsetOf(LHSKnown.Zero))
793 return Op.getOperand(1);
794 break;
795 }
796 case ISD::SHL: {
797 // If we are only demanding sign bits then we can use the shift source
798 // directly.
799 if (std::optional<uint64_t> MaxSA =
800 DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
801 SDValue Op0 = Op.getOperand(0);
802 unsigned ShAmt = *MaxSA;
803 unsigned NumSignBits =
804 DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
805 unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
806 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
807 return Op0;
808 }
809 break;
810 }
811 case ISD::SETCC: {
812 SDValue Op0 = Op.getOperand(0);
813 SDValue Op1 = Op.getOperand(1);
814 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
815 // If (1) we only need the sign-bit, (2) the setcc operands are the same
816 // width as the setcc result, and (3) the result of a setcc conforms to 0 or
817 // -1, we may be able to bypass the setcc.
818 if (DemandedBits.isSignMask() &&
822 // If we're testing X < 0, then this compare isn't needed - just use X!
823 // FIXME: We're limiting to integer types here, but this should also work
824 // if we don't care about FP signed-zero. The use of SETLT with FP means
825 // that we don't care about NaNs.
826 if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
828 return Op0;
829 }
830 break;
831 }
833 // If none of the extended bits are demanded, eliminate the sextinreg.
834 SDValue Op0 = Op.getOperand(0);
835 EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
836 unsigned ExBits = ExVT.getScalarSizeInBits();
837 if (DemandedBits.getActiveBits() <= ExBits &&
839 return Op0;
840 // If the input is already sign extended, just drop the extension.
841 unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
842 if (NumSignBits >= (BitWidth - ExBits + 1))
843 return Op0;
844 break;
845 }
849 if (VT.isScalableVector())
850 return SDValue();
851
852 // If we only want the lowest element and none of extended bits, then we can
853 // return the bitcasted source vector.
854 SDValue Src = Op.getOperand(0);
855 EVT SrcVT = Src.getValueType();
856 EVT DstVT = Op.getValueType();
857 if (IsLE && DemandedElts == 1 &&
858 DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
859 DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
860 return DAG.getBitcast(DstVT, Src);
861 }
862 break;
863 }
865 if (VT.isScalableVector())
866 return SDValue();
867
868 // If we don't demand the inserted element, return the base vector.
869 SDValue Vec = Op.getOperand(0);
870 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
871 EVT VecVT = Vec.getValueType();
872 if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
873 !DemandedElts[CIdx->getZExtValue()])
874 return Vec;
875 break;
876 }
878 if (VT.isScalableVector())
879 return SDValue();
880
881 SDValue Vec = Op.getOperand(0);
882 SDValue Sub = Op.getOperand(1);
883 uint64_t Idx = Op.getConstantOperandVal(2);
884 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
885 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
886 // If we don't demand the inserted subvector, return the base vector.
887 if (DemandedSubElts == 0)
888 return Vec;
889 break;
890 }
891 case ISD::VECTOR_SHUFFLE: {
893 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
894
895 // If all the demanded elts are from one operand and are inline,
896 // then we can use the operand directly.
897 bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
898 for (unsigned i = 0; i != NumElts; ++i) {
899 int M = ShuffleMask[i];
900 if (M < 0 || !DemandedElts[i])
901 continue;
902 AllUndef = false;
903 IdentityLHS &= (M == (int)i);
904 IdentityRHS &= ((M - NumElts) == i);
905 }
906
907 if (AllUndef)
908 return DAG.getUNDEF(Op.getValueType());
909 if (IdentityLHS)
910 return Op.getOperand(0);
911 if (IdentityRHS)
912 return Op.getOperand(1);
913 break;
914 }
915 default:
916 // TODO: Probably okay to remove after audit; here to reduce change size
917 // in initial enablement patch for scalable vectors
918 if (VT.isScalableVector())
919 return SDValue();
920
921 if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
922 if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
923 Op, DemandedBits, DemandedElts, DAG, Depth))
924 return V;
925 break;
926 }
927 return SDValue();
928}
929
932 unsigned Depth) const {
933 EVT VT = Op.getValueType();
934 // Since the number of lanes in a scalable vector is unknown at compile time,
935 // we track one bit which is implicitly broadcast to all lanes. This means
936 // that all lanes in a scalable vector are considered demanded.
937 APInt DemandedElts = VT.isFixedLengthVector()
939 : APInt(1, 1);
940 return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
941 Depth);
942}
943
945 SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
946 unsigned Depth) const {
947 APInt DemandedBits = APInt::getAllOnes(Op.getScalarValueSizeInBits());
948 return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
949 Depth);
950}
951
 952// Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1).
 953// or to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1).
     //
     // Returns the replacement ext/trunc(avg(A, B)) node on success, or an
     // empty SDValue if the pattern does not match or the transform is not
     // legal/profitable for the target.
     //
     // NOTE(review): the opening of the declaration (embedded lines 954-955,
     // i.e. "static SDValue combineShiftToAVG(SDValue Op, TargetLoweringOpt
     // &TLO," in upstream LLVM) appears to have been dropped by the source
     // extraction; the parameters below are the tail of that declaration —
     // TODO confirm against the upstream file.
 956 const TargetLowering &TLI,
 957 const APInt &DemandedBits,
 958 const APInt &DemandedElts, unsigned Depth) {
 959 assert((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) &&
 960 "SRL or SRA node is required here!");
 961 // Is the right shift using an immediate value of 1?
 962 ConstantSDNode *N1C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
 963 if (!N1C || !N1C->isOne())
 964 return SDValue();
 965
 966 // We are looking for an avgfloor
 967 // add(ext, ext)
 968 // or one of these as a avgceil
 969 // add(add(ext, ext), 1)
 970 // add(add(ext, 1), ext)
 971 // add(ext, add(ext, 1))
 972 SDValue Add = Op.getOperand(0);
 973 if (Add.getOpcode() != ISD::ADD)
 974 return SDValue();
 975
 976 SDValue ExtOpA = Add.getOperand(0);
 977 SDValue ExtOpB = Add.getOperand(1);
 978 SDValue Add2;
     // Helper for the avgceil forms: given the two operands (Op2, Op3) of an
     // inner add plus the other outer-add operand, succeed when one of
     // Op2/Op3 is the constant +1 (the rounding bias). On success it rebinds
     // ExtOpA/ExtOpB to the two actual value operands and records the inner
     // add node in Add2 so its overflow flags can be checked later.
 979 auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3, SDValue A) {
 980 ConstantSDNode *ConstOp;
 981 if ((ConstOp = isConstOrConstSplat(Op2, DemandedElts)) &&
 982 ConstOp->isOne()) {
 983 ExtOpA = Op1;
 984 ExtOpB = Op3;
 985 Add2 = A;
 986 return true;
 987 }
 988 if ((ConstOp = isConstOrConstSplat(Op3, DemandedElts)) &&
 989 ConstOp->isOne()) {
 990 ExtOpA = Op1;
 991 ExtOpB = Op2;
 992 Add2 = A;
 993 return true;
 994 }
 995 return false;
 996 };
     // Try both commutations of the outer add: the +1 may be nested inside
     // either operand. If neither matches, this is the avgfloor form.
 997 bool IsCeil =
 998 (ExtOpA.getOpcode() == ISD::ADD &&
 999 MatchOperands(ExtOpA.getOperand(0), ExtOpA.getOperand(1), ExtOpB, ExtOpA)) ||
1000 (ExtOpB.getOpcode() == ISD::ADD &&
1001 MatchOperands(ExtOpB.getOperand(0), ExtOpB.getOperand(1), ExtOpA, ExtOpB));
1002
1003 // If the shift is signed (sra):
1004 // - Needs >= 2 sign bit for both operands.
1005 // - Needs >= 2 zero bits.
1006 // If the shift is unsigned (srl):
1007 // - Needs >= 1 zero bit for both operands.
1008 // - Needs 1 demanded bit zero and >= 2 sign bits.
1009 SelectionDAG &DAG = TLO.DAG;
1010 unsigned ShiftOpc = Op.getOpcode();
1011 bool IsSigned = false;
1012 unsigned KnownBits;
1013 unsigned NumSignedA = DAG.ComputeNumSignBits(ExtOpA, DemandedElts, Depth);
1014 unsigned NumSignedB = DAG.ComputeNumSignBits(ExtOpB, DemandedElts, Depth);
     // The add of the two operands can consume one bit, so the sum is only
     // guaranteed min(NumSignedA, NumSignedB) - 1 sign bits.
1015 unsigned NumSigned = std::min(NumSignedA, NumSignedB) - 1;
1016 unsigned NumZeroA =
1017 DAG.computeKnownBits(ExtOpA, DemandedElts, Depth).countMinLeadingZeros();
1018 unsigned NumZeroB =
1019 DAG.computeKnownBits(ExtOpB, DemandedElts, Depth).countMinLeadingZeros();
1020 unsigned NumZero = std::min(NumZeroA, NumZeroB);
1021
     // Decide whether to form a signed or unsigned average, and record in
     // KnownBits how many high bits of the result are redundant (all-sign or
     // all-zero) — this bounds how narrow the new type may be.
1022 switch (ShiftOpc) {
1023 default:
1024 llvm_unreachable("Unexpected ShiftOpc in combineShiftToAVG");
1025 case ISD::SRA: {
     // Prefer the unsigned form when it proves strictly more known bits.
1026 if (NumZero >= 2 && NumSigned < NumZero) {
1027 IsSigned = false;
1028 KnownBits = NumZero;
1029 break;
1030 }
1031 if (NumSigned >= 1) {
1032 IsSigned = true;
1033 KnownBits = NumSigned;
1034 break;
1035 }
1036 return SDValue();
1037 }
1038 case ISD::SRL: {
1039 if (NumZero >= 1 && NumSigned < NumZero) {
1040 IsSigned = false;
1041 KnownBits = NumZero;
1042 break;
1043 }
     // An srl can act like an sra if the caller never looks at the sign bit.
1044 if (NumSigned >= 1 && DemandedBits.isSignBitClear()) {
1045 IsSigned = true;
1046 KnownBits = NumSigned;
1047 break;
1048 }
1049 return SDValue();
1050 }
1051 }
1052
1053 unsigned AVGOpc = IsCeil ? (IsSigned ? ISD::AVGCEILS : ISD::AVGCEILU)
1054 : (IsSigned ? ISD::AVGFLOORS : ISD::AVGFLOORU);
1055
1056 // Find the smallest power-2 type that is legal for this vector size and
1057 // operation, given the original type size and the number of known sign/zero
1058 // bits.
1059 EVT VT = Op.getValueType();
1060 unsigned MinWidth =
1061 std::max<unsigned>(VT.getScalarSizeInBits() - KnownBits, 8);
1062 EVT NVT = EVT::getIntegerVT(*DAG.getContext(), llvm::bit_ceil(MinWidth));
     // NOTE(review): the condition guarding this early return (embedded line
     // 1063) appears to have been dropped by the source extraction; upstream
     // LLVM bails out here when NVT's scalar width exceeds VT's — TODO
     // confirm against the upstream file.
1064 return SDValue();
1065 if (VT.isVector())
1066 NVT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount());
1067 if (TLO.LegalTypes() && !TLI.isOperationLegal(AVGOpc, NVT)) {
1068 // If we could not transform, and (both) adds are nuw/nsw, we can use the
1069 // larger type size to do the transform.
1070 if (TLO.LegalOperations() && !TLI.isOperationLegal(AVGOpc, VT))
1071 return SDValue();
     // The averaging trick is only exact at full width if the adds cannot
     // wrap; check both the outer add and (for avgceil) the inner +1 add.
1072 if (DAG.willNotOverflowAdd(IsSigned, Add.getOperand(0),
1073 Add.getOperand(1)) &&
1074 (!Add2 || DAG.willNotOverflowAdd(IsSigned, Add2.getOperand(0),
1075 Add2.getOperand(1))))
1076 NVT = VT;
1077 else
1078 return SDValue();
1079 }
1080
1081 // Don't create a AVGFLOOR node with a scalar constant unless its legal as
1082 // this is likely to stop other folds (reassociation, value tracking etc.)
1083 if (!IsCeil && !TLI.isOperationLegal(AVGOpc, NVT) &&
1084 (isa<ConstantSDNode>(ExtOpA) || isa<ConstantSDNode>(ExtOpB)))
1085 return SDValue();
1086
     // Build the narrowed (or same-width) average and extend/truncate the
     // result back to the original type, using sext for the signed forms and
     // zext for the unsigned ones.
1087 SDLoc DL(Op);
1088 SDValue ResultAVG =
1089 DAG.getNode(AVGOpc, DL, NVT, DAG.getExtOrTrunc(IsSigned, ExtOpA, DL, NVT),
1090 DAG.getExtOrTrunc(IsSigned, ExtOpB, DL, NVT));
1091 return DAG.getExtOrTrunc(IsSigned, ResultAVG, DL, VT);
1092}
1093
1094/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
1095/// result of Op are ever used downstream. If we can use this information to
1096/// simplify Op, create a new simplified DAG node and return true, returning the
1097/// original and new nodes in Old and New. Otherwise, analyze the expression and
1098/// return a mask of Known bits for the expression (used to simplify the
1099/// caller). The Known bits may only be accurate for those bits in the
1100/// OriginalDemandedBits and OriginalDemandedElts.
1102 SDValue Op, const APInt &OriginalDemandedBits,
1103 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
1104 unsigned Depth, bool AssumeSingleUse) const {
1105 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
1106 assert(Op.getScalarValueSizeInBits() == BitWidth &&
1107 "Mask size mismatches value type size!");
1108
1109 // Don't know anything.
1110 Known = KnownBits(BitWidth);
1111
1112 EVT VT = Op.getValueType();
1113 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
1114 unsigned NumElts = OriginalDemandedElts.getBitWidth();
1115 assert((!VT.isFixedLengthVector() || NumElts == VT.getVectorNumElements()) &&
1116 "Unexpected vector size");
1117
1118 APInt DemandedBits = OriginalDemandedBits;
1119 APInt DemandedElts = OriginalDemandedElts;
1120 SDLoc dl(Op);
1121
1122 // Undef operand.
1123 if (Op.isUndef())
1124 return false;
1125
1126 // We can't simplify target constants.
1127 if (Op.getOpcode() == ISD::TargetConstant)
1128 return false;
1129
1130 if (Op.getOpcode() == ISD::Constant) {
1131 // We know all of the bits for a constant!
1132 Known = KnownBits::makeConstant(Op->getAsAPIntVal());
1133 return false;
1134 }
1135
1136 if (Op.getOpcode() == ISD::ConstantFP) {
1137 // We know all of the bits for a floating point constant!
1139 cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt());
1140 return false;
1141 }
1142
1143 // Other users may use these bits.
1144 bool HasMultiUse = false;
1145 if (!AssumeSingleUse && !Op.getNode()->hasOneUse()) {
1147 // Limit search depth.
1148 return false;
1149 }
1150 // Allow multiple uses, just set the DemandedBits/Elts to all bits.
1152 DemandedElts = APInt::getAllOnes(NumElts);
1153 HasMultiUse = true;
1154 } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
1155 // Not demanding any bits/elts from Op.
1156 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1157 } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
1158 // Limit search depth.
1159 return false;
1160 }
1161
1162 KnownBits Known2;
1163 switch (Op.getOpcode()) {
1164 case ISD::SCALAR_TO_VECTOR: {
1165 if (VT.isScalableVector())
1166 return false;
1167 if (!DemandedElts[0])
1168 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1169
1170 KnownBits SrcKnown;
1171 SDValue Src = Op.getOperand(0);
1172 unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
1173 APInt SrcDemandedBits = DemandedBits.zext(SrcBitWidth);
1174 if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
1175 return true;
1176
1177 // Upper elements are undef, so only get the knownbits if we just demand
1178 // the bottom element.
1179 if (DemandedElts == 1)
1180 Known = SrcKnown.anyextOrTrunc(BitWidth);
1181 break;
1182 }
1183 case ISD::BUILD_VECTOR:
1184 // Collect the known bits that are shared by every demanded element.
1185 // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
1186 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1187 return false; // Don't fall through, will infinitely loop.
1188 case ISD::SPLAT_VECTOR: {
1189 SDValue Scl = Op.getOperand(0);
1190 APInt DemandedSclBits = DemandedBits.zextOrTrunc(Scl.getValueSizeInBits());
1191 KnownBits KnownScl;
1192 if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1193 return true;
1194
1195 // Implicitly truncate the bits to match the official semantics of
1196 // SPLAT_VECTOR.
1197 Known = KnownScl.trunc(BitWidth);
1198 break;
1199 }
1200 case ISD::LOAD: {
1201 auto *LD = cast<LoadSDNode>(Op);
1202 if (getTargetConstantFromLoad(LD)) {
1203 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1204 return false; // Don't fall through, will infinitely loop.
1205 }
1206 if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
1207 // If this is a ZEXTLoad and we are looking at the loaded value.
1208 EVT MemVT = LD->getMemoryVT();
1209 unsigned MemBits = MemVT.getScalarSizeInBits();
1210 Known.Zero.setBitsFrom(MemBits);
1211 return false; // Don't fall through, will infinitely loop.
1212 }
1213 break;
1214 }
1216 if (VT.isScalableVector())
1217 return false;
1218 SDValue Vec = Op.getOperand(0);
1219 SDValue Scl = Op.getOperand(1);
1220 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
1221 EVT VecVT = Vec.getValueType();
1222
1223 // If index isn't constant, assume we need all vector elements AND the
1224 // inserted element.
1225 APInt DemandedVecElts(DemandedElts);
1226 if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
1227 unsigned Idx = CIdx->getZExtValue();
1228 DemandedVecElts.clearBit(Idx);
1229
1230 // Inserted element is not required.
1231 if (!DemandedElts[Idx])
1232 return TLO.CombineTo(Op, Vec);
1233 }
1234
1235 KnownBits KnownScl;
1236 unsigned NumSclBits = Scl.getScalarValueSizeInBits();
1237 APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
1238 if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1239 return true;
1240
1241 Known = KnownScl.anyextOrTrunc(BitWidth);
1242
1243 KnownBits KnownVec;
1244 if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
1245 Depth + 1))
1246 return true;
1247
1248 if (!!DemandedVecElts)
1249 Known = Known.intersectWith(KnownVec);
1250
1251 return false;
1252 }
1253 case ISD::INSERT_SUBVECTOR: {
1254 if (VT.isScalableVector())
1255 return false;
1256 // Demand any elements from the subvector and the remainder from the src its
1257 // inserted into.
1258 SDValue Src = Op.getOperand(0);
1259 SDValue Sub = Op.getOperand(1);
1260 uint64_t Idx = Op.getConstantOperandVal(2);
1261 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
1262 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
1263 APInt DemandedSrcElts = DemandedElts;
1264 DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
1265
1266 KnownBits KnownSub, KnownSrc;
1267 if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
1268 Depth + 1))
1269 return true;
1270 if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, KnownSrc, TLO,
1271 Depth + 1))
1272 return true;
1273
1274 Known.Zero.setAllBits();
1275 Known.One.setAllBits();
1276 if (!!DemandedSubElts)
1277 Known = Known.intersectWith(KnownSub);
1278 if (!!DemandedSrcElts)
1279 Known = Known.intersectWith(KnownSrc);
1280
1281 // Attempt to avoid multi-use src if we don't need anything from it.
1282 if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() ||
1283 !DemandedSrcElts.isAllOnes()) {
1284 SDValue NewSub = SimplifyMultipleUseDemandedBits(
1285 Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
1286 SDValue NewSrc = SimplifyMultipleUseDemandedBits(
1287 Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1288 if (NewSub || NewSrc) {
1289 NewSub = NewSub ? NewSub : Sub;
1290 NewSrc = NewSrc ? NewSrc : Src;
1291 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub,
1292 Op.getOperand(2));
1293 return TLO.CombineTo(Op, NewOp);
1294 }
1295 }
1296 break;
1297 }
1299 if (VT.isScalableVector())
1300 return false;
1301 // Offset the demanded elts by the subvector index.
1302 SDValue Src = Op.getOperand(0);
1303 if (Src.getValueType().isScalableVector())
1304 break;
1305 uint64_t Idx = Op.getConstantOperandVal(1);
1306 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
1307 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
1308
1309 if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO,
1310 Depth + 1))
1311 return true;
1312
1313 // Attempt to avoid multi-use src if we don't need anything from it.
1314 if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
1315 SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
1316 Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1317 if (DemandedSrc) {
1318 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc,
1319 Op.getOperand(1));
1320 return TLO.CombineTo(Op, NewOp);
1321 }
1322 }
1323 break;
1324 }
1325 case ISD::CONCAT_VECTORS: {
1326 if (VT.isScalableVector())
1327 return false;
1328 Known.Zero.setAllBits();
1329 Known.One.setAllBits();
1330 EVT SubVT = Op.getOperand(0).getValueType();
1331 unsigned NumSubVecs = Op.getNumOperands();
1332 unsigned NumSubElts = SubVT.getVectorNumElements();
1333 for (unsigned i = 0; i != NumSubVecs; ++i) {
1334 APInt DemandedSubElts =
1335 DemandedElts.extractBits(NumSubElts, i * NumSubElts);
1336 if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
1337 Known2, TLO, Depth + 1))
1338 return true;
1339 // Known bits are shared by every demanded subvector element.
1340 if (!!DemandedSubElts)
1341 Known = Known.intersectWith(Known2);
1342 }
1343 break;
1344 }
1345 case ISD::VECTOR_SHUFFLE: {
1346 assert(!VT.isScalableVector());
1347 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
1348
1349 // Collect demanded elements from shuffle operands..
1350 APInt DemandedLHS, DemandedRHS;
1351 if (!getShuffleDemandedElts(NumElts, ShuffleMask, DemandedElts, DemandedLHS,
1352 DemandedRHS))
1353 break;
1354
1355 if (!!DemandedLHS || !!DemandedRHS) {
1356 SDValue Op0 = Op.getOperand(0);
1357 SDValue Op1 = Op.getOperand(1);
1358
1359 Known.Zero.setAllBits();
1360 Known.One.setAllBits();
1361 if (!!DemandedLHS) {
1362 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
1363 Depth + 1))
1364 return true;
1365 Known = Known.intersectWith(Known2);
1366 }
1367 if (!!DemandedRHS) {
1368 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
1369 Depth + 1))
1370 return true;
1371 Known = Known.intersectWith(Known2);
1372 }
1373
1374 // Attempt to avoid multi-use ops if we don't need anything from them.
1375 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1376 Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
1377 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1378 Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
1379 if (DemandedOp0 || DemandedOp1) {
1380 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1381 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1382 SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
1383 return TLO.CombineTo(Op, NewOp);
1384 }
1385 }
1386 break;
1387 }
1388 case ISD::AND: {
1389 SDValue Op0 = Op.getOperand(0);
1390 SDValue Op1 = Op.getOperand(1);
1391
1392 // If the RHS is a constant, check to see if the LHS would be zero without
1393 // using the bits from the RHS. Below, we use knowledge about the RHS to
1394 // simplify the LHS, here we're using information from the LHS to simplify
1395 // the RHS.
1396 if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1, DemandedElts)) {
1397 // Do not increment Depth here; that can cause an infinite loop.
1398 KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
1399 // If the LHS already has zeros where RHSC does, this 'and' is dead.
1400 if ((LHSKnown.Zero & DemandedBits) ==
1401 (~RHSC->getAPIntValue() & DemandedBits))
1402 return TLO.CombineTo(Op, Op0);
1403
1404 // If any of the set bits in the RHS are known zero on the LHS, shrink
1405 // the constant.
1406 if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits,
1407 DemandedElts, TLO))
1408 return true;
1409
1410 // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
1411 // constant, but if this 'and' is only clearing bits that were just set by
1412 // the xor, then this 'and' can be eliminated by shrinking the mask of
1413 // the xor. For example, for a 32-bit X:
1414 // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
1415 if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
1416 LHSKnown.One == ~RHSC->getAPIntValue()) {
1417 SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
1418 return TLO.CombineTo(Op, Xor);
1419 }
1420 }
1421
1422 // AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I)
1423 // iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits).
1424 if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR && !VT.isScalableVector() &&
1425 (Op0.getOperand(0).isUndef() ||
1427 Op0->hasOneUse()) {
1428 unsigned NumSubElts =
1430 unsigned SubIdx = Op0.getConstantOperandVal(2);
1431 APInt DemandedSub =
1432 APInt::getBitsSet(NumElts, SubIdx, SubIdx + NumSubElts);
1433 KnownBits KnownSubMask =
1434 TLO.DAG.computeKnownBits(Op1, DemandedSub & DemandedElts, Depth + 1);
1435 if (DemandedBits.isSubsetOf(KnownSubMask.One)) {
1436 SDValue NewAnd =
1437 TLO.DAG.getNode(ISD::AND, dl, VT, Op0.getOperand(0), Op1);
1438 SDValue NewInsert =
1439 TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, NewAnd,
1440 Op0.getOperand(1), Op0.getOperand(2));
1441 return TLO.CombineTo(Op, NewInsert);
1442 }
1443 }
1444
1445 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1446 Depth + 1))
1447 return true;
1448 if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
1449 Known2, TLO, Depth + 1))
1450 return true;
1451
1452 // If all of the demanded bits are known one on one side, return the other.
1453 // These bits cannot contribute to the result of the 'and'.
1454 if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
1455 return TLO.CombineTo(Op, Op0);
1456 if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
1457 return TLO.CombineTo(Op, Op1);
1458 // If all of the demanded bits in the inputs are known zeros, return zero.
1459 if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1460 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
1461 // If the RHS is a constant, see if we can simplify it.
1462 if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, DemandedElts,
1463 TLO))
1464 return true;
1465 // If the operation can be done in a smaller type, do so.
1466 if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1467 return true;
1468
1469 // Attempt to avoid multi-use ops if we don't need anything from them.
1470 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1471 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1472 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1473 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1474 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1475 if (DemandedOp0 || DemandedOp1) {
1476 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1477 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1478 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1479 return TLO.CombineTo(Op, NewOp);
1480 }
1481 }
1482
1483 Known &= Known2;
1484 break;
1485 }
1486 case ISD::OR: {
1487 SDValue Op0 = Op.getOperand(0);
1488 SDValue Op1 = Op.getOperand(1);
1489 SDNodeFlags Flags = Op.getNode()->getFlags();
1490 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1491 Depth + 1)) {
1492 if (Flags.hasDisjoint()) {
1493 Flags.setDisjoint(false);
1494 Op->setFlags(Flags);
1495 }
1496 return true;
1497 }
1498
1499 if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
1500 Known2, TLO, Depth + 1)) {
1501 if (Flags.hasDisjoint()) {
1502 Flags.setDisjoint(false);
1503 Op->setFlags(Flags);
1504 }
1505 return true;
1506 }
1507
1508 // If all of the demanded bits are known zero on one side, return the other.
1509 // These bits cannot contribute to the result of the 'or'.
1510 if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
1511 return TLO.CombineTo(Op, Op0);
1512 if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
1513 return TLO.CombineTo(Op, Op1);
1514 // If the RHS is a constant, see if we can simplify it.
1515 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1516 return true;
1517 // If the operation can be done in a smaller type, do so.
1518 if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1519 return true;
1520
1521 // Attempt to avoid multi-use ops if we don't need anything from them.
1522 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1523 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1524 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1525 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1526 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1527 if (DemandedOp0 || DemandedOp1) {
1528 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1529 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1530 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1531 return TLO.CombineTo(Op, NewOp);
1532 }
1533 }
1534
1535 // (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1|C2), (and Y, C2))
1536 // TODO: Use SimplifyMultipleUseDemandedBits to peek through masks.
1537 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::AND &&
1538 Op0->hasOneUse() && Op1->hasOneUse()) {
1539 // Attempt to match all commutations - m_c_Or would've been useful!
1540 for (int I = 0; I != 2; ++I) {
1541 SDValue X = Op.getOperand(I).getOperand(0);
1542 SDValue C1 = Op.getOperand(I).getOperand(1);
1543 SDValue Alt = Op.getOperand(1 - I).getOperand(0);
1544 SDValue C2 = Op.getOperand(1 - I).getOperand(1);
1545 if (Alt.getOpcode() == ISD::OR) {
1546 for (int J = 0; J != 2; ++J) {
1547 if (X == Alt.getOperand(J)) {
1548 SDValue Y = Alt.getOperand(1 - J);
1549 if (SDValue C12 = TLO.DAG.FoldConstantArithmetic(ISD::OR, dl, VT,
1550 {C1, C2})) {
1551 SDValue MaskX = TLO.DAG.getNode(ISD::AND, dl, VT, X, C12);
1552 SDValue MaskY = TLO.DAG.getNode(ISD::AND, dl, VT, Y, C2);
1553 return TLO.CombineTo(
1554 Op, TLO.DAG.getNode(ISD::OR, dl, VT, MaskX, MaskY));
1555 }
1556 }
1557 }
1558 }
1559 }
1560 }
1561
1562 Known |= Known2;
1563 break;
1564 }
1565 case ISD::XOR: {
1566 SDValue Op0 = Op.getOperand(0);
1567 SDValue Op1 = Op.getOperand(1);
1568
1569 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1570 Depth + 1))
1571 return true;
1572 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
1573 Depth + 1))
1574 return true;
1575
1576 // If all of the demanded bits are known zero on one side, return the other.
1577 // These bits cannot contribute to the result of the 'xor'.
1578 if (DemandedBits.isSubsetOf(Known.Zero))
1579 return TLO.CombineTo(Op, Op0);
1580 if (DemandedBits.isSubsetOf(Known2.Zero))
1581 return TLO.CombineTo(Op, Op1);
1582 // If the operation can be done in a smaller type, do so.
1583 if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1584 return true;
1585
1586 // If all of the unknown bits are known to be zero on one side or the other
1587 // turn this into an *inclusive* or.
1588 // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
1589 if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1590 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
1591
1592 ConstantSDNode *C = isConstOrConstSplat(Op1, DemandedElts);
1593 if (C) {
1594 // If one side is a constant, and all of the set bits in the constant are
1595 // also known set on the other side, turn this into an AND, as we know
1596 // the bits will be cleared.
1597 // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
1598 // NB: it is okay if more bits are known than are requested
1599 if (C->getAPIntValue() == Known2.One) {
1600 SDValue ANDC =
1601 TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
1602 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
1603 }
1604
1605 // If the RHS is a constant, see if we can change it. Don't alter a -1
1606 // constant because that's a 'not' op, and that is better for combining
1607 // and codegen.
1608 if (!C->isAllOnes() && DemandedBits.isSubsetOf(C->getAPIntValue())) {
1609 // We're flipping all demanded bits. Flip the undemanded bits too.
1610 SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
1611 return TLO.CombineTo(Op, New);
1612 }
1613
1614 unsigned Op0Opcode = Op0.getOpcode();
1615 if ((Op0Opcode == ISD::SRL || Op0Opcode == ISD::SHL) && Op0.hasOneUse()) {
1616 if (ConstantSDNode *ShiftC =
1617 isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
1618 // Don't crash on an oversized shift. We can not guarantee that a
1619 // bogus shift has been simplified to undef.
1620 if (ShiftC->getAPIntValue().ult(BitWidth)) {
1621 uint64_t ShiftAmt = ShiftC->getZExtValue();
1623 Ones = Op0Opcode == ISD::SHL ? Ones.shl(ShiftAmt)
1624 : Ones.lshr(ShiftAmt);
1625 const TargetLowering &TLI = TLO.DAG.getTargetLoweringInfo();
1626 if ((DemandedBits & C->getAPIntValue()) == (DemandedBits & Ones) &&
1627 TLI.isDesirableToCommuteXorWithShift(Op.getNode())) {
1628 // If the xor constant is a demanded mask, do a 'not' before the
1629 // shift:
1630 // xor (X << ShiftC), XorC --> (not X) << ShiftC
1631 // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
1632 SDValue Not = TLO.DAG.getNOT(dl, Op0.getOperand(0), VT);
1633 return TLO.CombineTo(Op, TLO.DAG.getNode(Op0Opcode, dl, VT, Not,
1634 Op0.getOperand(1)));
1635 }
1636 }
1637 }
1638 }
1639 }
1640
1641 // If we can't turn this into a 'not', try to shrink the constant.
1642 if (!C || !C->isAllOnes())
1643 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1644 return true;
1645
1646 // Attempt to avoid multi-use ops if we don't need anything from them.
1647 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1648 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1649 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1650 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1651 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1652 if (DemandedOp0 || DemandedOp1) {
1653 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1654 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1655 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1656 return TLO.CombineTo(Op, NewOp);
1657 }
1658 }
1659
1660 Known ^= Known2;
1661 break;
1662 }
1663 case ISD::SELECT:
1664 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1665 Known, TLO, Depth + 1))
1666 return true;
1667 if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1668 Known2, TLO, Depth + 1))
1669 return true;
1670
1671 // If the operands are constants, see if we can simplify them.
1672 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1673 return true;
1674
1675 // Only known if known in both the LHS and RHS.
1676 Known = Known.intersectWith(Known2);
1677 break;
1678 case ISD::VSELECT:
1679 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1680 Known, TLO, Depth + 1))
1681 return true;
1682 if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1683 Known2, TLO, Depth + 1))
1684 return true;
1685
1686 // Only known if known in both the LHS and RHS.
1687 Known = Known.intersectWith(Known2);
1688 break;
1689 case ISD::SELECT_CC:
1690 if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, DemandedElts,
1691 Known, TLO, Depth + 1))
1692 return true;
1693 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1694 Known2, TLO, Depth + 1))
1695 return true;
1696
1697 // If the operands are constants, see if we can simplify them.
1698 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1699 return true;
1700
1701 // Only known if known in both the LHS and RHS.
1702 Known = Known.intersectWith(Known2);
1703 break;
1704 case ISD::SETCC: {
1705 SDValue Op0 = Op.getOperand(0);
1706 SDValue Op1 = Op.getOperand(1);
1707 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
1708 // If (1) we only need the sign-bit, (2) the setcc operands are the same
1709 // width as the setcc result, and (3) the result of a setcc conforms to 0 or
1710 // -1, we may be able to bypass the setcc.
1711 if (DemandedBits.isSignMask() &&
1715 // If we're testing X < 0, then this compare isn't needed - just use X!
1716 // FIXME: We're limiting to integer types here, but this should also work
1717 // if we don't care about FP signed-zero. The use of SETLT with FP means
1718 // that we don't care about NaNs.
1719 if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
1721 return TLO.CombineTo(Op, Op0);
1722
1723 // TODO: Should we check for other forms of sign-bit comparisons?
1724 // Examples: X <= -1, X >= 0
1725 }
1726 if (getBooleanContents(Op0.getValueType()) ==
1728 BitWidth > 1)
1729 Known.Zero.setBitsFrom(1);
1730 break;
1731 }
1732 case ISD::SHL: {
1733 SDValue Op0 = Op.getOperand(0);
1734 SDValue Op1 = Op.getOperand(1);
1735 EVT ShiftVT = Op1.getValueType();
1736
1737 if (std::optional<uint64_t> KnownSA =
1738 TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
1739 unsigned ShAmt = *KnownSA;
1740 if (ShAmt == 0)
1741 return TLO.CombineTo(Op, Op0);
1742
1743 // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
1744 // single shift. We can do this if the bottom bits (which are shifted
1745 // out) are never demanded.
1746 // TODO - support non-uniform vector amounts.
1747 if (Op0.getOpcode() == ISD::SRL) {
1748 if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
1749 if (std::optional<uint64_t> InnerSA =
1750 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
1751 unsigned C1 = *InnerSA;
1752 unsigned Opc = ISD::SHL;
1753 int Diff = ShAmt - C1;
1754 if (Diff < 0) {
1755 Diff = -Diff;
1756 Opc = ISD::SRL;
1757 }
1758 SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1759 return TLO.CombineTo(
1760 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1761 }
1762 }
1763 }
1764
1765 // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
1766 // are not demanded. This will likely allow the anyext to be folded away.
1767 // TODO - support non-uniform vector amounts.
1768 if (Op0.getOpcode() == ISD::ANY_EXTEND) {
1769 SDValue InnerOp = Op0.getOperand(0);
1770 EVT InnerVT = InnerOp.getValueType();
1771 unsigned InnerBits = InnerVT.getScalarSizeInBits();
1772 if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
1773 isTypeDesirableForOp(ISD::SHL, InnerVT)) {
1774 SDValue NarrowShl = TLO.DAG.getNode(
1775 ISD::SHL, dl, InnerVT, InnerOp,
1776 TLO.DAG.getShiftAmountConstant(ShAmt, InnerVT, dl));
1777 return TLO.CombineTo(
1778 Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1779 }
1780
1781 // Repeat the SHL optimization above in cases where an extension
1782 // intervenes: (shl (anyext (shr x, c1)), c2) to
1783 // (shl (anyext x), c2-c1). This requires that the bottom c1 bits
1784 // aren't demanded (as above) and that the shifted upper c1 bits of
1785 // x aren't demanded.
1786 // TODO - support non-uniform vector amounts.
1787 if (InnerOp.getOpcode() == ISD::SRL && Op0.hasOneUse() &&
1788 InnerOp.hasOneUse()) {
1789 if (std::optional<uint64_t> SA2 = TLO.DAG.getValidShiftAmount(
1790 InnerOp, DemandedElts, Depth + 2)) {
1791 unsigned InnerShAmt = *SA2;
1792 if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
1793 DemandedBits.getActiveBits() <=
1794 (InnerBits - InnerShAmt + ShAmt) &&
1795 DemandedBits.countr_zero() >= ShAmt) {
1796 SDValue NewSA =
1797 TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, ShiftVT);
1798 SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
1799 InnerOp.getOperand(0));
1800 return TLO.CombineTo(
1801 Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
1802 }
1803 }
1804 }
1805 }
1806
1807 APInt InDemandedMask = DemandedBits.lshr(ShAmt);
1808 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1809 Depth + 1)) {
1810 SDNodeFlags Flags = Op.getNode()->getFlags();
1811 if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
1812 // Disable the nsw and nuw flags. We can no longer guarantee that we
1813 // won't wrap after simplification.
1814 Flags.setNoSignedWrap(false);
1815 Flags.setNoUnsignedWrap(false);
1816 Op->setFlags(Flags);
1817 }
1818 return true;
1819 }
1820 Known.Zero <<= ShAmt;
1821 Known.One <<= ShAmt;
1822 // low bits known zero.
1823 Known.Zero.setLowBits(ShAmt);
1824
1825 // Attempt to avoid multi-use ops if we don't need anything from them.
1826 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1827 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1828 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
1829 if (DemandedOp0) {
1830 SDValue NewOp = TLO.DAG.getNode(ISD::SHL, dl, VT, DemandedOp0, Op1);
1831 return TLO.CombineTo(Op, NewOp);
1832 }
1833 }
1834
1835 // TODO: Can we merge this fold with the one below?
1836 // Try shrinking the operation as long as the shift amount will still be
1837 // in range.
1838 if (ShAmt < DemandedBits.getActiveBits() && !VT.isVector() &&
1839 Op.getNode()->hasOneUse()) {
1840 // Search for the smallest integer type with free casts to and from
1841 // Op's type. For expedience, just check power-of-2 integer types.
1842 unsigned DemandedSize = DemandedBits.getActiveBits();
1843 for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
1844 SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
1845 EVT SmallVT = EVT::getIntegerVT(*TLO.DAG.getContext(), SmallVTBits);
1846 if (isNarrowingProfitable(VT, SmallVT) &&
1847 isTypeDesirableForOp(ISD::SHL, SmallVT) &&
1848 isTruncateFree(VT, SmallVT) && isZExtFree(SmallVT, VT) &&
1849 (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, SmallVT))) {
1850 assert(DemandedSize <= SmallVTBits &&
1851 "Narrowed below demanded bits?");
1852 // We found a type with free casts.
1853 SDValue NarrowShl = TLO.DAG.getNode(
1854 ISD::SHL, dl, SmallVT,
1855 TLO.DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
1856 TLO.DAG.getShiftAmountConstant(ShAmt, SmallVT, dl));
1857 return TLO.CombineTo(
1858 Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1859 }
1860 }
1861 }
1862
1863 // Narrow shift to lower half - similar to ShrinkDemandedOp.
1864 // (shl i64:x, K) -> (i64 zero_extend (shl (i32 (trunc i64:x)), K))
1865 // Only do this if we demand the upper half so the knownbits are correct.
1866 unsigned HalfWidth = BitWidth / 2;
1867 if ((BitWidth % 2) == 0 && !VT.isVector() && ShAmt < HalfWidth &&
1868 DemandedBits.countLeadingOnes() >= HalfWidth) {
1869 EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), HalfWidth);
1870 if (isNarrowingProfitable(VT, HalfVT) &&
1871 isTypeDesirableForOp(ISD::SHL, HalfVT) &&
1872 isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
1873 (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, HalfVT))) {
1874 // If we're demanding the upper bits at all, we must ensure
1875 // that the upper bits of the shift result are known to be zero,
1876 // which is equivalent to the narrow shift being NUW.
1877 if (bool IsNUW = (Known.countMinLeadingZeros() >= HalfWidth)) {
1878 bool IsNSW = Known.countMinSignBits() > HalfWidth;
1879 SDNodeFlags Flags;
1880 Flags.setNoSignedWrap(IsNSW);
1881 Flags.setNoUnsignedWrap(IsNUW);
1882 SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
1883 SDValue NewShiftAmt = TLO.DAG.getShiftAmountConstant(
1884 ShAmt, HalfVT, dl, TLO.LegalTypes());
1885 SDValue NewShift = TLO.DAG.getNode(ISD::SHL, dl, HalfVT, NewOp,
1886 NewShiftAmt, Flags);
1887 SDValue NewExt =
1888 TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift);
1889 return TLO.CombineTo(Op, NewExt);
1890 }
1891 }
1892 }
1893 } else {
1894 // This is a variable shift, so we can't shift the demand mask by a known
1895 // amount. But if we are not demanding high bits, then we are not
1896 // demanding those bits from the pre-shifted operand either.
1897 if (unsigned CTLZ = DemandedBits.countl_zero()) {
1898 APInt DemandedFromOp(APInt::getLowBitsSet(BitWidth, BitWidth - CTLZ));
1899 if (SimplifyDemandedBits(Op0, DemandedFromOp, DemandedElts, Known, TLO,
1900 Depth + 1)) {
1901 SDNodeFlags Flags = Op.getNode()->getFlags();
1902 if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
1903 // Disable the nsw and nuw flags. We can no longer guarantee that we
1904 // won't wrap after simplification.
1905 Flags.setNoSignedWrap(false);
1906 Flags.setNoUnsignedWrap(false);
1907 Op->setFlags(Flags);
1908 }
1909 return true;
1910 }
1911 Known.resetAll();
1912 }
1913 }
1914
1915 // If we are only demanding sign bits then we can use the shift source
1916 // directly.
1917 if (std::optional<uint64_t> MaxSA =
1918 TLO.DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
1919 unsigned ShAmt = *MaxSA;
1920 unsigned NumSignBits =
1921 TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
1922 unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
1923 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
1924 return TLO.CombineTo(Op, Op0);
1925 }
1926 break;
1927 }
1928 case ISD::SRL: {
1929 SDValue Op0 = Op.getOperand(0);
1930 SDValue Op1 = Op.getOperand(1);
1931 EVT ShiftVT = Op1.getValueType();
1932
1933 if (std::optional<uint64_t> KnownSA =
1934 TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
1935 unsigned ShAmt = *KnownSA;
1936 if (ShAmt == 0)
1937 return TLO.CombineTo(Op, Op0);
1938
1939 // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
1940 // single shift. We can do this if the top bits (which are shifted out)
1941 // are never demanded.
1942 // TODO - support non-uniform vector amounts.
1943 if (Op0.getOpcode() == ISD::SHL) {
1944 if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
1945 if (std::optional<uint64_t> InnerSA =
1946 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
1947 unsigned C1 = *InnerSA;
1948 unsigned Opc = ISD::SRL;
1949 int Diff = ShAmt - C1;
1950 if (Diff < 0) {
1951 Diff = -Diff;
1952 Opc = ISD::SHL;
1953 }
1954 SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1955 return TLO.CombineTo(
1956 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1957 }
1958 }
1959 }
1960
1961 APInt InDemandedMask = (DemandedBits << ShAmt);
1962
1963 // If the shift is exact, then it does demand the low bits (and knows that
1964 // they are zero).
1965 if (Op->getFlags().hasExact())
1966 InDemandedMask.setLowBits(ShAmt);
1967
1968 // Narrow shift to lower half - similar to ShrinkDemandedOp.
1969 // (srl i64:x, K) -> (i64 zero_extend (srl (i32 (trunc i64:x)), K))
1970 if ((BitWidth % 2) == 0 && !VT.isVector()) {
1972 EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), BitWidth / 2);
1973 if (isNarrowingProfitable(VT, HalfVT) &&
1974 isTypeDesirableForOp(ISD::SRL, HalfVT) &&
1975 isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
1976 (!TLO.LegalOperations() || isOperationLegal(ISD::SRL, HalfVT)) &&
1977 ((InDemandedMask.countLeadingZeros() >= (BitWidth / 2)) ||
1978 TLO.DAG.MaskedValueIsZero(Op0, HiBits))) {
1979 SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
1980 SDValue NewShiftAmt = TLO.DAG.getShiftAmountConstant(
1981 ShAmt, HalfVT, dl, TLO.LegalTypes());
1982 SDValue NewShift =
1983 TLO.DAG.getNode(ISD::SRL, dl, HalfVT, NewOp, NewShiftAmt);
1984 return TLO.CombineTo(
1985 Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift));
1986 }
1987 }
1988
1989 // Compute the new bits that are at the top now.
1990 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1991 Depth + 1))
1992 return true;
1993 Known.Zero.lshrInPlace(ShAmt);
1994 Known.One.lshrInPlace(ShAmt);
1995 // High bits known zero.
1996 Known.Zero.setHighBits(ShAmt);
1997
1998 // Attempt to avoid multi-use ops if we don't need anything from them.
1999 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2000 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2001 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
2002 if (DemandedOp0) {
2003 SDValue NewOp = TLO.DAG.getNode(ISD::SRL, dl, VT, DemandedOp0, Op1);
2004 return TLO.CombineTo(Op, NewOp);
2005 }
2006 }
2007 } else {
2008 // Use generic knownbits computation as it has support for non-uniform
2009 // shift amounts.
2010 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2011 }
2012
2013 // Try to match AVG patterns (after shift simplification).
2014 if (SDValue AVG = combineShiftToAVG(Op, TLO, *this, DemandedBits,
2015 DemandedElts, Depth + 1))
2016 return TLO.CombineTo(Op, AVG);
2017
2018 break;
2019 }
2020 case ISD::SRA: {
2021 SDValue Op0 = Op.getOperand(0);
2022 SDValue Op1 = Op.getOperand(1);
2023 EVT ShiftVT = Op1.getValueType();
2024
2025 // If we only want bits that already match the signbit then we don't need
2026 // to shift.
2027 unsigned NumHiDemandedBits = BitWidth - DemandedBits.countr_zero();
2028 if (TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1) >=
2029 NumHiDemandedBits)
2030 return TLO.CombineTo(Op, Op0);
2031
2032 // If this is an arithmetic shift right and only the low-bit is set, we can
2033 // always convert this into a logical shr, even if the shift amount is
2034 // variable. The low bit of the shift cannot be an input sign bit unless
2035 // the shift amount is >= the size of the datatype, which is undefined.
2036 if (DemandedBits.isOne())
2037 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2038
2039 if (std::optional<uint64_t> KnownSA =
2040 TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
2041 unsigned ShAmt = *KnownSA;
2042 if (ShAmt == 0)
2043 return TLO.CombineTo(Op, Op0);
2044
2045 // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target
2046 // supports sext_inreg.
2047 if (Op0.getOpcode() == ISD::SHL) {
2048 if (std::optional<uint64_t> InnerSA =
2049 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
2050 unsigned LowBits = BitWidth - ShAmt;
2051 EVT ExtVT = EVT::getIntegerVT(*TLO.DAG.getContext(), LowBits);
2052 if (VT.isVector())
2053 ExtVT = EVT::getVectorVT(*TLO.DAG.getContext(), ExtVT,
2055
2056 if (*InnerSA == ShAmt) {
2057 if (!TLO.LegalOperations() ||
2059 return TLO.CombineTo(
2060 Op, TLO.DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT,
2061 Op0.getOperand(0),
2062 TLO.DAG.getValueType(ExtVT)));
2063
2064 // Even if we can't convert to sext_inreg, we might be able to
2065 // remove this shift pair if the input is already sign extended.
2066 unsigned NumSignBits =
2067 TLO.DAG.ComputeNumSignBits(Op0.getOperand(0), DemandedElts);
2068 if (NumSignBits > ShAmt)
2069 return TLO.CombineTo(Op, Op0.getOperand(0));
2070 }
2071 }
2072 }
2073
2074 APInt InDemandedMask = (DemandedBits << ShAmt);
2075
2076 // If the shift is exact, then it does demand the low bits (and knows that
2077 // they are zero).
2078 if (Op->getFlags().hasExact())
2079 InDemandedMask.setLowBits(ShAmt);
2080
2081 // If any of the demanded bits are produced by the sign extension, we also
2082 // demand the input sign bit.
2083 if (DemandedBits.countl_zero() < ShAmt)
2084 InDemandedMask.setSignBit();
2085
2086 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
2087 Depth + 1))
2088 return true;
2089 Known.Zero.lshrInPlace(ShAmt);
2090 Known.One.lshrInPlace(ShAmt);
2091
2092 // If the input sign bit is known to be zero, or if none of the top bits
2093 // are demanded, turn this into an unsigned shift right.
2094 if (Known.Zero[BitWidth - ShAmt - 1] ||
2095 DemandedBits.countl_zero() >= ShAmt) {
2096 SDNodeFlags Flags;
2097 Flags.setExact(Op->getFlags().hasExact());
2098 return TLO.CombineTo(
2099 Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
2100 }
2101
2102 int Log2 = DemandedBits.exactLogBase2();
2103 if (Log2 >= 0) {
2104 // The bit must come from the sign.
2105 SDValue NewSA = TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, ShiftVT);
2106 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
2107 }
2108
2109 if (Known.One[BitWidth - ShAmt - 1])
2110 // New bits are known one.
2111 Known.One.setHighBits(ShAmt);
2112
2113 // Attempt to avoid multi-use ops if we don't need anything from them.
2114 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2115 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2116 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
2117 if (DemandedOp0) {
2118 SDValue NewOp = TLO.DAG.getNode(ISD::SRA, dl, VT, DemandedOp0, Op1);
2119 return TLO.CombineTo(Op, NewOp);
2120 }
2121 }
2122 }
2123
2124 // Try to match AVG patterns (after shift simplification).
2125 if (SDValue AVG = combineShiftToAVG(Op, TLO, *this, DemandedBits,
2126 DemandedElts, Depth + 1))
2127 return TLO.CombineTo(Op, AVG);
2128
2129 break;
2130 }
2131 case ISD::FSHL:
2132 case ISD::FSHR: {
2133 SDValue Op0 = Op.getOperand(0);
2134 SDValue Op1 = Op.getOperand(1);
2135 SDValue Op2 = Op.getOperand(2);
2136 bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
2137
2138 if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
2139 unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2140
2141 // For fshl, 0-shift returns the 1st arg.
2142 // For fshr, 0-shift returns the 2nd arg.
2143 if (Amt == 0) {
2144 if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
2145 Known, TLO, Depth + 1))
2146 return true;
2147 break;
2148 }
2149
2150 // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
2151 // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
2152 APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
2153 APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
2154 if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2155 Depth + 1))
2156 return true;
2157 if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
2158 Depth + 1))
2159 return true;
2160
2161 Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt));
2162 Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt));
2163 Known.One.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
2164 Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
2165 Known = Known.unionWith(Known2);
2166
2167 // Attempt to avoid multi-use ops if we don't need anything from them.
2168 if (!Demanded0.isAllOnes() || !Demanded1.isAllOnes() ||
2169 !DemandedElts.isAllOnes()) {
2170 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2171 Op0, Demanded0, DemandedElts, TLO.DAG, Depth + 1);
2172 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2173 Op1, Demanded1, DemandedElts, TLO.DAG, Depth + 1);
2174 if (DemandedOp0 || DemandedOp1) {
2175 DemandedOp0 = DemandedOp0 ? DemandedOp0 : Op0;
2176 DemandedOp1 = DemandedOp1 ? DemandedOp1 : Op1;
2177 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedOp0,
2178 DemandedOp1, Op2);
2179 return TLO.CombineTo(Op, NewOp);
2180 }
2181 }
2182 }
2183
2184 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2185 if (isPowerOf2_32(BitWidth)) {
2186 APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
2187 if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts,
2188 Known2, TLO, Depth + 1))
2189 return true;
2190 }
2191 break;
2192 }
2193 case ISD::ROTL:
2194 case ISD::ROTR: {
2195 SDValue Op0 = Op.getOperand(0);
2196 SDValue Op1 = Op.getOperand(1);
2197 bool IsROTL = (Op.getOpcode() == ISD::ROTL);
2198
2199 // If we're rotating an 0/-1 value, then it stays an 0/-1 value.
2200 if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1))
2201 return TLO.CombineTo(Op, Op0);
2202
2203 if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
2204 unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2205 unsigned RevAmt = BitWidth - Amt;
2206
2207 // rotl: (Op0 << Amt) | (Op0 >> (BW - Amt))
2208 // rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt)
2209 APInt Demanded0 = DemandedBits.rotr(IsROTL ? Amt : RevAmt);
2210 if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2211 Depth + 1))
2212 return true;
2213
2214 // rot*(x, 0) --> x
2215 if (Amt == 0)
2216 return TLO.CombineTo(Op, Op0);
2217
2218 // See if we don't demand either half of the rotated bits.
2219 if ((!TLO.LegalOperations() || isOperationLegal(ISD::SHL, VT)) &&
2220 DemandedBits.countr_zero() >= (IsROTL ? Amt : RevAmt)) {
2221 Op1 = TLO.DAG.getConstant(IsROTL ? Amt : RevAmt, dl, Op1.getValueType());
2222 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, Op1));
2223 }
2224 if ((!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT)) &&
2225 DemandedBits.countl_zero() >= (IsROTL ? RevAmt : Amt)) {
2226 Op1 = TLO.DAG.getConstant(IsROTL ? RevAmt : Amt, dl, Op1.getValueType());
2227 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2228 }
2229 }
2230
2231 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2232 if (isPowerOf2_32(BitWidth)) {
2233 APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
2234 if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO,
2235 Depth + 1))
2236 return true;
2237 }
2238 break;
2239 }
2240 case ISD::SMIN:
2241 case ISD::SMAX:
2242 case ISD::UMIN:
2243 case ISD::UMAX: {
2244 unsigned Opc = Op.getOpcode();
2245 SDValue Op0 = Op.getOperand(0);
2246 SDValue Op1 = Op.getOperand(1);
2247
2248 // If we're only demanding signbits, then we can simplify to OR/AND node.
2249 unsigned BitOp =
2250 (Opc == ISD::SMIN || Opc == ISD::UMAX) ? ISD::OR : ISD::AND;
2251 unsigned NumSignBits =
2252 std::min(TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1),
2253 TLO.DAG.ComputeNumSignBits(Op1, DemandedElts, Depth + 1));
2254 unsigned NumDemandedUpperBits = BitWidth - DemandedBits.countr_zero();
2255 if (NumSignBits >= NumDemandedUpperBits)
2256 return TLO.CombineTo(Op, TLO.DAG.getNode(BitOp, SDLoc(Op), VT, Op0, Op1));
2257
2258 // Check if one arg is always less/greater than (or equal) to the other arg.
2259 KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
2260 KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
2261 switch (Opc) {
2262 case ISD::SMIN:
2263 if (std::optional<bool> IsSLE = KnownBits::sle(Known0, Known1))
2264 return TLO.CombineTo(Op, *IsSLE ? Op0 : Op1);
2265 if (std::optional<bool> IsSLT = KnownBits::slt(Known0, Known1))
2266 return TLO.CombineTo(Op, *IsSLT ? Op0 : Op1);
2267 Known = KnownBits::smin(Known0, Known1);
2268 break;
2269 case ISD::SMAX:
2270 if (std::optional<bool> IsSGE = KnownBits::sge(Known0, Known1))
2271 return TLO.CombineTo(Op, *IsSGE ? Op0 : Op1);
2272 if (std::optional<bool> IsSGT = KnownBits::sgt(Known0, Known1))
2273 return TLO.CombineTo(Op, *IsSGT ? Op0 : Op1);
2274 Known = KnownBits::smax(Known0, Known1);
2275 break;
2276 case ISD::UMIN:
2277 if (std::optional<bool> IsULE = KnownBits::ule(Known0, Known1))
2278 return TLO.CombineTo(Op, *IsULE ? Op0 : Op1);
2279 if (std::optional<bool> IsULT = KnownBits::ult(Known0, Known1))
2280 return TLO.CombineTo(Op, *IsULT ? Op0 : Op1);
2281 Known = KnownBits::umin(Known0, Known1);
2282 break;
2283 case ISD::UMAX:
2284 if (std::optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
2285 return TLO.CombineTo(Op, *IsUGE ? Op0 : Op1);
2286 if (std::optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
2287 return TLO.CombineTo(Op, *IsUGT ? Op0 : Op1);
2288 Known = KnownBits::umax(Known0, Known1);
2289 break;
2290 }
2291 break;
2292 }
2293 case ISD::BITREVERSE: {
2294 SDValue Src = Op.getOperand(0);
2295 APInt DemandedSrcBits = DemandedBits.reverseBits();
2296 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2297 Depth + 1))
2298 return true;
2299 Known.One = Known2.One.reverseBits();
2300 Known.Zero = Known2.Zero.reverseBits();
2301 break;
2302 }
2303 case ISD::BSWAP: {
2304 SDValue Src = Op.getOperand(0);
2305
2306 // If the only bits demanded come from one byte of the bswap result,
2307 // just shift the input byte into position to eliminate the bswap.
2308 unsigned NLZ = DemandedBits.countl_zero();
2309 unsigned NTZ = DemandedBits.countr_zero();
2310
2311 // Round NTZ down to the next byte. If we have 11 trailing zeros, then
2312 // we need all the bits down to bit 8. Likewise, round NLZ. If we
2313 // have 14 leading zeros, round to 8.
2314 NLZ = alignDown(NLZ, 8);
2315 NTZ = alignDown(NTZ, 8);
2316 // If we need exactly one byte, we can do this transformation.
2317 if (BitWidth - NLZ - NTZ == 8) {
2318 // Replace this with either a left or right shift to get the byte into
2319 // the right place.
2320 unsigned ShiftOpcode = NLZ > NTZ ? ISD::SRL : ISD::SHL;
2321 if (!TLO.LegalOperations() || isOperationLegal(ShiftOpcode, VT)) {
2322 unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ;
2323 SDValue ShAmt = TLO.DAG.getShiftAmountConstant(ShiftAmount, VT, dl);
2324 SDValue NewOp = TLO.DAG.getNode(ShiftOpcode, dl, VT, Src, ShAmt);
2325 return TLO.CombineTo(Op, NewOp);
2326 }
2327 }
2328
2329 APInt DemandedSrcBits = DemandedBits.byteSwap();
2330 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2331 Depth + 1))
2332 return true;
2333 Known.One = Known2.One.byteSwap();
2334 Known.Zero = Known2.Zero.byteSwap();
2335 break;
2336 }
2337 case ISD::CTPOP: {
2338 // If only 1 bit is demanded, replace with PARITY as long as we're before
2339 // op legalization.
2340 // FIXME: Limit to scalars for now.
2341 if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
2342 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
2343 Op.getOperand(0)));
2344
2345 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2346 break;
2347 }
2349 SDValue Op0 = Op.getOperand(0);
2350 EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2351 unsigned ExVTBits = ExVT.getScalarSizeInBits();
2352
2353 // If we only care about the highest bit, don't bother shifting right.
2354 if (DemandedBits.isSignMask()) {
2355 unsigned MinSignedBits =
2356 TLO.DAG.ComputeMaxSignificantBits(Op0, DemandedElts, Depth + 1);
2357 bool AlreadySignExtended = ExVTBits >= MinSignedBits;
2358 // However if the input is already sign extended we expect the sign
2359 // extension to be dropped altogether later and do not simplify.
2360 if (!AlreadySignExtended) {
2361 // Compute the correct shift amount type, which must be getShiftAmountTy
2362 // for scalar types after legalization.
2363 SDValue ShiftAmt =
2364 TLO.DAG.getShiftAmountConstant(BitWidth - ExVTBits, VT, dl);
2365 return TLO.CombineTo(Op,
2366 TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
2367 }
2368 }
2369
2370 // If none of the extended bits are demanded, eliminate the sextinreg.
2371 if (DemandedBits.getActiveBits() <= ExVTBits)
2372 return TLO.CombineTo(Op, Op0);
2373
2374 APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);
2375
2376 // Since the sign extended bits are demanded, we know that the sign
2377 // bit is demanded.
2378 InputDemandedBits.setBit(ExVTBits - 1);
2379
2380 if (SimplifyDemandedBits(Op0, InputDemandedBits, DemandedElts, Known, TLO,
2381 Depth + 1))
2382 return true;
2383
2384 // If the sign bit of the input is known set or clear, then we know the
2385 // top bits of the result.
2386
2387 // If the input sign bit is known zero, convert this into a zero extension.
2388 if (Known.Zero[ExVTBits - 1])
2389 return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT));
2390
2391 APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
2392 if (Known.One[ExVTBits - 1]) { // Input sign bit known set
2393 Known.One.setBitsFrom(ExVTBits);
2394 Known.Zero &= Mask;
2395 } else { // Input sign bit unknown
2396 Known.Zero &= Mask;
2397 Known.One &= Mask;
2398 }
2399 break;
2400 }
2401 case ISD::BUILD_PAIR: {
2402 EVT HalfVT = Op.getOperand(0).getValueType();
2403 unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
2404
2405 APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
2406 APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
2407
2408 KnownBits KnownLo, KnownHi;
2409
2410 if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
2411 return true;
2412
2413 if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
2414 return true;
2415
2416 Known = KnownHi.concat(KnownLo);
2417 break;
2418 }
2420 if (VT.isScalableVector())
2421 return false;
2422 [[fallthrough]];
2423 case ISD::ZERO_EXTEND: {
2424 SDValue Src = Op.getOperand(0);
2425 EVT SrcVT = Src.getValueType();
2426 unsigned InBits = SrcVT.getScalarSizeInBits();
2427 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2428 bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
2429
2430 // If none of the top bits are demanded, convert this into an any_extend.
2431 if (DemandedBits.getActiveBits() <= InBits) {
2432 // If we only need the non-extended bits of the bottom element
2433 // then we can just bitcast to the result.
2434 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2435 VT.getSizeInBits() == SrcVT.getSizeInBits())
2436 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2437
2438 unsigned Opc =
2440 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2441 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2442 }
2443
2444 SDNodeFlags Flags = Op->getFlags();
2445 APInt InDemandedBits = DemandedBits.trunc(InBits);
2446 APInt InDemandedElts = DemandedElts.zext(InElts);
2447 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2448 Depth + 1)) {
2449 if (Flags.hasNonNeg()) {
2450 Flags.setNonNeg(false);
2451 Op->setFlags(Flags);
2452 }
2453 return true;
2454 }
2455 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2456 Known = Known.zext(BitWidth);
2457
2458 // Attempt to avoid multi-use ops if we don't need anything from them.
2459 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2460 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2461 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2462 break;
2463 }
2465 if (VT.isScalableVector())
2466 return false;
2467 [[fallthrough]];
2468 case ISD::SIGN_EXTEND: {
2469 SDValue Src = Op.getOperand(0);
2470 EVT SrcVT = Src.getValueType();
2471 unsigned InBits = SrcVT.getScalarSizeInBits();
2472 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2473 bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
2474
2475 APInt InDemandedElts = DemandedElts.zext(InElts);
2476 APInt InDemandedBits = DemandedBits.trunc(InBits);
2477
2478 // Since some of the sign extended bits are demanded, we know that the sign
2479 // bit is demanded.
2480 InDemandedBits.setBit(InBits - 1);
2481
2482 // If none of the top bits are demanded, convert this into an any_extend.
2483 if (DemandedBits.getActiveBits() <= InBits) {
2484 // If we only need the non-extended bits of the bottom element
2485 // then we can just bitcast to the result.
2486 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2487 VT.getSizeInBits() == SrcVT.getSizeInBits())
2488 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2489
2490 // Don't lose an all signbits 0/-1 splat on targets with 0/-1 booleans.
2492 TLO.DAG.ComputeNumSignBits(Src, InDemandedElts, Depth + 1) !=
2493 InBits) {
2494 unsigned Opc =
2496 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2497 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2498 }
2499 }
2500
2501 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2502 Depth + 1))
2503 return true;
2504 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2505
2506 // If the sign bit is known one, the top bits match.
2507 Known = Known.sext(BitWidth);
2508
2509 // If the sign bit is known zero, convert this to a zero extend.
2510 if (Known.isNonNegative()) {
2511 unsigned Opc =
2513 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT)) {
2514 SDNodeFlags Flags;
2515 if (!IsVecInReg)
2516 Flags.setNonNeg(true);
2517 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src, Flags));
2518 }
2519 }
2520
2521 // Attempt to avoid multi-use ops if we don't need anything from them.
2522 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2523 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2524 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2525 break;
2526 }
2528 if (VT.isScalableVector())
2529 return false;
2530 [[fallthrough]];
2531 case ISD::ANY_EXTEND: {
2532 SDValue Src = Op.getOperand(0);
2533 EVT SrcVT = Src.getValueType();
2534 unsigned InBits = SrcVT.getScalarSizeInBits();
2535 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2536 bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
2537
2538 // If we only need the bottom element then we can just bitcast.
2539 // TODO: Handle ANY_EXTEND?
2540 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2541 VT.getSizeInBits() == SrcVT.getSizeInBits())
2542 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2543
2544 APInt InDemandedBits = DemandedBits.trunc(InBits);
2545 APInt InDemandedElts = DemandedElts.zext(InElts);
2546 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2547 Depth + 1))
2548 return true;
2549 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2550 Known = Known.anyext(BitWidth);
2551
2552 // Attempt to avoid multi-use ops if we don't need anything from them.
2553 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2554 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2555 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2556 break;
2557 }
2558 case ISD::TRUNCATE: {
2559 SDValue Src = Op.getOperand(0);
2560
2561 // Simplify the input, using demanded bit information, and compute the known
2562 // zero/one bits live out.
2563 unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
2564 APInt TruncMask = DemandedBits.zext(OperandBitWidth);
2565 if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
2566 Depth + 1))
2567 return true;
2568 Known = Known.trunc(BitWidth);
2569
2570 // Attempt to avoid multi-use ops if we don't need anything from them.
2571 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2572 Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
2573 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));
2574
2575 // If the input is only used by this truncate, see if we can shrink it based
2576 // on the known demanded bits.
2577 switch (Src.getOpcode()) {
2578 default:
2579 break;
2580 case ISD::SRL:
2581 // Shrink SRL by a constant if none of the high bits shifted in are
2582 // demanded.
2583 if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
2584 // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
2585 // undesirable.
2586 break;
2587
2588 if (Src.getNode()->hasOneUse()) {
2589 std::optional<uint64_t> ShAmtC =
2590 TLO.DAG.getValidShiftAmount(Src, DemandedElts, Depth + 2);
2591 if (!ShAmtC || *ShAmtC >= BitWidth)
2592 break;
2593 uint64_t ShVal = *ShAmtC;
2594
2595 APInt HighBits =
2596 APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
2597 HighBits.lshrInPlace(ShVal);
2598 HighBits = HighBits.trunc(BitWidth);
2599
2600 if (!(HighBits & DemandedBits)) {
2601 // None of the shifted in bits are needed. Add a truncate of the
2602 // shift input, then shift it.
2603 SDValue NewShAmt =
2604 TLO.DAG.getShiftAmountConstant(ShVal, VT, dl, TLO.LegalTypes());
2605 SDValue NewTrunc =
2606 TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
2607 return TLO.CombineTo(
2608 Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, NewShAmt));
2609 }
2610 }
2611 break;
2612 }
2613
2614 break;
2615 }
2616 case ISD::AssertZext: {
2617 // AssertZext demands all of the high bits, plus any of the low bits
2618 // demanded by its users.
2619 EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2621 if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
2622 TLO, Depth + 1))
2623 return true;
2624
2625 Known.Zero |= ~InMask;
2626 Known.One &= (~Known.Zero);
2627 break;
2628 }
2630 SDValue Src = Op.getOperand(0);
2631 SDValue Idx = Op.getOperand(1);
2632 ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
2633 unsigned EltBitWidth = Src.getScalarValueSizeInBits();
2634
2635 if (SrcEltCnt.isScalable())
2636 return false;
2637
2638 // Demand the bits from every vector element without a constant index.
2639 unsigned NumSrcElts = SrcEltCnt.getFixedValue();
2640 APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
2641 if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
2642 if (CIdx->getAPIntValue().ult(NumSrcElts))
2643 DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
2644
2645 // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know
2646 // anything about the extended bits.
2647 APInt DemandedSrcBits = DemandedBits;
2648 if (BitWidth > EltBitWidth)
2649 DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);
2650
2651 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
2652 Depth + 1))
2653 return true;
2654
2655 // Attempt to avoid multi-use ops if we don't need anything from them.
2656 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2657 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2658 Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2659 SDValue NewOp =
2660 TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx);
2661 return TLO.CombineTo(Op, NewOp);
2662 }
2663 }
2664
2665 Known = Known2;
2666 if (BitWidth > EltBitWidth)
2667 Known = Known.anyext(BitWidth);
2668 break;
2669 }
2670 case ISD::BITCAST: {
2671 if (VT.isScalableVector())
2672 return false;
2673 SDValue Src = Op.getOperand(0);
2674 EVT SrcVT = Src.getValueType();
2675 unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
2676
2677 // If this is an FP->Int bitcast and if the sign bit is the only
2678 // thing demanded, turn this into a FGETSIGN.
2679 if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
2680 DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
2681 SrcVT.isFloatingPoint()) {
2682 bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
2683 bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
2684 if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
2685 SrcVT != MVT::f128) {
2686 // Cannot eliminate/lower SHL for f128 yet.
2687 EVT Ty = OpVTLegal ? VT : MVT::i32;
2688 // Make a FGETSIGN + SHL to move the sign bit into the appropriate
2689 // place. We expect the SHL to be eliminated by other optimizations.
2690 SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src);
2691 unsigned OpVTSizeInBits = Op.getValueSizeInBits();
2692 if (!OpVTLegal && OpVTSizeInBits > 32)
2693 Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
2694 unsigned ShVal = Op.getValueSizeInBits() - 1;
2695 SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
2696 return TLO.CombineTo(Op,
2697 TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
2698 }
2699 }
2700
2701 // Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
2702 // Demand the elt/bit if any of the original elts/bits are demanded.
2703 if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) {
2704 unsigned Scale = BitWidth / NumSrcEltBits;
2705 unsigned NumSrcElts = SrcVT.getVectorNumElements();
2706 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2707 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2708 for (unsigned i = 0; i != Scale; ++i) {
2709 unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
2710 unsigned BitOffset = EltOffset * NumSrcEltBits;
2711 APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
2712 if (!Sub.isZero()) {
2713 DemandedSrcBits |= Sub;
2714 for (unsigned j = 0; j != NumElts; ++j)
2715 if (DemandedElts[j])
2716 DemandedSrcElts.setBit((j * Scale) + i);
2717 }
2718 }
2719
2720 APInt KnownSrcUndef, KnownSrcZero;
2721 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2722 KnownSrcZero, TLO, Depth + 1))
2723 return true;
2724
2725 KnownBits KnownSrcBits;
2726 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2727 KnownSrcBits, TLO, Depth + 1))
2728 return true;
2729 } else if (IsLE && (NumSrcEltBits % BitWidth) == 0) {
2730 // TODO - bigendian once we have test coverage.
2731 unsigned Scale = NumSrcEltBits / BitWidth;
2732 unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
2733 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2734 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2735 for (unsigned i = 0; i != NumElts; ++i)
2736 if (DemandedElts[i]) {
2737 unsigned Offset = (i % Scale) * BitWidth;
2738 DemandedSrcBits.insertBits(DemandedBits, Offset);
2739 DemandedSrcElts.setBit(i / Scale);
2740 }
2741
2742 if (SrcVT.isVector()) {
2743 APInt KnownSrcUndef, KnownSrcZero;
2744 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2745 KnownSrcZero, TLO, Depth + 1))
2746 return true;
2747 }
2748
2749 KnownBits KnownSrcBits;
2750 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2751 KnownSrcBits, TLO, Depth + 1))
2752 return true;
2753
2754 // Attempt to avoid multi-use ops if we don't need anything from them.
2755 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2756 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2757 Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2758 SDValue NewOp = TLO.DAG.getBitcast(VT, DemandedSrc);
2759 return TLO.CombineTo(Op, NewOp);
2760 }
2761 }
2762 }
2763
2764 // If this is a bitcast, let computeKnownBits handle it. Only do this on a
2765 // recursive call where Known may be useful to the caller.
2766 if (Depth > 0) {
2767 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2768 return false;
2769 }
2770 break;
2771 }
2772 case ISD::MUL:
2773 if (DemandedBits.isPowerOf2()) {
2774 // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
2775 // If we demand exactly one bit N and we have "X * (C' << N)" where C' is
2776 // odd (has LSB set), then the left-shifted low bit of X is the answer.
2777 unsigned CTZ = DemandedBits.countr_zero();
2778 ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
2779 if (C && C->getAPIntValue().countr_zero() == CTZ) {
2780 SDValue AmtC = TLO.DAG.getShiftAmountConstant(CTZ, VT, dl);
2781 SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, Op.getOperand(0), AmtC);
2782 return TLO.CombineTo(Op, Shl);
2783 }
2784 }
2785 // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
2786 // X * X is odd iff X is odd.
2787 // 'Quadratic Reciprocity': X * X -> 0 for bit[1]
2788 if (Op.getOperand(0) == Op.getOperand(1) && DemandedBits.ult(4)) {
2789 SDValue One = TLO.DAG.getConstant(1, dl, VT);
2790 SDValue And1 = TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), One);
2791 return TLO.CombineTo(Op, And1);
2792 }
2793 [[fallthrough]];
2794 case ISD::ADD:
2795 case ISD::SUB: {
2796 // Add, Sub, and Mul don't demand any bits in positions beyond that
2797 // of the highest bit demanded of them.
2798 SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
2799 SDNodeFlags Flags = Op.getNode()->getFlags();
2800 unsigned DemandedBitsLZ = DemandedBits.countl_zero();
2801 APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
2802 KnownBits KnownOp0, KnownOp1;
2803 auto GetDemandedBitsLHSMask = [&](APInt Demanded,
2804 const KnownBits &KnownRHS) {
2805 if (Op.getOpcode() == ISD::MUL)
2806 Demanded.clearHighBits(KnownRHS.countMinTrailingZeros());
2807 return Demanded;
2808 };
2809 if (SimplifyDemandedBits(Op1, LoMask, DemandedElts, KnownOp1, TLO,
2810 Depth + 1) ||
2811 SimplifyDemandedBits(Op0, GetDemandedBitsLHSMask(LoMask, KnownOp1),
2812 DemandedElts, KnownOp0, TLO, Depth + 1) ||
2813 // See if the operation should be performed at a smaller bit width.
2814 ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
2815 if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
2816 // Disable the nsw and nuw flags. We can no longer guarantee that we
2817 // won't wrap after simplification.
2818 Flags.setNoSignedWrap(false);
2819 Flags.setNoUnsignedWrap(false);
2820 Op->setFlags(Flags);
2821 }
2822 return true;
2823 }
2824
2825 // neg x with only low bit demanded is simply x.
2826 if (Op.getOpcode() == ISD::SUB && DemandedBits.isOne() &&
2827 isNullConstant(Op0))
2828 return TLO.CombineTo(Op, Op1);
2829
2830 // Attempt to avoid multi-use ops if we don't need anything from them.
2831 if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2832 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2833 Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2834 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2835 Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2836 if (DemandedOp0 || DemandedOp1) {
2837 Flags.setNoSignedWrap(false);
2838 Flags.setNoUnsignedWrap(false);
2839 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
2840 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
2841 SDValue NewOp =
2842 TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
2843 return TLO.CombineTo(Op, NewOp);
2844 }
2845 }
2846
2847 // If we have a constant operand, we may be able to turn it into -1 if we
2848 // do not demand the high bits. This can make the constant smaller to
2849 // encode, allow more general folding, or match specialized instruction
2850 // patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
2851 // is probably not useful (and could be detrimental).
2853 APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
2854 if (C && !C->isAllOnes() && !C->isOne() &&
2855 (C->getAPIntValue() | HighMask).isAllOnes()) {
2856 SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
2857 // Disable the nsw and nuw flags. We can no longer guarantee that we
2858 // won't wrap after simplification.
2859 Flags.setNoSignedWrap(false);
2860 Flags.setNoUnsignedWrap(false);
2861 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1, Flags);
2862 return TLO.CombineTo(Op, NewOp);
2863 }
2864
2865 // Match a multiply with a disguised negated-power-of-2 and convert to a
2866 // an equivalent shift-left amount.
2867 // Example: (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2868 auto getShiftLeftAmt = [&HighMask](SDValue Mul) -> unsigned {
2869 if (Mul.getOpcode() != ISD::MUL || !Mul.hasOneUse())
2870 return 0;
2871
2872 // Don't touch opaque constants. Also, ignore zero and power-of-2
2873 // multiplies. Those will get folded later.
2874 ConstantSDNode *MulC = isConstOrConstSplat(Mul.getOperand(1));
2875 if (MulC && !MulC->isOpaque() && !MulC->isZero() &&
2876 !MulC->getAPIntValue().isPowerOf2()) {
2877 APInt UnmaskedC = MulC->getAPIntValue() | HighMask;
2878 if (UnmaskedC.isNegatedPowerOf2())
2879 return (-UnmaskedC).logBase2();
2880 }
2881 return 0;
2882 };
2883
2884 auto foldMul = [&](ISD::NodeType NT, SDValue X, SDValue Y,
2885 unsigned ShlAmt) {
2886 SDValue ShlAmtC = TLO.DAG.getShiftAmountConstant(ShlAmt, VT, dl);
2887 SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, X, ShlAmtC);
2888 SDValue Res = TLO.DAG.getNode(NT, dl, VT, Y, Shl);
2889 return TLO.CombineTo(Op, Res);
2890 };
2891
2893 if (Op.getOpcode() == ISD::ADD) {
2894 // (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2895 if (unsigned ShAmt = getShiftLeftAmt(Op0))
2896 return foldMul(ISD::SUB, Op0.getOperand(0), Op1, ShAmt);
2897 // Op0 + (X * MulC) --> Op0 - (X << log2(-MulC))
2898 if (unsigned ShAmt = getShiftLeftAmt(Op1))
2899 return foldMul(ISD::SUB, Op1.getOperand(0), Op0, ShAmt);
2900 }
2901 if (Op.getOpcode() == ISD::SUB) {
2902 // Op0 - (X * MulC) --> Op0 + (X << log2(-MulC))
2903 if (unsigned ShAmt = getShiftLeftAmt(Op1))
2904 return foldMul(ISD::ADD, Op1.getOperand(0), Op0, ShAmt);
2905 }
2906 }
2907
2908 if (Op.getOpcode() == ISD::MUL) {
2909 Known = KnownBits::mul(KnownOp0, KnownOp1);
2910 } else { // Op.getOpcode() is either ISD::ADD or ISD::SUB.
2912 Op.getOpcode() == ISD::ADD, Flags.hasNoSignedWrap(),
2913 Flags.hasNoUnsignedWrap(), KnownOp0, KnownOp1);
2914 }
2915 break;
2916 }
2917 default:
2918 // We also ask the target about intrinsics (which could be specific to it).
2919 if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2920 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
2921 // TODO: Probably okay to remove after audit; here to reduce change size
2922 // in initial enablement patch for scalable vectors
2923 if (Op.getValueType().isScalableVector())
2924 break;
2925 if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
2926 Known, TLO, Depth))
2927 return true;
2928 break;
2929 }
2930
2931 // Just use computeKnownBits to compute output bits.
2932 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2933 break;
2934 }
2935
2936 // If we know the value of all of the demanded bits, return this as a
2937 // constant.
2938 if (!isTargetCanonicalConstantNode(Op) &&
2939 DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
2940 // Avoid folding to a constant if any OpaqueConstant is involved.
2941 const SDNode *N = Op.getNode();
2942 for (SDNode *Op :
2944 if (auto *C = dyn_cast<ConstantSDNode>(Op))
2945 if (C->isOpaque())
2946 return false;
2947 }
2948 if (VT.isInteger())
2949 return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
2950 if (VT.isFloatingPoint())
2951 return TLO.CombineTo(
2952 Op,
2953 TLO.DAG.getConstantFP(
2954 APFloat(TLO.DAG.EVTToAPFloatSemantics(VT), Known.One), dl, VT));
2955 }
2956
2957 // A multi use 'all demanded elts' simplify failed to find any knownbits.
2958 // Try again just for the original demanded elts.
2959 // Ensure we do this AFTER constant folding above.
2960 if (HasMultiUse && Known.isUnknown() && !OriginalDemandedElts.isAllOnes())
2961 Known = TLO.DAG.computeKnownBits(Op, OriginalDemandedElts, Depth);
2962
2963 return false;
2964}
2965
                                                const APInt &DemandedElts,
                                                DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  // Build a TargetLoweringOpt from the combiner's legalization state so the
  // main SimplifyDemandedVectorElts implementation knows which node types /
  // operations it is still allowed to create.
  TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                        !DCI.isBeforeLegalizeOps());

  // KnownUndef/KnownZero results are computed but not needed by this
  // convenience overload; only the "did anything change" bit is returned.
  APInt KnownUndef, KnownZero;
  bool Simplified =
      SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
  if (Simplified) {
    // Re-queue the (possibly replaced) node and commit any replacements that
    // were recorded in TLO back through the combiner.
    DCI.AddToWorklist(Op.getNode());
    DCI.CommitTargetLoweringOpt(TLO);
  }

  return Simplified;
}
2983
2984/// Given a vector binary operation and known undefined elements for each input
2985/// operand, compute whether each element of the output is undefined.
2987 const APInt &UndefOp0,
2988 const APInt &UndefOp1) {
2989 EVT VT = BO.getValueType();
2991 "Vector binop only");
2992
2993 EVT EltVT = VT.getVectorElementType();
2994 unsigned NumElts = VT.isFixedLengthVector() ? VT.getVectorNumElements() : 1;
2995 assert(UndefOp0.getBitWidth() == NumElts &&
2996 UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
2997
2998 auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
2999 const APInt &UndefVals) {
3000 if (UndefVals[Index])
3001 return DAG.getUNDEF(EltVT);
3002
3003 if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
3004 // Try hard to make sure that the getNode() call is not creating temporary
3005 // nodes. Ignore opaque integers because they do not constant fold.
3006 SDValue Elt = BV->getOperand(Index);
3007 auto *C = dyn_cast<ConstantSDNode>(Elt);
3008 if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
3009 return Elt;
3010 }
3011
3012 return SDValue();
3013 };
3014
3015 APInt KnownUndef = APInt::getZero(NumElts);
3016 for (unsigned i = 0; i != NumElts; ++i) {
3017 // If both inputs for this element are either constant or undef and match
3018 // the element type, compute the constant/undef result for this element of
3019 // the vector.
3020 // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
3021 // not handle FP constants. The code within getNode() should be refactored
3022 // to avoid the danger of creating a bogus temporary node here.
3023 SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
3024 SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
3025 if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
3026 if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
3027 KnownUndef.setBit(i);
3028 }
3029 return KnownUndef;
3030}
3031
3033 SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
3034 APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
3035 bool AssumeSingleUse) const {
3036 EVT VT = Op.getValueType();
3037 unsigned Opcode = Op.getOpcode();
3038 APInt DemandedElts = OriginalDemandedElts;
3039 unsigned NumElts = DemandedElts.getBitWidth();
3040 assert(VT.isVector() && "Expected vector op");
3041
3042 KnownUndef = KnownZero = APInt::getZero(NumElts);
3043
3044 const TargetLowering &TLI = TLO.DAG.getTargetLoweringInfo();
3045 if (!TLI.shouldSimplifyDemandedVectorElts(Op, TLO))
3046 return false;
3047
3048 // TODO: For now we assume we know nothing about scalable vectors.
3049 if (VT.isScalableVector())
3050 return false;
3051
3052 assert(VT.getVectorNumElements() == NumElts &&
3053 "Mask size mismatches value type element count!");
3054
3055 // Undef operand.
3056 if (Op.isUndef()) {
3057 KnownUndef.setAllBits();
3058 return false;
3059 }
3060
3061 // If Op has other users, assume that all elements are needed.
3062 if (!AssumeSingleUse && !Op.getNode()->hasOneUse())
3063 DemandedElts.setAllBits();
3064
3065 // Not demanding any elements from Op.
3066 if (DemandedElts == 0) {
3067 KnownUndef.setAllBits();
3068 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3069 }
3070
3071 // Limit search depth.
3073 return false;
3074
3075 SDLoc DL(Op);
3076 unsigned EltSizeInBits = VT.getScalarSizeInBits();
3077 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
3078
3079 // Helper for demanding the specified elements and all the bits of both binary
3080 // operands.
3081 auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
3082 SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op0, DemandedElts,
3083 TLO.DAG, Depth + 1);
3084 SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op1, DemandedElts,
3085 TLO.DAG, Depth + 1);
3086 if (NewOp0 || NewOp1) {
3087 SDValue NewOp =
3088 TLO.DAG.getNode(Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0,
3089 NewOp1 ? NewOp1 : Op1, Op->getFlags());
3090 return TLO.CombineTo(Op, NewOp);
3091 }
3092 return false;
3093 };
3094
3095 switch (Opcode) {
3096 case ISD::SCALAR_TO_VECTOR: {
3097 if (!DemandedElts[0]) {
3098 KnownUndef.setAllBits();
3099 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3100 }
3101 SDValue ScalarSrc = Op.getOperand(0);
3102 if (ScalarSrc.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
3103 SDValue Src = ScalarSrc.getOperand(0);
3104 SDValue Idx = ScalarSrc.getOperand(1);
3105 EVT SrcVT = Src.getValueType();
3106
3107 ElementCount SrcEltCnt = SrcVT.getVectorElementCount();
3108
3109 if (SrcEltCnt.isScalable())
3110 return false;
3111
3112 unsigned NumSrcElts = SrcEltCnt.getFixedValue();
3113 if (isNullConstant(Idx)) {
3114 APInt SrcDemandedElts = APInt::getOneBitSet(NumSrcElts, 0);
3115 APInt SrcUndef = KnownUndef.zextOrTrunc(NumSrcElts);
3116 APInt SrcZero = KnownZero.zextOrTrunc(NumSrcElts);
3117 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3118 TLO, Depth + 1))
3119 return true;
3120 }
3121 }
3122 KnownUndef.setHighBits(NumElts - 1);
3123 break;
3124 }
3125 case ISD::BITCAST: {
3126 SDValue Src = Op.getOperand(0);
3127 EVT SrcVT = Src.getValueType();
3128
3129 // We only handle vectors here.
3130 // TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits?
3131 if (!SrcVT.isVector())
3132 break;
3133
3134 // Fast handling of 'identity' bitcasts.
3135 unsigned NumSrcElts = SrcVT.getVectorNumElements();
3136 if (NumSrcElts == NumElts)
3137 return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
3138 KnownZero, TLO, Depth + 1);
3139
3140 APInt SrcDemandedElts, SrcZero, SrcUndef;
3141
3142 // Bitcast from 'large element' src vector to 'small element' vector, we
3143 // must demand a source element if any DemandedElt maps to it.
3144 if ((NumElts % NumSrcElts) == 0) {
3145 unsigned Scale = NumElts / NumSrcElts;
3146 SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3147 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3148 TLO, Depth + 1))
3149 return true;
3150
3151 // Try calling SimplifyDemandedBits, converting demanded elts to the bits
3152 // of the large element.
3153 // TODO - bigendian once we have test coverage.
3154 if (IsLE) {
3155 unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
3156 APInt SrcDemandedBits = APInt::getZero(SrcEltSizeInBits);
3157 for (unsigned i = 0; i != NumElts; ++i)
3158 if (DemandedElts[i]) {
3159 unsigned Ofs = (i % Scale) * EltSizeInBits;
3160 SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
3161 }
3162
3163 KnownBits Known;
3164 if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known,
3165 TLO, Depth + 1))
3166 return true;
3167
3168 // The bitcast has split each wide element into a number of
3169 // narrow subelements. We have just computed the Known bits
3170 // for wide elements. See if element splitting results in
3171 // some subelements being zero. Only for demanded elements!
3172 for (unsigned SubElt = 0; SubElt != Scale; ++SubElt) {
3173 if (!Known.Zero.extractBits(EltSizeInBits, SubElt * EltSizeInBits)
3174 .isAllOnes())
3175 continue;
3176 for (unsigned SrcElt = 0; SrcElt != NumSrcElts; ++SrcElt) {
3177 unsigned Elt = Scale * SrcElt + SubElt;
3178 if (DemandedElts[Elt])
3179 KnownZero.setBit(Elt);
3180 }
3181 }
3182 }
3183
3184 // If the src element is zero/undef then all the output elements will be -
3185 // only demanded elements are guaranteed to be correct.
3186 for (unsigned i = 0; i != NumSrcElts; ++i) {
3187 if (SrcDemandedElts[i]) {
3188 if (SrcZero[i])
3189 KnownZero.setBits(i * Scale, (i + 1) * Scale);
3190 if (SrcUndef[i])
3191 KnownUndef.setBits(i * Scale, (i + 1) * Scale);
3192 }
3193 }
3194 }
3195
3196 // Bitcast from 'small element' src vector to 'large element' vector, we
3197 // demand all smaller source elements covered by the larger demanded element
3198 // of this vector.
3199 if ((NumSrcElts % NumElts) == 0) {
3200 unsigned Scale = NumSrcElts / NumElts;
3201 SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3202 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3203 TLO, Depth + 1))
3204 return true;
3205
3206 // If all the src elements covering an output element are zero/undef, then
3207 // the output element will be as well, assuming it was demanded.
3208 for (unsigned i = 0; i != NumElts; ++i) {
3209 if (DemandedElts[i]) {
3210 if (SrcZero.extractBits(Scale, i * Scale).isAllOnes())
3211 KnownZero.setBit(i);
3212 if (SrcUndef.extractBits(Scale, i * Scale).isAllOnes())
3213 KnownUndef.setBit(i);
3214 }
3215 }
3216 }
3217 break;
3218 }
3219 case ISD::FREEZE: {
3220 SDValue N0 = Op.getOperand(0);
3221 if (TLO.DAG.isGuaranteedNotToBeUndefOrPoison(N0, DemandedElts,
3222 /*PoisonOnly=*/false))
3223 return TLO.CombineTo(Op, N0);
3224
3225 // TODO: Replace this with the general fold from DAGCombiner::visitFREEZE
3226 // freeze(op(x, ...)) -> op(freeze(x), ...).
3227 if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && DemandedElts == 1)
3228 return TLO.CombineTo(
3230 TLO.DAG.getFreeze(N0.getOperand(0))));
3231 break;
3232 }
3233 case ISD::BUILD_VECTOR: {
3234 // Check all elements and simplify any unused elements with UNDEF.
3235 if (!DemandedElts.isAllOnes()) {
3236 // Don't simplify BROADCASTS.
3237 if (llvm::any_of(Op->op_values(),
3238 [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
3239 SmallVector<SDValue, 32> Ops(Op->op_begin(), Op->op_end());
3240 bool Updated = false;
3241 for (unsigned i = 0; i != NumElts; ++i) {
3242 if (!DemandedElts[i] && !Ops[i].isUndef()) {
3243 Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
3244 KnownUndef.setBit(i);
3245 Updated = true;
3246 }
3247 }
3248 if (Updated)
3249 return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
3250 }
3251 }
3252 for (unsigned i = 0; i != NumElts; ++i) {
3253 SDValue SrcOp = Op.getOperand(i);
3254 if (SrcOp.isUndef()) {
3255 KnownUndef.setBit(i);
3256 } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
3258 KnownZero.setBit(i);
3259 }
3260 }
3261 break;
3262 }
3263 case ISD::CONCAT_VECTORS: {
3264 EVT SubVT = Op.getOperand(0).getValueType();
3265 unsigned NumSubVecs = Op.getNumOperands();
3266 unsigned NumSubElts = SubVT.getVectorNumElements();
3267 for (unsigned i = 0; i != NumSubVecs; ++i) {
3268 SDValue SubOp = Op.getOperand(i);
3269 APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3270 APInt SubUndef, SubZero;
3271 if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
3272 Depth + 1))
3273 return true;
3274 KnownUndef.insertBits(SubUndef, i * NumSubElts);
3275 KnownZero.insertBits(SubZero, i * NumSubElts);
3276 }
3277
3278 // Attempt to avoid multi-use ops if we don't need anything from them.
3279 if (!DemandedElts.isAllOnes()) {
3280 bool FoundNewSub = false;
3281 SmallVector<SDValue, 2> DemandedSubOps;
3282 for (unsigned i = 0; i != NumSubVecs; ++i) {
3283 SDValue SubOp = Op.getOperand(i);
3284 APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3285 SDValue NewSubOp = SimplifyMultipleUseDemandedVectorElts(
3286 SubOp, SubElts, TLO.DAG, Depth + 1);
3287 DemandedSubOps.push_back(NewSubOp ? NewSubOp : SubOp);
3288 FoundNewSub = NewSubOp ? true : FoundNewSub;
3289 }
3290 if (FoundNewSub) {
3291 SDValue NewOp =
3292 TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, DemandedSubOps);
3293 return TLO.CombineTo(Op, NewOp);
3294 }
3295 }
3296 break;
3297 }
3298 case ISD::INSERT_SUBVECTOR: {
3299 // Demand any elements from the subvector and the remainder from the src its
3300 // inserted into.
3301 SDValue Src = Op.getOperand(0);
3302 SDValue Sub = Op.getOperand(1);
3303 uint64_t Idx = Op.getConstantOperandVal(2);
3304 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
3305 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
3306 APInt DemandedSrcElts = DemandedElts;
3307 DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
3308
3309 APInt SubUndef, SubZero;
3310 if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
3311 Depth + 1))
3312 return true;
3313
3314 // If none of the src operand elements are demanded, replace it with undef.
3315 if (!DemandedSrcElts && !Src.isUndef())
3316 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
3317 TLO.DAG.getUNDEF(VT), Sub,
3318 Op.getOperand(2)));
3319
3320 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownUndef, KnownZero,
3321 TLO, Depth + 1))
3322 return true;
3323 KnownUndef.insertBits(SubUndef, Idx);
3324 KnownZero.insertBits(SubZero, Idx);
3325
3326 // Attempt to avoid multi-use ops if we don't need anything from them.
3327 if (!DemandedSrcElts.isAllOnes() || !DemandedSubElts.isAllOnes()) {
3328 SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3329 Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3330 SDValue NewSub = SimplifyMultipleUseDemandedVectorElts(
3331 Sub, DemandedSubElts, TLO.DAG, Depth + 1);
3332 if (NewSrc || NewSub) {
3333 NewSrc = NewSrc ? NewSrc : Src;
3334 NewSub = NewSub ? NewSub : Sub;
3335 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3336 NewSub, Op.getOperand(2));
3337 return TLO.CombineTo(Op, NewOp);
3338 }
3339 }
3340 break;
3341 }
3343 // Offset the demanded elts by the subvector index.
3344 SDValue Src = Op.getOperand(0);
3345 if (Src.getValueType().isScalableVector())
3346 break;
3347 uint64_t Idx = Op.getConstantOperandVal(1);
3348 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3349 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
3350
3351 APInt SrcUndef, SrcZero;
3352 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3353 Depth + 1))
3354 return true;
3355 KnownUndef = SrcUndef.extractBits(NumElts, Idx);
3356 KnownZero = SrcZero.extractBits(NumElts, Idx);
3357
3358 // Attempt to avoid multi-use ops if we don't need anything from them.
3359 if (!DemandedElts.isAllOnes()) {
3360 SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3361 Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3362 if (NewSrc) {
3363 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3364 Op.getOperand(1));
3365 return TLO.CombineTo(Op, NewOp);
3366 }
3367 }
3368 break;
3369 }
3371 SDValue Vec = Op.getOperand(0);
3372 SDValue Scl = Op.getOperand(1);
3373 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
3374
3375 // For a legal, constant insertion index, if we don't need this insertion
3376 // then strip it, else remove it from the demanded elts.
3377 if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
3378 unsigned Idx = CIdx->getZExtValue();
3379 if (!DemandedElts[Idx])
3380 return TLO.CombineTo(Op, Vec);
3381
3382 APInt DemandedVecElts(DemandedElts);
3383 DemandedVecElts.clearBit(Idx);
3384 if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
3385 KnownZero, TLO, Depth + 1))
3386 return true;
3387
3388 KnownUndef.setBitVal(Idx, Scl.isUndef());
3389
3390 KnownZero.setBitVal(Idx, isNullConstant(Scl) || isNullFPConstant(Scl));
3391 break;
3392 }
3393
3394 APInt VecUndef, VecZero;
3395 if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
3396 Depth + 1))
3397 return true;
3398 // Without knowing the insertion index we can't set KnownUndef/KnownZero.
3399 break;
3400 }
3401 case ISD::VSELECT: {
3402 SDValue Sel = Op.getOperand(0);
3403 SDValue LHS = Op.getOperand(1);
3404 SDValue RHS = Op.getOperand(2);
3405
3406 // Try to transform the select condition based on the current demanded
3407 // elements.
3408 APInt UndefSel, ZeroSel;
3409 if (SimplifyDemandedVectorElts(Sel, DemandedElts, UndefSel, ZeroSel, TLO,
3410 Depth + 1))
3411 return true;
3412
3413 // See if we can simplify either vselect operand.
3414 APInt DemandedLHS(DemandedElts);
3415 APInt DemandedRHS(DemandedElts);
3416 APInt UndefLHS, ZeroLHS;
3417 APInt UndefRHS, ZeroRHS;
3418 if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3419 Depth + 1))
3420 return true;
3421 if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3422 Depth + 1))
3423 return true;
3424
3425 KnownUndef = UndefLHS & UndefRHS;
3426 KnownZero = ZeroLHS & ZeroRHS;
3427
3428 // If we know that the selected element is always zero, we don't need the
3429 // select value element.
3430 APInt DemandedSel = DemandedElts & ~KnownZero;
3431 if (DemandedSel != DemandedElts)
3432 if (SimplifyDemandedVectorElts(Sel, DemandedSel, UndefSel, ZeroSel, TLO,
3433 Depth + 1))
3434 return true;
3435
3436 break;
3437 }
3438 case ISD::VECTOR_SHUFFLE: {
3439 SDValue LHS = Op.getOperand(0);
3440 SDValue RHS = Op.getOperand(1);
3441 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
3442
3443 // Collect demanded elements from shuffle operands..
3444 APInt DemandedLHS(NumElts, 0);
3445 APInt DemandedRHS(NumElts, 0);
3446 for (unsigned i = 0; i != NumElts; ++i) {
3447 int M = ShuffleMask[i];
3448 if (M < 0 || !DemandedElts[i])
3449 continue;
3450 assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
3451 if (M < (int)NumElts)
3452 DemandedLHS.setBit(M);
3453 else
3454 DemandedRHS.setBit(M - NumElts);
3455 }
3456
3457 // See if we can simplify either shuffle operand.
3458 APInt UndefLHS, ZeroLHS;
3459 APInt UndefRHS, ZeroRHS;
3460 if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3461 Depth + 1))
3462 return true;
3463 if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3464 Depth + 1))
3465 return true;
3466
3467 // Simplify mask using undef elements from LHS/RHS.
3468 bool Updated = false;
3469 bool IdentityLHS = true, IdentityRHS = true;
3470 SmallVector<int, 32> NewMask(ShuffleMask);
3471 for (unsigned i = 0; i != NumElts; ++i) {
3472 int &M = NewMask[i];
3473 if (M < 0)
3474 continue;
3475 if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
3476 (M >= (int)NumElts && UndefRHS[M - NumElts])) {
3477 Updated = true;
3478 M = -1;
3479 }
3480 IdentityLHS &= (M < 0) || (M == (int)i);
3481 IdentityRHS &= (M < 0) || ((M - NumElts) == i);
3482 }
3483
3484 // Update legal shuffle masks based on demanded elements if it won't reduce
3485 // to Identity which can cause premature removal of the shuffle mask.
3486 if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
3487 SDValue LegalShuffle =
3488 buildLegalVectorShuffle(VT, DL, LHS, RHS, NewMask, TLO.DAG);
3489 if (LegalShuffle)
3490 return TLO.CombineTo(Op, LegalShuffle);
3491 }
3492
3493 // Propagate undef/zero elements from LHS/RHS.
3494 for (unsigned i = 0; i != NumElts; ++i) {
3495 int M = ShuffleMask[i];
3496 if (M < 0) {
3497 KnownUndef.setBit(i);
3498 } else if (M < (int)NumElts) {
3499 if (UndefLHS[M])
3500 KnownUndef.setBit(i);
3501 if (ZeroLHS[M])
3502 KnownZero.setBit(i);
3503 } else {
3504 if (UndefRHS[M - NumElts])
3505 KnownUndef.setBit(i);
3506 if (ZeroRHS[M - NumElts])
3507 KnownZero.setBit(i);
3508 }
3509 }
3510 break;
3511 }
3515 APInt SrcUndef, SrcZero;
3516 SDValue Src = Op.getOperand(0);
3517 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3518 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts);
3519 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3520 Depth + 1))
3521 return true;
3522 KnownZero = SrcZero.zextOrTrunc(NumElts);
3523 KnownUndef = SrcUndef.zextOrTrunc(NumElts);
3524
3525 if (IsLE && Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
3526 Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
3527 DemandedSrcElts == 1) {
3528 // aext - if we just need the bottom element then we can bitcast.
3529 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
3530 }
3531
3532 if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
3533 // zext(undef) upper bits are guaranteed to be zero.
3534 if (DemandedElts.isSubsetOf(KnownUndef))
3535 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3536 KnownUndef.clearAllBits();
3537
3538 // zext - if we just need the bottom element then we can mask:
3539 // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
3540 if (IsLE && DemandedSrcElts == 1 && Src.getOpcode() == ISD::AND &&
3541 Op->isOnlyUserOf(Src.getNode()) &&
3542 Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
3543 SDLoc DL(Op);
3544 EVT SrcVT = Src.getValueType();
3545 EVT SrcSVT = SrcVT.getScalarType();
3546 SmallVector<SDValue> MaskElts;
3547 MaskElts.push_back(TLO.DAG.getAllOnesConstant(DL, SrcSVT));
3548 MaskElts.append(NumSrcElts - 1, TLO.DAG.getConstant(0, DL, SrcSVT));
3549 SDValue Mask = TLO.DAG.getBuildVector(SrcVT, DL, MaskElts);
3550 if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(
3551 ISD::AND, DL, SrcVT, {Src.getOperand(1), Mask})) {
3552 Fold = TLO.DAG.getNode(ISD::AND, DL, SrcVT, Src.getOperand(0), Fold);
3553 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Fold));
3554 }
3555 }
3556 }
3557 break;
3558 }
3559
3560 // TODO: There are more binop opcodes that could be handled here - MIN,
3561 // MAX, saturated math, etc.
3562 case ISD::ADD: {
3563 SDValue Op0 = Op.getOperand(0);
3564 SDValue Op1 = Op.getOperand(1);
3565 if (Op0 == Op1 && Op->isOnlyUserOf(Op0.getNode())) {
3566 APInt UndefLHS, ZeroLHS;
3567 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3568 Depth + 1, /*AssumeSingleUse*/ true))
3569 return true;
3570 }
3571 [[fallthrough]];
3572 }
3573 case ISD::AVGCEILS:
3574 case ISD::AVGCEILU:
3575 case ISD::AVGFLOORS:
3576 case ISD::AVGFLOORU:
3577 case ISD::OR:
3578 case ISD::XOR:
3579 case ISD::SUB:
3580 case ISD::FADD:
3581 case ISD::FSUB:
3582 case ISD::FMUL:
3583 case ISD::FDIV:
3584 case ISD::FREM: {
3585 SDValue Op0 = Op.getOperand(0);
3586 SDValue Op1 = Op.getOperand(1);
3587
3588 APInt UndefRHS, ZeroRHS;
3589 if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3590 Depth + 1))
3591 return true;
3592 APInt UndefLHS, ZeroLHS;
3593 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3594 Depth + 1))
3595 return true;
3596
3597 KnownZero = ZeroLHS & ZeroRHS;
3598 KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
3599
3600 // Attempt to avoid multi-use ops if we don't need anything from them.
3601 // TODO - use KnownUndef to relax the demandedelts?
3602 if (!DemandedElts.isAllOnes())
3603 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3604 return true;
3605 break;
3606 }
3607 case ISD::SHL:
3608 case ISD::SRL:
3609 case ISD::SRA:
3610 case ISD::ROTL:
3611 case ISD::ROTR: {
3612 SDValue Op0 = Op.getOperand(0);
3613 SDValue Op1 = Op.getOperand(1);
3614
3615 APInt UndefRHS, ZeroRHS;
3616 if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3617 Depth + 1))
3618 return true;
3619 APInt UndefLHS, ZeroLHS;
3620 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3621 Depth + 1))
3622 return true;
3623
3624 KnownZero = ZeroLHS;
3625 KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
3626
3627 // Attempt to avoid multi-use ops if we don't need anything from them.
3628 // TODO - use KnownUndef to relax the demandedelts?
3629 if (!DemandedElts.isAllOnes())
3630 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3631 return true;
3632 break;
3633 }
3634 case ISD::MUL:
3635 case ISD::MULHU:
3636 case ISD::MULHS:
3637 case ISD::AND: {
3638 SDValue Op0 = Op.getOperand(0);
3639 SDValue Op1 = Op.getOperand(1);
3640
3641 APInt SrcUndef, SrcZero;
3642 if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO,
3643 Depth + 1))
3644 return true;
3645 // If we know that a demanded element was zero in Op1 we don't need to
3646 // demand it in Op0 - its guaranteed to be zero.
3647 APInt DemandedElts0 = DemandedElts & ~SrcZero;
3648 if (SimplifyDemandedVectorElts(Op0, DemandedElts0, KnownUndef, KnownZero,
3649 TLO, Depth + 1))
3650 return true;
3651
3652 KnownUndef &= DemandedElts0;
3653 KnownZero &= DemandedElts0;
3654
3655 // If every element pair has a zero/undef then just fold to zero.
3656 // fold (and x, undef) -> 0 / (and x, 0) -> 0
3657 // fold (mul x, undef) -> 0 / (mul x, 0) -> 0
3658 if (DemandedElts.isSubsetOf(SrcZero | KnownZero | SrcUndef | KnownUndef))
3659 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3660
3661 // If either side has a zero element, then the result element is zero, even
3662 // if the other is an UNDEF.
3663 // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
3664 // and then handle 'and' nodes with the rest of the binop opcodes.
3665 KnownZero |= SrcZero;
3666 KnownUndef &= SrcUndef;
3667 KnownUndef &= ~KnownZero;
3668
3669 // Attempt to avoid multi-use ops if we don't need anything from them.
3670 if (!DemandedElts.isAllOnes())
3671 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3672 return true;
3673 break;
3674 }
3675 case ISD::TRUNCATE:
3676 case ISD::SIGN_EXTEND:
3677 case ISD::ZERO_EXTEND:
3678 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3679 KnownZero, TLO, Depth + 1))
3680 return true;
3681
3682 if (Op.getOpcode() == ISD::ZERO_EXTEND) {
3683 // zext(undef) upper bits are guaranteed to be zero.
3684 if (DemandedElts.isSubsetOf(KnownUndef))
3685 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3686 KnownUndef.clearAllBits();
3687 }
3688 break;
3689 default: {
3690 if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
3691 if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
3692 KnownZero, TLO, Depth))
3693 return true;
3694 } else {
3695 KnownBits Known;
3696 APInt DemandedBits = APInt::getAllOnes(EltSizeInBits);
3697 if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
3698 TLO, Depth, AssumeSingleUse))
3699 return true;
3700 }
3701 break;
3702 }
3703 }
3704 assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
3705
3706 // Constant fold all undef cases.
3707 // TODO: Handle zero cases as well.
3708 if (DemandedElts.isSubsetOf(KnownUndef))
3709 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3710
3711 return false;
3712}
3713
3714/// Determine which of the bits specified in Mask are known to be either zero or
3715/// one and return them in the Known.
/// Default implementation of the target hook; targets override this to expose
/// known-bits facts about their custom (post-ISel) node types.
3717                                                   KnownBits &Known,
3718                                                   const APInt &DemandedElts,
3719                                                   const SelectionDAG &DAG,
3720                                                   unsigned Depth) const {
  // This hook must only be reached for target-specific or intrinsic opcodes,
  // per the assertion message below.
3721  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3722          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3723          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3724          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3725         "Should use MaskedValueIsZero if you don't know whether Op"
3726         " is a target node!");
  // Conservative default: report that no bits are known zero or one.
3727  Known.resetAll();
3728}
3729
3732    const APInt &DemandedElts, const MachineRegisterInfo &MRI,
3733    unsigned Depth) const {
  // Conservative default for GlobalISel target instructions: nothing is known.
3734  Known.resetAll();
3735}
3736
3738    const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
3739  // The low bits are known zero if the pointer is aligned.
  // E.g. an object with 8-byte alignment guarantees the low 3 address bits
  // are zero, so expose log2(alignment) known-zero low bits.
3740  Known.Zero.setLowBits(Log2(MF.getFrameInfo().getObjectAlign(FrameIdx)));
3741}
3742
3745                                                       unsigned Depth) const {
  // Conservative default: only byte alignment can be assumed.
3746  return Align(1);
3747}
3748
3749/// This method can be implemented by targets that want to expose additional
3750/// information about sign bits to the DAG Combiner.
/// The default implementation makes the weakest possible claim.
3752                                                         const APInt &,
3753                                                         const SelectionDAG &,
3754                                                         unsigned Depth) const {
  // Only target-specific or intrinsic opcodes may reach this hook.
3755  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3756          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3757          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3758          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3759         "Should use ComputeNumSignBits if you don't know whether Op"
3760         " is a target node!");
  // Conservative default: every value has at least one (copy of the) sign bit.
3761  return 1;
3762}
3763
3765    GISelKnownBits &Analysis, Register R, const APInt &DemandedElts,
3766    const MachineRegisterInfo &MRI, unsigned Depth) const {
  // Conservative default for GlobalISel: only one sign bit is guaranteed.
3767  return 1;
3768}
3769
3771    SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
3772    TargetLoweringOpt &TLO, unsigned Depth) const {
  // Only target-specific or intrinsic opcodes may reach this hook.
3773  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3774          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3775          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3776          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3777         "Should use SimplifyDemandedVectorElts if you don't know whether Op"
3778         " is a target node!");
  // Default: no target-specific simplification was performed.
3779  return false;
3780}
3781
3783    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3784    KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
  // Only target-specific or intrinsic opcodes may reach this hook.
3785  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3786          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3787          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3788          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3789         "Should use SimplifyDemandedBits if you don't know whether Op"
3790         " is a target node!");
  // Still populate Known via the target known-bits hook so callers get the
  // best available bit information, but report that no node simplification
  // took place.
3791  computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
3792  return false;
3793}
3794
3796    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3797    SelectionDAG &DAG, unsigned Depth) const {
  // Only target-specific or intrinsic opcodes may reach this hook.
3798  assert(
3799      (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3800       Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3801       Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3802       Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3803      "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
3804      " is a target node!");
  // Default: no multi-use-aware replacement value is available.
3805  return SDValue();
3806}
3807
3808SDValue
/// Build a VECTOR_SHUFFLE only if the mask (or its operand-commuted form) is
/// legal for the target; otherwise return an empty SDValue.
3811                                        SelectionDAG &DAG) const {
  // First try the mask exactly as given.
3812  bool LegalMask = isShuffleMaskLegal(Mask, VT);
3813  if (!LegalMask) {
    // Retry with the operands swapped (the mask is adjusted accordingly).
3814    std::swap(N0, N1);
3816    LegalMask = isShuffleMaskLegal(Mask, VT);
3817  }
3818
  // Neither orientation is legal for this target: give up.
3819  if (!LegalMask)
3820    return SDValue();
3821
3822  return DAG.getVectorShuffle(VT, DL, N0, N1, Mask);
3823}
3824
  // Default implementation: no constant could be extracted from the load.
3826  return nullptr;
3827}
3828
3830    SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
3831    bool PoisonOnly, unsigned Depth) const {
  // Only target-specific or intrinsic opcodes may reach this hook.
3832  assert(
3833      (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3834       Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3835       Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3836       Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3837      "Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op"
3838      " is a target node!");
3839
3840  // If Op can't create undef/poison and none of its operands are undef/poison
3841  // then Op is never undef/poison. ConsiderFlags is true so that flag-enabled
  // poison generation (e.g. nsw/nuw style flags) also counts as "can create".
3842  return !canCreateUndefOrPoisonForTargetNode(Op, DemandedElts, DAG, PoisonOnly,
3843                                              /*ConsiderFlags*/ true, Depth) &&
3844         all_of(Op->ops(), [&](SDValue V) {
3845           return DAG.isGuaranteedNotToBeUndefOrPoison(V, PoisonOnly,
3846                                                       Depth + 1);
3847         });
3848}
3849
3851    SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
3852    bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
  // Only target-specific or intrinsic opcodes may reach this hook.
3853  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3854          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3855          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3856          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3857         "Should use canCreateUndefOrPoison if you don't know whether Op"
3858         " is a target node!");
3859  // Be conservative and return true.
3860  return true;
3861}
3862
3864                                                  const SelectionDAG &DAG,
3865                                                  bool SNaN,
3866                                                  unsigned Depth) const {
  // Only target-specific or intrinsic opcodes may reach this hook.
3867  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3868          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3869          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3870          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3871         "Should use isKnownNeverNaN if you don't know whether Op"
3872         " is a target node!");
  // Conservative default: we cannot prove the value is never NaN.
3873  return false;
3874}
3875
3877                                               const APInt &DemandedElts,
3878                                               APInt &UndefElts,
3879                                               const SelectionDAG &DAG,
3880                                               unsigned Depth) const {
  // Only target-specific or intrinsic opcodes may reach this hook.
3881  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3882          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3883          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3884          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3885         "Should use isSplatValue if you don't know whether Op"
3886         " is a target node!");
  // Conservative default: cannot prove the target node is a splat.
3887  return false;
3888}
3889
3890// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
3891// work with truncating build vectors and vectors with elements of less than
3892// 8 bits.
/// Return true if N is a constant (or constant splat) that the target's
/// boolean-contents rules interpret as "true".
3894  if (!N)
3895    return false;
3896
3897  unsigned EltWidth;
3898  APInt CVal;
  // Accept scalar constants and truncating constant splats, but reject
  // splats containing undef elements.
3899  if (ConstantSDNode *CN = isConstOrConstSplat(N, /*AllowUndefs=*/false,
3900                                               /*AllowTruncation=*/true)) {
3901    CVal = CN->getAPIntValue();
3902    EltWidth = N.getValueType().getScalarSizeInBits();
3903  } else
3904    return false;
3905
3906  // If this is a truncating splat, truncate the splat value.
3907  // Otherwise, we may fail to match the expected values below.
3908  if (EltWidth < CVal.getBitWidth())
3909    CVal = CVal.trunc(EltWidth);
3910
  // Interpret the constant according to the target's boolean contents:
  // undefined contents look only at the low bit; otherwise require the exact
  // canonical "true" value (1 or all-ones).
3911  switch (getBooleanContents(N.getValueType())) {
3913    return CVal[0];
3915    return CVal.isOne();
3917    return CVal.isAllOnes();
3918  }
3919
3920  llvm_unreachable("Invalid boolean contents");
3921}
3922
/// Return true if N is a constant (or constant splat) that the target's
/// boolean-contents rules interpret as "false".
3924  if (!N)
3925    return false;
3926
3927  const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
3928  if (!CN) {
3929    const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
3930    if (!BV)
3931      return false;
3932
3933    // Only interested in constant splats, we don't care about undef
3934    // elements in identifying boolean constants and getConstantSplatNode
3935    // returns NULL if all ops are undef;
3936    CN = BV->getConstantSplatNode();
3937    if (!CN)
3938      return false;
3939  }
3940
  // With undefined boolean contents only the low bit is meaningful.
3941  if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
3942    return !CN->getAPIntValue()[0];
3943
  // Otherwise "false" is canonically all-zero.
3944  return CN->isZero();
3945}
3946
3948                                       bool SExt) const {
  // i1 needs no extension: true is simply the value 1.
3949  if (VT == MVT::i1)
3950    return N->isOne();
3951
  // Otherwise the answer depends on the target's boolean contents and on
  // whether the boolean was zero- or sign-extended (SExt).
3953  switch (Cnt) {
3955    // An extended value of 1 is always true, unless its original type is i1,
3956    // in which case it will be sign extended to -1.
3957    return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
3960    return N->isAllOnes() && SExt;
3961  }
3962  llvm_unreachable("Unexpected enumeration.");
3963}
3964
3965/// This helper function of SimplifySetCC tries to optimize the comparison when
3966/// either operand of the SetCC node is a bitwise-and instruction.
/// Handles only integer eq/ne comparisons; returns an empty SDValue when no
/// fold applies.
3967SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
3968                                         ISD::CondCode Cond, const SDLoc &DL,
3969                                         DAGCombinerInfo &DCI) const {
  // Canonicalize: if only the RHS is an 'and', move it to the LHS (N0).
3970  if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
3971    std::swap(N0, N1);
3972
3973  SelectionDAG &DAG = DCI.DAG;
3974  EVT OpVT = N0.getValueType();
3975  if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
3976      (Cond != ISD::SETEQ && Cond != ISD::SETNE))
3977    return SDValue();
3978
3979  // (X & Y) != 0 --> zextOrTrunc(X & Y)
3980  // iff everything but LSB is known zero:
3981  if (Cond == ISD::SETNE && isNullConstant(N1) &&
3984    unsigned NumEltBits = OpVT.getScalarSizeInBits();
3985    APInt UpperBits = APInt::getHighBitsSet(NumEltBits, NumEltBits - 1);
3986    if (DAG.MaskedValueIsZero(N0, UpperBits))
3987      return DAG.getBoolExtOrTrunc(N0, DL, VT, OpVT);
3988  }
3989
3990  // Try to eliminate a power-of-2 mask constant by converting to a signbit
3991  // test in a narrow type that we can truncate to with no cost. Examples:
3992  // (i32 X & 32768) == 0 --> (trunc X to i16) >= 0
3993  // (i32 X & 32768) != 0 --> (trunc X to i16) < 0
3994  // TODO: This conservatively checks for type legality on the source and
3995  //       destination types. That may inhibit optimizations, but it also
3996  //       allows setcc->shift transforms that may be more beneficial.
3997  auto *AndC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
3998  if (AndC && isNullConstant(N1) && AndC->getAPIntValue().isPowerOf2() &&
3999      isTypeLegal(OpVT) && N0.hasOneUse()) {
    // Narrow to the smallest integer type whose sign bit is the mask bit.
4000    EVT NarrowVT = EVT::getIntegerVT(*DAG.getContext(),
4001                                     AndC->getAPIntValue().getActiveBits());
4002    if (isTruncateFree(OpVT, NarrowVT) && isTypeLegal(NarrowVT)) {
4003      SDValue Trunc = DAG.getZExtOrTrunc(N0.getOperand(0), DL, NarrowVT);
4004      SDValue Zero = DAG.getConstant(0, DL, NarrowVT);
4005      return DAG.getSetCC(DL, VT, Trunc, Zero,
4007    }
4008  }
4009
4010  // Match these patterns in any of their permutations:
4011  // (X & Y) == Y
4012  // (X & Y) != Y
4013  SDValue X, Y;
4014  if (N0.getOperand(0) == N1) {
4015    X = N0.getOperand(1);
4016    Y = N0.getOperand(0);
4017  } else if (N0.getOperand(1) == N1) {
4018    X = N0.getOperand(0);
4019    Y = N0.getOperand(1);
4020  } else {
4021    return SDValue();
4022  }
4023
4024  // TODO: We should invert (X & Y) eq/ne 0 -> (X & Y) ne/eq Y if
4025  // `isXAndYEqZeroPreferableToXAndYEqY` is false. This is a bit difficult as
4026  // it's liable to create an infinite loop.
4027  SDValue Zero = DAG.getConstant(0, DL, OpVT);
4028  if (isXAndYEqZeroPreferableToXAndYEqY(Cond, OpVT) &&
4030    // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
4031    // Note that where Y is variable and is known to have at most one bit set
4032    // (for example, if it is Z & 1) we cannot do this; the expressions are not
4033    // equivalent when Y == 0.
4034    assert(OpVT.isInteger());
4036    if (DCI.isBeforeLegalizeOps() ||
4038      return DAG.getSetCC(DL, VT, N0, Zero, Cond);
4039  } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
4040    // If the target supports an 'and-not' or 'and-complement' logic operation,
4041    // try to use that to make a comparison operation more efficient.
4042    // But don't do this transform if the mask is a single bit because there are
4043    // more efficient ways to deal with that case (for example, 'bt' on x86 or
4044    // 'rlwinm' on PPC).
4045
4046    // Bail out if the compare operand that we want to turn into a zero is
4047    // already a zero (otherwise, infinite loop).
4048    if (isNullConstant(Y))
4049      return SDValue();
4050
4051    // Transform this into: ~X & Y == 0.
4052    SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
4053    SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
4054    return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
4055  }
4056
4057  return SDValue();
4058}
4059
4060/// There are multiple IR patterns that could be checking whether certain
4061/// truncation of a signed number would be lossy or not. The pattern which is
4062/// best at IR level, may not lower optimally. Thus, we want to unfold it.
4063/// We are looking for the following pattern: (KeptBits is a constant)
4064///   (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
4065/// KeptBits won't be bitwidth(x), that will be constant-folded to true/false.
4066/// KeptBits also can't be 1, that would have been folded to  %x dstcond 0
4067/// We will unfold it into the natural trunc+sext pattern:
4068///   ((%x << C) a>> C) dstcond %x
4069/// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x)
4070SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
4071    EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
4072    const SDLoc &DL) const {
4073  // We must be comparing with a constant.
4074  ConstantSDNode *C1;
4075  if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
4076    return SDValue();
4077
4078  // N0 should be:  add %x, (1 << (KeptBits-1))
4079  if (N0->getOpcode() != ISD::ADD)
4080    return SDValue();
4081
4082  // And we must be 'add'ing a constant.
4083  ConstantSDNode *C01;
4084  if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
4085    return SDValue();
4086
4087  SDValue X = N0->getOperand(0);
4088  EVT XVT = X.getValueType();
4089
4090  // Validate constants ...
4091
4092  APInt I1 = C1->getAPIntValue();
4093
  // Map the unsigned comparison onto an eq/ne check, canonicalizing the
  // non-strict forms (ule/uge) into the strict ones by adjusting the constant.
4094  ISD::CondCode NewCond;
4095  if (Cond == ISD::CondCode::SETULT) {
4096    NewCond = ISD::CondCode::SETEQ;
4097  } else if (Cond == ISD::CondCode::SETULE) {
4098    NewCond = ISD::CondCode::SETEQ;
4099    // But need to 'canonicalize' the constant.
4100    I1 += 1;
4101  } else if (Cond == ISD::CondCode::SETUGT) {
4102    NewCond = ISD::CondCode::SETNE;
4103    // But need to 'canonicalize' the constant.
4104    I1 += 1;
4105  } else if (Cond == ISD::CondCode::SETUGE) {
4106    NewCond = ISD::CondCode::SETNE;
4107  } else
4108    return SDValue();
4109
4110  APInt I01 = C01->getAPIntValue();
4111
4112  auto checkConstants = [&I1, &I01]() -> bool {
4113    // Both of them must be power-of-two, and the constant from setcc is bigger.
4114    return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
4115  };
4116
4117  if (checkConstants()) {
4118    // Great, e.g. got  icmp ult i16 (add i16 %x, 128), 256
4119  } else {
4120    // What if we invert constants? (and the target predicate)
4121    I1.negate();
4122    I01.negate();
4123    assert(XVT.isInteger());
4124    NewCond = getSetCCInverse(NewCond, XVT);
4125    if (!checkConstants())
4126      return SDValue();
4127    // Great, e.g. got  icmp uge i16 (add i16 %x, -128), -256
4128  }
4129
4130  // They are power-of-two, so which bit is set?
4131  const unsigned KeptBits = I1.logBase2();
4132  const unsigned KeptBitsMinusOne = I01.logBase2();
4133
4134  // Magic!
  // The two constants must encode adjacent powers of two, i.e.
  // C01 == C1/2, or the pattern is not a signed-truncation check.
4135  if (KeptBits != (KeptBitsMinusOne + 1))
4136    return SDValue();
4137  assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");
4138
4139  // We don't want to do this in every single case.
4140  SelectionDAG &DAG = DCI.DAG;
4142                                                        XVT, KeptBits))
4143    return SDValue();
4144
4145  // Unfold into:  sext_inreg(%x) cond %x
4146  // Where 'cond' will be either 'eq' or 'ne'.
4147  SDValue SExtInReg = DAG.getNode(
4149      DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), KeptBits)));
4150  return DAG.getSetCC(DL, SCCVT, SExtInReg, X, NewCond);
4151}
4152
4153// (X & (C l>>/<< Y)) ==/!= 0  -->  ((X <</l>> Y) & C) ==/!= 0
// Hoisting the constant out of the shift lets the 'and' mask become an
// immediate, which is usually cheaper to materialize.
4154SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
4155    EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
4156    DAGCombinerInfo &DCI, const SDLoc &DL) const {
4158         "Should be a comparison with 0.");
4159  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4160         "Valid only for [in]equality comparisons.");
4161
4162  unsigned NewShiftOpcode;
4163  SDValue X, C, Y;
4164
4165  SelectionDAG &DAG = DCI.DAG;
4166  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4167
4168  // Look for '(C l>>/<< Y)'.
4169  auto Match = [&NewShiftOpcode, &X, &C, &Y, &TLI, &DAG](SDValue V) {
4170    // The shift should be one-use.
4171    if (!V.hasOneUse())
4172      return false;
    // The hoisted form uses the opposite logical shift direction.
4173    unsigned OldShiftOpcode = V.getOpcode();
4174    switch (OldShiftOpcode) {
4175    case ISD::SHL:
4176      NewShiftOpcode = ISD::SRL;
4177      break;
4178    case ISD::SRL:
4179      NewShiftOpcode = ISD::SHL;
4180      break;
4181    default:
4182      return false; // must be a logical shift.
4183    }
4184    // We should be shifting a constant.
4185    // FIXME: best to use isConstantOrConstantVector().
4186    C = V.getOperand(0);
4188        isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
4189    if (!CC)
4190      return false;
4191    Y = V.getOperand(1);
4192
    // Let the target decide whether this particular hoist is profitable.
4194        isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
4195    return TLI.shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
4196        X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
4197  };
4198
4199  // LHS of comparison should be an one-use 'and'.
4200  if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
4201    return SDValue();
4202
4203  X = N0.getOperand(0);
4204  SDValue Mask = N0.getOperand(1);
4205
4206  // 'and' is commutative!
4207  if (!Match(Mask)) {
4208    std::swap(X, Mask);
4209    if (!Match(Mask))
4210      return SDValue();
4211  }
4212
4213  EVT VT = X.getValueType();
4214
4215  // Produce:
4216  // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
4217  SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
4218  SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
4219  SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
4220  return T2;
4221}
4222
4223/// Try to fold an equality comparison with a {add/sub/xor} binary operation as
4224/// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
4225/// handle the commuted versions of these patterns.
4226SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
4227 ISD::CondCode Cond, const SDLoc &DL,
4228 DAGCombinerInfo &DCI) const {
4229 unsigned BOpcode = N0.getOpcode();
4230 assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
4231 "Unexpected binop");
4232 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
4233
4234 // (X + Y) == X --> Y == 0
4235 // (X - Y) == X --> Y == 0
4236 // (X ^ Y) == X --> Y == 0
4237 SelectionDAG &DAG = DCI.DAG;
4238 EVT OpVT = N0.getValueType();
4239 SDValue X = N0.getOperand(0);
4240 SDValue Y = N0.getOperand(1);
4241 if (X == N1)
4242 return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);
4243
4244 if (Y != N1)
4245 return SDValue();
4246
4247 // (X + Y) == Y --> X == 0
4248 // (X ^ Y) == Y --> X == 0
4249 if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
4250 return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);
4251
4252 // The shift would not be valid if the operands are boolean (i1).
4253 if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
4254 return SDValue();
4255
4256 // (X - Y) == Y --> X == Y << 1
4257 SDValue One =
4258 DAG.getShiftAmountConstant(1, OpVT, DL, !DCI.isBeforeLegalize());
4259 SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
4260 if (!DCI.isCalledByLegalizer())
4261 DCI.AddToWorklist(YShl1.getNode());
4262 return DAG.getSetCC(DL, VT, X, YShl1, Cond);
4263}
4264
/// Try to replace a (ctpop X) comparison against a small constant with
/// cheaper bit tricks based on "X & (X-1)" clearing the lowest set bit.
4266                                       SDValue N0, const APInt &C1,
4267                                       ISD::CondCode Cond, const SDLoc &dl,
4268                                       SelectionDAG &DAG) {
4269  // Look through truncs that don't change the value of a ctpop.
4270  // FIXME: Add vector support? Need to be careful with setcc result type below.
4271  SDValue CTPOP = N0;
4272  if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
4274    CTPOP = N0.getOperand(0);
4275
4276  if (CTPOP.getOpcode() != ISD::CTPOP || !CTPOP.hasOneUse())
4277    return SDValue();
4278
4279  EVT CTVT = CTPOP.getValueType();
4280  SDValue CTOp = CTPOP.getOperand(0);
4281
4282  // Expand a power-of-2-or-zero comparison based on ctpop:
4283  // (ctpop x) u< 2 -> (x & x-1) == 0
4284  // (ctpop x) u> 1 -> (x & x-1) != 0
4285  if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
4286    // Keep the CTPOP if it is a cheap vector op.
4287    if (CTVT.isVector() && TLI.isCtpopFast(CTVT))
4288      return SDValue();
4289
    // Each "pass" below costs an ADD + AND; bail if that exceeds the
    // target's budget for replacing a ctpop.
4290    unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond);
4291    if (C1.ugt(CostLimit + (Cond == ISD::SETULT)))
4292      return SDValue();
4293    if (C1 == 0 && (Cond == ISD::SETULT))
4294      return SDValue(); // This is handled elsewhere.
4295
4296    unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT);
4297
    // Repeatedly clear the lowest set bit: Result &= (Result - 1).
4298    SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4299    SDValue Result = CTOp;
4300    for (unsigned i = 0; i < Passes; i++) {
4301      SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, Result, NegOne);
4302      Result = DAG.getNode(ISD::AND, dl, CTVT, Result, Add);
4303    }
4305    return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC);
4306  }
4307
4308  // Expand a power-of-2 comparison based on ctpop
4309  if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
4310    // Keep the CTPOP if it is cheap.
4311    if (TLI.isCtpopFast(CTVT))
4312      return SDValue();
4313
4314    SDValue Zero = DAG.getConstant(0, dl, CTVT);
4315    SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4316    assert(CTVT.isInteger());
4317    SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
4318
4319    // Its not uncommon for known-never-zero X to exist in (ctpop X) eq/ne 1, so
4320    // check before emitting a potentially unnecessary op.
4321    if (DAG.isKnownNeverZero(CTOp)) {
4322      // (ctpop x) == 1 --> (x & x-1) == 0
4323      // (ctpop x) != 1 --> (x & x-1) != 0
4324      SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
4325      SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
4326      return RHS;
4327    }
4328
4329    // (ctpop x) == 1 --> (x ^ x-1) >  x-1
4330    // (ctpop x) != 1 --> (x ^ x-1) <= x-1
4331    SDValue Xor = DAG.getNode(ISD::XOR, dl, CTVT, CTOp, Add);
4333    return DAG.getSetCC(dl, VT, Xor, Add, CmpCond);
4334  }
4335
4336  return SDValue();
4337}
4338
/// Rotation preserves "all bits zero" and "all bits one", so eq/ne compares
/// of a rotated value against 0 or -1 can look through the rotate.
4340                                   ISD::CondCode Cond, const SDLoc &dl,
4341                                   SelectionDAG &DAG) {
4342  if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4343    return SDValue();
4344
  // Only 0 and -1 survive a rotate unchanged, so only those RHS values apply.
4345  auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4346  if (!C1 || !(C1->isZero() || C1->isAllOnes()))
4347    return SDValue();
4348
4349  auto getRotateSource = [](SDValue X) {
4350    if (X.getOpcode() == ISD::ROTL || X.getOpcode() == ISD::ROTR)
4351      return X.getOperand(0);
4352    return SDValue();
4353  };
4354
4355  // Peek through a rotated value compared against 0 or -1:
4356  // (rot X, Y) == 0/-1 --> X == 0/-1
4357  // (rot X, Y) != 0/-1 --> X != 0/-1
4358  if (SDValue R = getRotateSource(N0))
4359    return DAG.getSetCC(dl, VT, R, N1, Cond);
4360
4361  // Peek through an 'or' of a rotated value compared against 0:
4362  // or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0
4363  // or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0
4364  //
4365  // TODO: Add the 'and' with -1 sibling.
4366  // TODO: Recurse through a series of 'or' ops to find the rotate.
4367  EVT OpVT = N0.getValueType();
4368  if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) {
4369    if (SDValue R = getRotateSource(N0.getOperand(0))) {
4370      SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(1));
4371      return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4372    }
4373    if (SDValue R = getRotateSource(N0.getOperand(1))) {
4374      SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(0));
4375      return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4376    }
4377  }
4378
4379  return SDValue();
4380}
4381
/// Fold eq/ne-zero compares of a funnel shift whose two inputs share an
/// operand, replacing the funnel shift with a single plain shift + or.
4383                                        ISD::CondCode Cond, const SDLoc &dl,
4384                                        SelectionDAG &DAG) {
4385  // If we are testing for all-bits-clear, we might be able to do that with
4386  // less shifting since bit-order does not matter.
4387  if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4388    return SDValue();
4389
4390  auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4391  if (!C1 || !C1->isZero())
4392    return SDValue();
4393
4394  if (!N0.hasOneUse() ||
4395      (N0.getOpcode() != ISD::FSHL && N0.getOpcode() != ISD::FSHR))
4396    return SDValue();
4397
  // Only constant in-range shift amounts are handled.
4398  unsigned BitWidth = N0.getScalarValueSizeInBits();
4399  auto *ShAmtC = isConstOrConstSplat(N0.getOperand(2));
4400  if (!ShAmtC || ShAmtC->getAPIntValue().uge(BitWidth))
4401    return SDValue();
4402
4403  // Canonicalize fshr as fshl to reduce pattern-matching.
4404  unsigned ShAmt = ShAmtC->getZExtValue();
4405  if (N0.getOpcode() == ISD::FSHR)
4406    ShAmt = BitWidth - ShAmt;
4407
4408  // Match an 'or' with a specific operand 'Other' in either commuted variant.
4409  SDValue X, Y;
4410  auto matchOr = [&X, &Y](SDValue Or, SDValue Other) {
4411    if (Or.getOpcode() != ISD::OR || !Or.hasOneUse())
4412      return false;
4413    if (Or.getOperand(0) == Other) {
4414      X = Or.getOperand(0);
4415      Y = Or.getOperand(1);
4416      return true;
4417    }
4418    if (Or.getOperand(1) == Other) {
4419      X = Or.getOperand(1);
4420      Y = Or.getOperand(0);
4421      return true;
4422    }
4423    return false;
4424  };
4425
4426  EVT OpVT = N0.getValueType();
4427  EVT ShAmtVT = N0.getOperand(2).getValueType();
4428  SDValue F0 = N0.getOperand(0);
4429  SDValue F1 = N0.getOperand(1);
4430  if (matchOr(F0, F1)) {
4431    // fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0
4432    SDValue NewShAmt = DAG.getConstant(ShAmt, dl, ShAmtVT);
4433    SDValue Shift = DAG.getNode(ISD::SHL, dl, OpVT, Y, NewShAmt);
4434    SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4435    return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4436  }
4437  if (matchOr(F1, F0)) {
4438    // fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0
4439    SDValue NewShAmt = DAG.getConstant(BitWidth - ShAmt, dl, ShAmtVT);
4440    SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, Y, NewShAmt);
4441    SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4442    return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4443  }
4444
4445  return SDValue();
4446}
4447
4448/// Try to simplify a setcc built with the specified operands and cc. If it is
4449/// unable to simplify it, return a null SDValue.
4451 ISD::CondCode Cond, bool foldBooleans,
4452 DAGCombinerInfo &DCI,
4453 const SDLoc &dl) const {
4454 SelectionDAG &DAG = DCI.DAG;
4455 const DataLayout &Layout = DAG.getDataLayout();
4456 EVT OpVT = N0.getValueType();
4458
4459 // Constant fold or commute setcc.
4460 if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
4461 return Fold;
4462
4463 bool N0ConstOrSplat =
4464 isConstOrConstSplat(N0, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4465 bool N1ConstOrSplat =
4466 isConstOrConstSplat(N1, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4467
4468 // Canonicalize toward having the constant on the RHS.
4469 // TODO: Handle non-splat vector constants. All undef causes trouble.
4470 // FIXME: We can't yet fold constant scalable vector splats, so avoid an
4471 // infinite loop here when we encounter one.
4473 if (N0ConstOrSplat && !N1ConstOrSplat &&
4474 (DCI.isBeforeLegalizeOps() ||
4475 isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
4476 return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4477
4478 // If we have a subtract with the same 2 non-constant operands as this setcc
4479 // -- but in reverse order -- then try to commute the operands of this setcc
4480 // to match. A matching pair of setcc (cmp) and sub may be combined into 1
4481 // instruction on some targets.
4482 if (!N0ConstOrSplat && !N1ConstOrSplat &&
4483 (DCI.isBeforeLegalizeOps() ||
4484 isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
4485 DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N1, N0}) &&
4486 !DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N0, N1}))
4487 return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4488
4489 if (SDValue V = foldSetCCWithRotate(VT, N0, N1, Cond, dl, DAG))
4490 return V;
4491
4492 if (SDValue V = foldSetCCWithFunnelShift(VT, N0, N1, Cond, dl, DAG))
4493 return V;
4494
4495 if (auto *N1C = isConstOrConstSplat(N1)) {
4496 const APInt &C1 = N1C->getAPIntValue();
4497
4498 // Optimize some CTPOP cases.
4499 if (SDValue V = simplifySetCCWithCTPOP(*this, VT, N0, C1, Cond, dl, DAG))
4500 return V;
4501
4502 // For equality to 0 of a no-wrap multiply, decompose and test each op:
4503 // X * Y == 0 --> (X == 0) || (Y == 0)
4504 // X * Y != 0 --> (X != 0) && (Y != 0)
4505 // TODO: This bails out if minsize is set, but if the target doesn't have a
4506 // single instruction multiply for this type, it would likely be
4507 // smaller to decompose.
4508 if (C1.isZero() && (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4509 N0.getOpcode() == ISD::MUL && N0.hasOneUse() &&
4510 (N0->getFlags().hasNoUnsignedWrap() ||
4511 N0->getFlags().hasNoSignedWrap()) &&
4512 !Attr.hasFnAttr(Attribute::MinSize)) {
4513 SDValue IsXZero = DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
4514 SDValue IsYZero = DAG.getSetCC(dl, VT, N0.getOperand(1), N1, Cond);
4515 unsigned LogicOp = Cond == ISD::SETEQ ? ISD::OR : ISD::AND;
4516 return DAG.getNode(LogicOp, dl, VT, IsXZero, IsYZero);
4517 }
4518
4519 // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
4520 // equality comparison, then we're just comparing whether X itself is
4521 // zero.
4522 if (N0.getOpcode() == ISD::SRL && (C1.isZero() || C1.isOne()) &&
4523 N0.getOperand(0).getOpcode() == ISD::CTLZ &&
4524 llvm::has_single_bit<uint32_t>(N0.getScalarValueSizeInBits())) {
4525 if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) {
4526 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4527 ShAmt->getAPIntValue() == Log2_32(N0.getScalarValueSizeInBits())) {
4528 if ((C1 == 0) == (Cond == ISD::SETEQ)) {
4529 // (srl (ctlz x), 5) == 0 -> X != 0
4530 // (srl (ctlz x), 5) != 1 -> X != 0
4531 Cond = ISD::SETNE;
4532 } else {
4533 // (srl (ctlz x), 5) != 0 -> X == 0
4534 // (srl (ctlz x), 5) == 1 -> X == 0
4535 Cond = ISD::SETEQ;
4536 }
4537 SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
4538 return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), Zero,
4539 Cond);
4540 }
4541 }
4542 }
4543 }
4544
4545 // FIXME: Support vectors.
4546 if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
4547 const APInt &C1 = N1C->getAPIntValue();
4548
4549 // (zext x) == C --> x == (trunc C)
4550 // (sext x) == C --> x == (trunc C)
4551 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4552 DCI.isBeforeLegalize() && N0->hasOneUse()) {
4553 unsigned MinBits = N0.getValueSizeInBits();
4554 SDValue PreExt;
4555 bool Signed = false;
4556 if (N0->getOpcode() == ISD::ZERO_EXTEND) {
4557 // ZExt
4558 MinBits = N0->getOperand(0).getValueSizeInBits();
4559 PreExt = N0->getOperand(0);
4560 } else if (N0->getOpcode() == ISD::AND) {
4561 // DAGCombine turns costly ZExts into ANDs
4562 if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
4563 if ((C->getAPIntValue()+1).isPowerOf2()) {
4564 MinBits = C->getAPIntValue().countr_one();
4565 PreExt = N0->getOperand(0);
4566 }
4567 } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
4568 // SExt
4569 MinBits = N0->getOperand(0).getValueSizeInBits();
4570 PreExt = N0->getOperand(0);
4571 Signed = true;
4572 } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
4573 // ZEXTLOAD / SEXTLOAD
4574 if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
4575 MinBits = LN0->getMemoryVT().getSizeInBits();
4576 PreExt = N0;
4577 } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
4578 Signed = true;
4579 MinBits = LN0->getMemoryVT().getSizeInBits();
4580 PreExt = N0;
4581 }
4582 }
4583
4584 // Figure out how many bits we need to preserve this constant.
4585 unsigned ReqdBits = Signed ? C1.getSignificantBits() : C1.getActiveBits();
4586
4587 // Make sure we're not losing bits from the constant.
4588 if (MinBits > 0 &&
4589 MinBits < C1.getBitWidth() &&
4590 MinBits >= ReqdBits) {
4591 EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
4592 if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
4593 // Will get folded away.
4594 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
4595 if (MinBits == 1 && C1 == 1)
4596 // Invert the condition.
4597 return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
4599 SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
4600 return DAG.getSetCC(dl, VT, Trunc, C, Cond);
4601 }
4602
4603 // If truncating the setcc operands is not desirable, we can still
4604 // simplify the expression in some cases:
4605 // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
4606 // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
4607 // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
4608 // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
4609 // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
4610 // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
4611 SDValue TopSetCC = N0->getOperand(0);
4612 unsigned N0Opc = N0->getOpcode();
4613 bool SExt = (N0Opc == ISD::SIGN_EXTEND);
4614 if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
4615 TopSetCC.getOpcode() == ISD::SETCC &&
4616 (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
4617 (isConstFalseVal(N1) ||
4618 isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
4619
4620 bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) ||
4621 (!N1C->isZero() && Cond == ISD::SETNE);
4622
4623 if (!Inverse)
4624 return TopSetCC;
4625
4627 cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
4628 TopSetCC.getOperand(0).getValueType());
4629 return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
4630 TopSetCC.getOperand(1),
4631 InvCond);
4632 }
4633 }
4634 }
4635
4636 // If the LHS is '(and load, const)', the RHS is 0, the test is for
4637 // equality or unsigned, and all 1 bits of the const are in the same
4638 // partial word, see if we can shorten the load.
4639 if (DCI.isBeforeLegalize() &&
4641 N0.getOpcode() == ISD::AND && C1 == 0 &&
4642 N0.getNode()->hasOneUse() &&
4643 isa<LoadSDNode>(N0.getOperand(0)) &&
4644 N0.getOperand(0).getNode()->hasOneUse() &&
4645 isa<ConstantSDNode>(N0.getOperand(1))) {
4646 auto *Lod = cast<LoadSDNode>(N0.getOperand(0));
4647 APInt bestMask;
4648 unsigned bestWidth = 0, bestOffset = 0;
4649 if (Lod->isSimple() && Lod->isUnindexed() &&
4650 (Lod->getMemoryVT().isByteSized() ||
4651 isPaddedAtMostSignificantBitsWhenStored(Lod->getMemoryVT()))) {
4652 unsigned memWidth = Lod->getMemoryVT().getStoreSizeInBits();
4653 unsigned origWidth = N0.getValueSizeInBits();
4654 unsigned maskWidth = origWidth;
4655 // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
4656 // 8 bits, but have to be careful...
4657 if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
4658 origWidth = Lod->getMemoryVT().getSizeInBits();
4659 const APInt &Mask = N0.getConstantOperandAPInt(1);
 4660 // Only consider power-of-2 widths (and at least one byte) as candidates
4661 // for the narrowed load.
4662 for (unsigned width = 8; width < origWidth; width *= 2) {
4663 EVT newVT = EVT::getIntegerVT(*DAG.getContext(), width);
4664 if (!shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT))
4665 continue;
4666 APInt newMask = APInt::getLowBitsSet(maskWidth, width);
4667 // Avoid accessing any padding here for now (we could use memWidth
4668 // instead of origWidth here otherwise).
4669 unsigned maxOffset = origWidth - width;
4670 for (unsigned offset = 0; offset <= maxOffset; offset += 8) {
4671 if (Mask.isSubsetOf(newMask)) {
4672 unsigned ptrOffset =
4673 Layout.isLittleEndian() ? offset : memWidth - width - offset;
4674 unsigned IsFast = 0;
4675 Align NewAlign = commonAlignment(Lod->getAlign(), ptrOffset / 8);
4677 *DAG.getContext(), Layout, newVT, Lod->getAddressSpace(),
4678 NewAlign, Lod->getMemOperand()->getFlags(), &IsFast) &&
4679 IsFast) {
4680 bestOffset = ptrOffset / 8;
4681 bestMask = Mask.lshr(offset);
4682 bestWidth = width;
4683 break;
4684 }
4685 }
4686 newMask <<= 8;
4687 }
4688 if (bestWidth)
4689 break;
4690 }
4691 }
4692 if (bestWidth) {
4693 EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
4694 SDValue Ptr = Lod->getBasePtr();
4695 if (bestOffset != 0)
4696 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(bestOffset));
4697 SDValue NewLoad =
4698 DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
4699 Lod->getPointerInfo().getWithOffset(bestOffset),
4700 Lod->getOriginalAlign());
4701 SDValue And =
4702 DAG.getNode(ISD::AND, dl, newVT, NewLoad,
4703 DAG.getConstant(bestMask.trunc(bestWidth), dl, newVT));
4704 return DAG.getSetCC(dl, VT, And, DAG.getConstant(0LL, dl, newVT), Cond);
4705 }
4706 }
4707
4708 // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
4709 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
4710 unsigned InSize = N0.getOperand(0).getValueSizeInBits();
4711
4712 // If the comparison constant has bits in the upper part, the
4713 // zero-extended value could never match.
4715 C1.getBitWidth() - InSize))) {
4716 switch (Cond) {
4717 case ISD::SETUGT:
4718 case ISD::SETUGE:
4719 case ISD::SETEQ:
4720 return DAG.getConstant(0, dl, VT);
4721 case ISD::SETULT:
4722 case ISD::SETULE:
4723 case ISD::SETNE:
4724 return DAG.getConstant(1, dl, VT);
4725 case ISD::SETGT:
4726 case ISD::SETGE:
4727 // True if the sign bit of C1 is set.
4728 return DAG.getConstant(C1.isNegative(), dl, VT);
4729 case ISD::SETLT:
4730 case ISD::SETLE:
4731 // True if the sign bit of C1 isn't set.
4732 return DAG.getConstant(C1.isNonNegative(), dl, VT);
4733 default:
4734 break;
4735 }
4736 }
4737
4738 // Otherwise, we can perform the comparison with the low bits.
4739 switch (Cond) {
4740 case ISD::SETEQ:
4741 case ISD::SETNE:
4742 case ISD::SETUGT:
4743 case ISD::SETUGE:
4744 case ISD::SETULT:
4745 case ISD::SETULE: {
4746 EVT newVT = N0.getOperand(0).getValueType();
4747 if (DCI.isBeforeLegalizeOps() ||
4748 (isOperationLegal(ISD::SETCC, newVT) &&
4749 isCondCodeLegal(Cond, newVT.getSimpleVT()))) {
4750 EVT NewSetCCVT = getSetCCResultType(Layout, *DAG.getContext(), newVT);
4751 SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);
4752
4753 SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
4754 NewConst, Cond);
4755 return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
4756 }
4757 break;
4758 }
4759 default:
4760 break; // todo, be more careful with signed comparisons
4761 }
4762 } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
4763 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4764 !isSExtCheaperThanZExt(cast<VTSDNode>(N0.getOperand(1))->getVT(),
4765 OpVT)) {
4766 EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
4767 unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
4768 EVT ExtDstTy = N0.getValueType();
4769 unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
4770
4771 // If the constant doesn't fit into the number of bits for the source of
4772 // the sign extension, it is impossible for both sides to be equal.
4773 if (C1.getSignificantBits() > ExtSrcTyBits)
4774 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
4775
4776 assert(ExtDstTy == N0.getOperand(0).getValueType() &&
4777 ExtDstTy != ExtSrcTy && "Unexpected types!");
4778 APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
4779 SDValue ZextOp = DAG.getNode(ISD::AND, dl, ExtDstTy, N0.getOperand(0),
4780 DAG.getConstant(Imm, dl, ExtDstTy));
4781 if (!DCI.isCalledByLegalizer())
4782 DCI.AddToWorklist(ZextOp.getNode());
4783 // Otherwise, make this a use of a zext.
4784 return DAG.getSetCC(dl, VT, ZextOp,
4785 DAG.getConstant(C1 & Imm, dl, ExtDstTy), Cond);
4786 } else if ((N1C->isZero() || N1C->isOne()) &&
4787 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
4788 // SETCC (X), [0|1], [EQ|NE] -> X if X is known 0/1. i1 types are
4789 // excluded as they are handled below whilst checking for foldBooleans.
4790 if ((N0.getOpcode() == ISD::SETCC || VT.getScalarType() != MVT::i1) &&
4791 isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
4792 (N0.getValueType() == MVT::i1 ||
4796 bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
4797 if (TrueWhenTrue)
4798 return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
4799 // Invert the condition.
4800 if (N0.getOpcode() == ISD::SETCC) {
4801 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
4803 if (DCI.isBeforeLegalizeOps() ||
4805 return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
4806 }
4807 }
4808
4809 if ((N0.getOpcode() == ISD::XOR ||
4810 (N0.getOpcode() == ISD::AND &&
4811 N0.getOperand(0).getOpcode() == ISD::XOR &&
4812 N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
4813 isOneConstant(N0.getOperand(1))) {
4814 // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
4815 // can only do this if the top bits are known zero.
4816 unsigned BitWidth = N0.getValueSizeInBits();
4817 if (DAG.MaskedValueIsZero(N0,
4819 BitWidth-1))) {
4820 // Okay, get the un-inverted input value.
4821 SDValue Val;
4822 if (N0.getOpcode() == ISD::XOR) {
4823 Val = N0.getOperand(0);
4824 } else {
4825 assert(N0.getOpcode() == ISD::AND &&
4826 N0.getOperand(0).getOpcode() == ISD::XOR);
4827 // ((X^1)&1)^1 -> X & 1
4828 Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
4829 N0.getOperand(0).getOperand(0),
4830 N0.getOperand(1));
4831 }
4832
4833 return DAG.getSetCC(dl, VT, Val, N1,
4835 }
4836 } else if (N1C->isOne()) {
4837 SDValue Op0 = N0;
4838 if (Op0.getOpcode() == ISD::TRUNCATE)
4839 Op0 = Op0.getOperand(0);
4840
4841 if ((Op0.getOpcode() == ISD::XOR) &&
4842 Op0.getOperand(0).getOpcode() == ISD::SETCC &&
4843 Op0.getOperand(1).getOpcode() == ISD::SETCC) {
4844 SDValue XorLHS = Op0.getOperand(0);
4845 SDValue XorRHS = Op0.getOperand(1);
4846 // Ensure that the input setccs return an i1 type or 0/1 value.
4847 if (Op0.getValueType() == MVT::i1 ||
4852 // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
4854 return DAG.getSetCC(dl, VT, XorLHS, XorRHS, Cond);
4855 }
4856 }
4857 if (Op0.getOpcode() == ISD::AND && isOneConstant(Op0.getOperand(1))) {
4858 // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
4859 if (Op0.getValueType().bitsGT(VT))
4860 Op0 = DAG.getNode(ISD::AND, dl, VT,
4861 DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
4862 DAG.getConstant(1, dl, VT));
4863 else if (Op0.getValueType().bitsLT(VT))
4864 Op0 = DAG.getNode(ISD::AND, dl, VT,
4865 DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
4866 DAG.getConstant(1, dl, VT));
4867
4868 return DAG.getSetCC(dl, VT, Op0,
4869 DAG.getConstant(0, dl, Op0.getValueType()),
4871 }
4872 if (Op0.getOpcode() == ISD::AssertZext &&
4873 cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
4874 return DAG.getSetCC(dl, VT, Op0,
4875 DAG.getConstant(0, dl, Op0.getValueType()),
4877 }
4878 }
4879
4880 // Given:
4881 // icmp eq/ne (urem %x, %y), 0
4882 // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
4883 // icmp eq/ne %x, 0
4884 if (N0.getOpcode() == ISD::UREM && N1C->isZero() &&
4885 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
4886 KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));
4887 KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));
4888 if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
4889 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
4890 }
4891
4892 // Fold set_cc seteq (ashr X, BW-1), -1 -> set_cc setlt X, 0
4893 // and set_cc setne (ashr X, BW-1), -1 -> set_cc setge X, 0
4894 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4895 N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(N0.getOperand(1)) &&
4896 N0.getConstantOperandAPInt(1) == OpVT.getScalarSizeInBits() - 1 &&
4897 N1C && N1C->isAllOnes()) {
4898 return DAG.getSetCC(dl, VT, N0.getOperand(0),
4899 DAG.getConstant(0, dl, OpVT),
4901 }
4902
4903 if (SDValue V =
4904 optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
4905 return V;
4906 }
4907
4908 // These simplifications apply to splat vectors as well.
4909 // TODO: Handle more splat vector cases.
4910 if (auto *N1C = isConstOrConstSplat(N1)) {
4911 const APInt &C1 = N1C->getAPIntValue();
4912
4913 APInt MinVal, MaxVal;
4914 unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits();
4916 MinVal = APInt::getSignedMinValue(OperandBitSize);
4917 MaxVal = APInt::getSignedMaxValue(OperandBitSize);
4918 } else {
4919 MinVal = APInt::getMinValue(OperandBitSize);
4920 MaxVal = APInt::getMaxValue(OperandBitSize);
4921 }
4922
4923 // Canonicalize GE/LE comparisons to use GT/LT comparisons.
4924 if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
4925 // X >= MIN --> true
4926 if (C1 == MinVal)
4927 return DAG.getBoolConstant(true, dl, VT, OpVT);
4928
4929 if (!VT.isVector()) { // TODO: Support this for vectors.
4930 // X >= C0 --> X > (C0 - 1)
4931 APInt C = C1 - 1;
4933 if ((DCI.isBeforeLegalizeOps() ||
4934 isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
4935 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
4936 isLegalICmpImmediate(C.getSExtValue())))) {
4937 return DAG.getSetCC(dl, VT, N0,
4938 DAG.getConstant(C, dl, N1.getValueType()),
4939 NewCC);
4940 }
4941 }
4942 }
4943
4944 if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
4945 // X <= MAX --> true
4946 if (C1 == MaxVal)
4947 return DAG.getBoolConstant(true, dl, VT, OpVT);
4948
4949 // X <= C0 --> X < (C0 + 1)
4950 if (!VT.isVector()) { // TODO: Support this for vectors.
4951 APInt C = C1 + 1;
4953 if ((DCI.isBeforeLegalizeOps() ||
4954 isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
4955 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
4956 isLegalICmpImmediate(C.getSExtValue())))) {
4957 return DAG.getSetCC(dl, VT, N0,
4958 DAG.getConstant(C, dl, N1.getValueType()),
4959 NewCC);
4960 }
4961 }
4962 }
4963
4964 if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
4965 if (C1 == MinVal)
4966 return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false
4967
4968 // TODO: Support this for vectors after legalize ops.
4969 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
4970 // Canonicalize setlt X, Max --> setne X, Max
4971 if (C1 == MaxVal)
4972 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
4973
4974 // If we have setult X, 1, turn it into seteq X, 0
4975 if (C1 == MinVal+1)
4976 return DAG.getSetCC(dl, VT, N0,
4977 DAG.getConstant(MinVal, dl, N0.getValueType()),
4978 ISD::SETEQ);
4979 }
4980 }
4981
4982 if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
4983 if (C1 == MaxVal)
4984 return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false
4985
4986 // TODO: Support this for vectors after legalize ops.
4987 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
4988 // Canonicalize setgt X, Min --> setne X, Min
4989 if (C1 == MinVal)
4990 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
4991
4992 // If we have setugt X, Max-1, turn it into seteq X, Max
4993 if (C1 == MaxVal-1)
4994 return DAG.getSetCC(dl, VT, N0,
4995 DAG.getConstant(MaxVal, dl, N0.getValueType()),
4996 ISD::SETEQ);
4997 }
4998 }
4999
5000 if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
5001 // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
5002 if (C1.isZero())
5003 if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
5004 VT, N0, N1, Cond, DCI, dl))
5005 return CC;
5006
5007 // For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y).
5008 // For example, when high 32-bits of i64 X are known clear:
5009 // all bits clear: (X | (Y<<32)) == 0 --> (X | Y) == 0
5010 // all bits set: (X | (Y<<32)) == -1 --> (X & Y) == -1
5011 bool CmpZero = N1C->isZero();
5012 bool CmpNegOne = N1C->isAllOnes();
5013 if ((CmpZero || CmpNegOne) && N0.hasOneUse()) {
5014 // Match or(lo,shl(hi,bw/2)) pattern.
5015 auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
5016 unsigned EltBits = V.getScalarValueSizeInBits();
5017 if (V.getOpcode() != ISD::OR || (EltBits % 2) != 0)
5018 return false;
5019 SDValue LHS = V.getOperand(0);
5020 SDValue RHS = V.getOperand(1);
5021 APInt HiBits = APInt::getHighBitsSet(EltBits, EltBits / 2);
 5022 // The unshifted element must have zero upper bits.
5023 if (RHS.getOpcode() == ISD::SHL &&
5024 isa<ConstantSDNode>(RHS.getOperand(1)) &&
5025 RHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5026 DAG.MaskedValueIsZero(LHS, HiBits)) {
5027 Lo = LHS;
5028 Hi = RHS.getOperand(0);
5029 return true;
5030 }
5031 if (LHS.getOpcode() == ISD::SHL &&
5032 isa<ConstantSDNode>(LHS.getOperand(1)) &&
5033 LHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5034 DAG.MaskedValueIsZero(RHS, HiBits)) {
5035 Lo = RHS;
5036 Hi = LHS.getOperand(0);
5037 return true;
5038 }
5039 return false;
5040 };
5041
5042 auto MergeConcat = [&](SDValue Lo, SDValue Hi) {
5043 unsigned EltBits = N0.getScalarValueSizeInBits();
5044 unsigned HalfBits = EltBits / 2;
5045 APInt HiBits = APInt::getHighBitsSet(EltBits, HalfBits);
5046 SDValue LoBits = DAG.getConstant(~HiBits, dl, OpVT);
5047 SDValue HiMask = DAG.getNode(ISD::AND, dl, OpVT, Hi, LoBits);
5048 SDValue NewN0 =
5049 DAG.getNode(CmpZero ? ISD::OR : ISD::AND, dl, OpVT, Lo, HiMask);
5050 SDValue NewN1 = CmpZero ? DAG.getConstant(0, dl, OpVT) : LoBits;
5051 return DAG.getSetCC(dl, VT, NewN0, NewN1, Cond);
5052 };
5053
5054 SDValue Lo, Hi;
5055 if (IsConcat(N0, Lo, Hi))
5056 return MergeConcat(Lo, Hi);
5057
5058 if (N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR) {
5059 SDValue Lo0, Lo1, Hi0, Hi1;
5060 if (IsConcat(N0.getOperand(0), Lo0, Hi0) &&
5061 IsConcat(N0.getOperand(1), Lo1, Hi1)) {
5062 return MergeConcat(DAG.getNode(N0.getOpcode(), dl, OpVT, Lo0, Lo1),
5063 DAG.getNode(N0.getOpcode(), dl, OpVT, Hi0, Hi1));
5064 }
5065 }
5066 }
5067 }
5068
5069 // If we have "setcc X, C0", check to see if we can shrink the immediate
5070 // by changing cc.
5071 // TODO: Support this for vectors after legalize ops.
5072 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5073 // SETUGT X, SINTMAX -> SETLT X, 0
5074 // SETUGE X, SINTMIN -> SETLT X, 0
5075 if ((Cond == ISD::SETUGT && C1.isMaxSignedValue()) ||
5076 (Cond == ISD::SETUGE && C1.isMinSignedValue()))
5077 return DAG.getSetCC(dl, VT, N0,
5078 DAG.getConstant(0, dl, N1.getValueType()),
5079 ISD::SETLT);
5080
5081 // SETULT X, SINTMIN -> SETGT X, -1
5082 // SETULE X, SINTMAX -> SETGT X, -1
5083 if ((Cond == ISD::SETULT && C1.isMinSignedValue()) ||
5084 (Cond == ISD::SETULE && C1.isMaxSignedValue()))
5085 return DAG.getSetCC(dl, VT, N0,
5086 DAG.getAllOnesConstant(dl, N1.getValueType()),
5087 ISD::SETGT);
5088 }
5089 }
5090
5091 // Back to non-vector simplifications.
5092 // TODO: Can we do these for vector splats?
5093 if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
5094 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5095 const APInt &C1 = N1C->getAPIntValue();
5096 EVT ShValTy = N0.getValueType();
5097
5098 // Fold bit comparisons when we can. This will result in an
5099 // incorrect value when boolean false is negative one, unless
5100 // the bitsize is 1 in which case the false value is the same
5101 // in practice regardless of the representation.
5102 if ((VT.getSizeInBits() == 1 ||
5104 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5105 (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) &&
5106 N0.getOpcode() == ISD::AND) {
5107 if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5108 if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3
5109 // Perform the xform if the AND RHS is a single bit.
5110 unsigned ShCt = AndRHS->getAPIntValue().logBase2();
5111 if (AndRHS->getAPIntValue().isPowerOf2() &&
5112 !TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) {
5113 return DAG.getNode(
5114 ISD::TRUNCATE, dl, VT,
5115 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5117 ShCt, ShValTy, dl, !DCI.isBeforeLegalize())));
5118 }
5119 } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
5120 // (X & 8) == 8 --> (X & 8) >> 3
5121 // Perform the xform if C1 is a single bit.
5122 unsigned ShCt = C1.logBase2();
5123 if (C1.isPowerOf2() &&
5124 !TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) {
5125 return DAG.getNode(
5126 ISD::TRUNCATE, dl, VT,
5127 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5129 ShCt, ShValTy, dl, !DCI.isBeforeLegalize())));
5130 }
5131 }
5132 }
5133 }
5134
5135 if (C1.getSignificantBits() <= 64 &&
5137 // (X & -256) == 256 -> (X >> 8) == 1
5138 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5139 N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
5140 if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5141 const APInt &AndRHSC = AndRHS->getAPIntValue();
5142 if (AndRHSC.isNegatedPowerOf2() && (AndRHSC & C1) == C1) {
5143 unsigned ShiftBits = AndRHSC.countr_zero();
5144 if (!TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5145 SDValue Shift = DAG.getNode(
5146 ISD::SRL, dl, ShValTy, N0.getOperand(0),
5147 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl,
5148 !DCI.isBeforeLegalize()));
5149 SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, ShValTy);
5150 return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
5151 }
5152 }
5153 }
5154 } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
5155 Cond == ISD::SETULE || Cond == ISD::SETUGT) {
5156 bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
5157 // X < 0x100000000 -> (X >> 32) < 1
5158 // X >= 0x100000000 -> (X >> 32) >= 1
5159 // X <= 0x0ffffffff -> (X >> 32) < 1
5160 // X > 0x0ffffffff -> (X >> 32) >= 1
5161 unsigned ShiftBits;
5162 APInt NewC = C1;
5163 ISD::CondCode NewCond = Cond;
5164 if (AdjOne) {
5165 ShiftBits = C1.countr_one();
5166 NewC = NewC + 1;
5167 NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
5168 } else {
5169 ShiftBits = C1.countr_zero();
5170 }
5171 NewC.lshrInPlace(ShiftBits);
5172 if (ShiftBits && NewC.getSignificantBits() <= 64 &&
5174 !TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5175 SDValue Shift =
5176 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5177 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl,
5178 !DCI.isBeforeLegalize()));
5179 SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
5180 return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
5181 }
5182 }
5183 }
5184 }
5185
5186 if (!isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1)) {
5187 auto *CFP = cast<ConstantFPSDNode>(N1);
5188 assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");
5189
5190 // Otherwise, we know the RHS is not a NaN. Simplify the node to drop the
5191 // constant if knowing that the operand is non-nan is enough. We prefer to
5192 // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
5193 // materialize 0.0.
5194 if (Cond == ISD::SETO || Cond == ISD::SETUO)
5195 return DAG.getSetCC(dl, VT, N0, N0, Cond);
5196
5197 // setcc (fneg x), C -> setcc swap(pred) x, -C
5198 if (N0.getOpcode() == ISD::FNEG) {
5200 if (DCI.isBeforeLegalizeOps() ||
5201 isCondCodeLegal(SwapCond, N0.getSimpleValueType())) {
5202 SDValue NegN1 = DAG.getNode(ISD::FNEG, dl, N0.getValueType(), N1);
5203 return DAG.getSetCC(dl, VT, N0.getOperand(0), NegN1, SwapCond);
5204 }
5205 }
5206
5207 // setueq/setoeq X, (fabs Inf) -> is_fpclass X, fcInf
5209 !isFPImmLegal(CFP->getValueAPF(), CFP->getValueType(0))) {
5210 bool IsFabs = N0.getOpcode() == ISD::FABS;
5211 SDValue Op = IsFabs ? N0.getOperand(0) : N0;
5212 if ((Cond == ISD::SETOEQ || Cond == ISD::SETUEQ) && CFP->isInfinity()) {
5213 FPClassTest Flag = CFP->isNegative() ? (IsFabs ? fcNone : fcNegInf)
5214 : (IsFabs ? fcInf : fcPosInf);
5215 if (Cond == ISD::SETUEQ)
5216 Flag |= fcNan;
5217 return DAG.getNode(ISD::IS_FPCLASS, dl, VT, Op,
5218 DAG.getTargetConstant(Flag, dl, MVT::i32));
5219 }
5220 }
5221
5222 // If the condition is not legal, see if we can find an equivalent one
5223 // which is legal.
5225 // If the comparison was an awkward floating-point == or != and one of
5226 // the comparison operands is infinity or negative infinity, convert the
5227 // condition to a less-awkward <= or >=.
5228 if (CFP->getValueAPF().isInfinity()) {
5229 bool IsNegInf = CFP->getValueAPF().isNegative();
5231 switch (Cond) {
5232 case ISD::SETOEQ: NewCond = IsNegInf ? ISD::SETOLE : ISD::SETOGE; break;
5233 case ISD::SETUEQ: NewCond = IsNegInf ? ISD::SETULE : ISD::SETUGE; break;
5234 case ISD::SETUNE: NewCond = IsNegInf ? ISD::SETUGT : ISD::SETULT; break;
5235 case ISD::SETONE: NewCond = IsNegInf ? ISD::SETOGT : ISD::SETOLT; break;
5236 default: break;
5237 }
5238 if (NewCond != ISD::SETCC_INVALID &&
5239 isCondCodeLegal(NewCond, N0.getSimpleValueType()))
5240 return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5241 }
5242 }
5243 }
5244
5245 if (N0 == N1) {
5246 // The sext(setcc()) => setcc() optimization relies on the appropriate
5247 // constant being emitted.
5248 assert(!N0.getValueType().isInteger() &&
5249 "Integer types should be handled by FoldSetCC");
5250
5251 bool EqTrue = ISD::isTrueWhenEqual(Cond);
5252 unsigned UOF = ISD::getUnorderedFlavor(Cond);
5253 if (UOF == 2) // FP operators that are undefined on NaNs.
5254 return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5255 if (UOF == unsigned(EqTrue))
5256 return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5257 // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
5258 // if it is not already.
5259 ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
5260 if (NewCond != Cond &&
5261 (DCI.isBeforeLegalizeOps() ||
5262 isCondCodeLegal(NewCond, N0.getSimpleValueType())))
5263 return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5264 }
5265
5266 // ~X > ~Y --> Y > X
5267 // ~X < ~Y --> Y < X
5268 // ~X < C --> X > ~C
5269 // ~X > C --> X < ~C
5270 if ((isSignedIntSetCC(Cond) || isUnsignedIntSetCC(Cond)) &&
5271 N0.getValueType().isInteger()) {
5272 if (isBitwiseNot(N0)) {
5273 if (isBitwiseNot(N1))
5274 return DAG.getSetCC(dl, VT, N1.getOperand(0), N0.getOperand(0), Cond);
5275
5278 SDValue Not = DAG.getNOT(dl, N1, OpVT);
5279 return DAG.getSetCC(dl, VT, Not, N0.getOperand(0), Cond);
5280 }
5281 }
5282 }
5283
5284 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5285 N0.getValueType().isInteger()) {
5286 if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
5287 N0.getOpcode() == ISD::XOR) {
5288 // Simplify (X+Y) == (X+Z) --> Y == Z
5289 if (N0.getOpcode() == N1.getOpcode()) {
5290 if (N0.getOperand(0) == N1.getOperand(0))
5291 return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
5292 if (N0.getOperand(1) == N1.getOperand(1))
5293 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
5294 if (isCommutativeBinOp(N0.getOpcode())) {
5295 // If X op Y == Y op X, try other combinations.
5296 if (N0.getOperand(0) == N1.getOperand(1))
5297 return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
5298 Cond);
5299 if (N0.getOperand(1) == N1.getOperand(0))
5300 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
5301 Cond);
5302 }
5303 }
5304
5305 // If RHS is a legal immediate value for a compare instruction, we need
5306 // to be careful about increasing register pressure needlessly.
5307 bool LegalRHSImm = false;
5308
5309 if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
5310 if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5311 // Turn (X+C1) == C2 --> X == C2-C1
5312 if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse())
5313 return DAG.getSetCC(
5314 dl, VT, N0.getOperand(0),
5315 DAG.getConstant(RHSC->getAPIntValue() - LHSR->getAPIntValue(),
5316 dl, N0.getValueType()),
5317 Cond);
5318
5319 // Turn (X^C1) == C2 --> X == C1^C2
5320 if (N0.getOpcode() == ISD::XOR && N0.getNode()->hasOneUse())
5321 return DAG.getSetCC(
5322 dl, VT, N0.getOperand(0),
5323 DAG.getConstant(LHSR->getAPIntValue() ^ RHSC->getAPIntValue(),
5324 dl, N0.getValueType()),
5325 Cond);
5326 }
5327
5328 // Turn (C1-X) == C2 --> X == C1-C2
5329 if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
5330 if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse())
5331 return DAG.getSetCC(
5332 dl, VT, N0.getOperand(1),
5333 DAG.getConstant(SUBC->getAPIntValue() - RHSC->getAPIntValue(),
5334 dl, N0.getValueType()),
5335 Cond);
5336
5337 // Could RHSC fold directly into a compare?
5338 if (RHSC->getValueType(0).getSizeInBits() <= 64)
5339 LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
5340 }
5341
5342 // (X+Y) == X --> Y == 0 and similar folds.
5343 // Don't do this if X is an immediate that can fold into a cmp
5344 // instruction and X+Y has other uses. It could be an induction variable
5345 // chain, and the transform would increase register pressure.
5346 if (!LegalRHSImm || N0.hasOneUse())
5347 if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, dl, DCI))
5348 return V;
5349 }
5350
5351 if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
5352 N1.getOpcode() == ISD::XOR)
5353 if (SDValue V = foldSetCCWithBinOp(VT, N1, N0, Cond, dl, DCI))
5354 return V;
5355
5356 if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, dl, DCI))
5357 return V;
5358 }
5359
5360 // Fold remainder of division by a constant.
5361 if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) &&
5362 N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5363 // When division is cheap or optimizing for minimum size,
5364 // fall through to DIVREM creation by skipping this fold.
5365 if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Attribute::MinSize)) {
5366 if (N0.getOpcode() == ISD::UREM) {
5367 if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
5368 return Folded;
5369 } else if (N0.getOpcode() == ISD::SREM) {
5370 if (SDValue Folded = buildSREMEqFold(VT, N0, N1, Cond, DCI, dl))
5371 return Folded;
5372 }
5373 }
5374 }
5375
5376 // Fold away ALL boolean setcc's.
5377 if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
5378 SDValue Temp;
5379 switch (Cond) {
5380 default: llvm_unreachable("Unknown integer setcc!");
5381 case ISD::SETEQ: // X == Y -> ~(X^Y)
5382 Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5383 N0 = DAG.getNOT(dl, Temp, OpVT);
5384 if (!DCI.isCalledByLegalizer())
5385 DCI.AddToWorklist(Temp.getNode());
5386 break;
5387 case ISD::SETNE: // X != Y --> (X^Y)
5388 N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5389 break;
5390 case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
5391 case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
5392 Temp = DAG.getNOT(dl, N0, OpVT);
5393 N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp);
5394 if (!DCI.isCalledByLegalizer())
5395 DCI.AddToWorklist(Temp.getNode());
5396 break;
5397 case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
5398 case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
5399 Temp = DAG.getNOT(dl, N1, OpVT);
5400 N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp);
5401 if (!DCI.isCalledByLegalizer())
5402 DCI.AddToWorklist(Temp.getNode());
5403 break;
5404 case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
5405 case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
5406 Temp = DAG.getNOT(dl, N0, OpVT);
5407 N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp);
5408 if (!DCI.isCalledByLegalizer())
5409 DCI.AddToWorklist(Temp.getNode());
5410 break;
5411 case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
5412 case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
5413 Temp = DAG.getNOT(dl, N1, OpVT);
5414 N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp);
5415 break;
5416 }
5417 if (VT.getScalarType() != MVT::i1) {
5418 if (!DCI.isCalledByLegalizer())
5419 DCI.AddToWorklist(N0.getNode());
5420 // FIXME: If running after legalize, we probably can't do this.
5422 N0 = DAG.getNode(ExtendCode, dl, VT, N0);
5423 }
5424 return N0;
5425 }
5426
5427 // Could not fold it.
5428 return SDValue();
5429}
5430
/// Returns true (and the GlobalValue and the offset) if the node is a
/// GlobalAddress + offset.
                                    int64_t &Offset) const {

  // Look through any target-specific address wrapper nodes first.
  SDNode *N = unwrapAddress(SDValue(WN, 0)).getNode();

  // Direct hit: the node itself is a global address. Offset is accumulated
  // into (not overwritten), so the recursive ADD cases below compose.
  if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
    GA = GASD->getGlobal();
    Offset += GASD->getOffset();
    return true;
  }

  // (GA + C) / (C + GA): the global may sit on either side of the ADD, and
  // may itself be nested, e.g. ((GA + C1) + C2).
  if (N->getOpcode() == ISD::ADD) {
    SDValue N1 = N->getOperand(0);
    SDValue N2 = N->getOperand(1);
    if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
      if (auto *V = dyn_cast<ConstantSDNode>(N2)) {
        Offset += V->getSExtValue();
        return true;
      }
    } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
      if (auto *V = dyn_cast<ConstantSDNode>(N1)) {
        Offset += V->getSExtValue();
        return true;
      }
    }
  }

  return false;
}
5462
                                              DAGCombinerInfo &DCI) const {
  // Default implementation: no optimization.
  // Targets override this hook to combine target-specific DAG nodes; an empty
  // SDValue means "nothing was folded".
  return SDValue();
}
5468
5469//===----------------------------------------------------------------------===//
5470// Inline Assembler Implementation Methods
5471//===----------------------------------------------------------------------===//
5472
  unsigned S = Constraint.size();

  // Single-letter constraints: classify the common GCC inline-asm letters.
  if (S == 1) {
    switch (Constraint[0]) {
    default: break;
    case 'r':
      return C_RegisterClass;
    case 'm': // memory
    case 'o': // offsetable
    case 'V': // not offsetable
      return C_Memory;
    case 'p': // Address.
      return C_Address;
    case 'n': // Simple Integer
    case 'E': // Floating Point Constant
    case 'F': // Floating Point Constant
      return C_Immediate;
    case 'i': // Simple Integer or Relocatable Constant
    case 's': // Relocatable Constant
    case 'X': // Allow ANY value.
    case 'I': // Target registers.
    case 'J':
    case 'K':
    case 'L':
    case 'M':
    case 'N':
    case 'O':
    case 'P':
    case '<':
    case '>':
      return C_Other;
    }
  }

  // "{regname}" names an explicit physical register; "{memory}" is
  // special-cased as a memory clobber.
  if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') {
    if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}"
      return C_Memory;
    return C_Register;
  }
  return C_Unknown;
}
5516
5517/// Try to replace an X constraint, which matches anything, with another that
5518/// has more specific requirements based on the type of the corresponding
5519/// operand.
5520const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
5521 if (ConstraintVT.isInteger())
5522 return "r";
5523 if (ConstraintVT.isFloatingPoint())
5524 return "f"; // works for many targets
5525 return nullptr;
5526}
5527
    SDValue &Chain, SDValue &Glue, const SDLoc &DL,
    const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
  // Default: no target-specific lowering for flag-output asm operands.
  // Returning an empty SDValue signals "not handled" to the caller.
  return SDValue();
}
5533
/// Lower the specified operand into the Ops vector.
/// If it is invalid, don't add anything to Ops.
                                                  StringRef Constraint,
                                                  std::vector<SDValue> &Ops,
                                                  SelectionDAG &DAG) const {

  // Only single-letter constraints are handled by this default implementation.
  if (Constraint.size() > 1)
    return;

  char ConstraintLetter = Constraint[0];
  switch (ConstraintLetter) {
  default: break;
  case 'X': // Allows any operand
  case 'i': // Simple Integer or Relocatable Constant
  case 'n': // Simple Integer
  case 's': { // Relocatable Constant

    uint64_t Offset = 0;

    // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
    // etc., since getelementpointer is variadic. We can't use
    // SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible
    // while in this case the GA may be furthest from the root node which is
    // likely an ISD::ADD.
    while (true) {
      // A plain integer constant is accepted by every letter except 's'
      // (which requires a relocatable symbol).
      if ((C = dyn_cast<ConstantSDNode>(Op)) && ConstraintLetter != 's') {
        // gcc prints these as sign extended. Sign extend value to 64 bits
        // now; without this it would get ZExt'd later in
        // ScheduleDAGSDNodes::EmitNode, which is very generic.
        bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
        BooleanContent BCont = getBooleanContents(MVT::i64);
        ISD::NodeType ExtOpc =
            IsBool ? getExtendForContent(BCont) : ISD::SIGN_EXTEND;
        int64_t ExtVal =
            ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue() : C->getSExtValue();
        Ops.push_back(
            DAG.getTargetConstant(Offset + ExtVal, SDLoc(C), MVT::i64));
        return;
      }
      if (ConstraintLetter != 'n') {
        // 'i'/'s'/'X' also accept symbolic operands: global addresses,
        // block addresses, and basic blocks.
        if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
          Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
                                                   GA->getValueType(0),
                                                   Offset + GA->getOffset()));
          return;
        }
        if (const auto *BA = dyn_cast<BlockAddressSDNode>(Op)) {
          Ops.push_back(DAG.getTargetBlockAddress(
              BA->getBlockAddress(), BA->getValueType(0),
              Offset + BA->getOffset(), BA->getTargetFlags()));
          return;
        }
        if (isa<BasicBlockSDNode>(Op)) {
          Ops.push_back(Op);
          return;
        }
      }
      // Peel one (GA +/- C) level, accumulate the constant into Offset, and
      // keep walking toward the symbol/constant at the leaf.
      const unsigned OpCode = Op.getOpcode();
      if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
        if ((C = dyn_cast<ConstantSDNode>(Op.getOperand(0))))
          Op = Op.getOperand(1);
        // Subtraction is not commutative.
        else if (OpCode == ISD::ADD &&
                 (C = dyn_cast<ConstantSDNode>(Op.getOperand(1))))
          Op = Op.getOperand(0);
        else
          return;
        Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
        continue;
      }
      return;
    }
    break;
  }
  }
}
5612
    const CallInst &I, SmallVectorImpl<SDValue> &Ops, SelectionDAG &DAG) const {
  // Default implementation: targets add no extra operands here.
}
5616
std::pair<unsigned, const TargetRegisterClass *>
                                             StringRef Constraint,
                                             MVT VT) const {
  // Only explicit "{regname}" constraints are resolved here.
  if (!Constraint.starts_with("{"))
    return std::make_pair(0u, static_cast<TargetRegisterClass *>(nullptr));
  assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");

  // Remove the braces from around the name.
  StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);

  // R is the fallback result: the first class found that contains the
  // register, even if it does not support the requested value type.
  std::pair<unsigned, const TargetRegisterClass *> R =
      std::make_pair(0u, static_cast<const TargetRegisterClass *>(nullptr));

  // Figure out which register class contains this reg.
  for (const TargetRegisterClass *RC : RI->regclasses()) {
    // If none of the value types for this register class are valid, we
    // can't use it. For example, 64-bit reg classes on 32-bit targets.
    if (!isLegalRC(*RI, *RC))
      continue;

    for (const MCPhysReg &PR : *RC) {
      // Register asm names are matched case-insensitively.
      if (RegName.equals_insensitive(RI->getRegAsmName(PR))) {
        std::pair<unsigned, const TargetRegisterClass *> S =
            std::make_pair(PR, RC);

        // If this register class has the requested value type, return it,
        // otherwise keep searching and return the first class found
        // if no other is found which explicitly has the requested type.
        if (RI->isTypeLegalForClass(*RC, VT))
          return S;
        if (!R.second)
          R = S;
      }
    }
  }

  return R;
}
5656
5657//===----------------------------------------------------------------------===//
5658// Constraint Selection.
5659
/// Return true if this is an input operand that is a matching constraint like
/// "4".
  assert(!ConstraintCode.empty() && "No known constraint!");
  // Matching constraints are written as a decimal output-operand number.
  return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
}
5666
/// If this is an input matching constraint, this method returns the output
/// operand it matches.
  assert(!ConstraintCode.empty() && "No known constraint!");
  // The constraint text is the decimal index of the tied output operand.
  return atoi(ConstraintCode.c_str());
}
5673
/// Split up the constraint string from the inline assembly value into the
/// specific constraints and their prefixes, and also tie in the associated
/// operand values.
/// If this returns an empty vector, and if the constraint string itself
/// isn't empty, there was an error parsing.
                                const TargetRegisterInfo *TRI,
                                const CallBase &Call) const {
  /// Information about all of the constraints.
  AsmOperandInfoVector ConstraintOperands;
  const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
  unsigned maCount = 0; // Largest number of multiple alternative constraints.

  // Do a prepass over the constraints, canonicalizing them, and building up the
  // ConstraintOperands list.
  unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
  unsigned ResNo = 0; // ResNo - The result number of the next output.
  unsigned LabelNo = 0; // LabelNo - CallBr indirect dest number.

  for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
    ConstraintOperands.emplace_back(std::move(CI));
    AsmOperandInfo &OpInfo = ConstraintOperands.back();

    // Update multiple alternative constraint count.
    if (OpInfo.multipleAlternatives.size() > maCount)
      maCount = OpInfo.multipleAlternatives.size();

    OpInfo.ConstraintVT = MVT::Other;

    // Compute the value type for each operand.
    switch (OpInfo.Type) {
      // Indirect outputs just consume an argument.
      if (OpInfo.isIndirect) {
        OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
        break;
      }

      // The return value of the call is this value. As such, there is no
      // corresponding argument.
      assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
      if (auto *STy = dyn_cast<StructType>(Call.getType())) {
        // Multi-result asm: the ResNo'th output gets the matching struct
        // element type.
        OpInfo.ConstraintVT =
            getSimpleValueType(DL, STy->getElementType(ResNo));
      } else {
        assert(ResNo == 0 && "Asm only has one result!");
        OpInfo.ConstraintVT =
            getAsmOperandValueType(DL, Call.getType()).getSimpleVT();
      }
      ++ResNo;
      break;
    case InlineAsm::isInput:
      OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
      break;
    case InlineAsm::isLabel:
      // Labels do not consume call arguments, so skip the common value-type
      // computation below.
      OpInfo.CallOperandVal = cast<CallBrInst>(&Call)->getIndirectDest(LabelNo);
      ++LabelNo;
      continue;
      // Nothing to do.
      break;
    }

    if (OpInfo.CallOperandVal) {
      llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
      if (OpInfo.isIndirect) {
        OpTy = Call.getParamElementType(ArgNo);
        assert(OpTy && "Indirect operand must have elementtype attribute");
      }

      // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
      if (StructType *STy = dyn_cast<StructType>(OpTy))
        if (STy->getNumElements() == 1)
          OpTy = STy->getElementType(0);

      // If OpTy is not a single value, it may be a struct/union that we
      // can tile with integers.
      if (!OpTy->isSingleValueType() && OpTy->isSized()) {
        unsigned BitSize = DL.getTypeSizeInBits(OpTy);
        switch (BitSize) {
        default: break;
        case 1:
        case 8:
        case 16:
        case 32:
        case 64:
        case 128:
          OpTy = IntegerType::get(OpTy->getContext(), BitSize);
          break;
        }
      }

      EVT VT = getAsmOperandValueType(DL, OpTy, true);
      OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
      ArgNo++;
    }
  }

  // If we have multiple alternative constraints, select the best alternative.
  if (!ConstraintOperands.empty()) {
    if (maCount) {
      unsigned bestMAIndex = 0;
      int bestWeight = -1;
      // weight: -1 = invalid match, and 0 = so-so match to 5 = good match.
      int weight = -1;
      unsigned maIndex;
      // Compute the sums of the weights for each alternative, keeping track
      // of the best (highest weight) one so far.
      for (maIndex = 0; maIndex < maCount; ++maIndex) {
        int weightSum = 0;
        for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
             cIndex != eIndex; ++cIndex) {
          AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
          if (OpInfo.Type == InlineAsm::isClobber)
            continue;

          // If this is an output operand with a matching input operand,
          // look up the matching input. If their types mismatch, e.g. one
          // is an integer, the other is floating point, or their sizes are
          // different, flag it as an maCantMatch.
          if (OpInfo.hasMatchingInput()) {
            AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
            if (OpInfo.ConstraintVT != Input.ConstraintVT) {
              if ((OpInfo.ConstraintVT.isInteger() !=
                   Input.ConstraintVT.isInteger()) ||
                  (OpInfo.ConstraintVT.getSizeInBits() !=
                   Input.ConstraintVT.getSizeInBits())) {
                weightSum = -1; // Can't match.
                break;
              }
            }
          }
          weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
          if (weight == -1) {
            weightSum = -1;
            break;
          }
          weightSum += weight;
        }
        // Update best.
        if (weightSum > bestWeight) {
          bestWeight = weightSum;
          bestMAIndex = maIndex;
        }
      }

      // Now select chosen alternative in each constraint.
      for (AsmOperandInfo &cInfo : ConstraintOperands)
        if (cInfo.Type != InlineAsm::isClobber)
          cInfo.selectAlternative(bestMAIndex);
    }
  }

  // Check and hook up tied operands, choose constraint code to use.
  for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
       cIndex != eIndex; ++cIndex) {
    AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];

    // If this is an output operand with a matching input operand, look up the
    // matching input. If their types mismatch, e.g. one is an integer, the
    // other is floating point, or their sizes are different, flag it as an
    // error.
    if (OpInfo.hasMatchingInput()) {
      AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];

      if (OpInfo.ConstraintVT != Input.ConstraintVT) {
        std::pair<unsigned, const TargetRegisterClass *> MatchRC =
            getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
                                         OpInfo.ConstraintVT);
        std::pair<unsigned, const TargetRegisterClass *> InputRC =
            getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
                                         Input.ConstraintVT);
        // Tied operands must agree on integer-ness and register class.
        if ((OpInfo.ConstraintVT.isInteger() !=
             Input.ConstraintVT.isInteger()) ||
            (MatchRC.second != InputRC.second)) {
          report_fatal_error("Unsupported asm: input constraint"
                             " with a matching output constraint of"
                             " incompatible type!");
        }
      }
    }
  }

  return ConstraintOperands;
}
5860
/// Return a number indicating our preference for choosing a type of constraint
/// over another, for the purpose of sorting them. Immediates are almost always
/// preferable (when they can be emitted). A higher return value means a
/// stronger preference for one constraint type relative to another.
/// FIXME: We should prefer registers over memory but doing so may lead to
/// unrecoverable register exhaustion later.
/// https://github.com/llvm/llvm-project/issues/20571
  switch (CT) {
  // NOTE(review): the case labels for this switch are not visible in this
  // rendering; each return below corresponds to one ConstraintType.
      return 4;
      return 3;
    return 2;
    return 1;
      return 0;
  }
  llvm_unreachable("Invalid constraint type");
}
5885
/// Examine constraint type and operand type and determine a weight value.
/// This object must already have been set up with the operand type
/// and the current alternative constraint selected.
    AsmOperandInfo &info, int maIndex) const {
  // Pick the constraint-code list for the requested alternative; an
  // out-of-range maIndex falls back to the operand's primary codes.
  if (maIndex >= (int)info.multipleAlternatives.size())
    rCodes = &info.Codes;
  else
    rCodes = &info.multipleAlternatives[maIndex].Codes;
  ConstraintWeight BestWeight = CW_Invalid;

  // Loop over the options, keeping track of the most general one.
  for (const std::string &rCode : *rCodes) {
    ConstraintWeight weight =
        getSingleConstraintMatchWeight(info, rCode.c_str());
    if (weight > BestWeight)
      BestWeight = weight;
  }

  return BestWeight;
}
5909
/// Examine constraint type and operand type and determine a weight value.
/// This object must already have been set up with the operand type
/// and the current alternative constraint selected.
/// The weight is derived from the kind of IR value bound to the operand
/// (constant int/float, global, or plain integer-typed value).
    AsmOperandInfo &info, const char *constraint) const {
  ConstraintWeight weight = CW_Invalid;
  Value *CallOperandVal = info.CallOperandVal;
  // If we don't have a value, we can't do a match,
  // but allow it at the lowest weight.
  if (!CallOperandVal)
    return CW_Default;
  // Look at the constraint type.
  switch (*constraint) {
  case 'i': // immediate integer.
  case 'n': // immediate integer with a known value.
    if (isa<ConstantInt>(CallOperandVal))
      weight = CW_Constant;
    break;
  case 's': // non-explicit integral immediate.
    if (isa<GlobalValue>(CallOperandVal))
      weight = CW_Constant;
    break;
  case 'E': // immediate float if host format.
  case 'F': // immediate float.
    if (isa<ConstantFP>(CallOperandVal))
      weight = CW_Constant;
    break;
  case '<': // memory operand with autodecrement.
  case '>': // memory operand with autoincrement.
  case 'm': // memory operand.
  case 'o': // offsettable memory operand
  case 'V': // non-offsettable memory operand
    weight = CW_Memory;
    break;
  case 'r': // general register.
  case 'g': // general register, memory operand or immediate integer.
    // note: Clang converts "g" to "imr".
    if (CallOperandVal->getType()->isIntegerTy())
      weight = CW_Register;
    break;
  case 'X': // any operand.
  default:
    weight = CW_Default;
    break;
  }
  return weight;
}
5958
/// If there are multiple different constraints that we could pick for this
/// operand (e.g. "imr") try to pick the 'best' one.
/// This is somewhat tricky: constraints (TargetLowering::ConstraintType) fall
/// into seven classes:
///    Register          -> one specific register
///    RegisterClass     -> a group of regs
///    Memory            -> memory
///    Address           -> a symbolic memory reference
///    Immediate         -> immediate values
///    Other             -> magic values (such as "Flag Output Operands")
///    Unknown           -> something we don't recognize yet and can't handle
/// Ideally, we would pick the most specific constraint possible: if we have
/// something that fits into a register, we would pick it.  The problem here
/// is that if we have something that could either be in a register or in
/// memory that use of the register could cause selection of *other*
/// operands to fail: they might only succeed if we pick memory.  Because of
/// this the heuristic we use is:
///
///  1) If there is an 'other' constraint, and if the operand is valid for
///     that constraint, use it.  This makes us take advantage of 'i'
///     constraints when available.
///  2) Otherwise, pick the most general constraint present.  This prefers
///     'm' over 'r', for example.
///
    TargetLowering::AsmOperandInfo &OpInfo) const {
  ConstraintGroup Ret;

  Ret.reserve(OpInfo.Codes.size());
  for (StringRef Code : OpInfo.Codes) {
    TargetLowering::ConstraintType CType = getConstraintType(Code);

    // Indirect 'other' or 'immediate' constraints are not allowed.
    if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory ||
                               CType == TargetLowering::C_Register ||
      continue;

    // Things with matching constraints can only be registers, per gcc
    // documentation. This mainly affects "g" constraints.
    if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
      continue;

    Ret.emplace_back(Code, CType);
  }

  // Stable-sort by descending priority so the most preferred constraint type
  // comes first; stability keeps the user's original order within a class.
  std::stable_sort(
      Ret.begin(), Ret.end(), [](ConstraintPair a, ConstraintPair b) {
        return getConstraintPiority(a.second) > getConstraintPiority(b.second);
      });

  return Ret;
}
6012
/// If we have an immediate, see if we can lower it. Return true if we can,
/// false otherwise.
                                     SDValue Op, SelectionDAG *DAG,
                                     const TargetLowering &TLI) {

  assert((P.second == TargetLowering::C_Other ||
          P.second == TargetLowering::C_Immediate) &&
         "need immediate or other");

  // No SDValue available yet (e.g. early constraint analysis): can't tell.
  if (!Op.getNode())
    return false;

  // Success iff the target produced at least one lowered operand.
  std::vector<SDValue> ResultOps;
  TLI.LowerAsmOperandForConstraint(Op, P.first, ResultOps, *DAG);
  return !ResultOps.empty();
}
6030
/// Determines the constraint code and constraint type to use for the specific
/// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
                                            SDValue Op,
                                            SelectionDAG *DAG) const {
  assert(!OpInfo.Codes.empty() && "Must have at least one constraint");

  // Single-letter constraints ('r') are very common.
  if (OpInfo.Codes.size() == 1) {
    OpInfo.ConstraintCode = OpInfo.Codes[0];
    OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
  } else {
    // Multiple codes: rank them, then take the first Other/Immediate entry
    // that actually lowers; if none lowers, fall back to the top-ranked code.
    ConstraintGroup G = getConstraintPreferences(OpInfo);
    if (G.empty())
      return;

    unsigned BestIdx = 0;
    for (const unsigned E = G.size();
         BestIdx < E && (G[BestIdx].second == TargetLowering::C_Other ||
                         G[BestIdx].second == TargetLowering::C_Immediate);
         ++BestIdx) {
      if (lowerImmediateIfPossible(G[BestIdx], Op, DAG, *this))
        break;
      // If we're out of constraints, just pick the first one.
      if (BestIdx + 1 == E) {
        BestIdx = 0;
        break;
      }
    }

    OpInfo.ConstraintCode = G[BestIdx].first;
    OpInfo.ConstraintType = G[BestIdx].second;
  }

  // 'X' matches anything.
  if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
    // Constants are handled elsewhere.  For Functions, the type here is the
    // type of the result, which is not what we want to look at; leave them
    // alone.
    Value *v = OpInfo.CallOperandVal;
    if (isa<ConstantInt>(v) || isa<Function>(v)) {
      return;
    }

    // Code labels lower like immediates.
    if (isa<BasicBlock>(v) || isa<BlockAddress>(v)) {
      OpInfo.ConstraintCode = "i";
      return;
    }

    // Otherwise, try to resolve it to something we know about by looking at
    // the actual operand type.
    if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
      OpInfo.ConstraintCode = Repl;
      OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
    }
  }
}
6088
/// Given an exact SDIV by a constant, create a multiplication
/// with the multiplicative inverse of the constant.
/// Exact division by d = odd << k equals an exact arithmetic shift right by k
/// followed by multiplication with odd's inverse modulo 2^bitwidth.
                               const SDLoc &dl, SelectionDAG &DAG,
                               SmallVectorImpl<SDNode *> &Created) {
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();

  // UseSRA is set when any divisor element is even, i.e. needs the shift.
  bool UseSRA = false;
  SmallVector<SDValue, 16> Shifts, Factors;

  auto BuildSDIVPattern = [&](ConstantSDNode *C) {
    // Division by zero: give up on the whole fold.
    if (C->isZero())
      return false;
    APInt Divisor = C->getAPIntValue();
    unsigned Shift = Divisor.countr_zero();
    if (Shift) {
      // Strip the power-of-two factor; it is handled by an exact SRA below.
      Divisor.ashrInPlace(Shift);
      UseSRA = true;
    }
    // Inverse of the remaining odd divisor modulo 2^bitwidth.
    APInt Factor = Divisor.multiplicativeInverse();
    Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
    Factors.push_back(DAG.getConstant(Factor, dl, SVT));
    return true;
  };

  // Collect all magic values from the build vector.
  if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern))
    return SDValue();

  // Materialize per-lane shift amounts and factors in the form matching Op1
  // (build_vector, splat_vector, or scalar).
  SDValue Shift, Factor;
  if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
    Shift = DAG.getBuildVector(ShVT, dl, Shifts);
    Factor = DAG.getBuildVector(VT, dl, Factors);
  } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(Shifts.size() == 1 && Factors.size() == 1 &&
           "Expected matchUnaryPredicate to return one element for scalable "
           "vectors");
    Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
    Factor = DAG.getSplatVector(VT, dl, Factors[0]);
  } else {
    assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
    Shift = Shifts[0];
    Factor = Factors[0];
  }

  SDValue Res = Op0;

  // Shift the value upfront if it is even, so the LSB is one.
  if (UseSRA) {
    // TODO: For UDIV use SRL instead of SRA.
    SDNodeFlags Flags;
    Flags.setExact(true);
    Res = DAG.getNode(ISD::SRA, dl, VT, Res, Shift, Flags);
    Created.push_back(Res.getNode());
  }

  return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
}
6152
                                      SelectionDAG &DAG,
                                      SmallVectorImpl<SDNode *> &Created) const {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  // If integer division is cheap on this target, return the node itself to
  // signal that the SDIV should be kept rather than expanded.
  if (TLI.isIntDivCheap(N->getValueType(0), Attr))
    return SDValue(N, 0); // Lower SDIV as SDIV
  return SDValue();
}
6162
SDValue
                                  SelectionDAG &DAG,
                                  SmallVectorImpl<SDNode *> &Created) const {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  // Mirrors BuildSDIVPow2: keep the SREM node when division is cheap.
  if (TLI.isIntDivCheap(N->getValueType(0), Attr))
    return SDValue(N, 0); // Lower SREM as SREM
  return SDValue();
}
6173
/// Build sdiv by power-of-2 with conditional move instructions
/// Ref: "Hacker's Delight" by Henry Warren 10-1
/// If conditional move/branch is preferred, we lower sdiv x, +/-2**k into:
///   bgez x, label
///   add x, x, 2**k-1
/// label:
///   sra res, x, k
///   neg res, res (when the divisor is negative)
    SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
    SmallVectorImpl<SDNode *> &Created) const {
  unsigned Lg2 = Divisor.countr_zero();
  EVT VT = N->getValueType(0);

  SDLoc DL(N);
  SDValue N0 = N->getOperand(0);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  // 2**Lg2 - 1: the bias added to negative dividends so the SRA rounds
  // toward zero, matching sdiv semantics.
  APInt Lg2Mask = APInt::getLowBitsSet(VT.getSizeInBits(), Lg2);
  SDValue Pow2MinusOne = DAG.getConstant(Lg2Mask, DL, VT);

  // If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
  EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue Cmp = DAG.getSetCC(DL, CCVT, N0, Zero, ISD::SETLT);
  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
  SDValue CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);

  Created.push_back(Cmp.getNode());
  Created.push_back(Add.getNode());
  Created.push_back(CMov.getNode());

  // Divide by pow2.
  SDValue SRA =
      DAG.getNode(ISD::SRA, DL, VT, CMov, DAG.getConstant(Lg2, DL, VT));

  // If we're dividing by a positive value, we're done.  Otherwise, we must
  // negate the result.
  if (Divisor.isNonNegative())
    return SRA;

  Created.push_back(SRA.getNode());
  return DAG.getNode(ISD::SUB, DL, VT, Zero, SRA);
}
6216
6217/// Given an ISD::SDIV node expressing a divide by constant,
6218/// return a DAG expression to select that will generate the same value by
6219/// multiplying by a magic number.
6220/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
// NOTE(review): this extraction elides file line 6221, the signature head
// (SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,).
6222                                  bool IsAfterLegalization,
6223                                  SmallVectorImpl<SDNode *> &Created) const {
6224  SDLoc dl(N);
6225  EVT VT = N->getValueType(0);
6226  EVT SVT = VT.getScalarType();
6227  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6228  EVT ShSVT = ShVT.getScalarType();
6229  unsigned EltBits = VT.getScalarSizeInBits();
  // Wider type used for the multiply when VT itself is not legal.
6230  EVT MulVT;
6231
6232  // Check to see if we can do this.
6233  // FIXME: We should be more aggressive here.
6234  if (!isTypeLegal(VT)) {
6235    // Limit this to simple scalars for now.
6236    if (VT.isVector() || !VT.isSimple())
6237      return SDValue();
6238
6239    // If this type will be promoted to a large enough type with a legal
6240    // multiply operation, we can go ahead and do this transform.
    // NOTE(review): the condition line (file line 6241, presumably a check
    // that the type action is integer promotion) is elided in this extraction.
6242      return SDValue();
6243
6244    MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
6245    if (MulVT.getSizeInBits() < (2 * EltBits) ||
6246        !isOperationLegal(ISD::MUL, MulVT))
6247      return SDValue();
6248  }
6249
6250  // If the sdiv has an 'exact' bit we can use a simpler lowering.
6251  if (N->getFlags().hasExact())
6252    return BuildExactSDIV(*this, N, dl, DAG, Created);
6253
  // Per-element magic-number components, gathered for scalar/splat/vector.
6254  SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;
6255
  // Computes the magic multiplier, add/sub fixup factor, post-shift and
  // sign-bit mask for one constant divisor element. Returns false on a zero
  // divisor, which aborts the whole fold.
6256  auto BuildSDIVPattern = [&](ConstantSDNode *C) {
6257    if (C->isZero())
6258      return false;
6259
6260    const APInt &Divisor = C->getAPIntValue();
    // NOTE(review): the declaration of 'magics' (file line 6261, presumably
    // SignedDivisionByConstantInfo::get(Divisor)) is elided in this
    // extraction.
6262    int NumeratorFactor = 0;
6263    int ShiftMask = -1;
6264
6265    if (Divisor.isOne() || Divisor.isAllOnes()) {
6266      // If d is +1/-1, we just multiply the numerator by +1/-1.
6267      NumeratorFactor = Divisor.getSExtValue();
6268      magics.Magic = 0;
6269      magics.ShiftAmount = 0;
6270      ShiftMask = 0;
6271    } else if (Divisor.isStrictlyPositive() && magics.Magic.isNegative()) {
6272      // If d > 0 and m < 0, add the numerator.
6273      NumeratorFactor = 1;
6274    } else if (Divisor.isNegative() && magics.Magic.isStrictlyPositive()) {
6275      // If d < 0 and m > 0, subtract the numerator.
6276      NumeratorFactor = -1;
6277    }
6278
6279    MagicFactors.push_back(DAG.getConstant(magics.Magic, dl, SVT));
6280    Factors.push_back(DAG.getConstant(NumeratorFactor, dl, SVT));
6281    Shifts.push_back(DAG.getConstant(magics.ShiftAmount, dl, ShSVT));
6282    ShiftMasks.push_back(DAG.getConstant(ShiftMask, dl, SVT));
6283    return true;
6284  };
6285
6286  SDValue N0 = N->getOperand(0);
6287  SDValue N1 = N->getOperand(1);
6288
6289  // Collect the shifts / magic values from each element.
6290  if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern))
6291    return SDValue();
6292
  // Re-assemble the per-element constants to match N1's form:
  // BUILD_VECTOR, SPLAT_VECTOR (scalable), or plain scalar constant.
6293  SDValue MagicFactor, Factor, Shift, ShiftMask;
6294  if (N1.getOpcode() == ISD::BUILD_VECTOR) {
6295    MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
6296    Factor = DAG.getBuildVector(VT, dl, Factors);
6297    Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6298    ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks);
6299  } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
6300    assert(MagicFactors.size() == 1 && Factors.size() == 1 &&
6301           Shifts.size() == 1 && ShiftMasks.size() == 1 &&
6302           "Expected matchUnaryPredicate to return one element for scalable "
6303           "vectors");
6304    MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
6305    Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6306    Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6307    ShiftMask = DAG.getSplatVector(VT, dl, ShiftMasks[0]);
6308  } else {
6309    assert(isa<ConstantSDNode>(N1) && "Expected a constant");
6310    MagicFactor = MagicFactors[0];
6311    Factor = Factors[0];
6312    Shift = Shifts[0];
6313    ShiftMask = ShiftMasks[0];
6314  }
6315
6316  // Multiply the numerator (operand 0) by the magic value.
6317  // FIXME: We should support doing a MUL in a wider type.
  // Returns the high half of the signed product X*Y via, in order of
  // preference: the precomputed MulVT widening, MULHS, SMUL_LOHI, or a
  // widened MUL + SRL. Empty SDValue if none is available.
6318  auto GetMULHS = [&](SDValue X, SDValue Y) {
6319    // If the type isn't legal, use a wider mul of the type calculated
6320    // earlier.
6321    if (!isTypeLegal(VT)) {
6322      X = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, X);
6323      Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, Y);
6324      Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
6325      Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
6326                      DAG.getShiftAmountConstant(EltBits, MulVT, dl));
6327      return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6328    }
6329
6330    if (isOperationLegalOrCustom(ISD::MULHS, VT, IsAfterLegalization))
6331      return DAG.getNode(ISD::MULHS, dl, VT, X, Y);
6332    if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT, IsAfterLegalization)) {
6333      SDValue LoHi =
6334          DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
      // Result 1 of SMUL_LOHI is the high half.
6335      return SDValue(LoHi.getNode(), 1);
6336    }
6337    // If type twice as wide legal, widen and use a mul plus a shift.
6338    unsigned Size = VT.getScalarSizeInBits();
6339    EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), Size * 2);
6340    if (VT.isVector())
6341      WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
    // NOTE(review): the continuation (file line 6342, presumably the
    // VT.getVectorElementCount() argument) is elided in this extraction.
6343    if (isOperationLegalOrCustom(ISD::MUL, WideVT)) {
6344      X = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, X);
6345      Y = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, Y);
6346      Y = DAG.getNode(ISD::MUL, dl, WideVT, X, Y);
6347      Y = DAG.getNode(ISD::SRL, dl, WideVT, Y,
6348                      DAG.getShiftAmountConstant(EltBits, WideVT, dl));
6349      return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6350    }
6351    return SDValue();
6352  };
6353
6354  SDValue Q = GetMULHS(N0, MagicFactor);
6355  if (!Q)
6356    return SDValue();
6357
6358  Created.push_back(Q.getNode());
6359
6360  // (Optionally) Add/subtract the numerator using Factor.
6361  Factor = DAG.getNode(ISD::MUL, dl, VT, N0, Factor);
6362  Created.push_back(Factor.getNode());
6363  Q = DAG.getNode(ISD::ADD, dl, VT, Q, Factor);
6364  Created.push_back(Q.getNode());
6365
6366  // Shift right algebraic by shift value.
6367  Q = DAG.getNode(ISD::SRA, dl, VT, Q, Shift);
6368  Created.push_back(Q.getNode());
6369
6370  // Extract the sign bit, mask it and add it to the quotient.
6371  SDValue SignShift = DAG.getConstant(EltBits - 1, dl, ShVT);
6372  SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, SignShift);
6373  Created.push_back(T.getNode());
6374  T = DAG.getNode(ISD::AND, dl, VT, T, ShiftMask);
6375  Created.push_back(T.getNode());
6376  return DAG.getNode(ISD::ADD, dl, VT, Q, T);
6377}
6378
6379/// Given an ISD::UDIV node expressing a divide by constant,
6380/// return a DAG expression to select that will generate the same value by
6381/// multiplying by a magic number.
6382/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
// NOTE(review): this extraction elides file line 6383, the signature head
// (SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,).
6384                                  bool IsAfterLegalization,
6385                                  SmallVectorImpl<SDNode *> &Created) const {
6386  SDLoc dl(N);
6387  EVT VT = N->getValueType(0);
6388  EVT SVT = VT.getScalarType();
6389  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6390  EVT ShSVT = ShVT.getScalarType();
6391  unsigned EltBits = VT.getScalarSizeInBits();
  // Wider type used for the multiply when VT itself is not legal.
6392  EVT MulVT;
6393
6394  // Check to see if we can do this.
6395  // FIXME: We should be more aggressive here.
6396  if (!isTypeLegal(VT)) {
6397    // Limit this to simple scalars for now.
6398    if (VT.isVector() || !VT.isSimple())
6399      return SDValue();
6400
6401    // If this type will be promoted to a large enough type with a legal
6402    // multiply operation, we can go ahead and do this transform.
    // NOTE(review): the condition line (file line 6403, presumably a check
    // that the type action is integer promotion) is elided in this extraction.
6404      return SDValue();
6405
6406    MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
6407    if (MulVT.getSizeInBits() < (2 * EltBits) ||
6408        !isOperationLegal(ISD::MUL, MulVT))
6409      return SDValue();
6410  }
6411
6412  SDValue N0 = N->getOperand(0);
6413  SDValue N1 = N->getOperand(1);
6414
6415  // Try to use leading zeros of the dividend to reduce the multiplier and
6416  // avoid expensive fixups.
6417  // TODO: Support vectors.
6418  unsigned LeadingZeros = 0;
6419  if (!VT.isVector() && isa<ConstantSDNode>(N1)) {
6420    assert(!isOneConstant(N1) && "Unexpected divisor");
6421    LeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros();
6422    // UnsignedDivisionByConstantInfo doesn't work correctly if leading zeros in
6423    // the dividend exceeds the leading zeros for the divisor.
6424    LeadingZeros = std::min(LeadingZeros, N1->getAsAPIntVal().countl_zero());
6425  }
6426
6427  bool UseNPQ = false, UsePreShift = false, UsePostShift = false;
  // Per-element constants for the pre-shift, post-shift, magic multiplier and
  // the NPQ ("add" algorithm) factor.
6428  SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
6429
  // Computes the magic-number components for one constant divisor element.
  // Returns false on a zero divisor, which aborts the whole fold.
6430  auto BuildUDIVPattern = [&](ConstantSDNode *C) {
6431    if (C->isZero())
6432      return false;
6433    const APInt& Divisor = C->getAPIntValue();
6434
6435    SDValue PreShift, MagicFactor, NPQFactor, PostShift;
6436
6437    // Magic algorithm doesn't work for division by 1. We need to emit a select
6438    // at the end.
6439    if (Divisor.isOne()) {
      // Lane is handled by the final select; use UNDEF placeholders.
6440      PreShift = PostShift = DAG.getUNDEF(ShSVT);
6441      MagicFactor = NPQFactor = DAG.getUNDEF(SVT);
6442    } else {
      // NOTE(review): the declaration head of 'magics' (file line 6443,
      // presumably UnsignedDivisionByConstantInfo magics =) is elided in
      // this extraction.
6444          UnsignedDivisionByConstantInfo::get(Divisor, LeadingZeros);
6445
6446      MagicFactor = DAG.getConstant(magics.Magic, dl, SVT);
6447
6448      assert(magics.PreShift < Divisor.getBitWidth() &&
6449             "We shouldn't generate an undefined shift!");
6450      assert(magics.PostShift < Divisor.getBitWidth() &&
6451             "We shouldn't generate an undefined shift!");
6452      assert((!magics.IsAdd || magics.PreShift == 0) &&
6453             "Unexpected pre-shift");
6454      PreShift = DAG.getConstant(magics.PreShift, dl, ShSVT);
6455      PostShift = DAG.getConstant(magics.PostShift, dl, ShSVT);
      // For the NPQ path, MULHU by 2^(EltBits-1) acts as SRL-by-1; a zero
      // factor disables NPQ for that lane.
6456      NPQFactor = DAG.getConstant(
6457          magics.IsAdd ? APInt::getOneBitSet(EltBits, EltBits - 1)
6458                       : APInt::getZero(EltBits),
6459          dl, SVT);
6460      UseNPQ |= magics.IsAdd;
6461      UsePreShift |= magics.PreShift != 0;
6462      UsePostShift |= magics.PostShift != 0;
6463    }
6464
6465    PreShifts.push_back(PreShift);
6466    MagicFactors.push_back(MagicFactor);
6467    NPQFactors.push_back(NPQFactor);
6468    PostShifts.push_back(PostShift);
6469    return true;
6470  };
6471
6472  // Collect the shifts/magic values from each element.
6473  if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern))
6474    return SDValue();
6475
  // Re-assemble the per-element constants to match N1's form.
6476  SDValue PreShift, PostShift, MagicFactor, NPQFactor;
6477  if (N1.getOpcode() == ISD::BUILD_VECTOR) {
6478    PreShift = DAG.getBuildVector(ShVT, dl, PreShifts);
6479    MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
6480    NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors);
6481    PostShift = DAG.getBuildVector(ShVT, dl, PostShifts);
6482  } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
6483    assert(PreShifts.size() == 1 && MagicFactors.size() == 1 &&
6484           NPQFactors.size() == 1 && PostShifts.size() == 1 &&
6485           "Expected matchUnaryPredicate to return one for scalable vectors");
6486    PreShift = DAG.getSplatVector(ShVT, dl, PreShifts[0]);
6487    MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
6488    NPQFactor = DAG.getSplatVector(VT, dl, NPQFactors[0]);
6489    PostShift = DAG.getSplatVector(ShVT, dl, PostShifts[0]);
6490  } else {
6491    assert(isa<ConstantSDNode>(N1) && "Expected a constant");
6492    PreShift = PreShifts[0];
6493    MagicFactor = MagicFactors[0];
    // NPQFactor is intentionally not set here: the scalar NPQ path below
    // uses an explicit SRL-by-1 instead of a MULHU by NPQFactor.
6494    PostShift = PostShifts[0];
6495  }
6496
6497  SDValue Q = N0;
6498  if (UsePreShift) {
6499    Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
6500    Created.push_back(Q.getNode());
6501  }
6502
6503  // FIXME: We should support doing a MUL in a wider type.
  // Returns the high half of the unsigned product X*Y via, in order of
  // preference: the precomputed MulVT widening, MULHU, UMUL_LOHI, or a
  // widened MUL + SRL. Empty SDValue if none is available.
6504  auto GetMULHU = [&](SDValue X, SDValue Y) {
6505    // If the type isn't legal, use a wider mul of the type calculated
6506    // earlier.
6507    if (!isTypeLegal(VT)) {
6508      X = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, X);
6509      Y = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, Y);
6510      Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
6511      Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
6512                      DAG.getShiftAmountConstant(EltBits, MulVT, dl));
6513      return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6514    }
6515
6516    if (isOperationLegalOrCustom(ISD::MULHU, VT, IsAfterLegalization))
6517      return DAG.getNode(ISD::MULHU, dl, VT, X, Y);
6518    if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT, IsAfterLegalization)) {
6519      SDValue LoHi =
6520          DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
      // Result 1 of UMUL_LOHI is the high half.
6521      return SDValue(LoHi.getNode(), 1);
6522    }
6523    // If type twice as wide legal, widen and use a mul plus a shift.
6524    unsigned Size = VT.getScalarSizeInBits();
6525    EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), Size * 2);
6526    if (VT.isVector())
6527      WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
    // NOTE(review): the continuation (file line 6528, presumably the
    // VT.getVectorElementCount() argument) is elided in this extraction.
6529    if (isOperationLegalOrCustom(ISD::MUL, WideVT)) {
6530      X = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, X);
6531      Y = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, Y);
6532      Y = DAG.getNode(ISD::MUL, dl, WideVT, X, Y);
6533      Y = DAG.getNode(ISD::SRL, dl, WideVT, Y,
6534                      DAG.getShiftAmountConstant(EltBits, WideVT, dl));
6535      return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6536    }
6537    return SDValue(); // No mulhu or equivalent
6538  };
6539
6540  // Multiply the numerator (operand 0) by the magic value.
6541  Q = GetMULHU(Q, MagicFactor);
6542  if (!Q)
6543    return SDValue();
6544
6545  Created.push_back(Q.getNode());
6546
6547  if (UseNPQ) {
    // NPQ fixup for the "add" algorithm: Q += (N0 - Q) >> 1, per
    // Hacker's Delight.
6548    SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N0, Q);
6549    Created.push_back(NPQ.getNode());
6550
6551    // For vectors we might have a mix of non-NPQ/NPQ paths, so use
6552    // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
6553    if (VT.isVector())
6554      NPQ = GetMULHU(NPQ, NPQFactor);
6555    else
6556      NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, dl, ShVT));
6557
6558    Created.push_back(NPQ.getNode());
6559
6560    Q = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
6561    Created.push_back(Q.getNode());
6562  }
6563
6564  if (UsePostShift) {
6565    Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
6566    Created.push_back(Q.getNode());
6567  }
6568
6569  EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6570
  // Division by 1 was skipped above; select the original numerator for any
  // lane whose divisor is 1.
6571  SDValue One = DAG.getConstant(1, dl, VT);
6572  SDValue IsOne = DAG.getSetCC(dl, SetCCVT, N1, One, ISD::SETEQ);
6573  return DAG.getSelect(dl, VT, IsOne, N0, Q);
6574}
6575
6576/// If all values in Values that *don't* match the predicate are same 'splat'
6577/// value, then replace all values with that splat value.
6578/// Else, if AlternativeReplacement was provided, then replace all values that
6579/// do match predicate with AlternativeReplacement value.
// NOTE(review): this extraction elides file line 6581, the signature head
// (turnVectorIntoSplatVector(MutableArrayRef<SDValue> Values,).
6580static void
6582                           std::function<bool(SDValue)> Predicate,
6583                           SDValue AlternativeReplacement = SDValue()) {
6584  SDValue Replacement;
6585  // Is there a value for which the Predicate does *NOT* match? What is it?
6586  auto SplatValue = llvm::find_if_not(Values, Predicate);
6587  if (SplatValue != Values.end()) {
6588    // Does Values consist only of SplatValue's and values matching Predicate?
6589    if (llvm::all_of(Values, [Predicate, SplatValue](SDValue Value) {
6590          return Value == *SplatValue || Predicate(Value);
6591        })) // Then we shall replace values matching predicate with SplatValue.
6592      Replacement = *SplatValue;
6593  }
6594  if (!Replacement) {
6595    // Oops, we did not find the "baseline" splat value.
6596    if (!AlternativeReplacement)
6597      return; // Nothing to do.
6598    // Let's replace with provided value then.
6599    Replacement = AlternativeReplacement;
6600  }
  // Rewrite every predicate-matching slot in place with the chosen value.
6601  std::replace_if(Values.begin(), Values.end(), Predicate, Replacement);
6602}
6603
6604/// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
6605/// where the divisor is constant and the comparison target is zero,
6606/// return a DAG expression that will generate the same comparison result
6607/// using only multiplications, additions and shifts/rotations.
6608/// Ref: "Hacker's Delight" 10-17.
// Thin wrapper over prepareUREMEqFold: on success, queues every newly
// created node on the DAGCombiner worklist before returning the fold.
// NOTE(review): this extraction elides file lines 6611 (the ISD::CondCode
// Cond parameter line) and 6614 (the declaration of 'Built', presumably a
// SmallVector<SDNode *, N>) — confirm against the full source.
6609SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
6610                                        SDValue CompTargetNode,
6612                                        DAGCombinerInfo &DCI,
6613                                        const SDLoc &DL) const {
6615  if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
6616                                         DCI, DL, Built)) {
6617    for (SDNode *N : Built)
6618      DCI.AddToWorklist(N);
6619    return Folded;
6620  }
6621
  // Empty SDValue signals "fold not performed".
6622  return SDValue();
6623}
6624
// Core implementation of the UREM-equality fold; builds the replacement DAG
// and records every created node in 'Created' without touching the combiner
// worklist (the caller does that).
6625SDValue
6626TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
6627                                  SDValue CompTargetNode, ISD::CondCode Cond,
6628                                  DAGCombinerInfo &DCI, const SDLoc &DL,
6629                                  SmallVectorImpl<SDNode *> &Created) const {
6630  // fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q)
6631  // - D must be constant, with D = D0 * 2^K where D0 is odd
6632  // - P is the multiplicative inverse of D0 modulo 2^W
6633  // - Q = floor(((2^W) - 1) / D)
6634  // where W is the width of the common type of N and D.
6635  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
6636         "Only applicable for (in)equality comparisons.");
6637
6638  SelectionDAG &DAG = DCI.DAG;
6639
6640  EVT VT = REMNode.getValueType();
6641  EVT SVT = VT.getScalarType();
6642  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout(), !DCI.isBeforeLegalize());
6643  EVT ShSVT = ShVT.getScalarType();
6644
6645  // If MUL is unavailable, we cannot proceed in any case.
6646  if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
6647    return SDValue();
6648
  // Per-lane properties accumulated while scanning the divisor/comparison
  // constants; they steer profitability and fixup decisions below.
6649  bool ComparingWithAllZeros = true;
6650  bool AllComparisonsWithNonZerosAreTautological = true;
6651  bool HadTautologicalLanes = false;
6652  bool AllLanesAreTautological = true;
6653  bool HadEvenDivisor = false;
6654  bool AllDivisorsArePowerOfTwo = true;
6655  bool HadTautologicalInvertedLanes = false;
6656  SmallVector<SDValue, 16> PAmts, KAmts, QAmts, IAmts;
6657
  // Computes P, K and Q for one (divisor, comparison-target) constant pair.
6658  auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
6659    // Division by 0 is UB. Leave it to be constant-folded elsewhere.
6660    if (CDiv->isZero())
6661      return false;
6662
6663    const APInt &D = CDiv->getAPIntValue();
6664    const APInt &Cmp = CCmp->getAPIntValue();
6665
6666    ComparingWithAllZeros &= Cmp.isZero();
6667
6668    // x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
6669    // if C2 is not less than C1, the comparison is always false.
6670    // But we will only be able to produce the comparison that will give the
6671    // opposite tautological answer. So this lane would need to be fixed up.
6672    bool TautologicalInvertedLane = D.ule(Cmp);
6673    HadTautologicalInvertedLanes |= TautologicalInvertedLane;
6674
6675    // If all lanes are tautological (either all divisors are ones, or divisor
6676    // is not greater than the constant we are comparing with),
6677    // we will prefer to avoid the fold.
6678    bool TautologicalLane = D.isOne() || TautologicalInvertedLane;
6679    HadTautologicalLanes |= TautologicalLane;
6680    AllLanesAreTautological &= TautologicalLane;
6681
6682    // If we are comparing with non-zero, we'll need to subtract said
6683    // comparison value from the LHS. But there is no point in doing that if
6684    // every lane where we are comparing with non-zero is tautological.
6685    if (!Cmp.isZero())
6686      AllComparisonsWithNonZerosAreTautological &= TautologicalLane;
6687
6688    // Decompose D into D0 * 2^K
6689    unsigned K = D.countr_zero();
6690    assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
6691    APInt D0 = D.lshr(K);
6692
6693    // D is even if it has trailing zeros.
6694    HadEvenDivisor |= (K != 0);
6695    // D is a power-of-two if D0 is one.
6696    // If all divisors are power-of-two, we will prefer to avoid the fold.
6697    AllDivisorsArePowerOfTwo &= D0.isOne();
6698
6699    // P = inv(D0, 2^W)
6700    // 2^W requires W + 1 bits, so we have to extend and then truncate.
6701    unsigned W = D.getBitWidth();
    // NOTE(review): the computation of 'P' (file line 6702, presumably
    // D0.multiplicativeInverse()) is elided in this extraction.
6703    assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
6704
6705    // Q = floor((2^W - 1) u/ D)
6706    // R = ((2^W - 1) u% D)
6707    APInt Q, R;
    // NOTE(review): the division computing Q and R (file line 6708,
    // presumably APInt::udivrem) is elided in this extraction.
6709
6710    // If we are comparing with zero, then that comparison constant is okay,
6711    // else it may need to be one less than that.
6712    if (Cmp.ugt(R))
6713      Q -= 1;
6714
    // NOTE(review): the first line of this assert (file line 6715, checking
    // K against all-ones of ShSVT) is elided in this extraction.
6716           "We are expecting that K is always less than all-ones for ShSVT");
6717
6718    // If the lane is tautological the result can be constant-folded.
6719    if (TautologicalLane) {
6720      // Set P and K amounts to bogus values so we can try to splat them.
6721      P = 0;
6722      K = -1;
6723      // And ensure that comparison constant is tautological,
6724      // it will always compare true/false.
6725      Q = -1;
6726    }
6727
6728    PAmts.push_back(DAG.getConstant(P, DL, SVT));
6729    KAmts.push_back(
6730        DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT));
6731    QAmts.push_back(DAG.getConstant(Q, DL, SVT));
6732    return true;
6733  };
6734
6735  SDValue N = REMNode.getOperand(0);
6736  SDValue D = REMNode.getOperand(1);
6737
6738  // Collect the values from each element.
6739  if (!ISD::matchBinaryPredicate(D, CompTargetNode, BuildUREMPattern))
6740    return SDValue();
6741
6742  // If all lanes are tautological, the result can be constant-folded.
6743  if (AllLanesAreTautological)
6744    return SDValue();
6745
6746  // If this is a urem by a powers-of-two, avoid the fold since it can be
6747  // best implemented as a bit test.
6748  if (AllDivisorsArePowerOfTwo)
6749    return SDValue();
6750
  // Re-assemble the per-element constants to match D's form.
6751  SDValue PVal, KVal, QVal;
6752  if (D.getOpcode() == ISD::BUILD_VECTOR) {
6753    if (HadTautologicalLanes) {
6754      // Try to turn PAmts into a splat, since we don't care about the values
6755      // that are currently '0'. If we can't, just keep '0'`s.
      // NOTE(review): the turnVectorIntoSplatVector call for PAmts (file
      // line 6756) and the head of the KAmts call (file line 6759) are
      // elided in this extraction.
6757      // Try to turn KAmts into a splat, since we don't care about the values
6758      // that are currently '-1'. If we can't, change them to '0'`s.
6760                                DAG.getConstant(0, DL, ShSVT));
6761    }
6762
6763    PVal = DAG.getBuildVector(VT, DL, PAmts);
6764    KVal = DAG.getBuildVector(ShVT, DL, KAmts);
6765    QVal = DAG.getBuildVector(VT, DL, QAmts);
6766  } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
6767    assert(PAmts.size() == 1 && KAmts.size() == 1 && QAmts.size() == 1 &&
6768           "Expected matchBinaryPredicate to return one element for "
6769           "SPLAT_VECTORs");
6770    PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
6771    KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
6772    QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
6773  } else {
6774    PVal = PAmts[0];
6775    KVal = KAmts[0];
6776    QVal = QAmts[0];
6777  }
6778
  // Comparing with a non-zero target: rewrite x u% D == C as (x - C) u% D == 0.
6779  if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
6780    if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::SUB, VT))
6781      return SDValue(); // FIXME: Could/should use `ISD::ADD`?
6782    assert(CompTargetNode.getValueType() == N.getValueType() &&
6783           "Expecting that the types on LHS and RHS of comparisons match.");
6784    N = DAG.getNode(ISD::SUB, DL, VT, N, CompTargetNode);
6785  }
6786
6787  // (mul N, P)
6788  SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
6789  Created.push_back(Op0.getNode());
6790
6791  // Rotate right only if any divisor was even. We avoid rotates for all-odd
6792  // divisors as a performance improvement, since rotating by 0 is a no-op.
6793  if (HadEvenDivisor) {
6794    // We need ROTR to do this.
6795    if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
6796      return SDValue();
6797    // UREM: (rotr (mul N, P), K)
6798    Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
6799    Created.push_back(Op0.getNode());
6800  }
6801
6802  // UREM: (setule/setugt (rotr (mul N, P), K), Q)
6803  SDValue NewCC =
6804      DAG.getSetCC(DL, SETCCVT, Op0, QVal,
      // NOTE(review): the condition-code argument (file line 6805, presumably
      // selecting SETULE for SETEQ and SETUGT for SETNE) is elided in this
      // extraction.
6806  if (!HadTautologicalInvertedLanes)
6807    return NewCC;
6808
6809  // If any lanes previously compared always-false, the NewCC will give
6810  // always-true result for them, so we need to fixup those lanes.
6811  // Or the other way around for inequality predicate.
6812  assert(VT.isVector() && "Can/should only get here for vectors.");
6813  Created.push_back(NewCC.getNode());
6814
6815  // x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
6816  // if C2 is not less than C1, the comparison is always false.
6817  // But we have produced the comparison that will give the
6818  // opposite tautological answer. So these lanes would need to be fixed up.
6819  SDValue TautologicalInvertedChannels =
6820      DAG.getSetCC(DL, SETCCVT, D, CompTargetNode, ISD::SETULE);
6821  Created.push_back(TautologicalInvertedChannels.getNode());
6822
6823  // NOTE: we avoid letting illegal types through even if we're before legalize
6824  // ops – legalization has a hard time producing good code for this.
6825  if (isOperationLegalOrCustom(ISD::VSELECT, SETCCVT)) {
6826    // If we have a vector select, let's replace the comparison results in the
6827    // affected lanes with the correct tautological result.
6828    SDValue Replacement = DAG.getBoolConstant(Cond == ISD::SETEQ ? false : true,
6829                                              DL, SETCCVT, SETCCVT);
6830    return DAG.getNode(ISD::VSELECT, DL, SETCCVT, TautologicalInvertedChannels,
6831                       Replacement, NewCC);
6832  }
6833
6834  // Else, we can just invert the comparison result in the appropriate lanes.
6835  //
6836  // NOTE: see the note above VSELECT above.
6837  if (isOperationLegalOrCustom(ISD::XOR, SETCCVT))
6838    return DAG.getNode(ISD::XOR, DL, SETCCVT, NewCC,
6839                       TautologicalInvertedChannels);
6840
6841  return SDValue(); // Don't know how to lower.
6842}
6843
6844/// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
6845/// where the divisor is constant and the comparison target is zero,
6846/// return a DAG expression that will generate the same comparison result
6847/// using only multiplications, additions and shifts/rotations.
6848/// Ref: "Hacker's Delight" 10-17.
// Thin wrapper over prepareSREMEqFold: on success, queues every newly
// created node on the DAGCombiner worklist before returning the fold.
// NOTE(review): this extraction elides file lines 6851 (the ISD::CondCode
// Cond parameter line) and 6854 (the declaration of 'Built', presumably a
// SmallVector<SDNode *, N>) — confirm against the full source.
6849SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
6850                                        SDValue CompTargetNode,
6852                                        DAGCombinerInfo &DCI,
6853                                        const SDLoc &DL) const {
6855  if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
6856                                         DCI, DL, Built)) {
    // The SREM fold is bounded: it should never create more than 7 nodes.
6857    assert(Built.size() <= 7 && "Max size prediction failed.");
6858    for (SDNode *N : Built)
6859      DCI.AddToWorklist(N);
6860    return Folded;
6861  }
6862
  // Empty SDValue signals "fold not performed".
6863  return SDValue();
6864}
6865
6866SDValue
6867TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
6868 SDValue CompTargetNode, ISD::CondCode Cond,
6869 DAGCombinerInfo &DCI, const SDLoc &DL,
6870 SmallVectorImpl<SDNode *> &Created) const {
6871 // Derived from Hacker's Delight, 2nd Edition, by Hank Warren. Section 10-17.
6872 // Fold:
6873 // (seteq/ne (srem N, D), 0)
6874 // To:
6875 // (setule/ugt (rotr (add (mul N, P), A), K), Q)
6876 //
6877 // - D must be constant, with D = D0 * 2^K where D0 is odd
6878 // - P is the multiplicative inverse of D0 modulo 2^W
6879 // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^k)))
6880 // - Q = floor((2 * A) / (2^K))
6881 // where W is the width of the common type of N and D.
6882 //
6883 // When D is a power of two (and thus D0 is 1), the normal
6884 // formula for A and Q don't apply, because the derivation
6885 // depends on D not dividing 2^(W-1), and thus theorem ZRS
6886 // does not apply. This specifically fails when N = INT_MIN.
6887 //
6888 // Instead, for power-of-two D, we use:
6889 // - A = 2^(W-1)
6890 // |-> Order-preserving map from [-2^(W-1), 2^(W-1) - 1] to [0,2^W - 1])
6891 // - Q = 2^(W-K) - 1
6892 // |-> Test that the top K bits are zero after rotation
6893 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
6894 "Only applicable for (in)equality comparisons.");
6895
6896 SelectionDAG &DAG = DCI.DAG;
6897
6898 EVT VT = REMNode.getValueType();
6899 EVT SVT = VT.getScalarType();
6900 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout(), !DCI.isBeforeLegalize());
6901 EVT ShSVT = ShVT.getScalarType();
6902
6903 // If we are after ops legalization, and MUL is unavailable, we can not
6904 // proceed.
6905 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
6906 return SDValue();
6907
6908 // TODO: Could support comparing with non-zero too.
6909 ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
6910 if (!CompTarget || !CompTarget->isZero())
6911 return SDValue();
6912
6913 bool HadIntMinDivisor = false;
6914 bool HadOneDivisor = false;
6915 bool AllDivisorsAreOnes = true;
6916 bool HadEvenDivisor = false;
6917 bool NeedToApplyOffset = false;
6918 bool AllDivisorsArePowerOfTwo = true;
6919 SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts;
6920
6921 auto BuildSREMPattern = [&](ConstantSDNode *C) {
6922 // Division by 0 is UB. Leave it to be constant-folded elsewhere.
6923 if (C->isZero())
6924 return false;
6925
6926 // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.
6927
6928 // WARNING: this fold is only valid for positive divisors!
6929 APInt D = C->getAPIntValue();
6930 if (D.isNegative())
6931 D.negate(); // `rem %X, -C` is equivalent to `rem %X, C`
6932
6933 HadIntMinDivisor |= D.isMinSignedValue();
6934
6935 // If all divisors are ones, we will prefer to avoid the fold.
6936 HadOneDivisor |= D.isOne();
6937 AllDivisorsAreOnes &= D.isOne();
6938
6939 // Decompose D into D0 * 2^K
6940 unsigned K = D.countr_zero();
6941 assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
6942 APInt D0 = D.lshr(K);
6943
6944 if (!D.isMinSignedValue()) {
6945 // D is even if it has trailing zeros; unless it's INT_MIN, in which case
6946 // we don't care about this lane in this fold, we'll special-handle it.
6947 HadEvenDivisor |= (K != 0);
6948 }
6949
6950 // D is a power-of-two if D0 is one. This includes INT_MIN.
6951 // If all divisors are power-of-two, we will prefer to avoid the fold.
6952 AllDivisorsArePowerOfTwo &= D0.isOne();
6953
6954 // P = inv(D0, 2^W)
6955 // 2^W requires W + 1 bits, so we have to extend and then truncate.
6956 unsigned W = D.getBitWidth();
6958 assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
6959
6960 // A = floor((2^(W - 1) - 1) / D0) & -2^K
6962 A.clearLowBits(K);
6963
6964 if (!D.isMinSignedValue()) {
6965 // If divisor INT_MIN, then we don't care about this lane in this fold,
6966 // we'll special-handle it.
6967 NeedToApplyOffset |= A != 0;
6968 }
6969
6970 // Q = floor((2 * A) / (2^K))
6971 APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K));
6972
6974 "We are expecting that A is always less than all-ones for SVT");
6976 "We are expecting that K is always less than all-ones for ShSVT");
6977
6978 // If D was a power of two, apply the alternate constant derivation.
6979 if (D0.isOne()) {
6980 // A = 2^(W-1)
6982 // - Q = 2^(W-K) - 1
6983 Q = APInt::getAllOnes(W - K).zext(W);
6984 }
6985
6986 // If the divisor is 1 the result can be constant-folded. Likewise, we
6987 // don't care about INT_MIN lanes, those can be set to undef if appropriate.
6988 if (D.isOne()) {
6989 // Set P, A and K to a bogus values so we can try to splat them.
6990 P = 0;
6991 A = -1;
6992 K = -1;
6993
6994 // x ?% 1 == 0 <--> true <--> x u<= -1
6995 Q = -1;
6996 }
6997
6998 PAmts.push_back(DAG.getConstant(P, DL, SVT));
6999 AAmts.push_back(DAG.getConstant(A, DL, SVT));
7000 KAmts.push_back(
7001 DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT));
7002 QAmts.push_back(DAG.getConstant(Q, DL, SVT));
7003 return true;
7004 };
7005
7006 SDValue N = REMNode.getOperand(0);
7007 SDValue D = REMNode.getOperand(1);
7008
7009 // Collect the values from each element.
7010 if (!ISD::matchUnaryPredicate(D, BuildSREMPattern))
7011 return SDValue();
7012
7013 // If this is a srem by a one, avoid the fold since it can be constant-folded.
7014 if (AllDivisorsAreOnes)
7015 return SDValue();
7016
7017 // If this is a srem by a powers-of-two (including INT_MIN), avoid the fold
7018 // since it can be best implemented as a bit test.
7019 if (AllDivisorsArePowerOfTwo)
7020 return SDValue();
7021
7022 SDValue PVal, AVal, KVal, QVal;
7023 if (D.getOpcode() == ISD::BUILD_VECTOR) {
7024 if (HadOneDivisor) {
7025 // Try to turn PAmts into a splat, since we don't care about the values
7026 // that are currently '0'. If we can't, just keep '0'`s.
7028 // Try to turn AAmts into a splat, since we don't care about the
7029 // values that are currently '-1'. If we can't, change them to '0'`s.
7031 DAG.getConstant(0, DL, SVT));
7032 // Try to turn KAmts into a splat, since we don't care about the values
7033 // that are currently '-1'. If we can't, change them to '0'`s.
7035 DAG.getConstant(0, DL, ShSVT));
7036 }
7037
7038 PVal = DAG.getBuildVector(VT, DL, PAmts);
7039 AVal = DAG.getBuildVector(VT, DL, AAmts);
7040 KVal = DAG.getBuildVector(ShVT, DL, KAmts);
7041 QVal = DAG.getBuildVector(VT, DL, QAmts);
7042 } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
7043 assert(PAmts.size() == 1 && AAmts.size() == 1 && KAmts.size() == 1 &&
7044 QAmts.size() == 1 &&
7045 "Expected matchUnaryPredicate to return one element for scalable "
7046 "vectors");
7047 PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
7048 AVal = DAG.getSplatVector(VT, DL, AAmts[0]);
7049 KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
7050 QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
7051 } else {
7052 assert(isa<ConstantSDNode>(D) && "Expected a constant");
7053 PVal = PAmts[0];
7054 AVal = AAmts[0];
7055 KVal = KAmts[0];
7056 QVal = QAmts[0];
7057 }
7058
7059 // (mul N, P)
7060 SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
7061 Created.push_back(Op0.getNode());
7062
7063 if (NeedToApplyOffset) {
7064 // We need ADD to do this.
7065 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ADD, VT))
7066 return SDValue();
7067
7068 // (add (mul N, P), A)
7069 Op0 = DAG.getNode(ISD::ADD, DL, VT, Op0, AVal);
7070 Created.push_back(Op0.getNode());
7071 }
7072
7073 // Rotate right only if any divisor was even. We avoid rotates for all-odd
7074 // divisors as a performance improvement, since rotating by 0 is a no-op.
7075 if (HadEvenDivisor) {
7076 // We need ROTR to do this.
7077 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
7078 return SDValue();
7079 // SREM: (rotr (add (mul N, P), A), K)
7080 Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
7081 Created.push_back(Op0.getNode());
7082 }
7083
7084 // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
7085 SDValue Fold =
7086 DAG.getSetCC(DL, SETCCVT, Op0, QVal,
7088
7089 // If we didn't have lanes with INT_MIN divisor, then we're done.
7090 if (!HadIntMinDivisor)
7091 return Fold;
7092
7093 // That fold is only valid for positive divisors. Which effectively means,
7094 // it is invalid for INT_MIN divisors. So if we have such a lane,
7095 // we must fix-up results for said lanes.
7096 assert(VT.isVector() && "Can/should only get here for vectors.");
7097
7098 // NOTE: we avoid letting illegal types through even if we're before legalize
7099 // ops – legalization has a hard time producing good code for the code that
7100 // follows.
7101 if (!isOperationLegalOrCustom(ISD::SETCC, SETCCVT) ||
7105 return SDValue();
7106
7107 Created.push_back(Fold.getNode());
7108
7109 SDValue IntMin = DAG.getConstant(
7111 SDValue IntMax = DAG.getConstant(
7113 SDValue Zero =
7115
7116 // Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.
7117 SDValue DivisorIsIntMin = DAG.getSetCC(DL, SETCCVT, D, IntMin, ISD::SETEQ);
7118 Created.push_back(DivisorIsIntMin.getNode());
7119
7120 // (N s% INT_MIN) ==/!= 0 <--> (N & INT_MAX) ==/!= 0
7121 SDValue Masked = DAG.getNode(ISD::AND, DL, VT, N, IntMax);
7122 Created.push_back(Masked.getNode());
7123 SDValue MaskedIsZero = DAG.getSetCC(DL, SETCCVT, Masked, Zero, Cond);
7124 Created.push_back(MaskedIsZero.getNode());
7125
7126 // To produce final result we need to blend 2 vectors: 'SetCC' and
7127 // 'MaskedIsZero'. If the divisor for channel was *NOT* INT_MIN, we pick
7128 // from 'Fold', else pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is
7129 // constant-folded, select can get lowered to a shuffle with constant mask.
7130 SDValue Blended = DAG.getNode(ISD::VSELECT, DL, SETCCVT, DivisorIsIntMin,
7131 MaskedIsZero, Fold);
7132
7133 return Blended;
7134}
7135
7138 if (!isa<ConstantSDNode>(Op.getOperand(0))) {
7139 DAG.getContext()->emitError("argument to '__builtin_return_address' must "
7140 "be a constant integer");
7141 return true;
7142 }
7143
7144 return false;
7145}
7146
7148 const DenormalMode &Mode) const {
7149 SDLoc DL(Op);
7150 EVT VT = Op.getValueType();
7151 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7152 SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
7153
7154 // This is specifically a check for the handling of denormal inputs, not the
7155 // result.
7156 if (Mode.Input == DenormalMode::PreserveSign ||
7157 Mode.Input == DenormalMode::PositiveZero) {
7158 // Test = X == 0.0
7159 return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
7160 }
7161
7162 // Testing it with denormal inputs to avoid wrong estimate.
7163 //
7164 // Test = fabs(X) < SmallestNormal
7165 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
7166 APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
7167 SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
7168 SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
7169 return DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
7170}
7171
7173 bool LegalOps, bool OptForSize,
7175 unsigned Depth) const {
7176 // fneg is removable even if it has multiple uses.
7177 if (Op.getOpcode() == ISD::FNEG || Op.getOpcode() == ISD::VP_FNEG) {
7179 return Op.getOperand(0);
7180 }
7181
7182 // Don't recurse exponentially.
7184 return SDValue();
7185
7186 // Pre-increment recursion depth for use in recursive calls.
7187 ++Depth;
7188 const SDNodeFlags Flags = Op->getFlags();
7189 const TargetOptions &Options = DAG.getTarget().Options;
7190 EVT VT = Op.getValueType();
7191 unsigned Opcode = Op.getOpcode();
7192
7193 // Don't allow anything with multiple uses unless we know it is free.
7194 if (!Op.hasOneUse() && Opcode != ISD::ConstantFP) {
7195 bool IsFreeExtend = Opcode == ISD::FP_EXTEND &&
7196 isFPExtFree(VT, Op.getOperand(0).getValueType());
7197 if (!IsFreeExtend)
7198 return SDValue();
7199 }
7200
7201 auto RemoveDeadNode = [&](SDValue N) {
7202 if (N && N.getNode()->use_empty())
7203 DAG.RemoveDeadNode(N.getNode());
7204 };
7205
7206 SDLoc DL(Op);
7207
7208 // Because getNegatedExpression can delete nodes we need a handle to keep
7209 // temporary nodes alive in case the recursion manages to create an identical
7210 // node.
7211 std::list<HandleSDNode> Handles;
7212
7213 switch (Opcode) {
7214 case ISD::ConstantFP: {
7215 // Don't invert constant FP values after legalization unless the target says
7216 // the negated constant is legal.
7217 bool IsOpLegal =
7219 isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
7220 OptForSize);
7221
7222 if (LegalOps && !IsOpLegal)
7223 break;
7224
7225 APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
7226 V.changeSign();
7227 SDValue CFP = DAG.getConstantFP(V, DL, VT);
7228
7229 // If we already have the use of the negated floating constant, it is free
7230 // to negate it even it has multiple uses.
7231 if (!Op.hasOneUse() && CFP.use_empty())
7232 break;
7234 return CFP;
7235 }
7236 case ISD::BUILD_VECTOR: {
7237 // Only permit BUILD_VECTOR of constants.
7238 if (llvm::any_of(Op->op_values(), [&](SDValue N) {
7239 return !N.isUndef() && !isa<ConstantFPSDNode>(N);
7240 }))
7241 break;
7242
7243 bool IsOpLegal =
7246 llvm::all_of(Op->op_values(), [&](SDValue N) {
7247 return N.isUndef() ||
7248 isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
7249 OptForSize);
7250 });
7251
7252 if (LegalOps && !IsOpLegal)
7253 break;
7254
7256 for (SDValue C : Op->op_values()) {
7257 if (C.isUndef()) {
7258 Ops.push_back(C);
7259 continue;
7260 }
7261 APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
7262 V.changeSign();
7263 Ops.push_back(DAG.getConstantFP(V, DL, C.getValueType()));
7264 }
7266 return DAG.getBuildVector(VT, DL, Ops);
7267 }
7268 case ISD::FADD: {
7269 if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
7270 break;
7271
7272 // After operation legalization, it might not be legal to create new FSUBs.
7273 if (LegalOps && !isOperationLegalOrCustom(ISD::FSUB, VT))
7274 break;
7275 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7276
7277 // fold (fneg (fadd X, Y)) -> (fsub (fneg X), Y)
7279 SDValue NegX =
7280 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7281 // Prevent this node from being deleted by the next call.
7282 if (NegX)
7283 Handles.emplace_back(NegX);
7284
7285 // fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X)
7287 SDValue NegY =
7288 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7289
7290 // We're done with the handles.
7291 Handles.clear();
7292
7293 // Negate the X if its cost is less or equal than Y.
7294 if (NegX && (CostX <= CostY)) {
7295 Cost = CostX;
7296 SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegX, Y, Flags);
7297 if (NegY != N)
7298 RemoveDeadNode(NegY);
7299 return N;
7300 }
7301
7302 // Negate the Y if it is not expensive.
7303 if (NegY) {
7304 Cost = CostY;
7305 SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegY, X, Flags);
7306 if (NegX != N)
7307 RemoveDeadNode(NegX);
7308 return N;
7309 }
7310 break;
7311 }
7312 case ISD::FSUB: {
7313 // We can't turn -(A-B) into B-A when we honor signed zeros.
7314 if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
7315 break;
7316
7317 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7318 // fold (fneg (fsub 0, Y)) -> Y
7319 if (ConstantFPSDNode *C = isConstOrConstSplatFP(X, /*AllowUndefs*/ true))
7320 if (C->isZero()) {
7322 return Y;
7323 }
7324
7325 // fold (fneg (fsub X, Y)) -> (fsub Y, X)
7327 return DAG.getNode(ISD::FSUB, DL, VT, Y, X, Flags);
7328 }
7329 case ISD::FMUL:
7330 case ISD::FDIV: {
7331 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7332
7333 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
7335 SDValue NegX =
7336 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7337 // Prevent this node from being deleted by the next call.
7338 if (NegX)
7339 Handles.emplace_back(NegX);
7340
7341 // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
7343 SDValue NegY =
7344 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7345
7346 // We're done with the handles.
7347 Handles.clear();
7348
7349 // Negate the X if its cost is less or equal than Y.
7350 if (NegX && (CostX <= CostY)) {
7351 Cost = CostX;
7352 SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, Flags);
7353 if (NegY != N)
7354 RemoveDeadNode(NegY);
7355 return N;
7356 }
7357
7358 // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
7359 if (auto *C = isConstOrConstSplatFP(Op.getOperand(1)))
7360 if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL)
7361 break;
7362
7363 // Negate the Y if it is not expensive.
7364 if (NegY) {
7365 Cost = CostY;
7366 SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, Flags);
7367 if (NegX != N)
7368 RemoveDeadNode(NegX);
7369 return N;
7370 }
7371 break;
7372 }
7373 case ISD::FMA:
7374 case ISD::FMAD: {
7375 if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
7376 break;
7377
7378 SDValue X = Op.getOperand(0), Y = Op.getOperand(1), Z = Op.getOperand(2);
7380 SDValue NegZ =
7381 getNegatedExpression(Z, DAG, LegalOps, OptForSize, CostZ, Depth);
7382 // Give up if fail to negate the Z.
7383 if (!NegZ)
7384 break;
7385
7386 // Prevent this node from being deleted by the next two calls.
7387 Handles.emplace_back(NegZ);
7388
7389 // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
7391 SDValue NegX =
7392 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7393 // Prevent this node from being deleted by the next call.
7394 if (NegX)
7395 Handles.emplace_back(NegX);
7396
7397 // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
7399 SDValue NegY =
7400 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7401
7402 // We're done with the handles.
7403 Handles.clear();
7404
7405 // Negate the X if its cost is less or equal than Y.
7406 if (NegX && (CostX <= CostY)) {
7407 Cost = std::min(CostX, CostZ);
7408 SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, NegZ, Flags);
7409 if (NegY != N)
7410 RemoveDeadNode(NegY);
7411 return N;
7412 }
7413
7414 // Negate the Y if it is not expensive.
7415 if (NegY) {
7416 Cost = std::min(CostY, CostZ);
7417 SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, NegZ, Flags);
7418 if (NegX != N)
7419 RemoveDeadNode(NegX);
7420 return N;
7421 }
7422 break;
7423 }
7424
7425 case ISD::FP_EXTEND:
7426 case ISD::FSIN:
7427 if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
7428 OptForSize, Cost, Depth))
7429 return DAG.getNode(Opcode, DL, VT, NegV);
7430 break;
7431 case ISD::FP_ROUND:
7432 if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
7433 OptForSize, Cost, Depth))
7434 return DAG.getNode(ISD::FP_ROUND, DL, VT, NegV, Op.getOperand(1));
7435 break;
7436 case ISD::SELECT:
7437 case ISD::VSELECT: {
7438 // fold (fneg (select C, LHS, RHS)) -> (select C, (fneg LHS), (fneg RHS))
7439 // iff at least one cost is cheaper and the other is neutral/cheaper
7440 SDValue LHS = Op.getOperand(1);
7442 SDValue NegLHS =
7443 getNegatedExpression(LHS, DAG, LegalOps, OptForSize, CostLHS, Depth);
7444 if (!NegLHS || CostLHS > NegatibleCost::Neutral) {
7445 RemoveDeadNode(NegLHS);
7446 break;
7447 }
7448
7449 // Prevent this node from being deleted by the next call.
7450 Handles.emplace_back(NegLHS);
7451
7452 SDValue RHS = Op.getOperand(2);
7454 SDValue NegRHS =
7455 getNegatedExpression(RHS, DAG, LegalOps, OptForSize, CostRHS, Depth);
7456
7457 // We're done with the handles.
7458 Handles.clear();
7459
7460 if (!NegRHS || CostRHS > NegatibleCost::Neutral ||
7461 (CostLHS != NegatibleCost::Cheaper &&
7462 CostRHS != NegatibleCost::Cheaper)) {
7463 RemoveDeadNode(NegLHS);
7464 RemoveDeadNode(NegRHS);
7465 break;
7466 }
7467
7468 Cost = std::min(CostLHS, CostRHS);
7469 return DAG.getSelect(DL, VT, Op.getOperand(0), NegLHS, NegRHS);
7470 }
7471 }
7472
7473 return SDValue();
7474}
7475
7476//===----------------------------------------------------------------------===//
7477// Legalization Utilities
7478//===----------------------------------------------------------------------===//
7479
7480bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl,
7481 SDValue LHS, SDValue RHS,
7483 EVT HiLoVT, SelectionDAG &DAG,
7484 MulExpansionKind Kind, SDValue LL,
7485 SDValue LH, SDValue RL, SDValue RH) const {
7486 assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
7487 Opcode == ISD::SMUL_LOHI);
7488
7489 bool HasMULHS = (Kind == MulExpansionKind::Always) ||
7491 bool HasMULHU = (Kind == MulExpansionKind::Always) ||
7493 bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
7495 bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
7497
7498 if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
7499 return false;
7500
7501 unsigned OuterBitSize = VT.getScalarSizeInBits();
7502 unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();
7503
7504 // LL, LH, RL, and RH must be either all NULL or all set to a value.
7505 assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
7506 (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));
7507
7508 SDVTList VTs = DAG.getVTList(HiLoVT, HiLoVT);
7509 auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
7510 bool Signed) -> bool {
7511 if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
7512 Lo = DAG.getNode(Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, dl, VTs, L, R);
7513 Hi = SDValue(Lo.getNode(), 1);
7514 return true;
7515 }
7516 if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
7517 Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, L, R);
7518 Hi = DAG.getNode(Signed ? ISD::MULHS : ISD::MULHU, dl, HiLoVT, L, R);
7519 return true;
7520 }
7521 return false;
7522 };
7523
7524 SDValue Lo, Hi;
7525
7526 if (!LL.getNode() && !RL.getNode() &&
7528 LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LHS);
7529 RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RHS);
7530 }
7531
7532 if (!LL.getNode())
7533 return false;
7534
7535 APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
7536 if (DAG.MaskedValueIsZero(LHS, HighMask) &&
7537 DAG.MaskedValueIsZero(RHS, HighMask)) {
7538 // The inputs are both zero-extended.
7539 if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
7540 Result.push_back(Lo);
7541 Result.push_back(Hi);
7542 if (Opcode != ISD::MUL) {
7543 SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
7544 Result.push_back(Zero);
7545 Result.push_back(Zero);
7546 }
7547 return true;
7548 }
7549 }
7550
7551 if (!VT.isVector() && Opcode == ISD::MUL &&
7552 DAG.ComputeMaxSignificantBits(LHS) <= InnerBitSize &&
7553 DAG.ComputeMaxSignificantBits(RHS) <= InnerBitSize) {
7554 // The input values are both sign-extended.
7555 // TODO non-MUL case?
7556 if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
7557 Result.push_back(Lo);
7558 Result.push_back(Hi);
7559 return true;
7560 }
7561 }
7562
7563 unsigned ShiftAmount = OuterBitSize - InnerBitSize;
7564 SDValue Shift = DAG.getShiftAmountConstant(ShiftAmount, VT, dl);
7565
7566 if (!LH.getNode() && !RH.getNode() &&
7569 LH = DAG.getNode(ISD::SRL, dl, VT, LHS, Shift);
7570 LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
7571 RH = DAG.getNode(ISD::SRL, dl, VT, RHS, Shift);
7572 RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
7573 }
7574
7575 if (!LH.getNode())
7576 return false;
7577
7578 if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false))
7579 return false;
7580
7581 Result.push_back(Lo);
7582
7583 if (Opcode == ISD::MUL) {
7584 RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
7585 LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
7586 Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
7587 Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
7588 Result.push_back(Hi);
7589 return true;
7590 }
7591
7592 // Compute the full width result.
7593 auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
7594 Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
7595 Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
7596 Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
7597 return DAG.getNode(ISD::OR, dl, VT, Lo, Hi);
7598 };
7599
7600 SDValue Next = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
7601 if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false))
7602 return false;
7603
7604 // This is effectively the add part of a multiply-add of half-sized operands,
7605 // so it cannot overflow.
7606 Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
7607
7608 if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
7609 return false;
7610
7611 SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
7612 EVT BoolType = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7613
7614 bool UseGlue = (isOperationLegalOrCustom(ISD::ADDC, VT) &&
7616 if (UseGlue)
7617 Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
7618 Merge(Lo, Hi));
7619 else
7620 Next = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(VT, BoolType), Next,
7621 Merge(Lo, Hi), DAG.getConstant(0, dl, BoolType));
7622
7623 SDValue Carry = Next.getValue(1);
7624 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
7625 Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
7626
7627 if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
7628 return false;
7629
7630 if (UseGlue)
7631 Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
7632 Carry);
7633 else
7634 Hi = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(HiLoVT, BoolType), Hi,
7635 Zero, Carry);
7636
7637 Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
7638
7639 if (Opcode == ISD::SMUL_LOHI) {
7640 SDValue NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
7641 DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RL));
7642 Next = DAG.getSelectCC(dl, LH, Zero, NextSub, Next, ISD::SETLT);
7643
7644 NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
7645 DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LL));
7646 Next = DAG.getSelectCC(dl, RH, Zero, NextSub, Next, ISD::SETLT);
7647 }
7648
7649 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
7650 Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
7651 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
7652 return true;
7653}
7654
7656 SelectionDAG &DAG, MulExpansionKind Kind,
7657 SDValue LL, SDValue LH, SDValue RL,
7658 SDValue RH) const {
7660 bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), SDLoc(N),
7661 N->getOperand(0), N->getOperand(1), Result, HiLoVT,
7662 DAG, Kind, LL, LH, RL, RH);
7663 if (Ok) {
7664 assert(Result.size() == 2);
7665 Lo = Result[0];
7666 Hi = Result[1];
7667 }
7668 return Ok;
7669}
7670
7671// Optimize unsigned division or remainder by constants for types twice as large
7672// as a legal VT.
7673//
7674// If (1 << (BitWidth / 2)) % Constant == 1, then the remainder
7675// can be computed
7676// as:
7677// Sum += __builtin_uadd_overflow(Lo, High, &Sum);
7678// Remainder = Sum % Constant
7679// This is based on "Remainder by Summing Digits" from Hacker's Delight.
7680//
7681// For division, we can compute the remainder using the algorithm described
7682// above, subtract it from the dividend to get an exact multiple of Constant.
7683 // Then multiply that exact multiple by the multiplicative inverse modulo
7684// (1 << (BitWidth / 2)) to get the quotient.
7685
7686// If Constant is even, we can shift right the dividend and the divisor by the
7687// number of trailing zeros in Constant before applying the remainder algorithm.
7688// If we're after the quotient, we can subtract this value from the shifted
7689// dividend and multiply by the multiplicative inverse of the shifted divisor.
7690// If we want the remainder, we shift the value left by the number of trailing
7691// zeros and add the bits that were shifted out of the dividend.
7694 EVT HiLoVT, SelectionDAG &DAG,
7695 SDValue LL, SDValue LH) const {
7696 unsigned Opcode = N->getOpcode();
7697 EVT VT = N->getValueType(0);
7698
7699 // TODO: Support signed division/remainder.
7700 if (Opcode == ISD::SREM || Opcode == ISD::SDIV || Opcode == ISD::SDIVREM)
7701 return false;
7702 assert(
7703 (Opcode == ISD::UREM || Opcode == ISD::UDIV || Opcode == ISD::UDIVREM) &&
7704 "Unexpected opcode");
7705
7706 auto *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
7707 if (!CN)
7708 return false;
7709
7710 APInt Divisor = CN->getAPIntValue();
7711 unsigned BitWidth = Divisor.getBitWidth();
7712 unsigned HBitWidth = BitWidth / 2;
7714 HiLoVT.getScalarSizeInBits() == HBitWidth && "Unexpected VTs");
7715
7716 // Divisor needs to less than (1 << HBitWidth).
7717 APInt HalfMaxPlus1 = APInt::getOneBitSet(BitWidth, HBitWidth);
7718 if (Divisor.uge(HalfMaxPlus1))
7719 return false;
7720
7721 // We depend on the UREM by constant optimization in DAGCombiner that requires
7722 // high multiply.
7723 if (!isOperationLegalOrCustom(ISD::MULHU, HiLoVT) &&
7725 return false;
7726
7727 // Don't expand if optimizing for size.
7728 if (DAG.shouldOptForSize())
7729 return false;
7730
7731 // Early out for 0 or 1 divisors.
7732 if (Divisor.ule(1))
7733 return false;
7734
7735 // If the divisor is even, shift it until it becomes odd.
7736 unsigned TrailingZeros = 0;
7737 if (!Divisor[0]) {
7738 TrailingZeros = Divisor.countr_zero();
7739 Divisor.lshrInPlace(TrailingZeros);
7740 }
7741
7742 SDLoc dl(N);
7743 SDValue Sum;
7744 SDValue PartialRem;
7745
7746 // If (1 << HBitWidth) % divisor == 1, we can add the two halves together and
7747 // then add in the carry.
7748 // TODO: If we can't split it in half, we might be able to split into 3 or
7749 // more pieces using a smaller bit width.
7750 if (HalfMaxPlus1.urem(Divisor).isOne()) {
7751 assert(!LL == !LH && "Expected both input halves or no input halves!");
7752 if (!LL)
7753 std::tie(LL, LH) = DAG.SplitScalar(N->getOperand(0), dl, HiLoVT, HiLoVT);
7754
7755 // Shift the input by the number of TrailingZeros in the divisor. The
7756 // shifted out bits will be added to the remainder later.
7757 if (TrailingZeros) {
7758 // Save the shifted off bits if we need the remainder.
7759 if (Opcode != ISD::UDIV) {
7760 APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
7761 PartialRem = DAG.getNode(ISD::AND, dl, HiLoVT, LL,
7762 DAG.getConstant(Mask, dl, HiLoVT));
7763 }
7764
7765 LL = DAG.getNode(
7766 ISD::OR, dl, HiLoVT,
7767 DAG.getNode(ISD::SRL, dl, HiLoVT, LL,
7768 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl)),
7769 DAG.getNode(ISD::SHL, dl, HiLoVT, LH,
7770 DAG.getShiftAmountConstant(HBitWidth - TrailingZeros,
7771 HiLoVT, dl)));
7772 LH = DAG.getNode(ISD::SRL, dl, HiLoVT, LH,
7773 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
7774 }
7775
7776 // Use uaddo_carry if we can, otherwise use a compare to detect overflow.
7777 EVT SetCCType =
7778 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), HiLoVT);
7780 SDVTList VTList = DAG.getVTList(HiLoVT, SetCCType);
7781 Sum = DAG.getNode(ISD::UADDO, dl, VTList, LL, LH);
7782 Sum = DAG.getNode(ISD::UADDO_CARRY, dl, VTList, Sum,
7783 DAG.getConstant(0, dl, HiLoVT), Sum.getValue(1));
7784 } else {
7785 Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, LL, LH);
7786 SDValue Carry = DAG.getSetCC(dl, SetCCType, Sum, LL, ISD::SETULT);
7787 // If the boolean for the target is 0 or 1, we can add the setcc result
7788 // directly.
7789 if (getBooleanContents(HiLoVT) ==
7791 Carry = DAG.getZExtOrTrunc(Carry, dl, HiLoVT);
7792 else
7793 Carry = DAG.getSelect(dl, HiLoVT, Carry, DAG.getConstant(1, dl, HiLoVT),
7794 DAG.getConstant(0, dl, HiLoVT));
7795 Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, Sum, Carry);
7796 }
7797 }
7798
7799 // If we didn't find a sum, we can't do the expansion.
7800 if (!Sum)
7801 return false;
7802
7803 // Perform a HiLoVT urem on the Sum using truncated divisor.
7804 SDValue RemL =
7805 DAG.getNode(ISD::UREM, dl, HiLoVT, Sum,
7806 DAG.getConstant(Divisor.trunc(HBitWidth), dl, HiLoVT));
7807 SDValue RemH = DAG.getConstant(0, dl, HiLoVT);
7808
7809 if (Opcode != ISD::UREM) {
7810 // Subtract the remainder from the shifted dividend.
7811 SDValue Dividend = DAG.getNode(ISD::BUILD_PAIR, dl, VT, LL, LH);
7812 SDValue Rem = DAG.getNode(ISD::BUILD_PAIR, dl, VT, RemL, RemH);
7813
7814 Dividend = DAG.getNode(ISD::SUB, dl, VT, Dividend, Rem);
7815
7816 // Multiply by the multiplicative inverse of the divisor modulo
7817 // (1 << BitWidth).
7818 APInt MulFactor = Divisor.multiplicativeInverse();
7819
7820 SDValue Quotient = DAG.getNode(ISD::MUL, dl, VT, Dividend,
7821 DAG.getConstant(MulFactor, dl, VT));
7822
7823 // Split the quotient into low and high parts.
7824 SDValue QuotL, QuotH;
7825 std::tie(QuotL, QuotH) = DAG.SplitScalar(Quotient, dl, HiLoVT, HiLoVT);
7826 Result.push_back(QuotL);
7827 Result.push_back(QuotH);
7828 }
7829
7830 if (Opcode != ISD::UDIV) {
7831 // If we shifted the input, shift the remainder left and add the bits we
7832 // shifted off the input.
7833 if (TrailingZeros) {
7834 APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
7835 RemL = DAG.getNode(ISD::SHL, dl, HiLoVT, RemL,
7836 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
7837 RemL = DAG.getNode(ISD::ADD, dl, HiLoVT, RemL, PartialRem);
7838 }
7839 Result.push_back(RemL);
7840 Result.push_back(DAG.getConstant(0, dl, HiLoVT));
7841 }
7842
7843 return true;
7844}
7845
7846// Check that (every element of) Z is undef or not an exact multiple of BW.
7847static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) {
7849 Z,
7850 [=](ConstantSDNode *C) { return !C || C->getAPIntValue().urem(BW) != 0; },
7851 true);
7852}
7853
7855 EVT VT = Node->getValueType(0);
7856 SDValue ShX, ShY;
7857 SDValue ShAmt, InvShAmt;
7858 SDValue X = Node->getOperand(0);
7859 SDValue Y = Node->getOperand(1);
7860 SDValue Z = Node->getOperand(2);
7861 SDValue Mask = Node->getOperand(3);
7862 SDValue VL = Node->getOperand(4);
7863
7864 unsigned BW = VT.getScalarSizeInBits();
7865 bool IsFSHL = Node->getOpcode() == ISD::VP_FSHL;
7866 SDLoc DL(SDValue(Node, 0));
7867
7868 EVT ShVT = Z.getValueType();
7869 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
7870 // fshl: X << C | Y >> (BW - C)
7871 // fshr: X << (BW - C) | Y >> C
7872 // where C = Z % BW is not zero
7873 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
7874 ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
7875 InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitWidthC, ShAmt, Mask, VL);
7876 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt, Mask,
7877 VL);
7878 ShY = DAG.getNode(ISD::VP_SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt, Mask,
7879 VL);
7880 } else {
7881 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
7882 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
7883 SDValue BitMask = DAG.getConstant(BW - 1, DL, ShVT);
7884 if (isPowerOf2_32(BW)) {
7885 // Z % BW -> Z & (BW - 1)
7886 ShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, Z, BitMask, Mask, VL);
7887 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
7888 SDValue NotZ = DAG.getNode(ISD::VP_XOR, DL, ShVT, Z,
7889 DAG.getAllOnesConstant(DL, ShVT), Mask, VL);
7890 InvShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, NotZ, BitMask, Mask, VL);
7891 } else {
7892 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
7893 ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
7894 InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitMask, ShAmt, Mask, VL);
7895 }
7896
7897 SDValue One = DAG.getConstant(1, DL, ShVT);
7898 if (IsFSHL) {
7899 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, ShAmt, Mask, VL);
7900 SDValue ShY1 = DAG.getNode(ISD::VP_SRL, DL, VT, Y, One, Mask, VL);
7901 ShY = DAG.getNode(ISD::VP_SRL, DL, VT, ShY1, InvShAmt, Mask, VL);
7902 } else {
7903 SDValue ShX1 = DAG.getNode(ISD::VP_SHL, DL, VT, X, One, Mask, VL);
7904 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, ShX1, InvShAmt, Mask, VL);
7905 ShY = DAG.getNode(ISD::VP_SRL, DL, VT, Y, ShAmt, Mask, VL);
7906 }
7907 }
7908 return DAG.getNode(ISD::VP_OR, DL, VT, ShX, ShY, Mask, VL);
7909}
7910
7912 SelectionDAG &DAG) const {
7913 if (Node->isVPOpcode())
7914 return expandVPFunnelShift(Node, DAG);
7915
7916 EVT VT = Node->getValueType(0);
7917
7918 if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
7922 return SDValue();
7923
7924 SDValue X = Node->getOperand(0);
7925 SDValue Y = Node->getOperand(1);
7926 SDValue Z = Node->getOperand(2);
7927
7928 unsigned BW = VT.getScalarSizeInBits();
7929 bool IsFSHL = Node->getOpcode() == ISD::FSHL;
7930 SDLoc DL(SDValue(Node, 0));
7931
7932 EVT ShVT = Z.getValueType();
7933
7934 // If a funnel shift in the other direction is more supported, use it.
7935 unsigned RevOpcode = IsFSHL ? ISD::FSHR : ISD::FSHL;
7936 if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
7937 isOperationLegalOrCustom(RevOpcode, VT) && isPowerOf2_32(BW)) {
7938 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
7939 // fshl X, Y, Z -> fshr X, Y, -Z
7940 // fshr X, Y, Z -> fshl X, Y, -Z
7941 SDValue Zero = DAG.getConstant(0, DL, ShVT);
7942 Z = DAG.getNode(ISD::SUB, DL, VT, Zero, Z);
7943 } else {
7944 // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
7945 // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
7946 SDValue One = DAG.getConstant(1, DL, ShVT);
7947 if (IsFSHL) {
7948 Y = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
7949 X = DAG.getNode(ISD::SRL, DL, VT, X, One);
7950 } else {
7951 X = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
7952 Y = DAG.getNode(ISD::SHL, DL, VT, Y, One);
7953 }
7954 Z = DAG.getNOT(DL, Z, ShVT);
7955 }
7956 return DAG.getNode(RevOpcode, DL, VT, X, Y, Z);
7957 }
7958
7959 SDValue ShX, ShY;
7960 SDValue ShAmt, InvShAmt;
7961 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
7962 // fshl: X << C | Y >> (BW - C)
7963 // fshr: X << (BW - C) | Y >> C
7964 // where C = Z % BW is not zero
7965 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
7966 ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
7967 InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt);
7968 ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt);
7969 ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt);
7970 } else {
7971 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
7972 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
7973 SDValue Mask = DAG.getConstant(BW - 1, DL, ShVT);
7974 if (isPowerOf2_32(BW)) {
7975 // Z % BW -> Z & (BW - 1)
7976 ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask);
7977 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
7978 InvShAmt = DAG.getNode(ISD::AND, DL, ShVT, DAG.getNOT(DL, Z, ShVT), Mask);
7979 } else {
7980 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
7981 ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
7982 InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, Mask, ShAmt);
7983 }
7984
7985 SDValue One = DAG.getConstant(1, DL, ShVT);
7986 if (IsFSHL) {
7987 ShX = DAG.getNode(ISD::SHL, DL, VT, X, ShAmt);
7988 SDValue ShY1 = DAG.getNode(ISD::SRL, DL, VT, Y, One);
7989 ShY = DAG.getNode(ISD::SRL, DL, VT, ShY1, InvShAmt);
7990 } else {
7991 SDValue ShX1 = DAG.getNode(ISD::SHL, DL, VT, X, One);
7992 ShX = DAG.getNode(ISD::SHL, DL, VT, ShX1, InvShAmt);
7993 ShY = DAG.getNode(ISD::SRL, DL, VT, Y, ShAmt);
7994 }
7995 }
7996 return DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
7997}
7998
7999// TODO: Merge with expandFunnelShift.
8000SDValue TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps,
8001 SelectionDAG &DAG) const {
8002 EVT VT = Node->getValueType(0);
8003 unsigned EltSizeInBits = VT.getScalarSizeInBits();
8004 bool IsLeft = Node->getOpcode() == ISD::ROTL;
8005 SDValue Op0 = Node->getOperand(0);
8006 SDValue Op1 = Node->getOperand(1);
8007 SDLoc DL(SDValue(Node, 0));
8008
8009 EVT ShVT = Op1.getValueType();
8010 SDValue Zero = DAG.getConstant(0, DL, ShVT);
8011
8012 // If a rotate in the other direction is more supported, use it.
8013 unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
8014 if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
8015 isOperationLegalOrCustom(RevRot, VT) && isPowerOf2_32(EltSizeInBits)) {
8016 SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
8017 return DAG.getNode(RevRot, DL, VT, Op0, Sub);
8018 }
8019
8020 if (!AllowVectorOps && VT.isVector() &&
8026 return SDValue();
8027
8028 unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
8029 unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
8030 SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
8031 SDValue ShVal;
8032 SDValue HsVal;
8033 if (isPowerOf2_32(EltSizeInBits)) {
8034 // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
8035 // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
8036 SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
8037 SDValue ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC);
8038 ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
8039 SDValue HsAmt = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC);
8040 HsVal = DAG.getNode(HsOpc, DL, VT, Op0, HsAmt);
8041 } else {
8042 // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
8043 // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
8044 SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
8045 SDValue ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Op1, BitWidthC);
8046 ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
8047 SDValue HsAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthMinusOneC, ShAmt);
8048 SDValue One = DAG.getConstant(1, DL, ShVT);
8049 HsVal =
8050 DAG.getNode(HsOpc, DL, VT, DAG.getNode(HsOpc, DL, VT, Op0, One), HsAmt);
8051 }
8052 return DAG.getNode(ISD::OR, DL, VT, ShVal, HsVal);
8053}
8054
8056 SelectionDAG &DAG) const {
8057 assert(Node->getNumOperands() == 3 && "Not a double-shift!");
8058 EVT VT = Node->getValueType(0);
8059 unsigned VTBits = VT.getScalarSizeInBits();
8060 assert(isPowerOf2_32(VTBits) && "Power-of-two integer type expected");
8061
8062 bool IsSHL = Node->getOpcode() == ISD::SHL_PARTS;
8063 bool IsSRA = Node->getOpcode() == ISD::SRA_PARTS;
8064 SDValue ShOpLo = Node->getOperand(0);
8065 SDValue ShOpHi = Node->getOperand(1);
8066 SDValue ShAmt = Node->getOperand(2);
8067 EVT ShAmtVT = ShAmt.getValueType();
8068 EVT ShAmtCCVT =
8069 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ShAmtVT);
8070 SDLoc dl(Node);
8071
8072 // ISD::FSHL and ISD::FSHR have defined overflow behavior but ISD::SHL and
8073 // ISD::SRA/L nodes haven't. Insert an AND to be safe, it's usually optimized
8074 // away during isel.
8075 SDValue SafeShAmt = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
8076 DAG.getConstant(VTBits - 1, dl, ShAmtVT));
8077 SDValue Tmp1 = IsSRA ? DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
8078 DAG.getConstant(VTBits - 1, dl, ShAmtVT))
8079 : DAG.getConstant(0, dl, VT);
8080
8081 SDValue Tmp2, Tmp3;
8082 if (IsSHL) {
8083 Tmp2 = DAG.getNode(ISD::FSHL, dl, VT, ShOpHi, ShOpLo, ShAmt);
8084 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, SafeShAmt);
8085 } else {
8086 Tmp2 = DAG.getNode(ISD::FSHR, dl, VT, ShOpHi, ShOpLo, ShAmt);
8087 Tmp3 = DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, SafeShAmt);
8088 }
8089
8090 // If the shift amount is larger or equal than the width of a part we don't
8091 // use the result from the FSHL/FSHR. Insert a test and select the appropriate
8092 // values for large shift amounts.
8093 SDValue AndNode = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
8094 DAG.getConstant(VTBits, dl, ShAmtVT));
8095 SDValue Cond = DAG.getSetCC(dl, ShAmtCCVT, AndNode,
8096 DAG.getConstant(0, dl, ShAmtVT), ISD::SETNE);
8097
8098 if (IsSHL) {
8099 Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
8100 Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
8101 } else {
8102 Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
8103 Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
8104 }
8105}
8106
8108 SelectionDAG &DAG) const {
8109 unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
8110 SDValue Src = Node->getOperand(OpNo);
8111 EVT SrcVT = Src.getValueType();
8112 EVT DstVT = Node->getValueType(0);
8113 SDLoc dl(SDValue(Node, 0));
8114
8115 // FIXME: Only f32 to i64 conversions are supported.
8116 if (SrcVT != MVT::f32 || DstVT != MVT::i64)
8117 return false;
8118
8119 if (Node->isStrictFPOpcode())
8120 // When a NaN is converted to an integer a trap is allowed. We can't
8121 // use this expansion here because it would eliminate that trap. Other
8122 // traps are also allowed and cannot be eliminated. See
8123 // IEEE 754-2008 sec 5.8.
8124 return false;
8125
8126 // Expand f32 -> i64 conversion
8127 // This algorithm comes from compiler-rt's implementation of fixsfdi:
8128 // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
8129 unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
8130 EVT IntVT = SrcVT.changeTypeToInteger();
8131 EVT IntShVT = getShiftAmountTy(IntVT, DAG.getDataLayout());
8132
8133 SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT);
8134 SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT);
8135 SDValue Bias = DAG.getConstant(127, dl, IntVT);
8136 SDValue SignMask = DAG.getConstant(APInt::getSignMask(SrcEltBits), dl, IntVT);
8137 SDValue SignLowBit = DAG.getConstant(SrcEltBits - 1, dl, IntVT);
8138 SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT);
8139
8140 SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Src);
8141
8142 SDValue ExponentBits = DAG.getNode(
8143 ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
8144 DAG.getZExtOrTrunc(ExponentLoBit, dl, IntShVT));
8145 SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);
8146
8147 SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT,
8148 DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
8149 DAG.getZExtOrTrunc(SignLowBit, dl, IntShVT));
8150 Sign = DAG.getSExtOrTrunc(Sign, dl, DstVT);
8151
8152 SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
8153 DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
8154 DAG.getConstant(0x00800000, dl, IntVT));
8155
8156 R = DAG.getZExtOrTrunc(R, dl, DstVT);
8157
8158 R = DAG.getSelectCC(
8159 dl, Exponent, ExponentLoBit,
8160 DAG.getNode(ISD::SHL, dl, DstVT, R,
8161 DAG.getZExtOrTrunc(
8162 DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
8163 dl, IntShVT)),
8164 DAG.getNode(ISD::SRL, dl, DstVT, R,
8165 DAG.getZExtOrTrunc(
8166 DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
8167 dl, IntShVT)),
8168 ISD::SETGT);
8169
8170 SDValue Ret = DAG.getNode(ISD::SUB, dl, DstVT,
8171 DAG.getNode(ISD::XOR, dl, DstVT, R, Sign), Sign);
8172
8173 Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT),
8174 DAG.getConstant(0, dl, DstVT), Ret, ISD::SETLT);
8175 return true;
8176}
8177
8179 SDValue &Chain,
8180 SelectionDAG &DAG) const {
8181 SDLoc dl(SDValue(Node, 0));
8182 unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
8183 SDValue Src = Node->getOperand(OpNo);
8184
8185 EVT SrcVT = Src.getValueType();
8186 EVT DstVT = Node->getValueType(0);
8187 EVT SetCCVT =
8188 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
8189 EVT DstSetCCVT =
8190 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
8191
8192 // Only expand vector types if we have the appropriate vector bit operations.
8193 unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
8195 if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) ||
8197 return false;
8198
8199 // If the maximum float value is smaller then the signed integer range,
8200 // the destination signmask can't be represented by the float, so we can
8201 // just use FP_TO_SINT directly.
8202 const fltSemantics &APFSem = DAG.EVTToAPFloatSemantics(SrcVT);
8203 APFloat APF(APFSem, APInt::getZero(SrcVT.getScalarSizeInBits()));
8204 APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());
8206 APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
8207 if (Node->isStrictFPOpcode()) {
8208 Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
8209 { Node->getOperand(0), Src });
8210 Chain = Result.getValue(1);
8211 } else
8212 Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
8213 return true;
8214 }
8215
8216 // Don't expand it if there isn't cheap fsub instruction.
8218 Node->isStrictFPOpcode() ? ISD::STRICT_FSUB : ISD::FSUB, SrcVT))
8219 return false;
8220
8221 SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
8222 SDValue Sel;
8223
8224 if (Node->isStrictFPOpcode()) {
8225 Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
8226 Node->getOperand(0), /*IsSignaling*/ true);
8227 Chain = Sel.getValue(1);
8228 } else {
8229 Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
8230 }
8231
8232 bool Strict = Node->isStrictFPOpcode() ||
8233 shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);
8234
8235 if (Strict) {
8236 // Expand based on maximum range of FP_TO_SINT, if the value exceeds the
8237 // signmask then offset (the result of which should be fully representable).
8238 // Sel = Src < 0x8000000000000000
8239 // FltOfs = select Sel, 0, 0x8000000000000000
8240 // IntOfs = select Sel, 0, 0x8000000000000000
8241 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
8242
8243 // TODO: Should any fast-math-flags be set for the FSUB?
8244 SDValue FltOfs = DAG.getSelect(dl, SrcVT, Sel,
8245 DAG.getConstantFP(0.0, dl, SrcVT), Cst);
8246 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8247 SDValue IntOfs = DAG.getSelect(dl, DstVT, Sel,
8248 DAG.getConstant(0, dl, DstVT),
8249 DAG.getConstant(SignMask, dl, DstVT));
8250 SDValue SInt;
8251 if (Node->isStrictFPOpcode()) {
8252 SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
8253 { Chain, Src, FltOfs });
8254 SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
8255 { Val.getValue(1), Val });
8256 Chain = SInt.getValue(1);
8257 } else {
8258 SDValue Val = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, FltOfs);
8259 SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val);
8260 }
8261 Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
8262 } else {
8263 // Expand based on maximum range of FP_TO_SINT:
8264 // True = fp_to_sint(Src)
8265 // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
8266 // Result = select (Src < 0x8000000000000000), True, False
8267
8268 SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
8269 // TODO: Should any fast-math-flags be set for the FSUB?
8270 SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT,
8271 DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
8272 False = DAG.getNode(ISD::XOR, dl, DstVT, False,
8273 DAG.getConstant(SignMask, dl, DstVT));
8274 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8275 Result = DAG.getSelect(dl, DstVT, Sel, True, False);
8276 }
8277 return true;
8278}
8279
8281 SDValue &Chain,
8282 SelectionDAG &DAG) const {
8283 // This transform is not correct for converting 0 when rounding mode is set
8284 // to round toward negative infinity which will produce -0.0. So disable under
8285 // strictfp.
8286 if (Node->isStrictFPOpcode())
8287 return false;
8288
8289 SDValue Src = Node->getOperand(0);
8290 EVT SrcVT = Src.getValueType();
8291 EVT DstVT = Node->getValueType(0);
8292
8293 if (SrcVT.getScalarType() != MVT::i64 || DstVT.getScalarType() != MVT::f64)
8294 return false;
8295
8296 // Only expand vector types if we have the appropriate vector bit operations.
8297 if (SrcVT.isVector() && (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
8302 return false;
8303
8304 SDLoc dl(SDValue(Node, 0));
8305 EVT ShiftVT = getShiftAmountTy(SrcVT, DAG.getDataLayout());
8306
8307 // Implementation of unsigned i64 to f64 following the algorithm in
8308 // __floatundidf in compiler_rt. This implementation performs rounding
8309 // correctly in all rounding modes with the exception of converting 0
8310 // when rounding toward negative infinity. In that case the fsub will produce
8311 // -0.0. This will be added to +0.0 and produce -0.0 which is incorrect.
8312 SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
8313 SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
8314 llvm::bit_cast<double>(UINT64_C(0x4530000000100000)), dl, DstVT);
8315 SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT);
8316 SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT);
8317 SDValue HiShift = DAG.getConstant(32, dl, ShiftVT);
8318
8319 SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask);
8320 SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift);
8321 SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52);
8322 SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
8323 SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
8324 SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
8325 SDValue HiSub =
8326 DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
8327 Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
8328 return true;
8329}
8330
8331SDValue
8333 SelectionDAG &DAG) const {
8334 unsigned Opcode = Node->getOpcode();
8335 assert((Opcode == ISD::FMINNUM || Opcode == ISD::FMAXNUM ||
8336 Opcode == ISD::STRICT_FMINNUM || Opcode == ISD::STRICT_FMAXNUM) &&
8337 "Wrong opcode");
8338
8339 if (Node->getFlags().hasNoNaNs()) {
8340 ISD::CondCode Pred = Opcode == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
8341 SDValue Op1 = Node->getOperand(0);
8342 SDValue Op2 = Node->getOperand(1);
8343 SDValue SelCC = DAG.getSelectCC(SDLoc(Node), Op1, Op2, Op1, Op2, Pred);
8344 // Copy FMF flags, but always set the no-signed-zeros flag
8345 // as this is implied by the FMINNUM/FMAXNUM semantics.
8346 SDNodeFlags Flags = Node->getFlags();
8347 Flags.setNoSignedZeros(true);
8348 SelCC->setFlags(Flags);
8349 return SelCC;
8350 }
8351
8352 return SDValue();
8353}
8354
8356 SelectionDAG &DAG) const {
8357 SDLoc dl(Node);
8358 unsigned NewOp = Node->getOpcode() == ISD::FMINNUM ?
8360 EVT VT = Node->getValueType(0);
8361
8362 if (VT.isScalableVector())
8364 "Expanding fminnum/fmaxnum for scalable vectors is undefined.");
8365
8366 if (isOperationLegalOrCustom(NewOp, VT)) {
8367 SDValue Quiet0 = Node->getOperand(0);
8368 SDValue Quiet1 = Node->getOperand(1);
8369
8370 if (!Node->getFlags().hasNoNaNs()) {
8371 // Insert canonicalizes if it's possible we need to quiet to get correct
8372 // sNaN behavior.
8373 if (!DAG.isKnownNeverSNaN(Quiet0)) {
8374 Quiet0 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet0,
8375 Node->getFlags());
8376 }
8377 if (!DAG.isKnownNeverSNaN(Quiet1)) {
8378 Quiet1 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet1,
8379 Node->getFlags());
8380 }
8381 }
8382
8383 return DAG.getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags());
8384 }
8385
8386 // If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that
8387 // instead if there are no NaNs and there can't be an incompatible zero
8388 // compare: at least one operand isn't +/-0, or there are no signed-zeros.
8389 if ((Node->getFlags().hasNoNaNs() ||
8390 (DAG.isKnownNeverNaN(Node->getOperand(0)) &&
8391 DAG.isKnownNeverNaN(Node->getOperand(1)))) &&
8392 (Node->getFlags().hasNoSignedZeros() ||
8393 DAG.isKnownNeverZeroFloat(Node->getOperand(0)) ||
8394 DAG.isKnownNeverZeroFloat(Node->getOperand(1)))) {
8395 unsigned IEEE2018Op =
8396 Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
8397 if (isOperationLegalOrCustom(IEEE2018Op, VT))
8398 return DAG.getNode(IEEE2018Op, dl, VT, Node->getOperand(0),
8399 Node->getOperand(1), Node->getFlags());
8400 }
8401
8402 if (SDValue SelCC = createSelectForFMINNUM_FMAXNUM(Node, DAG))
8403 return SelCC;
8404
8405 return SDValue();
8406}
8407
8409 SelectionDAG &DAG) const {
8410 SDLoc DL(N);
8411 SDValue LHS = N->getOperand(0);
8412 SDValue RHS = N->getOperand(1);
8413 unsigned Opc = N->getOpcode();
8414 EVT VT = N->getValueType(0);
8415 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
8416 bool IsMax = Opc == ISD::FMAXIMUM;
8417 SDNodeFlags Flags = N->getFlags();
8418
8419 // First, implement comparison not propagating NaN. If no native fmin or fmax
8420 // available, use plain select with setcc instead.
8422 unsigned CompOpcIeee = IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
8423 unsigned CompOpc = IsMax ? ISD::FMAXNUM : ISD::FMINNUM;
8424
8425 // FIXME: We should probably define fminnum/fmaxnum variants with correct
8426 // signed zero behavior.
8427 bool MinMaxMustRespectOrderedZero = false;
8428
8429 if (isOperationLegalOrCustom(CompOpcIeee, VT)) {
8430 MinMax = DAG.getNode(CompOpcIeee, DL, VT, LHS, RHS, Flags);
8431 MinMaxMustRespectOrderedZero = true;
8432 } else if (isOperationLegalOrCustom(CompOpc, VT)) {
8433 MinMax = DAG.getNode(CompOpc, DL, VT, LHS, RHS, Flags);
8434 } else {
8436 return DAG.UnrollVectorOp(N);
8437
8438 // NaN (if exists) will be propagated later, so orderness doesn't matter.
8439 SDValue Compare =
8440 DAG.getSetCC(DL, CCVT, LHS, RHS, IsMax ? ISD::SETGT : ISD::SETLT);
8441 MinMax = DAG.getSelect(DL, VT, Compare, LHS, RHS, Flags);
8442 }
8443
8444 // Propagate any NaN of both operands
8445 if (!N->getFlags().hasNoNaNs() &&
8446 (!DAG.isKnownNeverNaN(RHS) || !DAG.isKnownNeverNaN(LHS))) {
8447 ConstantFP *FPNaN = ConstantFP::get(
8449 MinMax = DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, LHS, RHS, ISD::SETUO),
8450 DAG.getConstantFP(*FPNaN, DL, VT), MinMax, Flags);
8451 }
8452
8453 // fminimum/fmaximum requires -0.0 less than +0.0
8454 if (!MinMaxMustRespectOrderedZero && !N->getFlags().hasNoSignedZeros() &&
8456 SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
8457 DAG.getConstantFP(0.0, DL, VT), ISD::SETEQ);
8458 SDValue TestZero =
8459 DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
8460 SDValue LCmp = DAG.getSelect(
8461 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHS, TestZero), LHS,
8462 MinMax, Flags);
8463 SDValue RCmp = DAG.getSelect(
8464 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, RHS, TestZero), RHS,
8465 LCmp, Flags);
8466 MinMax = DAG.getSelect(DL, VT, IsZero, RCmp, MinMax, Flags);
8467 }
8468
8469 return MinMax;
8470}
8471
8472/// Returns a true value if if this FPClassTest can be performed with an ordered
8473/// fcmp to 0, and a false value if it's an unordered fcmp to 0. Returns
8474/// std::nullopt if it cannot be performed as a compare with 0.
8475static std::optional<bool> isFCmpEqualZero(FPClassTest Test,
8476 const fltSemantics &Semantics,
8477 const MachineFunction &MF) {
8478 FPClassTest OrderedMask = Test & ~fcNan;
8479 FPClassTest NanTest = Test & fcNan;
8480 bool IsOrdered = NanTest == fcNone;
8481 bool IsUnordered = NanTest == fcNan;
8482
8483 // Skip cases that are testing for only a qnan or snan.
8484 if (!IsOrdered && !IsUnordered)
8485 return std::nullopt;
8486
8487 if (OrderedMask == fcZero &&
8488 MF.getDenormalMode(Semantics).Input == DenormalMode::IEEE)
8489 return IsOrdered;
8490 if (OrderedMask == (fcZero | fcSubnormal) &&
8491 MF.getDenormalMode(Semantics).inputsAreZero())
8492 return IsOrdered;
8493 return std::nullopt;
8494}
8495
8498 const SDLoc &DL,
8499 SelectionDAG &DAG) const {
8500 EVT OperandVT = Op.getValueType();
8501 assert(OperandVT.isFloatingPoint());
8502
8503 // Degenerated cases.
8504 if (Test == fcNone)
8505 return DAG.getBoolConstant(false, DL, ResultVT, OperandVT);
8506 if ((Test & fcAllFlags) == fcAllFlags)
8507 return DAG.getBoolConstant(true, DL, ResultVT, OperandVT);
8508
8509 // PPC double double is a pair of doubles, of which the higher part determines
8510 // the value class.
8511 if (OperandVT == MVT::ppcf128) {
8512 Op = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::f64, Op,
8513 DAG.getConstant(1, DL, MVT::i32));
8514 OperandVT = MVT::f64;
8515 }
8516
8517 // Some checks may be represented as inversion of simpler check, for example
8518 // "inf|normal|subnormal|zero" => !"nan".
8519 bool IsInverted = false;
8520 if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test)) {
8521 IsInverted = true;
8522 Test = InvertedCheck;
8523 }
8524
8525 // Floating-point type properties.
8526 EVT ScalarFloatVT = OperandVT.getScalarType();
8527 const Type *FloatTy = ScalarFloatVT.getTypeForEVT(*DAG.getContext());
8528 const llvm::fltSemantics &Semantics = FloatTy->getFltSemantics();
8529 bool IsF80 = (ScalarFloatVT == MVT::f80);
8530
8531 // Some checks can be implemented using float comparisons, if floating point
8532 // exceptions are ignored.
8533 if (Flags.hasNoFPExcept() &&
8535 ISD::CondCode OrderedCmpOpcode = IsInverted ? ISD::SETUNE : ISD::SETOEQ;
8536 ISD::CondCode UnorderedCmpOpcode = IsInverted ? ISD::SETONE : ISD::SETUEQ;
8537
8538 if (std::optional<bool> IsCmp0 =
8539 isFCmpEqualZero(Test, Semantics, DAG.getMachineFunction());
8540 IsCmp0 && (isCondCodeLegalOrCustom(
8541 *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode,
8542 OperandVT.getScalarType().getSimpleVT()))) {
8543
8544 // If denormals could be implicitly treated as 0, this is not equivalent
8545 // to a compare with 0 since it will also be true for denormals.
8546 return DAG.getSetCC(DL, ResultVT, Op,
8547 DAG.getConstantFP(0.0, DL, OperandVT),
8548 *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode);
8549 }
8550
8551 if (Test == fcNan &&
8553 OperandVT.getScalarType().getSimpleVT())) {
8554 return DAG.getSetCC(DL, ResultVT, Op, Op,
8555 IsInverted ? ISD::SETO : ISD::SETUO);
8556 }
8557
8558 if (Test == fcInf &&
8560 OperandVT.getScalarType().getSimpleVT()) &&
8562 // isinf(x) --> fabs(x) == inf
8563 SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
8564 SDValue Inf =
8565 DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
8566 return DAG.getSetCC(DL, ResultVT, Abs, Inf,
8567 IsInverted ? ISD::SETUNE : ISD::SETOEQ);
8568 }
8569 }
8570
8571 // In the general case use integer operations.
8572 unsigned BitSize = OperandVT.getScalarSizeInBits();
8573 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), BitSize);
8574 if (OperandVT.isVector())
8575 IntVT = EVT::getVectorVT(*DAG.getContext(), IntVT,
8576 OperandVT.getVectorElementCount());
8577 SDValue OpAsInt = DAG.getBitcast(IntVT, Op);
8578
8579 // Various masks.
8580 APInt SignBit = APInt::getSignMask(BitSize);
8581 APInt ValueMask = APInt::getSignedMaxValue(BitSize); // All bits but sign.
8582 APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
8583 const unsigned ExplicitIntBitInF80 = 63;
8584 APInt ExpMask = Inf;
8585 if (IsF80)
8586 ExpMask.clearBit(ExplicitIntBitInF80);
8587 APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
8588 APInt QNaNBitMask =
8589 APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
8590 APInt InvertionMask = APInt::getAllOnes(ResultVT.getScalarSizeInBits());
8591
8592 SDValue ValueMaskV = DAG.getConstant(ValueMask, DL, IntVT);
8593 SDValue SignBitV = DAG.getConstant(SignBit, DL, IntVT);
8594 SDValue ExpMaskV = DAG.getConstant(ExpMask, DL, IntVT);
8595 SDValue ZeroV = DAG.getConstant(0, DL, IntVT);
8596 SDValue InfV = DAG.getConstant(Inf, DL, IntVT);
8597 SDValue ResultInvertionMask = DAG.getConstant(InvertionMask, DL, ResultVT);
8598
8599 SDValue Res;
8600 const auto appendResult = [&](SDValue PartialRes) {
8601 if (PartialRes) {
8602 if (Res)
8603 Res = DAG.getNode(ISD::OR, DL, ResultVT, Res, PartialRes);
8604 else
8605 Res = PartialRes;
8606 }
8607 };
8608
8609 SDValue IntBitIsSetV; // Explicit integer bit in f80 mantissa is set.
8610 const auto getIntBitIsSet = [&]() -> SDValue {
8611 if (!IntBitIsSetV) {
8612 APInt IntBitMask(BitSize, 0);
8613 IntBitMask.setBit(ExplicitIntBitInF80);
8614 SDValue IntBitMaskV = DAG.getConstant(IntBitMask, DL, IntVT);
8615 SDValue IntBitV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, IntBitMaskV);
8616 IntBitIsSetV = DAG.getSetCC(DL, ResultVT, IntBitV, ZeroV, ISD::SETNE);
8617 }
8618 return IntBitIsSetV;
8619 };
8620
8621 // Split the value into sign bit and absolute value.
8622 SDValue AbsV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ValueMaskV);
8623 SDValue SignV = DAG.getSetCC(DL, ResultVT, OpAsInt,
8624 DAG.getConstant(0.0, DL, IntVT), ISD::SETLT);
8625
8626 // Tests that involve more than one class should be processed first.
8627 SDValue PartialRes;
8628
8629 if (IsF80)
8630 ; // Detect finite numbers of f80 by checking individual classes because
8631 // they have different settings of the explicit integer bit.
8632 else if ((Test & fcFinite) == fcFinite) {
8633 // finite(V) ==> abs(V) < exp_mask
8634 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
8635 Test &= ~fcFinite;
8636 } else if ((Test & fcFinite) == fcPosFinite) {
8637 // finite(V) && V > 0 ==> V < exp_mask
8638 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ExpMaskV, ISD::SETULT);
8639 Test &= ~fcPosFinite;
8640 } else if ((Test & fcFinite) == fcNegFinite) {
8641 // finite(V) && V < 0 ==> abs(V) < exp_mask && signbit == 1
8642 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
8643 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
8644 Test &= ~fcNegFinite;
8645 }
8646 appendResult(PartialRes);
8647
8648 if (FPClassTest PartialCheck = Test & (fcZero | fcSubnormal)) {
8649 // fcZero | fcSubnormal => test all exponent bits are 0
8650 // TODO: Handle sign bit specific cases
8651 if (PartialCheck == (fcZero | fcSubnormal)) {
8652 SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ExpMaskV);
8653 SDValue ExpIsZero =
8654 DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
8655 appendResult(ExpIsZero);
8656 Test &= ~PartialCheck & fcAllFlags;
8657 }
8658 }
8659
8660 // Check for individual classes.
8661
8662 if (unsigned PartialCheck = Test & fcZero) {
8663 if (PartialCheck == fcPosZero)
8664 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ZeroV, ISD::SETEQ);
8665 else if (PartialCheck == fcZero)
8666 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ZeroV, ISD::SETEQ);
8667 else // ISD::fcNegZero
8668 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, SignBitV, ISD::SETEQ);
8669 appendResult(PartialRes);
8670 }
8671
8672 if (unsigned PartialCheck = Test & fcSubnormal) {
8673 // issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set)
8674 // issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set)
8675 SDValue V = (PartialCheck == fcPosSubnormal) ? OpAsInt : AbsV;
8676 SDValue MantissaV = DAG.getConstant(AllOneMantissa, DL, IntVT);
8677 SDValue VMinusOneV =
8678 DAG.getNode(ISD::SUB, DL, IntVT, V, DAG.getConstant(1, DL, IntVT));
8679 PartialRes = DAG.getSetCC(DL, ResultVT, VMinusOneV, MantissaV, ISD::SETULT);
8680 if (PartialCheck == fcNegSubnormal)
8681 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
8682 appendResult(PartialRes);
8683 }
8684
8685 if (unsigned PartialCheck = Test & fcInf) {
8686 if (PartialCheck == fcPosInf)
8687 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, InfV, ISD::SETEQ);
8688 else if (PartialCheck == fcInf)
8689 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETEQ);
8690 else { // ISD::fcNegInf
8691 APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
8692 SDValue NegInfV = DAG.getConstant(NegInf, DL, IntVT);
8693 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, NegInfV, ISD::SETEQ);
8694 }
8695 appendResult(PartialRes);
8696 }
8697
8698 if (unsigned PartialCheck = Test & fcNan) {
8699 APInt InfWithQnanBit = Inf | QNaNBitMask;
8700 SDValue InfWithQnanBitV = DAG.getConstant(InfWithQnanBit, DL, IntVT);
8701 if (PartialCheck == fcNan) {
8702 // isnan(V) ==> abs(V) > int(inf)
8703 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
8704 if (IsF80) {
8705 // Recognize unsupported values as NaNs for compatibility with glibc.
8706 // In them (exp(V)==0) == int_bit.
8707 SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, AbsV, ExpMaskV);
8708 SDValue ExpIsZero =
8709 DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
8710 SDValue IsPseudo =
8711 DAG.getSetCC(DL, ResultVT, getIntBitIsSet(), ExpIsZero, ISD::SETEQ);
8712 PartialRes = DAG.getNode(ISD::OR, DL, ResultVT, PartialRes, IsPseudo);
8713 }
8714 } else if (PartialCheck == fcQNan) {
8715 // isquiet(V) ==> abs(V) >= (unsigned(Inf) | quiet_bit)
8716 PartialRes =
8717 DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETGE);
8718 } else { // ISD::fcSNan
8719 // issignaling(V) ==> abs(V) > unsigned(Inf) &&
8720 // abs(V) < (unsigned(Inf) | quiet_bit)
8721 SDValue IsNan = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
8722 SDValue IsNotQnan =
8723 DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETLT);
8724 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, IsNan, IsNotQnan);
8725 }
8726 appendResult(PartialRes);
8727 }
8728
8729 if (unsigned PartialCheck = Test & fcNormal) {
8730 // isnormal(V) ==> (0 < exp < max_exp) ==> (unsigned(exp-1) < (max_exp-1))
8731 APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
8732 SDValue ExpLSBV = DAG.getConstant(ExpLSB, DL, IntVT);
8733 SDValue ExpMinus1 = DAG.getNode(ISD::SUB, DL, IntVT, AbsV, ExpLSBV);
8734 APInt ExpLimit = ExpMask - ExpLSB;
8735 SDValue ExpLimitV = DAG.getConstant(ExpLimit, DL, IntVT);
8736 PartialRes = DAG.getSetCC(DL, ResultVT, ExpMinus1, ExpLimitV, ISD::SETULT);
8737 if (PartialCheck == fcNegNormal)
8738 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
8739 else if (PartialCheck == fcPosNormal) {
8740 SDValue PosSignV =
8741 DAG.getNode(ISD::XOR, DL, ResultVT, SignV, ResultInvertionMask);
8742 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, PosSignV);
8743 }
8744 if (IsF80)
8745 PartialRes =
8746 DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, getIntBitIsSet());
8747 appendResult(PartialRes);
8748 }
8749
8750 if (!Res)
8751 return DAG.getConstant(IsInverted, DL, ResultVT);
8752 if (IsInverted)
8753 Res = DAG.getNode(ISD::XOR, DL, ResultVT, Res, ResultInvertionMask);
8754 return Res;
8755}
8756
8757// Only expand vector types if we have the appropriate vector bit operations.
8758static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) {
8759 assert(VT.isVector() && "Expected vector type");
8760 unsigned Len = VT.getScalarSizeInBits();
8761 return TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
8764 (Len == 8 || TLI.isOperationLegalOrCustom(ISD::MUL, VT)) &&
8766}
8767
// Expands an integer population-count node into shift/mask arithmetic (the
// "parallel bit count" trick referenced below). Returns SDValue() when the
// type is irregular (not a multiple of 8 bits, or > 128 bits) or when a
// vector type lacks the needed bit operations.
// NOTE(review): the function signature line (original line 8768, presumably
// TargetLowering::expandCTPOP) is missing from this extraction.
8769 SDLoc dl(Node);
8770 EVT VT = Node->getValueType(0);
8771 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
8772 SDValue Op = Node->getOperand(0);
8773 unsigned Len = VT.getScalarSizeInBits();
8774 assert(VT.isInteger() && "CTPOP not implemented for this type.");
8775
8776 // TODO: Add support for irregular type lengths.
8777 if (!(Len <= 128 && Len % 8 == 0))
8778 return SDValue();
8779
8780 // Only expand vector types if we have the appropriate vector bit operations.
8781 if (VT.isVector() && !canExpandVectorCTPOP(*this, VT))
8782 return SDValue();
8783
8784 // This is the "best" algorithm from
8785 // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
// Byte-repeating masks splatted across the whole (possibly >8-bit) element.
8786 SDValue Mask55 =
8787 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
8788 SDValue Mask33 =
8789 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
8790 SDValue Mask0F =
8791 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
8792
8793 // v = v - ((v >> 1) & 0x55555555...)
8794 Op = DAG.getNode(ISD::SUB, dl, VT, Op,
8795 DAG.getNode(ISD::AND, dl, VT,
8796 DAG.getNode(ISD::SRL, dl, VT, Op,
8797 DAG.getConstant(1, dl, ShVT)),
8798 Mask55));
8799 // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
8800 Op = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
8801 DAG.getNode(ISD::AND, dl, VT,
8802 DAG.getNode(ISD::SRL, dl, VT, Op,
8803 DAG.getConstant(2, dl, ShVT)),
8804 Mask33));
8805 // v = (v + (v >> 4)) & 0x0F0F0F0F...
8806 Op = DAG.getNode(ISD::AND, dl, VT,
8807 DAG.getNode(ISD::ADD, dl, VT, Op,
8808 DAG.getNode(ISD::SRL, dl, VT, Op,
8809 DAG.getConstant(4, dl, ShVT))),
8810 Mask0F);
8811
// At this point each byte holds its own popcount; for i8 that is the answer.
8812 if (Len <= 8)
8813 return Op;
8814
8815 // Avoid the multiply if we only have 2 bytes to add.
8816 // TODO: Only doing this for scalars because vectors weren't as obviously
8817 // improved.
8818 if (Len == 16 && !VT.isVector()) {
8819 // v = (v + (v >> 8)) & 0x00FF;
8820 return DAG.getNode(ISD::AND, dl, VT,
8821 DAG.getNode(ISD::ADD, dl, VT, Op,
8822 DAG.getNode(ISD::SRL, dl, VT, Op,
8823 DAG.getConstant(8, dl, ShVT))),
8824 DAG.getConstant(0xFF, dl, VT));
8825 }
8826
8827 // v = (v * 0x01010101...) >> (Len - 8)
8828 SDValue V;
// NOTE(review): the `if (...)` guard choosing the MUL path (original lines
// 8829-8830, presumably a MUL legality check) is missing from this extraction;
// the else-branch below sums the bytes with shift+add instead of multiplying.
8831 SDValue Mask01 =
8832 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
8833 V = DAG.getNode(ISD::MUL, dl, VT, Op, Mask01);
8834 } else {
8835 V = Op;
8836 for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
8837 SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
8838 V = DAG.getNode(ISD::ADD, dl, VT, V,
8839 DAG.getNode(ISD::SHL, dl, VT, V, ShiftC));
8840 }
8841 }
// The total lands in the top byte; shift it down to the low byte.
8842 return DAG.getNode(ISD::SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT));
8843}
8844
// Vector-predicated (VP_*) variant of the popcount expansion above: the same
// bithack, but every node carries the mask and explicit vector length (VL)
// operands from the VP_CTPOP node (operands 1 and 2).
// NOTE(review): the function signature line (original line 8845) is missing
// from this extraction.
8846 SDLoc dl(Node);
8847 EVT VT = Node->getValueType(0);
8848 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
8849 SDValue Op = Node->getOperand(0);
8850 SDValue Mask = Node->getOperand(1);
8851 SDValue VL = Node->getOperand(2);
8852 unsigned Len = VT.getScalarSizeInBits();
8853 assert(VT.isInteger() && "VP_CTPOP not implemented for this type.");
8854
8855 // TODO: Add support for irregular type lengths.
8856 if (!(Len <= 128 && Len % 8 == 0))
8857 return SDValue();
8858
8859 // This is same algorithm of expandCTPOP from
8860 // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
8861 SDValue Mask55 =
8862 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
8863 SDValue Mask33 =
8864 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
8865 SDValue Mask0F =
8866 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
8867
8868 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5;
8869
8870 // v = v - ((v >> 1) & 0x55555555...)
8871 Tmp1 = DAG.getNode(ISD::VP_AND, dl, VT,
8872 DAG.getNode(ISD::VP_SRL, dl, VT, Op,
8873 DAG.getConstant(1, dl, ShVT), Mask, VL),
8874 Mask55, Mask, VL);
8875 Op = DAG.getNode(ISD::VP_SUB, dl, VT, Op, Tmp1, Mask, VL);
8876
8877 // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
8878 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Op, Mask33, Mask, VL);
8879 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT,
8880 DAG.getNode(ISD::VP_SRL, dl, VT, Op,
8881 DAG.getConstant(2, dl, ShVT), Mask, VL),
8882 Mask33, Mask, VL);
8883 Op = DAG.getNode(ISD::VP_ADD, dl, VT, Tmp2, Tmp3, Mask, VL);
8884
8885 // v = (v + (v >> 4)) & 0x0F0F0F0F...
// NOTE(review): the trailing `,` on the Tmp4 line chains the Tmp4 and Tmp5
// assignments with the comma operator (behaves like two statements, but an
// easy-to-miss typo for `;`) -- confirm against upstream.
8886 Tmp4 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(4, dl, ShVT),
8887 Mask, VL),
8888 Tmp5 = DAG.getNode(ISD::VP_ADD, dl, VT, Op, Tmp4, Mask, VL);
8889 Op = DAG.getNode(ISD::VP_AND, dl, VT, Tmp5, Mask0F, Mask, VL);
8890
8891 if (Len <= 8)
8892 return Op;
8893
8894 // v = (v * 0x01010101...) >> (Len - 8)
8895 SDValue V;
// NOTE(review): the first line of the `if (...)` guard for the VP_MUL path
// (original line 8896) is missing; only its continuation survives below.
8897 ISD::VP_MUL, getTypeToTransformTo(*DAG.getContext(), VT))) {
8898 SDValue Mask01 =
8899 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
8900 V = DAG.getNode(ISD::VP_MUL, dl, VT, Op, Mask01, Mask, VL);
8901 } else {
8902 V = Op;
8903 for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
8904 SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
8905 V = DAG.getNode(ISD::VP_ADD, dl, VT, V,
8906 DAG.getNode(ISD::VP_SHL, dl, VT, V, ShiftC, Mask, VL),
8907 Mask, VL);
8908 }
8909 }
8910 return DAG.getNode(ISD::VP_SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT),
8911 Mask, VL);
8912}
8913
// Expands a count-leading-zeros node. Preference order: use CTLZ when legal
// (for a CTLZ_ZERO_UNDEF node), use CTLZ_ZERO_UNDEF plus an explicit
// zero-input select, otherwise fall back to the smear-right + popcount
// expansion from Hacker's Delight.
// NOTE(review): the function signature line (original 8914) and several
// condition continuation lines (8923, 8927, 8940, 8942-8943) are missing
// from this extraction -- the `if (` guards below are truncated.
8915 SDLoc dl(Node);
8916 EVT VT = Node->getValueType(0);
8917 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
8918 SDValue Op = Node->getOperand(0);
8919 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
8920
8921 // If the non-ZERO_UNDEF version is supported we can use that instead.
8922 if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
8924 return DAG.getNode(ISD::CTLZ, dl, VT, Op);
8925
8926 // If the ZERO_UNDEF version is supported use that and handle the zero case.
8928 EVT SetCCVT =
8929 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
8930 SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op);
8931 SDValue Zero = DAG.getConstant(0, dl, VT);
8932 SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
// ctlz(0) is defined as the full bit width for the non-ZERO_UNDEF form.
8933 return DAG.getSelect(dl, VT, SrcIsZero,
8934 DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ);
8935 }
8936
8937 // Only expand vector types if we have the appropriate vector bit operations.
8938 // This includes the operations needed to expand CTPOP if it isn't supported.
8939 if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
8941 !canExpandVectorCTPOP(*this, VT)) ||
8944 return SDValue();
8945
8946 // for now, we do this:
8947 // x = x | (x >> 1);
8948 // x = x | (x >> 2);
8949 // ...
8950 // x = x | (x >>16);
8951 // x = x | (x >>32); // for 64-bit input
8952 // return popcount(~x);
8953 //
8954 // Ref: "Hacker's Delight" by Henry Warren
// Smear the highest set bit into all lower positions, then count the zeros
// above it by popcounting the complement.
8955 for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
8956 SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
8957 Op = DAG.getNode(ISD::OR, dl, VT, Op,
8958 DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));
8959 }
8960 Op = DAG.getNOT(dl, Op, VT);
8961 return DAG.getNode(ISD::CTPOP, dl, VT, Op);
8962}
8963
// Vector-predicated variant of the CTLZ smear+popcount expansion: same
// Hacker's Delight algorithm with VP_* nodes threading the mask and VL
// (operands 1 and 2 of the node) through every operation.
// NOTE(review): the function signature line (original 8964) is missing from
// this extraction.
8965 SDLoc dl(Node);
8966 EVT VT = Node->getValueType(0);
8967 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
8968 SDValue Op = Node->getOperand(0);
8969 SDValue Mask = Node->getOperand(1);
8970 SDValue VL = Node->getOperand(2);
8971 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
8972
8973 // do this:
8974 // x = x | (x >> 1);
8975 // x = x | (x >> 2);
8976 // ...
8977 // x = x | (x >>16);
8978 // x = x | (x >>32); // for 64-bit input
8979 // return popcount(~x);
8980 for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
8981 SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
8982 Op = DAG.getNode(ISD::VP_OR, dl, VT, Op,
8983 DAG.getNode(ISD::VP_SRL, dl, VT, Op, Tmp, Mask, VL), Mask,
8984 VL);
8985 }
// XOR with all-ones = bitwise NOT (there is no VP_NOT helper).
8986 Op = DAG.getNode(ISD::VP_XOR, dl, VT, Op, DAG.getConstant(-1, dl, VT), Mask,
8987 VL);
8988 return DAG.getNode(ISD::VP_CTPOP, dl, VT, Op, Mask, VL);
8989}
8990
// Count-trailing-zeros via a de Bruijn multiply + constant-pool table lookup:
// isolate the lowest set bit with (x & -x), multiply by a de Bruijn constant,
// shift the top log2(BitWidth) bits down, and use them to index a byte table
// that maps each de Bruijn window to its bit position. Only 32- and 64-bit
// widths are supported.
// NOTE(review): the first signature line (original 8991) and a few interior
// lines (9000 -- the PtrInfo initializer's RHS; 9008/9010 around the table
// declaration) are missing from this extraction.
8992 const SDLoc &DL, EVT VT, SDValue Op,
8993 unsigned BitWidth) const {
8994 if (BitWidth != 32 && BitWidth != 64)
8995 return SDValue();
// Known de Bruijn sequences for 32/64 bits (each log2(BitWidth)-bit window
// is unique), per the Stanford bit-twiddling hacks page.
8996 APInt DeBruijn = BitWidth == 32 ? APInt(32, 0x077CB531U)
8997 : APInt(64, 0x0218A392CD3D5DBFULL);
8998 const DataLayout &TD = DAG.getDataLayout();
8999 MachinePointerInfo PtrInfo =
9001 unsigned ShiftAmt = BitWidth - Log2_32(BitWidth);
// (x & -x) isolates the lowest set bit before the de Bruijn multiply.
9002 SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
9003 SDValue Lookup = DAG.getNode(
9004 ISD::SRL, DL, VT,
9005 DAG.getNode(ISD::MUL, DL, VT, DAG.getNode(ISD::AND, DL, VT, Op, Neg),
9006 DAG.getConstant(DeBruijn, DL, VT)),
9007 DAG.getConstant(ShiftAmt, DL, VT));
9009
// Build the inverse mapping: table[window(1 << i)] = i.
9011 for (unsigned i = 0; i < BitWidth; i++) {
9012 APInt Shl = DeBruijn.shl(i);
9013 APInt Lshr = Shl.lshr(ShiftAmt);
9014 Table[Lshr.getZExtValue()] = i;
9015 }
9016
9017 // Create a ConstantArray in Constant Pool
9018 auto *CA = ConstantDataArray::get(*DAG.getContext(), Table);
9019 SDValue CPIdx = DAG.getConstantPool(CA, getPointerTy(TD),
9020 TD.getPrefTypeAlign(CA->getType()));
9021 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getEntryNode(),
9022 DAG.getMemBasePlusOffset(CPIdx, Lookup, DL),
9023 PtrInfo, MVT::i8);
// ZERO_UNDEF form: input 0 is UB, so the raw table result is fine as-is.
9024 if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF)
9025 return ExtLoad;
9026
9027 EVT SetCCVT =
9028 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9029 SDValue Zero = DAG.getConstant(0, DL, VT);
9030 SDValue SrcIsZero = DAG.getSetCC(DL, SetCCVT, Op, Zero, ISD::SETEQ);
9031 return DAG.getSelect(DL, VT, SrcIsZero,
9032 DAG.getConstant(BitWidth, DL, VT), ExtLoad);
9033}
9034
// Expands a count-trailing-zeros node. Preference order: CTTZ when legal (for
// a ZERO_UNDEF node), CTTZ_ZERO_UNDEF plus a zero-input select, the de Bruijn
// table lookup (scalar only), then popcount(~x & (x-1)) -- or, when the
// target has CTLZ but not CTPOP, BitWidth - ctlz(~x & (x-1)).
// NOTE(review): the function signature line (original 9035) and several
// condition continuation lines (9043, 9047, 9060-9061, 9063-9065, 9070, 9083)
// are missing from this extraction -- the `if (` guards below are truncated.
9036 SDLoc dl(Node);
9037 EVT VT = Node->getValueType(0);
9038 SDValue Op = Node->getOperand(0);
9039 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9040
9041 // If the non-ZERO_UNDEF version is supported we can use that instead.
9042 if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
9044 return DAG.getNode(ISD::CTTZ, dl, VT, Op);
9045
9046 // If the ZERO_UNDEF version is supported use that and handle the zero case.
9048 EVT SetCCVT =
9049 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9050 SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op);
9051 SDValue Zero = DAG.getConstant(0, dl, VT);
9052 SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
// cttz(0) is defined as the full bit width for the non-ZERO_UNDEF form.
9053 return DAG.getSelect(dl, VT, SrcIsZero,
9054 DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ);
9055 }
9056
9057 // Only expand vector types if we have the appropriate vector bit operations.
9058 // This includes the operations needed to expand CTPOP if it isn't supported.
9059 if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
9062 !canExpandVectorCTPOP(*this, VT)) ||
9066 return SDValue();
9067
9068 // Emit Table Lookup if ISD::CTLZ and ISD::CTPOP are not legal.
9069 if (!VT.isVector() && isOperationExpand(ISD::CTPOP, VT) &&
9071 if (SDValue V = CTTZTableLookup(Node, DAG, dl, VT, Op, NumBitsPerElt))
9072 return V;
9073
9074 // for now, we use: { return popcount(~x & (x - 1)); }
9075 // unless the target has ctlz but not ctpop, in which case we use:
9076 // { return 32 - nlz(~x & (x-1)); }
9077 // Ref: "Hacker's Delight" by Henry Warren
// ~x & (x-1) sets exactly the bits below the lowest set bit of x.
9078 SDValue Tmp = DAG.getNode(
9079 ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT),
9080 DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getConstant(1, dl, VT)));
9081
9082 // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
9084 return DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
9085 DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
9086 }
9087
9088 return DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
9089}
9090
// Vector-predicated CTTZ expansion: popcount(~x & (x - 1)), matching the
// vector fallback of expandCTTZ but with VP_* nodes carrying the mask and VL
// (operands 1 and 2).
// NOTE(review): the function signature line (original 9091) is missing from
// this extraction.
9092 SDValue Op = Node->getOperand(0);
9093 SDValue Mask = Node->getOperand(1);
9094 SDValue VL = Node->getOperand(2);
9095 SDLoc dl(Node);
9096 EVT VT = Node->getValueType(0);
9097
9098 // Same as the vector part of expandCTTZ, use: popcount(~x & (x - 1))
9099 SDValue Not = DAG.getNode(ISD::VP_XOR, dl, VT, Op,
9100 DAG.getConstant(-1, dl, VT), Mask, VL);
9101 SDValue MinusOne = DAG.getNode(ISD::VP_SUB, dl, VT, Op,
9102 DAG.getConstant(1, dl, VT), Mask, VL);
9103 SDValue Tmp = DAG.getNode(ISD::VP_AND, dl, VT, Not, MinusOne, Mask, VL);
9104 return DAG.getNode(ISD::VP_CTPOP, dl, VT, Tmp, Mask, VL);
9105}
9106
// Finds the index of the first "true" element in a predicated vector: each
// lane contributes either its step index (if set) or the splatted EVL (if
// clear), and a VP_REDUCE_UMIN picks the smallest -- so a fully-false vector
// yields EVL. The pseudo-IR sketch is in the comments below.
// NOTE(review): the first signature line (original 9107, with the function
// name) is missing from this extraction; only the parameter continuation
// survives.
9108 SelectionDAG &DAG) const {
9109 // %cond = to_bool_vec %source
9110 // %splat = splat /*val=*/VL
9111 // %tz = step_vector
9112 // %v = vp.select %cond, /*true=*/tz, /*false=*/%splat
9113 // %r = vp.reduce.umin %v
9114 SDLoc DL(N);
9115 SDValue Source = N->getOperand(0);
9116 SDValue Mask = N->getOperand(1);
9117 SDValue EVL = N->getOperand(2);
9118 EVT SrcVT = Source.getValueType();
9119 EVT ResVT = N->getValueType(0);
9120 EVT ResVecVT =
9121 EVT::getVectorVT(*DAG.getContext(), ResVT, SrcVT.getVectorElementCount());
9122
9123 // Convert to boolean vector.
9124 if (SrcVT.getScalarType() != MVT::i1) {
9125 SDValue AllZero = DAG.getConstant(0, DL, SrcVT);
9126 SrcVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
9127 SrcVT.getVectorElementCount());
// Non-zero lanes become true: compare each element against zero.
9128 Source = DAG.getNode(ISD::VP_SETCC, DL, SrcVT, Source, AllZero,
9129 DAG.getCondCode(ISD::SETNE), Mask, EVL);
9130 }
9131
9132 SDValue ExtEVL = DAG.getZExtOrTrunc(EVL, DL, ResVT);
9133 SDValue Splat = DAG.getSplat(ResVecVT, DL, ExtEVL);
9134 SDValue StepVec = DAG.getStepVector(DL, ResVecVT);
9135 SDValue Select =
9136 DAG.getNode(ISD::VP_SELECT, DL, ResVecVT, Source, StepVec, Splat, EVL);
// ExtEVL doubles as the reduction's start value, so the all-false case
// reduces to EVL.
9137 return DAG.getNode(ISD::VP_REDUCE_UMIN, DL, ResVT, ExtEVL, Select, Mask, EVL);
9138}
9139
// Expands abs(x) (or 0-abs(x) when IsNegative): tries smax/umin/smin forms
// first when SUB plus the relevant min/max is legal, then falls back to the
// classic sign-mask trick Y = sra(X, BW-1); sub(xor(X, Y), Y).
// NOTE(review): the first signature line (original 9140) and the second line
// of each legality `if (` (9148, 9157, 9166) plus two lines of the vector
// guard (9175, 9178) are missing from this extraction.
9141 bool IsNegative) const {
9142 SDLoc dl(N);
9143 EVT VT = N->getValueType(0);
9144 SDValue Op = N->getOperand(0);
9145
9146 // abs(x) -> smax(x,sub(0,x))
9147 if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
// Freeze before fanning Op out to two uses so both see the same value even
// if Op is poison/undef.
9149 SDValue Zero = DAG.getConstant(0, dl, VT);
9150 Op = DAG.getFreeze(Op);
9151 return DAG.getNode(ISD::SMAX, dl, VT, Op,
9152 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
9153 }
9154
9155 // abs(x) -> umin(x,sub(0,x))
9156 if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
9158 SDValue Zero = DAG.getConstant(0, dl, VT);
9159 Op = DAG.getFreeze(Op);
9160 return DAG.getNode(ISD::UMIN, dl, VT, Op,
9161 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
9162 }
9163
9164 // 0 - abs(x) -> smin(x, sub(0,x))
9165 if (IsNegative && isOperationLegal(ISD::SUB, VT) &&
9167 SDValue Zero = DAG.getConstant(0, dl, VT);
9168 Op = DAG.getFreeze(Op);
9169 return DAG.getNode(ISD::SMIN, dl, VT, Op,
9170 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
9171 }
9172
9173 // Only expand vector types if we have the appropriate vector operations.
9174 if (VT.isVector() &&
9176 (!IsNegative && !isOperationLegalOrCustom(ISD::ADD, VT)) ||
9177 (IsNegative && !isOperationLegalOrCustom(ISD::SUB, VT)) ||
9179 return SDValue();
9180
9181 Op = DAG.getFreeze(Op);
// Shift = all-ones when negative, all-zeros when non-negative.
9182 SDValue Shift = DAG.getNode(
9183 ISD::SRA, dl, VT, Op,
9184 DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, dl));
9185 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift);
9186
9187 // abs(x) -> Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
9188 if (!IsNegative)
9189 return DAG.getNode(ISD::SUB, dl, VT, Xor, Shift);
9190
9191 // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
9192 return DAG.getNode(ISD::SUB, dl, VT, Shift, Xor);
9193}
9194
// Expands absolute-difference nodes (ABDS/ABDU). Strategies in order:
// max-min when both are legal; or(usubsat(a,b), usubsat(b,a)) for unsigned;
// a branchless cmp/xor/sub form when setcc produces all-ones booleans of the
// same type; otherwise select(cmp, a-b, b-a).
// NOTE(review): the function signature line (original 9195) and the line
// defining `CC` (original 9219, between the CCVT and Cmp lines) are missing
// from this extraction.
9196 SDLoc dl(N);
9197 EVT VT = N->getValueType(0);
// Freeze both operands up front: each is used multiple times below.
9198 SDValue LHS = DAG.getFreeze(N->getOperand(0));
9199 SDValue RHS = DAG.getFreeze(N->getOperand(1));
9200 bool IsSigned = N->getOpcode() == ISD::ABDS;
9201
9202 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
9203 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
9204 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
9205 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
9206 if (isOperationLegal(MaxOpc, VT) && isOperationLegal(MinOpc, VT)) {
9207 SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
9208 SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
9209 return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
9210 }
9211
9212 // abdu(lhs, rhs) -> or(usubsat(lhs,rhs), usubsat(rhs,lhs))
// One of the two saturating subtractions is zero, so OR merges them.
9213 if (!IsSigned && isOperationLegal(ISD::USUBSAT, VT))
9214 return DAG.getNode(ISD::OR, dl, VT,
9215 DAG.getNode(ISD::USUBSAT, dl, VT, LHS, RHS),
9216 DAG.getNode(ISD::USUBSAT, dl, VT, RHS, LHS));
9217
9218 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9220 SDValue Cmp = DAG.getSetCC(dl, CCVT, LHS, RHS, CC);
9221
9222 // Branchless expansion iff cmp result is allbits:
9223 // abds(lhs, rhs) -> sub(sgt(lhs, rhs), xor(sgt(lhs, rhs), sub(lhs, rhs)))
9224 // abdu(lhs, rhs) -> sub(ugt(lhs, rhs), xor(ugt(lhs, rhs), sub(lhs, rhs)))
9225 if (CCVT == VT && getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
9226 SDValue Diff = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
9227 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Diff, Cmp);
9228 return DAG.getNode(ISD::SUB, dl, VT, Cmp, Xor);
9229 }
9230
9231 // abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
9232 // abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
9233 return DAG.getSelect(dl, VT, Cmp, DAG.getNode(ISD::SUB, dl, VT, LHS, RHS),
9234 DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));
9235}
9236
// Expands the four averaging nodes (AVGFLOORS/U, AVGCEILS/U). If the
// operands are provably already extended (a spare sign/zero bit), a plain
// add(+1 for ceil)+shift cannot overflow; scalars may instead widen to 2*BW
// when that type is legal and truncation is free; otherwise use the
// overflow-free identity sub/add(or|and(a,b), shr(xor(a,b),1)).
// NOTE(review): the function signature line (original 9237) and the RHS
// continuation of the unsigned IsExt check (original 9259, the
// countMinLeadingZeros test on RHS) are missing from this extraction.
9238 SDLoc dl(N);
9239 EVT VT = N->getValueType(0);
9240 SDValue LHS = N->getOperand(0);
9241 SDValue RHS = N->getOperand(1);
9242
9243 unsigned Opc = N->getOpcode();
9244 bool IsFloor = Opc == ISD::AVGFLOORS || Opc == ISD::AVGFLOORU;
9245 bool IsSigned = Opc == ISD::AVGCEILS || Opc == ISD::AVGFLOORS;
9246 unsigned SumOpc = IsFloor ? ISD::ADD : ISD::SUB;
9247 unsigned SignOpc = IsFloor ? ISD::AND : ISD::OR;
9248 unsigned ShiftOpc = IsSigned ? ISD::SRA : ISD::SRL;
9249 unsigned ExtOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
9250 assert((Opc == ISD::AVGFLOORS || Opc == ISD::AVGCEILS ||
9251 Opc == ISD::AVGFLOORU || Opc == ISD::AVGCEILU) &&
9252 "Unknown AVG node");
9253
9254 // If the operands are already extended, we can add+shift.
// >= 2 sign bits (signed) / >= 1 leading zero (unsigned) guarantees the sum
// fits without overflow.
9255 bool IsExt =
9256 (IsSigned && DAG.ComputeNumSignBits(LHS) >= 2 &&
9257 DAG.ComputeNumSignBits(RHS) >= 2) ||
9258 (!IsSigned && DAG.computeKnownBits(LHS).countMinLeadingZeros() >= 1 &&
9260 if (IsExt) {
9261 SDValue Sum = DAG.getNode(ISD::ADD, dl, VT, LHS, RHS);
9262 if (!IsFloor)
9263 Sum = DAG.getNode(ISD::ADD, dl, VT, Sum, DAG.getConstant(1, dl, VT));
9264 return DAG.getNode(ShiftOpc, dl, VT, Sum,
9265 DAG.getShiftAmountConstant(1, VT, dl));
9266 }
9267
9268 // For scalars, see if we can efficiently extend/truncate to use add+shift.
9269 if (VT.isScalarInteger()) {
9270 unsigned BW = VT.getScalarSizeInBits();
9271 EVT ExtVT = VT.getIntegerVT(*DAG.getContext(), 2 * BW);
9272 if (isTypeLegal(ExtVT) && isTruncateFree(ExtVT, VT)) {
9273 LHS = DAG.getNode(ExtOpc, dl, ExtVT, LHS);
9274 RHS = DAG.getNode(ExtOpc, dl, ExtVT, RHS);
9275 SDValue Avg = DAG.getNode(ISD::ADD, dl, ExtVT, LHS, RHS);
9276 if (!IsFloor)
9277 Avg = DAG.getNode(ISD::ADD, dl, ExtVT, Avg,
9278 DAG.getConstant(1, dl, ExtVT));
9279 // Just use SRL as we will be truncating away the extended sign bits.
9280 Avg = DAG.getNode(ISD::SRL, dl, ExtVT, Avg,
9281 DAG.getShiftAmountConstant(1, ExtVT, dl));
9282 return DAG.getNode(ISD::TRUNCATE, dl, VT, Avg);
9283 }
9284 }
9285
9286 // avgceils(lhs, rhs) -> sub(or(lhs,rhs),ashr(xor(lhs,rhs),1))
9287 // avgceilu(lhs, rhs) -> sub(or(lhs,rhs),lshr(xor(lhs,rhs),1))
9288 // avgfloors(lhs, rhs) -> add(and(lhs,rhs),ashr(xor(lhs,rhs),1))
9289 // avgflooru(lhs, rhs) -> add(and(lhs,rhs),lshr(xor(lhs,rhs),1))
// Freeze because LHS/RHS each feed two nodes below.
9290 LHS = DAG.getFreeze(LHS);
9291 RHS = DAG.getFreeze(RHS);
9292 SDValue Sign = DAG.getNode(SignOpc, dl, VT, LHS, RHS);
9293 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
9294 SDValue Shift =
9295 DAG.getNode(ShiftOpc, dl, VT, Xor, DAG.getShiftAmountConstant(1, VT, dl));
9296 return DAG.getNode(SumOpc, dl, VT, Sign, Shift);
9297}
9298
// Expands byte-swap into shifts/masks/ors: i16 becomes a rotate-by-8; i32 and
// i64 assemble each byte into its mirrored position. Other simple types (and
// non-simple types) return SDValue() so the caller can try something else.
// NOTE(review): the function signature line (original 9299) is missing from
// this extraction.
9300 SDLoc dl(N);
9301 EVT VT = N->getValueType(0);
9302 SDValue Op = N->getOperand(0);
9303
9304 if (!VT.isSimple())
9305 return SDValue();
9306
9307 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
9308 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
9309 switch (VT.getSimpleVT().getScalarType().SimpleTy) {
9310 default:
9311 return SDValue();
9312 case MVT::i16:
9313 // Use a rotate by 8. This can be further expanded if necessary.
9314 return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
9315 case MVT::i32:
// TmpN holds the byte destined for lane N (1 = lowest); OR-reduce at the end.
9316 Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
9317 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Op,
9318 DAG.getConstant(0xFF00, dl, VT));
9319 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT));
9320 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
9321 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, dl, VT));
9322 Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
9323 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
9324 Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
9325 return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
9326 case MVT::i64:
9327 Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
9328 Tmp7 = DAG.getNode(ISD::AND, dl, VT, Op,
9329 DAG.getConstant(255ULL<<8, dl, VT));
9330 Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT));
9331 Tmp6 = DAG.getNode(ISD::AND, dl, VT, Op,
9332 DAG.getConstant(255ULL<<16, dl, VT));
9333 Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT));
9334 Tmp5 = DAG.getNode(ISD::AND, dl, VT, Op,
9335 DAG.getConstant(255ULL<<24, dl, VT));
9336 Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT));
9337 Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
9338 Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4,
9339 DAG.getConstant(255ULL<<24, dl, VT));
9340 Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
9341 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
9342 DAG.getConstant(255ULL<<16, dl, VT));
9343 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
9344 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2,
9345 DAG.getConstant(255ULL<<8, dl, VT));
9346 Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
// Pairwise OR tree keeps the dependency depth logarithmic.
9347 Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7);
9348 Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5);
9349 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
9350 Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
9351 Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6);
9352 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
9353 return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4);
9354 }
9355}
9356
// Vector-predicated byte-swap expansion: mirrors expandBSWAP's i16/i32/i64
// shift/mask/or recipes, with every node threading the mask and EVL
// (operands 1 and 2). i16 uses shl|srl rather than a rotate (no VP rotate is
// used here).
// NOTE(review): the function signature line (original 9357) is missing from
// this extraction.
9358 SDLoc dl(N);
9359 EVT VT = N->getValueType(0);
9360 SDValue Op = N->getOperand(0);
9361 SDValue Mask = N->getOperand(1);
9362 SDValue EVL = N->getOperand(2);
9363
9364 if (!VT.isSimple())
9365 return SDValue();
9366
9367 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
9368 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
9369 switch (VT.getSimpleVT().getScalarType().SimpleTy) {
9370 default:
9371 return SDValue();
9372 case MVT::i16:
9373 Tmp1 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
9374 Mask, EVL);
9375 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
9376 Mask, EVL);
9377 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp1, Tmp2, Mask, EVL);
9378 case MVT::i32:
9379 Tmp4 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
9380 Mask, EVL);
9381 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Op, DAG.getConstant(0xFF00, dl, VT),
9382 Mask, EVL);
9383 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT),
9384 Mask, EVL);
9385 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
9386 Mask, EVL);
9387 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
9388 DAG.getConstant(0xFF00, dl, VT), Mask, EVL);
9389 Tmp1 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
9390 Mask, EVL);
9391 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
9392 Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
9393 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
9394 case MVT::i64:
9395 Tmp8 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
9396 Mask, EVL);
9397 Tmp7 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
9398 DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
9399 Tmp7 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT),
9400 Mask, EVL);
9401 Tmp6 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
9402 DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
9403 Tmp6 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT),
9404 Mask, EVL);
9405 Tmp5 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
9406 DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
9407 Tmp5 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT),
9408 Mask, EVL);
9409 Tmp4 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
9410 Mask, EVL);
9411 Tmp4 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp4,
9412 DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
9413 Tmp3 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
9414 Mask, EVL);
9415 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp3,
9416 DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
9417 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT),
9418 Mask, EVL);
9419 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
9420 DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
9421 Tmp1 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
9422 Mask, EVL);
9423 Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp7, Mask, EVL);
9424 Tmp6 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp6, Tmp5, Mask, EVL);
9425 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
9426 Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
9427 Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp6, Mask, EVL);
9428 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
9429 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp4, Mask, EVL);
9430 }
9431}
9432
// Expands bit-reverse. For power-of-two sizes >= 8 bits: BSWAP the bytes,
// then swap nibbles, bit-pairs, and single bits within each byte using the
// splatted 0x0F/0x33/0x55 masks. Otherwise falls back to moving every bit
// individually (O(Sz) shift/and/or sequence).
// NOTE(review): the function signature line (original 9433) is missing from
// this extraction.
9434 SDLoc dl(N);
9435 EVT VT = N->getValueType(0);
9436 SDValue Op = N->getOperand(0);
9437 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
9438 unsigned Sz = VT.getScalarSizeInBits();
9439
9440 SDValue Tmp, Tmp2, Tmp3;
9441
9442 // If we can, perform BSWAP first and then the mask+swap the i4, then i2
9443 // and finally the i1 pairs.
9444 // TODO: We can easily support i4/i2 legal types if any target ever does.
9445 if (Sz >= 8 && isPowerOf2_32(Sz)) {
9446 // Create the masks - repeating the pattern every byte.
9447 APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
9448 APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
9449 APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
9450
9451 // BSWAP if the type is wider than a single byte.
9452 Tmp = (Sz > 8 ? DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op);
9453
9454 // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
9455 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT));
9456 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask4, dl, VT));
9457 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT));
9458 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT));
9459 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9460
9461 // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
9462 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT));
9463 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask2, dl, VT));
9464 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT));
9465 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT));
9466 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9467
9468 // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
9469 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT));
9470 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask1, dl, VT));
9471 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT));
9472 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT));
9473 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9474 return Tmp;
9475 }
9476
// Fallback: move bit I to mirrored position J one bit at a time.
9477 Tmp = DAG.getConstant(0, dl, VT);
9478 for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) {
9479 if (I < J)
9480 Tmp2 =
9481 DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(J - I, dl, SHVT));
9482 else
9483 Tmp2 =
9484 DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT));
9485
9486 APInt Shift = APInt::getOneBitSet(Sz, J);
9487 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT));
9488 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2);
9489 }
9490
9491 return Tmp;
9492}
9493
// Vector-predicated bit-reverse expansion: same BSWAP + nibble/pair/bit swap
// ladder as expandBITREVERSE, with mask and EVL threaded through VP_* nodes.
// Unlike the non-VP version, there is no per-bit fallback -- non-power-of-two
// or sub-byte sizes return SDValue().
// NOTE(review): the function signature line (original 9494) is missing from
// this extraction.
9495 assert(N->getOpcode() == ISD::VP_BITREVERSE);
9496
9497 SDLoc dl(N);
9498 EVT VT = N->getValueType(0);
9499 SDValue Op = N->getOperand(0);
9500 SDValue Mask = N->getOperand(1);
9501 SDValue EVL = N->getOperand(2);
9502 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
9503 unsigned Sz = VT.getScalarSizeInBits();
9504
9505 SDValue Tmp, Tmp2, Tmp3;
9506
9507 // If we can, perform BSWAP first and then the mask+swap the i4, then i2
9508 // and finally the i1 pairs.
9509 // TODO: We can easily support i4/i2 legal types if any target ever does.
9510 if (Sz >= 8 && isPowerOf2_32(Sz)) {
9511 // Create the masks - repeating the pattern every byte.
9512 APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
9513 APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
9514 APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
9515
9516 // BSWAP if the type is wider than a single byte.
9517 Tmp = (Sz > 8 ? DAG.getNode(ISD::VP_BSWAP, dl, VT, Op, Mask, EVL) : Op);
9518
9519 // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
9520 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT),
9521 Mask, EVL);
9522 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
9523 DAG.getConstant(Mask4, dl, VT), Mask, EVL);
9524 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT),
9525 Mask, EVL);
9526 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT),
9527 Mask, EVL);
9528 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
9529
9530 // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
9531 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT),
9532 Mask, EVL);
9533 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
9534 DAG.getConstant(Mask2, dl, VT), Mask, EVL);
9535 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT),
9536 Mask, EVL);
9537 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT),
9538 Mask, EVL);
9539 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
9540
9541 // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
9542 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT),
9543 Mask, EVL);
9544 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
9545 DAG.getConstant(Mask1, dl, VT), Mask, EVL);
9546 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT),
9547 Mask, EVL);
9548 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT),
9549 Mask, EVL);
9550 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
9551 return Tmp;
9552 }
9553 return SDValue();
9554}
9555
// Scalarizes a vector load into per-element loads (or, for non-byte-sized
// elements, one wide integer load followed by shift/mask extraction of each
// element). Returns {built vector value, output chain}. Scalable vectors
// cannot be scalarized and abort.
// NOTE(review): the middle signature line (original 9557, with the function
// name and LoadSDNode parameter) and the two `Vals` declarations (original
// 9598 and 9625) are missing from this extraction.
9556 std::pair<SDValue, SDValue>
9558 SelectionDAG &DAG) const {
9559 SDLoc SL(LD);
9560 SDValue Chain = LD->getChain();
9561 SDValue BasePTR = LD->getBasePtr();
9562 EVT SrcVT = LD->getMemoryVT();
9563 EVT DstVT = LD->getValueType(0);
9564 ISD::LoadExtType ExtType = LD->getExtensionType();
9565
9566 if (SrcVT.isScalableVector())
9567 report_fatal_error("Cannot scalarize scalable vector loads");
9568
9569 unsigned NumElem = SrcVT.getVectorNumElements();
9570
9571 EVT SrcEltVT = SrcVT.getScalarType();
9572 EVT DstEltVT = DstVT.getScalarType();
9573
9574 // A vector must always be stored in memory as-is, i.e. without any padding
9575 // between the elements, since various code depend on it, e.g. in the
9576 // handling of a bitcast of a vector type to int, which may be done with a
9577 // vector store followed by an integer load. A vector that does not have
9578 // elements that are byte-sized must therefore be stored as an integer
9579 // built out of the extracted vector elements.
9580 if (!SrcEltVT.isByteSized()) {
9581 unsigned NumLoadBits = SrcVT.getStoreSizeInBits();
9582 EVT LoadVT = EVT::getIntegerVT(*DAG.getContext(), NumLoadBits);
9583
9584 unsigned NumSrcBits = SrcVT.getSizeInBits();
9585 EVT SrcIntVT = EVT::getIntegerVT(*DAG.getContext(), NumSrcBits);
9586
9587 unsigned SrcEltBits = SrcEltVT.getSizeInBits();
9588 SDValue SrcEltBitMask = DAG.getConstant(
9589 APInt::getLowBitsSet(NumLoadBits, SrcEltBits), SL, LoadVT);
9590
9591 // Load the whole vector and avoid masking off the top bits as it makes
9592 // the codegen worse.
9593 SDValue Load =
9594 DAG.getExtLoad(ISD::EXTLOAD, SL, LoadVT, Chain, BasePTR,
9595 LD->getPointerInfo(), SrcIntVT, LD->getOriginalAlign(),
9596 LD->getMemOperand()->getFlags(), LD->getAAInfo());
9597
// Extract each element by shifting its bit range down and masking;
// big-endian targets index elements from the top of the integer.
9599 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
9600 unsigned ShiftIntoIdx =
9601 (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
9602 SDValue ShiftAmount =
9603 DAG.getShiftAmountConstant(ShiftIntoIdx * SrcEltVT.getSizeInBits(),
9604 LoadVT, SL, /*LegalTypes=*/false);
9605 SDValue ShiftedElt = DAG.getNode(ISD::SRL, SL, LoadVT, Load, ShiftAmount);
9606 SDValue Elt =
9607 DAG.getNode(ISD::AND, SL, LoadVT, ShiftedElt, SrcEltBitMask);
9608 SDValue Scalar = DAG.getNode(ISD::TRUNCATE, SL, SrcEltVT, Elt);
9609
9610 if (ExtType != ISD::NON_EXTLOAD) {
9611 unsigned ExtendOp = ISD::getExtForLoadExtType(false, ExtType);
9612 Scalar = DAG.getNode(ExtendOp, SL, DstEltVT, Scalar);
9613 }
9614
9615 Vals.push_back(Scalar);
9616 }
9617
9618 SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
9619 return std::make_pair(Value, Load.getValue(1));
9620 }
9621
// Byte-sized elements: emit one scalar (ext)load per element at increasing
// offsets, then TokenFactor the chains together.
9622 unsigned Stride = SrcEltVT.getSizeInBits() / 8;
9623 assert(SrcEltVT.isByteSized());
9624
9626 SmallVector<SDValue, 8> LoadChains;
9627
9628 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
9629 SDValue ScalarLoad =
9630 DAG.getExtLoad(ExtType, SL, DstEltVT, Chain, BasePTR,
9631 LD->getPointerInfo().getWithOffset(Idx * Stride),
9632 SrcEltVT, LD->getOriginalAlign(),
9633 LD->getMemOperand()->getFlags(), LD->getAAInfo());
9634
9635 BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, TypeSize::getFixed(Stride));
9636
9637 Vals.push_back(ScalarLoad.getValue(0));
9638 LoadChains.push_back(ScalarLoad.getValue(1));
9639 }
9640
9641 SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
9642 SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
9643
9644 return std::make_pair(Value, NewChain);
9645}
9646
// TargetLowering::scalarizeVectorStore — lower a vector StoreSDNode into
// scalar operations and return the new chain. Non-byte-sized elements are
// packed into one integer (zext + shl + or) and stored once; byte-sized
// elements get one truncating scalar store each, merged by a TokenFactor.
// NOTE(review): this extracted listing dropped the function-name line 9647
// and the `SmallVector<SDValue, 8> Stores;` declaration at 9704, so
// `Stores` is used below without a visible declaration.
9648 SelectionDAG &DAG) const {
9649 SDLoc SL(ST);
9650
9651 SDValue Chain = ST->getChain();
9652 SDValue BasePtr = ST->getBasePtr();
9653 SDValue Value = ST->getValue();
9654 EVT StVT = ST->getMemoryVT();
9655
// Scalable vectors have no fixed element count to iterate over.
9656 if (StVT.isScalableVector())
9657 report_fatal_error("Cannot scalarize scalable vector stores");
9658
9659 // The type of the data we want to save
9660 EVT RegVT = Value.getValueType();
9661 EVT RegSclVT = RegVT.getScalarType();
9662
9663 // The type of data as saved in memory.
9664 EVT MemSclVT = StVT.getScalarType();
9665
9666 unsigned NumElem = StVT.getVectorNumElements();
9667
9668 // A vector must always be stored in memory as-is, i.e. without any padding
9669 // between the elements, since various code depend on it, e.g. in the
9670 // handling of a bitcast of a vector type to int, which may be done with a
9671 // vector store followed by an integer load. A vector that does not have
9672 // elements that are byte-sized must therefore be stored as an integer
9673 // built out of the extracted vector elements.
9674 if (!MemSclVT.isByteSized()) {
9675 unsigned NumBits = StVT.getSizeInBits();
9676 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);
9677
9678 SDValue CurrVal = DAG.getConstant(0, SL, IntVT);
9679
// OR each truncated element into CurrVal at its bit position; big-endian
// targets place element 0 in the high bits.
9680 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
9681 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
9682 DAG.getVectorIdxConstant(Idx, SL));
9683 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt);
9684 SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc);
9685 unsigned ShiftIntoIdx =
9686 (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
9687 SDValue ShiftAmount =
9688 DAG.getConstant(ShiftIntoIdx * MemSclVT.getSizeInBits(), SL, IntVT);
9689 SDValue ShiftedElt =
9690 DAG.getNode(ISD::SHL, SL, IntVT, ExtElt, ShiftAmount);
9691 CurrVal = DAG.getNode(ISD::OR, SL, IntVT, CurrVal, ShiftedElt);
9692 }
9693
// One store of the packed integer preserves the padding-free memory layout.
9694 return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(),
9695 ST->getOriginalAlign(), ST->getMemOperand()->getFlags(),
9696 ST->getAAInfo());
9697 }
9698
9699 // Store Stride in bytes
9700 unsigned Stride = MemSclVT.getSizeInBits() / 8;
9701 assert(Stride && "Zero stride!");
9702 // Extract each of the elements from the original vector and save them into
9703 // memory individually.
9705 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
9706 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
9707 DAG.getVectorIdxConstant(Idx, SL));
9708
9709 SDValue Ptr =
9710 DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::getFixed(Idx * Stride));
9711
9712 // This scalar TruncStore may be illegal, but we legalize it later.
9713 SDValue Store = DAG.getTruncStore(
9714 Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride),
9715 MemSclVT, ST->getOriginalAlign(), ST->getMemOperand()->getFlags(),
9716 ST->getAAInfo());
9717
9718 Stores.push_back(Store);
9719 }
9720
// The element stores are independent; merge their chains with a TokenFactor.
9721 return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores);
9722}
9723
// TargetLowering::expandUnalignedLoad — expand a misaligned load; returns
// {result value, output chain}. FP/vector loads become an integer load of
// the same size (bitcast back), a scalarized load, or a copy through an
// aligned stack slot; integer loads split into two half-width loads that
// are recombined with SHL + OR (endian-aware).
// NOTE(review): this extracted listing dropped line 9725 (function name and
// opening brace) and the `SmallVector<SDValue, 8> Stores;` declaration at
// 9766, so `Stores` is used below without a visible declaration.
9724std::pair<SDValue, SDValue>
9726 assert(LD->getAddressingMode() == ISD::UNINDEXED &&
9727 "unaligned indexed loads not implemented!");
9728 SDValue Chain = LD->getChain();
9729 SDValue Ptr = LD->getBasePtr();
9730 EVT VT = LD->getValueType(0);
9731 EVT LoadedVT = LD->getMemoryVT();
9732 SDLoc dl(LD);
9733 auto &MF = DAG.getMachineFunction();
9734
9735 if (VT.isFloatingPoint() || VT.isVector()) {
9736 EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
9737 if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
9738 if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
9739 LoadedVT.isVector()) {
9740 // Scalarize the load and let the individual components be handled.
9741 return scalarizeVectorLoad(LD, DAG);
9742 }
9743
9744 // Expand to a (misaligned) integer load of the same size,
9745 // then bitconvert to floating point or vector.
9746 SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr,
9747 LD->getMemOperand());
9748 SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
9749 if (LoadedVT != VT)
9750 Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
9751 ISD::ANY_EXTEND, dl, VT, Result);
9752
9753 return std::make_pair(Result, newLoad.getValue(1));
9754 }
9755
9756 // Copy the value to a (aligned) stack slot using (unaligned) integer
9757 // loads and stores, then do a (aligned) load from the stack slot.
9758 MVT RegVT = getRegisterType(*DAG.getContext(), intVT);
9759 unsigned LoadedBytes = LoadedVT.getStoreSize();
9760 unsigned RegBytes = RegVT.getSizeInBits() / 8;
// Round up so a partial final register still gets copied.
9761 unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
9762
9763 // Make sure the stack slot is also aligned for the register type.
9764 SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
9765 auto FrameIndex = cast<FrameIndexSDNode>(StackBase.getNode())->getIndex();
9767 SDValue StackPtr = StackBase;
9768 unsigned Offset = 0;
9769
9770 EVT PtrVT = Ptr.getValueType();
9771 EVT StackPtrVT = StackPtr.getValueType();
9772
9773 SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
9774 SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
9775
9776 // Do all but one copies using the full register width.
9777 for (unsigned i = 1; i < NumRegs; i++) {
9778 // Load one integer register's worth from the original location.
9779 SDValue Load = DAG.getLoad(
9780 RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset),
9781 LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
9782 LD->getAAInfo());
9783 // Follow the load with a store to the stack slot. Remember the store.
9784 Stores.push_back(DAG.getStore(
9785 Load.getValue(1), dl, Load, StackPtr,
9786 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset)));
9787 // Increment the pointers.
9788 Offset += RegBytes;
9789
9790 Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
9791 StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
9792 }
9793
9794 // The last copy may be partial. Do an extending load.
9795 EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
9796 8 * (LoadedBytes - Offset));
9797 SDValue Load =
9798 DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
9799 LD->getPointerInfo().getWithOffset(Offset), MemVT,
9800 LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
9801 LD->getAAInfo());
9802 // Follow the load with a store to the stack slot. Remember the store.
9803 // On big-endian machines this requires a truncating store to ensure
9804 // that the bits end up in the right place.
9805 Stores.push_back(DAG.getTruncStore(
9806 Load.getValue(1), dl, Load, StackPtr,
9807 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT));
9808
9809 // The order of the stores doesn't matter - say it with a TokenFactor.
9810 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
9811
9812 // Finally, perform the original load only redirected to the stack slot.
9813 Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
9814 MachinePointerInfo::getFixedStack(MF, FrameIndex, 0),
9815 LoadedVT);
9816
9817 // Callers expect a MERGE_VALUES node.
9818 return std::make_pair(Load, TF);
9819 }
9820
9821 assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
9822 "Unaligned load of unsupported type.");
9823
9824 // Compute the new VT that is half the size of the old one. This is an
9825 // integer MVT.
9826 unsigned NumBits = LoadedVT.getSizeInBits();
9827 EVT NewLoadedVT;
9828 NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
9829 NumBits >>= 1;
9830
9831 Align Alignment = LD->getOriginalAlign();
9832 unsigned IncrementSize = NumBits / 8;
9833 ISD::LoadExtType HiExtType = LD->getExtensionType();
9834
9835 // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
9836 if (HiExtType == ISD::NON_EXTLOAD)
9837 HiExtType = ISD::ZEXTLOAD;
9838
9839 // Load the value in two parts
// The low half is always zero-extended so the OR below cannot see stray
// high bits; the high half keeps the (adjusted) original extension kind.
9840 SDValue Lo, Hi;
9841 if (DAG.getDataLayout().isLittleEndian()) {
9842 Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
9843 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
9844 LD->getAAInfo());
9845
9846 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
9847 Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
9848 LD->getPointerInfo().getWithOffset(IncrementSize),
9849 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
9850 LD->getAAInfo());
9851 } else {
9852 Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
9853 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
9854 LD->getAAInfo());
9855
9856 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
9857 Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
9858 LD->getPointerInfo().getWithOffset(IncrementSize),
9859 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
9860 LD->getAAInfo());
9861 }
9862
9863 // aggregate the two parts
9864 SDValue ShiftAmount = DAG.getShiftAmountConstant(NumBits, VT, dl);
9865 SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
9866 Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);
9867
9868 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
9869 Hi.getValue(1));
9870
9871 return std::make_pair(Result, TF);
9872}
9873
// TargetLowering::expandUnalignedStore — expand a misaligned store and
// return the new chain. FP/vector stores become a bitcast + integer store,
// a scalarized store, or a copy through an aligned stack slot; integer
// stores split into two half-width truncating stores (endian-aware).
// NOTE(review): this extracted listing dropped the function-name line 9874
// and the `SmallVector<SDValue, 8> Stores;` declaration at 9927, so
// `Stores` is used below without a visible declaration.
9875 SelectionDAG &DAG) const {
9876 assert(ST->getAddressingMode() == ISD::UNINDEXED &&
9877 "unaligned indexed stores not implemented!");
9878 SDValue Chain = ST->getChain();
9879 SDValue Ptr = ST->getBasePtr();
9880 SDValue Val = ST->getValue();
9881 EVT VT = Val.getValueType();
9882 Align Alignment = ST->getOriginalAlign();
9883 auto &MF = DAG.getMachineFunction();
9884 EVT StoreMemVT = ST->getMemoryVT();
9885
9886 SDLoc dl(ST);
9887 if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) {
9888 EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
9889 if (isTypeLegal(intVT)) {
9890 if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
9891 StoreMemVT.isVector()) {
9892 // Scalarize the store and let the individual components be handled.
9893 SDValue Result = scalarizeVectorStore(ST, DAG);
9894 return Result;
9895 }
9896 // Expand to a bitconvert of the value to the integer type of the
9897 // same size, then a (misaligned) int store.
9898 // FIXME: Does not handle truncating floating point stores!
9899 SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
9900 Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
9901 Alignment, ST->getMemOperand()->getFlags());
9902 return Result;
9903 }
9904 // Do a (aligned) store to a stack slot, then copy from the stack slot
9905 // to the final destination using (unaligned) integer loads and stores.
9906 MVT RegVT = getRegisterType(
9907 *DAG.getContext(),
9908 EVT::getIntegerVT(*DAG.getContext(), StoreMemVT.getSizeInBits()));
9909 EVT PtrVT = Ptr.getValueType();
9910 unsigned StoredBytes = StoreMemVT.getStoreSize();
9911 unsigned RegBytes = RegVT.getSizeInBits() / 8;
// Round up so a partial final register still gets copied.
9912 unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
9913
9914 // Make sure the stack slot is also aligned for the register type.
9915 SDValue StackPtr = DAG.CreateStackTemporary(StoreMemVT, RegVT);
9916 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
9917
9918 // Perform the original store, only redirected to the stack slot.
9919 SDValue Store = DAG.getTruncStore(
9920 Chain, dl, Val, StackPtr,
9921 MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoreMemVT);
9922
9923 EVT StackPtrVT = StackPtr.getValueType();
9924
9925 SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
9926 SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
9928 unsigned Offset = 0;
9929
9930 // Do all but one copies using the full register width.
9931 for (unsigned i = 1; i < NumRegs; i++) {
9932 // Load one integer register's worth from the stack slot.
9933 SDValue Load = DAG.getLoad(
9934 RegVT, dl, Store, StackPtr,
9935 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset));
9936 // Store it to the final location. Remember the store.
9937 Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
9938 ST->getPointerInfo().getWithOffset(Offset),
9939 ST->getOriginalAlign(),
9940 ST->getMemOperand()->getFlags()));
9941 // Increment the pointers.
9942 Offset += RegBytes;
9943 StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
9944 Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
9945 }
9946
9947 // The last store may be partial. Do a truncating store. On big-endian
9948 // machines this requires an extending load from the stack slot to ensure
9949 // that the bits are in the right place.
9950 EVT LoadMemVT =
9951 EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset));
9952
9953 // Load from the stack slot.
9954 SDValue Load = DAG.getExtLoad(
9955 ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
9956 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), LoadMemVT);
9957
9958 Stores.push_back(
9959 DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
9960 ST->getPointerInfo().getWithOffset(Offset), LoadMemVT,
9961 ST->getOriginalAlign(),
9962 ST->getMemOperand()->getFlags(), ST->getAAInfo()));
9963 // The order of the stores doesn't matter - say it with a TokenFactor.
9964 SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
9965 return Result;
9966 }
9967
9968 assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
9969 "Unaligned store of unknown type.");
9970 // Get the half-size VT
9971 EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(*DAG.getContext());
9972 unsigned NumBits = NewStoredVT.getFixedSizeInBits();
9973 unsigned IncrementSize = NumBits / 8;
9974
9975 // Divide the stored value in two parts.
9976 SDValue ShiftAmount =
9977 DAG.getShiftAmountConstant(NumBits, Val.getValueType(), dl);
9978 SDValue Lo = Val;
9979 // If Val is a constant, replace the upper bits with 0. The SRL will constant
9980 // fold and not use the upper bits. A smaller constant may be easier to
9981 // materialize.
9982 if (auto *C = dyn_cast<ConstantSDNode>(Lo); C && !C->isOpaque())
9983 Lo = DAG.getNode(
9984 ISD::AND, dl, VT, Lo,
9985 DAG.getConstant(APInt::getLowBitsSet(VT.getSizeInBits(), NumBits), dl,
9986 VT));
9987 SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);
9988
9989 // Store the two parts
// Endianness decides which half lands at the lower address.
9990 SDValue Store1, Store2;
9991 Store1 = DAG.getTruncStore(Chain, dl,
9992 DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
9993 Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
9994 ST->getMemOperand()->getFlags());
9995
9996 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
9997 Store2 = DAG.getTruncStore(
9998 Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
9999 ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
10000 ST->getMemOperand()->getFlags(), ST->getAAInfo());
10001
10002 SDValue Result =
10003 DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
10004 return Result;
10005}
10006
// TargetLowering::IncrementMemoryAddress — return Addr advanced past the
// data consumed by one (masked) vector memory access. For compressed
// memory the increment is popcount(Mask) * element byte size; for scalable
// vectors it is vscale-based; otherwise it is DataVT's fixed store size.
// NOTE(review): this extracted listing dropped the function-name line
// 10008 (carrying the Addr/Mask parameters) and the `report_fatal_error(`
// call opener at 10019 preceding its message string.
10007SDValue
10009 const SDLoc &DL, EVT DataVT,
10010 SelectionDAG &DAG,
10011 bool IsCompressedMemory) const {
10012 SDValue Increment;
10013 EVT AddrVT = Addr.getValueType();
10014 EVT MaskVT = Mask.getValueType();
10015 assert(DataVT.getVectorElementCount() == MaskVT.getVectorElementCount() &&
10016 "Incompatible types of Data and Mask");
10017 if (IsCompressedMemory) {
10018 if (DataVT.isScalableVector())
10020 "Cannot currently handle compressed memory with scalable vectors");
10021 // Incrementing the pointer according to number of '1's in the mask.
10022 EVT MaskIntVT = EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits());
10023 SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask);
// Widen sub-i32 masks before counting bits.
10024 if (MaskIntVT.getSizeInBits() < 32) {
10025 MaskInIntReg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg);
10026 MaskIntVT = MVT::i32;
10027 }
10028
10029 // Count '1's with POPCNT.
10030 Increment = DAG.getNode(ISD::CTPOP, DL, MaskIntVT, MaskInIntReg);
10031 Increment = DAG.getZExtOrTrunc(Increment, DL, AddrVT);
10032 // Scale is an element size in bytes.
10033 SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL,
10034 AddrVT);
10035 Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale);
10036 } else if (DataVT.isScalableVector()) {
// Scalable types advance by vscale * known-minimum store size.
10037 Increment = DAG.getVScale(DL, AddrVT,
10038 APInt(AddrVT.getFixedSizeInBits(),
10039 DataVT.getStoreSize().getKnownMinValue()));
10040 } else
10041 Increment = DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT);
10042
10043 return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
10044}
10045
// clampDynamicVectorIndex (static helper, used by getVectorSubVecPointer) —
// clamp Idx so that a SubEC-wide access starting at Idx stays within VecVT.
// Single-element accesses into power-of-two-sized vectors are masked with
// AND; otherwise UMIN against the largest valid starting index is used
// (computed from vscale for scalable vectors).
// NOTE(review): this extracted listing dropped line 10046 carrying the
// function name and its first parameters (SelectionDAG &DAG, SDValue Idx).
10047 EVT VecVT, const SDLoc &dl,
10048 ElementCount SubEC) {
10049 assert(!(SubEC.isScalable() && VecVT.isFixedLengthVector()) &&
10050 "Cannot index a scalable vector within a fixed-width vector");
10051
10052 unsigned NElts = VecVT.getVectorMinNumElements();
10053 unsigned NumSubElts = SubEC.getKnownMinValue();
10054 EVT IdxVT = Idx.getValueType();
10055
10056 if (VecVT.isScalableVector() && !SubEC.isScalable()) {
10057 // If this is a constant index and we know the value plus the number of the
10058 // elements in the subvector minus one is less than the minimum number of
10059 // elements then it's safe to return Idx.
10060 if (auto *IdxCst = dyn_cast<ConstantSDNode>(Idx))
10061 if (IdxCst->getZExtValue() + (NumSubElts - 1) < NElts)
10062 return Idx;
10063 SDValue VS =
10064 DAG.getVScale(dl, IdxVT, APInt(IdxVT.getFixedSizeInBits(), NElts));
// USUBSAT guards against underflow when the fixed subvector may be wider
// than the vector's minimum element count.
10065 unsigned SubOpcode = NumSubElts <= NElts ? ISD::SUB : ISD::USUBSAT;
10066 SDValue Sub = DAG.getNode(SubOpcode, dl, IdxVT, VS,
10067 DAG.getConstant(NumSubElts, dl, IdxVT));
10068 return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx, Sub);
10069 }
// Single element, power-of-two length: wrap the index with a cheap mask.
10070 if (isPowerOf2_32(NElts) && NumSubElts == 1) {
10071 APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(), Log2_32(NElts));
10072 return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
10073 DAG.getConstant(Imm, dl, IdxVT));
10074 }
10075 unsigned MaxIndex = NumSubElts < NElts ? NElts - NumSubElts : 0;
10076 return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx,
10077 DAG.getConstant(MaxIndex, dl, IdxVT));
10078}
10079
// TargetLowering::getVectorElementPointer — compute the address of a single
// dynamically-indexed vector element by delegating to getVectorSubVecPointer
// with a one-element subvector type.
// NOTE(review): this extracted listing dropped line 10080 (function name)
// and line 10085, which builds the single-element EVT argument passed
// between VecVT and Index.
10081 SDValue VecPtr, EVT VecVT,
10082 SDValue Index) const {
10083 return getVectorSubVecPointer(
10084 DAG, VecPtr, VecVT,
10086 Index);
10087}
10088
// TargetLowering::getVectorSubVecPointer — compute the address of a
// subvector at a dynamic index: clamp the index so the access stays in
// bounds, scale it by (vscale *) the element byte size, and add the byte
// offset to VecPtr.
// NOTE(review): this extracted listing dropped line 10089 carrying the
// function name and first parameter (SelectionDAG &DAG).
10090 SDValue VecPtr, EVT VecVT,
10091 EVT SubVecVT,
10092 SDValue Index) const {
10093 SDLoc dl(Index);
10094 // Make sure the index type is big enough to compute in.
10095 Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType());
10096
10097 EVT EltVT = VecVT.getVectorElementType();
10098
10099 // Calculate the element offset and add it to the pointer.
10100 unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size.
10101 assert(EltSize * 8 == EltVT.getFixedSizeInBits() &&
10102 "Converting bits to bytes lost precision");
10103 assert(SubVecVT.getVectorElementType() == EltVT &&
10104 "Sub-vector must be a vector with matching element type");
// Keep the whole subvector access inside the vector's storage.
10105 Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl,
10106 SubVecVT.getVectorElementCount());
10107
10108 EVT IdxVT = Index.getValueType();
// Scalable subvectors additionally scale the index by vscale.
10109 if (SubVecVT.isScalableVector())
10110 Index =
10111 DAG.getNode(ISD::MUL, dl, IdxVT, Index,
10112 DAG.getVScale(dl, IdxVT, APInt(IdxVT.getSizeInBits(), 1)));
10113
10114 Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
10115 DAG.getConstant(EltSize, dl, IdxVT));
10116 return DAG.getMemBasePlusOffset(VecPtr, Index, dl);
10117}
10118
10119//===----------------------------------------------------------------------===//
10120// Implementation of Emulated TLS Model
10121//===----------------------------------------------------------------------===//
10122
// TargetLowering::LowerToTLSEmulatedModel — lower access to a TLS global
// under the emulated-TLS model as a libcall:
// __emutls_get_address(&__emutls_v.<name>). Returns the call's result.
// NOTE(review): this extracted listing dropped the function-name line
// 10123, the `CallLoweringInfo CLI(DAG);` declaration at 10144, and the
// MachineFrameInfo declaration at 10151, so `CLI` and `MFI` are used
// below without visible declarations.
10124 SelectionDAG &DAG) const {
10125 // Access to address of TLS variable xyz is lowered to a function call:
10126 // __emutls_get_address( address of global variable named "__emutls_v.xyz" )
10127 EVT PtrVT = getPointerTy(DAG.getDataLayout());
10128 PointerType *VoidPtrType = PointerType::get(*DAG.getContext(), 0);
10129 SDLoc dl(GA);
10130
10131 ArgListTy Args;
10132 ArgListEntry Entry;
// Look up the matching "__emutls_v.<name>" control variable by name in the
// global's module; it must already exist (asserted below).
10133 std::string NameString = ("__emutls_v." + GA->getGlobal()->getName()).str();
10134 Module *VariableModule = const_cast<Module*>(GA->getGlobal()->getParent());
10135 StringRef EmuTlsVarName(NameString);
10136 GlobalVariable *EmuTlsVar = VariableModule->getNamedGlobal(EmuTlsVarName);
10137 assert(EmuTlsVar && "Cannot find EmuTlsVar ");
10138 Entry.Node = DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT);
10139 Entry.Ty = VoidPtrType;
10140 Args.push_back(Entry);
10141
10142 SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT);
10143
10145 CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
10146 CLI.setLibCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args));
10147 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
10148
10149 // TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
10150 // At last for X86 targets, maybe good for other targets too?
10152 MFI.setAdjustsStack(true); // Is this only for X86 target?
10153 MFI.setHasCalls(true);
10154
10155 assert((GA->getOffset() == 0) &&
10156 "Emulated TLS must have zero offset in GlobalAddressSDNode");
10157 return CallResult.first;
10158}
10159
// TargetLowering::lowerCmpEqZeroToCtlzSrl — fold (setcc x, 0, eq) into
// (srl (ctlz x), log2(bitwidth)), which yields 1 iff x == 0. Only fires
// when the target reports CTLZ as fast; returns an empty SDValue otherwise
// or when the pattern does not match.
// NOTE(review): this extracted listing dropped the function-name line 10160.
10161 SelectionDAG &DAG) const {
10162 assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
10163 if (!isCtlzFast())
10164 return SDValue();
10165 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
10166 SDLoc dl(Op);
10167 if (isNullConstant(Op.getOperand(1)) && CC == ISD::SETEQ) {
10168 EVT VT = Op.getOperand(0).getValueType();
10169 SDValue Zext = Op.getOperand(0);
// Widen sub-i32 operands first so the shift below works on a full i32.
10170 if (VT.bitsLT(MVT::i32)) {
10171 VT = MVT::i32;
10172 Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
10173 }
// ctlz(x) equals the bit width exactly when x == 0, so shifting right by
// log2(bit width) extracts that fact as a 0/1 value.
10174 unsigned Log2b = Log2_32(VT.getSizeInBits());
10175 SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
10176 SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
10177 DAG.getConstant(Log2b, dl, MVT::i32));
10178 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
10179 }
10180 return SDValue();
10181}
10182
// TargetLowering expansion of integer SMIN/SMAX/UMIN/UMAX (the signature
// line 10183 was dropped by this extraction). Tries cheap algebraic forms
// first (sub/usubsat identities), unrolls illegal vector cases, then falls
// back to SETCC + SELECT, preferring a condition code for which an
// identical SETCC node already exists in the DAG.
// NOTE(review): the continuation lines of several `if` conditions (10193,
// 10203, 10211) and the vector-unroll `if` header at 10218 were also
// dropped, so those conditions read as truncated below.
10184 SDValue Op0 = Node->getOperand(0);
10185 SDValue Op1 = Node->getOperand(1);
10186 EVT VT = Op0.getValueType();
10187 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10188 unsigned Opcode = Node->getOpcode();
10189 SDLoc DL(Node);
10190
10191 // umax(x,1) --> sub(x,cmpeq(x,0)) iff cmp result is allbits
10192 if (Opcode == ISD::UMAX && llvm::isOneOrOneSplat(Op1, true) && BoolVT == VT &&
10194 Op0 = DAG.getFreeze(Op0);
10195 SDValue Zero = DAG.getConstant(0, DL, VT);
10196 return DAG.getNode(ISD::SUB, DL, VT, Op0,
10197 DAG.getSetCC(DL, VT, Op0, Zero, ISD::SETEQ));
10198 }
10199
10200 // umin(x,y) -> sub(x,usubsat(x,y))
10201 // TODO: Missing freeze(Op0)?
10202 if (Opcode == ISD::UMIN && isOperationLegal(ISD::SUB, VT) &&
10204 return DAG.getNode(ISD::SUB, DL, VT, Op0,
10205 DAG.getNode(ISD::USUBSAT, DL, VT, Op0, Op1));
10206 }
10207
10208 // umax(x,y) -> add(x,usubsat(y,x))
10209 // TODO: Missing freeze(Op0)?
10210 if (Opcode == ISD::UMAX && isOperationLegal(ISD::ADD, VT) &&
10212 return DAG.getNode(ISD::ADD, DL, VT, Op0,
10213 DAG.getNode(ISD::USUBSAT, DL, VT, Op1, Op0));
10214 }
10215
10216 // FIXME: Should really try to split the vector in case it's legal on a
10217 // subvector.
10219 return DAG.UnrollVectorOp(Node);
10220
10221 // Attempt to find an existing SETCC node that we can reuse.
10222 // TODO: Do we need a generic doesSETCCNodeExist?
10223 // TODO: Missing freeze(Op0)/freeze(Op1)?
10224 auto buildMinMax = [&](ISD::CondCode PrefCC, ISD::CondCode AltCC,
10225 ISD::CondCode PrefCommuteCC,
10226 ISD::CondCode AltCommuteCC) {
10227 SDVTList BoolVTList = DAG.getVTList(BoolVT);
// First try the non-commuted predicates: select(Op0 CC Op1, Op0, Op1).
10228 for (ISD::CondCode CC : {PrefCC, AltCC}) {
10229 if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
10230 {Op0, Op1, DAG.getCondCode(CC)})) {
10231 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
10232 return DAG.getSelect(DL, VT, Cond, Op0, Op1);
10233 }
10234 }
// Then the commuted predicates, with the select operands swapped to match.
10235 for (ISD::CondCode CC : {PrefCommuteCC, AltCommuteCC}) {
10236 if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
10237 {Op0, Op1, DAG.getCondCode(CC)})) {
10238 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
10239 return DAG.getSelect(DL, VT, Cond, Op1, Op0);
10240 }
10241 }
// No reusable compare found: emit the preferred predicate directly.
10242 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, PrefCC);
10243 return DAG.getSelect(DL, VT, Cond, Op0, Op1);
10244 };
10245
10246 // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
10247 // -> Y = (A < B) ? B : A
10248 // -> Y = (A >= B) ? A : B
10249 // -> Y = (A <= B) ? B : A
10250 switch (Opcode) {
10251 case ISD::SMAX:
10252 return buildMinMax(ISD::SETGT, ISD::SETGE, ISD::SETLT, ISD::SETLE);
10253 case ISD::SMIN:
10254 return buildMinMax(ISD::SETLT, ISD::SETLE, ISD::SETGT, ISD::SETGE);
10255 case ISD::UMAX:
10256 return buildMinMax(ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE);
10257 case ISD::UMIN:
10258 return buildMinMax(ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE);
10259 }
10260
10261 llvm_unreachable("How did we get here?");
10262}
10263
// TargetLowering expansion of saturating add/sub ([SU]ADDSAT/[SU]SUBSAT;
// the signature line 10264 was dropped by this extraction). Uses umax/umin
// identities when those are legal, otherwise computes the result via the
// corresponding overflow opcode ([SU]ADDO/[SU]SUBO) and substitutes the
// saturation constant when overflow is set.
// NOTE(review): the extraction also dropped lines 10308 (vector-unroll
// `if` header), 10320/10330 (`if` headers guarding the sign-extended-mask
// forms), 10341-10342 and 10372 (declarations feeding MinVal/MaxVal), so
// some braces and identifiers below lack visible openers/declarations.
10265 unsigned Opcode = Node->getOpcode();
10266 SDValue LHS = Node->getOperand(0);
10267 SDValue RHS = Node->getOperand(1);
10268 EVT VT = LHS.getValueType();
10269 SDLoc dl(Node);
10270
10271 assert(VT == RHS.getValueType() && "Expected operands to be the same type");
10272 assert(VT.isInteger() && "Expected operands to be integers");
10273
10274 // usub.sat(a, b) -> umax(a, b) - b
10275 if (Opcode == ISD::USUBSAT && isOperationLegal(ISD::UMAX, VT)) {
10276 SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS);
10277 return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
10278 }
10279
10280 // uadd.sat(a, b) -> umin(a, ~b) + b
10281 if (Opcode == ISD::UADDSAT && isOperationLegal(ISD::UMIN, VT)) {
10282 SDValue InvRHS = DAG.getNOT(dl, RHS, VT);
10283 SDValue Min = DAG.getNode(ISD::UMIN, dl, VT, LHS, InvRHS);
10284 return DAG.getNode(ISD::ADD, dl, VT, Min, RHS);
10285 }
10286
// Map the saturating opcode to its overflow-reporting counterpart.
10287 unsigned OverflowOp;
10288 switch (Opcode) {
10289 case ISD::SADDSAT:
10290 OverflowOp = ISD::SADDO;
10291 break;
10292 case ISD::UADDSAT:
10293 OverflowOp = ISD::UADDO;
10294 break;
10295 case ISD::SSUBSAT:
10296 OverflowOp = ISD::SSUBO;
10297 break;
10298 case ISD::USUBSAT:
10299 OverflowOp = ISD::USUBO;
10300 break;
10301 default:
10302 llvm_unreachable("Expected method to receive signed or unsigned saturation "
10303 "addition or subtraction node.");
10304 }
10305
10306 // FIXME: Should really try to split the vector in case it's legal on a
10307 // subvector.
10309 return DAG.UnrollVectorOp(Node);
10310
10311 unsigned BitWidth = LHS.getScalarValueSizeInBits();
10312 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
// Overflow opcodes return both the raw sum/difference and an overflow flag.
10313 SDValue Result = DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
10314 SDValue SumDiff = Result.getValue(0);
10315 SDValue Overflow = Result.getValue(1);
10316 SDValue Zero = DAG.getConstant(0, dl, VT);
10317 SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);
10318
10319 if (Opcode == ISD::UADDSAT) {
10321 // (LHS + RHS) | OverflowMask
10322 SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
10323 return DAG.getNode(ISD::OR, dl, VT, SumDiff, OverflowMask);
10324 }
10325 // Overflow ? 0xffff.... : (LHS + RHS)
10326 return DAG.getSelect(dl, VT, Overflow, AllOnes, SumDiff);
10327 }
10328
10329 if (Opcode == ISD::USUBSAT) {
10331 // (LHS - RHS) & ~OverflowMask
10332 SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
10333 SDValue Not = DAG.getNOT(dl, OverflowMask, VT);
10334 return DAG.getNode(ISD::AND, dl, VT, SumDiff, Not);
10335 }
10336 // Overflow ? 0 : (LHS - RHS)
10337 return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
10338 }
10339
10340 if (Opcode == ISD::SADDSAT || Opcode == ISD::SSUBSAT) {
10343
10344 KnownBits KnownLHS = DAG.computeKnownBits(LHS);
10345 KnownBits KnownRHS = DAG.computeKnownBits(RHS);
10346
10347 // If either of the operand signs are known, then they are guaranteed to
10348 // only saturate in one direction. If non-negative they will saturate
10349 // towards SIGNED_MAX, if negative they will saturate towards SIGNED_MIN.
10350 //
10351 // In the case of ISD::SSUBSAT, 'x - y' is equivalent to 'x + (-y)', so the
10352 // sign of 'y' has to be flipped.
10353
10354 bool LHSIsNonNegative = KnownLHS.isNonNegative();
10355 bool RHSIsNonNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNonNegative()
10356 : KnownRHS.isNegative();
10357 if (LHSIsNonNegative || RHSIsNonNegative) {
10358 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
10359 return DAG.getSelect(dl, VT, Overflow, SatMax, SumDiff);
10360 }
10361
10362 bool LHSIsNegative = KnownLHS.isNegative();
10363 bool RHSIsNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNegative()
10364 : KnownRHS.isNonNegative();
10365 if (LHSIsNegative || RHSIsNegative) {
10366 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
10367 return DAG.getSelect(dl, VT, Overflow, SatMin, SumDiff);
10368 }
10369 }
10370
10371 // Overflow ? (SumDiff >> BW) ^ MinVal : SumDiff
// The arithmetic shift broadcasts SumDiff's sign bit across the value; the
// XOR with the signed-minimum constant then produces signed-max for
// positive overflow and signed-min for negative overflow.
10373 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
10374 SDValue Shift = DAG.getNode(ISD::SRA, dl, VT, SumDiff,
10375 DAG.getConstant(BitWidth - 1, dl, VT));
10376 Result = DAG.getNode(ISD::XOR, dl, VT, Shift, SatMin);
10377 return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
10378}
10379
// TargetLowering expansion of three-way compare (the signature line 10380
// was dropped by this extraction). Produces nested selects:
//   result = LHS < RHS ? -1 : (LHS > RHS ? 1 : 0)
// with signed or unsigned compare predicates chosen by the opcode
// (ISD::UCMP uses SETULT/SETUGT, otherwise SETLT/SETGT).
10381 unsigned Opcode = Node->getOpcode();
10382 SDValue LHS = Node->getOperand(0);
10383 SDValue RHS = Node->getOperand(1);
10384 EVT VT = LHS.getValueType();
10385 EVT ResVT = Node->getValueType(0);
10386 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10387 SDLoc dl(Node);
10388
10389 auto LTPredicate = (Opcode == ISD::UCMP ? ISD::SETULT : ISD::SETLT);
10390 auto GTPredicate = (Opcode == ISD::UCMP ? ISD::SETUGT : ISD::SETGT);
10391
10392 SDValue IsLT = DAG.getSetCC(dl, BoolVT, LHS, RHS, LTPredicate);
10393 SDValue IsGT = DAG.getSetCC(dl, BoolVT, LHS, RHS, GTPredicate);
// Inner select picks 1 vs 0; outer select overrides with -1 when LHS < RHS.
10394 SDValue SelectZeroOrOne =
10395 DAG.getSelect(dl, ResVT, IsGT, DAG.getConstant(1, dl, ResVT),
10396 DAG.getConstant(0, dl, ResVT));
10397 return DAG.getSelect(dl, ResVT, IsLT, DAG.getConstant(-1, dl, ResVT),
10398 SelectZeroOrOne);
10399}
10400
// TargetLowering expansion of saturating shift-left (SSHLSAT/USHLSAT; the
// signature line 10401 was dropped by this extraction). Performs the shift
// and detects overflow by shifting back: if (LHS << RHS) >> RHS no longer
// equals LHS, the result saturates — to sign-dependent min/max for the
// signed form, or to all-ones for the unsigned form.
// NOTE(review): the extraction also dropped line 10415, the vector-unroll
// `if` header that guards the UnrollVectorOp call below.
10402 unsigned Opcode = Node->getOpcode();
10403 bool IsSigned = Opcode == ISD::SSHLSAT;
10404 SDValue LHS = Node->getOperand(0);
10405 SDValue RHS = Node->getOperand(1);
10406 EVT VT = LHS.getValueType();
10407 SDLoc dl(Node);
10408
10409 assert((Node->getOpcode() == ISD::SSHLSAT ||
10410 Node->getOpcode() == ISD::USHLSAT) &&
10411 "Expected a SHLSAT opcode");
10412 assert(VT == RHS.getValueType() && "Expected operands to be the same type");
10413 assert(VT.isInteger() && "Expected operands to be integers");
10414
10416 return DAG.UnrollVectorOp(Node);
10417
10418 // If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate.
10419
10420 unsigned BW = VT.getScalarSizeInBits();
10421 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10422 SDValue Result = DAG.getNode(ISD::SHL, dl, VT, LHS, RHS);
// Shift back with SRA (signed) or SRL (unsigned) to recover the original
// value when no bits were lost.
10423 SDValue Orig =
10424 DAG.getNode(IsSigned ? ISD::SRA : ISD::SRL, dl, VT, Result, RHS);
10425
// Saturation value: signed picks min/max by the sign of LHS; unsigned
// always saturates to all-ones.
10426 SDValue SatVal;
10427 if (IsSigned) {
10428 SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(BW), dl, VT);
10429 SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(BW), dl, VT);
10430 SDValue Cond =
10431 DAG.getSetCC(dl, BoolVT, LHS, DAG.getConstant(0, dl, VT), ISD::SETLT);
10432 SatVal = DAG.getSelect(dl, VT, Cond, SatMin, SatMax);
10433 } else {
10434 SatVal = DAG.getConstant(APInt::getMaxValue(BW), dl, VT);
10435 }
10436 SDValue Cond = DAG.getSetCC(dl, BoolVT, LHS, Orig, ISD::SETNE);
10437 return DAG.getSelect(dl, VT, Cond, SatVal, Result);
10438}
10439
// Force-expand a double-wide multiply given the four half operands
// (LL/LH = low/high halves of the left operand, RL/RH of the right).
// Produces the wide product's halves in Lo/Hi, either via a MUL libcall of
// the wide type or, failing that, by brute-force schoolbook multiplication.
// First signature line elided in this view.
10441 bool Signed, EVT WideVT,
10442 const SDValue LL, const SDValue LH,
10443 const SDValue RL, const SDValue RH,
10444 SDValue &Lo, SDValue &Hi) const {
10445 // We can fall back to a libcall with an illegal type for the MUL if we
10446 // have a libcall big enough.
10447 // Also, we can fall back to a division in some cases, but that's a big
10448 // performance hit in the general case.
10449 RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
10450 if (WideVT == MVT::i16)
10451 LC = RTLIB::MUL_I16;
10452 else if (WideVT == MVT::i32)
10453 LC = RTLIB::MUL_I32;
10454 else if (WideVT == MVT::i64)
10455 LC = RTLIB::MUL_I64;
10456 else if (WideVT == MVT::i128)
10457 LC = RTLIB::MUL_I128;
10458
10459 if (LC == RTLIB::UNKNOWN_LIBCALL || !getLibcallName(LC)) {
10460 // We'll expand the multiplication by brute force because we have no other
10461 // options. This is a trivially-generalized version of the code from
10462 // Hacker's Delight (itself derived from Knuth's Algorithm M from section
10463 // 4.3.1).
10464 EVT VT = LL.getValueType();
10465 unsigned Bits = VT.getSizeInBits();
10466 unsigned HalfBits = Bits >> 1;
// Mask selecting the low half of a narrow value.
10467 SDValue Mask =
10468 DAG.getConstant(APInt::getLowBitsSet(Bits, HalfBits), dl, VT);
10469 SDValue LLL = DAG.getNode(ISD::AND, dl, VT, LL, Mask);
10470 SDValue RLL = DAG.getNode(ISD::AND, dl, VT, RL, Mask);
10471
// T = low*low partial product; TL/TH are its halves.
10472 SDValue T = DAG.getNode(ISD::MUL, dl, VT, LLL, RLL);
10473 SDValue TL = DAG.getNode(ISD::AND, dl, VT, T, Mask);
10474
10475 SDValue Shift = DAG.getShiftAmountConstant(HalfBits, VT, dl);
10476 SDValue TH = DAG.getNode(ISD::SRL, dl, VT, T, Shift);
10477 SDValue LLH = DAG.getNode(ISD::SRL, dl, VT, LL, Shift);
10478 SDValue RLH = DAG.getNode(ISD::SRL, dl, VT, RL, Shift);
10479
// Cross partial products with carry propagation through U and V.
10480 SDValue U = DAG.getNode(ISD::ADD, dl, VT,
10481 DAG.getNode(ISD::MUL, dl, VT, LLH, RLL), TH);
10482 SDValue UL = DAG.getNode(ISD::AND, dl, VT, U, Mask);
10483 SDValue UH = DAG.getNode(ISD::SRL, dl, VT, U, Shift);
10484
10485 SDValue V = DAG.getNode(ISD::ADD, dl, VT,
10486 DAG.getNode(ISD::MUL, dl, VT, LLL, RLH), UL);
10487 SDValue VH = DAG.getNode(ISD::SRL, dl, VT, V, Shift);
10488
// W accumulates the high*high product plus carries from U and V.
10489 SDValue W =
10490 DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LLH, RLH),
10491 DAG.getNode(ISD::ADD, dl, VT, UH, VH));
10492 Lo = DAG.getNode(ISD::ADD, dl, VT, TL,
10493 DAG.getNode(ISD::SHL, dl, VT, V, Shift));
10494
// Hi additionally folds in the cross terms involving the operands' high
// halves (RH*LL and RL*LH), which only affect the upper wide half.
10495 Hi = DAG.getNode(ISD::ADD, dl, VT, W,
10496 DAG.getNode(ISD::ADD, dl, VT,
10497 DAG.getNode(ISD::MUL, dl, VT, RH, LL),
10498 DAG.getNode(ISD::MUL, dl, VT, RL, LH)));
10499 } else {
10500 // Attempt a libcall.
10501 SDValue Ret;
// NOTE(review): the CallOptions declaration line (orig. 10502) is elided in
// this view.
10503 CallOptions.setSExt(Signed);
10504 CallOptions.setIsPostTypeLegalization(true);
10505 if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) {
10506 // Halves of WideVT are packed into registers in different order
10507 // depending on platform endianness. This is usually handled by
10508 // the C calling convention, but we can't defer to it in
10509 // the legalizer.
10510 SDValue Args[] = {LL, LH, RL, RH};
10511 Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
10512 } else {
10513 SDValue Args[] = {LH, LL, RH, RL};
10514 Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
10515 }
10516 assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
10517 "Ret value is a collection of constituent nodes holding result.");
10518 if (DAG.getDataLayout().isLittleEndian()) {
10519 // Same as above.
10520 Lo = Ret.getOperand(0);
10521 Hi = Ret.getOperand(1);
10522 } else {
10523 Lo = Ret.getOperand(1);
10524 Hi = Ret.getOperand(0);
10525 }
10526 }
10527}
10528
// Convenience overload: widen LHS*RHS into a double-wide product (Lo/Hi).
// Builds the high halves explicitly — sign-replicated via SRA for signed
// inputs, zero for unsigned — then defers to the four-operand expansion.
// First signature line elided in this view.
10530 bool Signed, const SDValue LHS,
10531 const SDValue RHS, SDValue &Lo,
10532 SDValue &Hi) const {
10533 EVT VT = LHS.getValueType();
10534 assert(RHS.getValueType() == VT && "Mismatching operand types");
10535
10536 SDValue HiLHS;
10537 SDValue HiRHS;
10538 if (Signed) {
10539 // The high part is obtained by SRA'ing all but one of the bits of low
10540 // part.
10541 unsigned LoSize = VT.getFixedSizeInBits();
10542 HiLHS = DAG.getNode(
10543 ISD::SRA, dl, VT, LHS,
10544 DAG.getConstant(LoSize - 1, dl, getPointerTy(DAG.getDataLayout())));
10545 HiRHS = DAG.getNode(
10546 ISD::SRA, dl, VT, RHS,
10547 DAG.getConstant(LoSize - 1, dl, getPointerTy(DAG.getDataLayout())));
10548 } else {
10549 HiLHS = DAG.getConstant(0, dl, VT);
10550 HiRHS = DAG.getConstant(0, dl, VT);
10551 }
// The wide type is exactly twice the operand width.
10552 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);
10553 forceExpandWideMUL(DAG, dl, Signed, WideVT, LHS, HiLHS, RHS, HiRHS, Lo, Hi);
10554}
10555
// Expand a fixed-point multiply ([US]MULFIX[SAT]): compute the double-wide
// product, shift right by Scale, and (for the SAT variants) clamp on
// overflow. Signature line elided in this view.
10556SDValue
10558 assert((Node->getOpcode() == ISD::SMULFIX ||
10559 Node->getOpcode() == ISD::UMULFIX ||
10560 Node->getOpcode() == ISD::SMULFIXSAT ||
10561 Node->getOpcode() == ISD::UMULFIXSAT) &&
10562 "Expected a fixed point multiplication opcode");
10563
10564 SDLoc dl(Node);
10565 SDValue LHS = Node->getOperand(0);
10566 SDValue RHS = Node->getOperand(1);
10567 EVT VT = LHS.getValueType();
// Scale is operand 2: the number of fractional bits.
10568 unsigned Scale = Node->getConstantOperandVal(2);
10569 bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
10570 Node->getOpcode() == ISD::UMULFIXSAT);
10571 bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
10572 Node->getOpcode() == ISD::SMULFIXSAT);
10573 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10574 unsigned VTSize = VT.getScalarSizeInBits();
10575
10576 if (!Scale) {
10577 // [us]mul.fix(a, b, 0) -> mul(a, b)
10578 if (!Saturating) {
// NOTE(review): a guard line (orig. 10579) is elided here — presumably a
// MUL-legality check; confirm against upstream.
10580 return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
10581 } else if (Signed && isOperationLegalOrCustom(ISD::SMULO, VT)) {
// Scale==0 saturating signed multiply via SMULO overflow flag.
10582 SDValue Result =
10583 DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
10584 SDValue Product = Result.getValue(0);
10585 SDValue Overflow = Result.getValue(1);
10586 SDValue Zero = DAG.getConstant(0, dl, VT);
10587
10588 APInt MinVal = APInt::getSignedMinValue(VTSize);
10589 APInt MaxVal = APInt::getSignedMaxValue(VTSize);
10590 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
10591 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
10592 // Xor the inputs, if resulting sign bit is 0 the product will be
10593 // positive, else negative.
10594 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
10595 SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Xor, Zero, ISD::SETLT);
10596 Result = DAG.getSelect(dl, VT, ProdNeg, SatMin, SatMax);
10597 return DAG.getSelect(dl, VT, Overflow, Result, Product);
10598 } else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) {
// Scale==0 saturating unsigned multiply: clamp to all-ones on overflow.
10599 SDValue Result =
10600 DAG.getNode(ISD::UMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
10601 SDValue Product = Result.getValue(0);
10602 SDValue Overflow = Result.getValue(1);
10603
10604 APInt MaxVal = APInt::getMaxValue(VTSize);
10605 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
10606 return DAG.getSelect(dl, VT, Overflow, SatMax, Product);
10607 }
10608 }
10609
10610 assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
10611 "Expected scale to be less than the number of bits if signed or at "
10612 "most the number of bits if unsigned.");
10613 assert(LHS.getValueType() == RHS.getValueType() &&
10614 "Expected both operands to be the same type");
10615
10616 // Get the upper and lower bits of the result.
10617 SDValue Lo, Hi;
10618 unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
10619 unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
10620 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VTSize * 2);
// Prefer MUL_LOHI, then MUL+MULH, then a widened MUL, then brute force.
10621 if (isOperationLegalOrCustom(LoHiOp, VT)) {
10622 SDValue Result = DAG.getNode(LoHiOp, dl, DAG.getVTList(VT, VT), LHS, RHS);
10623 Lo = Result.getValue(0);
10624 Hi = Result.getValue(1);
10625 } else if (isOperationLegalOrCustom(HiOp, VT)) {
10626 Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
10627 Hi = DAG.getNode(HiOp, dl, VT, LHS, RHS);
10628 } else if (isOperationLegalOrCustom(ISD::MUL, WideVT)) {
10629 // Try for a multiplication using a wider type.
10630 unsigned Ext = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
10631 SDValue LHSExt = DAG.getNode(Ext, dl, WideVT, LHS);
10632 SDValue RHSExt = DAG.getNode(Ext, dl, WideVT, RHS);
10633 SDValue Res = DAG.getNode(ISD::MUL, dl, WideVT, LHSExt, RHSExt);
10634 Lo = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
10635 SDValue Shifted =
10636 DAG.getNode(ISD::SRA, dl, WideVT, Res,
10637 DAG.getShiftAmountConstant(VTSize, WideVT, dl));
10638 Hi = DAG.getNode(ISD::TRUNCATE, dl, VT, Shifted);
10639 } else if (VT.isVector()) {
// Vectors with no usable multiply lowering: give up and let the caller
// handle it.
10640 return SDValue();
10641 } else {
10642 forceExpandWideMUL(DAG, dl, Signed, LHS, RHS, Lo, Hi);
10643 }
10644
10645 if (Scale == VTSize)
10646 // Result is just the top half since we'd be shifting by the width of the
10647 // operand. Overflow impossible so this works for both UMULFIX and
10648 // UMULFIXSAT.
10649 return Hi;
10650
10651 // The result will need to be shifted right by the scale since both operands
10652 // are scaled. The result is given to us in 2 halves, so we only want part of
10653 // both in the result.
10654 SDValue Result = DAG.getNode(ISD::FSHR, dl, VT, Hi, Lo,
10655 DAG.getShiftAmountConstant(Scale, VT, dl));
10656 if (!Saturating)
10657 return Result;
10658
10659 if (!Signed) {
10660 // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
10661 // widened multiplication) aren't all zeroes.
10662
10663 // Saturate to max if ((Hi >> Scale) != 0),
10664 // which is the same as if (Hi > ((1 << Scale) - 1))
10665 APInt MaxVal = APInt::getMaxValue(VTSize);
10666 SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale),
10667 dl, VT);
10668 Result = DAG.getSelectCC(dl, Hi, LowMask,
10669 DAG.getConstant(MaxVal, dl, VT), Result,
10670 ISD::SETUGT);
10671
10672 return Result;
10673 }
10674
10675 // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
10676 // widened multiplication) aren't all ones or all zeroes.
10677
10678 SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(VTSize), dl, VT);
10679 SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(VTSize), dl, VT);
10680
10681 if (Scale == 0) {
// With no fractional bits, overflow is detected by comparing Hi against
// Lo's sign-replication.
10682 SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, Lo,
10683 DAG.getShiftAmountConstant(VTSize - 1, VT, dl));
10684 SDValue Overflow = DAG.getSetCC(dl, BoolVT, Hi, Sign, ISD::SETNE);
10685 // Saturated to SatMin if wide product is negative, and SatMax if wide
10686 // product is positive ...
10687 SDValue Zero = DAG.getConstant(0, dl, VT);
10688 SDValue ResultIfOverflow = DAG.getSelectCC(dl, Hi, Zero, SatMin, SatMax,
10689 ISD::SETLT);
10690 // ... but only if we overflowed.
10691 return DAG.getSelect(dl, VT, Overflow, ResultIfOverflow, Result);
10692 }
10693
10694 // We handled Scale==0 above so all the bits to examine is in Hi.
10695
10696 // Saturate to max if ((Hi >> (Scale - 1)) > 0),
10697 // which is the same as if (Hi > (1 << (Scale - 1)) - 1)
10698 SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale - 1),
10699 dl, VT);
10700 Result = DAG.getSelectCC(dl, Hi, LowMask, SatMax, Result, ISD::SETGT);
10701 // Saturate to min if (Hi >> (Scale - 1)) < -1),
10702 // which is the same as if (HI < (-1 << (Scale - 1))
10703 SDValue HighMask =
10704 DAG.getConstant(APInt::getHighBitsSet(VTSize, VTSize - Scale + 1),
10705 dl, VT);
10706 Result = DAG.getSelectCC(dl, Hi, HighMask, SatMin, Result, ISD::SETLT);
10707 return Result;
10708}
10709
// Expand a fixed-point divide ([US]DIVFIX[SAT]) in the operand type, by
// pre-shifting the operands so the quotient carries the scale: shift LHS up
// into its headroom and RHS down through its trailing zeroes. Returns a null
// SDValue when there isn't enough combined headroom to do this safely.
// Signature line elided in this view.
10710SDValue
10712 SDValue LHS, SDValue RHS,
10713 unsigned Scale, SelectionDAG &DAG) const {
10714 assert((Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT ||
10715 Opcode == ISD::UDIVFIX || Opcode == ISD::UDIVFIXSAT) &&
10716 "Expected a fixed point division opcode");
10717
10718 EVT VT = LHS.getValueType();
10719 bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
10720 bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
10721 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10722
10723 // If there is enough room in the type to upscale the LHS or downscale the
10724 // RHS before the division, we can perform it in this type without having to
10725 // resize. For signed operations, the LHS headroom is the number of
10726 // redundant sign bits, and for unsigned ones it is the number of zeroes.
10727 // The headroom for the RHS is the number of trailing zeroes.
// NOTE(review): the unsigned branch of this conditional (orig. line 10729,
// presumably counting leading zeroes of LHS) is elided in this view.
10728 unsigned LHSLead = Signed ? DAG.ComputeNumSignBits(LHS) - 1
10730 unsigned RHSTrail = DAG.computeKnownBits(RHS).countMinTrailingZeros();
10731
10732 // For signed saturating operations, we need to be able to detect true integer
10733 // division overflow; that is, when you have MIN / -EPS. However, this
10734 // is undefined behavior and if we emit divisions that could take such
10735 // values it may cause undesired behavior (arithmetic exceptions on x86, for
10736 // example).
10737 // Avoid this by requiring an extra bit so that we never get this case.
10738 // FIXME: This is a bit unfortunate as it means that for an 8-bit 7-scale
10739 // signed saturating division, we need to emit a whopping 32-bit division.
10740 if (LHSLead + RHSTrail < Scale + (unsigned)(Saturating && Signed))
10741 return SDValue();
10742
// Split the scale between the two shifts, using as much LHS headroom as
// possible first.
10743 unsigned LHSShift = std::min(LHSLead, Scale);
10744 unsigned RHSShift = Scale - LHSShift;
10745
10746 // At this point, we know that if we shift the LHS up by LHSShift and the
10747 // RHS down by RHSShift, we can emit a regular division with a final scaling
10748 // factor of Scale.
10749
10750 if (LHSShift)
10751 LHS = DAG.getNode(ISD::SHL, dl, VT, LHS,
10752 DAG.getShiftAmountConstant(LHSShift, VT, dl));
10753 if (RHSShift)
10754 RHS = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, VT, RHS,
10755 DAG.getShiftAmountConstant(RHSShift, VT, dl));
10756
10757 SDValue Quot;
10758 if (Signed) {
10759 // For signed operations, if the resulting quotient is negative and the
10760 // remainder is nonzero, subtract 1 from the quotient to round towards
10761 // negative infinity.
10762 SDValue Rem;
10763 // FIXME: Ideally we would always produce an SDIVREM here, but if the
10764 // type isn't legal, SDIVREM cannot be expanded. There is no reason why
10765 // we couldn't just form a libcall, but the type legalizer doesn't do it.
// NOTE(review): the second operand of this && (orig. line 10767, presumably
// an SDIVREM legality check) is elided in this view.
10766 if (isTypeLegal(VT) &&
10768 Quot = DAG.getNode(ISD::SDIVREM, dl,
10769 DAG.getVTList(VT, VT),
10770 LHS, RHS);
10771 Rem = Quot.getValue(1);
10772 Quot = Quot.getValue(0);
10773 } else {
10774 Quot = DAG.getNode(ISD::SDIV, dl, VT,
10775 LHS, RHS);
10776 Rem = DAG.getNode(ISD::SREM, dl, VT,
10777 LHS, RHS);
10778 }
10779 SDValue Zero = DAG.getConstant(0, dl, VT);
// Quotient sign is LHS sign XOR RHS sign; round toward -inf when negative
// with a nonzero remainder.
10780 SDValue RemNonZero = DAG.getSetCC(dl, BoolVT, Rem, Zero, ISD::SETNE);
10781 SDValue LHSNeg = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETLT);
10782 SDValue RHSNeg = DAG.getSetCC(dl, BoolVT, RHS, Zero, ISD::SETLT);
10783 SDValue QuotNeg = DAG.getNode(ISD::XOR, dl, BoolVT, LHSNeg, RHSNeg);
10784 SDValue Sub1 = DAG.getNode(ISD::SUB, dl, VT, Quot,
10785 DAG.getConstant(1, dl, VT));
10786 Quot = DAG.getSelect(dl, VT,
10787 DAG.getNode(ISD::AND, dl, BoolVT, RemNonZero, QuotNeg),
10788 Sub1, Quot);
10789 } else
10790 Quot = DAG.getNode(ISD::UDIV, dl, VT,
10791 LHS, RHS);
10792
10793 return Quot;
10794}
10795
10797 SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
10798 SDLoc dl(Node);
10799 SDValue LHS = Node->getOperand(0);
10800 SDValue RHS = Node->getOperand(1);
10801 bool IsAdd = Node->getOpcode() == ISD::UADDO;
10802
10803 // If UADDO_CARRY/SUBO_CARRY is legal, use that instead.
10804 unsigned OpcCarry = IsAdd ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
10805 if (isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) {
10806 SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1));
10807 SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(),
10808 { LHS, RHS, CarryIn });
10809 Result = SDValue(NodeCarry.getNode(), 0);
10810 Overflow = SDValue(NodeCarry.getNode(), 1);
10811 return;
10812 }
10813
10814 Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
10815 LHS.getValueType(), LHS, RHS);
10816
10817 EVT ResultType = Node->getValueType(1);
10818 EVT SetCCType = getSetCCResultType(
10819 DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
10820 SDValue SetCC;
10821 if (IsAdd && isOneConstant(RHS)) {
10822 // Special case: uaddo X, 1 overflowed if X+1 is 0. This potential reduces
10823 // the live range of X. We assume comparing with 0 is cheap.
10824 // The general case (X + C) < C is not necessarily beneficial. Although we
10825 // reduce the live range of X, we may introduce the materialization of
10826 // constant C.
10827 SetCC =
10828 DAG.getSetCC(dl, SetCCType, Result,
10829 DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETEQ);
10830 } else if (IsAdd && isAllOnesConstant(RHS)) {
10831 // Special case: uaddo X, -1 overflows if X != 0.
10832 SetCC =
10833 DAG.getSetCC(dl, SetCCType, LHS,
10834 DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETNE);
10835 } else {
10837 SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC);
10838 }
10839 Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
10840}
10841
10843 SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
10844 SDLoc dl(Node);
10845 SDValue LHS = Node->getOperand(0);
10846 SDValue RHS = Node->getOperand(1);
10847 bool IsAdd = Node->getOpcode() == ISD::SADDO;
10848
10849 Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
10850 LHS.getValueType(), LHS, RHS);
10851
10852 EVT ResultType = Node->getValueType(1);
10853 EVT OType = getSetCCResultType(
10854 DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
10855
10856 // If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
10857 unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
10858 if (isOperationLegal(OpcSat, LHS.getValueType())) {
10859 SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
10860 SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
10861 Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
10862 return;
10863 }
10864
10865 SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
10866
10867 // For an addition, the result should be less than one of the operands (LHS)
10868 // if and only if the other operand (RHS) is negative, otherwise there will
10869 // be overflow.
10870 // For a subtraction, the result should be less than one of the operands
10871 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
10872 // otherwise there will be overflow.
10873 SDValue ResultLowerThanLHS = DAG.getSetCC(dl, OType, Result, LHS, ISD::SETLT);
10874 SDValue ConditionRHS =
10875 DAG.getSetCC(dl, OType, RHS, Zero, IsAdd ? ISD::SETLT : ISD::SETGT);
10876
10877 Overflow = DAG.getBoolExtOrTrunc(
10878 DAG.getNode(ISD::XOR, dl, OType, ConditionRHS, ResultLowerThanLHS), dl,
10879 ResultType, ResultType);
10880}
10881
10883 SDValue &Overflow, SelectionDAG &DAG) const {
10884 SDLoc dl(Node);
10885 EVT VT = Node->getValueType(0);
10886 EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10887 SDValue LHS = Node->getOperand(0);
10888 SDValue RHS = Node->getOperand(1);
10889 bool isSigned = Node->getOpcode() == ISD::SMULO;
10890
10891 // For power-of-two multiplications we can use a simpler shift expansion.
10892 if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) {
10893 const APInt &C = RHSC->getAPIntValue();
10894 // mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
10895 if (C.isPowerOf2()) {
10896 // smulo(x, signed_min) is same as umulo(x, signed_min).
10897 bool UseArithShift = isSigned && !C.isMinSignedValue();
10898 SDValue ShiftAmt = DAG.getShiftAmountConstant(C.logBase2(), VT, dl);
10899 Result = DAG.getNode(ISD::SHL, dl, VT, LHS, ShiftAmt);
10900 Overflow = DAG.getSetCC(dl, SetCCVT,
10901 DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
10902 dl, VT, Result, ShiftAmt),
10903 LHS, ISD::SETNE);
10904 return true;
10905 }
10906 }
10907
10908 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
10909 if (VT.isVector())
10910 WideVT =
10912
10913 SDValue BottomHalf;
10914 SDValue TopHalf;
10915 static const unsigned Ops[2][3] =
10918 if (isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
10919 BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
10920 TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
10921 } else if (isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
10922 BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
10923 RHS);
10924 TopHalf = BottomHalf.getValue(1);
10925 } else if (isTypeLegal(WideVT)) {
10926 LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
10927 RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
10928 SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
10929 BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
10930 SDValue ShiftAmt =
10931 DAG.getShiftAmountConstant(VT.getScalarSizeInBits(), WideVT, dl);
10932 TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT,
10933 DAG.getNode(ISD::SRL, dl, WideVT, Mul, ShiftAmt));
10934 } else {
10935 if (VT.isVector())
10936 return false;
10937
10938 forceExpandWideMUL(DAG, dl, isSigned, LHS, RHS, BottomHalf, TopHalf);
10939 }
10940
10941 Result = BottomHalf;
10942 if (isSigned) {
10943 SDValue ShiftAmt = DAG.getShiftAmountConstant(
10944 VT.getScalarSizeInBits() - 1, BottomHalf.getValueType(), dl);
10945 SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
10946 Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE);
10947 } else {
10948 Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf,
10949 DAG.getConstant(0, dl, VT), ISD::SETNE);
10950 }
10951
10952 // Truncate the result if SetCC returns a larger type than needed.
10953 EVT RType = Node->getValueType(1);
10954 if (RType.bitsLT(Overflow.getValueType()))
10955 Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow);
10956
10957 assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
10958 "Unexpected result type for S/UMULO legalization");
10959 return true;
10960}
10961
// Expand a VECREDUCE_* node: shrink power-of-two vectors by repeated
// split-and-combine while the half-width op is legal, then finish with a
// linear scalar reduction over the extracted elements. Signature line elided
// in this view.
10963 SDLoc dl(Node);
10964 unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
10965 SDValue Op = Node->getOperand(0);
10966 EVT VT = Op.getValueType();
10967
// Scalable vectors cannot be element-extracted here.
// NOTE(review): the error-reporting call (orig. line 10969) is elided in
// this view.
10968 if (VT.isScalableVector())
10970 "Expanding reductions for scalable vectors is undefined.");
10971
10972 // Try to use a shuffle reduction for power of two vectors.
10973 if (VT.isPow2VectorType()) {
10974 while (VT.getVectorNumElements() > 1) {
10975 EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
10976 if (!isOperationLegalOrCustom(BaseOpcode, HalfVT))
10977 break;
10978
// Fold the two halves into one half-width vector per iteration.
10979 SDValue Lo, Hi;
10980 std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
10981 Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi, Node->getFlags());
10982 VT = HalfVT;
10983 }
10984 }
10985
10986 EVT EltVT = VT.getVectorElementType();
10987 unsigned NumElts = VT.getVectorNumElements();
10988
// NOTE(review): the Ops vector declaration (orig. line 10989) is elided in
// this view.
10990 DAG.ExtractVectorElements(Op, Ops, 0, NumElts);
10991
// Linear left-to-right fold over the remaining elements.
10992 SDValue Res = Ops[0];
10993 for (unsigned i = 1; i < NumElts; i++)
10994 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Node->getFlags());
10995
10996 // Result type may be wider than element type.
10997 if (EltVT != Node->getValueType(0))
10998 Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
10999 return Res;
11000}
11001
11003 SDLoc dl(Node);
11004 SDValue AccOp = Node->getOperand(0);
11005 SDValue VecOp = Node->getOperand(1);
11006 SDNodeFlags Flags = Node->getFlags();
11007
11008 EVT VT = VecOp.getValueType();
11009 EVT EltVT = VT.getVectorElementType();
11010
11011 if (VT.isScalableVector())
11013 "Expanding reductions for scalable vectors is undefined.");
11014
11015 unsigned NumElts = VT.getVectorNumElements();
11016
11018 DAG.ExtractVectorElements(VecOp, Ops, 0, NumElts);
11019
11020 unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
11021
11022 SDValue Res = AccOp;
11023 for (unsigned i = 0; i < NumElts; i++)
11024 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Flags);
11025
11026 return Res;
11027}
11028
// Expand SREM/UREM into DIVREM (preferred) or DIV + "X - (X/Y)*Y". Returns
// false when neither form is legal or custom for the type. First signature
// line elided in this view.
11030 SelectionDAG &DAG) const {
11031 EVT VT = Node->getValueType(0);
11032 SDLoc dl(Node);
11033 bool isSigned = Node->getOpcode() == ISD::SREM;
11034 unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
11035 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
11036 SDValue Dividend = Node->getOperand(0);
11037 SDValue Divisor = Node->getOperand(1);
11038 if (isOperationLegalOrCustom(DivRemOpc, VT)) {
// DIVREM yields {quotient, remainder}; take result value 1.
11039 SDVTList VTs = DAG.getVTList(VT, VT);
11040 Result = DAG.getNode(DivRemOpc, dl, VTs, Dividend, Divisor).getValue(1);
11041 return true;
11042 }
11043 if (isOperationLegalOrCustom(DivOpc, VT)) {
11044 // X % Y -> X-X/Y*Y
11045 SDValue Divide = DAG.getNode(DivOpc, dl, VT, Dividend, Divisor);
11046 SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Divide, Divisor);
11047 Result = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
11048 return true;
11049 }
11050 return false;
11051}
11052
11054 SelectionDAG &DAG) const {
11055 bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT;
11056 SDLoc dl(SDValue(Node, 0));
11057 SDValue Src = Node->getOperand(0);
11058
11059 // DstVT is the result type, while SatVT is the size to which we saturate
11060 EVT SrcVT = Src.getValueType();
11061 EVT DstVT = Node->getValueType(0);
11062
11063 EVT SatVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
11064 unsigned SatWidth = SatVT.getScalarSizeInBits();
11065 unsigned DstWidth = DstVT.getScalarSizeInBits();
11066 assert(SatWidth <= DstWidth &&
11067 "Expected saturation width smaller than result width");
11068
11069 // Determine minimum and maximum integer values and their corresponding
11070 // floating-point values.
11071 APInt MinInt, MaxInt;
11072 if (IsSigned) {
11073 MinInt = APInt::getSignedMinValue(SatWidth).sext(DstWidth);
11074 MaxInt = APInt::getSignedMaxValue(SatWidth).sext(DstWidth);
11075 } else {
11076 MinInt = APInt::getMinValue(SatWidth).zext(DstWidth);
11077 MaxInt = APInt::getMaxValue(SatWidth).zext(DstWidth);
11078 }
11079
11080 // We cannot risk emitting FP_TO_XINT nodes with a source VT of [b]f16, as
11081 // libcall emission cannot handle this. Large result types will fail.
11082 if (SrcVT == MVT::f16 || SrcVT == MVT::bf16) {
11083 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Src);
11084 SrcVT = Src.getValueType();
11085 }
11086
11087 APFloat MinFloat(DAG.EVTToAPFloatSemantics(SrcVT));
11088 APFloat MaxFloat(DAG.EVTToAPFloatSemantics(SrcVT));
11089
11090 APFloat::opStatus MinStatus =
11091 MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
11092 APFloat::opStatus MaxStatus =
11093 MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
11094 bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
11095 !(MaxStatus & APFloat::opStatus::opInexact);
11096
11097 SDValue MinFloatNode = DAG.getConstantFP(MinFloat, dl, SrcVT);
11098 SDValue MaxFloatNode = DAG.getConstantFP(MaxFloat, dl, SrcVT);
11099
11100 // If the integer bounds are exactly representable as floats and min/max are
11101 // legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
11102 // of comparisons and selects.
11103 bool MinMaxLegal = isOperationLegal(ISD::FMINNUM, SrcVT) &&
11105 if (AreExactFloatBounds && MinMaxLegal) {
11106 SDValue Clamped = Src;
11107
11108 // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
11109 Clamped = DAG.getNode(ISD::FMAXNUM, dl, SrcVT, Clamped, MinFloatNode);
11110 // Clamp by MaxFloat from above. NaN cannot occur.
11111 Clamped = DAG.getNode(ISD::FMINNUM, dl, SrcVT, Clamped, MaxFloatNode);
11112 // Convert clamped value to integer.
11113 SDValue FpToInt = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT,
11114 dl, DstVT, Clamped);
11115
11116 // In the unsigned case we're done, because we mapped NaN to MinFloat,
11117 // which will cast to zero.
11118 if (!IsSigned)
11119 return FpToInt;
11120
11121 // Otherwise, select 0 if Src is NaN.
11122 SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
11123 EVT SetCCVT =
11124 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
11125 SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
11126 return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, FpToInt);
11127 }
11128
11129 SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT);
11130 SDValue MaxIntNode = DAG.getConstant(MaxInt, dl, DstVT);
11131
11132 // Result of direct conversion. The assumption here is that the operation is
11133 // non-trapping and it's fine to apply it to an out-of-range value if we
11134 // select it away later.
11135 SDValue FpToInt =
11136 DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl, DstVT, Src);
11137
11138 SDValue Select = FpToInt;
11139
11140 EVT SetCCVT =
11141 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
11142
11143 // If Src ULT MinFloat, select MinInt. In particular, this also selects
11144 // MinInt if Src is NaN.
11145 SDValue ULT = DAG.getSetCC(dl, SetCCVT, Src, MinFloatNode, ISD::SETULT);
11146 Select = DAG.getSelect(dl, DstVT, ULT, MinIntNode, Select);
11147 // If Src OGT MaxFloat, select MaxInt.
11148 SDValue OGT = DAG.getSetCC(dl, SetCCVT, Src, MaxFloatNode, ISD::SETOGT);
11149 Select = DAG.getSelect(dl, DstVT, OGT, MaxIntNode, Select);
11150
11151 // In the unsigned case we are done, because we mapped NaN to MinInt, which
11152 // is already zero.
11153 if (!IsSigned)
11154 return Select;
11155
11156 // Otherwise, select 0 if Src is NaN.
11157 SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
11158 SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
11159 return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, Select);
11160}
11161
// Narrow a floating-point value to ResultVT using round-to-odd semantics, so
// that a subsequent second narrowing (e.g. f32 -> bf16) cannot double-round
// to a wrong result (see the Boldo/Melquiond citation below).
// NOTE(review): the first line of this definition (function name and leading
// parameters, presumably ResultVT and Op of TargetLowering::
// expandRoundInexactToOdd) was lost during extraction -- confirm upstream.
11163 const SDLoc &dl,
11164 SelectionDAG &DAG) const {
11165 EVT OperandVT = Op.getValueType();
// Same scalar type already: no rounding needed, return the operand unchanged.
11166 if (OperandVT.getScalarType() == ResultVT.getScalarType())
11167 return Op;
11168 EVT ResultIntVT = ResultVT.changeTypeToInteger();
11169 // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
11170 // can induce double-rounding which may alter the results. We can
11171 // correct for this using a trick explained in: Boldo, Sylvie, and
11172 // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
11173 // World Congress. 2005.
11174 unsigned BitSize = OperandVT.getScalarSizeInBits();
11175 EVT WideIntVT = OperandVT.changeTypeToInteger();
11176 SDValue OpAsInt = DAG.getBitcast(WideIntVT, Op);
// Save the sign bit now; the rounding below operates on the absolute value
// and the sign is OR'ed back in at the end.
11177 SDValue SignBit =
11178 DAG.getNode(ISD::AND, dl, WideIntVT, OpAsInt,
11179 DAG.getConstant(APInt::getSignMask(BitSize), dl, WideIntVT))
;
11180 SDValue AbsWide;
11181 if (isOperationLegalOrCustom(ISD::FABS, OperandVT)) {
11182 AbsWide = DAG.getNode(ISD::FABS, dl, OperandVT, Op);
11183 } else {
// FABS unavailable for this type: clear the sign bit with an integer mask
// instead (getSignedMaxValue is all-ones except the sign bit).
11184 SDValue ClearedSign = DAG.getNode(
11185 ISD::AND, dl, WideIntVT, OpAsInt,
11186 DAG.getConstant(APInt::getSignedMaxValue(BitSize), dl, WideIntVT));
11187 AbsWide = DAG.getBitcast(OperandVT, ClearedSign);
11188 }
// Round to the narrow type, then widen again so the rounded value can be
// compared against the original to detect an inexact narrowing.
11189 SDValue AbsNarrow = DAG.getFPExtendOrRound(AbsWide, dl, ResultVT);
11190 SDValue AbsNarrowAsWide = DAG.getFPExtendOrRound(AbsNarrow, dl, OperandVT);
11191
11192 // We can keep the narrow value as-is if narrowing was exact (no
11193 // rounding error), the wide value was NaN (the narrow value is also
11194 // NaN and should be preserved) or if we rounded to the odd value.
11195 SDValue NarrowBits = DAG.getNode(ISD::BITCAST, dl, ResultIntVT, AbsNarrow);
11196 SDValue One = DAG.getConstant(1, dl, ResultIntVT);
11197 SDValue NegativeOne = DAG.getAllOnesConstant(dl, ResultIntVT);
11198 SDValue And = DAG.getNode(ISD::AND, dl, ResultIntVT, NarrowBits, One);
11199 EVT ResultIntVTCCVT = getSetCCResultType(
11200 DAG.getDataLayout(), *DAG.getContext(), And.getValueType());
11201 SDValue Zero = DAG.getConstant(0, dl, ResultIntVT);
11202 // The result is already odd so we don't need to do anything.
11203 SDValue AlreadyOdd = DAG.getSetCC(dl, ResultIntVTCCVT, And, Zero, ISD::SETNE);
11204
11205 EVT WideSetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
11206 AbsWide.getValueType());
11207 // We keep results which are exact, odd or NaN.
// SETUEQ is "equal or unordered", so this one comparison covers both the
// exact-narrowing case and the NaN case.
11208 SDValue KeepNarrow =
11209 DAG.getSetCC(dl, WideSetCCVT, AbsWide, AbsNarrowAsWide, ISD::SETUEQ);
11210 KeepNarrow = DAG.getNode(ISD::OR, dl, WideSetCCVT, KeepNarrow, AlreadyOdd);
11211 // We morally performed a round-down if AbsNarrow is smaller than
11212 // AbsWide.
11213 SDValue NarrowIsRd =
11214 DAG.getSetCC(dl, WideSetCCVT, AbsWide, AbsNarrowAsWide, ISD::SETOGT);
11215 // If the narrow value is odd or exact, pick it.
11216 // Otherwise, narrow is even and corresponds to either the rounded-up
11217 // or rounded-down value. If narrow is the rounded-down value, we want
11218 // the rounded-up value as it will be odd.
11219 SDValue Adjust = DAG.getSelect(dl, ResultIntVT, NarrowIsRd, One, NegativeOne);
11220 SDValue Adjusted = DAG.getNode(ISD::ADD, dl, ResultIntVT, NarrowBits, Adjust);
11221 Op = DAG.getSelect(dl, ResultIntVT, KeepNarrow, NarrowBits, Adjusted);
// Re-apply the saved sign bit, shifted down into the narrow type's sign
// position, then bitcast the integer result back to floating point.
11222 int ShiftAmount = BitSize - ResultVT.getScalarSizeInBits();
11223 SDValue ShiftCnst = DAG.getShiftAmountConstant(ShiftAmount, WideIntVT, dl);
11224 SignBit = DAG.getNode(ISD::SRL, dl, WideIntVT, SignBit, ShiftCnst);
11225 SignBit = DAG.getNode(ISD::TRUNCATE, dl, ResultIntVT, SignBit);
11226 Op = DAG.getNode(ISD::OR, dl, ResultIntVT, Op, SignBit);
11227 return DAG.getNode(ISD::BITCAST, dl, ResultVT, Op);
11228}
11229
// Expand an ISD::FP_ROUND node. Only the bf16 destination case is handled
// here (wider float -> bf16 via f32 with correction for double rounding);
// any other destination returns an empty SDValue, i.e. "not expanded".
// NOTE(review): the signature line of this definition (presumably
// TargetLowering::expandFP_ROUND(SDNode *Node, SelectionDAG &DAG) const)
// was lost during extraction -- confirm against upstream.
11231 assert(Node->getOpcode() == ISD::FP_ROUND && "Unexpected opcode!");
11232 SDValue Op = Node->getOperand(0);
11233 EVT VT = Node->getValueType(0);
11234 SDLoc dl(Node);
11235 if (VT.getScalarType() == MVT::bf16) {
// Operand 1 of FP_ROUND is the "lossless" flag; when it is 1 the value is
// known representable in the narrow type, so a plain FP_TO_BF16 suffices.
11236 if (Node->getConstantOperandVal(1) == 1) {
11237 return DAG.getNode(ISD::FP_TO_BF16, dl, VT, Node->getOperand(0));
11238 }
11239 EVT OperandVT = Op.getValueType();
// SETUO (unordered) on Op with itself is true exactly when Op is NaN.
11240 SDValue IsNaN = DAG.getSetCC(
11241 dl,
11242 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), OperandVT),
11243 Op, Op, ISD::SETUO);
11244
11245 // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
11246 // can induce double-rounding which may alter the results. We can
11247 // correct for this using a trick explained in: Boldo, Sylvie, and
11248 // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
11249 // World Congress. 2005.
11250 EVT F32 = VT.isVector() ? VT.changeVectorElementType(MVT::f32) : MVT::f32;
11251 EVT I32 = F32.changeTypeToInteger();
// First round to f32 with round-to-odd, then do the f32 -> bf16 rounding
// manually on the integer bit pattern below.
11252 Op = expandRoundInexactToOdd(F32, Op, dl, DAG);
11253 Op = DAG.getNode(ISD::BITCAST, dl, I32, Op);
11254
11255 // Conversions should set NaN's quiet bit. This also prevents NaNs from
11256 // turning into infinities.
// 0x400000 is the quiet-NaN bit of an IEEE binary32 payload.
11257 SDValue NaN =
11258 DAG.getNode(ISD::OR, dl, I32, Op, DAG.getConstant(0x400000, dl, I32));
11259
11260 // Factor in the contribution of the low 16 bits.
// 0x7fff + LSB-of-the-kept-half implements round-to-nearest-even on the
// 16 discarded bits.
11261 SDValue One = DAG.getConstant(1, dl, I32);
11262 SDValue Lsb = DAG.getNode(ISD::SRL, dl, I32, Op,
11263 DAG.getShiftAmountConstant(16, I32, dl));
11264 Lsb = DAG.getNode(ISD::AND, dl, I32, Lsb, One);
11265 SDValue RoundingBias =
11266 DAG.getNode(ISD::ADD, dl, I32, DAG.getConstant(0x7fff, dl, I32), Lsb);
11267 SDValue Add = DAG.getNode(ISD::ADD, dl, I32, Op, RoundingBias);
11268
11269 // Don't round if we had a NaN, we don't want to turn 0x7fffffff into
11270 // 0x80000000.
11271 Op = DAG.getSelect(dl, I32, IsNaN, NaN, Add);
11272
11273 // Now that we have rounded, shift the bits into position.
11274 Op = DAG.getNode(ISD::SRL, dl, I32, Op,
11275 DAG.getShiftAmountConstant(16, I32, dl));
11276 Op = DAG.getNode(ISD::BITCAST, dl, I32, Op);
11277 EVT I16 = I32.isVector() ? I32.changeVectorElementType(MVT::i16) : MVT::i16;
11278 Op = DAG.getNode(ISD::TRUNCATE, dl, I16, Op);
11279 return DAG.getNode(ISD::BITCAST, dl, VT, Op);
11280 }
// Not a bf16 destination: signal "no expansion performed" to the caller.
11281 return SDValue();
11282}
11283
// Expand ISD::VECTOR_SPLICE for scalable vectors by storing both operands to
// a stack temporary as CONCAT_VECTORS(V1, V2) and loading the result back
// from the computed offset (see the pseudo-code comment below).
// NOTE(review): this listing lost several lines to extraction -- the
// signature's first line (presumably TargetLowering::expandVectorSplice(
// SDNode *Node, ...)) and continuation lines at original lines 11309, 11322,
// 11332, 11346 and 11355 (the MemVT declaration, getVScale's APInt argument,
// and the MachinePointerInfo arguments of the loads). Confirm upstream.
11285 SelectionDAG &DAG) const {
11286 assert(Node->getOpcode() == ISD::VECTOR_SPLICE && "Unexpected opcode!");
11287 assert(Node->getValueType(0).isScalableVector() &&
11288 "Fixed length vector types expected to use SHUFFLE_VECTOR!");
11289
11290 EVT VT = Node->getValueType(0);
11291 SDValue V1 = Node->getOperand(0);
11292 SDValue V2 = Node->getOperand(1);
// Operand 2 is the splice offset: non-negative counts from the start of V1,
// negative counts trailing elements taken from the end of V1.
11293 int64_t Imm = cast<ConstantSDNode>(Node->getOperand(2))->getSExtValue();
11294 SDLoc DL(Node);
11295
11296 // Expand through memory thusly:
11297 // Alloca CONCAT_VECTORS_TYPES(V1, V2) Ptr
11298 // Store V1, Ptr
11299 // Store V2, Ptr + sizeof(V1)
11300 // If (Imm < 0)
11301 // TrailingElts = -Imm
11302 // Ptr = Ptr + sizeof(V1) - (TrailingElts * sizeof(VT.Elt))
11303 // else
11304 // Ptr = Ptr + (Imm * sizeof(VT.Elt))
11305 // Res = Load Ptr
11306
11307 Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);
11308
// Stack slot must hold both vectors (element count doubled).
11310 VT.getVectorElementCount() * 2);
11311 SDValue StackPtr = DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
11312 EVT PtrVT = StackPtr.getValueType();
11313 auto &MF = DAG.getMachineFunction();
11314 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
11315 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
11316
11317 // Store the lo part of CONCAT_VECTORS(V1, V2)
11318 SDValue StoreV1 = DAG.getStore(DAG.getEntryNode(), DL, V1, StackPtr, PtrInfo);
11319 // Store the hi part of CONCAT_VECTORS(V1, V2)
// Offset is vscale-scaled because V1 has a scalable size.
11320 SDValue OffsetToV2 = DAG.getVScale(
11321 DL, PtrVT,
11323 SDValue StackPtr2 = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, OffsetToV2);
// Chain the second store after the first so both complete before the load.
11324 SDValue StoreV2 = DAG.getStore(StoreV1, DL, V2, StackPtr2, PtrInfo);
11325
11326 if (Imm >= 0) {
11327 // Load back the required element. getVectorElementPointer takes care of
11328 // clamping the index if it's out-of-bounds.
11329 StackPtr = getVectorElementPointer(DAG, StackPtr, VT, Node->getOperand(2));
11330 // Load the spliced result
11331 return DAG.getLoad(VT, DL, StoreV2, StackPtr,
11333 }
11334
// Negative offset: splice starts TrailingElts elements before the end of V1.
11335 uint64_t TrailingElts = -Imm;
11336
11337 // NOTE: TrailingElts must be clamped so as not to read outside of V1:V2.
11338 TypeSize EltByteSize = VT.getVectorElementType().getStoreSize();
11339 SDValue TrailingBytes =
11340 DAG.getConstant(TrailingElts * EltByteSize, DL, PtrVT);
11341
// Clamp at runtime to the (vscale-scaled) size of V1 so the load cannot
// start before the beginning of the stack slot.
11342 if (TrailingElts > VT.getVectorMinNumElements()) {
11343 SDValue VLBytes =
11344 DAG.getVScale(DL, PtrVT,
11345 APInt(PtrVT.getFixedSizeInBits(),
11347 TrailingBytes = DAG.getNode(ISD::UMIN, DL, PtrVT, TrailingBytes, VLBytes);
11348 }
11349
11350 // Calculate the start address of the spliced result.
11351 StackPtr2 = DAG.getNode(ISD::SUB, DL, PtrVT, StackPtr2, TrailingBytes);
11352
11353 // Load the spliced result
11354 return DAG.getLoad(VT, DL, StoreV2, StackPtr2,
11356}
11357
// Legalize the condition code of a SETCC (or VP_SETCC when Mask/EVL are set)
// whose CondCode is not legal for OpVT. Tries, in order: swapping operands,
// inverting the condition (setting NeedInvert so the caller negates the
// result), and finally expanding into two comparisons combined with AND/OR.
// Returns true if LHS/RHS/CC were rewritten (CC may become empty when the
// expansion already produced the final value in LHS); false if nothing to do.
// NOTE(review): extraction dropped several lines from this listing -- the
// signature's first line (presumably bool TargetLowering::
// LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT, ...)), the switch case
// labels at original lines 11373/11376 (presumably TargetLowering::Legal and
// TargetLowering::Expand), and the declarations at 11377/11400 (InvCC and
// CC1/CC2). Confirm against upstream before relying on this listing.
11359 SDValue &LHS, SDValue &RHS,
11360 SDValue &CC, SDValue Mask,
11361 SDValue EVL, bool &NeedInvert,
11362 const SDLoc &dl, SDValue &Chain,
11363 bool IsSignaling) const {
11364 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11365 MVT OpVT = LHS.getSimpleValueType();
11366 ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
11367 NeedInvert = false;
// Mask and EVL distinguish the VP (vector-predicated) form; they must be
// set or unset together.
11368 assert(!EVL == !Mask && "VP Mask and EVL must either both be set or unset");
11369 bool IsNonVP = !EVL;
11370 switch (TLI.getCondCodeAction(CCCode, OpVT)) {
11371 default:
11372 llvm_unreachable("Unknown condition code action!");
11374 // Nothing to do.
11375 break;
// First attempt: the swapped-operand condition may already be legal.
11378 if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
11379 std::swap(LHS, RHS);
11380 CC = DAG.getCondCode(InvCC);
11381 return true;
11382 }
11383 // Swapping operands didn't work. Try inverting the condition.
11384 bool NeedSwap = false;
11385 InvCC = getSetCCInverse(CCCode, OpVT);
11386 if (!TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
11387 // If inverting the condition is not enough, try swapping operands
11388 // on top of it.
11389 InvCC = ISD::getSetCCSwappedOperands(InvCC);
11390 NeedSwap = true;
11391 }
11392 if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
// Caller must negate the comparison result; signal via NeedInvert.
11393 CC = DAG.getCondCode(InvCC);
11394 NeedInvert = true;
11395 if (NeedSwap)
11396 std::swap(LHS, RHS);
11397 return true;
11398 }
11399
// Last resort: expand into two comparisons (CC1, CC2) joined by Opc.
11401 unsigned Opc = 0;
11402 switch (CCCode) {
11403 default:
11404 llvm_unreachable("Don't know how to expand this condition!");
11405 case ISD::SETUO:
11406 if (TLI.isCondCodeLegal(ISD::SETUNE, OpVT)) {
// unord(L,R) == (L != L) || (R != R) using unordered-not-equal.
11407 CC1 = ISD::SETUNE;
11408 CC2 = ISD::SETUNE;
11409 Opc = ISD::OR;
11410 break;
11411 }
11412 assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) &&
11413 "If SETUE is expanded, SETOEQ or SETUNE must be legal!");
// SETUO is the inverse of SETO; fall through and invert the result.
11414 NeedInvert = true;
11415 [[fallthrough]];
11416 case ISD::SETO:
11417 assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) &&
11418 "If SETO is expanded, SETOEQ must be legal!");
// ord(L,R) == (L == L) && (R == R) using ordered-equal self-compares.
11419 CC1 = ISD::SETOEQ;
11420 CC2 = ISD::SETOEQ;
11421 Opc = ISD::AND;
11422 break;
11423 case ISD::SETONE:
11424 case ISD::SETUEQ:
11425 // If the SETUO or SETO CC isn't legal, we might be able to use
11426 // SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
11427 // of SETOGT/SETOLT to be legal, the other can be emulated by swapping
11428 // the operands.
11429 CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
11430 if (!TLI.isCondCodeLegal(CC2, OpVT) &&
11431 (TLI.isCondCodeLegal(ISD::SETOGT, OpVT) ||
11432 TLI.isCondCodeLegal(ISD::SETOLT, OpVT))) {
11433 CC1 = ISD::SETOGT;
11434 CC2 = ISD::SETOLT;
11435 Opc = ISD::OR;
11436 NeedInvert = ((unsigned)CCCode & 0x8U);
11437 break;
11438 }
11439 [[fallthrough]];
11440 case ISD::SETOEQ:
11441 case ISD::SETOGT:
11442 case ISD::SETOGE:
11443 case ISD::SETOLT:
11444 case ISD::SETOLE:
11445 case ISD::SETUNE:
11446 case ISD::SETUGT:
11447 case ISD::SETUGE:
11448 case ISD::SETULT:
11449 case ISD::SETULE:
11450 // If we are floating point, assign and break, otherwise fall through.
11451 if (!OpVT.isInteger()) {
11452 // We can use the 4th bit to tell if we are the unordered
11453 // or ordered version of the opcode.
11454 CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
11455 Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
// Low 3 bits select the relation; bit 4 (0x10) forces the ordered form.
11456 CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
11457 break;
11458 }
11459 // Fallthrough if we are unsigned integer.
11460 [[fallthrough]];
11461 case ISD::SETLE:
11462 case ISD::SETGT:
11463 case ISD::SETGE:
11464 case ISD::SETLT:
11465 case ISD::SETNE:
11466 case ISD::SETEQ:
11467 // If all combinations of inverting the condition and swapping operands
11468 // didn't work then we have no means to expand the condition.
11469 llvm_unreachable("Don't know how to expand this condition!");
11470 }
11471
11472 SDValue SetCC1, SetCC2;
11473 if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
11474 // If we aren't the ordered or unorder operation,
11475 // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
11476 if (IsNonVP) {
11477 SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling);
11478 SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling);
11479 } else {
11480 SetCC1 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC1, Mask, EVL);
11481 SetCC2 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC2, Mask, EVL);
11482 }
11483 } else {
11484 // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
11485 if (IsNonVP) {
11486 SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling);
11487 SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling);
11488 } else {
11489 SetCC1 = DAG.getSetCCVP(dl, VT, LHS, LHS, CC1, Mask, EVL);
11490 SetCC2 = DAG.getSetCCVP(dl, VT, RHS, RHS, CC2, Mask, EVL);
11491 }
11492 }
// For strict (chained) FP compares, merge both comparisons' output chains.
11493 if (Chain)
11494 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1),
11495 SetCC2.getValue(1));
11496 if (IsNonVP)
11497 LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
11498 else {
11499 // Transform the binary opcode to the VP equivalent.
11500 assert((Opc == ISD::OR || Opc == ISD::AND) && "Unexpected opcode");
11501 Opc = Opc == ISD::OR ? ISD::VP_OR : ISD::VP_AND;
11502 LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2, Mask, EVL);
11503 }
// Expansion complete: LHS now holds the final value; clear RHS/CC so the
// caller knows no further SETCC is required.
11504 RHS = SDValue();
11505 CC = SDValue();
11506 return true;
11507 }
11508 }
11509 return false;
11510}
unsigned const MachineRegisterInfo * MRI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static const LLT F32
amdgpu AMDGPU Register Bank Select
basic Basic Alias true
block Block Frequency Analysis
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
uint64_t Addr
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static bool isSigned(unsigned int Opcode)
static bool isUndef(ArrayRef< int > Mask)
static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, const APInt &Demanded)
Check to see if the specified operand of the specified instruction is a constant integer.
#define RegName(no)
static LVOptions Options
Definition: LVOptions.cpp:25
lazy value info
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:528
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
unsigned const TargetRegisterInfo * TRI
#define T1
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define P(N)
const char LLVMTargetMachineRef TM
const char * Passes
R600 Clause Merge
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static bool lowerImmediateIfPossible(TargetLowering::ConstraintPair &P, SDValue Op, SelectionDAG *DAG, const TargetLowering &TLI)
If we have an immediate, see if we can lower it.
static SDValue expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG)
static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG, const APInt &UndefOp0, const APInt &UndefOp1)
Given a vector binary operation and known undefined elements for each input operand,...
static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx, EVT VecVT, const SDLoc &dl, ElementCount SubEC)
static unsigned getConstraintPiority(TargetLowering::ConstraintType CT)
Return a number indicating our preference for chosing a type of constraint over another,...
static std::optional< bool > isFCmpEqualZero(FPClassTest Test, const fltSemantics &Semantics, const MachineFunction &MF)
Returns a true value if if this FPClassTest can be performed with an ordered fcmp to 0,...
static void turnVectorIntoSplatVector(MutableArrayRef< SDValue > Values, std::function< bool(SDValue)> Predicate, SDValue AlternativeReplacement=SDValue())
If all values in Values that don't match the predicate are same 'splat' value, then replace all value...
static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT)
static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created)
Given an exact SDIV by a constant, create a multiplication with the multiplicative inverse of the con...
static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT, SDValue N0, const APInt &C1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static SDValue combineShiftToAVG(SDValue Op, TargetLowering::TargetLoweringOpt &TLO, const TargetLowering &TLI, const APInt &DemandedBits, const APInt &DemandedElts, unsigned Depth)
This file describes how to lower LLVM code to machine code.
static int Lookup(ArrayRef< TableEntry > Table, unsigned Opcode)
static SDValue scalarizeVectorStore(StoreSDNode *Store, MVT StoreVT, SelectionDAG &DAG)
Scalarize a vector store, bitcasting to TargetVT to determine the scalar type.
Value * RHS
Value * LHS
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition: APFloat.h:1237
static APFloat getSmallestNormalized(const fltSemantics &Sem, bool Negative=false)
Returns the smallest (by magnitude) normalized finite number in the given semantics.
Definition: APFloat.h:1058
APInt bitcastToAPInt() const
Definition: APFloat.h:1254
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
Definition: APFloat.h:1038
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Definition: APFloat.h:998
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Definition: APFloat.h:1009
Class for arbitrary precision integers.
Definition: APInt.h:77
APInt udiv(const APInt &RHS) const
Unsigned division operation.
Definition: APInt.cpp:1543
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:213
static void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
Definition: APInt.cpp:1728
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition: APInt.h:1386
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition: APInt.h:428
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:981
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition: APInt.h:208
bool isMinSignedValue() const
Determine if this is the smallest signed value.
Definition: APInt.h:402
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1499
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
Definition: APInt.h:1371
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition: APInt.h:1365
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:1002
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1471
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:906
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition: APInt.h:185
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1309
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:350
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1161
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition: APInt.h:237
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:359
APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition: APInt.cpp:1636
void setSignBit()
Set the sign bit to 1.
Definition: APInt.h:1319
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1447
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:188
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
Definition: APInt.h:195
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:308
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition: APInt.h:1228
void clearAllBits()
Set every bit to 0.
Definition: APInt.h:1376
APInt reverseBits() const
Definition: APInt.cpp:737
void ashrInPlace(unsigned ShiftAmt)
Arithmetic right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:813
void negate()
Negate this APInt in place.
Definition: APInt.h:1429
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1597
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition: APInt.h:1556
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:620
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:198
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1490
unsigned countLeadingZeros() const
Definition: APInt.h:1564
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition: APInt.h:335
void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition: APInt.cpp:368
unsigned logBase2() const
Definition: APInt.h:1718
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition: APInt.h:454
void setAllBits()
Set every bit to 1.
Definition: APInt.h:1298
APInt multiplicativeInverse() const
Definition: APInt.cpp:1244
bool isMaxSignedValue() const
Determine if this is the largest signed value.
Definition: APInt.h:384
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition: APInt.h:313
bool ule(const APInt &RHS) const
Unsigned less or equal comparison.
Definition: APInt.h:1129
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:954
void setBits(unsigned loBit, unsigned hiBit)
Set the bits from loBit (inclusive) to hiBit (exclusive) to 1.
Definition: APInt.h:1346
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition: APInt.h:852
APInt byteSwap() const
Definition: APInt.cpp:715
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1236
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:419
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:285
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:275
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:179
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition: APInt.h:1368
APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition: APInt.cpp:453
bool isOne() const
Determine if this is a value of 1.
Definition: APInt.h:368
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition: APInt.h:265
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:218
void clearHighBits(unsigned hiBits)
Set top hiBits bits to 0.
Definition: APInt.h:1403
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1521
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:837
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:830
unsigned countr_one() const
Count the number of trailing one bits.
Definition: APInt.h:1614
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1200
void setBitVal(unsigned BitPosition, bool BitValue)
Set a given bit to a given value.
Definition: APInt.h:1322
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
bool hasAttributes() const
Return true if the builder has IR-level attributes.
Definition: Attributes.h:1092
bool contains(Attribute::AttrKind A) const
Return true if the builder has the specified attribute.
AttrBuilder & removeAttribute(Attribute::AttrKind Val)
Remove an attribute from the builder.
bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
A "pseudo-class" with methods for operating on BUILD_VECTORs.
ConstantSDNode * getConstantSplatNode(const APInt &DemandedElts, BitVector *UndefElements=nullptr) const
Returns the demanded splatted constant or null if this is not a constant splat.
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
Register getLocReg() const
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1236
This class represents a function call, abstracting a target machine's calling convention.
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
Definition: Constants.h:706
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:269
const APInt & getAPIntValue() const
This is an important base class in LLVM.
Definition: Constant.h:41
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:238
bool isBigEndian() const
Definition: DataLayout.h:239
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:874
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:350
const GlobalValue * getGlobal() const
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
std::vector< std::string > ConstraintCodeVector
Definition: InlineAsm.h:102
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:278
void emitError(uint64_t LocCookie, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
This class is used to represent ISD::LOAD nodes.
Context object for machine code objects.
Definition: MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:35
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:397
Machine Value Type.
SimpleValueType SimpleTy
bool isInteger() const
Return true if this is an integer or a vector integer type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
DenormalMode getDenormalMode(const fltSemantics &FPType) const
Returns the denormal handling type for the default rounding mode of the function.
MCSymbol * getJTISymbol(unsigned JTI, MCContext &Ctx, bool isLinkerPrivate=false) const
getJTISymbol - Return the MCSymbol for the specified non-empty jump table.
Function & getFunction()
Return the LLVM function that this machine code represents.
@ EK_GPRel32BlockAddress
EK_GPRel32BlockAddress - Each entry is an address of block, encoded with a relocation as gp-relative,...
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
@ EK_BlockAddress
EK_BlockAddress - Each entry is a plain address of block, e.g.: .word LBB123.
@ EK_GPRel64BlockAddress
EK_GPRel64BlockAddress - Each entry is an address of block, encoded with a relocation as gp-relative,...
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
const GlobalVariable * getNamedGlobal(StringRef Name) const
Return the global variable in the module with the specified name, of arbitrary type.
Definition: Module.h:461
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:307
iterator end() const
Definition: ArrayRef.h:357
iterator begin() const
Definition: ArrayRef.h:356
Class to represent pointers.
Definition: DerivedTypes.h:646
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
static SDNodeIterator end(const SDNode *N)
static SDNodeIterator begin(const SDNode *N)
Represents one node in the SelectionDAG.
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
SDNodeFlags getFlags() const
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
void setFlags(SDNodeFlags NewFlags)
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
bool use_empty() const
Return true if there are no nodes using value ResNo of Node.
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:227
bool willNotOverflowAdd(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the addition of 2 nodes can never overflow.
Align getReducedAlign(EVT VT, bool UseABI)
In most cases this function returns the ABI alignment for a given type, except for illegal vector typ...
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:736
SDValue getExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT, unsigned Opcode)
Convert Op, which must be of integer type, to the integer type VT, by either any/sign/zero-extending ...
Definition: SelectionDAG.h:968
unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
bool isKnownNeverSNaN(SDValue Op, unsigned Depth=0) const
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond, const SDLoc &dl)
Constant fold a setcc to true or false.
void ExtractVectorElements(SDValue Op, SmallVectorImpl< SDValue > &Args, unsigned Start=0, unsigned Count=0, EVT EltVT=EVT())
Append the extracted elements from Start to Count out of the vector Op in Args.
SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
SDNode * isConstantIntBuildVectorOrConstantInt(SDValue N) const
Test whether the given value is a constant int or similar node.
SDValue getJumpTableDebugInfo(int JTI, SDValue Chain, const SDLoc &DL)
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getGLOBAL_OFFSET_TABLE(EVT VT)
Return a GLOBAL_OFFSET_TABLE node. This does not have a useful SDLoc.
SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
bool shouldOptForSize() const
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:492
static constexpr unsigned MaxRecursionDepth
Definition: SelectionDAG.h:451
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:842
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:486
bool doesNodeExist(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops)
Check if a node exists without modifying its flags.
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
SDValue getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, bool isTargetGA=false, unsigned TargetFlags=0)
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
Definition: SelectionDAG.h:673
std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
bool isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly=false, unsigned Depth=0) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
Definition: SelectionDAG.h:876
void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
bool isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth=0) const
Test if the given value is known to have exactly one bit set.
bool isKnownNeverZero(SDValue Op, unsigned Depth=0) const
Test whether the given SDValue is known to contain non-zero value(s).
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
static const fltSemantics & EVTToAPFloatSemantics(EVT VT)
Returns an APFloat semantics tag appropriate for the given type.
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:487
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
bool isKnownNeverZeroFloat(SDValue Op) const
Test whether the given floating point SDValue is known to never be positive or negative zero.
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
bool isKnownNeverNaN(SDValue Op, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:690
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:782
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:481
std::optional< uint64_t > getValidMaximumShiftAmount(SDValue V, const APInt &DemandedElts, unsigned Depth=0) const
If a SHL/SRA/SRL node V has shift amounts that are all less than the element bit-width of the shift n...
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
SDValue getCondCode(ISD::CondCode Cond)
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
std::optional< uint64_t > getValidShiftAmount(SDValue V, const APInt &DemandedElts, unsigned Depth=0) const
If a SHL/SRA/SRL node V has a uniform shift amount that is less than the element bit-width of the shi...
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
Definition: SelectionDAG.h:499
SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL, bool LegalTypes=true)
SDValue getSetCCVP(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Mask, SDValue EVL)
Helper function to make it easier to build VP_SETCCs if you just have an ISD::CondCode instead of an ...
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:568
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
Definition: SelectionDAG.h:892
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
static void commuteMask(MutableArrayRef< int > Mask)
Change values in a shuffle permute mask assuming the two vector operands have swapped position.
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
This class is used to represent ISD::STORE nodes.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition: StringRef.h:564
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:258
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:131
iterator end() const
Definition: StringRef.h:113
Class to represent struct types.
Definition: DerivedTypes.h:216
void setAttributes(const CallBase *Call, unsigned ArgIdx)
Set CallLoweringInfo attribute flags based on a call instruction and called function attributes.
This base class for TargetLowering contains the SelectionDAG-independent parts that can be used from ...
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
virtual bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
virtual bool isShuffleMaskLegal(ArrayRef< int >, EVT) const
Targets can use this to indicate that they only support some VECTOR_SHUFFLE operations,...
virtual bool shouldRemoveRedundantExtend(SDValue Op) const
Return true (the default) if it is profitable to remove a sext_inreg(x) where the sext is redundant,...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const
Get the CallingConv that should be used for the specified libcall.
virtual bool isLegalICmpImmediate(int64_t) const
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
virtual bool isSafeMemOpType(MVT) const
Returns true if it's safe to use load / store of the specified type to expand memcpy / memset inline.
const TargetMachine & getTargetMachine() const
virtual bool isCtpopFast(EVT VT) const
Return true if ctpop instruction is fast.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
bool isPaddedAtMostSignificantBitsWhenStored(EVT VT) const
Indicates if any padding is guaranteed to go at the most significant bits when storing the type to me...
virtual EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &) const
Returns the target specific optimal type for load and store operations as a result of memset,...
LegalizeAction getCondCodeAction(ISD::CondCode CC, MVT VT) const
Return how the condition code should be treated: either it is legal, needs to be expanded to some oth...
virtual bool isCommutativeBinOp(unsigned Opcode) const
Returns true if the opcode is a commutative binary operation.
virtual bool isFPImmLegal(const APFloat &, EVT, bool ForCodeSize=false) const
Returns true if the target can instruction select the specified FP immediate natively.
virtual MVT::SimpleValueType getCmpLibcallReturnType() const
Return the ValueType for comparison libcalls.
virtual bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const
Should we transform the IR-optimal check for whether given truncation down into KeptBits would be trun...
bool isLegalRC(const TargetRegisterInfo &TRI, const TargetRegisterClass &RC) const
Return true if the value types that can be represented by the specified register class are all legal.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
virtual bool shouldExtendTypeInLibCall(EVT Type) const
Returns true if arguments should be extended in lib calls.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool shouldAvoidTransformToShift(EVT VT, unsigned Amount) const
Return true if creating a shift of the type by the given amount is not profitable.
virtual bool isFPExtFree(EVT DestVT, EVT SrcVT) const
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const
Return the ValueType of the result of SETCC operations.
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL, bool LegalTypes=true) const
Returns the type for the shift amount of a shift opcode.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
MVT getSimpleValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the MVT corresponding to this LLVM type. See getValueType.
BooleanContent getBooleanContents(bool isVec, bool isFloat) const
For targets without i1 registers, this gives the nature of the high-bits of boolean values held in ty...
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal on this target.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual bool isNarrowingProfitable(EVT SrcVT, EVT DestVT) const
Return true if it's profitable to narrow operations of type SrcVT to DestVT.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT) const
Return true if it is profitable to reduce a load to a smaller type.
virtual unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const
Return the maximum number of "x & (x - 1)" operations that can be done instead of deferring to a cust...
BooleanContent
Enum that describes how the target represents true/false values.
virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const
Return true if integer divide is usually cheaper than a sequence of several shifts,...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
virtual bool hasAndNotCompare(SDValue Y) const
Return true if the target should transform: (X & Y) == Y —> (~X & Y) == 0 (X & Y) !...
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
virtual bool isCtlzFast() const
Return true if ctlz instruction is fast.
virtual bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT, bool IsSigned) const
Return true if it is more correct/profitable to use strict FP_TO_INT conversion operations - canonica...
NegatibleCost
Enum that specifies when a float negation is beneficial.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
ISD::CondCode getCmpLibcallCC(RTLIB::Libcall Call) const
Get the CondCode that's to be used to test the result of the comparison libcall against zero.
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
std::vector< ArgListEntry > ArgListTy
virtual EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
bool isCondCodeLegalOrCustom(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal or custom on this target.
MVT getRegisterType(MVT VT) const
Return the type of registers that this ValueType will eventually require.
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
MulExpansionKind
Enum that specifies when a multiplication should be expanded.
static ISD::NodeType getExtendForContent(BooleanContent Content)
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT.
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions Ref: "Hacker's Delight" by Henry Warren 1...
virtual ConstraintWeight getMultipleConstraintMatchWeight(AsmOperandInfo &info, int maIndex) const
Examine constraint type and operand type and determine a weight value.
SDValue expandVPCTLZ(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTLZ/VP_CTLZ_ZERO_UNDEF nodes.
bool expandMULO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]MULO.
bool expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, SelectionDAG &DAG, MulExpansionKind Kind, SDValue LL=SDValue(), SDValue LH=SDValue(), SDValue RL=SDValue(), SDValue RH=SDValue()) const
Expand a MUL into two nodes.
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
virtual bool isUsedByReturnOnly(SDNode *, SDValue &) const
Return true if result of the specified node is used by a return node only.
virtual void computeKnownBitsForFrameIndex(int FIOp, KnownBits &Known, const MachineFunction &MF) const
Determine which of the bits of FrameIndex FIOp are known to be 0.
SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const
virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const
This method can be implemented by targets that want to expose additional information about sign bits ...
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
virtual unsigned computeNumSignBitsForTargetInstr(GISelKnownBits &Analysis, Register R, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
This method can be implemented by targets that want to expose additional information about sign bits ...
SDValue expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const
Expand VP_BSWAP nodes.
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
SDValue expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const
Expand a VECREDUCE_SEQ_* into an explicit ordered calculation.
SDValue expandCTLZ(SDNode *N, SelectionDAG &DAG) const
Expand CTLZ/CTLZ_ZERO_UNDEF nodes.
SDValue expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const
Expand BITREVERSE nodes.
SDValue expandCTTZ(SDNode *N, SelectionDAG &DAG) const
Expand CTTZ/CTTZ_ZERO_UNDEF nodes.
virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const
Expands target specific indirect branch for the case of JumpTable expansion.
SDValue expandABD(SDNode *N, SelectionDAG &DAG) const
Expand ABDS/ABDU nodes.
virtual Align computeKnownAlignForTargetInstr(GISelKnownBits &Analysis, Register R, const MachineRegisterInfo &MRI, unsigned Depth=0) const
Determine the known alignment for the pointer value R.
std::vector< AsmOperandInfo > AsmOperandInfoVector
SDValue expandShlSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]SHLSAT.
SDValue expandIS_FPCLASS(EVT ResultVT, SDValue Op, FPClassTest Test, SDNodeFlags Flags, const SDLoc &DL, SelectionDAG &DAG) const
Expand check for floating point class.
SDValue expandFP_TO_INT_SAT(SDNode *N, SelectionDAG &DAG) const
Expand FP_TO_[US]INT_SAT into FP_TO_[US]INT and selects or min/max.
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "look through" ops that don't contri...
SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const
Expands an unaligned store to 2 half-size stores for integer values, and possibly more for vectors.
SDValue SimplifyMultipleUseDemandedVectorElts(SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
Helper wrapper around SimplifyMultipleUseDemandedBits, demanding all bits from only some vector eleme...
virtual bool findOptimalMemOpLowering(std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes) const
Determines the optimal series of memory ops to replace the memset / memcpy.
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
void expandSADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::S(ADD|SUB)O.
SDValue expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const
Expand VP_BITREVERSE nodes.
SDValue expandABS(SDNode *N, SelectionDAG &DAG, bool IsNegative=false) const
Expand ABS nodes.
SDValue expandVecReduce(SDNode *Node, SelectionDAG &DAG) const
Expand a VECREDUCE_* into an explicit calculation.
bool ShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const
Check to see if the specified operand of the specified instruction is a constant integer.
SDValue expandVPCTTZElements(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTTZ_ELTS/VP_CTTZ_ELTS_ZERO_UNDEF nodes.
virtual const char * getTargetNodeName(unsigned Opcode) const
This method returns the name of a target specific DAG node.
bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const
Expand float to UINT conversion.
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
virtual bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded vector elements, returning true on success...
bool expandREM(SDNode *Node, SDValue &Result, SelectionDAG &DAG) const
Expand an SREM or UREM using SDIV/UDIV or SDIVREM/UDIVREM, if legal.
std::pair< SDValue, SDValue > expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Expands an unaligned load to 2 half-size loads for an integer, and possibly more for vectors.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
SDValue expandVectorSplice(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::VECTOR_SPLICE.
virtual const char * LowerXConstraint(EVT ConstraintVT) const
Try to replace an X constraint, which matches anything, with another that has more specific requireme...
SDValue expandCTPOP(SDNode *N, SelectionDAG &DAG) const
Expand CTPOP nodes.
SDValue expandBSWAP(SDNode *N, SelectionDAG &DAG) const
Expand BSWAP nodes.
SDValue expandFMINIMUM_FMAXIMUM(SDNode *N, SelectionDAG &DAG) const
Expand fminimum/fmaximum into multiple comparison with selects.
SDValue CTTZTableLookup(SDNode *N, SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op, unsigned NumBitsPerElt) const
Expand CTTZ via Table Lookup.
virtual bool isKnownNeverNaNForTargetNode(SDValue Op, const SelectionDAG &DAG, bool SNaN=false, unsigned Depth=0) const
If SNaN is false, returns true if Op is known to never be any NaN.
bool expandDIVREMByConstant(SDNode *N, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, SDValue LL=SDValue(), SDValue LH=SDValue()) const
Attempt to expand an n-bit div/rem/divrem by constant using a n/2-bit urem by constant and other arit...
SDValue getVectorSubVecPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, EVT SubVecVT, SDValue Index) const
Get a pointer to a sub-vector of type SubVecVT at index Idx located in memory for a vector of type Ve...
virtual void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool isPositionIndependent() const
std::pair< StringRef, TargetLowering::ConstraintType > ConstraintPair
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
ConstraintGroup getConstraintPreferences(AsmOperandInfo &OpInfo) const
Given an OpInfo with list of constraints codes as strings, return a sorted Vector of pairs of constra...
bool expandFP_TO_SINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const
Expand float(f32) to SINT(i64) conversion.
virtual SDValue SimplifyMultipleUseDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth) const
More limited version of SimplifyDemandedBits that can be used to "look through" ops that don't contri...
virtual SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Glue, const SDLoc &DL, const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const
SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, SDValue N1, MutableArrayRef< int > Mask, SelectionDAG &DAG) const
Tries to build a legal vector shuffle using the provided parameters or equivalent variations.
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
std::pair< SDValue, SDValue > scalarizeVectorLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Turn load of vector type into a load of the individual elements.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
void forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl, bool Signed, EVT WideVT, const SDValue LL, const SDValue LH, const SDValue RL, const SDValue RH, SDValue &Lo, SDValue &Hi) const
forceExpandWideMUL - Unconditionally expand a MUL into either a libcall or brute force via a wide mul...
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
virtual bool isDesirableToCommuteXorWithShift(const SDNode *N) const
Return true if it is profitable to combine an XOR of a logical shift to create a logical shift of NOT...
TargetLowering(const TargetLowering &)=delete
virtual bool shouldSimplifyDemandedVectorElts(SDValue Op, const TargetLoweringOpt &TLO) const
Return true if the target supports simplifying demanded vector elements by converting them to undefs.
bool isConstFalseVal(SDValue N) const
Return if the N is a constant or constant vector equal to the false value from getBooleanContents().
SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::UDIV node expressing a divide by constant, return a DAG expression to select that will ...
SDValue IncrementMemoryAddress(SDValue Addr, SDValue Mask, const SDLoc &DL, EVT DataVT, SelectionDAG &DAG, bool IsCompressedMemory) const
Increments memory address Addr according to the type of the value DataVT that should be stored.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const
Check whether a given call node is in tail position within its function.
virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL, const TargetRegisterInfo *TRI, const CallBase &Call) const
Split up the constraint string from the inline assembly value into the specific constraints and their...
virtual bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts, APInt &UndefElts, const SelectionDAG &DAG, unsigned Depth=0) const
Return true if vector Op has the same value across all DemandedElts, indicating any elements which ma...
SDValue expandRoundInexactToOdd(EVT ResultVT, SDValue Op, const SDLoc &DL, SelectionDAG &DAG) const
Truncate Op to ResultVT.
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, DAGCombinerInfo &DCI, const SDLoc &dl) const
Try to simplify a setcc built with the specified operands and cc.
SDValue expandFunnelShift(SDNode *N, SelectionDAG &DAG) const
Expand funnel shift.
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const
Return true if folding a constant offset with the given GlobalAddress is legal.
bool LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, SDValue Mask, SDValue EVL, bool &NeedInvert, const SDLoc &dl, SDValue &Chain, bool IsSignaling=false) const
Legalize a SETCC or VP_SETCC with given LHS and RHS and condition code CC on the current target.
bool isExtendedTrueVal(const ConstantSDNode *N, EVT VT, bool SExt) const
Return if N is a True value when extended to VT.
bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth, const APInt &DemandedBits, TargetLoweringOpt &TLO) const
Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
bool isConstTrueVal(SDValue N) const
Return if the N is a constant or constant vector equal to the true value from getBooleanContents().
SDValue expandVPCTPOP(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTPOP nodes.
SDValue expandFixedPointDiv(unsigned Opcode, const SDLoc &dl, SDValue LHS, SDValue RHS, unsigned Scale, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]DIVFIX[SAT].
SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index) const
Get a pointer to vector element Idx located in memory for a vector of type VecVT starting at a base a...
virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo, SDValue Op, SelectionDAG *DAG=nullptr) const
Determines the constraint code and constraint type to use for the specific AsmOperandInfo,...
virtual void CollectTargetIntrinsicOperands(const CallInst &I, SmallVectorImpl< SDValue > &Ops, SelectionDAG &DAG) const
SDValue expandVPCTTZ(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTTZ/VP_CTTZ_ZERO_UNDEF nodes.
virtual const Constant * getTargetConstantFromLoad(LoadSDNode *LD) const
This method returns the constant pool value that will be loaded by LD.
SDValue expandFP_ROUND(SDNode *Node, SelectionDAG &DAG) const
Expand round(fp) to fp conversion.
SDValue createSelectForFMINNUM_FMAXNUM(SDNode *Node, SelectionDAG &DAG) const
Try to convert the fminnum/fmaxnum to a compare/select sequence.
SDValue expandROT(SDNode *N, bool AllowVectorOps, SelectionDAG &DAG) const
Expand rotations.
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const
Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs.
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset.
virtual bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, unsigned Depth) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
SDValue expandCMP(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]CMP.
void expandShiftParts(SDNode *N, SDValue &Lo, SDValue &Hi, SelectionDAG &DAG) const
Expand shift-by-parts.
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
This method will be invoked for all target nodes and for any target-independent nodes that the target...
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
SDValue expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[U|S]MULFIX[SAT].
SDValue expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][MIN|MAX].
virtual void computeKnownBitsForTargetInstr(GISelKnownBits &Analysis, Register R, KnownBits &Known, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
void expandUADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::U(ADD|SUB)O.
SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::SDIV node expressing a divide by constant, return a DAG expression to select that will ...
virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
virtual SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SREM lowering for power-of-2 denominators.
bool expandUINT_TO_FP(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const
Expand UINT(i64) to double(f64) conversion.
bool expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl, SDValue LHS, SDValue RHS, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, MulExpansionKind Kind, SDValue LL=SDValue(), SDValue LH=SDValue(), SDValue RL=SDValue(), SDValue RH=SDValue()) const
Expand a MUL or [US]MUL_LOHI of n-bit values into two or four nodes, respectively,...
SDValue expandAVG(SDNode *N, SelectionDAG &DAG) const
Expand vector/scalar AVGCEILS/AVGCEILU/AVGFLOORS/AVGFLOORU nodes.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
bool isPositionIndependent() const
const Triple & getTargetTriple() const
TargetOptions Options
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
iterator_range< regclass_iterator > regclasses() const
virtual StringRef getRegAsmName(MCRegister Reg) const
Return the assembly name for Reg.
bool isTypeLegalForClass(const TargetRegisterClass &RC, MVT T) const
Return true if the given TargetRegisterClass has the ValueType T.
bool isOSBinFormatCOFF() const
Tests whether the OS uses the COFF binary format.
Definition: Triple.h:724
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:342
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
const fltSemantics & getFltSemantics() const
bool isSingleValueType() const
Return true if the type is a valid type for a register in codegen.
Definition: Type.h:287
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition: Type.h:302
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:129
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:199
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth, bool MatchAllBits=false)
Splat/Merge neighboring bits to widen/narrow the bitmask represented by A to NewBitWidth bits.
Definition: APInt.cpp:2978
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:764
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:243
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:737
@ FGETSIGN
INT = FGETSIGN(FP) - Return the sign bit of the specified floating point value as an integer 0/1 valu...
Definition: ISDOpcodes.h:505
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:257
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:567
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:728
@ SMULFIX
RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on 2 integers with the same...
Definition: ISDOpcodes.h:374
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:276
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition: ISDOpcodes.h:495
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1052
@ SMULFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition: ISDOpcodes.h:380
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:797
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:491
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:205
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:551
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:397
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition: ISDOpcodes.h:834
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:262
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:917
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:236
@ SDIVFIX
RESULT = [US]DIVFIX(LHS, RHS, SCALE) - Perform fixed point division on 2 integers with the same width...
Definition: ISDOpcodes.h:387
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1431
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:788
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition: ISDOpcodes.h:670
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:628
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:736
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:944
@ SSUBO
Same for subtraction.
Definition: ISDOpcodes.h:334
@ BRIND
BRIND - Indirect branch.
Definition: ISDOpcodes.h:1073
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition: ISDOpcodes.h:508
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition: ISDOpcodes.h:515
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:356
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:741
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:229
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:635
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:215
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:330
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:659
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:719
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:608
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:581
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
Definition: ISDOpcodes.h:999
@ STRICT_FMAXNUM
Definition: ISDOpcodes.h:432
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:543
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:794
@ STRICT_FMINNUM
Definition: ISDOpcodes.h:433
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:756
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:986
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
Definition: ISDOpcodes.h:366
@ SMULO
Same for multiplication.
Definition: ISDOpcodes.h:338
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition: ISDOpcodes.h:823
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:812
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:682
@ SDIVFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition: ISDOpcodes.h:393
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:902
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:750
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:310
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:450
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:1005
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:850
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition: ISDOpcodes.h:164
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:694
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:190
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition: ISDOpcodes.h:665
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:286
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:532
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ VECTOR_SPLICE
VECTOR_SPLICE(VEC1, VEC2, IMM) - Returns a subvector of the same type as VEC1/VEC2 from CONCAT_VECTOR...
Definition: ISDOpcodes.h:620
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:883
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition: ISDOpcodes.h:845
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition: ISDOpcodes.h:869
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:800
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:777
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:347
@ AssertZext
Definition: ISDOpcodes.h:62
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:198
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:523
bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
NodeType getExtForLoadExtType(bool IsFP, LoadExtType)
bool matchUnaryPredicate(SDValue Op, std::function< bool(ConstantSDNode *)> Match, bool AllowUndefs=false)
Hook for matching ConstantSDNode predicate.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isTrueWhenEqual(CondCode Cond)
Return true if the specified condition returns true if the two operands to the condition are equal.
Definition: ISDOpcodes.h:1612
unsigned getUnorderedFlavor(CondCode Cond)
This function returns 0 if the condition is always false if an operand is a NaN, 1 if the condition i...
Definition: ISDOpcodes.h:1617
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
Definition: ISDOpcodes.h:1587
bool matchBinaryPredicate(SDValue LHS, SDValue RHS, std::function< bool(ConstantSDNode *, ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTypeMismatch=false)
Attempt to match a binary predicate against a pair of scalar/splat constants or every element of a pa...
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1554
NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1534
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
FPClassTest invertFPClassTestIfSimpler(FPClassTest Test)
Evaluates if the specified FP class test is better performed as the inverse (i.e.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition: bit.h:342
ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
bool getShuffleDemandedElts(int SrcWidth, ArrayRef< int > Mask, const APInt &DemandedElts, APInt &DemandedLHS, APInt &DemandedRHS, bool AllowUndefElts=false)
Transform a shuffle mask's output demanded element mask into demanded element masks for the 2 operand...
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:324
bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:275
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
auto find_if_not(R &&Range, UnaryPredicate P)
Definition: STLExtras.h:1754
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
@ Other
Any other memory.
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ Add
Sum of integers.
DWARFExpression::Operation Op
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isConstFalseVal(const TargetLowering &TLI, int64_t Val, bool IsVector, bool IsFP)
Definition: Utils.cpp:1600
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
APFloat neg(APFloat X)
Returns the negated value of the argument.
Definition: APFloat.h:1440
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the largest uint64_t less than or equal to Value and is Skew mod Align.
Definition: MathExtras.h:483
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition: MathExtras.h:360
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:246
static constexpr roundingMode rmTowardZero
Definition: APFloat.h:250
opStatus
IEEE-754R 7: Default exception handling.
Definition: APFloat.h:262
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Represent subnormal handling kind for floating point instruction inputs and outputs.
DenormalModeKind Input
Denormal treatment kind for floating point instruction inputs in the default floating-point environme...
@ PreserveSign
The sign of a flushed-to-zero number is preserved in the sign of 0.
@ PositiveZero
Denormals are flushed to positive zero.
@ IEEE
IEEE-754 denormal numbers preserved.
constexpr bool inputsAreZero() const
Return true if input denormals must be implicitly treated as 0.
Extended Value Type.
Definition: ValueTypes.h:34
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:380
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:136
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:73
EVT changeTypeToInteger() const
Return the type converted to an equivalently sized integer or vector with integer element type.
Definition: ValueTypes.h:120
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:274
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:290
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:146
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:340
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:358
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition: ValueTypes.h:233
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition: ValueTypes.h:349
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:370
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
Definition: ValueTypes.h:415
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
Definition: ValueTypes.h:455
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
Definition: ValueTypes.h:397
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:306
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:64
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:366
bool isFixedLengthVector() const
Definition: ValueTypes.h:177
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:167
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:313
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:203
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition: ValueTypes.h:173
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:318
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:156
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition: ValueTypes.h:101
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:326
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:298
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition: ValueTypes.h:438
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:151
ConstraintPrefix Type
Type - The basic type of the constraint: input/output/clobber/label.
Definition: InlineAsm.h:126
int MatchingInput
MatchingInput - If this is not -1, this is an output constraint where an input constraint is required...
Definition: InlineAsm.h:136
ConstraintCodeVector Codes
Code - The constraint code, either the register name (in braces) or the constraint letter/number.
Definition: InlineAsm.h:154
SubConstraintInfoVector multipleAlternatives
multipleAlternatives - If there are multiple alternative constraints, this array will contain them.
Definition: InlineAsm.h:161
bool isIndirect
isIndirect - True if this operand is an indirect operand.
Definition: InlineAsm.h:150
bool hasMatchingInput() const
hasMatchingInput - Return true if this is an output constraint that has a matching input constraint.
Definition: InlineAsm.h:140
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
Definition: KnownBits.h:290
KnownBits anyextOrTrunc(unsigned BitWidth) const
Return known bits for an "any" extension or truncation of the value we're tracking.
Definition: KnownBits.h:175
unsigned countMinSignBits() const
Returns the number of times the sign bit is replicated into the other bits.
Definition: KnownBits.h:244
static KnownBits smax(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for smax(LHS, RHS).
Definition: KnownBits.cpp:202
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition: KnownBits.h:97
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition: KnownBits.h:231
bool isUnknown() const
Returns true if we don't know any bits.
Definition: KnownBits.h:62
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
Definition: KnownBits.h:150
static std::optional< bool > sge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGE result.
Definition: KnownBits.cpp:536
unsigned countMaxPopulation() const
Returns the maximum number of bits that could be one.
Definition: KnownBits.h:278
KnownBits concat(const KnownBits &Lo) const
Concatenate the bits from Lo onto the bottom of *this.
Definition: KnownBits.h:222
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:40
static KnownBits umax(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for umax(LHS, RHS).
Definition: KnownBits.cpp:178
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition: KnownBits.h:161
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:70
KnownBits unionWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for either this or RHS or both.
Definition: KnownBits.h:310
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition: KnownBits.h:300
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition: KnownBits.h:169
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition: KnownBits.h:237
static KnownBits smin(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for smin(LHS, RHS).
Definition: KnownBits.cpp:215
static std::optional< bool > ugt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGT result.
Definition: KnownBits.cpp:502
static std::optional< bool > slt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLT result.
Definition: KnownBits.cpp:542
static KnownBits computeForAddSub(bool Add, bool NSW, bool NUW, const KnownBits &LHS, const KnownBits &RHS)
Compute known bits resulting from adding LHS and RHS.
Definition: KnownBits.cpp:51
static std::optional< bool > ult(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULT result.
Definition: KnownBits.cpp:518
static std::optional< bool > ule(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULE result.
Definition: KnownBits.cpp:522
bool isNegative() const
Returns true if this value is known to be negative.
Definition: KnownBits.h:94
static KnownBits mul(const KnownBits &LHS, const KnownBits &RHS, bool NoUndefSelfMultiply=false)
Compute known bits resulting from multiplying LHS and RHS.
Definition: KnownBits.cpp:797
KnownBits anyext(unsigned BitWidth) const
Return known bits for an "any" extension of the value we're tracking, where we don't know anything about the extended bits.
Definition: KnownBits.h:156
static std::optional< bool > sle(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLE result.
Definition: KnownBits.cpp:546
static std::optional< bool > sgt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGT result.
Definition: KnownBits.cpp:526
unsigned countMinPopulation() const
Returns the number of bits known to be one.
Definition: KnownBits.h:275
static std::optional< bool > uge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGE result.
Definition: KnownBits.cpp:512
static KnownBits umin(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for umin(LHS, RHS).
Definition: KnownBits.cpp:196
This class contains a discriminated union of information about pointers in memory operands, relating them back to LLVM IR or to virtual locations (such as frame indices) that are exposed during codegen.
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
static MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasNoUnsignedWrap() const
bool hasNoSignedWrap() const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
Magic data for optimising signed division by a constant.
static SignedDivisionByConstantInfo get(const APInt &D)
Calculate the magic numbers required to implement a signed integer division by a constant as a sequence of multiplies, adds and shifts.
This contains information for each constraint that we are lowering.
MVT ConstraintVT
The ValueType for the operand value.
TargetLowering::ConstraintType ConstraintType
Information about the constraint code, e.g.
std::string ConstraintCode
This contains the actual string for the code, like "m".
Value * CallOperandVal
If this is the result output operand or a clobber, this is null, otherwise it is the incoming operand to the CallInst. This gets modified as the asm is processed.
unsigned getMatchedOperand() const
If this is an input matching constraint, this method returns the output operand it matches.
bool isMatchingInputConstraint() const
Return true of this is an input operand that is a matching constraint like "4".
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setIsPostTypeLegalization(bool Value=true)
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
CallLoweringInfo & setDiscardResult(bool Value=true)
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
CallLoweringInfo & setNoReturn(bool Value=true)
CallLoweringInfo & setChain(SDValue InChain)
void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setIsPostTypeLegalization(bool Value=true)
MakeLibCallOptions & setSExt(bool Value=true)
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT, bool Value=true)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
bool CombineTo(SDValue O, SDValue N)
Magic data for optimising unsigned division by a constant.
static UnsignedDivisionByConstantInfo get(const APInt &D, unsigned LeadingZeros=0, bool AllowEvenDivisorOptimization=true)
Calculate the magic numbers required to implement an unsigned integer division by a constant as a sequence of multiplies, adds and shifts.