LLVM 23.0.0git
LegalizeVectorOps.cpp
Go to the documentation of this file.
1//===- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the SelectionDAG::LegalizeVectors method.
10//
11// The vector legalizer looks for vector operations which might need to be
12// scalarized and legalizes them. This is a separate step from Legalize because
13// scalarizing can introduce illegal types. For example, suppose we have an
14// ISD::SDIV of type v2i64 on x86-32. The type is legal (for example, addition
15// on a v2i64 is legal), but ISD::SDIV isn't legal, so we have to unroll the
16// operation, which introduces nodes with the illegal type i64 which must be
17// expanded. Similarly, suppose we have an ISD::SRA of type v16i8 on PowerPC;
18// the operation must be unrolled, which introduces nodes with the illegal
19// type i8 which must be promoted.
20//
21// This does not legalize vector manipulations like ISD::BUILD_VECTOR,
22// or operations that happen to take a vector which are custom-lowered;
23// the legalization for such operations never produces nodes
24// with illegal types, so it's okay to put off legalizing them until
25// SelectionDAG::Legalize runs.
26//
27//===----------------------------------------------------------------------===//
28
29#include "llvm/ADT/DenseMap.h"
39#include "llvm/IR/DataLayout.h"
42#include "llvm/Support/Debug.h"
44#include <cassert>
45#include <cstdint>
46#include <iterator>
47#include <utility>
48
49using namespace llvm;
50
51#define DEBUG_TYPE "legalizevectorops"
52
53namespace {
54
55class VectorLegalizer {
56 SelectionDAG& DAG;
57 const TargetLowering &TLI;
58 bool Changed = false; // Keep track of whether anything changed
59
60 /// For nodes that are of legal width, and that have more than one use, this
61 /// map indicates what regularized operand to use. This allows us to avoid
62 /// legalizing the same thing more than once.
64
65 /// Adds a node to the translation cache.
66 void AddLegalizedOperand(SDValue From, SDValue To) {
67 LegalizedNodes.insert(std::make_pair(From, To));
68 // If someone requests legalization of the new node, return itself.
69 if (From != To)
70 LegalizedNodes.insert(std::make_pair(To, To));
71 }
72
73 /// Legalizes the given node.
74 SDValue LegalizeOp(SDValue Op);
75
76 /// Assuming the node is legal, "legalize" the results.
77 SDValue TranslateLegalizeResults(SDValue Op, SDNode *Result);
78
79 /// Make sure Results are legal and update the translation cache.
80 SDValue RecursivelyLegalizeResults(SDValue Op,
82
83 /// Wrapper to interface LowerOperation with a vector of Results.
84 /// Returns false if the target wants to use default expansion. Otherwise
85 /// returns true. If return is true and the Results are empty, then the
86 /// target wants to keep the input node as is.
87 bool LowerOperationWrapper(SDNode *N, SmallVectorImpl<SDValue> &Results);
88
89 /// Implements unrolling a VSETCC.
90 SDValue UnrollVSETCC(SDNode *Node);
91
92 /// Implement expand-based legalization of vector operations.
93 ///
94 /// This is just a high-level routine to dispatch to specific code paths for
95 /// operations to legalize them.
97
98 /// Implements expansion for FP_TO_UINT; falls back to UnrollVectorOp if
99 /// FP_TO_SINT isn't legal.
100 void ExpandFP_TO_UINT(SDNode *Node, SmallVectorImpl<SDValue> &Results);
101
102 /// Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if
103 /// SINT_TO_FLOAT and SHR on vectors isn't legal.
104 void ExpandUINT_TO_FLOAT(SDNode *Node, SmallVectorImpl<SDValue> &Results);
105
106 /// Implement expansion for SIGN_EXTEND_INREG using SRL and SRA.
107 SDValue ExpandSEXTINREG(SDNode *Node);
108
109 /// Implement expansion for ANY_EXTEND_VECTOR_INREG.
110 ///
111 /// Shuffles the low lanes of the operand into place and bitcasts to the proper
112 /// type. The contents of the bits in the extended part of each element are
113 /// undef.
114 SDValue ExpandANY_EXTEND_VECTOR_INREG(SDNode *Node);
115
116 /// Implement expansion for SIGN_EXTEND_VECTOR_INREG.
117 ///
118 /// Shuffles the low lanes of the operand into place, bitcasts to the proper
119 /// type, then shifts left and arithmetic shifts right to introduce a sign
120 /// extension.
121 SDValue ExpandSIGN_EXTEND_VECTOR_INREG(SDNode *Node);
122
123 /// Implement expansion for ZERO_EXTEND_VECTOR_INREG.
124 ///
125 /// Shuffles the low lanes of the operand into place and blends zeros into
126 /// the remaining lanes, finally bitcasting to the proper type.
127 SDValue ExpandZERO_EXTEND_VECTOR_INREG(SDNode *Node);
128
129 /// Expand bswap of vectors into a shuffle if legal.
130 SDValue ExpandBSWAP(SDNode *Node);
131
132 /// Implement vselect in terms of XOR, AND, OR when blend is not
133 /// supported by the target.
134 SDValue ExpandVSELECT(SDNode *Node);
135 SDValue ExpandVP_SELECT(SDNode *Node);
136 SDValue ExpandVP_MERGE(SDNode *Node);
137 SDValue ExpandVP_REM(SDNode *Node);
138 SDValue ExpandVP_FNEG(SDNode *Node);
139 SDValue ExpandVP_FABS(SDNode *Node);
140 SDValue ExpandVP_FCOPYSIGN(SDNode *Node);
141 SDValue ExpandLOOP_DEPENDENCE_MASK(SDNode *N);
142 SDValue ExpandSELECT(SDNode *Node);
143 std::pair<SDValue, SDValue> ExpandLoad(SDNode *N);
144 SDValue ExpandStore(SDNode *N);
145 SDValue ExpandFNEG(SDNode *Node);
146 SDValue ExpandFABS(SDNode *Node);
147 SDValue ExpandFCOPYSIGN(SDNode *Node);
148 void ExpandFSUB(SDNode *Node, SmallVectorImpl<SDValue> &Results);
149 void ExpandSETCC(SDNode *Node, SmallVectorImpl<SDValue> &Results);
150 SDValue ExpandBITREVERSE(SDNode *Node);
151 void ExpandUADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
152 void ExpandSADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
153 void ExpandMULO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
154 void ExpandFixedPointDiv(SDNode *Node, SmallVectorImpl<SDValue> &Results);
155 void ExpandStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results);
156 void ExpandREM(SDNode *Node, SmallVectorImpl<SDValue> &Results);
157
158 bool tryExpandVecMathCall(SDNode *Node, RTLIB::Libcall LC,
160
161 void UnrollStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results);
162
163 /// Implements vector promotion.
164 ///
165 /// This is essentially just bitcasting the operands to a different type and
166 /// bitcasting the result back to the original type.
168
169 /// Implements [SU]INT_TO_FP vector promotion.
170 ///
171 /// This is a [zs]ext of the input operand to a larger integer type.
172 void PromoteINT_TO_FP(SDNode *Node, SmallVectorImpl<SDValue> &Results);
173
174 /// Implements FP_TO_[SU]INT vector promotion of the result type.
175 ///
176 /// It is promoted to a larger integer type. The result is then
177 /// truncated back to the original type.
178 void PromoteFP_TO_INT(SDNode *Node, SmallVectorImpl<SDValue> &Results);
179
180 /// Implements vector setcc operation promotion.
181 ///
182 /// All vector operands are promoted to a vector type with larger element
183 /// type.
184 void PromoteSETCC(SDNode *Node, SmallVectorImpl<SDValue> &Results);
185
186 void PromoteSTRICT(SDNode *Node, SmallVectorImpl<SDValue> &Results);
187
188 /// Calculate the reduction using a type of higher precision and round the
189 /// result to match the original type. Setting NonArithmetic signifies the
190 /// rounding of the result does not affect its value.
191 void PromoteFloatVECREDUCE(SDNode *Node, SmallVectorImpl<SDValue> &Results,
192 bool NonArithmetic);
193
194 void PromoteVECTOR_COMPRESS(SDNode *Node, SmallVectorImpl<SDValue> &Results);
195
196public:
197 VectorLegalizer(SelectionDAG& dag) :
198 DAG(dag), TLI(dag.getTargetLoweringInfo()) {}
199
200 /// Begin legalizer the vector operations in the DAG.
201 bool Run();
202};
203
204} // end anonymous namespace
205
206bool VectorLegalizer::Run() {
207 // Before we start legalizing vector nodes, check if there are any vectors.
208 bool HasVectors = false;
210 E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I) {
211 // Check if the values of the nodes contain vectors. We don't need to check
212 // the operands because we are going to check their values at some point.
213 HasVectors = llvm::any_of(I->values(), [](EVT T) { return T.isVector(); });
214
215 // If we found a vector node we can start the legalization.
216 if (HasVectors)
217 break;
218 }
219
220 // If this basic block has no vectors then no need to legalize vectors.
221 if (!HasVectors)
222 return false;
223
224 // The legalize process is inherently a bottom-up recursive process (users
225 // legalize their uses before themselves). Given infinite stack space, we
226 // could just start legalizing on the root and traverse the whole graph. In
227 // practice however, this causes us to run out of stack space on large basic
228 // blocks. To avoid this problem, compute an ordering of the nodes where each
229 // node is only legalized after all of its operands are legalized.
232 E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I)
233 LegalizeOp(SDValue(&*I, 0));
234
235 // Finally, it's possible the root changed. Get the new root.
236 SDValue OldRoot = DAG.getRoot();
237 assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?");
238 DAG.setRoot(LegalizedNodes[OldRoot]);
239
240 LegalizedNodes.clear();
241
242 // Remove dead nodes now.
243 DAG.RemoveDeadNodes();
244
245 return Changed;
246}
247
248SDValue VectorLegalizer::TranslateLegalizeResults(SDValue Op, SDNode *Result) {
249 assert(Op->getNumValues() == Result->getNumValues() &&
250 "Unexpected number of results");
251 // Generic legalization: just pass the operand through.
252 for (unsigned i = 0, e = Op->getNumValues(); i != e; ++i)
253 AddLegalizedOperand(Op.getValue(i), SDValue(Result, i));
254 return SDValue(Result, Op.getResNo());
255}
256
258VectorLegalizer::RecursivelyLegalizeResults(SDValue Op,
260 assert(Results.size() == Op->getNumValues() &&
261 "Unexpected number of results");
262 // Make sure that the generated code is itself legal.
263 for (unsigned i = 0, e = Results.size(); i != e; ++i) {
264 Results[i] = LegalizeOp(Results[i]);
265 AddLegalizedOperand(Op.getValue(i), Results[i]);
266 }
267
268 return Results[Op.getResNo()];
269}
270
// Legalize a single node: decide the target's LegalizeAction for it, then
// promote/expand/custom-lower accordingly. Results are cached so re-entrant
// calls (even from single-use nodes) do not re-legalize.
SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
  // Note that LegalizeOp may be reentered even from single-use nodes, which
  // means that we always must cache transformed nodes.
  DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
  if (I != LegalizedNodes.end()) return I->second;

  // Legalize the operands
  // NOTE(review): the declaration of `Ops` (presumably a
  // SmallVector<SDValue, 8>) appears to have been dropped from this copy —
  // verify against upstream.
  for (const SDValue &Oper : Op->op_values())
    Ops.push_back(LegalizeOp(Oper));

  SDNode *Node = DAG.UpdateNodeOperands(Op.getNode(), Ops);

  // Nodes that neither produce nor consume a vector are not our concern.
  bool HasVectorValueOrOp =
      llvm::any_of(Node->values(), [](EVT T) { return T.isVector(); }) ||
      llvm::any_of(Node->op_values(),
                   [](SDValue O) { return O.getValueType().isVector(); });
  if (!HasVectorValueOrOp)
    return TranslateLegalizeResults(Op, Node);

  TargetLowering::LegalizeAction Action = TargetLowering::Legal;
  EVT ValVT;
  switch (Op.getOpcode()) {
  default:
    return TranslateLegalizeResults(Op, Node);
  case ISD::LOAD: {
    LoadSDNode *LD = cast<LoadSDNode>(Node);
    ISD::LoadExtType ExtType = LD->getExtensionType();
    EVT LoadedVT = LD->getMemoryVT();
    // Only extending vector loads need a legalization decision here.
    if (LoadedVT.isVector() && ExtType != ISD::NON_EXTLOAD)
      Action = TLI.getLoadAction(LD->getValueType(0), LoadedVT, LD->getAlign(),
                                 LD->getAddressSpace(), ExtType, false);
    break;
  }
  case ISD::STORE: {
    StoreSDNode *ST = cast<StoreSDNode>(Node);
    EVT StVT = ST->getMemoryVT();
    MVT ValVT = ST->getValue().getSimpleValueType();
    // Only truncating vector stores need a legalization decision here.
    if (StVT.isVector() && ST->isTruncatingStore())
      Action = TLI.getTruncStoreAction(ValVT, StVT);
    break;
  }
  // NOTE(review): a case label appears to be missing here in this copy
  // (likely ISD::MERGE_VALUES) — verify against upstream.
    Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
    // This operation lies about being legal: when it claims to be legal,
    // it should actually be expanded.
    if (Action == TargetLowering::Legal)
      Action = TargetLowering::Expand;
    break;
#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
  case ISD::STRICT_##DAGN:
#include "llvm/IR/ConstrainedOps.def"
    ValVT = Node->getValueType(0);
    // For strict int<->fp conversions the operand type drives legality.
    if (Op.getOpcode() == ISD::STRICT_SINT_TO_FP ||
        Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
      ValVT = Node->getOperand(1).getValueType();
    if (Op.getOpcode() == ISD::STRICT_FSETCC ||
        Op.getOpcode() == ISD::STRICT_FSETCCS) {
      MVT OpVT = Node->getOperand(1).getSimpleValueType();
      ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(3))->get();
      Action = TLI.getCondCodeAction(CCCode, OpVT);
      if (Action == TargetLowering::Legal)
        Action = TLI.getOperationAction(Node->getOpcode(), OpVT);
    } else {
      Action = TLI.getOperationAction(Node->getOpcode(), ValVT);
    }
    // If we're asked to expand a strict vector floating-point operation,
    // by default we're going to simply unroll it. That is usually the
    // best approach, except in the case where the resulting strict (scalar)
    // operations would themselves use the fallback mutation to non-strict.
    // In that specific case, just do the fallback on the vector op.
    if (Action == TargetLowering::Expand && !TLI.isStrictFPEnabled() &&
        TLI.getStrictFPOperationAction(Node->getOpcode(), ValVT) ==
            TargetLowering::Legal) {
      EVT EltVT = ValVT.getVectorElementType();
      if (TLI.getOperationAction(Node->getOpcode(), EltVT)
          == TargetLowering::Expand &&
          TLI.getStrictFPOperationAction(Node->getOpcode(), EltVT)
          == TargetLowering::Legal)
        Action = TargetLowering::Legal;
    }
    break;
  case ISD::ADD:
  case ISD::SUB:
  case ISD::MUL:
  case ISD::MULHS:
  case ISD::MULHU:
  case ISD::SDIV:
  case ISD::UDIV:
  case ISD::SREM:
  case ISD::UREM:
  case ISD::SDIVREM:
  case ISD::UDIVREM:
  case ISD::FADD:
  case ISD::FSUB:
  case ISD::FMUL:
  case ISD::FDIV:
  case ISD::FREM:
  case ISD::AND:
  case ISD::OR:
  case ISD::XOR:
  case ISD::SHL:
  case ISD::SRA:
  case ISD::SRL:
  case ISD::FSHL:
  case ISD::FSHR:
  case ISD::ROTL:
  case ISD::ROTR:
  case ISD::ABS:
  case ISD::ABDS:
  case ISD::ABDU:
  case ISD::AVGCEILS:
  case ISD::AVGCEILU:
  case ISD::AVGFLOORS:
  case ISD::AVGFLOORU:
  case ISD::BSWAP:
  case ISD::BITREVERSE:
  case ISD::CTLZ:
  case ISD::CTTZ:
  // NOTE(review): case labels appear to be missing here in this copy (likely
  // the CTLZ/CTTZ _ZERO_UNDEF variants) — verify against upstream.
  case ISD::CTPOP:
  case ISD::CLMUL:
  case ISD::CLMULH:
  case ISD::CLMULR:
  case ISD::SELECT:
  case ISD::VSELECT:
  case ISD::SELECT_CC:
  case ISD::ZERO_EXTEND:
  case ISD::ANY_EXTEND:
  case ISD::TRUNCATE:
  case ISD::SIGN_EXTEND:
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:
  case ISD::FNEG:
  case ISD::FABS:
  case ISD::FMINNUM:
  case ISD::FMAXNUM:
  // NOTE(review): case labels appear to be missing here in this copy (likely
  // the FMINNUM/FMAXNUM _IEEE variants) — verify against upstream.
  case ISD::FMINIMUM:
  case ISD::FMAXIMUM:
  case ISD::FMINIMUMNUM:
  case ISD::FMAXIMUMNUM:
  case ISD::FCOPYSIGN:
  case ISD::FSQRT:
  case ISD::FSIN:
  case ISD::FCOS:
  case ISD::FTAN:
  case ISD::FASIN:
  case ISD::FACOS:
  case ISD::FATAN:
  case ISD::FATAN2:
  case ISD::FSINH:
  case ISD::FCOSH:
  case ISD::FTANH:
  case ISD::FLDEXP:
  case ISD::FPOWI:
  case ISD::FPOW:
  case ISD::FCBRT:
  case ISD::FLOG:
  case ISD::FLOG2:
  case ISD::FLOG10:
  case ISD::FEXP:
  case ISD::FEXP2:
  case ISD::FEXP10:
  case ISD::FCEIL:
  case ISD::FTRUNC:
  case ISD::FRINT:
  case ISD::FNEARBYINT:
  case ISD::FROUND:
  case ISD::FROUNDEVEN:
  case ISD::FFLOOR:
  case ISD::FP_ROUND:
  case ISD::FP_EXTEND:
  // NOTE(review): a case label appears to be missing here in this copy —
  // verify against upstream.
  case ISD::FMA:
  // NOTE(review): several case labels appear to be missing here in this copy
  // (likely SIGN_EXTEND_INREG and the *_EXTEND_VECTOR_INREG opcodes) —
  // verify against upstream.
  case ISD::SMIN:
  case ISD::SMAX:
  case ISD::UMIN:
  case ISD::UMAX:
  case ISD::SMUL_LOHI:
  case ISD::UMUL_LOHI:
  case ISD::SADDO:
  case ISD::UADDO:
  case ISD::SSUBO:
  case ISD::USUBO:
  case ISD::SMULO:
  case ISD::UMULO:
  // NOTE(review): case labels appear to be missing here in this copy —
  // verify against upstream.
  case ISD::FFREXP:
  case ISD::FMODF:
  case ISD::FSINCOS:
  case ISD::FSINCOSPI:
  case ISD::SADDSAT:
  case ISD::UADDSAT:
  case ISD::SSUBSAT:
  case ISD::USUBSAT:
  case ISD::SSHLSAT:
  case ISD::USHLSAT:
  // NOTE(review): case labels appear to be missing here in this copy (likely
  // the FP_TO_*INT_SAT opcodes) — verify against upstream.
  case ISD::MGATHER:
  // NOTE(review): a case label appears to be missing here in this copy
  // (likely ISD::MSCATTER) — verify against upstream.
  case ISD::SCMP:
  case ISD::UCMP:
  // NOTE(review): case labels appear to be missing here in this copy —
  // verify against upstream.
    Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
    break;
  case ISD::SMULFIX:
  case ISD::SMULFIXSAT:
  case ISD::UMULFIX:
  case ISD::UMULFIXSAT:
  case ISD::SDIVFIX:
  case ISD::SDIVFIXSAT:
  case ISD::UDIVFIX:
  case ISD::UDIVFIXSAT: {
    // Fixed-point ops carry their scale as a constant operand; legality
    // depends on it.
    unsigned Scale = Node->getConstantOperandVal(2);
    Action = TLI.getFixedPointOperationAction(Node->getOpcode(),
                                              Node->getValueType(0), Scale);
    break;
  }
  case ISD::LROUND:
  case ISD::LLROUND:
  case ISD::LRINT:
  case ISD::LLRINT:
  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP:
  // NOTE(review): a run of case labels appears to be missing here in this
  // copy (likely the VECREDUCE_* opcodes) — verify against upstream.
    // For these, legality is decided by the *operand* type, not the result.
    Action = TLI.getOperationAction(Node->getOpcode(),
                                    Node->getOperand(0).getValueType());
    break;
  // NOTE(review): case labels appear to be missing here in this copy (likely
  // the VECREDUCE_SEQ_* opcodes, which key off operand 1) — verify against
  // upstream.
    Action = TLI.getOperationAction(Node->getOpcode(),
                                    Node->getOperand(1).getValueType());
    break;
  case ISD::SETCC: {
    MVT OpVT = Node->getOperand(0).getSimpleValueType();
    ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get();
    // The condition code must be legal before the operation itself is
    // considered.
    Action = TLI.getCondCodeAction(CCCode, OpVT);
    if (Action == TargetLowering::Legal)
      Action = TLI.getOperationAction(Node->getOpcode(), OpVT);
    break;
  }
  // NOTE(review): case labels appear to be missing here in this copy (likely
  // the PARTIAL_REDUCE_*MLA opcodes) — verify against upstream.
    Action =
        TLI.getPartialReduceMLAAction(Op.getOpcode(), Node->getValueType(0),
                                      Node->getOperand(1).getValueType());
    break;

#define BEGIN_REGISTER_VP_SDNODE(VPID, LEGALPOS, ...) \
  case ISD::VPID: { \
    EVT LegalizeVT = LEGALPOS < 0 ? Node->getValueType(-(1 + LEGALPOS)) \
                                  : Node->getOperand(LEGALPOS).getValueType(); \
    if (ISD::VPID == ISD::VP_SETCC) { \
      ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get(); \
      Action = TLI.getCondCodeAction(CCCode, LegalizeVT.getSimpleVT()); \
      if (Action != TargetLowering::Legal) \
        break; \
    } \
    /* Defer non-vector results to LegalizeDAG. */ \
    if (!Node->getValueType(0).isVector() && \
        Node->getValueType(0) != MVT::Other) { \
      Action = TargetLowering::Legal; \
      break; \
    } \
    Action = TLI.getOperationAction(Node->getOpcode(), LegalizeVT); \
  } break;
#include "llvm/IR/VPIntrinsics.def"
  }

  LLVM_DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG));

  SmallVector<SDValue, 8> ResultVals;
  switch (Action) {
  default: llvm_unreachable("This action is not supported yet!");
  case TargetLowering::Promote:
    assert((Op.getOpcode() != ISD::LOAD && Op.getOpcode() != ISD::STORE) &&
           "This action is not supported yet!");
    LLVM_DEBUG(dbgs() << "Promoting\n");
    Promote(Node, ResultVals);
    assert(!ResultVals.empty() && "No results for promotion?");
    break;
  case TargetLowering::Legal:
    LLVM_DEBUG(dbgs() << "Legal node: nothing to do\n");
    break;
  case TargetLowering::Custom:
    LLVM_DEBUG(dbgs() << "Trying custom legalization\n");
    if (LowerOperationWrapper(Node, ResultVals))
      break;
    LLVM_DEBUG(dbgs() << "Could not custom legalize node\n");
    [[fallthrough]];
  case TargetLowering::Expand:
    LLVM_DEBUG(dbgs() << "Expanding\n");
    Expand(Node, ResultVals);
    break;
  }

  // Empty ResultVals means the node was kept as-is.
  if (ResultVals.empty())
    return TranslateLegalizeResults(Op, Node);

  Changed = true;
  return RecursivelyLegalizeResults(Op, ResultVals);
}
600
601// FIXME: This is very similar to TargetLowering::LowerOperationWrapper. Can we
602// merge them somehow?
603bool VectorLegalizer::LowerOperationWrapper(SDNode *Node,
604 SmallVectorImpl<SDValue> &Results) {
605 SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
606
607 if (!Res.getNode())
608 return false;
609
610 if (Res == SDValue(Node, 0))
611 return true;
612
613 // If the original node has one result, take the return value from
614 // LowerOperation as is. It might not be result number 0.
615 if (Node->getNumValues() == 1) {
616 Results.push_back(Res);
617 return true;
618 }
619
620 // If the original node has multiple results, then the return node should
621 // have the same number of results.
622 assert((Node->getNumValues() == Res->getNumValues()) &&
623 "Lowering returned the wrong number of results!");
624
625 // Places new result values base on N result number.
626 for (unsigned I = 0, E = Node->getNumValues(); I != E; ++I)
627 Results.push_back(Res.getValue(I));
628
629 return true;
630}
631
632void VectorLegalizer::PromoteSETCC(SDNode *Node,
633 SmallVectorImpl<SDValue> &Results) {
634 MVT VecVT = Node->getOperand(0).getSimpleValueType();
635 MVT NewVecVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VecVT);
636
637 unsigned ExtOp = VecVT.isFloatingPoint() ? ISD::FP_EXTEND : ISD::ANY_EXTEND;
638
639 SDLoc DL(Node);
640 SmallVector<SDValue, 5> Operands(Node->getNumOperands());
641
642 Operands[0] = DAG.getNode(ExtOp, DL, NewVecVT, Node->getOperand(0));
643 Operands[1] = DAG.getNode(ExtOp, DL, NewVecVT, Node->getOperand(1));
644 Operands[2] = Node->getOperand(2);
645
646 if (Node->getOpcode() == ISD::VP_SETCC) {
647 Operands[3] = Node->getOperand(3); // mask
648 Operands[4] = Node->getOperand(4); // evl
649 }
650
651 SDValue Res = DAG.getNode(Node->getOpcode(), DL, Node->getSimpleValueType(0),
652 Operands, Node->getFlags());
653
654 Results.push_back(Res);
655}
656
657void VectorLegalizer::PromoteSTRICT(SDNode *Node,
658 SmallVectorImpl<SDValue> &Results) {
659 MVT VecVT = Node->getOperand(1).getSimpleValueType();
660 MVT NewVecVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VecVT);
661
662 assert(VecVT.isFloatingPoint());
663
664 SDLoc DL(Node);
665 SmallVector<SDValue, 5> Operands(Node->getNumOperands());
667
668 for (unsigned j = 1; j != Node->getNumOperands(); ++j)
669 if (Node->getOperand(j).getValueType().isVector() &&
670 !(ISD::isVPOpcode(Node->getOpcode()) &&
671 ISD::getVPMaskIdx(Node->getOpcode()) == j)) // Skip mask operand.
672 {
673 // promote the vector operand.
674 SDValue Ext =
675 DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {NewVecVT, MVT::Other},
676 {Node->getOperand(0), Node->getOperand(j)});
677 Operands[j] = Ext.getValue(0);
678 Chains.push_back(Ext.getValue(1));
679 } else
680 Operands[j] = Node->getOperand(j); // Skip no vector operand.
681
682 SDVTList VTs = DAG.getVTList(NewVecVT, Node->getValueType(1));
683
684 Operands[0] = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
685
686 SDValue Res =
687 DAG.getNode(Node->getOpcode(), DL, VTs, Operands, Node->getFlags());
688
689 SDValue Round =
690 DAG.getNode(ISD::STRICT_FP_ROUND, DL, {VecVT, MVT::Other},
691 {Res.getValue(1), Res.getValue(0),
692 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)});
693
694 Results.push_back(Round.getValue(0));
695 Results.push_back(Round.getValue(1));
696}
697
698void VectorLegalizer::PromoteFloatVECREDUCE(SDNode *Node,
699 SmallVectorImpl<SDValue> &Results,
700 bool NonArithmetic) {
701 MVT OpVT = Node->getOperand(0).getSimpleValueType();
702 assert(OpVT.isFloatingPoint() && "Expected floating point reduction!");
703 MVT NewOpVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OpVT);
704
705 SDLoc DL(Node);
706 SDValue NewOp = DAG.getNode(ISD::FP_EXTEND, DL, NewOpVT, Node->getOperand(0));
707 SDValue Rdx =
708 DAG.getNode(Node->getOpcode(), DL, NewOpVT.getVectorElementType(), NewOp,
709 Node->getFlags());
710 SDValue Res =
711 DAG.getNode(ISD::FP_ROUND, DL, Node->getValueType(0), Rdx,
712 DAG.getIntPtrConstant(NonArithmetic, DL, /*isTarget=*/true));
713 Results.push_back(Res);
714}
715
716void VectorLegalizer::PromoteVECTOR_COMPRESS(
717 SDNode *Node, SmallVectorImpl<SDValue> &Results) {
718 SDLoc DL(Node);
719 EVT VT = Node->getValueType(0);
720 MVT PromotedVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT.getSimpleVT());
721 assert((VT.isInteger() || VT.getSizeInBits() == PromotedVT.getSizeInBits()) &&
722 "Only integer promotion or bitcasts between types is supported");
723
724 SDValue Vec = Node->getOperand(0);
725 SDValue Mask = Node->getOperand(1);
726 SDValue Passthru = Node->getOperand(2);
727 if (VT.isInteger()) {
728 Vec = DAG.getNode(ISD::ANY_EXTEND, DL, PromotedVT, Vec);
729 Mask = TLI.promoteTargetBoolean(DAG, Mask, PromotedVT);
730 Passthru = DAG.getNode(ISD::ANY_EXTEND, DL, PromotedVT, Passthru);
731 } else {
732 Vec = DAG.getBitcast(PromotedVT, Vec);
733 Passthru = DAG.getBitcast(PromotedVT, Passthru);
734 }
735
737 DAG.getNode(ISD::VECTOR_COMPRESS, DL, PromotedVT, Vec, Mask, Passthru);
738 Result = VT.isInteger() ? DAG.getNode(ISD::TRUNCATE, DL, VT, Result)
739 : DAG.getBitcast(VT, Result);
740 Results.push_back(Result);
741}
742
// Dispatch vector promotion: a few opcodes get dedicated strategies; the
// default path extends/bitcasts operands into the promoted type, performs
// the operation there, and converts the result back.
void VectorLegalizer::Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
  // For a few operations there is a specific concept for promotion based on
  // the operand's type.
  switch (Node->getOpcode()) {
  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP:
  // NOTE(review): case labels appear to be missing here in this copy (likely
  // the STRICT_[SU]INT_TO_FP variants) — verify against upstream.
    // "Promote" the operation by extending the operand.
    PromoteINT_TO_FP(Node, Results);
    return;
  case ISD::FP_TO_UINT:
  case ISD::FP_TO_SINT:
  // NOTE(review): case labels appear to be missing here in this copy (likely
  // the STRICT_FP_TO_[SU]INT variants) — verify against upstream.
    // Promote the operation by extending the operand.
    PromoteFP_TO_INT(Node, Results);
    return;
  case ISD::VP_SETCC:
  case ISD::SETCC:
    // Promote the operation by extending the operand.
    PromoteSETCC(Node, Results);
    return;
  case ISD::STRICT_FADD:
  case ISD::STRICT_FSUB:
  case ISD::STRICT_FMUL:
  case ISD::STRICT_FDIV:
  // NOTE(review): a case label appears to be missing here in this copy —
  // verify against upstream.
  case ISD::STRICT_FMA:
    PromoteSTRICT(Node, Results);
    return;
  // NOTE(review): case label(s) appear to be missing here in this copy
  // (arithmetic float VECREDUCE opcodes) — verify against upstream.
    PromoteFloatVECREDUCE(Node, Results, /*NonArithmetic=*/false);
    return;
  // NOTE(review): case labels appear to be missing here in this copy
  // (non-arithmetic, min/max-style float VECREDUCE opcodes) — verify against
  // upstream.
    PromoteFloatVECREDUCE(Node, Results, /*NonArithmetic=*/true);
    return;
  // NOTE(review): a case label appears to be missing here in this copy
  // (likely ISD::VECTOR_COMPRESS) — verify against upstream.
    PromoteVECTOR_COMPRESS(Node, Results);
    return;

  case ISD::FP_ROUND:
  case ISD::FP_EXTEND:
    // These operations are used to do promotion so they can't be promoted
    // themselves.
    llvm_unreachable("Don't know how to promote this operation!");
  case ISD::VP_FABS:
  case ISD::VP_FCOPYSIGN:
  case ISD::VP_FNEG:
    // Promoting fabs, fneg, and fcopysign changes their semantics.
    llvm_unreachable("These operations should not be promoted");
  }

  // There are currently two cases of vector promotion:
  // 1) Bitcasting a vector of integers to a different type to a vector of the
  // same overall length. For example, x86 promotes ISD::AND v2i32 to v1i64.
  // 2) Extending a vector of floats to a vector of the same number of larger
  // floats. For example, AArch64 promotes ISD::FADD on v4f16 to v4f32.
  assert(Node->getNumValues() == 1 &&
         "Can't promote a vector with multiple results!");
  MVT VT = Node->getSimpleValueType(0);
  MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
  SDLoc dl(Node);
  SmallVector<SDValue, 4> Operands(Node->getNumOperands());

  for (unsigned j = 0; j != Node->getNumOperands(); ++j) {
    // Do not promote the mask operand of a VP OP.
    bool SkipPromote = ISD::isVPOpcode(Node->getOpcode()) &&
                       ISD::getVPMaskIdx(Node->getOpcode()) == j;
    if (Node->getOperand(j).getValueType().isVector() && !SkipPromote)
      if (Node->getOperand(j)
              .getValueType()
              .getVectorElementType()
              .isFloatingPoint() &&
      // NOTE(review): the continuation of this condition (presumably checking
      // that NVT is a floating-point vector) was dropped from this copy —
      // verify against upstream.
        if (ISD::isVPOpcode(Node->getOpcode())) {
          unsigned EVLIdx =
          // NOTE(review): the initializer of EVLIdx (presumably
          // *ISD::getVPExplicitVectorLengthIdx(Node->getOpcode())) was
          // dropped from this copy — verify against upstream.
          unsigned MaskIdx = *ISD::getVPMaskIdx(Node->getOpcode());
          Operands[j] =
              DAG.getNode(ISD::VP_FP_EXTEND, dl, NVT, Node->getOperand(j),
                          Node->getOperand(MaskIdx), Node->getOperand(EVLIdx));
        } else {
          Operands[j] =
              DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(j));
        }
      else
        Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(j));
    else
      Operands[j] = Node->getOperand(j);
  }

  SDValue Res =
      DAG.getNode(Node->getOpcode(), dl, NVT, Operands, Node->getFlags());

  if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) ||
  // NOTE(review): the continuation of this condition and the opening of the
  // float result-conversion branch were dropped from this copy — verify
  // against upstream.
    if (ISD::isVPOpcode(Node->getOpcode())) {
      unsigned EVLIdx = *ISD::getVPExplicitVectorLengthIdx(Node->getOpcode());
      unsigned MaskIdx = *ISD::getVPMaskIdx(Node->getOpcode());
      Res = DAG.getNode(ISD::VP_FP_ROUND, dl, VT, Res,
                        Node->getOperand(MaskIdx), Node->getOperand(EVLIdx));
    } else {
      Res = DAG.getNode(ISD::FP_ROUND, dl, VT, Res,
                        DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
    }
  else
    Res = DAG.getNode(ISD::BITCAST, dl, VT, Res);

  Results.push_back(Res);
}
858
859void VectorLegalizer::PromoteINT_TO_FP(SDNode *Node,
860 SmallVectorImpl<SDValue> &Results) {
861 // INT_TO_FP operations may require the input operand be promoted even
862 // when the type is otherwise legal.
863 bool IsStrict = Node->isStrictFPOpcode();
864 MVT VT = Node->getOperand(IsStrict ? 1 : 0).getSimpleValueType();
865 MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
867 "Vectors have different number of elements!");
868
869 SDLoc dl(Node);
870 SmallVector<SDValue, 4> Operands(Node->getNumOperands());
871
872 unsigned Opc = (Node->getOpcode() == ISD::UINT_TO_FP ||
873 Node->getOpcode() == ISD::STRICT_UINT_TO_FP)
876 for (unsigned j = 0; j != Node->getNumOperands(); ++j) {
877 if (Node->getOperand(j).getValueType().isVector())
878 Operands[j] = DAG.getNode(Opc, dl, NVT, Node->getOperand(j));
879 else
880 Operands[j] = Node->getOperand(j);
881 }
882
883 if (IsStrict) {
884 SDValue Res = DAG.getNode(Node->getOpcode(), dl,
885 {Node->getValueType(0), MVT::Other}, Operands);
886 Results.push_back(Res);
887 Results.push_back(Res.getValue(1));
888 return;
889 }
890
891 SDValue Res =
892 DAG.getNode(Node->getOpcode(), dl, Node->getValueType(0), Operands);
893 Results.push_back(Res);
894}
895
896// For FP_TO_INT we promote the result type to a vector type with wider
897// elements and then truncate the result. This is different from the default
898// PromoteVector which uses bitcast to promote thus assumning that the
899// promoted vector type has the same overall size.
900void VectorLegalizer::PromoteFP_TO_INT(SDNode *Node,
901 SmallVectorImpl<SDValue> &Results) {
902 MVT VT = Node->getSimpleValueType(0);
903 MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
904 bool IsStrict = Node->isStrictFPOpcode();
906 "Vectors have different number of elements!");
907
908 unsigned NewOpc = Node->getOpcode();
909 // Change FP_TO_UINT to FP_TO_SINT if possible.
910 // TODO: Should we only do this if FP_TO_UINT itself isn't legal?
911 if (NewOpc == ISD::FP_TO_UINT &&
913 NewOpc = ISD::FP_TO_SINT;
914
915 if (NewOpc == ISD::STRICT_FP_TO_UINT &&
917 NewOpc = ISD::STRICT_FP_TO_SINT;
918
919 SDLoc dl(Node);
920 SDValue Promoted, Chain;
921 if (IsStrict) {
922 Promoted = DAG.getNode(NewOpc, dl, {NVT, MVT::Other},
923 {Node->getOperand(0), Node->getOperand(1)});
924 Chain = Promoted.getValue(1);
925 } else
926 Promoted = DAG.getNode(NewOpc, dl, NVT, Node->getOperand(0));
927
928 // Assert that the converted value fits in the original type. If it doesn't
929 // (eg: because the value being converted is too big), then the result of the
930 // original operation was undefined anyway, so the assert is still correct.
931 if (Node->getOpcode() == ISD::FP_TO_UINT ||
932 Node->getOpcode() == ISD::STRICT_FP_TO_UINT)
933 NewOpc = ISD::AssertZext;
934 else
935 NewOpc = ISD::AssertSext;
936
937 Promoted = DAG.getNode(NewOpc, dl, NVT, Promoted,
938 DAG.getValueType(VT.getScalarType()));
939 Promoted = DAG.getNode(ISD::TRUNCATE, dl, VT, Promoted);
940 Results.push_back(Promoted);
941 if (IsStrict)
942 Results.push_back(Chain);
943}
944
945std::pair<SDValue, SDValue> VectorLegalizer::ExpandLoad(SDNode *N) {
946 LoadSDNode *LD = cast<LoadSDNode>(N);
947 return TLI.scalarizeVectorLoad(LD, DAG);
948}
949
950SDValue VectorLegalizer::ExpandStore(SDNode *N) {
951 StoreSDNode *ST = cast<StoreSDNode>(N);
952 SDValue TF = TLI.scalarizeVectorStore(ST, DAG);
953 return TF;
954}
955
956void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
957 switch (Node->getOpcode()) {
958 case ISD::LOAD: {
959 std::pair<SDValue, SDValue> Tmp = ExpandLoad(Node);
960 Results.push_back(Tmp.first);
961 Results.push_back(Tmp.second);
962 return;
963 }
964 case ISD::STORE:
965 Results.push_back(ExpandStore(Node));
966 return;
968 for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
969 Results.push_back(Node->getOperand(i));
970 return;
972 if (SDValue Expanded = ExpandSEXTINREG(Node)) {
973 Results.push_back(Expanded);
974 return;
975 }
976 break;
978 Results.push_back(ExpandANY_EXTEND_VECTOR_INREG(Node));
979 return;
981 Results.push_back(ExpandSIGN_EXTEND_VECTOR_INREG(Node));
982 return;
984 Results.push_back(ExpandZERO_EXTEND_VECTOR_INREG(Node));
985 return;
986 case ISD::BSWAP:
987 if (SDValue Expanded = ExpandBSWAP(Node)) {
988 Results.push_back(Expanded);
989 return;
990 }
991 break;
992 case ISD::VP_BSWAP:
993 Results.push_back(TLI.expandVPBSWAP(Node, DAG));
994 return;
995 case ISD::VSELECT:
996 if (SDValue Expanded = ExpandVSELECT(Node)) {
997 Results.push_back(Expanded);
998 return;
999 }
1000 break;
1001 case ISD::VP_SELECT:
1002 if (SDValue Expanded = ExpandVP_SELECT(Node)) {
1003 Results.push_back(Expanded);
1004 return;
1005 }
1006 break;
1007 case ISD::VP_SREM:
1008 case ISD::VP_UREM:
1009 if (SDValue Expanded = ExpandVP_REM(Node)) {
1010 Results.push_back(Expanded);
1011 return;
1012 }
1013 break;
1014 case ISD::VP_FNEG:
1015 if (SDValue Expanded = ExpandVP_FNEG(Node)) {
1016 Results.push_back(Expanded);
1017 return;
1018 }
1019 break;
1020 case ISD::VP_FABS:
1021 if (SDValue Expanded = ExpandVP_FABS(Node)) {
1022 Results.push_back(Expanded);
1023 return;
1024 }
1025 break;
1026 case ISD::VP_FCOPYSIGN:
1027 if (SDValue Expanded = ExpandVP_FCOPYSIGN(Node)) {
1028 Results.push_back(Expanded);
1029 return;
1030 }
1031 break;
1032 case ISD::SELECT:
1033 if (SDValue Expanded = ExpandSELECT(Node)) {
1034 Results.push_back(Expanded);
1035 return;
1036 }
1037 break;
1038 case ISD::SELECT_CC: {
1039 if (Node->getValueType(0).isScalableVector()) {
1040 EVT CondVT = TLI.getSetCCResultType(
1041 DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
1042 SDValue SetCC =
1043 DAG.getNode(ISD::SETCC, SDLoc(Node), CondVT, Node->getOperand(0),
1044 Node->getOperand(1), Node->getOperand(4));
1045 Results.push_back(DAG.getSelect(SDLoc(Node), Node->getValueType(0), SetCC,
1046 Node->getOperand(2),
1047 Node->getOperand(3)));
1048 return;
1049 }
1050 break;
1051 }
1052 case ISD::FP_TO_UINT:
1053 ExpandFP_TO_UINT(Node, Results);
1054 return;
1055 case ISD::UINT_TO_FP:
1056 ExpandUINT_TO_FLOAT(Node, Results);
1057 return;
1058 case ISD::FNEG:
1059 if (SDValue Expanded = ExpandFNEG(Node)) {
1060 Results.push_back(Expanded);
1061 return;
1062 }
1063 break;
1064 case ISD::FABS:
1065 if (SDValue Expanded = ExpandFABS(Node)) {
1066 Results.push_back(Expanded);
1067 return;
1068 }
1069 break;
1070 case ISD::FCOPYSIGN:
1071 if (SDValue Expanded = ExpandFCOPYSIGN(Node)) {
1072 Results.push_back(Expanded);
1073 return;
1074 }
1075 break;
1076 case ISD::FSUB:
1077 ExpandFSUB(Node, Results);
1078 return;
1079 case ISD::SETCC:
1080 case ISD::VP_SETCC:
1081 ExpandSETCC(Node, Results);
1082 return;
1083 case ISD::ABS:
1084 if (SDValue Expanded = TLI.expandABS(Node, DAG)) {
1085 Results.push_back(Expanded);
1086 return;
1087 }
1088 break;
1089 case ISD::ABDS:
1090 case ISD::ABDU:
1091 if (SDValue Expanded = TLI.expandABD(Node, DAG)) {
1092 Results.push_back(Expanded);
1093 return;
1094 }
1095 break;
1096 case ISD::AVGCEILS:
1097 case ISD::AVGCEILU:
1098 case ISD::AVGFLOORS:
1099 case ISD::AVGFLOORU:
1100 if (SDValue Expanded = TLI.expandAVG(Node, DAG)) {
1101 Results.push_back(Expanded);
1102 return;
1103 }
1104 break;
1105 case ISD::BITREVERSE:
1106 if (SDValue Expanded = ExpandBITREVERSE(Node)) {
1107 Results.push_back(Expanded);
1108 return;
1109 }
1110 break;
1111 case ISD::VP_BITREVERSE:
1112 if (SDValue Expanded = TLI.expandVPBITREVERSE(Node, DAG)) {
1113 Results.push_back(Expanded);
1114 return;
1115 }
1116 break;
1117 case ISD::CTPOP:
1118 if (SDValue Expanded = TLI.expandCTPOP(Node, DAG)) {
1119 Results.push_back(Expanded);
1120 return;
1121 }
1122 break;
1123 case ISD::VP_CTPOP:
1124 if (SDValue Expanded = TLI.expandVPCTPOP(Node, DAG)) {
1125 Results.push_back(Expanded);
1126 return;
1127 }
1128 break;
1129 case ISD::CTLZ:
1131 if (SDValue Expanded = TLI.expandCTLZ(Node, DAG)) {
1132 Results.push_back(Expanded);
1133 return;
1134 }
1135 break;
1136 case ISD::VP_CTLZ:
1137 case ISD::VP_CTLZ_ZERO_UNDEF:
1138 if (SDValue Expanded = TLI.expandVPCTLZ(Node, DAG)) {
1139 Results.push_back(Expanded);
1140 return;
1141 }
1142 break;
1143 case ISD::CTTZ:
1145 if (SDValue Expanded = TLI.expandCTTZ(Node, DAG)) {
1146 Results.push_back(Expanded);
1147 return;
1148 }
1149 break;
1150 case ISD::VP_CTTZ:
1151 case ISD::VP_CTTZ_ZERO_UNDEF:
1152 if (SDValue Expanded = TLI.expandVPCTTZ(Node, DAG)) {
1153 Results.push_back(Expanded);
1154 return;
1155 }
1156 break;
1157 case ISD::FSHL:
1158 case ISD::VP_FSHL:
1159 case ISD::FSHR:
1160 case ISD::VP_FSHR:
1161 if (SDValue Expanded = TLI.expandFunnelShift(Node, DAG)) {
1162 Results.push_back(Expanded);
1163 return;
1164 }
1165 break;
1166 case ISD::CLMUL:
1167 case ISD::CLMULR:
1168 case ISD::CLMULH:
1169 if (SDValue Expanded = TLI.expandCLMUL(Node, DAG)) {
1170 Results.push_back(Expanded);
1171 return;
1172 }
1173 break;
1174 case ISD::ROTL:
1175 case ISD::ROTR:
1176 if (SDValue Expanded = TLI.expandROT(Node, false /*AllowVectorOps*/, DAG)) {
1177 Results.push_back(Expanded);
1178 return;
1179 }
1180 break;
1181 case ISD::FMINNUM:
1182 case ISD::FMAXNUM:
1183 if (SDValue Expanded = TLI.expandFMINNUM_FMAXNUM(Node, DAG)) {
1184 Results.push_back(Expanded);
1185 return;
1186 }
1187 break;
1188 case ISD::FMINIMUM:
1189 case ISD::FMAXIMUM:
1190 Results.push_back(TLI.expandFMINIMUM_FMAXIMUM(Node, DAG));
1191 return;
1192 case ISD::FMINIMUMNUM:
1193 case ISD::FMAXIMUMNUM:
1194 Results.push_back(TLI.expandFMINIMUMNUM_FMAXIMUMNUM(Node, DAG));
1195 return;
1196 case ISD::SMIN:
1197 case ISD::SMAX:
1198 case ISD::UMIN:
1199 case ISD::UMAX:
1200 if (SDValue Expanded = TLI.expandIntMINMAX(Node, DAG)) {
1201 Results.push_back(Expanded);
1202 return;
1203 }
1204 break;
1205 case ISD::UADDO:
1206 case ISD::USUBO:
1207 ExpandUADDSUBO(Node, Results);
1208 return;
1209 case ISD::SADDO:
1210 case ISD::SSUBO:
1211 ExpandSADDSUBO(Node, Results);
1212 return;
1213 case ISD::UMULO:
1214 case ISD::SMULO:
1215 ExpandMULO(Node, Results);
1216 return;
1217 case ISD::USUBSAT:
1218 case ISD::SSUBSAT:
1219 case ISD::UADDSAT:
1220 case ISD::SADDSAT:
1221 if (SDValue Expanded = TLI.expandAddSubSat(Node, DAG)) {
1222 Results.push_back(Expanded);
1223 return;
1224 }
1225 break;
1226 case ISD::USHLSAT:
1227 case ISD::SSHLSAT:
1228 if (SDValue Expanded = TLI.expandShlSat(Node, DAG)) {
1229 Results.push_back(Expanded);
1230 return;
1231 }
1232 break;
1235 // Expand the fpsosisat if it is scalable to prevent it from unrolling below.
1236 if (Node->getValueType(0).isScalableVector()) {
1237 if (SDValue Expanded = TLI.expandFP_TO_INT_SAT(Node, DAG)) {
1238 Results.push_back(Expanded);
1239 return;
1240 }
1241 }
1242 break;
1243 case ISD::SMULFIX:
1244 case ISD::UMULFIX:
1245 if (SDValue Expanded = TLI.expandFixedPointMul(Node, DAG)) {
1246 Results.push_back(Expanded);
1247 return;
1248 }
1249 break;
1250 case ISD::SMULFIXSAT:
1251 case ISD::UMULFIXSAT:
1252 // FIXME: We do not expand SMULFIXSAT/UMULFIXSAT here yet, not sure exactly
1253 // why. Maybe it results in worse codegen compared to the unroll for some
1254 // targets? This should probably be investigated. And if we still prefer to
1255 // unroll an explanation could be helpful.
1256 break;
1257 case ISD::SDIVFIX:
1258 case ISD::UDIVFIX:
1259 ExpandFixedPointDiv(Node, Results);
1260 return;
1261 case ISD::SDIVFIXSAT:
1262 case ISD::UDIVFIXSAT:
1263 break;
1264#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
1265 case ISD::STRICT_##DAGN:
1266#include "llvm/IR/ConstrainedOps.def"
1267 ExpandStrictFPOp(Node, Results);
1268 return;
1269 case ISD::VECREDUCE_ADD:
1270 case ISD::VECREDUCE_MUL:
1271 case ISD::VECREDUCE_AND:
1272 case ISD::VECREDUCE_OR:
1273 case ISD::VECREDUCE_XOR:
1284 Results.push_back(TLI.expandVecReduce(Node, DAG));
1285 return;
1290 Results.push_back(TLI.expandPartialReduceMLA(Node, DAG));
1291 return;
1294 Results.push_back(TLI.expandVecReduceSeq(Node, DAG));
1295 return;
1296 case ISD::SREM:
1297 case ISD::UREM:
1298 ExpandREM(Node, Results);
1299 return;
1300 case ISD::VP_MERGE:
1301 if (SDValue Expanded = ExpandVP_MERGE(Node)) {
1302 Results.push_back(Expanded);
1303 return;
1304 }
1305 break;
1306 case ISD::FREM: {
1307 RTLIB::Libcall LC = RTLIB::getREM(Node->getValueType(0));
1308 if (tryExpandVecMathCall(Node, LC, Results))
1309 return;
1310
1311 break;
1312 }
1313 case ISD::FSINCOS:
1314 case ISD::FSINCOSPI: {
1315 EVT VT = Node->getValueType(0);
1316 RTLIB::Libcall LC = Node->getOpcode() == ISD::FSINCOS
1317 ? RTLIB::getSINCOS(VT)
1318 : RTLIB::getSINCOSPI(VT);
1319 if (LC != RTLIB::UNKNOWN_LIBCALL &&
1320 TLI.expandMultipleResultFPLibCall(DAG, LC, Node, Results))
1321 return;
1322
1323 // TODO: Try to see if there's a narrower call available to use before
1324 // scalarizing.
1325 break;
1326 }
1327 case ISD::FPOW: {
1328 RTLIB::Libcall LC = RTLIB::getPOW(Node->getValueType(0));
1329 if (tryExpandVecMathCall(Node, LC, Results))
1330 return;
1331
1332 // TODO: Try to see if there's a narrower call available to use before
1333 // scalarizing.
1334 break;
1335 }
1336 case ISD::FCBRT: {
1337 RTLIB::Libcall LC = RTLIB::getCBRT(Node->getValueType(0));
1338 if (tryExpandVecMathCall(Node, LC, Results))
1339 return;
1340
1341 // TODO: Try to see if there's a narrower call available to use before
1342 // scalarizing.
1343 break;
1344 }
1345 case ISD::FMODF: {
1346 EVT VT = Node->getValueType(0);
1347 RTLIB::Libcall LC = RTLIB::getMODF(VT);
1348 if (LC != RTLIB::UNKNOWN_LIBCALL &&
1349 TLI.expandMultipleResultFPLibCall(DAG, LC, Node, Results,
1350 /*CallRetResNo=*/0))
1351 return;
1352 break;
1353 }
1355 Results.push_back(TLI.expandVECTOR_COMPRESS(Node, DAG));
1356 return;
1358 Results.push_back(TLI.expandVectorFindLastActive(Node, DAG));
1359 return;
1360 case ISD::SCMP:
1361 case ISD::UCMP:
1362 Results.push_back(TLI.expandCMP(Node, DAG));
1363 return;
1366 Results.push_back(ExpandLOOP_DEPENDENCE_MASK(Node));
1367 return;
1368
1369 case ISD::FADD:
1370 case ISD::FMUL:
1371 case ISD::FMA:
1372 case ISD::FDIV:
1373 case ISD::FCEIL:
1374 case ISD::FFLOOR:
1375 case ISD::FNEARBYINT:
1376 case ISD::FRINT:
1377 case ISD::FROUND:
1378 case ISD::FROUNDEVEN:
1379 case ISD::FTRUNC:
1380 case ISD::FSQRT:
1381 if (SDValue Expanded = TLI.expandVectorNaryOpBySplitting(Node, DAG)) {
1382 Results.push_back(Expanded);
1383 return;
1384 }
1385 break;
1386 }
1387
1388 SDValue Unrolled = DAG.UnrollVectorOp(Node);
1389 if (Node->getNumValues() == 1) {
1390 Results.push_back(Unrolled);
1391 } else {
1392 assert(Node->getNumValues() == Unrolled->getNumValues() &&
1393 "VectorLegalizer Expand returned wrong number of results!");
1394 for (unsigned I = 0, E = Unrolled->getNumValues(); I != E; ++I)
1395 Results.push_back(Unrolled.getValue(I));
1396 }
1397}
1398
1399SDValue VectorLegalizer::ExpandSELECT(SDNode *Node) {
1400 // Lower a select instruction where the condition is a scalar and the
1401 // operands are vectors. Lower this select to VSELECT and implement it
1402 // using XOR AND OR. The selector bit is broadcasted.
1403 EVT VT = Node->getValueType(0);
1404 SDLoc DL(Node);
1405
1406 SDValue Mask = Node->getOperand(0);
1407 SDValue Op1 = Node->getOperand(1);
1408 SDValue Op2 = Node->getOperand(2);
1409
1410 assert(VT.isVector() && !Mask.getValueType().isVector()
1411 && Op1.getValueType() == Op2.getValueType() && "Invalid type");
1412
1413 // If we can't even use the basic vector operations of
1414 // AND,OR,XOR, we will have to scalarize the op.
1415 // Notice that the operation may be 'promoted' which means that it is
1416 // 'bitcasted' to another type which is handled.
1417 // Also, we need to be able to construct a splat vector using either
1418 // BUILD_VECTOR or SPLAT_VECTOR.
1419 // FIXME: Should we also permit fixed-length SPLAT_VECTOR as a fallback to
1420 // BUILD_VECTOR?
1421 if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
1422 TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
1423 TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand ||
1426 VT) == TargetLowering::Expand)
1427 return SDValue();
1428
1429 // Generate a mask operand.
1430 EVT MaskTy = VT.changeVectorElementTypeToInteger();
1431
1432 // What is the size of each element in the vector mask.
1433 EVT BitTy = MaskTy.getScalarType();
1434
1435 Mask = DAG.getSelect(DL, BitTy, Mask, DAG.getAllOnesConstant(DL, BitTy),
1436 DAG.getConstant(0, DL, BitTy));
1437
1438 // Broadcast the mask so that the entire vector is all one or all zero.
1439 Mask = DAG.getSplat(MaskTy, DL, Mask);
1440
1441 // Bitcast the operands to be the same type as the mask.
1442 // This is needed when we select between FP types because
1443 // the mask is a vector of integers.
1444 Op1 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op1);
1445 Op2 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op2);
1446
1447 SDValue NotMask = DAG.getNOT(DL, Mask, MaskTy);
1448
1449 Op1 = DAG.getNode(ISD::AND, DL, MaskTy, Op1, Mask);
1450 Op2 = DAG.getNode(ISD::AND, DL, MaskTy, Op2, NotMask);
1451 SDValue Val = DAG.getNode(ISD::OR, DL, MaskTy, Op1, Op2);
1452 return DAG.getNode(ISD::BITCAST, DL, Node->getValueType(0), Val);
1453}
1454
1455SDValue VectorLegalizer::ExpandSEXTINREG(SDNode *Node) {
1456 EVT VT = Node->getValueType(0);
1457
1458 // Make sure that the SRA and SHL instructions are available.
1459 if (TLI.getOperationAction(ISD::SRA, VT) == TargetLowering::Expand ||
1460 TLI.getOperationAction(ISD::SHL, VT) == TargetLowering::Expand)
1461 return SDValue();
1462
1463 SDLoc DL(Node);
1464 EVT OrigTy = cast<VTSDNode>(Node->getOperand(1))->getVT();
1465
1466 unsigned BW = VT.getScalarSizeInBits();
1467 unsigned OrigBW = OrigTy.getScalarSizeInBits();
1468 SDValue ShiftSz = DAG.getConstant(BW - OrigBW, DL, VT);
1469
1470 SDValue Op = DAG.getNode(ISD::SHL, DL, VT, Node->getOperand(0), ShiftSz);
1471 return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz);
1472}
1473
1474// Generically expand a vector anyext in register to a shuffle of the relevant
1475// lanes into the appropriate locations, with other lanes left undef.
1476SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDNode *Node) {
1477 SDLoc DL(Node);
1478 EVT VT = Node->getValueType(0);
1479 int NumElements = VT.getVectorNumElements();
1480 SDValue Src = Node->getOperand(0);
1481 EVT SrcVT = Src.getValueType();
1482 int NumSrcElements = SrcVT.getVectorNumElements();
1483
1484 // *_EXTEND_VECTOR_INREG SrcVT can be smaller than VT - so insert the vector
1485 // into a larger vector type.
1486 if (SrcVT.bitsLE(VT)) {
1487 assert((VT.getSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 &&
1488 "ANY_EXTEND_VECTOR_INREG vector size mismatch");
1489 NumSrcElements = VT.getSizeInBits() / SrcVT.getScalarSizeInBits();
1490 SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
1491 NumSrcElements);
1492 Src = DAG.getInsertSubvector(DL, DAG.getUNDEF(SrcVT), Src, 0);
1493 }
1494
1495 // Build a base mask of undef shuffles.
1496 SmallVector<int, 16> ShuffleMask;
1497 ShuffleMask.resize(NumSrcElements, -1);
1498
1499 // Place the extended lanes into the correct locations.
1500 int ExtLaneScale = NumSrcElements / NumElements;
1501 int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0;
1502 for (int i = 0; i < NumElements; ++i)
1503 ShuffleMask[i * ExtLaneScale + EndianOffset] = i;
1504
1505 return DAG.getNode(
1506 ISD::BITCAST, DL, VT,
1507 DAG.getVectorShuffle(SrcVT, DL, Src, DAG.getPOISON(SrcVT), ShuffleMask));
1508}
1509
1510SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDNode *Node) {
1511 SDLoc DL(Node);
1512 EVT VT = Node->getValueType(0);
1513 SDValue Src = Node->getOperand(0);
1514 EVT SrcVT = Src.getValueType();
1515
1516 // First build an any-extend node which can be legalized above when we
1517 // recurse through it.
1519
1520 // Now we need sign extend. Do this by shifting the elements. Even if these
1521 // aren't legal operations, they have a better chance of being legalized
1522 // without full scalarization than the sign extension does.
1523 unsigned EltWidth = VT.getScalarSizeInBits();
1524 unsigned SrcEltWidth = SrcVT.getScalarSizeInBits();
1525 SDValue ShiftAmount = DAG.getConstant(EltWidth - SrcEltWidth, DL, VT);
1526 return DAG.getNode(ISD::SRA, DL, VT,
1527 DAG.getNode(ISD::SHL, DL, VT, Op, ShiftAmount),
1528 ShiftAmount);
1529}
1530
1531// Generically expand a vector zext in register to a shuffle of the relevant
1532// lanes into the appropriate locations, a blend of zero into the high bits,
1533// and a bitcast to the wider element type.
1534SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDNode *Node) {
1535 SDLoc DL(Node);
1536 EVT VT = Node->getValueType(0);
1537 int NumElements = VT.getVectorNumElements();
1538 SDValue Src = Node->getOperand(0);
1539 EVT SrcVT = Src.getValueType();
1540 int NumSrcElements = SrcVT.getVectorNumElements();
1541
1542 // *_EXTEND_VECTOR_INREG SrcVT can be smaller than VT - so insert the vector
1543 // into a larger vector type.
1544 if (SrcVT.bitsLE(VT)) {
1545 assert((VT.getSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 &&
1546 "ZERO_EXTEND_VECTOR_INREG vector size mismatch");
1547 NumSrcElements = VT.getSizeInBits() / SrcVT.getScalarSizeInBits();
1548 SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
1549 NumSrcElements);
1550 Src = DAG.getInsertSubvector(DL, DAG.getUNDEF(SrcVT), Src, 0);
1551 }
1552
1553 // Build up a zero vector to blend into this one.
1554 SDValue Zero = DAG.getConstant(0, DL, SrcVT);
1555
1556 // Shuffle the incoming lanes into the correct position, and pull all other
1557 // lanes from the zero vector.
1558 auto ShuffleMask = llvm::to_vector<16>(llvm::seq<int>(0, NumSrcElements));
1559
1560 int ExtLaneScale = NumSrcElements / NumElements;
1561 int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0;
1562 for (int i = 0; i < NumElements; ++i)
1563 ShuffleMask[i * ExtLaneScale + EndianOffset] = NumSrcElements + i;
1564
1565 return DAG.getNode(ISD::BITCAST, DL, VT,
1566 DAG.getVectorShuffle(SrcVT, DL, Zero, Src, ShuffleMask));
1567}
1568
1569static void createBSWAPShuffleMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) {
1570 int ScalarSizeInBytes = VT.getScalarSizeInBits() / 8;
1571 for (int I = 0, E = VT.getVectorNumElements(); I != E; ++I)
1572 for (int J = ScalarSizeInBytes - 1; J >= 0; --J)
1573 ShuffleMask.push_back((I * ScalarSizeInBytes) + J);
1574}
1575
1576SDValue VectorLegalizer::ExpandBSWAP(SDNode *Node) {
1577 EVT VT = Node->getValueType(0);
1578
1579 // Scalable vectors can't use shuffle expansion.
1580 if (VT.isScalableVector())
1581 return TLI.expandBSWAP(Node, DAG);
1582
1583 // Generate a byte wise shuffle mask for the BSWAP.
1584 SmallVector<int, 16> ShuffleMask;
1585 createBSWAPShuffleMask(VT, ShuffleMask);
1586 EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, ShuffleMask.size());
1587
1588 // Only emit a shuffle if the mask is legal.
1589 if (TLI.isShuffleMaskLegal(ShuffleMask, ByteVT)) {
1590 SDLoc DL(Node);
1591 SDValue Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Node->getOperand(0));
1592 Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getPOISON(ByteVT),
1593 ShuffleMask);
1594 return DAG.getNode(ISD::BITCAST, DL, VT, Op);
1595 }
1596
1597 // If we have the appropriate vector bit operations, it is better to use them
1598 // than unrolling and expanding each component.
1599 if (TLI.isOperationLegalOrCustom(ISD::SHL, VT) &&
1603 return TLI.expandBSWAP(Node, DAG);
1604
1605 // Otherwise let the caller unroll.
1606 return SDValue();
1607}
1608
1609SDValue VectorLegalizer::ExpandBITREVERSE(SDNode *Node) {
1610 EVT VT = Node->getValueType(0);
1611
1612 // We can't unroll or use shuffles for scalable vectors.
1613 if (VT.isScalableVector())
1614 return TLI.expandBITREVERSE(Node, DAG);
1615
1616 // If we have the scalar operation, it's probably cheaper to unroll it.
1618 return SDValue();
1619
1620 // If the vector element width is a whole number of bytes, test if its legal
1621 // to BSWAP shuffle the bytes and then perform the BITREVERSE on the byte
1622 // vector. This greatly reduces the number of bit shifts necessary.
1623 unsigned ScalarSizeInBits = VT.getScalarSizeInBits();
1624 if (ScalarSizeInBits > 8 && (ScalarSizeInBits % 8) == 0) {
1625 SmallVector<int, 16> BSWAPMask;
1626 createBSWAPShuffleMask(VT, BSWAPMask);
1627
1628 EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, BSWAPMask.size());
1629 if (TLI.isShuffleMaskLegal(BSWAPMask, ByteVT) &&
1631 (TLI.isOperationLegalOrCustom(ISD::SHL, ByteVT) &&
1632 TLI.isOperationLegalOrCustom(ISD::SRL, ByteVT) &&
1635 SDLoc DL(Node);
1636 SDValue Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Node->getOperand(0));
1637 Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getPOISON(ByteVT),
1638 BSWAPMask);
1639 Op = DAG.getNode(ISD::BITREVERSE, DL, ByteVT, Op);
1640 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
1641 return Op;
1642 }
1643 }
1644
1645 // If we have the appropriate vector bit operations, it is better to use them
1646 // than unrolling and expanding each component.
1647 if (TLI.isOperationLegalOrCustom(ISD::SHL, VT) &&
1651 return TLI.expandBITREVERSE(Node, DAG);
1652
1653 // Otherwise unroll.
1654 return SDValue();
1655}
1656
1657SDValue VectorLegalizer::ExpandVSELECT(SDNode *Node) {
1658 // Implement VSELECT in terms of XOR, AND, OR
1659 // on platforms which do not support blend natively.
1660 SDLoc DL(Node);
1661
1662 SDValue Mask = Node->getOperand(0);
1663 SDValue Op1 = Node->getOperand(1);
1664 SDValue Op2 = Node->getOperand(2);
1665
1666 EVT VT = Mask.getValueType();
1667
1668 // If we can't even use the basic vector operations of
1669 // AND,OR,XOR, we will have to scalarize the op.
1670 // Notice that the operation may be 'promoted' which means that it is
1671 // 'bitcasted' to another type which is handled.
1672 if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
1673 TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
1674 TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand)
1675 return SDValue();
1676
1677 // This operation also isn't safe with AND, OR, XOR when the boolean type is
1678 // 0/1 and the select operands aren't also booleans, as we need an all-ones
1679 // vector constant to mask with.
1680 // FIXME: Sign extend 1 to all ones if that's legal on the target.
1681 auto BoolContents = TLI.getBooleanContents(Op1.getValueType());
1682 if (BoolContents != TargetLowering::ZeroOrNegativeOneBooleanContent &&
1683 !(BoolContents == TargetLowering::ZeroOrOneBooleanContent &&
1684 Op1.getValueType().getVectorElementType() == MVT::i1))
1685 return SDValue();
1686
1687 // If the mask and the type are different sizes, unroll the vector op. This
1688 // can occur when getSetCCResultType returns something that is different in
1689 // size from the operand types. For example, v4i8 = select v4i32, v4i8, v4i8.
1690 if (VT.getSizeInBits() != Op1.getValueSizeInBits())
1691 return SDValue();
1692
1693 // Bitcast the operands to be the same type as the mask.
1694 // This is needed when we select between FP types because
1695 // the mask is a vector of integers.
1696 Op1 = DAG.getNode(ISD::BITCAST, DL, VT, Op1);
1697 Op2 = DAG.getNode(ISD::BITCAST, DL, VT, Op2);
1698
1699 SDValue NotMask = DAG.getNOT(DL, Mask, VT);
1700
1701 Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask);
1702 Op2 = DAG.getNode(ISD::AND, DL, VT, Op2, NotMask);
1703 SDValue Val = DAG.getNode(ISD::OR, DL, VT, Op1, Op2);
1704 return DAG.getNode(ISD::BITCAST, DL, Node->getValueType(0), Val);
1705}
1706
1707SDValue VectorLegalizer::ExpandVP_SELECT(SDNode *Node) {
1708 // Implement VP_SELECT in terms of VP_XOR, VP_AND and VP_OR on platforms which
1709 // do not support it natively.
1710 SDLoc DL(Node);
1711
1712 SDValue Mask = Node->getOperand(0);
1713 SDValue Op1 = Node->getOperand(1);
1714 SDValue Op2 = Node->getOperand(2);
1715 SDValue EVL = Node->getOperand(3);
1716
1717 EVT VT = Mask.getValueType();
1718
1719 // If we can't even use the basic vector operations of
1720 // VP_AND,VP_OR,VP_XOR, we will have to scalarize the op.
1721 if (TLI.getOperationAction(ISD::VP_AND, VT) == TargetLowering::Expand ||
1722 TLI.getOperationAction(ISD::VP_XOR, VT) == TargetLowering::Expand ||
1723 TLI.getOperationAction(ISD::VP_OR, VT) == TargetLowering::Expand)
1724 return SDValue();
1725
1726 // This operation also isn't safe when the operands aren't also booleans.
1727 if (Op1.getValueType().getVectorElementType() != MVT::i1)
1728 return SDValue();
1729
1730 SDValue Ones = DAG.getAllOnesConstant(DL, VT);
1731 SDValue NotMask = DAG.getNode(ISD::VP_XOR, DL, VT, Mask, Ones, Ones, EVL);
1732
1733 Op1 = DAG.getNode(ISD::VP_AND, DL, VT, Op1, Mask, Ones, EVL);
1734 Op2 = DAG.getNode(ISD::VP_AND, DL, VT, Op2, NotMask, Ones, EVL);
1735 return DAG.getNode(ISD::VP_OR, DL, VT, Op1, Op2, Ones, EVL);
1736}
1737
1738SDValue VectorLegalizer::ExpandVP_MERGE(SDNode *Node) {
1739 // Implement VP_MERGE in terms of VSELECT. Construct a mask where vector
1740 // indices less than the EVL/pivot are true. Combine that with the original
1741 // mask for a full-length mask. Use a full-length VSELECT to select between
1742 // the true and false values.
1743 SDLoc DL(Node);
1744
1745 SDValue Mask = Node->getOperand(0);
1746 SDValue Op1 = Node->getOperand(1);
1747 SDValue Op2 = Node->getOperand(2);
1748 SDValue EVL = Node->getOperand(3);
1749
1750 EVT MaskVT = Mask.getValueType();
1751 bool IsFixedLen = MaskVT.isFixedLengthVector();
1752
1753 EVT EVLVecVT = EVT::getVectorVT(*DAG.getContext(), EVL.getValueType(),
1754 MaskVT.getVectorElementCount());
1755
1756 // If we can't construct the EVL mask efficiently, it's better to unroll.
1757 if ((IsFixedLen &&
1759 (!IsFixedLen &&
1760 (!TLI.isOperationLegalOrCustom(ISD::STEP_VECTOR, EVLVecVT) ||
1762 return SDValue();
1763
1764 // If using a SETCC would result in a different type than the mask type,
1765 // unroll.
1766 if (TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
1767 EVLVecVT) != MaskVT)
1768 return SDValue();
1769
1770 SDValue StepVec = DAG.getStepVector(DL, EVLVecVT);
1771 SDValue SplatEVL = DAG.getSplat(EVLVecVT, DL, EVL);
1772 SDValue EVLMask =
1773 DAG.getSetCC(DL, MaskVT, StepVec, SplatEVL, ISD::CondCode::SETULT);
1774
1775 SDValue FullMask = DAG.getNode(ISD::AND, DL, MaskVT, Mask, EVLMask);
1776 return DAG.getSelect(DL, Node->getValueType(0), FullMask, Op1, Op2);
1777}
1778
1779SDValue VectorLegalizer::ExpandVP_REM(SDNode *Node) {
1780 // Implement VP_SREM/UREM in terms of VP_SDIV/VP_UDIV, VP_MUL, VP_SUB.
1781 EVT VT = Node->getValueType(0);
1782
1783 unsigned DivOpc = Node->getOpcode() == ISD::VP_SREM ? ISD::VP_SDIV : ISD::VP_UDIV;
1784
1785 if (!TLI.isOperationLegalOrCustom(DivOpc, VT) ||
1786 !TLI.isOperationLegalOrCustom(ISD::VP_MUL, VT) ||
1787 !TLI.isOperationLegalOrCustom(ISD::VP_SUB, VT))
1788 return SDValue();
1789
1790 SDLoc DL(Node);
1791
1792 SDValue Dividend = Node->getOperand(0);
1793 SDValue Divisor = Node->getOperand(1);
1794 SDValue Mask = Node->getOperand(2);
1795 SDValue EVL = Node->getOperand(3);
1796
1797 // X % Y -> X-X/Y*Y
1798 SDValue Div = DAG.getNode(DivOpc, DL, VT, Dividend, Divisor, Mask, EVL);
1799 SDValue Mul = DAG.getNode(ISD::VP_MUL, DL, VT, Divisor, Div, Mask, EVL);
1800 return DAG.getNode(ISD::VP_SUB, DL, VT, Dividend, Mul, Mask, EVL);
1801}
1802
1803SDValue VectorLegalizer::ExpandVP_FNEG(SDNode *Node) {
1804 EVT VT = Node->getValueType(0);
1805 EVT IntVT = VT.changeVectorElementTypeToInteger();
1806
1807 if (!TLI.isOperationLegalOrCustom(ISD::VP_XOR, IntVT))
1808 return SDValue();
1809
1810 SDValue Mask = Node->getOperand(1);
1811 SDValue EVL = Node->getOperand(2);
1812
1813 SDLoc DL(Node);
1814 SDValue Cast = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0));
1815 SDValue SignMask = DAG.getConstant(
1816 APInt::getSignMask(IntVT.getScalarSizeInBits()), DL, IntVT);
1817 SDValue Xor = DAG.getNode(ISD::VP_XOR, DL, IntVT, Cast, SignMask, Mask, EVL);
1818 return DAG.getNode(ISD::BITCAST, DL, VT, Xor);
1819}
1820
1821SDValue VectorLegalizer::ExpandVP_FABS(SDNode *Node) {
1822 EVT VT = Node->getValueType(0);
1823 EVT IntVT = VT.changeVectorElementTypeToInteger();
1824
1825 if (!TLI.isOperationLegalOrCustom(ISD::VP_AND, IntVT))
1826 return SDValue();
1827
1828 SDValue Mask = Node->getOperand(1);
1829 SDValue EVL = Node->getOperand(2);
1830
1831 SDLoc DL(Node);
1832 SDValue Cast = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0));
1833 SDValue ClearSignMask = DAG.getConstant(
1835 SDValue ClearSign =
1836 DAG.getNode(ISD::VP_AND, DL, IntVT, Cast, ClearSignMask, Mask, EVL);
1837 return DAG.getNode(ISD::BITCAST, DL, VT, ClearSign);
1838}
1839
1840SDValue VectorLegalizer::ExpandVP_FCOPYSIGN(SDNode *Node) {
1841 EVT VT = Node->getValueType(0);
1842
1843 if (VT != Node->getOperand(1).getValueType())
1844 return SDValue();
1845
1846 EVT IntVT = VT.changeVectorElementTypeToInteger();
1847 if (!TLI.isOperationLegalOrCustom(ISD::VP_AND, IntVT) ||
1848 !TLI.isOperationLegalOrCustom(ISD::VP_XOR, IntVT))
1849 return SDValue();
1850
1851 SDValue Mask = Node->getOperand(2);
1852 SDValue EVL = Node->getOperand(3);
1853
1854 SDLoc DL(Node);
1855 SDValue Mag = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0));
1856 SDValue Sign = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(1));
1857
1858 SDValue SignMask = DAG.getConstant(
1859 APInt::getSignMask(IntVT.getScalarSizeInBits()), DL, IntVT);
1860 SDValue SignBit =
1861 DAG.getNode(ISD::VP_AND, DL, IntVT, Sign, SignMask, Mask, EVL);
1862
1863 SDValue ClearSignMask = DAG.getConstant(
1865 SDValue ClearedSign =
1866 DAG.getNode(ISD::VP_AND, DL, IntVT, Mag, ClearSignMask, Mask, EVL);
1867
1868 SDValue CopiedSign = DAG.getNode(ISD::VP_OR, DL, IntVT, ClearedSign, SignBit,
1869 Mask, EVL, SDNodeFlags::Disjoint);
1870
1871 return DAG.getNode(ISD::BITCAST, DL, VT, CopiedSign);
1872}
1873
1874SDValue VectorLegalizer::ExpandLOOP_DEPENDENCE_MASK(SDNode *N) {
1875 SDLoc DL(N);
1876 EVT VT = N->getValueType(0);
1877 SDValue SourceValue = N->getOperand(0);
1878 SDValue SinkValue = N->getOperand(1);
1879 SDValue EltSizeInBytes = N->getOperand(2);
1880
1881 // Note: The lane offset is scalable if the mask is scalable.
1882 ElementCount LaneOffsetEC =
1883 ElementCount::get(N->getConstantOperandVal(3), VT.isScalableVT());
1884
1885 EVT PtrVT = SourceValue->getValueType(0);
1886 bool IsReadAfterWrite = N->getOpcode() == ISD::LOOP_DEPENDENCE_RAW_MASK;
1887
1888 // Take the difference between the pointers and divided by the element size,
1889 // to see how many lanes separate them.
1890 SDValue Diff = DAG.getNode(ISD::SUB, DL, PtrVT, SinkValue, SourceValue);
1891 if (IsReadAfterWrite)
1892 Diff = DAG.getNode(ISD::ABS, DL, PtrVT, Diff);
1893 Diff = DAG.getNode(ISD::SDIV, DL, PtrVT, Diff, EltSizeInBytes);
1894
1895 // The pointers do not alias if:
1896 // * Diff <= 0 (WAR_MASK)
1897 // * Diff == 0 (RAW_MASK)
1898 EVT CmpVT =
1899 TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), PtrVT);
1900 SDValue Zero = DAG.getConstant(0, DL, PtrVT);
1901 SDValue Cmp = DAG.getSetCC(DL, CmpVT, Diff, Zero,
1902 IsReadAfterWrite ? ISD::SETEQ : ISD::SETLE);
1903
1904 // The pointers do not alias if:
1905 // Lane + LaneOffset < Diff (WAR/RAW_MASK)
1906 SDValue LaneOffset = DAG.getElementCount(DL, PtrVT, LaneOffsetEC);
1907 SDValue MaskN =
1908 DAG.getSelect(DL, PtrVT, Cmp, DAG.getConstant(-1, DL, PtrVT), Diff);
1909
1910 return DAG.getNode(ISD::GET_ACTIVE_LANE_MASK, DL, VT, LaneOffset, MaskN);
1911}
1912
1913void VectorLegalizer::ExpandFP_TO_UINT(SDNode *Node,
1914 SmallVectorImpl<SDValue> &Results) {
1915 // Attempt to expand using TargetLowering.
1916 SDValue Result, Chain;
1917 if (TLI.expandFP_TO_UINT(Node, Result, Chain, DAG)) {
1918 Results.push_back(Result);
1919 if (Node->isStrictFPOpcode())
1920 Results.push_back(Chain);
1921 return;
1922 }
1923
1924 // Otherwise go ahead and unroll.
1925 if (Node->isStrictFPOpcode()) {
1926 UnrollStrictFPOp(Node, Results);
1927 return;
1928 }
1929
1930 Results.push_back(DAG.UnrollVectorOp(Node));
1931}
1932
1933void VectorLegalizer::ExpandUINT_TO_FLOAT(SDNode *Node,
1934 SmallVectorImpl<SDValue> &Results) {
1935 bool IsStrict = Node->isStrictFPOpcode();
1936 unsigned OpNo = IsStrict ? 1 : 0;
1937 SDValue Src = Node->getOperand(OpNo);
1938 EVT SrcVT = Src.getValueType();
1939 EVT DstVT = Node->getValueType(0);
1940 SDLoc DL(Node);
1941
1942 // Attempt to expand using TargetLowering.
1944 SDValue Chain;
1945 if (TLI.expandUINT_TO_FP(Node, Result, Chain, DAG)) {
1946 Results.push_back(Result);
1947 if (IsStrict)
1948 Results.push_back(Chain);
1949 return;
1950 }
1951
1952 // Make sure that the SINT_TO_FP and SRL instructions are available.
1953 if (((!IsStrict && TLI.getOperationAction(ISD::SINT_TO_FP, SrcVT) ==
1954 TargetLowering::Expand) ||
1955 (IsStrict && TLI.getOperationAction(ISD::STRICT_SINT_TO_FP, SrcVT) ==
1956 TargetLowering::Expand)) ||
1957 TLI.getOperationAction(ISD::SRL, SrcVT) == TargetLowering::Expand) {
1958 if (IsStrict) {
1959 UnrollStrictFPOp(Node, Results);
1960 return;
1961 }
1962
1963 Results.push_back(DAG.UnrollVectorOp(Node));
1964 return;
1965 }
1966
1967 unsigned BW = SrcVT.getScalarSizeInBits();
1968 assert((BW == 64 || BW == 32) &&
1969 "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide");
1970
1971 // If STRICT_/FMUL is not supported by the target (in case of f16) replace the
1972 // UINT_TO_FP with a larger float and round to the smaller type
1973 if ((!IsStrict && !TLI.isOperationLegalOrCustom(ISD::FMUL, DstVT)) ||
1974 (IsStrict && !TLI.isOperationLegalOrCustom(ISD::STRICT_FMUL, DstVT))) {
1975 EVT FPVT = BW == 32 ? MVT::f32 : MVT::f64;
1976 SDValue UIToFP;
1978 SDValue TargetZero = DAG.getIntPtrConstant(0, DL, /*isTarget=*/true);
1979 EVT FloatVecVT = SrcVT.changeVectorElementType(*DAG.getContext(), FPVT);
1980 if (IsStrict) {
1981 UIToFP = DAG.getNode(ISD::STRICT_UINT_TO_FP, DL, {FloatVecVT, MVT::Other},
1982 {Node->getOperand(0), Src});
1983 Result = DAG.getNode(ISD::STRICT_FP_ROUND, DL, {DstVT, MVT::Other},
1984 {Node->getOperand(0), UIToFP, TargetZero});
1985 Results.push_back(Result);
1986 Results.push_back(Result.getValue(1));
1987 } else {
1988 UIToFP = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVecVT, Src);
1989 Result = DAG.getNode(ISD::FP_ROUND, DL, DstVT, UIToFP, TargetZero);
1990 Results.push_back(Result);
1991 }
1992
1993 return;
1994 }
1995
1996 SDValue HalfWord = DAG.getConstant(BW / 2, DL, SrcVT);
1997
1998 // Constants to clear the upper part of the word.
1999 // Notice that we can also use SHL+SHR, but using a constant is slightly
2000 // faster on x86.
2001 uint64_t HWMask = (BW == 64) ? 0x00000000FFFFFFFF : 0x0000FFFF;
2002 SDValue HalfWordMask = DAG.getConstant(HWMask, DL, SrcVT);
2003
2004 // Two to the power of half-word-size.
2005 SDValue TWOHW = DAG.getConstantFP(1ULL << (BW / 2), DL, DstVT);
2006
2007 // Clear upper part of LO, lower HI
2008 SDValue HI = DAG.getNode(ISD::SRL, DL, SrcVT, Src, HalfWord);
2009 SDValue LO = DAG.getNode(ISD::AND, DL, SrcVT, Src, HalfWordMask);
2010
2011 if (IsStrict) {
2012 // Convert hi and lo to floats
2013 // Convert the hi part back to the upper values
2014 // TODO: Can any fast-math-flags be set on these nodes?
2015 SDValue fHI = DAG.getNode(ISD::STRICT_SINT_TO_FP, DL, {DstVT, MVT::Other},
2016 {Node->getOperand(0), HI});
2017 fHI = DAG.getNode(ISD::STRICT_FMUL, DL, {DstVT, MVT::Other},
2018 {fHI.getValue(1), fHI, TWOHW});
2019 SDValue fLO = DAG.getNode(ISD::STRICT_SINT_TO_FP, DL, {DstVT, MVT::Other},
2020 {Node->getOperand(0), LO});
2021
2022 SDValue TF = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, fHI.getValue(1),
2023 fLO.getValue(1));
2024
2025 // Add the two halves
2026 SDValue Result =
2027 DAG.getNode(ISD::STRICT_FADD, DL, {DstVT, MVT::Other}, {TF, fHI, fLO});
2028
2029 Results.push_back(Result);
2030 Results.push_back(Result.getValue(1));
2031 return;
2032 }
2033
2034 // Convert hi and lo to floats
2035 // Convert the hi part back to the upper values
2036 // TODO: Can any fast-math-flags be set on these nodes?
2037 SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, DstVT, HI);
2038 fHI = DAG.getNode(ISD::FMUL, DL, DstVT, fHI, TWOHW);
2039 SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, DstVT, LO);
2040
2041 // Add the two halves
2042 Results.push_back(DAG.getNode(ISD::FADD, DL, DstVT, fHI, fLO));
2043}
2044
2045SDValue VectorLegalizer::ExpandFNEG(SDNode *Node) {
2046 EVT VT = Node->getValueType(0);
2047 EVT IntVT = VT.changeVectorElementTypeToInteger();
2048
2049 if (!TLI.isOperationLegalOrCustom(ISD::XOR, IntVT))
2050 return SDValue();
2051
2052 // FIXME: The FSUB check is here to force unrolling v1f64 vectors on AArch64.
2054 !VT.isScalableVector())
2055 return SDValue();
2056
2057 SDLoc DL(Node);
2058 SDValue Cast = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0));
2059 SDValue SignMask = DAG.getConstant(
2060 APInt::getSignMask(IntVT.getScalarSizeInBits()), DL, IntVT);
2061 SDValue Xor = DAG.getNode(ISD::XOR, DL, IntVT, Cast, SignMask);
2062 return DAG.getNode(ISD::BITCAST, DL, VT, Xor);
2063}
2064
2065SDValue VectorLegalizer::ExpandFABS(SDNode *Node) {
2066 EVT VT = Node->getValueType(0);
2067 EVT IntVT = VT.changeVectorElementTypeToInteger();
2068
2069 if (!TLI.isOperationLegalOrCustom(ISD::AND, IntVT))
2070 return SDValue();
2071
2072 // FIXME: The FSUB check is here to force unrolling v1f64 vectors on AArch64.
2074 !VT.isScalableVector())
2075 return SDValue();
2076
2077 SDLoc DL(Node);
2078 SDValue Cast = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0));
2079 SDValue ClearSignMask = DAG.getConstant(
2081 SDValue ClearedSign = DAG.getNode(ISD::AND, DL, IntVT, Cast, ClearSignMask);
2082 return DAG.getNode(ISD::BITCAST, DL, VT, ClearedSign);
2083}
2084
2085SDValue VectorLegalizer::ExpandFCOPYSIGN(SDNode *Node) {
2086 EVT VT = Node->getValueType(0);
2087 EVT IntVT = VT.changeVectorElementTypeToInteger();
2088
2089 if (VT != Node->getOperand(1).getValueType() ||
2090 !TLI.isOperationLegalOrCustom(ISD::AND, IntVT) ||
2091 !TLI.isOperationLegalOrCustom(ISD::OR, IntVT))
2092 return SDValue();
2093
2094 // FIXME: The FSUB check is here to force unrolling v1f64 vectors on AArch64.
2096 !VT.isScalableVector())
2097 return SDValue();
2098
2099 SDLoc DL(Node);
2100 SDValue Mag = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0));
2101 SDValue Sign = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(1));
2102
2103 SDValue SignMask = DAG.getConstant(
2104 APInt::getSignMask(IntVT.getScalarSizeInBits()), DL, IntVT);
2105 SDValue SignBit = DAG.getNode(ISD::AND, DL, IntVT, Sign, SignMask);
2106
2107 SDValue ClearSignMask = DAG.getConstant(
2109 SDValue ClearedSign = DAG.getNode(ISD::AND, DL, IntVT, Mag, ClearSignMask);
2110
2111 SDValue CopiedSign = DAG.getNode(ISD::OR, DL, IntVT, ClearedSign, SignBit,
2113
2114 return DAG.getNode(ISD::BITCAST, DL, VT, CopiedSign);
2115}
2116
2117void VectorLegalizer::ExpandFSUB(SDNode *Node,
2118 SmallVectorImpl<SDValue> &Results) {
2119 // For floating-point values, (a-b) is the same as a+(-b). If FNEG is legal,
2120 // we can defer this to operation legalization where it will be lowered as
2121 // a+(-b).
2122 EVT VT = Node->getValueType(0);
2123 if (TLI.isOperationLegalOrCustom(ISD::FNEG, VT) &&
2125 return; // Defer to LegalizeDAG
2126
2127 if (SDValue Expanded = TLI.expandVectorNaryOpBySplitting(Node, DAG)) {
2128 Results.push_back(Expanded);
2129 return;
2130 }
2131
2132 SDValue Tmp = DAG.UnrollVectorOp(Node);
2133 Results.push_back(Tmp);
2134}
2135
// Expand a SETCC / VP_SETCC / STRICT_FSETCC[S] whose condition code must be
// rewritten for the operand type. Results receives the legalized value and,
// for strict nodes, the output chain.
void VectorLegalizer::ExpandSETCC(SDNode *Node,
                                  SmallVectorImpl<SDValue> &Results) {
  bool NeedInvert = false;
  bool IsVP = Node->getOpcode() == ISD::VP_SETCC;
  bool IsStrict = Node->getOpcode() == ISD::STRICT_FSETCC ||
                  Node->getOpcode() == ISD::STRICT_FSETCCS;
  bool IsSignaling = Node->getOpcode() == ISD::STRICT_FSETCCS;
  // Strict nodes carry the chain as operand 0, shifting the LHS/RHS/CC slots.
  unsigned Offset = IsStrict ? 1 : 0;

  SDValue Chain = IsStrict ? Node->getOperand(0) : SDValue();
  SDValue LHS = Node->getOperand(0 + Offset);
  SDValue RHS = Node->getOperand(1 + Offset);
  SDValue CC = Node->getOperand(2 + Offset);

  MVT OpVT = LHS.getSimpleValueType();
  ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();

  // If the condition code itself does not need expansion, only the operation
  // does: scalarize it.
  if (TLI.getCondCodeAction(CCCode, OpVT) != TargetLowering::Expand) {
    if (IsStrict) {
      UnrollStrictFPOp(Node, Results);
      return;
    }
    Results.push_back(UnrollVSETCC(Node));
    return;
  }

  // VP nodes carry a mask and explicit vector length after the condition code.
  SDValue Mask, EVL;
  if (IsVP) {
    Mask = Node->getOperand(3 + Offset);
    EVL = Node->getOperand(4 + Offset);
  }

  SDLoc dl(Node);
  // Ask TargetLowering to rewrite the condition code; it may swap or invert
  // the operands (reported via CC/NeedInvert) or fully legalize into LHS.
  bool Legalized =
      TLI.LegalizeSetCCCondCode(DAG, Node->getValueType(0), LHS, RHS, CC, Mask,
                                EVL, NeedInvert, dl, Chain, IsSignaling);

  if (Legalized) {
    // If we expanded the SETCC by swapping LHS and RHS, or by inverting the
    // condition code, create a new SETCC node.
    if (CC.getNode()) {
      if (IsStrict) {
        LHS = DAG.getNode(Node->getOpcode(), dl, Node->getVTList(),
                          {Chain, LHS, RHS, CC}, Node->getFlags());
        Chain = LHS.getValue(1);
      } else if (IsVP) {
        LHS = DAG.getNode(ISD::VP_SETCC, dl, Node->getValueType(0),
                          {LHS, RHS, CC, Mask, EVL}, Node->getFlags());
      } else {
        LHS = DAG.getNode(ISD::SETCC, dl, Node->getValueType(0), LHS, RHS, CC,
                          Node->getFlags());
      }
    }

    // If we expanded the SETCC by inverting the condition code, then wrap
    // the existing SETCC in a NOT to restore the intended condition.
    if (NeedInvert) {
      if (!IsVP)
        LHS = DAG.getLogicalNOT(dl, LHS, LHS->getValueType(0));
      else
        LHS = DAG.getVPLogicalNOT(dl, LHS, Mask, EVL, LHS->getValueType(0));
    }
  } else {
    assert(!IsStrict && "Don't know how to expand for strict nodes.");

    // Otherwise, SETCC for the given comparison type must be completely
    // illegal; expand it into a SELECT_CC.
    EVT VT = Node->getValueType(0);
    LHS = DAG.getNode(ISD::SELECT_CC, dl, VT, LHS, RHS,
                      DAG.getBoolConstant(true, dl, VT, LHS.getValueType()),
                      DAG.getBoolConstant(false, dl, VT, LHS.getValueType()),
                      CC, Node->getFlags());
  }

  Results.push_back(LHS);
  if (IsStrict)
    Results.push_back(Chain);
}
2214
2215void VectorLegalizer::ExpandUADDSUBO(SDNode *Node,
2216 SmallVectorImpl<SDValue> &Results) {
2217 SDValue Result, Overflow;
2218 TLI.expandUADDSUBO(Node, Result, Overflow, DAG);
2219 Results.push_back(Result);
2220 Results.push_back(Overflow);
2221}
2222
2223void VectorLegalizer::ExpandSADDSUBO(SDNode *Node,
2224 SmallVectorImpl<SDValue> &Results) {
2225 SDValue Result, Overflow;
2226 TLI.expandSADDSUBO(Node, Result, Overflow, DAG);
2227 Results.push_back(Result);
2228 Results.push_back(Overflow);
2229}
2230
2231void VectorLegalizer::ExpandMULO(SDNode *Node,
2232 SmallVectorImpl<SDValue> &Results) {
2233 SDValue Result, Overflow;
2234 if (!TLI.expandMULO(Node, Result, Overflow, DAG))
2235 std::tie(Result, Overflow) = DAG.UnrollVectorOverflowOp(Node);
2236
2237 Results.push_back(Result);
2238 Results.push_back(Overflow);
2239}
2240
2241void VectorLegalizer::ExpandFixedPointDiv(SDNode *Node,
2242 SmallVectorImpl<SDValue> &Results) {
2243 SDNode *N = Node;
2244 if (SDValue Expanded = TLI.expandFixedPointDiv(N->getOpcode(), SDLoc(N),
2245 N->getOperand(0), N->getOperand(1), N->getConstantOperandVal(2), DAG))
2246 Results.push_back(Expanded);
2247}
2248
2249void VectorLegalizer::ExpandStrictFPOp(SDNode *Node,
2250 SmallVectorImpl<SDValue> &Results) {
2251 if (Node->getOpcode() == ISD::STRICT_UINT_TO_FP) {
2252 ExpandUINT_TO_FLOAT(Node, Results);
2253 return;
2254 }
2255 if (Node->getOpcode() == ISD::STRICT_FP_TO_UINT) {
2256 ExpandFP_TO_UINT(Node, Results);
2257 return;
2258 }
2259
2260 if (Node->getOpcode() == ISD::STRICT_FSETCC ||
2261 Node->getOpcode() == ISD::STRICT_FSETCCS) {
2262 ExpandSETCC(Node, Results);
2263 return;
2264 }
2265
2266 UnrollStrictFPOp(Node, Results);
2267}
2268
2269void VectorLegalizer::ExpandREM(SDNode *Node,
2270 SmallVectorImpl<SDValue> &Results) {
2271 assert((Node->getOpcode() == ISD::SREM || Node->getOpcode() == ISD::UREM) &&
2272 "Expected REM node");
2273
2275 if (!TLI.expandREM(Node, Result, DAG))
2276 Result = DAG.UnrollVectorOp(Node);
2277 Results.push_back(Result);
2278}
2279
2280// Try to expand libm nodes into vector math routine calls. Callers provide the
2281// LibFunc equivalent of the passed in Node, which is used to lookup mappings
2282// within TargetLibraryInfo. The only mappings considered are those where the
2283// result and all operands are the same vector type. While predicated nodes are
2284// not supported, we will emit calls to masked routines by passing in an all
2285// true mask.
bool VectorLegalizer::tryExpandVecMathCall(SDNode *Node, RTLIB::Libcall LC,
                                           SmallVectorImpl<SDValue> &Results) {
  // Chain must be propagated but currently strict fp operations are down
  // converted to their none strict counterpart.
  assert(!Node->isStrictFPOpcode() && "Unexpected strict fp operation!");

  // Look up the target's implementation of this libcall; bail if none exists.
  RTLIB::LibcallImpl LCImpl = DAG.getLibcalls().getLibcallImpl(LC);
  if (LCImpl == RTLIB::Unsupported)
    return false;

  EVT VT = Node->getValueType(0);
  const RTLIB::RuntimeLibcallsInfo &RTLCI = TLI.getRuntimeLibcallsInfo();
  LLVMContext &Ctx = *DAG.getContext();

  // Recover the IR-level signature of the routine so we can type each call
  // argument and the return value.
  auto [FuncTy, FuncAttrs] = RTLCI.getFunctionTy(
      Ctx, DAG.getSubtarget().getTargetTriple(), DAG.getDataLayout(), LCImpl);

  SDLoc DL(Node);
  TargetLowering::ArgListTy Args;

  // Masked routines take a trailing mask parameter; we pass an all-true mask.
  bool HasMaskArg = RTLCI.hasVectorMaskArgument(LCImpl);

  // Sanity check just in case function has unexpected parameters.
  assert(FuncTy->getNumParams() == Node->getNumOperands() + HasMaskArg &&
         EVT::getEVT(FuncTy->getReturnType(), true) == VT &&
         "mismatch in value type and call signature type");

  for (unsigned I = 0, E = FuncTy->getNumParams(); I != E; ++I) {
    Type *ParamTy = FuncTy->getParamType(I);

    if (HasMaskArg && I == E - 1) {
      // Final parameter is the mask: synthesize an all-true vector of the
      // target's setcc result type.
      assert(cast<VectorType>(ParamTy)->getElementType()->isIntegerTy(1) &&
             "unexpected vector mask type");
      EVT MaskVT = TLI.getSetCCResultType(DAG.getDataLayout(), Ctx, VT);
      Args.emplace_back(DAG.getBoolConstant(true, DL, MaskVT, VT),
                        MaskVT.getTypeForEVT(Ctx));

    } else {
      // Ordinary parameter: forward the node operand as-is.
      SDValue Op = Node->getOperand(I);
      assert(Op.getValueType() == EVT::getEVT(ParamTy, true) &&
             "mismatch in value type and call argument type");
      Args.emplace_back(Op, ParamTy);
    }
  }

  // Emit a call to the vector function.
  SDValue Callee =
      DAG.getExternalSymbol(LCImpl, TLI.getPointerTy(DAG.getDataLayout()));
  CallingConv::ID CC = RTLCI.getLibcallImplCallingConv(LCImpl);

  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL)
      .setChain(DAG.getEntryNode())
      .setLibCallee(CC, FuncTy->getReturnType(), Callee, std::move(Args));

  std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
  Results.push_back(CallResult.first);
  return true;
}
2345
2346void VectorLegalizer::UnrollStrictFPOp(SDNode *Node,
2347 SmallVectorImpl<SDValue> &Results) {
2348 EVT VT = Node->getValueType(0);
2349 EVT EltVT = VT.getVectorElementType();
2350 unsigned NumElems = VT.getVectorNumElements();
2351 unsigned NumOpers = Node->getNumOperands();
2352 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2353
2354 EVT TmpEltVT = EltVT;
2355 if (Node->getOpcode() == ISD::STRICT_FSETCC ||
2356 Node->getOpcode() == ISD::STRICT_FSETCCS)
2357 TmpEltVT = TLI.getSetCCResultType(DAG.getDataLayout(),
2358 *DAG.getContext(), TmpEltVT);
2359
2360 EVT ValueVTs[] = {TmpEltVT, MVT::Other};
2361 SDValue Chain = Node->getOperand(0);
2362 SDLoc dl(Node);
2363
2364 SmallVector<SDValue, 32> OpValues;
2365 SmallVector<SDValue, 32> OpChains;
2366 for (unsigned i = 0; i < NumElems; ++i) {
2368 SDValue Idx = DAG.getVectorIdxConstant(i, dl);
2369
2370 // The Chain is the first operand.
2371 Opers.push_back(Chain);
2372
2373 // Now process the remaining operands.
2374 for (unsigned j = 1; j < NumOpers; ++j) {
2375 SDValue Oper = Node->getOperand(j);
2376 EVT OperVT = Oper.getValueType();
2377
2378 if (OperVT.isVector())
2379 Oper = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
2380 OperVT.getVectorElementType(), Oper, Idx);
2381
2382 Opers.push_back(Oper);
2383 }
2384
2385 SDValue ScalarOp = DAG.getNode(Node->getOpcode(), dl, ValueVTs, Opers);
2386 SDValue ScalarResult = ScalarOp.getValue(0);
2387 SDValue ScalarChain = ScalarOp.getValue(1);
2388
2389 if (Node->getOpcode() == ISD::STRICT_FSETCC ||
2390 Node->getOpcode() == ISD::STRICT_FSETCCS)
2391 ScalarResult = DAG.getSelect(dl, EltVT, ScalarResult,
2392 DAG.getAllOnesConstant(dl, EltVT),
2393 DAG.getConstant(0, dl, EltVT));
2394
2395 OpValues.push_back(ScalarResult);
2396 OpChains.push_back(ScalarChain);
2397 }
2398
2399 SDValue Result = DAG.getBuildVector(VT, dl, OpValues);
2400 SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OpChains);
2401
2402 Results.push_back(Result);
2403 Results.push_back(NewChain);
2404}
2405
2406SDValue VectorLegalizer::UnrollVSETCC(SDNode *Node) {
2407 EVT VT = Node->getValueType(0);
2408 unsigned NumElems = VT.getVectorNumElements();
2409 EVT EltVT = VT.getVectorElementType();
2410 SDValue LHS = Node->getOperand(0);
2411 SDValue RHS = Node->getOperand(1);
2412 SDValue CC = Node->getOperand(2);
2413 EVT TmpEltVT = LHS.getValueType().getVectorElementType();
2414 SDLoc dl(Node);
2415 SmallVector<SDValue, 8> Ops(NumElems);
2416 for (unsigned i = 0; i < NumElems; ++i) {
2417 SDValue LHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS,
2418 DAG.getVectorIdxConstant(i, dl));
2419 SDValue RHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS,
2420 DAG.getVectorIdxConstant(i, dl));
2421 // FIXME: We should use i1 setcc + boolext here, but it causes regressions.
2422 Ops[i] = DAG.getNode(ISD::SETCC, dl,
2424 *DAG.getContext(), TmpEltVT),
2425 LHSElem, RHSElem, CC);
2426 Ops[i] = DAG.getSelect(dl, EltVT, Ops[i],
2427 DAG.getBoolConstant(true, dl, EltVT, VT),
2428 DAG.getConstant(0, dl, EltVT));
2429 }
2430 return DAG.getBuildVector(VT, dl, Ops);
2431}
2432
2434 return VectorLegalizer(*this).Run();
2435}
return SDValue()
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis Results
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file defines the DenseMap class.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static void createBSWAPShuffleMask(EVT VT, SmallVectorImpl< int > &ShuffleMask)
#define I(x, y, z)
Definition MD5.cpp:57
#define T
This file defines the SmallVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:114
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
BinaryOperator * Mul
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:210
bool isBigEndian() const
Definition DataLayout.h:216
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:241
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
Definition TypeSize.h:315
size_t size() const
Definition Function.h:858
LLVM_ABI RTLIB::LibcallImpl getLibcallImpl(RTLIB::Libcall Call) const
Return the lowering's selection of implementation call for Call.
const Triple & getTargetTriple() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:298
Represents one node in the SelectionDAG.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getElementCount(const SDLoc &DL, EVT VT, ElementCount EC)
const SDValue & getRoot() const
Return the root tag of the SelectionDAG.
const TargetSubtargetInfo & getSubtarget() const
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI bool LegalizeVectors()
This transforms the SelectionDAG into a SelectionDAG that only uses vector math operations supported ...
LLVM_ABI SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getInsertSubvector(const SDLoc &DL, SDValue Vec, SDValue SubVec, unsigned Idx)
Insert SubVec at the Idx element of Vec.
LLVM_ABI SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false, SDNodeFlags Flags={})
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
LLVM_ABI std::pair< SDValue, SDValue > UnrollVectorOverflowOp(SDNode *N, unsigned ResNE=0)
Like UnrollVectorOp(), but for the [US](ADD|SUB|MUL)O family of opcodes.
allnodes_const_iterator allnodes_begin() const
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
allnodes_const_iterator allnodes_end() const
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
const DataLayout & getDataLayout() const
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
LLVM_ABI void RemoveDeadNodes()
This method deletes all unreachable nodes in the SelectionDAG.
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
LLVM_ABI SDValue getVPLogicalNOT(const SDLoc &DL, SDValue Val, SDValue Mask, SDValue EVL, EVT VT)
Create a vector-predicated logical NOT operation as (VP_XOR Val, BooleanOne, Mask,...
const LibcallLoweringInfo & getLibcalls() const
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
LLVM_ABI unsigned AssignTopologicalOrder()
Topological-sort the AllNodes list and a assign a unique node id for each node in the DAG based on th...
LLVM_ABI SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getPOISON(EVT VT)
Return a POISON node. POISON does not have a useful SDLoc.
LLVMContext * getContext() const
const SDValue & setRoot(SDValue N)
Set the current root tag of the SelectionDAG.
LLVM_ABI SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
LLVM_ABI SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
ilist< SDNode >::iterator allnodes_iterator
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void resize(size_type N)
void push_back(const T &Elt)
virtual bool isShuffleMaskLegal(ArrayRef< int >, EVT) const
Targets can use this to indicate that they only support some VECTOR_SHUFFLE operations,...
SDValue promoteTargetBoolean(SelectionDAG &DAG, SDValue Bool, EVT ValVT) const
Promote the given target boolean to a target boolean of the given type.
LegalizeAction getCondCodeAction(ISD::CondCode CC, MVT VT) const
Return how the condition code should be treated: either it is legal, needs to be expanded to some oth...
LegalizeAction getFixedPointOperationAction(unsigned Op, EVT VT, unsigned Scale) const
Some fixed point operations may be natively supported by the target but only for specific scales.
bool isStrictFPEnabled() const
Return true if the target support strict float operation.
virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const
Return the ValueType of the result of SETCC operations.
BooleanContent getBooleanContents(bool isVec, bool isFloat) const
For targets without i1 registers, this gives the nature of the high-bits of boolean values held in ty...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
LegalizeAction getTruncStoreAction(EVT ValVT, EVT MemVT) const
Return how this store with truncation should be treated: either it is legal, needs to be promoted to ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
LegalizeAction getPartialReduceMLAAction(unsigned Opc, EVT AccVT, EVT InputVT) const
Return how a PARTIAL_REDUCE_U/SMLA node with Acc type AccVT and Input type InputVT should be treated.
LegalizeAction getLoadAction(EVT ValVT, EVT MemVT, Align Alignment, unsigned AddrSpace, unsigned ExtType, bool Atomic) const
Return how this load with extension should be treated: either it is legal, needs to be promoted to a ...
LegalizeAction getStrictFPOperationAction(unsigned Op, EVT VT) const
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
MVT getTypeToPromoteTo(unsigned Op, MVT VT) const
If the action for this operation is to promote, this method returns the ValueType to promote to.
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
const RTLIB::RuntimeLibcallsInfo & getRuntimeLibcallsInfo() const
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT.
bool expandMultipleResultFPLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, SDNode *Node, SmallVectorImpl< SDValue > &Results, std::optional< unsigned > CallRetResNo={}) const
Expands a node with multiple results to an FP or vector libcall.
SDValue expandVPCTLZ(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTLZ/VP_CTLZ_ZERO_UNDEF nodes.
bool expandMULO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]MULO.
SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const
SDValue expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const
Expand VP_BSWAP nodes.
SDValue expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const
Expand a VECREDUCE_SEQ_* into an explicit ordered calculation.
SDValue expandCTLZ(SDNode *N, SelectionDAG &DAG) const
Expand CTLZ/CTLZ_ZERO_UNDEF nodes.
SDValue expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const
Expand BITREVERSE nodes.
SDValue expandCTTZ(SDNode *N, SelectionDAG &DAG) const
Expand CTTZ/CTTZ_ZERO_UNDEF nodes.
SDValue expandABD(SDNode *N, SelectionDAG &DAG) const
Expand ABDS/ABDU nodes.
SDValue expandCLMUL(SDNode *N, SelectionDAG &DAG) const
Expand carryless multiply.
SDValue expandShlSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]SHLSAT.
SDValue expandFP_TO_INT_SAT(SDNode *N, SelectionDAG &DAG) const
Expand FP_TO_[US]INT_SAT into FP_TO_[US]INT and selects or min/max.
void expandSADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::S(ADD|SUB)O.
SDValue expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const
Expand VP_BITREVERSE nodes.
SDValue expandABS(SDNode *N, SelectionDAG &DAG, bool IsNegative=false) const
Expand ABS nodes.
SDValue expandVecReduce(SDNode *Node, SelectionDAG &DAG) const
Expand a VECREDUCE_* into an explicit calculation.
bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const
Expand float to UINT conversion.
bool expandREM(SDNode *Node, SDValue &Result, SelectionDAG &DAG) const
Expand an SREM or UREM using SDIV/UDIV or SDIVREM/UDIVREM, if legal.
SDValue expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *N, SelectionDAG &DAG) const
Expand fminimumnum/fmaximumnum into multiple comparison with selects.
SDValue expandCTPOP(SDNode *N, SelectionDAG &DAG) const
Expand CTPOP nodes.
SDValue expandVectorNaryOpBySplitting(SDNode *Node, SelectionDAG &DAG) const
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
SDValue expandBSWAP(SDNode *N, SelectionDAG &DAG) const
Expand BSWAP nodes.
SDValue expandFMINIMUM_FMAXIMUM(SDNode *N, SelectionDAG &DAG) const
Expand fminimum/fmaximum into multiple comparison with selects.
std::pair< SDValue, SDValue > scalarizeVectorLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Turn load of vector type into a load of the individual elements.
SDValue expandFunnelShift(SDNode *N, SelectionDAG &DAG) const
Expand funnel shift.
bool LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, SDValue Mask, SDValue EVL, bool &NeedInvert, const SDLoc &dl, SDValue &Chain, bool IsSignaling=false) const
Legalize a SETCC or VP_SETCC with given LHS and RHS and condition code CC on the current target.
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const
This callback is invoked for operations that are unsupported by the target, which are registered to u...
SDValue expandVPCTPOP(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTPOP nodes.
SDValue expandFixedPointDiv(unsigned Opcode, const SDLoc &dl, SDValue LHS, SDValue RHS, unsigned Scale, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]DIVFIX[SAT].
SDValue expandVPCTTZ(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTTZ/VP_CTTZ_ZERO_UNDEF nodes.
SDValue expandVECTOR_COMPRESS(SDNode *Node, SelectionDAG &DAG) const
Expand a vector VECTOR_COMPRESS into a sequence of extract element, store temporarily,...
SDValue expandROT(SDNode *N, bool AllowVectorOps, SelectionDAG &DAG) const
Expand rotations.
SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const
Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs.
SDValue expandCMP(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]CMP.
SDValue expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[U|S]MULFIX[SAT].
SDValue expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][MIN|MAX].
SDValue expandVectorFindLastActive(SDNode *N, SelectionDAG &DAG) const
Expand VECTOR_FIND_LAST_ACTIVE nodes.
SDValue expandPartialReduceMLA(SDNode *Node, SelectionDAG &DAG) const
Expands PARTIAL_REDUCE_S/UMLA nodes to a series of simpler operations, consisting of zext/sext,...
void expandUADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::U(ADD|SUB)O.
bool expandUINT_TO_FP(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const
Expand UINT(i64) to double(f64) conversion.
SDValue expandAVG(SDNode *N, SelectionDAG &DAG) const
Expand vector/scalar AVGCEILS/AVGCEILU/AVGFLOORS/AVGFLOORU nodes.
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:819
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:261
@ CTLZ_ZERO_UNDEF
Definition ISDOpcodes.h:788
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:511
@ PARTIAL_REDUCE_SMLA
PARTIAL_REDUCE_[U|S]MLA(Accumulator, Input1, Input2) The partial reduction nodes sign or zero extend ...
@ LOOP_DEPENDENCE_RAW_MASK
@ VECREDUCE_SEQ_FADD
Generic reduction nodes.
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:275
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:779
@ SMULFIX
RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on 2 integers with the same...
Definition ISDOpcodes.h:394
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ SMULFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition ISDOpcodes.h:400
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:853
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:518
@ VECTOR_FIND_LAST_ACTIVE
Finds the index of the last active mask element Operands: Mask.
@ FMODF
FMODF - Decomposes the operand into integral and fractional parts, each having the same type and sign...
@ FATAN2
FATAN2 - atan2, inspired by libm.
@ FSINCOSPI
FSINCOSPI - Compute both the sine and cosine times pi more accurately than FSINCOS(pi*x),...
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:880
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:417
@ VECREDUCE_FMAXIMUM
FMINIMUM/FMAXIMUM nodes propagate NaNs and signed zeroes using the llvm.minimum and llvm....
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:747
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition ISDOpcodes.h:910
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:280
@ FPTRUNC_ROUND
FPTRUNC_ROUND - This corresponds to the fptrunc_round intrinsic.
Definition ISDOpcodes.h:515
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:993
@ CLMUL
Carry-less multiplication operations.
Definition ISDOpcodes.h:774
@ FLDEXP
FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
@ SDIVFIX
RESULT = [US]DIVFIX(LHS, RHS, SCALE) - Perform fixed point division on 2 integers with the same width...
Definition ISDOpcodes.h:407
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition ISDOpcodes.h:438
@ CONVERT_FROM_ARBITRARY_FP
CONVERT_FROM_ARBITRARY_FP - This operator converts from an arbitrary floating-point represented as an...
@ PARTIAL_REDUCE_UMLA
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:844
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition ISDOpcodes.h:715
@ STRICT_UINT_TO_FP
Definition ISDOpcodes.h:485
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition ISDOpcodes.h:787
@ PARTIAL_REDUCE_FMLA
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ SSUBO
Same for subtraction.
Definition ISDOpcodes.h:352
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
Definition ISDOpcodes.h:691
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:541
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:374
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:796
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:672
@ GET_ACTIVE_LANE_MASK
GET_ACTIVE_LANE_MASK - this corresponds to the llvm.get.active.lane.mask intrinsic.
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition ISDOpcodes.h:348
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:704
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:765
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:850
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:811
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum maximum on two values, following IEEE-754 definition...
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
Definition ISDOpcodes.h:386
@ SMULO
Same for multiplication.
Definition ISDOpcodes.h:356
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition ISDOpcodes.h:899
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:888
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:727
@ SDIVFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition ISDOpcodes.h:413
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:978
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:805
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition ISDOpcodes.h:484
@ MGATHER
Masked gather and scatter - load and store operations for a vector of random addresses with additiona...
@ STRICT_FP_TO_UINT
Definition ISDOpcodes.h:478
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition ISDOpcodes.h:500
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:477
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:926
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:505
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ SCMP
[US]CMP - 3-way comparison of signed or unsigned integers.
Definition ISDOpcodes.h:735
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition ISDOpcodes.h:710
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition ISDOpcodes.h:427
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ FFREXP
FFREXP - frexp, extract fractional and exponent component of a floating-point value.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:959
@ VECTOR_COMPRESS
VECTOR_COMPRESS(Vec, Mask, Passthru) consecutively place vector elements based on mask e....
Definition ISDOpcodes.h:699
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition ISDOpcodes.h:921
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition ISDOpcodes.h:945
@ VECREDUCE_FMINIMUM
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:856
@ VECREDUCE_SEQ_FMUL
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:534
@ PARTIAL_REDUCE_SUMLA
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:365
@ FMINIMUMNUM
FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum that is same with FMINNUM_IEEE and FMAXNUM_IEEE besid...
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:722
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:556
@ LOOP_DEPENDENCE_WAR_MASK
The llvm.loop.dependence.
LLVM_ABI std::optional< unsigned > getVPMaskIdx(unsigned Opcode)
The operand position of the vector mask.
LLVM_ABI std::optional< unsigned > getVPExplicitVectorLengthIdx(unsigned Opcode)
The operand position of the explicit vector length parameter.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
LLVM_ABI bool isVPOpcode(unsigned Opcode)
Whether this is a vector-predicated Opcode.
LLVM_ABI Libcall getREM(EVT VT)
LLVM_ABI Libcall getSINCOSPI(EVT RetVT)
getSINCOSPI - Return the SINCOSPI_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getMODF(EVT VT)
getMODF - Return the MODF_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getCBRT(EVT RetVT)
getCBRT - Return the CBRT_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getPOW(EVT RetVT)
getPOW - Return the POW_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getSINCOS(EVT RetVT)
getSINCOS - Return the SINCOS_* value for the given types, or UNKNOWN_LIBCALL if there is none.
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
@ Offset
Definition DWP.cpp:532
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
@ Xor
Bitwise or logical XOR of integers.
DWARFExpression::Operation Op
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
#define N
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:90
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:70
ElementCount getVectorElementCount() const
Definition ValueTypes.h:358
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:381
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:393
static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
EVT changeVectorElementType(LLVMContext &Context, EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition ValueTypes.h:98
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:324
bool isScalableVT() const
Return true if the type is a scalable type.
Definition ValueTypes.h:195
bool isFixedLengthVector() const
Definition ValueTypes.h:189
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:176
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:331
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition ValueTypes.h:182
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:336
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:344
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition ValueTypes.h:316
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:160
CallingConv::ID getLibcallImplCallingConv(RTLIB::LibcallImpl Call) const
Get the CallingConv that should be used for the specified libcall.
std::pair< FunctionType *, AttributeList > getFunctionTy(LLVMContext &Ctx, const Triple &TT, const DataLayout &DL, RTLIB::LibcallImpl LibcallImpl) const
static bool hasVectorMaskArgument(RTLIB::LibcallImpl Impl)
Returns true if the function has a vector mask argument, which is assumed to be the last argument.