LLVM 20.0.0git
LegalizeVectorOps.cpp
Go to the documentation of this file.
1//===- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the SelectionDAG::LegalizeVectors method.
10//
11// The vector legalizer looks for vector operations which might need to be
12// scalarized and legalizes them. This is a separate step from Legalize because
13// scalarizing can introduce illegal types. For example, suppose we have an
14// ISD::SDIV of type v2i64 on x86-32. The type is legal (for example, addition
15// on a v2i64 is legal), but ISD::SDIV isn't legal, so we have to unroll the
16// operation, which introduces nodes with the illegal type i64 which must be
17// expanded. Similarly, suppose we have an ISD::SRA of type v16i8 on PowerPC;
18// the operation must be unrolled, which introduces nodes with the illegal
19// type i8 which must be promoted.
20//
21// This does not legalize vector manipulations like ISD::BUILD_VECTOR,
22// or operations that happen to take a vector which are custom-lowered;
23// the legalization for such operations never produces nodes
24// with illegal types, so it's okay to put off legalizing them until
25// SelectionDAG::Legalize runs.
26//
27//===----------------------------------------------------------------------===//
28
29#include "llvm/ADT/DenseMap.h"
39#include "llvm/IR/DataLayout.h"
42#include "llvm/Support/Debug.h"
44#include <cassert>
45#include <cstdint>
46#include <iterator>
47#include <utility>
48
49using namespace llvm;
50
51#define DEBUG_TYPE "legalizevectorops"
52
53namespace {
54
/// Legalizes the vector operations of a SelectionDAG. Construct it with the
/// DAG to process and call Run(); each value's legalized replacement is
/// remembered so that no value is legalized twice.
55class VectorLegalizer {
56 SelectionDAG& DAG;
57 const TargetLowering &TLI;
58 bool Changed = false; // Set once any node has been replaced; returned by Run()
59
60 /// For nodes that are of legal width, and that have more than one use, this
61 /// map indicates what regularized operand to use. This allows us to avoid
62 /// legalizing the same thing more than once.
64
65 /// Adds a node to the translation cache.
66 void AddLegalizedOperand(SDValue From, SDValue To) {
67 LegalizedNodes.insert(std::make_pair(From, To));
68 // Also map the new node to itself, so that a later request to legalize it
// simply returns it unchanged.
69 if (From != To)
70 LegalizedNodes.insert(std::make_pair(To, To));
71 }
72
73 /// Legalizes the given node.
74 SDValue LegalizeOp(SDValue Op);
75
76 /// Assuming the node is legal, "legalize" the results.
77 SDValue TranslateLegalizeResults(SDValue Op, SDNode *Result);
78
79 /// Make sure Results are legal and update the translation cache.
80 SDValue RecursivelyLegalizeResults(SDValue Op,
82
83 /// Wrapper to interface LowerOperation with a vector of Results.
84 /// Returns false if the target wants to use default expansion. Otherwise
85 /// returns true. If return is true and the Results are empty, then the
86 /// target wants to keep the input node as is.
87 bool LowerOperationWrapper(SDNode *N, SmallVectorImpl<SDValue> &Results);
88
89 /// Implements unrolling a VSETCC.
90 SDValue UnrollVSETCC(SDNode *Node);
91
92 /// Implement expand-based legalization of vector operations.
93 ///
94 /// This is just a high-level routine to dispatch to specific code paths for
95 /// operations to legalize them.
97
98 /// Implements expansion for FP_TO_UINT; falls back to UnrollVectorOp if
99 /// FP_TO_SINT isn't legal.
100 void ExpandFP_TO_UINT(SDNode *Node, SmallVectorImpl<SDValue> &Results);
101
102 /// Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if
103 /// SINT_TO_FLOAT and SHR on vectors aren't legal.
104 void ExpandUINT_TO_FLOAT(SDNode *Node, SmallVectorImpl<SDValue> &Results);
105
106 /// Implement expansion for SIGN_EXTEND_INREG using SRL and SRA.
107 SDValue ExpandSEXTINREG(SDNode *Node);
108
109 /// Implement expansion for ANY_EXTEND_VECTOR_INREG.
110 ///
111 /// Shuffles the low lanes of the operand into place and bitcasts to the proper
112 /// type. The contents of the bits in the extended part of each element are
113 /// undef.
114 SDValue ExpandANY_EXTEND_VECTOR_INREG(SDNode *Node);
115
116 /// Implement expansion for SIGN_EXTEND_VECTOR_INREG.
117 ///
118 /// Shuffles the low lanes of the operand into place, bitcasts to the proper
119 /// type, then shifts left and arithmetic shifts right to introduce a sign
120 /// extension.
121 SDValue ExpandSIGN_EXTEND_VECTOR_INREG(SDNode *Node);
122
123 /// Implement expansion for ZERO_EXTEND_VECTOR_INREG.
124 ///
125 /// Shuffles the low lanes of the operand into place and blends zeros into
126 /// the remaining lanes, finally bitcasting to the proper type.
127 SDValue ExpandZERO_EXTEND_VECTOR_INREG(SDNode *Node);
128
129 /// Expand bswap of vectors into a shuffle if legal.
130 SDValue ExpandBSWAP(SDNode *Node);
131
132 /// Implement vselect in terms of XOR, AND, OR when blend is not
133 /// supported by the target.
134 SDValue ExpandVSELECT(SDNode *Node);
135 SDValue ExpandVP_SELECT(SDNode *Node);
136 SDValue ExpandVP_MERGE(SDNode *Node);
137 SDValue ExpandVP_REM(SDNode *Node);
138 SDValue ExpandVP_FNEG(SDNode *Node);
139 SDValue ExpandVP_FABS(SDNode *Node);
140 SDValue ExpandVP_FCOPYSIGN(SDNode *Node);
141 SDValue ExpandSELECT(SDNode *Node);
142 std::pair<SDValue, SDValue> ExpandLoad(SDNode *N);
143 SDValue ExpandStore(SDNode *N);
144 SDValue ExpandFNEG(SDNode *Node);
145 SDValue ExpandFABS(SDNode *Node);
146 SDValue ExpandFCOPYSIGN(SDNode *Node);
147 void ExpandFSUB(SDNode *Node, SmallVectorImpl<SDValue> &Results);
148 void ExpandSETCC(SDNode *Node, SmallVectorImpl<SDValue> &Results);
149 SDValue ExpandBITREVERSE(SDNode *Node);
150 void ExpandUADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
151 void ExpandSADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
152 void ExpandMULO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
153 void ExpandFixedPointDiv(SDNode *Node, SmallVectorImpl<SDValue> &Results);
154 void ExpandStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results);
155 void ExpandREM(SDNode *Node, SmallVectorImpl<SDValue> &Results);
156
157 bool tryExpandVecMathCall(SDNode *Node, RTLIB::Libcall LC,
159 bool tryExpandVecMathCall(SDNode *Node, RTLIB::Libcall Call_F32,
160 RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80,
161 RTLIB::Libcall Call_F128,
162 RTLIB::Libcall Call_PPCF128,
164
165 void UnrollStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results);
166
167 /// Implements vector promotion.
168 ///
169 /// This is essentially just bitcasting the operands to a different type and
170 /// bitcasting the result back to the original type.
172
173 /// Implements [SU]INT_TO_FP vector promotion.
174 ///
175 /// This is a [zs]ext of the input operand to a larger integer type.
176 void PromoteINT_TO_FP(SDNode *Node, SmallVectorImpl<SDValue> &Results);
177
178 /// Implements FP_TO_[SU]INT vector promotion of the result type.
179 ///
180 /// It is promoted to a larger integer type. The result is then
181 /// truncated back to the original type.
182 void PromoteFP_TO_INT(SDNode *Node, SmallVectorImpl<SDValue> &Results);
183
184 /// Implements vector setcc operation promotion.
185 ///
186 /// All vector operands are promoted to a vector type with larger element
187 /// type.
188 void PromoteSETCC(SDNode *Node, SmallVectorImpl<SDValue> &Results);
189
190 void PromoteSTRICT(SDNode *Node, SmallVectorImpl<SDValue> &Results);
191
192public:
/// Binds the legalizer to \p dag and to that DAG's TargetLowering.
193 VectorLegalizer(SelectionDAG& dag) :
194 DAG(dag), TLI(dag.getTargetLoweringInfo()) {}
195
196 /// Begin legalizing the vector operations in the DAG.
197 bool Run();
198};
199
200} // end anonymous namespace
201
202bool VectorLegalizer::Run() {
203 // Before we start legalizing vector nodes, check if there are any vectors.
204 bool HasVectors = false;
205 for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
206 E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I) {
207 // Check if the values of the nodes contain vectors. We don't need to check
208 // the operands because we are going to check their values at some point.
209 HasVectors = llvm::any_of(I->values(), [](EVT T) { return T.isVector(); });
210
211 // If we found a vector node we can start the legalization.
212 if (HasVectors)
213 break;
214 }
215
216 // If this basic block has no vectors then no need to legalize vectors.
217 if (!HasVectors)
218 return false;
219
220 // The legalize process is inherently a bottom-up recursive process (users
221 // legalize their uses before themselves). Given infinite stack space, we
222 // could just start legalizing on the root and traverse the whole graph. In
223 // practice however, this causes us to run out of stack space on large basic
224 // blocks. To avoid this problem, compute an ordering of the nodes where each
225 // node is only legalized after all of its operands are legalized.
226 DAG.AssignTopologicalOrder();
227 for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
228 E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I)
229 LegalizeOp(SDValue(&*I, 0));
230
231 // Finally, it's possible the root changed. Get the new root.
232 SDValue OldRoot = DAG.getRoot();
233 assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?");
234 DAG.setRoot(LegalizedNodes[OldRoot]);
235
236 LegalizedNodes.clear();
237
238 // Remove dead nodes now.
239 DAG.RemoveDeadNodes();
240
241 return Changed;
242}
243
244SDValue VectorLegalizer::TranslateLegalizeResults(SDValue Op, SDNode *Result) {
245 assert(Op->getNumValues() == Result->getNumValues() &&
246 "Unexpected number of results");
247 // Generic legalization: just pass the operand through.
248 for (unsigned i = 0, e = Op->getNumValues(); i != e; ++i)
249 AddLegalizedOperand(Op.getValue(i), SDValue(Result, i));
250 return SDValue(Result, Op.getResNo());
251}
252
// Legalizes every replacement value in Results, records each one in the
// translation cache as the legalized form of the matching value of Op's node,
// and returns the replacement for the result number that Op refers to.
254VectorLegalizer::RecursivelyLegalizeResults(SDValue Op,
// Results must hold exactly one replacement per value of Op's node.
256 assert(Results.size() == Op->getNumValues() &&
257 "Unexpected number of results");
258 // Make sure that the generated code is itself legal.
259 for (unsigned i = 0, e = Results.size(); i != e; ++i) {
260 Results[i] = LegalizeOp(Results[i]);
261 AddLegalizedOperand(Op.getValue(i), Results[i]);
262 }
263
// Hand back the replacement for the particular result Op refers to.
264 return Results[Op.getResNo()];
265}
266
// Legalizes the node behind Op and returns the legalized value that
// corresponds to Op. Operands are legalized first; the target is then asked
// which action (Legal, Promote, Custom or Expand) applies to the node, and the
// action is carried out. Every outcome is recorded in LegalizedNodes, so a
// repeated request for the same value is answered from the cache.
267SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
268 // Note that LegalizeOp may be reentered even from single-use nodes, which
269 // means that we always must cache transformed nodes.
270 DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
271 if (I != LegalizedNodes.end()) return I->second;
272
273 // Legalize the operands, then update the node to use the legalized operands.
275 for (const SDValue &Oper : Op->op_values())
276 Ops.push_back(LegalizeOp(Oper));
277
278 SDNode *Node = DAG.UpdateNodeOperands(Op.getNode(), Ops);
279
// A node that neither produces nor consumes a vector is passed through as is.
280 bool HasVectorValueOrOp =
281 llvm::any_of(Node->values(), [](EVT T) { return T.isVector(); }) ||
282 llvm::any_of(Node->op_values(),
283 [](SDValue O) { return O.getValueType().isVector(); });
284 if (!HasVectorValueOrOp)
285 return TranslateLegalizeResults(Op, Node);
286
// Determine the action the target requests for this node. Opcodes that are
// not handled by the switch are passed through unchanged.
287 TargetLowering::LegalizeAction Action = TargetLowering::Legal;
288 EVT ValVT;
289 switch (Op.getOpcode()) {
290 default:
291 return TranslateLegalizeResults(Op, Node);
292 case ISD::LOAD: {
293 LoadSDNode *LD = cast<LoadSDNode>(Node);
294 ISD::LoadExtType ExtType = LD->getExtensionType();
295 EVT LoadedVT = LD->getMemoryVT();
// Only extending loads of a vector memory type are queried; other loads keep
// the default action (Legal).
296 if (LoadedVT.isVector() && ExtType != ISD::NON_EXTLOAD)
297 Action = TLI.getLoadExtAction(ExtType, LD->getValueType(0), LoadedVT);
298 break;
299 }
300 case ISD::STORE: {
301 StoreSDNode *ST = cast<StoreSDNode>(Node);
302 EVT StVT = ST->getMemoryVT();
303 MVT ValVT = ST->getValue().getSimpleValueType();
// Only truncating stores of a vector memory type are queried; other stores
// keep the default action (Legal).
304 if (StVT.isVector() && ST->isTruncatingStore())
305 Action = TLI.getTruncStoreAction(ValVT, StVT);
306 break;
307 }
309 Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
310 // This operation lies about being legal: when it claims to be legal,
311 // it should actually be expanded.
312 if (Action == TargetLowering::Legal)
313 Action = TargetLowering::Expand;
314 break;
315#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
316 case ISD::STRICT_##DAGN:
317#include "llvm/IR/ConstrainedOps.def"
// For the strict opcodes the queried type is the result type, except that the
// int-to-FP conversions use the type of operand 1 and the strict compares use
// the type of operand 1 together with the condition code.
318 ValVT = Node->getValueType(0);
319 if (Op.getOpcode() == ISD::STRICT_SINT_TO_FP ||
320 Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
321 ValVT = Node->getOperand(1).getValueType();
322 if (Op.getOpcode() == ISD::STRICT_FSETCC ||
323 Op.getOpcode() == ISD::STRICT_FSETCCS) {
324 MVT OpVT = Node->getOperand(1).getSimpleValueType();
325 ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(3))->get();
326 Action = TLI.getCondCodeAction(CCCode, OpVT);
327 if (Action == TargetLowering::Legal)
328 Action = TLI.getOperationAction(Node->getOpcode(), OpVT);
329 } else {
330 Action = TLI.getOperationAction(Node->getOpcode(), ValVT);
331 }
332 // If we're asked to expand a strict vector floating-point operation,
333 // by default we're going to simply unroll it. That is usually the
334 // best approach, except in the case where the resulting strict (scalar)
335 // operations would themselves use the fallback mutation to non-strict.
336 // In that specific case, just do the fallback on the vector op.
337 if (Action == TargetLowering::Expand && !TLI.isStrictFPEnabled() &&
338 TLI.getStrictFPOperationAction(Node->getOpcode(), ValVT) ==
339 TargetLowering::Legal) {
340 EVT EltVT = ValVT.getVectorElementType();
341 if (TLI.getOperationAction(Node->getOpcode(), EltVT)
342 == TargetLowering::Expand &&
343 TLI.getStrictFPOperationAction(Node->getOpcode(), EltVT)
344 == TargetLowering::Legal)
345 Action = TargetLowering::Legal;
346 }
347 break;
// The opcodes below are queried with the type of the node's first result.
348 case ISD::ADD:
349 case ISD::SUB:
350 case ISD::MUL:
351 case ISD::MULHS:
352 case ISD::MULHU:
353 case ISD::SDIV:
354 case ISD::UDIV:
355 case ISD::SREM:
356 case ISD::UREM:
357 case ISD::SDIVREM:
358 case ISD::UDIVREM:
359 case ISD::FADD:
360 case ISD::FSUB:
361 case ISD::FMUL:
362 case ISD::FDIV:
363 case ISD::FREM:
364 case ISD::AND:
365 case ISD::OR:
366 case ISD::XOR:
367 case ISD::SHL:
368 case ISD::SRA:
369 case ISD::SRL:
370 case ISD::FSHL:
371 case ISD::FSHR:
372 case ISD::ROTL:
373 case ISD::ROTR:
374 case ISD::ABS:
375 case ISD::ABDS:
376 case ISD::ABDU:
377 case ISD::AVGCEILS:
378 case ISD::AVGCEILU:
379 case ISD::AVGFLOORS:
380 case ISD::AVGFLOORU:
381 case ISD::BSWAP:
382 case ISD::BITREVERSE:
383 case ISD::CTLZ:
384 case ISD::CTTZ:
387 case ISD::CTPOP:
388 case ISD::SELECT:
389 case ISD::VSELECT:
390 case ISD::SELECT_CC:
391 case ISD::ZERO_EXTEND:
392 case ISD::ANY_EXTEND:
393 case ISD::TRUNCATE:
394 case ISD::SIGN_EXTEND:
395 case ISD::FP_TO_SINT:
396 case ISD::FP_TO_UINT:
397 case ISD::FNEG:
398 case ISD::FABS:
399 case ISD::FMINNUM:
400 case ISD::FMAXNUM:
403 case ISD::FMINIMUM:
404 case ISD::FMAXIMUM:
405 case ISD::FCOPYSIGN:
406 case ISD::FSQRT:
407 case ISD::FSIN:
408 case ISD::FCOS:
409 case ISD::FTAN:
410 case ISD::FASIN:
411 case ISD::FACOS:
412 case ISD::FATAN:
413 case ISD::FATAN2:
414 case ISD::FSINH:
415 case ISD::FCOSH:
416 case ISD::FTANH:
417 case ISD::FLDEXP:
418 case ISD::FPOWI:
419 case ISD::FPOW:
420 case ISD::FLOG:
421 case ISD::FLOG2:
422 case ISD::FLOG10:
423 case ISD::FEXP:
424 case ISD::FEXP2:
425 case ISD::FEXP10:
426 case ISD::FCEIL:
427 case ISD::FTRUNC:
428 case ISD::FRINT:
429 case ISD::FNEARBYINT:
430 case ISD::FROUND:
431 case ISD::FROUNDEVEN:
432 case ISD::FFLOOR:
433 case ISD::FP_ROUND:
434 case ISD::FP_EXTEND:
436 case ISD::FMA:
441 case ISD::SMIN:
442 case ISD::SMAX:
443 case ISD::UMIN:
444 case ISD::UMAX:
445 case ISD::SMUL_LOHI:
446 case ISD::UMUL_LOHI:
447 case ISD::SADDO:
448 case ISD::UADDO:
449 case ISD::SSUBO:
450 case ISD::USUBO:
451 case ISD::SMULO:
452 case ISD::UMULO:
454 case ISD::FFREXP:
455 case ISD::FSINCOS:
456 case ISD::SADDSAT:
457 case ISD::UADDSAT:
458 case ISD::SSUBSAT:
459 case ISD::USUBSAT:
460 case ISD::SSHLSAT:
461 case ISD::USHLSAT:
464 case ISD::MGATHER:
466 case ISD::SCMP:
467 case ISD::UCMP:
468 Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
469 break;
// Fixed-point operations are additionally queried with their scale, which is
// read from constant operand 2.
470 case ISD::SMULFIX:
471 case ISD::SMULFIXSAT:
472 case ISD::UMULFIX:
473 case ISD::UMULFIXSAT:
474 case ISD::SDIVFIX:
475 case ISD::SDIVFIXSAT:
476 case ISD::UDIVFIX:
477 case ISD::UDIVFIXSAT: {
478 unsigned Scale = Node->getConstantOperandVal(2);
479 Action = TLI.getFixedPointOperationAction(Node->getOpcode(),
480 Node->getValueType(0), Scale);
481 break;
482 }
// The opcodes below are queried with the type of operand 0 rather than the
// result type.
483 case ISD::LROUND:
484 case ISD::LLROUND:
485 case ISD::LRINT:
486 case ISD::LLRINT:
487 case ISD::SINT_TO_FP:
488 case ISD::UINT_TO_FP:
504 Action = TLI.getOperationAction(Node->getOpcode(),
505 Node->getOperand(0).getValueType());
506 break;
509 Action = TLI.getOperationAction(Node->getOpcode(),
510 Node->getOperand(1).getValueType());
511 break;
// SETCC is queried by condition code first; only if the condition code is
// legal for the operand type is the operation itself queried.
512 case ISD::SETCC: {
513 MVT OpVT = Node->getOperand(0).getSimpleValueType();
514 ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get();
515 Action = TLI.getCondCodeAction(CCCode, OpVT);
516 if (Action == TargetLowering::Legal)
517 Action = TLI.getOperationAction(Node->getOpcode(), OpVT);
518 break;
519 }
520
// One case per VP opcode is generated from VPIntrinsics.def. LEGALPOS selects
// the type used for the query: a negative value picks a result type, a
// non-negative value picks an operand type.
521#define BEGIN_REGISTER_VP_SDNODE(VPID, LEGALPOS, ...) \
522 case ISD::VPID: { \
523 EVT LegalizeVT = LEGALPOS < 0 ? Node->getValueType(-(1 + LEGALPOS)) \
524 : Node->getOperand(LEGALPOS).getValueType(); \
525 if (ISD::VPID == ISD::VP_SETCC) { \
526 ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get(); \
527 Action = TLI.getCondCodeAction(CCCode, LegalizeVT.getSimpleVT()); \
528 if (Action != TargetLowering::Legal) \
529 break; \
530 } \
531 /* Defer non-vector results to LegalizeDAG. */ \
532 if (!Node->getValueType(0).isVector() && \
533 Node->getValueType(0) != MVT::Other) { \
534 Action = TargetLowering::Legal; \
535 break; \
536 } \
537 Action = TLI.getOperationAction(Node->getOpcode(), LegalizeVT); \
538 } break;
539#include "llvm/IR/VPIntrinsics.def"
540 }
541
542 LLVM_DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG));
543
// Carry out the action. Replacement values, if any, end up in ResultVals.
544 SmallVector<SDValue, 8> ResultVals;
545 switch (Action) {
546 default: llvm_unreachable("This action is not supported yet!");
547 case TargetLowering::Promote:
548 assert((Op.getOpcode() != ISD::LOAD && Op.getOpcode() != ISD::STORE) &&
549 "This action is not supported yet!");
550 LLVM_DEBUG(dbgs() << "Promoting\n");
551 Promote(Node, ResultVals);
552 assert(!ResultVals.empty() && "No results for promotion?");
553 break;
554 case TargetLowering::Legal:
555 LLVM_DEBUG(dbgs() << "Legal node: nothing to do\n");
556 break;
557 case TargetLowering::Custom:
558 LLVM_DEBUG(dbgs() << "Trying custom legalization\n");
559 if (LowerOperationWrapper(Node, ResultVals))
560 break;
// The target declined to custom-lower the node: fall back to expansion.
561 LLVM_DEBUG(dbgs() << "Could not custom legalize node\n");
562 [[fallthrough]];
563 case TargetLowering::Expand:
564 LLVM_DEBUG(dbgs() << "Expanding\n");
565 Expand(Node, ResultVals);
566 break;
567 }
568
// No replacement values: the node is kept as it is.
569 if (ResultVals.empty())
570 return TranslateLegalizeResults(Op, Node);
571
// The node was replaced; the replacements must be legalized in turn.
572 Changed = true;
573 return RecursivelyLegalizeResults(Op, ResultVals);
574}
575
576// FIXME: This is very similar to TargetLowering::LowerOperationWrapper. Can we
577// merge them somehow?
// Asks the target to custom-lower Node through TLI.LowerOperation.
// Returns false when the target produced no node. Returns true otherwise:
// Results stays untouched when the target handed back result 0 of Node itself,
// and receives the lowered value(s) in every other case.
578bool VectorLegalizer::LowerOperationWrapper(SDNode *Node,
580 SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
581
// A null result means the target did not lower the node.
582 if (!Res.getNode())
583 return false;
584
// The target returned the node unchanged: report success without results.
585 if (Res == SDValue(Node, 0))
586 return true;
587
588 // If the original node has one result, take the return value from
589 // LowerOperation as is. It might not be result number 0.
590 if (Node->getNumValues() == 1) {
591 Results.push_back(Res);
592 return true;
593 }
594
595 // If the original node has multiple results, then the return node should
596 // have the same number of results.
597 assert((Node->getNumValues() == Res->getNumValues()) &&
598 "Lowering returned the wrong number of results!");
599
600 // Place the new result values according to Node's result numbers.
601 for (unsigned I = 0, E = Node->getNumValues(); I != E; ++I)
602 Results.push_back(Res.getValue(I));
603
604 return true;
605}
606
// Promotes a vector SETCC or VP_SETCC: both compared operands are extended to
// the type returned by getTypeToPromoteTo for the type of operand 0, and the
// comparison is re-created on the extended operands with the original result
// type. The new node is appended to Results.
607void VectorLegalizer::PromoteSETCC(SDNode *Node,
609 MVT VecVT = Node->getOperand(0).getSimpleValueType();
610 MVT NewVecVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VecVT);
611
// Floating-point operands are widened with FP_EXTEND, all others with
// ANY_EXTEND.
// NOTE(review): ANY_EXTEND does not define the added high bits; presumably
// only floating-point compares are ever marked Promote - confirm.
612 unsigned ExtOp = VecVT.isFloatingPoint() ? ISD::FP_EXTEND : ISD::ANY_EXTEND;
613
614 SDLoc DL(Node);
615 SmallVector<SDValue, 5> Operands(Node->getNumOperands());
616
// Operands 0 and 1 are extended; operand 2 is carried over unchanged.
617 Operands[0] = DAG.getNode(ExtOp, DL, NewVecVT, Node->getOperand(0));
618 Operands[1] = DAG.getNode(ExtOp, DL, NewVecVT, Node->getOperand(1));
619 Operands[2] = Node->getOperand(2);
620
// VP_SETCC has two further operands that are carried over unchanged.
621 if (Node->getOpcode() == ISD::VP_SETCC) {
622 Operands[3] = Node->getOperand(3); // mask
623 Operands[4] = Node->getOperand(4); // evl
624 }
625
626 SDValue Res = DAG.getNode(Node->getOpcode(), DL, Node->getSimpleValueType(0),
627 Operands, Node->getFlags());
628
629 Results.push_back(Res);
630}
631
// Promotes a strict floating-point vector operation. Every vector operand
// (apart from the mask of a VP opcode) is widened with STRICT_FP_EXTEND, the
// operation is re-created on the promoted type, and its result is narrowed
// back to the original type with STRICT_FP_ROUND. The rounded value and the
// chain of the rounding node are appended to Results.
632void VectorLegalizer::PromoteSTRICT(SDNode *Node,
634 MVT VecVT = Node->getOperand(1).getSimpleValueType();
635 MVT NewVecVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VecVT);
636
637 assert(VecVT.isFloatingPoint());
638
639 SDLoc DL(Node);
640 SmallVector<SDValue, 5> Operands(Node->getNumOperands());
642
// Operand 0 is not visited here; slot 0 of Operands is filled in below with
// the TokenFactor of the extension chains.
643 for (unsigned j = 1; j != Node->getNumOperands(); ++j)
644 if (Node->getOperand(j).getValueType().isVector() &&
645 !(ISD::isVPOpcode(Node->getOpcode()) &&
646 ISD::getVPMaskIdx(Node->getOpcode()) == j)) // Skip mask operand.
647 {
648 // Promote the vector operand; each extension takes the node's incoming
// operand 0 as its chain and contributes its own output chain to Chains.
649 SDValue Ext =
650 DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {NewVecVT, MVT::Other},
651 {Node->getOperand(0), Node->getOperand(j)});
652 Operands[j] = Ext.getValue(0);
653 Chains.push_back(Ext.getValue(1));
654 } else
655 Operands[j] = Node->getOperand(j); // Non-vector and mask operands pass through unchanged.
656
657 SDVTList VTs = DAG.getVTList(NewVecVT, Node->getValueType(1));
658
// The promoted operation is chained after all of the extensions.
659 Operands[0] = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
660
661 SDValue Res =
662 DAG.getNode(Node->getOpcode(), DL, VTs, Operands, Node->getFlags());
663
// Round back to the original type, chained after the promoted operation.
664 SDValue Round =
665 DAG.getNode(ISD::STRICT_FP_ROUND, DL, {VecVT, MVT::Other},
666 {Res.getValue(1), Res.getValue(0),
667 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)});
668
669 Results.push_back(Round.getValue(0));
670 Results.push_back(Round.getValue(1));
671}
672
// Promotes Node and appends the replacement value(s) to Results. A number of
// opcodes are handed to dedicated helpers; everything else goes through the
// generic path at the bottom, which converts the vector operands to the
// promoted type, re-creates the operation there and converts the result back.
673void VectorLegalizer::Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
674 // For a few operations there is a specific concept for promotion based on
675 // the operand's type.
676 switch (Node->getOpcode()) {
677 case ISD::SINT_TO_FP:
678 case ISD::UINT_TO_FP:
681 // "Promote" the operation by extending the operand.
682 PromoteINT_TO_FP(Node, Results);
683 return;
684 case ISD::FP_TO_UINT:
685 case ISD::FP_TO_SINT:
688 // Handled by PromoteFP_TO_INT.
689 PromoteFP_TO_INT(Node, Results);
690 return;
691 case ISD::VP_SETCC:
692 case ISD::SETCC:
693 // Promote the operation by extending the operand.
694 PromoteSETCC(Node, Results);
695 return;
696 case ISD::STRICT_FADD:
697 case ISD::STRICT_FSUB:
698 case ISD::STRICT_FMUL:
699 case ISD::STRICT_FDIV:
701 case ISD::STRICT_FMA:
702 PromoteSTRICT(Node, Results);
703 return;
704 case ISD::FP_ROUND:
705 case ISD::FP_EXTEND:
706 // These operations are used to do promotion so they can't be promoted
707 // themselves.
708 llvm_unreachable("Don't know how to promote this operation!");
709 case ISD::VP_FABS:
710 case ISD::VP_FCOPYSIGN:
711 case ISD::VP_FNEG:
712 // Promoting fabs, fneg, and fcopysign changes their semantics.
713 llvm_unreachable("These operations should not be promoted");
714 }
715
716 // There are currently two cases of vector promotion:
717 // 1) Bitcasting a vector of integers to a different type to a vector of the
718 // same overall length. For example, x86 promotes ISD::AND v2i32 to v1i64.
719 // 2) Extending a vector of floats to a vector of the same number of larger
720 // floats. For example, AArch64 promotes ISD::FADD on v4f16 to v4f32.
721 assert(Node->getNumValues() == 1 &&
722 "Can't promote a vector with multiple results!");
723 MVT VT = Node->getSimpleValueType(0);
724 MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
725 SDLoc dl(Node);
726 SmallVector<SDValue, 4> Operands(Node->getNumOperands());
727
// Convert each vector operand to the promoted type (FP_EXTEND or BITCAST);
// non-vector operands and the VP mask are taken over unchanged.
728 for (unsigned j = 0; j != Node->getNumOperands(); ++j) {
729 // Do not promote the mask operand of a VP OP.
730 bool SkipPromote = ISD::isVPOpcode(Node->getOpcode()) &&
731 ISD::getVPMaskIdx(Node->getOpcode()) == j;
732 if (Node->getOperand(j).getValueType().isVector() && !SkipPromote)
733 if (Node->getOperand(j)
734 .getValueType()
735 .getVectorElementType()
736 .isFloatingPoint() &&
738 Operands[j] = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(j));
739 else
740 Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(j));
741 else
742 Operands[j] = Node->getOperand(j);
743 }
744
// Re-create the operation on the promoted type, keeping the node's flags.
745 SDValue Res =
746 DAG.getNode(Node->getOpcode(), dl, NVT, Operands, Node->getFlags());
747
// Convert the result back to the original type with FP_ROUND or BITCAST.
748 if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) ||
751 Res = DAG.getNode(ISD::FP_ROUND, dl, VT, Res,
752 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
753 else
754 Res = DAG.getNode(ISD::BITCAST, dl, VT, Res);
755
756 Results.push_back(Res);
757}
758
// Promotes an int-to-FP conversion by converting its vector operand to the
// type returned by getTypeToPromoteTo and re-creating the conversion on that
// operand with the original result type. For strict opcodes the chain result
// is appended to Results as well.
759void VectorLegalizer::PromoteINT_TO_FP(SDNode *Node,
761 // INT_TO_FP operations may require the input operand be promoted even
762 // when the type is otherwise legal.
763 bool IsStrict = Node->isStrictFPOpcode();
// The converted value is operand 1 of a strict node and operand 0 otherwise.
764 MVT VT = Node->getOperand(IsStrict ? 1 : 0).getSimpleValueType();
765 MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
767 "Vectors have different number of elements!");
768
769 SDLoc dl(Node);
770 SmallVector<SDValue, 4> Operands(Node->getNumOperands());
771
// Opc is the opcode used to convert the operand to NVT; it depends on whether
// the conversion is one of the unsigned variants.
772 unsigned Opc = (Node->getOpcode() == ISD::UINT_TO_FP ||
773 Node->getOpcode() == ISD::STRICT_UINT_TO_FP)
// Vector operands are converted to NVT; all other operands pass through.
776 for (unsigned j = 0; j != Node->getNumOperands(); ++j) {
777 if (Node->getOperand(j).getValueType().isVector())
778 Operands[j] = DAG.getNode(Opc, dl, NVT, Node->getOperand(j));
779 else
780 Operands[j] = Node->getOperand(j);
781 }
782
// Strict nodes have an additional MVT::Other result that is forwarded too.
783 if (IsStrict) {
784 SDValue Res = DAG.getNode(Node->getOpcode(), dl,
785 {Node->getValueType(0), MVT::Other}, Operands);
786 Results.push_back(Res);
787 Results.push_back(Res.getValue(1));
788 return;
789 }
790
791 SDValue Res =
792 DAG.getNode(Node->getOpcode(), dl, Node->getValueType(0), Operands);
793 Results.push_back(Res);
794}
795
796// For FP_TO_INT we promote the result type to a vector type with wider
797// elements and then truncate the result. This is different from the default
798// PromoteVector which uses bitcast to promote, thus assuming that the
799// promoted vector type has the same overall size.
800void VectorLegalizer::PromoteFP_TO_INT(SDNode *Node,
// VT is the original result type, NVT the promoted type to convert to.
802 MVT VT = Node->getSimpleValueType(0);
803 MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
804 bool IsStrict = Node->isStrictFPOpcode();
806 "Vectors have different number of elements!");
807
808 unsigned NewOpc = Node->getOpcode();
809 // Change FP_TO_UINT to FP_TO_SINT if possible.
810 // TODO: Should we only do this if FP_TO_UINT itself isn't legal?
811 if (NewOpc == ISD::FP_TO_UINT &&
812 TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT))
813 NewOpc = ISD::FP_TO_SINT;
814
// Same substitution for the strict variant.
815 if (NewOpc == ISD::STRICT_FP_TO_UINT &&
816 TLI.isOperationLegalOrCustom(ISD::STRICT_FP_TO_SINT, NVT))
817 NewOpc = ISD::STRICT_FP_TO_SINT;
818
// Create the conversion to the promoted type. The strict form takes operands
// 0 and 1 of Node and has a second result, which is kept in Chain.
819 SDLoc dl(Node);
820 SDValue Promoted, Chain;
821 if (IsStrict) {
822 Promoted = DAG.getNode(NewOpc, dl, {NVT, MVT::Other},
823 {Node->getOperand(0), Node->getOperand(1)});
824 Chain = Promoted.getValue(1);
825 } else
826 Promoted = DAG.getNode(NewOpc, dl, NVT, Node->getOperand(0));
827
828 // Assert that the converted value fits in the original type. If it doesn't
829 // (eg: because the value being converted is too big), then the result of the
830 // original operation was undefined anyway, so the assert is still correct.
// The choice is made from the node's original opcode, not from the possibly
// substituted NewOpc, which is reused here to hold the assert opcode.
831 if (Node->getOpcode() == ISD::FP_TO_UINT ||
832 Node->getOpcode() == ISD::STRICT_FP_TO_UINT)
833 NewOpc = ISD::AssertZext;
834 else
835 NewOpc = ISD::AssertSext;
836
// Attach the assertion, then truncate to the original result type.
837 Promoted = DAG.getNode(NewOpc, dl, NVT, Promoted,
838 DAG.getValueType(VT.getScalarType()));
839 Promoted = DAG.getNode(ISD::TRUNCATE, dl, VT, Promoted);
840 Results.push_back(Promoted);
841 if (IsStrict)
842 Results.push_back(Chain);
843}
844
845std::pair<SDValue, SDValue> VectorLegalizer::ExpandLoad(SDNode *N) {
846 LoadSDNode *LD = cast<LoadSDNode>(N);
847 return TLI.scalarizeVectorLoad(LD, DAG);
848}
849
850SDValue VectorLegalizer::ExpandStore(SDNode *N) {
851 StoreSDNode *ST = cast<StoreSDNode>(N);
852 SDValue TF = TLI.scalarizeVectorStore(ST, DAG);
853 return TF;
854}
855
856void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
857 switch (Node->getOpcode()) {
858 case ISD::LOAD: {
859 std::pair<SDValue, SDValue> Tmp = ExpandLoad(Node);
860 Results.push_back(Tmp.first);
861 Results.push_back(Tmp.second);
862 return;
863 }
864 case ISD::STORE:
865 Results.push_back(ExpandStore(Node));
866 return;
868 for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
869 Results.push_back(Node->getOperand(i));
870 return;
872 if (SDValue Expanded = ExpandSEXTINREG(Node)) {
873 Results.push_back(Expanded);
874 return;
875 }
876 break;
878 Results.push_back(ExpandANY_EXTEND_VECTOR_INREG(Node));
879 return;
881 Results.push_back(ExpandSIGN_EXTEND_VECTOR_INREG(Node));
882 return;
884 Results.push_back(ExpandZERO_EXTEND_VECTOR_INREG(Node));
885 return;
886 case ISD::BSWAP:
887 if (SDValue Expanded = ExpandBSWAP(Node)) {
888 Results.push_back(Expanded);
889 return;
890 }
891 break;
892 case ISD::VP_BSWAP:
893 Results.push_back(TLI.expandVPBSWAP(Node, DAG));
894 return;
895 case ISD::VSELECT:
896 if (SDValue Expanded = ExpandVSELECT(Node)) {
897 Results.push_back(Expanded);
898 return;
899 }
900 break;
901 case ISD::VP_SELECT:
902 if (SDValue Expanded = ExpandVP_SELECT(Node)) {
903 Results.push_back(Expanded);
904 return;
905 }
906 break;
907 case ISD::VP_SREM:
908 case ISD::VP_UREM:
909 if (SDValue Expanded = ExpandVP_REM(Node)) {
910 Results.push_back(Expanded);
911 return;
912 }
913 break;
914 case ISD::VP_FNEG:
915 if (SDValue Expanded = ExpandVP_FNEG(Node)) {
916 Results.push_back(Expanded);
917 return;
918 }
919 break;
920 case ISD::VP_FABS:
921 if (SDValue Expanded = ExpandVP_FABS(Node)) {
922 Results.push_back(Expanded);
923 return;
924 }
925 break;
926 case ISD::VP_FCOPYSIGN:
927 if (SDValue Expanded = ExpandVP_FCOPYSIGN(Node)) {
928 Results.push_back(Expanded);
929 return;
930 }
931 break;
932 case ISD::SELECT:
933 if (SDValue Expanded = ExpandSELECT(Node)) {
934 Results.push_back(Expanded);
935 return;
936 }
937 break;
938 case ISD::SELECT_CC: {
939 if (Node->getValueType(0).isScalableVector()) {
940 EVT CondVT = TLI.getSetCCResultType(
941 DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
942 SDValue SetCC =
943 DAG.getNode(ISD::SETCC, SDLoc(Node), CondVT, Node->getOperand(0),
944 Node->getOperand(1), Node->getOperand(4));
945 Results.push_back(DAG.getSelect(SDLoc(Node), Node->getValueType(0), SetCC,
946 Node->getOperand(2),
947 Node->getOperand(3)));
948 return;
949 }
950 break;
951 }
952 case ISD::FP_TO_UINT:
953 ExpandFP_TO_UINT(Node, Results);
954 return;
955 case ISD::UINT_TO_FP:
956 ExpandUINT_TO_FLOAT(Node, Results);
957 return;
958 case ISD::FNEG:
959 if (SDValue Expanded = ExpandFNEG(Node)) {
960 Results.push_back(Expanded);
961 return;
962 }
963 break;
964 case ISD::FABS:
965 if (SDValue Expanded = ExpandFABS(Node)) {
966 Results.push_back(Expanded);
967 return;
968 }
969 break;
970 case ISD::FCOPYSIGN:
971 if (SDValue Expanded = ExpandFCOPYSIGN(Node)) {
972 Results.push_back(Expanded);
973 return;
974 }
975 break;
976 case ISD::FSUB:
977 ExpandFSUB(Node, Results);
978 return;
979 case ISD::SETCC:
980 case ISD::VP_SETCC:
981 ExpandSETCC(Node, Results);
982 return;
983 case ISD::ABS:
984 if (SDValue Expanded = TLI.expandABS(Node, DAG)) {
985 Results.push_back(Expanded);
986 return;
987 }
988 break;
989 case ISD::ABDS:
990 case ISD::ABDU:
991 if (SDValue Expanded = TLI.expandABD(Node, DAG)) {
992 Results.push_back(Expanded);
993 return;
994 }
995 break;
996 case ISD::AVGCEILS:
997 case ISD::AVGCEILU:
998 case ISD::AVGFLOORS:
999 case ISD::AVGFLOORU:
1000 if (SDValue Expanded = TLI.expandAVG(Node, DAG)) {
1001 Results.push_back(Expanded);
1002 return;
1003 }
1004 break;
1005 case ISD::BITREVERSE:
1006 if (SDValue Expanded = ExpandBITREVERSE(Node)) {
1007 Results.push_back(Expanded);
1008 return;
1009 }
1010 break;
1011 case ISD::VP_BITREVERSE:
1012 if (SDValue Expanded = TLI.expandVPBITREVERSE(Node, DAG)) {
1013 Results.push_back(Expanded);
1014 return;
1015 }
1016 break;
1017 case ISD::CTPOP:
1018 if (SDValue Expanded = TLI.expandCTPOP(Node, DAG)) {
1019 Results.push_back(Expanded);
1020 return;
1021 }
1022 break;
1023 case ISD::VP_CTPOP:
1024 if (SDValue Expanded = TLI.expandVPCTPOP(Node, DAG)) {
1025 Results.push_back(Expanded);
1026 return;
1027 }
1028 break;
1029 case ISD::CTLZ:
1031 if (SDValue Expanded = TLI.expandCTLZ(Node, DAG)) {
1032 Results.push_back(Expanded);
1033 return;
1034 }
1035 break;
1036 case ISD::VP_CTLZ:
1037 case ISD::VP_CTLZ_ZERO_UNDEF:
1038 if (SDValue Expanded = TLI.expandVPCTLZ(Node, DAG)) {
1039 Results.push_back(Expanded);
1040 return;
1041 }
1042 break;
1043 case ISD::CTTZ:
1045 if (SDValue Expanded = TLI.expandCTTZ(Node, DAG)) {
1046 Results.push_back(Expanded);
1047 return;
1048 }
1049 break;
1050 case ISD::VP_CTTZ:
1051 case ISD::VP_CTTZ_ZERO_UNDEF:
1052 if (SDValue Expanded = TLI.expandVPCTTZ(Node, DAG)) {
1053 Results.push_back(Expanded);
1054 return;
1055 }
1056 break;
1057 case ISD::FSHL:
1058 case ISD::VP_FSHL:
1059 case ISD::FSHR:
1060 case ISD::VP_FSHR:
1061 if (SDValue Expanded = TLI.expandFunnelShift(Node, DAG)) {
1062 Results.push_back(Expanded);
1063 return;
1064 }
1065 break;
1066 case ISD::ROTL:
1067 case ISD::ROTR:
1068 if (SDValue Expanded = TLI.expandROT(Node, false /*AllowVectorOps*/, DAG)) {
1069 Results.push_back(Expanded);
1070 return;
1071 }
1072 break;
1073 case ISD::FMINNUM:
1074 case ISD::FMAXNUM:
1075 if (SDValue Expanded = TLI.expandFMINNUM_FMAXNUM(Node, DAG)) {
1076 Results.push_back(Expanded);
1077 return;
1078 }
1079 break;
1080 case ISD::FMINIMUM:
1081 case ISD::FMAXIMUM:
1082 Results.push_back(TLI.expandFMINIMUM_FMAXIMUM(Node, DAG));
1083 return;
1084 case ISD::SMIN:
1085 case ISD::SMAX:
1086 case ISD::UMIN:
1087 case ISD::UMAX:
1088 if (SDValue Expanded = TLI.expandIntMINMAX(Node, DAG)) {
1089 Results.push_back(Expanded);
1090 return;
1091 }
1092 break;
1093 case ISD::UADDO:
1094 case ISD::USUBO:
1095 ExpandUADDSUBO(Node, Results);
1096 return;
1097 case ISD::SADDO:
1098 case ISD::SSUBO:
1099 ExpandSADDSUBO(Node, Results);
1100 return;
1101 case ISD::UMULO:
1102 case ISD::SMULO:
1103 ExpandMULO(Node, Results);
1104 return;
1105 case ISD::USUBSAT:
1106 case ISD::SSUBSAT:
1107 case ISD::UADDSAT:
1108 case ISD::SADDSAT:
1109 if (SDValue Expanded = TLI.expandAddSubSat(Node, DAG)) {
1110 Results.push_back(Expanded);
1111 return;
1112 }
1113 break;
1114 case ISD::USHLSAT:
1115 case ISD::SSHLSAT:
1116 if (SDValue Expanded = TLI.expandShlSat(Node, DAG)) {
1117 Results.push_back(Expanded);
1118 return;
1119 }
1120 break;
1123 // Expand the fpsosisat if it is scalable to prevent it from unrolling below.
1124 if (Node->getValueType(0).isScalableVector()) {
1125 if (SDValue Expanded = TLI.expandFP_TO_INT_SAT(Node, DAG)) {
1126 Results.push_back(Expanded);
1127 return;
1128 }
1129 }
1130 break;
1131 case ISD::SMULFIX:
1132 case ISD::UMULFIX:
1133 if (SDValue Expanded = TLI.expandFixedPointMul(Node, DAG)) {
1134 Results.push_back(Expanded);
1135 return;
1136 }
1137 break;
1138 case ISD::SMULFIXSAT:
1139 case ISD::UMULFIXSAT:
1140 // FIXME: We do not expand SMULFIXSAT/UMULFIXSAT here yet, not sure exactly
1141 // why. Maybe it results in worse codegen compared to the unroll for some
1142 // targets? This should probably be investigated. And if we still prefer to
1143 // unroll an explanation could be helpful.
1144 break;
1145 case ISD::SDIVFIX:
1146 case ISD::UDIVFIX:
1147 ExpandFixedPointDiv(Node, Results);
1148 return;
1149 case ISD::SDIVFIXSAT:
1150 case ISD::UDIVFIXSAT:
1151 break;
1152#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
1153 case ISD::STRICT_##DAGN:
1154#include "llvm/IR/ConstrainedOps.def"
1155 ExpandStrictFPOp(Node, Results);
1156 return;
1157 case ISD::VECREDUCE_ADD:
1158 case ISD::VECREDUCE_MUL:
1159 case ISD::VECREDUCE_AND:
1160 case ISD::VECREDUCE_OR:
1161 case ISD::VECREDUCE_XOR:
1172 Results.push_back(TLI.expandVecReduce(Node, DAG));
1173 return;
1176 Results.push_back(TLI.expandVecReduceSeq(Node, DAG));
1177 return;
1178 case ISD::SREM:
1179 case ISD::UREM:
1180 ExpandREM(Node, Results);
1181 return;
1182 case ISD::VP_MERGE:
1183 if (SDValue Expanded = ExpandVP_MERGE(Node)) {
1184 Results.push_back(Expanded);
1185 return;
1186 }
1187 break;
1188 case ISD::FREM:
1189 if (tryExpandVecMathCall(Node, RTLIB::REM_F32, RTLIB::REM_F64,
1190 RTLIB::REM_F80, RTLIB::REM_F128,
1191 RTLIB::REM_PPCF128, Results))
1192 return;
1193
1194 break;
1195 case ISD::FSINCOS: {
1196 RTLIB::Libcall LC =
1197 RTLIB::getFSINCOS(Node->getValueType(0).getVectorElementType());
1198 if (DAG.expandMultipleResultFPLibCall(LC, Node, Results))
1199 return;
1200 break;
1201 }
1203 Results.push_back(TLI.expandVECTOR_COMPRESS(Node, DAG));
1204 return;
1205 case ISD::SCMP:
1206 case ISD::UCMP:
1207 Results.push_back(TLI.expandCMP(Node, DAG));
1208 return;
1209
1210 case ISD::FADD:
1211 case ISD::FMUL:
1212 case ISD::FMA:
1213 case ISD::FDIV:
1214 case ISD::FCEIL:
1215 case ISD::FFLOOR:
1216 case ISD::FNEARBYINT:
1217 case ISD::FRINT:
1218 case ISD::FROUND:
1219 case ISD::FROUNDEVEN:
1220 case ISD::FTRUNC:
1221 case ISD::FSQRT:
1222 if (SDValue Expanded = TLI.expandVectorNaryOpBySplitting(Node, DAG)) {
1223 Results.push_back(Expanded);
1224 return;
1225 }
1226 break;
1227 }
1228
1229 SDValue Unrolled = DAG.UnrollVectorOp(Node);
1230 if (Node->getNumValues() == 1) {
1231 Results.push_back(Unrolled);
1232 } else {
1233 assert(Node->getNumValues() == Unrolled->getNumValues() &&
1234 "VectorLegalizer Expand returned wrong number of results!");
1235 for (unsigned I = 0, E = Unrolled->getNumValues(); I != E; ++I)
1236 Results.push_back(Unrolled.getValue(I));
1237 }
1238}
1239
1240SDValue VectorLegalizer::ExpandSELECT(SDNode *Node) {
1241 // Lower a select instruction where the condition is a scalar and the
1242 // operands are vectors. Lower this select to VSELECT and implement it
1243 // using XOR AND OR. The selector bit is broadcasted.
1244 EVT VT = Node->getValueType(0);
1245 SDLoc DL(Node);
1246
1247 SDValue Mask = Node->getOperand(0);
1248 SDValue Op1 = Node->getOperand(1);
1249 SDValue Op2 = Node->getOperand(2);
1250
1251 assert(VT.isVector() && !Mask.getValueType().isVector()
1252 && Op1.getValueType() == Op2.getValueType() && "Invalid type");
1253
1254 // If we can't even use the basic vector operations of
1255 // AND,OR,XOR, we will have to scalarize the op.
1256 // Notice that the operation may be 'promoted' which means that it is
1257 // 'bitcasted' to another type which is handled.
1258 // Also, we need to be able to construct a splat vector using either
1259 // BUILD_VECTOR or SPLAT_VECTOR.
1260 // FIXME: Should we also permit fixed-length SPLAT_VECTOR as a fallback to
1261 // BUILD_VECTOR?
1262 if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
1263 TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
1264 TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand ||
1265 TLI.getOperationAction(VT.isFixedLengthVector() ? ISD::BUILD_VECTOR
1267 VT) == TargetLowering::Expand)
1268 return SDValue();
1269
1270 // Generate a mask operand.
1272
1273 // What is the size of each element in the vector mask.
1274 EVT BitTy = MaskTy.getScalarType();
1275
1276 Mask = DAG.getSelect(DL, BitTy, Mask, DAG.getAllOnesConstant(DL, BitTy),
1277 DAG.getConstant(0, DL, BitTy));
1278
1279 // Broadcast the mask so that the entire vector is all one or all zero.
1280 Mask = DAG.getSplat(MaskTy, DL, Mask);
1281
1282 // Bitcast the operands to be the same type as the mask.
1283 // This is needed when we select between FP types because
1284 // the mask is a vector of integers.
1285 Op1 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op1);
1286 Op2 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op2);
1287
1288 SDValue NotMask = DAG.getNOT(DL, Mask, MaskTy);
1289
1290 Op1 = DAG.getNode(ISD::AND, DL, MaskTy, Op1, Mask);
1291 Op2 = DAG.getNode(ISD::AND, DL, MaskTy, Op2, NotMask);
1292 SDValue Val = DAG.getNode(ISD::OR, DL, MaskTy, Op1, Op2);
1293 return DAG.getNode(ISD::BITCAST, DL, Node->getValueType(0), Val);
1294}
1295
1296SDValue VectorLegalizer::ExpandSEXTINREG(SDNode *Node) {
1297 EVT VT = Node->getValueType(0);
1298
1299 // Make sure that the SRA and SHL instructions are available.
1300 if (TLI.getOperationAction(ISD::SRA, VT) == TargetLowering::Expand ||
1301 TLI.getOperationAction(ISD::SHL, VT) == TargetLowering::Expand)
1302 return SDValue();
1303
1304 SDLoc DL(Node);
1305 EVT OrigTy = cast<VTSDNode>(Node->getOperand(1))->getVT();
1306
1307 unsigned BW = VT.getScalarSizeInBits();
1308 unsigned OrigBW = OrigTy.getScalarSizeInBits();
1309 SDValue ShiftSz = DAG.getConstant(BW - OrigBW, DL, VT);
1310
1311 SDValue Op = DAG.getNode(ISD::SHL, DL, VT, Node->getOperand(0), ShiftSz);
1312 return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz);
1313}
1314
1315// Generically expand a vector anyext in register to a shuffle of the relevant
1316// lanes into the appropriate locations, with other lanes left undef.
1317SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDNode *Node) {
1318 SDLoc DL(Node);
1319 EVT VT = Node->getValueType(0);
1320 int NumElements = VT.getVectorNumElements();
1321 SDValue Src = Node->getOperand(0);
1322 EVT SrcVT = Src.getValueType();
1323 int NumSrcElements = SrcVT.getVectorNumElements();
1324
1325 // *_EXTEND_VECTOR_INREG SrcVT can be smaller than VT - so insert the vector
1326 // into a larger vector type.
1327 if (SrcVT.bitsLE(VT)) {
1328 assert((VT.getSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 &&
1329 "ANY_EXTEND_VECTOR_INREG vector size mismatch");
1330 NumSrcElements = VT.getSizeInBits() / SrcVT.getScalarSizeInBits();
1331 SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
1332 NumSrcElements);
1333 Src = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SrcVT, DAG.getUNDEF(SrcVT),
1334 Src, DAG.getVectorIdxConstant(0, DL));
1335 }
1336
1337 // Build a base mask of undef shuffles.
1338 SmallVector<int, 16> ShuffleMask;
1339 ShuffleMask.resize(NumSrcElements, -1);
1340
1341 // Place the extended lanes into the correct locations.
1342 int ExtLaneScale = NumSrcElements / NumElements;
1343 int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0;
1344 for (int i = 0; i < NumElements; ++i)
1345 ShuffleMask[i * ExtLaneScale + EndianOffset] = i;
1346
1347 return DAG.getNode(
1348 ISD::BITCAST, DL, VT,
1349 DAG.getVectorShuffle(SrcVT, DL, Src, DAG.getUNDEF(SrcVT), ShuffleMask));
1350}
1351
1352SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDNode *Node) {
1353 SDLoc DL(Node);
1354 EVT VT = Node->getValueType(0);
1355 SDValue Src = Node->getOperand(0);
1356 EVT SrcVT = Src.getValueType();
1357
1358 // First build an any-extend node which can be legalized above when we
1359 // recurse through it.
1360 SDValue Op = DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, Src);
1361
1362 // Now we need sign extend. Do this by shifting the elements. Even if these
1363 // aren't legal operations, they have a better chance of being legalized
1364 // without full scalarization than the sign extension does.
1365 unsigned EltWidth = VT.getScalarSizeInBits();
1366 unsigned SrcEltWidth = SrcVT.getScalarSizeInBits();
1367 SDValue ShiftAmount = DAG.getConstant(EltWidth - SrcEltWidth, DL, VT);
1368 return DAG.getNode(ISD::SRA, DL, VT,
1369 DAG.getNode(ISD::SHL, DL, VT, Op, ShiftAmount),
1370 ShiftAmount);
1371}
1372
1373// Generically expand a vector zext in register to a shuffle of the relevant
1374// lanes into the appropriate locations, a blend of zero into the high bits,
1375// and a bitcast to the wider element type.
1376SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDNode *Node) {
1377 SDLoc DL(Node);
1378 EVT VT = Node->getValueType(0);
1379 int NumElements = VT.getVectorNumElements();
1380 SDValue Src = Node->getOperand(0);
1381 EVT SrcVT = Src.getValueType();
1382 int NumSrcElements = SrcVT.getVectorNumElements();
1383
1384 // *_EXTEND_VECTOR_INREG SrcVT can be smaller than VT - so insert the vector
1385 // into a larger vector type.
1386 if (SrcVT.bitsLE(VT)) {
1387 assert((VT.getSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 &&
1388 "ZERO_EXTEND_VECTOR_INREG vector size mismatch");
1389 NumSrcElements = VT.getSizeInBits() / SrcVT.getScalarSizeInBits();
1390 SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
1391 NumSrcElements);
1392 Src = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SrcVT, DAG.getUNDEF(SrcVT),
1393 Src, DAG.getVectorIdxConstant(0, DL));
1394 }
1395
1396 // Build up a zero vector to blend into this one.
1397 SDValue Zero = DAG.getConstant(0, DL, SrcVT);
1398
1399 // Shuffle the incoming lanes into the correct position, and pull all other
1400 // lanes from the zero vector.
1401 auto ShuffleMask = llvm::to_vector<16>(llvm::seq<int>(0, NumSrcElements));
1402
1403 int ExtLaneScale = NumSrcElements / NumElements;
1404 int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0;
1405 for (int i = 0; i < NumElements; ++i)
1406 ShuffleMask[i * ExtLaneScale + EndianOffset] = NumSrcElements + i;
1407
1408 return DAG.getNode(ISD::BITCAST, DL, VT,
1409 DAG.getVectorShuffle(SrcVT, DL, Zero, Src, ShuffleMask));
1410}
1411
1412static void createBSWAPShuffleMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) {
1413 int ScalarSizeInBytes = VT.getScalarSizeInBits() / 8;
1414 for (int I = 0, E = VT.getVectorNumElements(); I != E; ++I)
1415 for (int J = ScalarSizeInBytes - 1; J >= 0; --J)
1416 ShuffleMask.push_back((I * ScalarSizeInBytes) + J);
1417}
1418
1419SDValue VectorLegalizer::ExpandBSWAP(SDNode *Node) {
1420 EVT VT = Node->getValueType(0);
1421
1422 // Scalable vectors can't use shuffle expansion.
1423 if (VT.isScalableVector())
1424 return TLI.expandBSWAP(Node, DAG);
1425
1426 // Generate a byte wise shuffle mask for the BSWAP.
1427 SmallVector<int, 16> ShuffleMask;
1428 createBSWAPShuffleMask(VT, ShuffleMask);
1429 EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, ShuffleMask.size());
1430
1431 // Only emit a shuffle if the mask is legal.
1432 if (TLI.isShuffleMaskLegal(ShuffleMask, ByteVT)) {
1433 SDLoc DL(Node);
1434 SDValue Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Node->getOperand(0));
1435 Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), ShuffleMask);
1436 return DAG.getNode(ISD::BITCAST, DL, VT, Op);
1437 }
1438
1439 // If we have the appropriate vector bit operations, it is better to use them
1440 // than unrolling and expanding each component.
1441 if (TLI.isOperationLegalOrCustom(ISD::SHL, VT) &&
1442 TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
1443 TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) &&
1444 TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT))
1445 return TLI.expandBSWAP(Node, DAG);
1446
1447 // Otherwise let the caller unroll.
1448 return SDValue();
1449}
1450
1451SDValue VectorLegalizer::ExpandBITREVERSE(SDNode *Node) {
1452 EVT VT = Node->getValueType(0);
1453
1454 // We can't unroll or use shuffles for scalable vectors.
1455 if (VT.isScalableVector())
1456 return TLI.expandBITREVERSE(Node, DAG);
1457
1458 // If we have the scalar operation, it's probably cheaper to unroll it.
1459 if (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, VT.getScalarType()))
1460 return SDValue();
1461
1462 // If the vector element width is a whole number of bytes, test if its legal
1463 // to BSWAP shuffle the bytes and then perform the BITREVERSE on the byte
1464 // vector. This greatly reduces the number of bit shifts necessary.
1465 unsigned ScalarSizeInBits = VT.getScalarSizeInBits();
1466 if (ScalarSizeInBits > 8 && (ScalarSizeInBits % 8) == 0) {
1467 SmallVector<int, 16> BSWAPMask;
1468 createBSWAPShuffleMask(VT, BSWAPMask);
1469
1470 EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, BSWAPMask.size());
1471 if (TLI.isShuffleMaskLegal(BSWAPMask, ByteVT) &&
1472 (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, ByteVT) ||
1473 (TLI.isOperationLegalOrCustom(ISD::SHL, ByteVT) &&
1474 TLI.isOperationLegalOrCustom(ISD::SRL, ByteVT) &&
1475 TLI.isOperationLegalOrCustomOrPromote(ISD::AND, ByteVT) &&
1476 TLI.isOperationLegalOrCustomOrPromote(ISD::OR, ByteVT)))) {
1477 SDLoc DL(Node);
1478 SDValue Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Node->getOperand(0));
1479 Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT),
1480 BSWAPMask);
1481 Op = DAG.getNode(ISD::BITREVERSE, DL, ByteVT, Op);
1482 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
1483 return Op;
1484 }
1485 }
1486
1487 // If we have the appropriate vector bit operations, it is better to use them
1488 // than unrolling and expanding each component.
1489 if (TLI.isOperationLegalOrCustom(ISD::SHL, VT) &&
1490 TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
1491 TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) &&
1492 TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT))
1493 return TLI.expandBITREVERSE(Node, DAG);
1494
1495 // Otherwise unroll.
1496 return SDValue();
1497}
1498
1499SDValue VectorLegalizer::ExpandVSELECT(SDNode *Node) {
1500 // Implement VSELECT in terms of XOR, AND, OR
1501 // on platforms which do not support blend natively.
1502 SDLoc DL(Node);
1503
1504 SDValue Mask = Node->getOperand(0);
1505 SDValue Op1 = Node->getOperand(1);
1506 SDValue Op2 = Node->getOperand(2);
1507
1508 EVT VT = Mask.getValueType();
1509
1510 // If we can't even use the basic vector operations of
1511 // AND,OR,XOR, we will have to scalarize the op.
1512 // Notice that the operation may be 'promoted' which means that it is
1513 // 'bitcasted' to another type which is handled.
1514 if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
1515 TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
1516 TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand)
1517 return SDValue();
1518
1519 // This operation also isn't safe with AND, OR, XOR when the boolean type is
1520 // 0/1 and the select operands aren't also booleans, as we need an all-ones
1521 // vector constant to mask with.
1522 // FIXME: Sign extend 1 to all ones if that's legal on the target.
1523 auto BoolContents = TLI.getBooleanContents(Op1.getValueType());
1524 if (BoolContents != TargetLowering::ZeroOrNegativeOneBooleanContent &&
1525 !(BoolContents == TargetLowering::ZeroOrOneBooleanContent &&
1526 Op1.getValueType().getVectorElementType() == MVT::i1))
1527 return SDValue();
1528
1529 // If the mask and the type are different sizes, unroll the vector op. This
1530 // can occur when getSetCCResultType returns something that is different in
1531 // size from the operand types. For example, v4i8 = select v4i32, v4i8, v4i8.
1532 if (VT.getSizeInBits() != Op1.getValueSizeInBits())
1533 return SDValue();
1534
1535 // Bitcast the operands to be the same type as the mask.
1536 // This is needed when we select between FP types because
1537 // the mask is a vector of integers.
1538 Op1 = DAG.getNode(ISD::BITCAST, DL, VT, Op1);
1539 Op2 = DAG.getNode(ISD::BITCAST, DL, VT, Op2);
1540
1541 SDValue NotMask = DAG.getNOT(DL, Mask, VT);
1542
1543 Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask);
1544 Op2 = DAG.getNode(ISD::AND, DL, VT, Op2, NotMask);
1545 SDValue Val = DAG.getNode(ISD::OR, DL, VT, Op1, Op2);
1546 return DAG.getNode(ISD::BITCAST, DL, Node->getValueType(0), Val);
1547}
1548
1549SDValue VectorLegalizer::ExpandVP_SELECT(SDNode *Node) {
1550 // Implement VP_SELECT in terms of VP_XOR, VP_AND and VP_OR on platforms which
1551 // do not support it natively.
1552 SDLoc DL(Node);
1553
1554 SDValue Mask = Node->getOperand(0);
1555 SDValue Op1 = Node->getOperand(1);
1556 SDValue Op2 = Node->getOperand(2);
1557 SDValue EVL = Node->getOperand(3);
1558
1559 EVT VT = Mask.getValueType();
1560
1561 // If we can't even use the basic vector operations of
1562 // VP_AND,VP_OR,VP_XOR, we will have to scalarize the op.
1563 if (TLI.getOperationAction(ISD::VP_AND, VT) == TargetLowering::Expand ||
1564 TLI.getOperationAction(ISD::VP_XOR, VT) == TargetLowering::Expand ||
1565 TLI.getOperationAction(ISD::VP_OR, VT) == TargetLowering::Expand)
1566 return SDValue();
1567
1568 // This operation also isn't safe when the operands aren't also booleans.
1569 if (Op1.getValueType().getVectorElementType() != MVT::i1)
1570 return SDValue();
1571
1572 SDValue Ones = DAG.getAllOnesConstant(DL, VT);
1573 SDValue NotMask = DAG.getNode(ISD::VP_XOR, DL, VT, Mask, Ones, Ones, EVL);
1574
1575 Op1 = DAG.getNode(ISD::VP_AND, DL, VT, Op1, Mask, Ones, EVL);
1576 Op2 = DAG.getNode(ISD::VP_AND, DL, VT, Op2, NotMask, Ones, EVL);
1577 return DAG.getNode(ISD::VP_OR, DL, VT, Op1, Op2, Ones, EVL);
1578}
1579
1580SDValue VectorLegalizer::ExpandVP_MERGE(SDNode *Node) {
1581 // Implement VP_MERGE in terms of VSELECT. Construct a mask where vector
1582 // indices less than the EVL/pivot are true. Combine that with the original
1583 // mask for a full-length mask. Use a full-length VSELECT to select between
1584 // the true and false values.
1585 SDLoc DL(Node);
1586
1587 SDValue Mask = Node->getOperand(0);
1588 SDValue Op1 = Node->getOperand(1);
1589 SDValue Op2 = Node->getOperand(2);
1590 SDValue EVL = Node->getOperand(3);
1591
1592 EVT MaskVT = Mask.getValueType();
1593 bool IsFixedLen = MaskVT.isFixedLengthVector();
1594
1595 EVT EVLVecVT = EVT::getVectorVT(*DAG.getContext(), EVL.getValueType(),
1596 MaskVT.getVectorElementCount());
1597
1598 // If we can't construct the EVL mask efficiently, it's better to unroll.
1599 if ((IsFixedLen &&
1600 !TLI.isOperationLegalOrCustom(ISD::BUILD_VECTOR, EVLVecVT)) ||
1601 (!IsFixedLen &&
1602 (!TLI.isOperationLegalOrCustom(ISD::STEP_VECTOR, EVLVecVT) ||
1603 !TLI.isOperationLegalOrCustom(ISD::SPLAT_VECTOR, EVLVecVT))))
1604 return SDValue();
1605
1606 // If using a SETCC would result in a different type than the mask type,
1607 // unroll.
1608 if (TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
1609 EVLVecVT) != MaskVT)
1610 return SDValue();
1611
1612 SDValue StepVec = DAG.getStepVector(DL, EVLVecVT);
1613 SDValue SplatEVL = DAG.getSplat(EVLVecVT, DL, EVL);
1614 SDValue EVLMask =
1615 DAG.getSetCC(DL, MaskVT, StepVec, SplatEVL, ISD::CondCode::SETULT);
1616
1617 SDValue FullMask = DAG.getNode(ISD::AND, DL, MaskVT, Mask, EVLMask);
1618 return DAG.getSelect(DL, Node->getValueType(0), FullMask, Op1, Op2);
1619}
1620
1621SDValue VectorLegalizer::ExpandVP_REM(SDNode *Node) {
1622 // Implement VP_SREM/UREM in terms of VP_SDIV/VP_UDIV, VP_MUL, VP_SUB.
1623 EVT VT = Node->getValueType(0);
1624
1625 unsigned DivOpc = Node->getOpcode() == ISD::VP_SREM ? ISD::VP_SDIV : ISD::VP_UDIV;
1626
1627 if (!TLI.isOperationLegalOrCustom(DivOpc, VT) ||
1628 !TLI.isOperationLegalOrCustom(ISD::VP_MUL, VT) ||
1629 !TLI.isOperationLegalOrCustom(ISD::VP_SUB, VT))
1630 return SDValue();
1631
1632 SDLoc DL(Node);
1633
1634 SDValue Dividend = Node->getOperand(0);
1635 SDValue Divisor = Node->getOperand(1);
1636 SDValue Mask = Node->getOperand(2);
1637 SDValue EVL = Node->getOperand(3);
1638
1639 // X % Y -> X-X/Y*Y
1640 SDValue Div = DAG.getNode(DivOpc, DL, VT, Dividend, Divisor, Mask, EVL);
1641 SDValue Mul = DAG.getNode(ISD::VP_MUL, DL, VT, Divisor, Div, Mask, EVL);
1642 return DAG.getNode(ISD::VP_SUB, DL, VT, Dividend, Mul, Mask, EVL);
1643}
1644
1645SDValue VectorLegalizer::ExpandVP_FNEG(SDNode *Node) {
1646 EVT VT = Node->getValueType(0);
1648
1649 if (!TLI.isOperationLegalOrCustom(ISD::VP_XOR, IntVT))
1650 return SDValue();
1651
1652 SDValue Mask = Node->getOperand(1);
1653 SDValue EVL = Node->getOperand(2);
1654
1655 SDLoc DL(Node);
1656 SDValue Cast = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0));
1657 SDValue SignMask = DAG.getConstant(
1658 APInt::getSignMask(IntVT.getScalarSizeInBits()), DL, IntVT);
1659 SDValue Xor = DAG.getNode(ISD::VP_XOR, DL, IntVT, Cast, SignMask, Mask, EVL);
1660 return DAG.getNode(ISD::BITCAST, DL, VT, Xor);
1661}
1662
1663SDValue VectorLegalizer::ExpandVP_FABS(SDNode *Node) {
1664 EVT VT = Node->getValueType(0);
1666
1667 if (!TLI.isOperationLegalOrCustom(ISD::VP_AND, IntVT))
1668 return SDValue();
1669
1670 SDValue Mask = Node->getOperand(1);
1671 SDValue EVL = Node->getOperand(2);
1672
1673 SDLoc DL(Node);
1674 SDValue Cast = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0));
1675 SDValue ClearSignMask = DAG.getConstant(
1677 SDValue ClearSign =
1678 DAG.getNode(ISD::VP_AND, DL, IntVT, Cast, ClearSignMask, Mask, EVL);
1679 return DAG.getNode(ISD::BITCAST, DL, VT, ClearSign);
1680}
1681
1682SDValue VectorLegalizer::ExpandVP_FCOPYSIGN(SDNode *Node) {
1683 EVT VT = Node->getValueType(0);
1684
1685 if (VT != Node->getOperand(1).getValueType())
1686 return SDValue();
1687
1689 if (!TLI.isOperationLegalOrCustom(ISD::VP_AND, IntVT) ||
1690 !TLI.isOperationLegalOrCustom(ISD::VP_XOR, IntVT))
1691 return SDValue();
1692
1693 SDValue Mask = Node->getOperand(2);
1694 SDValue EVL = Node->getOperand(3);
1695
1696 SDLoc DL(Node);
1697 SDValue Mag = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0));
1698 SDValue Sign = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(1));
1699
1700 SDValue SignMask = DAG.getConstant(
1701 APInt::getSignMask(IntVT.getScalarSizeInBits()), DL, IntVT);
1702 SDValue SignBit =
1703 DAG.getNode(ISD::VP_AND, DL, IntVT, Sign, SignMask, Mask, EVL);
1704
1705 SDValue ClearSignMask = DAG.getConstant(
1707 SDValue ClearedSign =
1708 DAG.getNode(ISD::VP_AND, DL, IntVT, Mag, ClearSignMask, Mask, EVL);
1709
1710 SDValue CopiedSign = DAG.getNode(ISD::VP_OR, DL, IntVT, ClearedSign, SignBit,
1711 Mask, EVL, SDNodeFlags::Disjoint);
1712
1713 return DAG.getNode(ISD::BITCAST, DL, VT, CopiedSign);
1714}
1715
1716void VectorLegalizer::ExpandFP_TO_UINT(SDNode *Node,
1718 // Attempt to expand using TargetLowering.
1719 SDValue Result, Chain;
1720 if (TLI.expandFP_TO_UINT(Node, Result, Chain, DAG)) {
1721 Results.push_back(Result);
1722 if (Node->isStrictFPOpcode())
1723 Results.push_back(Chain);
1724 return;
1725 }
1726
1727 // Otherwise go ahead and unroll.
1728 if (Node->isStrictFPOpcode()) {
1729 UnrollStrictFPOp(Node, Results);
1730 return;
1731 }
1732
1733 Results.push_back(DAG.UnrollVectorOp(Node));
1734}
1735
1736void VectorLegalizer::ExpandUINT_TO_FLOAT(SDNode *Node,
1738 bool IsStrict = Node->isStrictFPOpcode();
1739 unsigned OpNo = IsStrict ? 1 : 0;
1740 SDValue Src = Node->getOperand(OpNo);
1741 EVT VT = Src.getValueType();
1742 SDLoc DL(Node);
1743
1744 // Attempt to expand using TargetLowering.
1746 SDValue Chain;
1747 if (TLI.expandUINT_TO_FP(Node, Result, Chain, DAG)) {
1748 Results.push_back(Result);
1749 if (IsStrict)
1750 Results.push_back(Chain);
1751 return;
1752 }
1753
1754 // Make sure that the SINT_TO_FP and SRL instructions are available.
1755 if (((!IsStrict && TLI.getOperationAction(ISD::SINT_TO_FP, VT) ==
1756 TargetLowering::Expand) ||
1757 (IsStrict && TLI.getOperationAction(ISD::STRICT_SINT_TO_FP, VT) ==
1758 TargetLowering::Expand)) ||
1759 TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand) {
1760 if (IsStrict) {
1761 UnrollStrictFPOp(Node, Results);
1762 return;
1763 }
1764
1765 Results.push_back(DAG.UnrollVectorOp(Node));
1766 return;
1767 }
1768
1769 unsigned BW = VT.getScalarSizeInBits();
1770 assert((BW == 64 || BW == 32) &&
1771 "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide");
1772
1773 SDValue HalfWord = DAG.getConstant(BW / 2, DL, VT);
1774
1775 // Constants to clear the upper part of the word.
1776 // Notice that we can also use SHL+SHR, but using a constant is slightly
1777 // faster on x86.
1778 uint64_t HWMask = (BW == 64) ? 0x00000000FFFFFFFF : 0x0000FFFF;
1779 SDValue HalfWordMask = DAG.getConstant(HWMask, DL, VT);
1780
1781 // Two to the power of half-word-size.
1782 SDValue TWOHW =
1783 DAG.getConstantFP(1ULL << (BW / 2), DL, Node->getValueType(0));
1784
1785 // Clear upper part of LO, lower HI
1786 SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Src, HalfWord);
1787 SDValue LO = DAG.getNode(ISD::AND, DL, VT, Src, HalfWordMask);
1788
1789 if (IsStrict) {
1790 // Convert hi and lo to floats
1791 // Convert the hi part back to the upper values
1792 // TODO: Can any fast-math-flags be set on these nodes?
1794 {Node->getValueType(0), MVT::Other},
1795 {Node->getOperand(0), HI});
1796 fHI = DAG.getNode(ISD::STRICT_FMUL, DL, {Node->getValueType(0), MVT::Other},
1797 {fHI.getValue(1), fHI, TWOHW});
1799 {Node->getValueType(0), MVT::Other},
1800 {Node->getOperand(0), LO});
1801
1802 SDValue TF = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, fHI.getValue(1),
1803 fLO.getValue(1));
1804
1805 // Add the two halves
1806 SDValue Result =
1807 DAG.getNode(ISD::STRICT_FADD, DL, {Node->getValueType(0), MVT::Other},
1808 {TF, fHI, fLO});
1809
1810 Results.push_back(Result);
1811 Results.push_back(Result.getValue(1));
1812 return;
1813 }
1814
1815 // Convert hi and lo to floats
1816 // Convert the hi part back to the upper values
1817 // TODO: Can any fast-math-flags be set on these nodes?
1818 SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, Node->getValueType(0), HI);
1819 fHI = DAG.getNode(ISD::FMUL, DL, Node->getValueType(0), fHI, TWOHW);
1820 SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, Node->getValueType(0), LO);
1821
1822 // Add the two halves
1823 Results.push_back(
1824 DAG.getNode(ISD::FADD, DL, Node->getValueType(0), fHI, fLO));
1825}
1826
1827SDValue VectorLegalizer::ExpandFNEG(SDNode *Node) {
1828 EVT VT = Node->getValueType(0);
1830
1831 if (!TLI.isOperationLegalOrCustom(ISD::XOR, IntVT))
1832 return SDValue();
1833
1834 // FIXME: The FSUB check is here to force unrolling v1f64 vectors on AArch64.
1835 if (!TLI.isOperationLegalOrCustomOrPromote(ISD::FSUB, VT) &&
1836 !VT.isScalableVector())
1837 return SDValue();
1838
1839 SDLoc DL(Node);
1840 SDValue Cast = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0));
1841 SDValue SignMask = DAG.getConstant(
1842 APInt::getSignMask(IntVT.getScalarSizeInBits()), DL, IntVT);
1843 SDValue Xor = DAG.getNode(ISD::XOR, DL, IntVT, Cast, SignMask);
1844 return DAG.getNode(ISD::BITCAST, DL, VT, Xor);
1845}
1846
1847SDValue VectorLegalizer::ExpandFABS(SDNode *Node) {
1848 EVT VT = Node->getValueType(0);
1850
1851 if (!TLI.isOperationLegalOrCustom(ISD::AND, IntVT))
1852 return SDValue();
1853
1854 // FIXME: The FSUB check is here to force unrolling v1f64 vectors on AArch64.
1855 if (!TLI.isOperationLegalOrCustomOrPromote(ISD::FSUB, VT) &&
1856 !VT.isScalableVector())
1857 return SDValue();
1858
1859 SDLoc DL(Node);
1860 SDValue Cast = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0));
1861 SDValue ClearSignMask = DAG.getConstant(
1863 SDValue ClearedSign = DAG.getNode(ISD::AND, DL, IntVT, Cast, ClearSignMask);
1864 return DAG.getNode(ISD::BITCAST, DL, VT, ClearedSign);
1865}
1866
1867SDValue VectorLegalizer::ExpandFCOPYSIGN(SDNode *Node) {
1868 EVT VT = Node->getValueType(0);
1870
1871 if (VT != Node->getOperand(1).getValueType() ||
1872 !TLI.isOperationLegalOrCustom(ISD::AND, IntVT) ||
1873 !TLI.isOperationLegalOrCustom(ISD::OR, IntVT))
1874 return SDValue();
1875
1876 // FIXME: The FSUB check is here to force unrolling v1f64 vectors on AArch64.
1877 if (!TLI.isOperationLegalOrCustomOrPromote(ISD::FSUB, VT) &&
1878 !VT.isScalableVector())
1879 return SDValue();
1880
1881 SDLoc DL(Node);
1882 SDValue Mag = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0));
1883 SDValue Sign = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(1));
1884
1885 SDValue SignMask = DAG.getConstant(
1886 APInt::getSignMask(IntVT.getScalarSizeInBits()), DL, IntVT);
1887 SDValue SignBit = DAG.getNode(ISD::AND, DL, IntVT, Sign, SignMask);
1888
1889 SDValue ClearSignMask = DAG.getConstant(
1891 SDValue ClearedSign = DAG.getNode(ISD::AND, DL, IntVT, Mag, ClearSignMask);
1892
1893 SDValue CopiedSign = DAG.getNode(ISD::OR, DL, IntVT, ClearedSign, SignBit,
1895
1896 return DAG.getNode(ISD::BITCAST, DL, VT, CopiedSign);
1897}
1898
1899void VectorLegalizer::ExpandFSUB(SDNode *Node,
1901 // For floating-point values, (a-b) is the same as a+(-b). If FNEG is legal,
1902 // we can defer this to operation legalization where it will be lowered as
1903 // a+(-b).
1904 EVT VT = Node->getValueType(0);
1905 if (TLI.isOperationLegalOrCustom(ISD::FNEG, VT) &&
1906 TLI.isOperationLegalOrCustom(ISD::FADD, VT))
1907 return; // Defer to LegalizeDAG
1908
1909 if (SDValue Expanded = TLI.expandVectorNaryOpBySplitting(Node, DAG)) {
1910 Results.push_back(Expanded);
1911 return;
1912 }
1913
1914 SDValue Tmp = DAG.UnrollVectorOp(Node);
1915 Results.push_back(Tmp);
1916}
1917
1918void VectorLegalizer::ExpandSETCC(SDNode *Node,
1920 bool NeedInvert = false;
1921 bool IsVP = Node->getOpcode() == ISD::VP_SETCC;
1922 bool IsStrict = Node->getOpcode() == ISD::STRICT_FSETCC ||
1923 Node->getOpcode() == ISD::STRICT_FSETCCS;
1924 bool IsSignaling = Node->getOpcode() == ISD::STRICT_FSETCCS;
1925 unsigned Offset = IsStrict ? 1 : 0;
1926
1927 SDValue Chain = IsStrict ? Node->getOperand(0) : SDValue();
1928 SDValue LHS = Node->getOperand(0 + Offset);
1929 SDValue RHS = Node->getOperand(1 + Offset);
1930 SDValue CC = Node->getOperand(2 + Offset);
1931
1932 MVT OpVT = LHS.getSimpleValueType();
1933 ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
1934
1935 if (TLI.getCondCodeAction(CCCode, OpVT) != TargetLowering::Expand) {
1936 if (IsStrict) {
1937 UnrollStrictFPOp(Node, Results);
1938 return;
1939 }
1940 Results.push_back(UnrollVSETCC(Node));
1941 return;
1942 }
1943
1944 SDValue Mask, EVL;
1945 if (IsVP) {
1946 Mask = Node->getOperand(3 + Offset);
1947 EVL = Node->getOperand(4 + Offset);
1948 }
1949
1950 SDLoc dl(Node);
1951 bool Legalized =
1952 TLI.LegalizeSetCCCondCode(DAG, Node->getValueType(0), LHS, RHS, CC, Mask,
1953 EVL, NeedInvert, dl, Chain, IsSignaling);
1954
1955 if (Legalized) {
1956 // If we expanded the SETCC by swapping LHS and RHS, or by inverting the
1957 // condition code, create a new SETCC node.
1958 if (CC.getNode()) {
1959 if (IsStrict) {
1960 LHS = DAG.getNode(Node->getOpcode(), dl, Node->getVTList(),
1961 {Chain, LHS, RHS, CC}, Node->getFlags());
1962 Chain = LHS.getValue(1);
1963 } else if (IsVP) {
1964 LHS = DAG.getNode(ISD::VP_SETCC, dl, Node->getValueType(0),
1965 {LHS, RHS, CC, Mask, EVL}, Node->getFlags());
1966 } else {
1967 LHS = DAG.getNode(ISD::SETCC, dl, Node->getValueType(0), LHS, RHS, CC,
1968 Node->getFlags());
1969 }
1970 }
1971
1972 // If we expanded the SETCC by inverting the condition code, then wrap
1973 // the existing SETCC in a NOT to restore the intended condition.
1974 if (NeedInvert) {
1975 if (!IsVP)
1976 LHS = DAG.getLogicalNOT(dl, LHS, LHS->getValueType(0));
1977 else
1978 LHS = DAG.getVPLogicalNOT(dl, LHS, Mask, EVL, LHS->getValueType(0));
1979 }
1980 } else {
1981 assert(!IsStrict && "Don't know how to expand for strict nodes.");
1982
1983 // Otherwise, SETCC for the given comparison type must be completely
1984 // illegal; expand it into a SELECT_CC.
1985 EVT VT = Node->getValueType(0);
1986 LHS =
1987 DAG.getNode(ISD::SELECT_CC, dl, VT, LHS, RHS,
1988 DAG.getBoolConstant(true, dl, VT, LHS.getValueType()),
1989 DAG.getBoolConstant(false, dl, VT, LHS.getValueType()), CC);
1990 LHS->setFlags(Node->getFlags());
1991 }
1992
1993 Results.push_back(LHS);
1994 if (IsStrict)
1995 Results.push_back(Chain);
1996}
1997
1998void VectorLegalizer::ExpandUADDSUBO(SDNode *Node,
2000 SDValue Result, Overflow;
2001 TLI.expandUADDSUBO(Node, Result, Overflow, DAG);
2002 Results.push_back(Result);
2003 Results.push_back(Overflow);
2004}
2005
2006void VectorLegalizer::ExpandSADDSUBO(SDNode *Node,
2008 SDValue Result, Overflow;
2009 TLI.expandSADDSUBO(Node, Result, Overflow, DAG);
2010 Results.push_back(Result);
2011 Results.push_back(Overflow);
2012}
2013
2014void VectorLegalizer::ExpandMULO(SDNode *Node,
2016 SDValue Result, Overflow;
2017 if (!TLI.expandMULO(Node, Result, Overflow, DAG))
2018 std::tie(Result, Overflow) = DAG.UnrollVectorOverflowOp(Node);
2019
2020 Results.push_back(Result);
2021 Results.push_back(Overflow);
2022}
2023
2024void VectorLegalizer::ExpandFixedPointDiv(SDNode *Node,
2026 SDNode *N = Node;
2027 if (SDValue Expanded = TLI.expandFixedPointDiv(N->getOpcode(), SDLoc(N),
2028 N->getOperand(0), N->getOperand(1), N->getConstantOperandVal(2), DAG))
2029 Results.push_back(Expanded);
2030}
2031
2032void VectorLegalizer::ExpandStrictFPOp(SDNode *Node,
2034 if (Node->getOpcode() == ISD::STRICT_UINT_TO_FP) {
2035 ExpandUINT_TO_FLOAT(Node, Results);
2036 return;
2037 }
2038 if (Node->getOpcode() == ISD::STRICT_FP_TO_UINT) {
2039 ExpandFP_TO_UINT(Node, Results);
2040 return;
2041 }
2042
2043 if (Node->getOpcode() == ISD::STRICT_FSETCC ||
2044 Node->getOpcode() == ISD::STRICT_FSETCCS) {
2045 ExpandSETCC(Node, Results);
2046 return;
2047 }
2048
2049 UnrollStrictFPOp(Node, Results);
2050}
2051
2052void VectorLegalizer::ExpandREM(SDNode *Node,
2054 assert((Node->getOpcode() == ISD::SREM || Node->getOpcode() == ISD::UREM) &&
2055 "Expected REM node");
2056
2058 if (!TLI.expandREM(Node, Result, DAG))
2059 Result = DAG.UnrollVectorOp(Node);
2060 Results.push_back(Result);
2061}
2062
2063// Try to expand libm nodes into vector math routine calls. Callers provide the
2064// LibFunc equivalent of the passed in Node, which is used to lookup mappings
2065// within TargetLibraryInfo. The only mappings considered are those where the
2066// result and all operands are the same vector type. While predicated nodes are
2067// not supported, we will emit calls to masked routines by passing in an all
2068// true mask.
2069bool VectorLegalizer::tryExpandVecMathCall(SDNode *Node, RTLIB::Libcall LC,
2071 // Chain must be propagated but currently strict fp operations are down
2072 // converted to their none strict counterpart.
2073 assert(!Node->isStrictFPOpcode() && "Unexpected strict fp operation!");
2074
2075 const char *LCName = TLI.getLibcallName(LC);
2076 if (!LCName)
2077 return false;
2078 LLVM_DEBUG(dbgs() << "Looking for vector variant of " << LCName << "\n");
2079
2080 EVT VT = Node->getValueType(0);
2082
2083 // Lookup a vector function equivalent to the specified libcall. Prefer
2084 // unmasked variants but we will generate a mask if need be.
2085 const TargetLibraryInfo &TLibInfo = DAG.getLibInfo();
2086 const VecDesc *VD = TLibInfo.getVectorMappingInfo(LCName, VL, false);
2087 if (!VD)
2088 VD = TLibInfo.getVectorMappingInfo(LCName, VL, /*Masked=*/true);
2089 if (!VD)
2090 return false;
2091
2092 LLVMContext *Ctx = DAG.getContext();
2093 Type *Ty = VT.getTypeForEVT(*Ctx);
2094 Type *ScalarTy = Ty->getScalarType();
2095
2096 // Construct a scalar function type based on Node's operands.
2098 for (unsigned i = 0; i < Node->getNumOperands(); ++i) {
2099 assert(Node->getOperand(i).getValueType() == VT &&
2100 "Expected matching vector types!");
2101 ArgTys.push_back(ScalarTy);
2102 }
2103 FunctionType *ScalarFTy = FunctionType::get(ScalarTy, ArgTys, false);
2104
2105 // Generate call information for the vector function.
2106 const std::string MangledName = VD->getVectorFunctionABIVariantString();
2107 auto OptVFInfo = VFABI::tryDemangleForVFABI(MangledName, ScalarFTy);
2108 if (!OptVFInfo)
2109 return false;
2110
2111 LLVM_DEBUG(dbgs() << "Found vector variant " << VD->getVectorFnName()
2112 << "\n");
2113
2114 // Sanity check just in case OptVFInfo has unexpected parameters.
2115 if (OptVFInfo->Shape.Parameters.size() !=
2116 Node->getNumOperands() + VD->isMasked())
2117 return false;
2118
2119 // Collect vector call operands.
2120
2121 SDLoc DL(Node);
2124 Entry.IsSExt = false;
2125 Entry.IsZExt = false;
2126
2127 unsigned OpNum = 0;
2128 for (auto &VFParam : OptVFInfo->Shape.Parameters) {
2129 if (VFParam.ParamKind == VFParamKind::GlobalPredicate) {
2130 EVT MaskVT = TLI.getSetCCResultType(DAG.getDataLayout(), *Ctx, VT);
2131 Entry.Node = DAG.getBoolConstant(true, DL, MaskVT, VT);
2132 Entry.Ty = MaskVT.getTypeForEVT(*Ctx);
2133 Args.push_back(Entry);
2134 continue;
2135 }
2136
2137 // Only vector operands are supported.
2138 if (VFParam.ParamKind != VFParamKind::Vector)
2139 return false;
2140
2141 Entry.Node = Node->getOperand(OpNum++);
2142 Entry.Ty = Ty;
2143 Args.push_back(Entry);
2144 }
2145
2146 // Emit a call to the vector function.
2147 SDValue Callee = DAG.getExternalSymbol(VD->getVectorFnName().data(),
2148 TLI.getPointerTy(DAG.getDataLayout()));
2150 CLI.setDebugLoc(DL)
2151 .setChain(DAG.getEntryNode())
2152 .setLibCallee(CallingConv::C, Ty, Callee, std::move(Args));
2153
2154 std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
2155 Results.push_back(CallResult.first);
2156 return true;
2157}
2158
2159/// Try to expand the node to a vector libcall based on the result type.
2160bool VectorLegalizer::tryExpandVecMathCall(
2161 SDNode *Node, RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64,
2162 RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128,
2165 Node->getValueType(0).getVectorElementType(), Call_F32, Call_F64,
2166 Call_F80, Call_F128, Call_PPCF128);
2167
2168 if (LC == RTLIB::UNKNOWN_LIBCALL)
2169 return false;
2170
2171 return tryExpandVecMathCall(Node, LC, Results);
2172}
2173
2174void VectorLegalizer::UnrollStrictFPOp(SDNode *Node,
2176 EVT VT = Node->getValueType(0);
2177 EVT EltVT = VT.getVectorElementType();
2178 unsigned NumElems = VT.getVectorNumElements();
2179 unsigned NumOpers = Node->getNumOperands();
2180 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2181
2182 EVT TmpEltVT = EltVT;
2183 if (Node->getOpcode() == ISD::STRICT_FSETCC ||
2184 Node->getOpcode() == ISD::STRICT_FSETCCS)
2185 TmpEltVT = TLI.getSetCCResultType(DAG.getDataLayout(),
2186 *DAG.getContext(), TmpEltVT);
2187
2188 EVT ValueVTs[] = {TmpEltVT, MVT::Other};
2189 SDValue Chain = Node->getOperand(0);
2190 SDLoc dl(Node);
2191
2192 SmallVector<SDValue, 32> OpValues;
2193 SmallVector<SDValue, 32> OpChains;
2194 for (unsigned i = 0; i < NumElems; ++i) {
2196 SDValue Idx = DAG.getVectorIdxConstant(i, dl);
2197
2198 // The Chain is the first operand.
2199 Opers.push_back(Chain);
2200
2201 // Now process the remaining operands.
2202 for (unsigned j = 1; j < NumOpers; ++j) {
2203 SDValue Oper = Node->getOperand(j);
2204 EVT OperVT = Oper.getValueType();
2205
2206 if (OperVT.isVector())
2207 Oper = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
2208 OperVT.getVectorElementType(), Oper, Idx);
2209
2210 Opers.push_back(Oper);
2211 }
2212
2213 SDValue ScalarOp = DAG.getNode(Node->getOpcode(), dl, ValueVTs, Opers);
2214 SDValue ScalarResult = ScalarOp.getValue(0);
2215 SDValue ScalarChain = ScalarOp.getValue(1);
2216
2217 if (Node->getOpcode() == ISD::STRICT_FSETCC ||
2218 Node->getOpcode() == ISD::STRICT_FSETCCS)
2219 ScalarResult = DAG.getSelect(dl, EltVT, ScalarResult,
2220 DAG.getAllOnesConstant(dl, EltVT),
2221 DAG.getConstant(0, dl, EltVT));
2222
2223 OpValues.push_back(ScalarResult);
2224 OpChains.push_back(ScalarChain);
2225 }
2226
2227 SDValue Result = DAG.getBuildVector(VT, dl, OpValues);
2228 SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OpChains);
2229
2230 Results.push_back(Result);
2231 Results.push_back(NewChain);
2232}
2233
2234SDValue VectorLegalizer::UnrollVSETCC(SDNode *Node) {
2235 EVT VT = Node->getValueType(0);
2236 unsigned NumElems = VT.getVectorNumElements();
2237 EVT EltVT = VT.getVectorElementType();
2238 SDValue LHS = Node->getOperand(0);
2239 SDValue RHS = Node->getOperand(1);
2240 SDValue CC = Node->getOperand(2);
2241 EVT TmpEltVT = LHS.getValueType().getVectorElementType();
2242 SDLoc dl(Node);
2243 SmallVector<SDValue, 8> Ops(NumElems);
2244 for (unsigned i = 0; i < NumElems; ++i) {
2245 SDValue LHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS,
2246 DAG.getVectorIdxConstant(i, dl));
2247 SDValue RHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS,
2248 DAG.getVectorIdxConstant(i, dl));
2249 Ops[i] = DAG.getNode(ISD::SETCC, dl,
2250 TLI.getSetCCResultType(DAG.getDataLayout(),
2251 *DAG.getContext(), TmpEltVT),
2252 LHSElem, RHSElem, CC);
2253 Ops[i] = DAG.getSelect(dl, EltVT, Ops[i], DAG.getAllOnesConstant(dl, EltVT),
2254 DAG.getConstant(0, dl, EltVT));
2255 }
2256 return DAG.getBuildVector(VT, dl, Ops);
2257}
2258
2260 return VectorLegalizer(*this).Run();
2261}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis Results
BlockVerifier::State From
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
#define LLVM_DEBUG(...)
Definition: Debug.h:106
This file defines the DenseMap class.
static void createBSWAPShuffleMask(EVT VT, SmallVectorImpl< int > &ShuffleMask)
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallVector class.
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
BinaryOperator * Mul
DEMANGLE_DUMP_METHOD void dump() const
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition: APInt.h:229
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:209
This class represents an Operation in the Expression.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:211
size_t size() const
Definition: Function.h:858
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
This class is used to represent ISD::LOAD nodes.
Machine Value Type.
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:310
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:228
bool LegalizeVectors()
This transforms the SelectionDAG into a SelectionDAG that only uses vector math operations supported ...
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:501
ilist< SDNode >::iterator allnodes_iterator
Definition: SelectionDAG.h:558
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
void resize(size_type N)
Definition: SmallVector.h:638
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
This class is used to represent ISD::STORE nodes.
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:144
Provides information about what library functions are available for the current target.
const VecDesc * getVectorMappingInfo(StringRef F, const ElementCount &VF, bool Masked) const
LegalizeAction
This enum indicates whether operations are valid for a target, and if not, what action should be used...
virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const
Return the ValueType of the result of SETCC operations.
std::vector< ArgListEntry > ArgListTy
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:355
Provides info so a possible vectorization of a function can be computed.
bool isMasked() const
std::string getVectorFunctionABIVariantString() const
Returns a vector function ABI variant string on the form: ZGV<isa><mask><vlen><vparams><scalarname>(<...
StringRef getVectorFnName() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:125
@ Entry
Definition: COFF.h:844
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:780
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:243
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:753
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:491
@ VECREDUCE_SEQ_FADD
Generic reduction nodes.
Definition: ISDOpcodes.h:1417
@ VECREDUCE_SMIN
Definition: ISDOpcodes.h:1450
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:257
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:574
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:744
@ SMULFIX
RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on 2 integers with the same...
Definition: ISDOpcodes.h:374
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1102
@ SMULFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition: ISDOpcodes.h:380
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:814
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:498
@ FATAN2
FATAN2 - atan2, inspired by libm.
Definition: ISDOpcodes.h:999
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:841
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
Definition: ISDOpcodes.h:1435
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:397
@ VECREDUCE_FMAXIMUM
FMINIMUM/FMAXIMUM nodes propagate NaNs and signed zeroes using the llvm.minimum and llvm....
Definition: ISDOpcodes.h:1439
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:717
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition: ISDOpcodes.h:871
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:262
@ VECREDUCE_SMAX
Definition: ISDOpcodes.h:1449
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:492
@ FPTRUNC_ROUND
FPTRUNC_ROUND - This corresponds to the fptrunc_round intrinsic.
Definition: ISDOpcodes.h:495
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:954
@ FLDEXP
FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
Definition: ISDOpcodes.h:997
@ SDIVFIX
RESULT = [US]DIVFIX(LHS, RHS, SCALE) - Perform fixed point division on 2 integers with the same width...
Definition: ISDOpcodes.h:387
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition: ISDOpcodes.h:418
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:805
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition: ISDOpcodes.h:685
@ STRICT_UINT_TO_FP
Definition: ISDOpcodes.h:465
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
Definition: ISDOpcodes.h:1432
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:752
@ VECREDUCE_FMIN
Definition: ISDOpcodes.h:1436
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:1059
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:981
@ SSUBO
Same for subtraction.
Definition: ISDOpcodes.h:334
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
Definition: ISDOpcodes.h:661
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition: ISDOpcodes.h:515
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:356
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:757
@ VECREDUCE_UMAX
Definition: ISDOpcodes.h:1451
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:642
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:330
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1444
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:674
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:735
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
Definition: ISDOpcodes.h:1044
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:550
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:811
@ FP_TO_UINT_SAT
Definition: ISDOpcodes.h:907
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:772
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:1031
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
Definition: ISDOpcodes.h:366
@ SMULO
Same for multiplication.
Definition: ISDOpcodes.h:338
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition: ISDOpcodes.h:860
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:849
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:697
@ SDIVFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition: ISDOpcodes.h:393
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:939
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:766
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition: ISDOpcodes.h:464
@ MGATHER
Masked gather and scatter - load and store operations for a vector of random addresses with additiona...
Definition: ISDOpcodes.h:1372
@ VECREDUCE_UMIN
Definition: ISDOpcodes.h:1452
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:458
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition: ISDOpcodes.h:480
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:457
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:1050
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:887
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:485
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:709
@ SCMP
[US]CMP - 3-way comparison of signed or unsigned integers.
Definition: ISDOpcodes.h:705
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition: ISDOpcodes.h:680
@ VECREDUCE_FMUL
Definition: ISDOpcodes.h:1433
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:407
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ FFREXP
FFREXP - frexp, extract fractional and exponent component of a floating-point value.
Definition: ISDOpcodes.h:1004
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:920
@ VECTOR_COMPRESS
VECTOR_COMPRESS(Vec, Mask, Passthru) consecutively place vector elements based on mask e....
Definition: ISDOpcodes.h:669
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition: ISDOpcodes.h:882
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition: ISDOpcodes.h:906
@ VECREDUCE_FMINIMUM
Definition: ISDOpcodes.h:1440
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:817
@ VECREDUCE_SEQ_FMUL
Definition: ISDOpcodes.h:1418
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:61
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:508
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:347
@ AssertZext
Definition: ISDOpcodes.h:62
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition: ISDOpcodes.h:692
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:530
std::optional< unsigned > getVPMaskIdx(unsigned Opcode)
The operand position of the vector mask.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1602
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1582
bool isVPOpcode(unsigned Opcode)
Whether this is a vector-predicated Opcode.
Libcall getFSINCOS(EVT RetVT)
getFSINCOS - Return the FSINCOS_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Libcall getFPLibCall(EVT VT, Libcall Call_F32, Libcall Call_F64, Libcall Call_F80, Libcall Call_F128, Libcall Call_PPCF128)
GetFPLibCall - Helper to return the right libcall for the given floating point type,...
std::optional< VFInfo > tryDemangleForVFABI(StringRef MangledName, const FunctionType *FTy)
Function to construct a VFInfo out of a mangled name in the following format:
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
@ Xor
Bitwise or logical XOR of integers.
DWARFExpression::Operation Op
#define N
Extended Value Type.
Definition: ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:94
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:74
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:345
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:368
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:380
bool isFixedLengthVector() const
Definition: ValueTypes.h:181
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:318
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:210
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition: ValueTypes.h:174
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:323
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:331
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:303
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This structure contains all information that is necessary for lowering calls.