LLVM 17.0.0git
LegalizeVectorOps.cpp
Go to the documentation of this file.
1//===- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the SelectionDAG::LegalizeVectors method.
10//
11// The vector legalizer looks for vector operations which might need to be
12// scalarized and legalizes them. This is a separate step from Legalize because
13// scalarizing can introduce illegal types. For example, suppose we have an
14// ISD::SDIV of type v2i64 on x86-32. The type is legal (for example, addition
15// on a v2i64 is legal), but ISD::SDIV isn't legal, so we have to unroll the
16// operation, which introduces nodes with the illegal type i64 which must be
17// expanded. Similarly, suppose we have an ISD::SRA of type v16i8 on PowerPC;
18// the operation must be unrolled, which introduces nodes with the illegal
19// type i8 which must be promoted.
20//
21// This does not legalize vector manipulations like ISD::BUILD_VECTOR,
22// or operations that happen to take a vector which are custom-lowered;
23// the legalization for such operations never produces nodes
24// with illegal types, so it's okay to put off legalizing them until
25// SelectionDAG::Legalize runs.
26//
27//===----------------------------------------------------------------------===//
28
#include "llvm/ADT/ArrayRef.h"
29#include "llvm/ADT/DenseMap.h"
36#include "llvm/IR/DataLayout.h"
39#include "llvm/Support/Debug.h"
42#include <cassert>
43#include <cstdint>
44#include <iterator>
45#include <utility>
46
47using namespace llvm;
48
49#define DEBUG_TYPE "legalizevectorops"
50
51namespace {
52
53class VectorLegalizer {
54 SelectionDAG& DAG;
55 const TargetLowering &TLI;
56 bool Changed = false; // Keep track of whether anything changed
57
58 /// For nodes that are of legal width, and that have more than one use, this
59 /// map indicates what regularized operand to use. This allows us to avoid
60 /// legalizing the same thing more than once.
62
63 /// Adds a node to the translation cache.
64 void AddLegalizedOperand(SDValue From, SDValue To) {
65 LegalizedNodes.insert(std::make_pair(From, To));
66 // If someone requests legalization of the new node, return itself.
67 if (From != To)
68 LegalizedNodes.insert(std::make_pair(To, To));
69 }
70
71 /// Legalizes the given node.
72 SDValue LegalizeOp(SDValue Op);
73
74 /// Assuming the node is legal, "legalize" the results.
75 SDValue TranslateLegalizeResults(SDValue Op, SDNode *Result);
76
77 /// Make sure Results are legal and update the translation cache.
78 SDValue RecursivelyLegalizeResults(SDValue Op,
80
81 /// Wrapper to interface LowerOperation with a vector of Results.
82 /// Returns false if the target wants to use default expansion. Otherwise
83 /// returns true. If return is true and the Results are empty, then the
84 /// target wants to keep the input node as is.
85 bool LowerOperationWrapper(SDNode *N, SmallVectorImpl<SDValue> &Results);
86
87 /// Implements unrolling a VSETCC.
88 SDValue UnrollVSETCC(SDNode *Node);
89
90 /// Implement expand-based legalization of vector operations.
91 ///
92 /// This is just a high-level routine to dispatch to specific code paths for
93 /// operations to legalize them.
95
96 /// Implements expansion for FP_TO_UINT; falls back to UnrollVectorOp if
97 /// FP_TO_SINT isn't legal.
98 void ExpandFP_TO_UINT(SDNode *Node, SmallVectorImpl<SDValue> &Results);
99
100 /// Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if
101 /// SINT_TO_FLOAT and SHR on vectors isn't legal.
102 void ExpandUINT_TO_FLOAT(SDNode *Node, SmallVectorImpl<SDValue> &Results);
103
104 /// Implement expansion for SIGN_EXTEND_INREG using SRL and SRA.
105 SDValue ExpandSEXTINREG(SDNode *Node);
106
107 /// Implement expansion for ANY_EXTEND_VECTOR_INREG.
108 ///
109 /// Shuffles the low lanes of the operand into place and bitcasts to the proper
110 /// type. The contents of the bits in the extended part of each element are
111 /// undef.
112 SDValue ExpandANY_EXTEND_VECTOR_INREG(SDNode *Node);
113
114 /// Implement expansion for SIGN_EXTEND_VECTOR_INREG.
115 ///
116 /// Shuffles the low lanes of the operand into place, bitcasts to the proper
117 /// type, then shifts left and arithmetic shifts right to introduce a sign
118 /// extension.
119 SDValue ExpandSIGN_EXTEND_VECTOR_INREG(SDNode *Node);
120
121 /// Implement expansion for ZERO_EXTEND_VECTOR_INREG.
122 ///
123 /// Shuffles the low lanes of the operand into place and blends zeros into
124 /// the remaining lanes, finally bitcasting to the proper type.
125 SDValue ExpandZERO_EXTEND_VECTOR_INREG(SDNode *Node);
126
127 /// Expand bswap of vectors into a shuffle if legal.
128 SDValue ExpandBSWAP(SDNode *Node);
129
130 /// Implement vselect in terms of XOR, AND, OR when blend is not
131 /// supported by the target.
132 SDValue ExpandVSELECT(SDNode *Node);
133 SDValue ExpandVP_SELECT(SDNode *Node);
134 SDValue ExpandVP_MERGE(SDNode *Node);
135 SDValue ExpandVP_REM(SDNode *Node);
136 SDValue ExpandSELECT(SDNode *Node);
137 std::pair<SDValue, SDValue> ExpandLoad(SDNode *N);
138 SDValue ExpandStore(SDNode *N);
139 SDValue ExpandFNEG(SDNode *Node);
140 void ExpandFSUB(SDNode *Node, SmallVectorImpl<SDValue> &Results);
141 void ExpandSETCC(SDNode *Node, SmallVectorImpl<SDValue> &Results);
142 void ExpandBITREVERSE(SDNode *Node, SmallVectorImpl<SDValue> &Results);
143 void ExpandUADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
144 void ExpandSADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
145 void ExpandMULO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
146 void ExpandFixedPointDiv(SDNode *Node, SmallVectorImpl<SDValue> &Results);
147 void ExpandStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results);
148 void ExpandREM(SDNode *Node, SmallVectorImpl<SDValue> &Results);
149
150 void UnrollStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results);
151
152 /// Implements vector promotion.
153 ///
154 /// This is essentially just bitcasting the operands to a different type and
155 /// bitcasting the result back to the original type.
157
158 /// Implements [SU]INT_TO_FP vector promotion.
159 ///
160 /// This is a [zs]ext of the input operand to a larger integer type.
161 void PromoteINT_TO_FP(SDNode *Node, SmallVectorImpl<SDValue> &Results);
162
163 /// Implements FP_TO_[SU]INT vector promotion of the result type.
164 ///
165 /// It is promoted to a larger integer type. The result is then
166 /// truncated back to the original type.
167 void PromoteFP_TO_INT(SDNode *Node, SmallVectorImpl<SDValue> &Results);
168
169public:
170 VectorLegalizer(SelectionDAG& dag) :
171 DAG(dag), TLI(dag.getTargetLoweringInfo()) {}
172
173 /// Begin legalizer the vector operations in the DAG.
174 bool Run();
175};
176
177} // end anonymous namespace
178
179bool VectorLegalizer::Run() {
180 // Before we start legalizing vector nodes, check if there are any vectors.
181 bool HasVectors = false;
182 for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
183 E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I) {
184 // Check if the values of the nodes contain vectors. We don't need to check
185 // the operands because we are going to check their values at some point.
186 HasVectors = llvm::any_of(I->values(), [](EVT T) { return T.isVector(); });
187
188 // If we found a vector node we can start the legalization.
189 if (HasVectors)
190 break;
191 }
192
193 // If this basic block has no vectors then no need to legalize vectors.
194 if (!HasVectors)
195 return false;
196
197 // The legalize process is inherently a bottom-up recursive process (users
198 // legalize their uses before themselves). Given infinite stack space, we
199 // could just start legalizing on the root and traverse the whole graph. In
200 // practice however, this causes us to run out of stack space on large basic
201 // blocks. To avoid this problem, compute an ordering of the nodes where each
202 // node is only legalized after all of its operands are legalized.
203 DAG.AssignTopologicalOrder();
204 for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
205 E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I)
206 LegalizeOp(SDValue(&*I, 0));
207
208 // Finally, it's possible the root changed. Get the new root.
209 SDValue OldRoot = DAG.getRoot();
210 assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?");
211 DAG.setRoot(LegalizedNodes[OldRoot]);
212
213 LegalizedNodes.clear();
214
215 // Remove dead nodes now.
216 DAG.RemoveDeadNodes();
217
218 return Changed;
219}
220
221SDValue VectorLegalizer::TranslateLegalizeResults(SDValue Op, SDNode *Result) {
222 assert(Op->getNumValues() == Result->getNumValues() &&
223 "Unexpected number of results");
224 // Generic legalization: just pass the operand through.
225 for (unsigned i = 0, e = Op->getNumValues(); i != e; ++i)
226 AddLegalizedOperand(Op.getValue(i), SDValue(Result, i));
227 return SDValue(Result, Op.getResNo());
228}
229
231VectorLegalizer::RecursivelyLegalizeResults(SDValue Op,
233 assert(Results.size() == Op->getNumValues() &&
234 "Unexpected number of results");
235 // Make sure that the generated code is itself legal.
236 for (unsigned i = 0, e = Results.size(); i != e; ++i) {
237 Results[i] = LegalizeOp(Results[i]);
238 AddLegalizedOperand(Op.getValue(i), Results[i]);
239 }
240
241 return Results[Op.getResNo()];
242}
243
/// Legalizes a single value and returns its legalized replacement.
///
/// Steps, in order:
///  1. Return the cached result if Op is already in LegalizedNodes.
///  2. Legalize every operand recursively and rewrite the node with the
///     legalized operands via DAG.UpdateNodeOperands.
///  3. If the node neither produces nor consumes a vector, pass it through
///     TranslateLegalizeResults unchanged.
///  4. Otherwise select a TargetLowering::LegalizeAction for the opcode (first
///     switch) and apply it (second switch): Promote, Custom (falling through
///     to Expand when LowerOperationWrapper returns false) or Expand.
///  5. If no replacement values were produced the node is kept; otherwise
///     Changed is set and the replacements are themselves legalized.
///
/// NOTE(review): the line numbers embedded in this copy skip in several
/// places (e.g. 250->252, 284->286, 346->349, 430->444, 446->449), so some
/// statements and case labels appear to be missing here. Confirm against the
/// upstream file before relying on the exact case lists.
244SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
245 // Note that LegalizeOp may be reentered even from single-use nodes, which
246 // means that we always must cache transformed nodes.
247 DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
248 if (I != LegalizedNodes.end()) return I->second;
249
250 // Legalize the operands
 // NOTE(review): `Ops` is used below but its declaration is not visible in
 // this copy; a line appears to be missing here.
252 for (const SDValue &Oper : Op->op_values())
253 Ops.push_back(LegalizeOp(Oper));
254
255 SDNode *Node = DAG.UpdateNodeOperands(Op.getNode(), Ops);
256
 // Only nodes that touch a vector, either as a result or as an operand, are
 // of interest to this pass.
257 bool HasVectorValueOrOp =
258 llvm::any_of(Node->values(), [](EVT T) { return T.isVector(); }) ||
259 llvm::any_of(Node->op_values(),
260 [](SDValue O) { return O.getValueType().isVector(); });
261 if (!HasVectorValueOrOp)
262 return TranslateLegalizeResults(Op, Node);
263
 // First switch: decide which action applies. Opcodes without a case are
 // passed through untouched by the default label.
264 TargetLowering::LegalizeAction Action = TargetLowering::Legal;
265 EVT ValVT;
266 switch (Op.getOpcode()) {
267 default:
268 return TranslateLegalizeResults(Op, Node);
269 case ISD::LOAD: {
270 LoadSDNode *LD = cast<LoadSDNode>(Node);
271 ISD::LoadExtType ExtType = LD->getExtensionType();
272 EVT LoadedVT = LD->getMemoryVT();
 // Only extending loads of vector memory types are queried; all other loads
 // keep the initial Legal action.
273 if (LoadedVT.isVector() && ExtType != ISD::NON_EXTLOAD)
274 Action = TLI.getLoadExtAction(ExtType, LD->getValueType(0), LoadedVT);
275 break;
276 }
277 case ISD::STORE: {
278 StoreSDNode *ST = cast<StoreSDNode>(Node);
279 EVT StVT = ST->getMemoryVT();
280 MVT ValVT = ST->getValue().getSimpleValueType();
 // Only truncating stores of vector memory types are queried.
281 if (StVT.isVector() && ST->isTruncatingStore())
282 Action = TLI.getTruncStoreAction(ValVT, StVT);
283 break;
284 }
 // NOTE(review): no `case` label is visible for the statements that follow;
 // one appears to be missing from this copy.
286 Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
287 // This operation lies about being legal: when it claims to be legal,
288 // it should actually be expanded.
289 if (Action == TargetLowering::Legal)
290 Action = TargetLowering::Expand;
291 break;
 // One case label per entry in ConstrainedOps.def (the STRICT_* opcodes).
292#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
293 case ISD::STRICT_##DAGN:
294#include "llvm/IR/ConstrainedOps.def"
295 ValVT = Node->getValueType(0);
 // For strict int-to-fp conversions the action is keyed on the type of
 // operand 1 rather than on the result type.
296 if (Op.getOpcode() == ISD::STRICT_SINT_TO_FP ||
297 Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
298 ValVT = Node->getOperand(1).getValueType();
299 Action = TLI.getOperationAction(Node->getOpcode(), ValVT);
300 // If we're asked to expand a strict vector floating-point operation,
301 // by default we're going to simply unroll it. That is usually the
302 // best approach, except in the case where the resulting strict (scalar)
303 // operations would themselves use the fallback mutation to non-strict.
304 // In that specific case, just do the fallback on the vector op.
305 if (Action == TargetLowering::Expand && !TLI.isStrictFPEnabled() &&
306 TLI.getStrictFPOperationAction(Node->getOpcode(), ValVT) ==
307 TargetLowering::Legal) {
308 EVT EltVT = ValVT.getVectorElementType();
309 if (TLI.getOperationAction(Node->getOpcode(), EltVT)
310 == TargetLowering::Expand &&
311 TLI.getStrictFPOperationAction(Node->getOpcode(), EltVT)
312 == TargetLowering::Legal)
313 Action = TargetLowering::Legal;
314 }
315 break;
 // Opcodes whose action is keyed on the type of result 0.
316 case ISD::ADD:
317 case ISD::SUB:
318 case ISD::MUL:
319 case ISD::MULHS:
320 case ISD::MULHU:
321 case ISD::SDIV:
322 case ISD::UDIV:
323 case ISD::SREM:
324 case ISD::UREM:
325 case ISD::SDIVREM:
326 case ISD::UDIVREM:
327 case ISD::FADD:
328 case ISD::FSUB:
329 case ISD::FMUL:
330 case ISD::FDIV:
331 case ISD::FREM:
332 case ISD::AND:
333 case ISD::OR:
334 case ISD::XOR:
335 case ISD::SHL:
336 case ISD::SRA:
337 case ISD::SRL:
338 case ISD::FSHL:
339 case ISD::FSHR:
340 case ISD::ROTL:
341 case ISD::ROTR:
342 case ISD::ABS:
343 case ISD::BSWAP:
344 case ISD::BITREVERSE:
345 case ISD::CTLZ:
346 case ISD::CTTZ:
349 case ISD::CTPOP:
350 case ISD::SELECT:
351 case ISD::VSELECT:
352 case ISD::SELECT_CC:
353 case ISD::ZERO_EXTEND:
354 case ISD::ANY_EXTEND:
355 case ISD::TRUNCATE:
356 case ISD::SIGN_EXTEND:
357 case ISD::FP_TO_SINT:
358 case ISD::FP_TO_UINT:
359 case ISD::FNEG:
360 case ISD::FABS:
361 case ISD::FMINNUM:
362 case ISD::FMAXNUM:
365 case ISD::FMINIMUM:
366 case ISD::FMAXIMUM:
367 case ISD::FCOPYSIGN:
368 case ISD::FSQRT:
369 case ISD::FSIN:
370 case ISD::FCOS:
371 case ISD::FPOWI:
372 case ISD::FPOW:
373 case ISD::FLOG:
374 case ISD::FLOG2:
375 case ISD::FLOG10:
376 case ISD::FEXP:
377 case ISD::FEXP2:
378 case ISD::FCEIL:
379 case ISD::FTRUNC:
380 case ISD::FRINT:
381 case ISD::FNEARBYINT:
382 case ISD::FROUND:
383 case ISD::FROUNDEVEN:
384 case ISD::FFLOOR:
385 case ISD::FP_ROUND:
386 case ISD::FP_EXTEND:
387 case ISD::FMA:
392 case ISD::SMIN:
393 case ISD::SMAX:
394 case ISD::UMIN:
395 case ISD::UMAX:
396 case ISD::SMUL_LOHI:
397 case ISD::UMUL_LOHI:
398 case ISD::SADDO:
399 case ISD::UADDO:
400 case ISD::SSUBO:
401 case ISD::USUBO:
402 case ISD::SMULO:
403 case ISD::UMULO:
405 case ISD::SADDSAT:
406 case ISD::UADDSAT:
407 case ISD::SSUBSAT:
408 case ISD::USUBSAT:
409 case ISD::SSHLSAT:
410 case ISD::USHLSAT:
413 case ISD::MGATHER:
414 Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
415 break;
 // Fixed-point ops: the query additionally takes the scale, read from
 // constant operand 2.
416 case ISD::SMULFIX:
417 case ISD::SMULFIXSAT:
418 case ISD::UMULFIX:
419 case ISD::UMULFIXSAT:
420 case ISD::SDIVFIX:
421 case ISD::SDIVFIXSAT:
422 case ISD::UDIVFIX:
423 case ISD::UDIVFIXSAT: {
424 unsigned Scale = Node->getConstantOperandVal(2);
425 Action = TLI.getFixedPointOperationAction(Node->getOpcode(),
426 Node->getValueType(0), Scale);
427 break;
428 }
 // Opcodes whose action is keyed on the type of operand 0.
429 case ISD::SINT_TO_FP:
430 case ISD::UINT_TO_FP:
444 Action = TLI.getOperationAction(Node->getOpcode(),
445 Node->getOperand(0).getValueType());
446 break;
 // NOTE(review): no `case` label is visible for the statements that follow
 // (action keyed on the type of operand 1); labels appear to be missing.
449 Action = TLI.getOperationAction(Node->getOpcode(),
450 Node->getOperand(1).getValueType());
451 break;
452 case ISD::SETCC: {
 // The condition code is checked first, against the operand type; the
 // operation action on the result type is consulted only if that is Legal.
453 MVT OpVT = Node->getOperand(0).getSimpleValueType();
454 ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get();
455 Action = TLI.getCondCodeAction(CCCode, OpVT);
456 if (Action == TargetLowering::Legal)
457 Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
458 break;
459 }
460
 // One case per VP opcode registered in VPIntrinsics.def. A negative LEGALPOS
 // selects result value -(1 + LEGALPOS); a non-negative one selects that
 // operand's type. VP_SETCC checks its condition code first, like SETCC.
461#define BEGIN_REGISTER_VP_SDNODE(VPID, LEGALPOS, ...) \
462 case ISD::VPID: { \
463 EVT LegalizeVT = LEGALPOS < 0 ? Node->getValueType(-(1 + LEGALPOS)) \
464 : Node->getOperand(LEGALPOS).getValueType(); \
465 if (ISD::VPID == ISD::VP_SETCC) { \
466 ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get(); \
467 Action = TLI.getCondCodeAction(CCCode, LegalizeVT.getSimpleVT()); \
468 if (Action != TargetLowering::Legal) \
469 break; \
470 } \
471 Action = TLI.getOperationAction(Node->getOpcode(), LegalizeVT); \
472 } break;
473#include "llvm/IR/VPIntrinsics.def"
474 }
475
476 LLVM_DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG));
477
 // Second switch: apply the chosen action. Replacement values, if any, are
 // collected in ResultVals.
478 SmallVector<SDValue, 8> ResultVals;
479 switch (Action) {
480 default: llvm_unreachable("This action is not supported yet!");
481 case TargetLowering::Promote:
482 assert((Op.getOpcode() != ISD::LOAD && Op.getOpcode() != ISD::STORE) &&
483 "This action is not supported yet!");
484 LLVM_DEBUG(dbgs() << "Promoting\n");
485 Promote(Node, ResultVals);
486 assert(!ResultVals.empty() && "No results for promotion?");
487 break;
488 case TargetLowering::Legal:
489 LLVM_DEBUG(dbgs() << "Legal node: nothing to do\n");
490 break;
491 case TargetLowering::Custom:
492 LLVM_DEBUG(dbgs() << "Trying custom legalization\n");
493 if (LowerOperationWrapper(Node, ResultVals))
494 break;
 // The target declined custom lowering: fall back to generic expansion.
495 LLVM_DEBUG(dbgs() << "Could not custom legalize node\n");
496 [[fallthrough]];
497 case TargetLowering::Expand:
498 LLVM_DEBUG(dbgs() << "Expanding\n");
499 Expand(Node, ResultVals);
500 break;
501 }
502
 // No replacement values means the node is kept as it is.
503 if (ResultVals.empty())
504 return TranslateLegalizeResults(Op, Node);
505
506 Changed = true;
507 return RecursivelyLegalizeResults(Op, ResultVals);
508}
509
510// FIXME: This is very similar to TargetLowering::LowerOperationWrapper. Can we
511// merge them somehow?
512bool VectorLegalizer::LowerOperationWrapper(SDNode *Node,
514 SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
515
516 if (!Res.getNode())
517 return false;
518
519 if (Res == SDValue(Node, 0))
520 return true;
521
522 // If the original node has one result, take the return value from
523 // LowerOperation as is. It might not be result number 0.
524 if (Node->getNumValues() == 1) {
525 Results.push_back(Res);
526 return true;
527 }
528
529 // If the original node has multiple results, then the return node should
530 // have the same number of results.
531 assert((Node->getNumValues() == Res->getNumValues()) &&
532 "Lowering returned the wrong number of results!");
533
534 // Places new result values base on N result number.
535 for (unsigned I = 0, E = Node->getNumValues(); I != E; ++I)
536 Results.push_back(Res.getValue(I));
537
538 return true;
539}
540
/// Promotes a vector operation to the type the target asks for and appends
/// the replacement value to Results.
///
/// Int<->FP conversions are delegated to PromoteINT_TO_FP / PromoteFP_TO_INT.
/// For everything else each vector operand is converted to the promoted type
/// (FP_EXTEND or BITCAST), the operation is re-created on that type with the
/// original node flags, and the result is converted back (FP_ROUND or
/// BITCAST).
///
/// NOTE(review): the line numbers embedded in this copy skip at 546->549,
/// 553->556, 583->585 and 595->598, so some case labels and parts of two
/// conditions appear to be missing. Confirm against the upstream file.
541void VectorLegalizer::Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
542 // For a few operations there is a specific concept for promotion based on
543 // the operand's type.
544 switch (Node->getOpcode()) {
545 case ISD::SINT_TO_FP:
546 case ISD::UINT_TO_FP:
549 // "Promote" the operation by extending the operand.
550 PromoteINT_TO_FP(Node, Results);
551 return;
552 case ISD::FP_TO_UINT:
553 case ISD::FP_TO_SINT:
556 // Promote the operation by widening the result type and truncating back.
557 PromoteFP_TO_INT(Node, Results);
558 return;
559 case ISD::FP_ROUND:
560 case ISD::FP_EXTEND:
561 // These operations are used to do promotion so they can't be promoted
562 // themselves.
563 llvm_unreachable("Don't know how to promote this operation!");
564 }
565
566 // There are currently two cases of vector promotion:
567 // 1) Bitcasting a vector of integers to a different type to a vector of the
568 // same overall length. For example, x86 promotes ISD::AND v2i32 to v1i64.
569 // 2) Extending a vector of floats to a vector of the same number of larger
570 // floats. For example, AArch64 promotes ISD::FADD on v4f16 to v4f32.
571 assert(Node->getNumValues() == 1 &&
572 "Can't promote a vector with multiple results!");
573 MVT VT = Node->getSimpleValueType(0);
574 MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
575 SDLoc dl(Node);
576 SmallVector<SDValue, 4> Operands(Node->getNumOperands());
577
 // Convert every vector operand to the promoted type; non-vector operands
 // are forwarded unchanged.
578 for (unsigned j = 0; j != Node->getNumOperands(); ++j) {
579 if (Node->getOperand(j).getValueType().isVector())
580 if (Node->getOperand(j)
581 .getValueType()
582 .getVectorElementType()
583 .isFloatingPoint() &&
 // NOTE(review): the condition above ends in `&&` with no right-hand side
 // visible; its final line appears to be missing from this copy.
585 Operands[j] = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(j));
586 else
587 Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(j));
588 else
589 Operands[j] = Node->getOperand(j);
590 }
591
 // Re-create the operation on the promoted type, keeping the node flags.
592 SDValue Res =
593 DAG.getNode(Node->getOpcode(), dl, NVT, Operands, Node->getFlags());
594
 // Convert the result back to the original type.
595 if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) ||
 // NOTE(review): the condition above ends in `||` with no right-hand side
 // visible; its remaining lines appear to be missing from this copy.
598 Res = DAG.getNode(ISD::FP_ROUND, dl, VT, Res,
599 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
600 else
601 Res = DAG.getNode(ISD::BITCAST, dl, VT, Res);
602
603 Results.push_back(Res);
604}
605
606void VectorLegalizer::PromoteINT_TO_FP(SDNode *Node,
608 // INT_TO_FP operations may require the input operand be promoted even
609 // when the type is otherwise legal.
610 bool IsStrict = Node->isStrictFPOpcode();
611 MVT VT = Node->getOperand(IsStrict ? 1 : 0).getSimpleValueType();
612 MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
614 "Vectors have different number of elements!");
615
616 SDLoc dl(Node);
617 SmallVector<SDValue, 4> Operands(Node->getNumOperands());
618
619 unsigned Opc = (Node->getOpcode() == ISD::UINT_TO_FP ||
620 Node->getOpcode() == ISD::STRICT_UINT_TO_FP)
623 for (unsigned j = 0; j != Node->getNumOperands(); ++j) {
624 if (Node->getOperand(j).getValueType().isVector())
625 Operands[j] = DAG.getNode(Opc, dl, NVT, Node->getOperand(j));
626 else
627 Operands[j] = Node->getOperand(j);
628 }
629
630 if (IsStrict) {
631 SDValue Res = DAG.getNode(Node->getOpcode(), dl,
632 {Node->getValueType(0), MVT::Other}, Operands);
633 Results.push_back(Res);
634 Results.push_back(Res.getValue(1));
635 return;
636 }
637
638 SDValue Res =
639 DAG.getNode(Node->getOpcode(), dl, Node->getValueType(0), Operands);
640 Results.push_back(Res);
641}
642
643// For FP_TO_INT we promote the result type to a vector type with wider
644// elements and then truncate the result. This is different from the default
645// PromoteVector which uses bitcast to promote thus assumning that the
646// promoted vector type has the same overall size.
647void VectorLegalizer::PromoteFP_TO_INT(SDNode *Node,
649 MVT VT = Node->getSimpleValueType(0);
650 MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
651 bool IsStrict = Node->isStrictFPOpcode();
653 "Vectors have different number of elements!");
654
655 unsigned NewOpc = Node->getOpcode();
656 // Change FP_TO_UINT to FP_TO_SINT if possible.
657 // TODO: Should we only do this if FP_TO_UINT itself isn't legal?
658 if (NewOpc == ISD::FP_TO_UINT &&
659 TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT))
660 NewOpc = ISD::FP_TO_SINT;
661
662 if (NewOpc == ISD::STRICT_FP_TO_UINT &&
663 TLI.isOperationLegalOrCustom(ISD::STRICT_FP_TO_SINT, NVT))
664 NewOpc = ISD::STRICT_FP_TO_SINT;
665
666 SDLoc dl(Node);
667 SDValue Promoted, Chain;
668 if (IsStrict) {
669 Promoted = DAG.getNode(NewOpc, dl, {NVT, MVT::Other},
670 {Node->getOperand(0), Node->getOperand(1)});
671 Chain = Promoted.getValue(1);
672 } else
673 Promoted = DAG.getNode(NewOpc, dl, NVT, Node->getOperand(0));
674
675 // Assert that the converted value fits in the original type. If it doesn't
676 // (eg: because the value being converted is too big), then the result of the
677 // original operation was undefined anyway, so the assert is still correct.
678 if (Node->getOpcode() == ISD::FP_TO_UINT ||
679 Node->getOpcode() == ISD::STRICT_FP_TO_UINT)
680 NewOpc = ISD::AssertZext;
681 else
682 NewOpc = ISD::AssertSext;
683
684 Promoted = DAG.getNode(NewOpc, dl, NVT, Promoted,
685 DAG.getValueType(VT.getScalarType()));
686 Promoted = DAG.getNode(ISD::TRUNCATE, dl, VT, Promoted);
687 Results.push_back(Promoted);
688 if (IsStrict)
689 Results.push_back(Chain);
690}
691
692std::pair<SDValue, SDValue> VectorLegalizer::ExpandLoad(SDNode *N) {
693 LoadSDNode *LD = cast<LoadSDNode>(N);
694 return TLI.scalarizeVectorLoad(LD, DAG);
695}
696
697SDValue VectorLegalizer::ExpandStore(SDNode *N) {
698 StoreSDNode *ST = cast<StoreSDNode>(N);
699 SDValue TF = TLI.scalarizeVectorStore(ST, DAG);
700 return TF;
701}
702
/// Expands a vector operation and appends the replacement values to Results.
///
/// Dispatches on the opcode to a dedicated expansion: either a VectorLegalizer
/// Expand* helper or a TargetLowering expand* routine. Cases that `return`
/// have filled Results themselves. Cases that `break` (no dedicated expansion
/// applies, or the routine returned a null SDValue) and opcodes without a case
/// end up at the bottom of the function, where the node is unrolled with
/// DAG.UnrollVectorOp.
///
/// NOTE(review): the line numbers embedded in this copy skip in several
/// places (e.g. 713->715, 717->719, 809->811, 896->899, 932->946), so a number
/// of case labels appear to be missing. Confirm against the upstream file.
703void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
704 switch (Node->getOpcode()) {
705 case ISD::LOAD: {
 // A load is replaced by both members of the pair ExpandLoad returns.
706 std::pair<SDValue, SDValue> Tmp = ExpandLoad(Node);
707 Results.push_back(Tmp.first);
708 Results.push_back(Tmp.second);
709 return;
710 }
711 case ISD::STORE:
712 Results.push_back(ExpandStore(Node));
713 return;
 // NOTE(review): case label not visible in this copy. The loop forwards the
 // node's operands as its results, one per result value.
715 for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
716 Results.push_back(Node->getOperand(i));
717 return;
 // NOTE(review): case label not visible in this copy.
719 Results.push_back(ExpandSEXTINREG(Node));
720 return;
 // NOTE(review): case label not visible in this copy.
722 Results.push_back(ExpandANY_EXTEND_VECTOR_INREG(Node));
723 return;
 // NOTE(review): case label not visible in this copy.
725 Results.push_back(ExpandSIGN_EXTEND_VECTOR_INREG(Node));
726 return;
 // NOTE(review): case label not visible in this copy.
728 Results.push_back(ExpandZERO_EXTEND_VECTOR_INREG(Node));
729 return;
730 case ISD::BSWAP:
731 Results.push_back(ExpandBSWAP(Node));
732 return;
733 case ISD::VP_BSWAP:
734 Results.push_back(TLI.expandVPBSWAP(Node, DAG));
735 return;
736 case ISD::VSELECT:
737 Results.push_back(ExpandVSELECT(Node));
738 return;
739 case ISD::VP_SELECT:
740 Results.push_back(ExpandVP_SELECT(Node));
741 return;
742 case ISD::VP_SREM:
743 case ISD::VP_UREM:
744 if (SDValue Expanded = ExpandVP_REM(Node)) {
745 Results.push_back(Expanded);
746 return;
747 }
748 break;
749 case ISD::SELECT:
750 Results.push_back(ExpandSELECT(Node));
751 return;
752 case ISD::SELECT_CC: {
 // Scalable vectors are rewritten as SETCC + select here; fixed-length
 // vectors break out and are unrolled at the bottom.
753 if (Node->getValueType(0).isScalableVector()) {
754 EVT CondVT = TLI.getSetCCResultType(
755 DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
756 SDValue SetCC =
757 DAG.getNode(ISD::SETCC, SDLoc(Node), CondVT, Node->getOperand(0),
758 Node->getOperand(1), Node->getOperand(4));
759 Results.push_back(DAG.getSelect(SDLoc(Node), Node->getValueType(0), SetCC,
760 Node->getOperand(2),
761 Node->getOperand(3)));
762 return;
763 }
764 break;
765 }
766 case ISD::FP_TO_UINT:
767 ExpandFP_TO_UINT(Node, Results);
768 return;
769 case ISD::UINT_TO_FP:
770 ExpandUINT_TO_FLOAT(Node, Results);
771 return;
772 case ISD::FNEG:
773 Results.push_back(ExpandFNEG(Node));
774 return;
775 case ISD::FSUB:
776 ExpandFSUB(Node, Results);
777 return;
778 case ISD::SETCC:
779 case ISD::VP_SETCC:
780 ExpandSETCC(Node, Results);
781 return;
782 case ISD::ABS:
783 if (SDValue Expanded = TLI.expandABS(Node, DAG)) {
784 Results.push_back(Expanded);
785 return;
786 }
787 break;
788 case ISD::BITREVERSE:
789 ExpandBITREVERSE(Node, Results);
790 return;
791 case ISD::VP_BITREVERSE:
792 if (SDValue Expanded = TLI.expandVPBITREVERSE(Node, DAG)) {
793 Results.push_back(Expanded);
794 return;
795 }
796 break;
797 case ISD::CTPOP:
798 if (SDValue Expanded = TLI.expandCTPOP(Node, DAG)) {
799 Results.push_back(Expanded);
800 return;
801 }
802 break;
803 case ISD::VP_CTPOP:
804 if (SDValue Expanded = TLI.expandVPCTPOP(Node, DAG)) {
805 Results.push_back(Expanded);
806 return;
807 }
808 break;
809 case ISD::CTLZ:
811 if (SDValue Expanded = TLI.expandCTLZ(Node, DAG)) {
812 Results.push_back(Expanded);
813 return;
814 }
815 break;
816 case ISD::VP_CTLZ:
817 case ISD::VP_CTLZ_ZERO_UNDEF:
818 if (SDValue Expanded = TLI.expandVPCTLZ(Node, DAG)) {
819 Results.push_back(Expanded);
820 return;
821 }
822 break;
823 case ISD::CTTZ:
825 if (SDValue Expanded = TLI.expandCTTZ(Node, DAG)) {
826 Results.push_back(Expanded);
827 return;
828 }
829 break;
830 case ISD::VP_CTTZ:
831 case ISD::VP_CTTZ_ZERO_UNDEF:
832 if (SDValue Expanded = TLI.expandVPCTTZ(Node, DAG)) {
833 Results.push_back(Expanded);
834 return;
835 }
836 break;
837 case ISD::FSHL:
838 case ISD::VP_FSHL:
839 case ISD::FSHR:
840 case ISD::VP_FSHR:
841 if (SDValue Expanded = TLI.expandFunnelShift(Node, DAG)) {
842 Results.push_back(Expanded);
843 return;
844 }
845 break;
846 case ISD::ROTL:
847 case ISD::ROTR:
848 if (SDValue Expanded = TLI.expandROT(Node, false /*AllowVectorOps*/, DAG)) {
849 Results.push_back(Expanded);
850 return;
851 }
852 break;
853 case ISD::FMINNUM:
854 case ISD::FMAXNUM:
855 if (SDValue Expanded = TLI.expandFMINNUM_FMAXNUM(Node, DAG)) {
856 Results.push_back(Expanded);
857 return;
858 }
859 break;
860 case ISD::SMIN:
861 case ISD::SMAX:
862 case ISD::UMIN:
863 case ISD::UMAX:
864 if (SDValue Expanded = TLI.expandIntMINMAX(Node, DAG)) {
865 Results.push_back(Expanded);
866 return;
867 }
868 break;
869 case ISD::UADDO:
870 case ISD::USUBO:
871 ExpandUADDSUBO(Node, Results);
872 return;
873 case ISD::SADDO:
874 case ISD::SSUBO:
875 ExpandSADDSUBO(Node, Results);
876 return;
877 case ISD::UMULO:
878 case ISD::SMULO:
879 ExpandMULO(Node, Results);
880 return;
881 case ISD::USUBSAT:
882 case ISD::SSUBSAT:
883 case ISD::UADDSAT:
884 case ISD::SADDSAT:
885 if (SDValue Expanded = TLI.expandAddSubSat(Node, DAG)) {
886 Results.push_back(Expanded);
887 return;
888 }
889 break;
890 case ISD::USHLSAT:
891 case ISD::SSHLSAT:
892 if (SDValue Expanded = TLI.expandShlSat(Node, DAG)) {
893 Results.push_back(Expanded);
894 return;
895 }
896 break;
 // NOTE(review): case label(s) not visible in this copy; judging by the call
 // below these are presumably the FP_TO_[SU]INT_SAT opcodes.
899 // Expand the fpsosisat if it is scalable to prevent it from unrolling below.
900 if (Node->getValueType(0).isScalableVector()) {
901 if (SDValue Expanded = TLI.expandFP_TO_INT_SAT(Node, DAG)) {
902 Results.push_back(Expanded);
903 return;
904 }
905 }
906 break;
907 case ISD::SMULFIX:
908 case ISD::UMULFIX:
909 if (SDValue Expanded = TLI.expandFixedPointMul(Node, DAG)) {
910 Results.push_back(Expanded);
911 return;
912 }
913 break;
914 case ISD::SMULFIXSAT:
915 case ISD::UMULFIXSAT:
916 // FIXME: We do not expand SMULFIXSAT/UMULFIXSAT here yet, not sure exactly
917 // why. Maybe it results in worse codegen compared to the unroll for some
918 // targets? This should probably be investigated. And if we still prefer to
919 // unroll an explanation could be helpful.
920 break;
921 case ISD::SDIVFIX:
922 case ISD::UDIVFIX:
923 ExpandFixedPointDiv(Node, Results);
924 return;
925 case ISD::SDIVFIXSAT:
926 case ISD::UDIVFIXSAT:
 // No dedicated expansion: these are unrolled at the bottom.
927 break;
 // One case label per entry in ConstrainedOps.def (the STRICT_* opcodes).
928#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
929 case ISD::STRICT_##DAGN:
930#include "llvm/IR/ConstrainedOps.def"
931 ExpandStrictFPOp(Node, Results);
932 return;
 // NOTE(review): case labels not visible in this copy.
946 Results.push_back(TLI.expandVecReduce(Node, DAG));
947 return;
 // NOTE(review): case labels not visible in this copy.
950 Results.push_back(TLI.expandVecReduceSeq(Node, DAG));
951 return;
952 case ISD::SREM:
953 case ISD::UREM:
954 ExpandREM(Node, Results);
955 return;
956 case ISD::VP_MERGE:
957 Results.push_back(ExpandVP_MERGE(Node));
958 return;
959 }
960
 // Fallback for everything that broke out of the switch or had no case.
961 Results.push_back(DAG.UnrollVectorOp(Node));
962}
963
964SDValue VectorLegalizer::ExpandSELECT(SDNode *Node) {
965 // Lower a select instruction where the condition is a scalar and the
966 // operands are vectors. Lower this select to VSELECT and implement it
967 // using XOR AND OR. The selector bit is broadcasted.
968 EVT VT = Node->getValueType(0);
969 SDLoc DL(Node);
970
971 SDValue Mask = Node->getOperand(0);
972 SDValue Op1 = Node->getOperand(1);
973 SDValue Op2 = Node->getOperand(2);
974
975 assert(VT.isVector() && !Mask.getValueType().isVector()
976 && Op1.getValueType() == Op2.getValueType() && "Invalid type");
977
978 // If we can't even use the basic vector operations of
979 // AND,OR,XOR, we will have to scalarize the op.
980 // Notice that the operation may be 'promoted' which means that it is
981 // 'bitcasted' to another type which is handled.
982 // Also, we need to be able to construct a splat vector using either
983 // BUILD_VECTOR or SPLAT_VECTOR.
984 // FIXME: Should we also permit fixed-length SPLAT_VECTOR as a fallback to
985 // BUILD_VECTOR?
986 if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
987 TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
988 TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand ||
989 TLI.getOperationAction(VT.isFixedLengthVector() ? ISD::BUILD_VECTOR
991 VT) == TargetLowering::Expand)
992 return DAG.UnrollVectorOp(Node);
993
994 // Generate a mask operand.
996
997 // What is the size of each element in the vector mask.
998 EVT BitTy = MaskTy.getScalarType();
999
1000 Mask = DAG.getSelect(DL, BitTy, Mask, DAG.getAllOnesConstant(DL, BitTy),
1001 DAG.getConstant(0, DL, BitTy));
1002
1003 // Broadcast the mask so that the entire vector is all one or all zero.
1004 Mask = DAG.getSplat(MaskTy, DL, Mask);
1005
1006 // Bitcast the operands to be the same type as the mask.
1007 // This is needed when we select between FP types because
1008 // the mask is a vector of integers.
1009 Op1 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op1);
1010 Op2 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op2);
1011
1012 SDValue NotMask = DAG.getNOT(DL, Mask, MaskTy);
1013
1014 Op1 = DAG.getNode(ISD::AND, DL, MaskTy, Op1, Mask);
1015 Op2 = DAG.getNode(ISD::AND, DL, MaskTy, Op2, NotMask);
1016 SDValue Val = DAG.getNode(ISD::OR, DL, MaskTy, Op1, Op2);
1017 return DAG.getNode(ISD::BITCAST, DL, Node->getValueType(0), Val);
1018}
1019
1020SDValue VectorLegalizer::ExpandSEXTINREG(SDNode *Node) {
1021 EVT VT = Node->getValueType(0);
1022
1023 // Make sure that the SRA and SHL instructions are available.
1024 if (TLI.getOperationAction(ISD::SRA, VT) == TargetLowering::Expand ||
1025 TLI.getOperationAction(ISD::SHL, VT) == TargetLowering::Expand)
1026 return DAG.UnrollVectorOp(Node);
1027
1028 SDLoc DL(Node);
1029 EVT OrigTy = cast<VTSDNode>(Node->getOperand(1))->getVT();
1030
1031 unsigned BW = VT.getScalarSizeInBits();
1032 unsigned OrigBW = OrigTy.getScalarSizeInBits();
1033 SDValue ShiftSz = DAG.getConstant(BW - OrigBW, DL, VT);
1034
1035 SDValue Op = DAG.getNode(ISD::SHL, DL, VT, Node->getOperand(0), ShiftSz);
1036 return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz);
1037}
1038
1039// Generically expand a vector anyext in register to a shuffle of the relevant
1040// lanes into the appropriate locations, with other lanes left undef.
1041SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDNode *Node) {
1042 SDLoc DL(Node);
1043 EVT VT = Node->getValueType(0);
1044 int NumElements = VT.getVectorNumElements();
1045 SDValue Src = Node->getOperand(0);
1046 EVT SrcVT = Src.getValueType();
1047 int NumSrcElements = SrcVT.getVectorNumElements();
1048
1049 // *_EXTEND_VECTOR_INREG SrcVT can be smaller than VT - so insert the vector
1050 // into a larger vector type.
1051 if (SrcVT.bitsLE(VT)) {
1052 assert((VT.getSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 &&
1053 "ANY_EXTEND_VECTOR_INREG vector size mismatch");
1054 NumSrcElements = VT.getSizeInBits() / SrcVT.getScalarSizeInBits();
1055 SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
1056 NumSrcElements);
1057 Src = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SrcVT, DAG.getUNDEF(SrcVT),
1058 Src, DAG.getVectorIdxConstant(0, DL));
1059 }
1060
1061 // Build a base mask of undef shuffles.
1062 SmallVector<int, 16> ShuffleMask;
1063 ShuffleMask.resize(NumSrcElements, -1);
1064
1065 // Place the extended lanes into the correct locations.
1066 int ExtLaneScale = NumSrcElements / NumElements;
1067 int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0;
1068 for (int i = 0; i < NumElements; ++i)
1069 ShuffleMask[i * ExtLaneScale + EndianOffset] = i;
1070
1071 return DAG.getNode(
1072 ISD::BITCAST, DL, VT,
1073 DAG.getVectorShuffle(SrcVT, DL, Src, DAG.getUNDEF(SrcVT), ShuffleMask));
1074}
1075
1076SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDNode *Node) {
1077 SDLoc DL(Node);
1078 EVT VT = Node->getValueType(0);
1079 SDValue Src = Node->getOperand(0);
1080 EVT SrcVT = Src.getValueType();
1081
1082 // First build an any-extend node which can be legalized above when we
1083 // recurse through it.
1084 SDValue Op = DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, Src);
1085
1086 // Now we need sign extend. Do this by shifting the elements. Even if these
1087 // aren't legal operations, they have a better chance of being legalized
1088 // without full scalarization than the sign extension does.
1089 unsigned EltWidth = VT.getScalarSizeInBits();
1090 unsigned SrcEltWidth = SrcVT.getScalarSizeInBits();
1091 SDValue ShiftAmount = DAG.getConstant(EltWidth - SrcEltWidth, DL, VT);
1092 return DAG.getNode(ISD::SRA, DL, VT,
1093 DAG.getNode(ISD::SHL, DL, VT, Op, ShiftAmount),
1094 ShiftAmount);
1095}
1096
1097// Generically expand a vector zext in register to a shuffle of the relevant
1098// lanes into the appropriate locations, a blend of zero into the high bits,
1099// and a bitcast to the wider element type.
1100SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDNode *Node) {
1101 SDLoc DL(Node);
1102 EVT VT = Node->getValueType(0);
1103 int NumElements = VT.getVectorNumElements();
1104 SDValue Src = Node->getOperand(0);
1105 EVT SrcVT = Src.getValueType();
1106 int NumSrcElements = SrcVT.getVectorNumElements();
1107
1108 // *_EXTEND_VECTOR_INREG SrcVT can be smaller than VT - so insert the vector
1109 // into a larger vector type.
1110 if (SrcVT.bitsLE(VT)) {
1111 assert((VT.getSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 &&
1112 "ZERO_EXTEND_VECTOR_INREG vector size mismatch");
1113 NumSrcElements = VT.getSizeInBits() / SrcVT.getScalarSizeInBits();
1114 SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
1115 NumSrcElements);
1116 Src = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SrcVT, DAG.getUNDEF(SrcVT),
1117 Src, DAG.getVectorIdxConstant(0, DL));
1118 }
1119
1120 // Build up a zero vector to blend into this one.
1121 SDValue Zero = DAG.getConstant(0, DL, SrcVT);
1122
1123 // Shuffle the incoming lanes into the correct position, and pull all other
1124 // lanes from the zero vector.
1125 auto ShuffleMask = llvm::to_vector<16>(llvm::seq<int>(0, NumSrcElements));
1126
1127 int ExtLaneScale = NumSrcElements / NumElements;
1128 int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0;
1129 for (int i = 0; i < NumElements; ++i)
1130 ShuffleMask[i * ExtLaneScale + EndianOffset] = NumSrcElements + i;
1131
1132 return DAG.getNode(ISD::BITCAST, DL, VT,
1133 DAG.getVectorShuffle(SrcVT, DL, Zero, Src, ShuffleMask));
1134}
1135
1136static void createBSWAPShuffleMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) {
1137 int ScalarSizeInBytes = VT.getScalarSizeInBits() / 8;
1138 for (int I = 0, E = VT.getVectorNumElements(); I != E; ++I)
1139 for (int J = ScalarSizeInBytes - 1; J >= 0; --J)
1140 ShuffleMask.push_back((I * ScalarSizeInBytes) + J);
1141}
1142
1143SDValue VectorLegalizer::ExpandBSWAP(SDNode *Node) {
1144 EVT VT = Node->getValueType(0);
1145
1146 // Scalable vectors can't use shuffle expansion.
1147 if (VT.isScalableVector())
1148 return TLI.expandBSWAP(Node, DAG);
1149
1150 // Generate a byte wise shuffle mask for the BSWAP.
1151 SmallVector<int, 16> ShuffleMask;
1152 createBSWAPShuffleMask(VT, ShuffleMask);
1153 EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, ShuffleMask.size());
1154
1155 // Only emit a shuffle if the mask is legal.
1156 if (TLI.isShuffleMaskLegal(ShuffleMask, ByteVT)) {
1157 SDLoc DL(Node);
1158 SDValue Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Node->getOperand(0));
1159 Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), ShuffleMask);
1160 return DAG.getNode(ISD::BITCAST, DL, VT, Op);
1161 }
1162
1163 // If we have the appropriate vector bit operations, it is better to use them
1164 // than unrolling and expanding each component.
1165 if (TLI.isOperationLegalOrCustom(ISD::SHL, VT) &&
1166 TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
1167 TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) &&
1168 TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT))
1169 return TLI.expandBSWAP(Node, DAG);
1170
1171 // Otherwise unroll.
1172 return DAG.UnrollVectorOp(Node);
1173}
1174
1175void VectorLegalizer::ExpandBITREVERSE(SDNode *Node,
1177 EVT VT = Node->getValueType(0);
1178
1179 // We can't unroll or use shuffles for scalable vectors.
1180 if (VT.isScalableVector()) {
1181 Results.push_back(TLI.expandBITREVERSE(Node, DAG));
1182 return;
1183 }
1184
1185 // If we have the scalar operation, it's probably cheaper to unroll it.
1186 if (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, VT.getScalarType())) {
1187 SDValue Tmp = DAG.UnrollVectorOp(Node);
1188 Results.push_back(Tmp);
1189 return;
1190 }
1191
1192 // If the vector element width is a whole number of bytes, test if its legal
1193 // to BSWAP shuffle the bytes and then perform the BITREVERSE on the byte
1194 // vector. This greatly reduces the number of bit shifts necessary.
1195 unsigned ScalarSizeInBits = VT.getScalarSizeInBits();
1196 if (ScalarSizeInBits > 8 && (ScalarSizeInBits % 8) == 0) {
1197 SmallVector<int, 16> BSWAPMask;
1198 createBSWAPShuffleMask(VT, BSWAPMask);
1199
1200 EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, BSWAPMask.size());
1201 if (TLI.isShuffleMaskLegal(BSWAPMask, ByteVT) &&
1202 (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, ByteVT) ||
1203 (TLI.isOperationLegalOrCustom(ISD::SHL, ByteVT) &&
1204 TLI.isOperationLegalOrCustom(ISD::SRL, ByteVT) &&
1205 TLI.isOperationLegalOrCustomOrPromote(ISD::AND, ByteVT) &&
1206 TLI.isOperationLegalOrCustomOrPromote(ISD::OR, ByteVT)))) {
1207 SDLoc DL(Node);
1208 SDValue Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Node->getOperand(0));
1209 Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT),
1210 BSWAPMask);
1211 Op = DAG.getNode(ISD::BITREVERSE, DL, ByteVT, Op);
1212 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
1213 Results.push_back(Op);
1214 return;
1215 }
1216 }
1217
1218 // If we have the appropriate vector bit operations, it is better to use them
1219 // than unrolling and expanding each component.
1220 if (TLI.isOperationLegalOrCustom(ISD::SHL, VT) &&
1221 TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
1222 TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) &&
1223 TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT)) {
1224 Results.push_back(TLI.expandBITREVERSE(Node, DAG));
1225 return;
1226 }
1227
1228 // Otherwise unroll.
1229 SDValue Tmp = DAG.UnrollVectorOp(Node);
1230 Results.push_back(Tmp);
1231}
1232
1233SDValue VectorLegalizer::ExpandVSELECT(SDNode *Node) {
1234 // Implement VSELECT in terms of XOR, AND, OR
1235 // on platforms which do not support blend natively.
1236 SDLoc DL(Node);
1237
1238 SDValue Mask = Node->getOperand(0);
1239 SDValue Op1 = Node->getOperand(1);
1240 SDValue Op2 = Node->getOperand(2);
1241
1242 EVT VT = Mask.getValueType();
1243
1244 // If we can't even use the basic vector operations of
1245 // AND,OR,XOR, we will have to scalarize the op.
1246 // Notice that the operation may be 'promoted' which means that it is
1247 // 'bitcasted' to another type which is handled.
1248 if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
1249 TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
1250 TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand)
1251 return DAG.UnrollVectorOp(Node);
1252
1253 // This operation also isn't safe with AND, OR, XOR when the boolean type is
1254 // 0/1 and the select operands aren't also booleans, as we need an all-ones
1255 // vector constant to mask with.
1256 // FIXME: Sign extend 1 to all ones if that's legal on the target.
1257 auto BoolContents = TLI.getBooleanContents(Op1.getValueType());
1258 if (BoolContents != TargetLowering::ZeroOrNegativeOneBooleanContent &&
1259 !(BoolContents == TargetLowering::ZeroOrOneBooleanContent &&
1261 return DAG.UnrollVectorOp(Node);
1262
1263 // If the mask and the type are different sizes, unroll the vector op. This
1264 // can occur when getSetCCResultType returns something that is different in
1265 // size from the operand types. For example, v4i8 = select v4i32, v4i8, v4i8.
1266 if (VT.getSizeInBits() != Op1.getValueSizeInBits())
1267 return DAG.UnrollVectorOp(Node);
1268
1269 // Bitcast the operands to be the same type as the mask.
1270 // This is needed when we select between FP types because
1271 // the mask is a vector of integers.
1272 Op1 = DAG.getNode(ISD::BITCAST, DL, VT, Op1);
1273 Op2 = DAG.getNode(ISD::BITCAST, DL, VT, Op2);
1274
1275 SDValue NotMask = DAG.getNOT(DL, Mask, VT);
1276
1277 Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask);
1278 Op2 = DAG.getNode(ISD::AND, DL, VT, Op2, NotMask);
1279 SDValue Val = DAG.getNode(ISD::OR, DL, VT, Op1, Op2);
1280 return DAG.getNode(ISD::BITCAST, DL, Node->getValueType(0), Val);
1281}
1282
1283SDValue VectorLegalizer::ExpandVP_SELECT(SDNode *Node) {
1284 // Implement VP_SELECT in terms of VP_XOR, VP_AND and VP_OR on platforms which
1285 // do not support it natively.
1286 SDLoc DL(Node);
1287
1288 SDValue Mask = Node->getOperand(0);
1289 SDValue Op1 = Node->getOperand(1);
1290 SDValue Op2 = Node->getOperand(2);
1291 SDValue EVL = Node->getOperand(3);
1292
1293 EVT VT = Mask.getValueType();
1294
1295 // If we can't even use the basic vector operations of
1296 // VP_AND,VP_OR,VP_XOR, we will have to scalarize the op.
1297 if (TLI.getOperationAction(ISD::VP_AND, VT) == TargetLowering::Expand ||
1298 TLI.getOperationAction(ISD::VP_XOR, VT) == TargetLowering::Expand ||
1299 TLI.getOperationAction(ISD::VP_OR, VT) == TargetLowering::Expand)
1300 return DAG.UnrollVectorOp(Node);
1301
1302 // This operation also isn't safe when the operands aren't also booleans.
1304 return DAG.UnrollVectorOp(Node);
1305
1306 SDValue Ones = DAG.getAllOnesConstant(DL, VT);
1307 SDValue NotMask = DAG.getNode(ISD::VP_XOR, DL, VT, Mask, Ones, Mask, EVL);
1308
1309 Op1 = DAG.getNode(ISD::VP_AND, DL, VT, Op1, Mask, Mask, EVL);
1310 Op2 = DAG.getNode(ISD::VP_AND, DL, VT, Op2, NotMask, Mask, EVL);
1311 return DAG.getNode(ISD::VP_OR, DL, VT, Op1, Op2, Mask, EVL);
1312}
1313
1314SDValue VectorLegalizer::ExpandVP_MERGE(SDNode *Node) {
1315 // Implement VP_MERGE in terms of VSELECT. Construct a mask where vector
1316 // indices less than the EVL/pivot are true. Combine that with the original
1317 // mask for a full-length mask. Use a full-length VSELECT to select between
1318 // the true and false values.
1319 SDLoc DL(Node);
1320
1321 SDValue Mask = Node->getOperand(0);
1322 SDValue Op1 = Node->getOperand(1);
1323 SDValue Op2 = Node->getOperand(2);
1324 SDValue EVL = Node->getOperand(3);
1325
1326 EVT MaskVT = Mask.getValueType();
1327 bool IsFixedLen = MaskVT.isFixedLengthVector();
1328
1329 EVT EVLVecVT = EVT::getVectorVT(*DAG.getContext(), EVL.getValueType(),
1330 MaskVT.getVectorElementCount());
1331
1332 // If we can't construct the EVL mask efficiently, it's better to unroll.
1333 if ((IsFixedLen &&
1334 !TLI.isOperationLegalOrCustom(ISD::BUILD_VECTOR, EVLVecVT)) ||
1335 (!IsFixedLen &&
1336 (!TLI.isOperationLegalOrCustom(ISD::STEP_VECTOR, EVLVecVT) ||
1337 !TLI.isOperationLegalOrCustom(ISD::SPLAT_VECTOR, EVLVecVT))))
1338 return DAG.UnrollVectorOp(Node);
1339
1340 // If using a SETCC would result in a different type than the mask type,
1341 // unroll.
1342 if (TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
1343 EVLVecVT) != MaskVT)
1344 return DAG.UnrollVectorOp(Node);
1345
1346 SDValue StepVec = DAG.getStepVector(DL, EVLVecVT);
1347 SDValue SplatEVL = DAG.getSplat(EVLVecVT, DL, EVL);
1348 SDValue EVLMask =
1349 DAG.getSetCC(DL, MaskVT, StepVec, SplatEVL, ISD::CondCode::SETULT);
1350
1351 SDValue FullMask = DAG.getNode(ISD::AND, DL, MaskVT, Mask, EVLMask);
1352 return DAG.getSelect(DL, Node->getValueType(0), FullMask, Op1, Op2);
1353}
1354
1355SDValue VectorLegalizer::ExpandVP_REM(SDNode *Node) {
1356 // Implement VP_SREM/UREM in terms of VP_SDIV/VP_UDIV, VP_MUL, VP_SUB.
1357 EVT VT = Node->getValueType(0);
1358
1359 unsigned DivOpc = Node->getOpcode() == ISD::VP_SREM ? ISD::VP_SDIV : ISD::VP_UDIV;
1360
1361 if (!TLI.isOperationLegalOrCustom(DivOpc, VT) ||
1362 !TLI.isOperationLegalOrCustom(ISD::VP_MUL, VT) ||
1363 !TLI.isOperationLegalOrCustom(ISD::VP_SUB, VT))
1364 return SDValue();
1365
1366 SDLoc DL(Node);
1367
1368 SDValue Dividend = Node->getOperand(0);
1369 SDValue Divisor = Node->getOperand(1);
1370 SDValue Mask = Node->getOperand(2);
1371 SDValue EVL = Node->getOperand(3);
1372
1373 // X % Y -> X-X/Y*Y
1374 SDValue Div = DAG.getNode(DivOpc, DL, VT, Dividend, Divisor, Mask, EVL);
1375 SDValue Mul = DAG.getNode(ISD::VP_MUL, DL, VT, Divisor, Div, Mask, EVL);
1376 return DAG.getNode(ISD::VP_SUB, DL, VT, Dividend, Mul, Mask, EVL);
1377}
1378
1379void VectorLegalizer::ExpandFP_TO_UINT(SDNode *Node,
1381 // Attempt to expand using TargetLowering.
1382 SDValue Result, Chain;
1383 if (TLI.expandFP_TO_UINT(Node, Result, Chain, DAG)) {
1384 Results.push_back(Result);
1385 if (Node->isStrictFPOpcode())
1386 Results.push_back(Chain);
1387 return;
1388 }
1389
1390 // Otherwise go ahead and unroll.
1391 if (Node->isStrictFPOpcode()) {
1392 UnrollStrictFPOp(Node, Results);
1393 return;
1394 }
1395
1396 Results.push_back(DAG.UnrollVectorOp(Node));
1397}
1398
1399void VectorLegalizer::ExpandUINT_TO_FLOAT(SDNode *Node,
1401 bool IsStrict = Node->isStrictFPOpcode();
1402 unsigned OpNo = IsStrict ? 1 : 0;
1403 SDValue Src = Node->getOperand(OpNo);
1404 EVT VT = Src.getValueType();
1405 SDLoc DL(Node);
1406
1407 // Attempt to expand using TargetLowering.
1409 SDValue Chain;
1410 if (TLI.expandUINT_TO_FP(Node, Result, Chain, DAG)) {
1411 Results.push_back(Result);
1412 if (IsStrict)
1413 Results.push_back(Chain);
1414 return;
1415 }
1416
1417 // Make sure that the SINT_TO_FP and SRL instructions are available.
1418 if (((!IsStrict && TLI.getOperationAction(ISD::SINT_TO_FP, VT) ==
1419 TargetLowering::Expand) ||
1420 (IsStrict && TLI.getOperationAction(ISD::STRICT_SINT_TO_FP, VT) ==
1421 TargetLowering::Expand)) ||
1422 TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand) {
1423 if (IsStrict) {
1424 UnrollStrictFPOp(Node, Results);
1425 return;
1426 }
1427
1428 Results.push_back(DAG.UnrollVectorOp(Node));
1429 return;
1430 }
1431
1432 unsigned BW = VT.getScalarSizeInBits();
1433 assert((BW == 64 || BW == 32) &&
1434 "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide");
1435
1436 SDValue HalfWord = DAG.getConstant(BW / 2, DL, VT);
1437
1438 // Constants to clear the upper part of the word.
1439 // Notice that we can also use SHL+SHR, but using a constant is slightly
1440 // faster on x86.
1441 uint64_t HWMask = (BW == 64) ? 0x00000000FFFFFFFF : 0x0000FFFF;
1442 SDValue HalfWordMask = DAG.getConstant(HWMask, DL, VT);
1443
1444 // Two to the power of half-word-size.
1445 SDValue TWOHW =
1446 DAG.getConstantFP(1ULL << (BW / 2), DL, Node->getValueType(0));
1447
1448 // Clear upper part of LO, lower HI
1449 SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Src, HalfWord);
1450 SDValue LO = DAG.getNode(ISD::AND, DL, VT, Src, HalfWordMask);
1451
1452 if (IsStrict) {
1453 // Convert hi and lo to floats
1454 // Convert the hi part back to the upper values
1455 // TODO: Can any fast-math-flags be set on these nodes?
1457 {Node->getValueType(0), MVT::Other},
1458 {Node->getOperand(0), HI});
1459 fHI = DAG.getNode(ISD::STRICT_FMUL, DL, {Node->getValueType(0), MVT::Other},
1460 {fHI.getValue(1), fHI, TWOHW});
1462 {Node->getValueType(0), MVT::Other},
1463 {Node->getOperand(0), LO});
1464
1466 fLO.getValue(1));
1467
1468 // Add the two halves
1469 SDValue Result =
1470 DAG.getNode(ISD::STRICT_FADD, DL, {Node->getValueType(0), MVT::Other},
1471 {TF, fHI, fLO});
1472
1473 Results.push_back(Result);
1474 Results.push_back(Result.getValue(1));
1475 return;
1476 }
1477
1478 // Convert hi and lo to floats
1479 // Convert the hi part back to the upper values
1480 // TODO: Can any fast-math-flags be set on these nodes?
1481 SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, Node->getValueType(0), HI);
1482 fHI = DAG.getNode(ISD::FMUL, DL, Node->getValueType(0), fHI, TWOHW);
1483 SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, Node->getValueType(0), LO);
1484
1485 // Add the two halves
1486 Results.push_back(
1487 DAG.getNode(ISD::FADD, DL, Node->getValueType(0), fHI, fLO));
1488}
1489
1490SDValue VectorLegalizer::ExpandFNEG(SDNode *Node) {
1491 if (TLI.isOperationLegalOrCustom(ISD::FSUB, Node->getValueType(0))) {
1492 SDLoc DL(Node);
1493 SDValue Zero = DAG.getConstantFP(-0.0, DL, Node->getValueType(0));
1494 // TODO: If FNEG had fast-math-flags, they'd get propagated to this FSUB.
1495 return DAG.getNode(ISD::FSUB, DL, Node->getValueType(0), Zero,
1496 Node->getOperand(0));
1497 }
1498 return DAG.UnrollVectorOp(Node);
1499}
1500
1501void VectorLegalizer::ExpandFSUB(SDNode *Node,
1503 // For floating-point values, (a-b) is the same as a+(-b). If FNEG is legal,
1504 // we can defer this to operation legalization where it will be lowered as
1505 // a+(-b).
1506 EVT VT = Node->getValueType(0);
1507 if (TLI.isOperationLegalOrCustom(ISD::FNEG, VT) &&
1508 TLI.isOperationLegalOrCustom(ISD::FADD, VT))
1509 return; // Defer to LegalizeDAG
1510
1511 SDValue Tmp = DAG.UnrollVectorOp(Node);
1512 Results.push_back(Tmp);
1513}
1514
1515void VectorLegalizer::ExpandSETCC(SDNode *Node,
1517 bool NeedInvert = false;
1518 bool IsVP = Node->getOpcode() == ISD::VP_SETCC;
1519 SDLoc dl(Node);
1520 MVT OpVT = Node->getOperand(0).getSimpleValueType();
1521 ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get();
1522
1523 if (TLI.getCondCodeAction(CCCode, OpVT) != TargetLowering::Expand) {
1524 Results.push_back(UnrollVSETCC(Node));
1525 return;
1526 }
1527
1528 SDValue Chain;
1529 SDValue LHS = Node->getOperand(0);
1530 SDValue RHS = Node->getOperand(1);
1531 SDValue CC = Node->getOperand(2);
1532 SDValue Mask, EVL;
1533 if (IsVP) {
1534 Mask = Node->getOperand(3);
1535 EVL = Node->getOperand(4);
1536 }
1537
1538 bool Legalized =
1539 TLI.LegalizeSetCCCondCode(DAG, Node->getValueType(0), LHS, RHS, CC, Mask,
1540 EVL, NeedInvert, dl, Chain);
1541
1542 if (Legalized) {
1543 // If we expanded the SETCC by swapping LHS and RHS, or by inverting the
1544 // condition code, create a new SETCC node.
1545 if (CC.getNode()) {
1546 if (!IsVP)
1547 LHS = DAG.getNode(ISD::SETCC, dl, Node->getValueType(0), LHS, RHS, CC,
1548 Node->getFlags());
1549 else
1550 LHS = DAG.getNode(ISD::VP_SETCC, dl, Node->getValueType(0),
1551 {LHS, RHS, CC, Mask, EVL}, Node->getFlags());
1552 }
1553
1554 // If we expanded the SETCC by inverting the condition code, then wrap
1555 // the existing SETCC in a NOT to restore the intended condition.
1556 if (NeedInvert) {
1557 if (!IsVP)
1558 LHS = DAG.getLogicalNOT(dl, LHS, LHS->getValueType(0));
1559 else
1560 LHS = DAG.getVPLogicalNOT(dl, LHS, Mask, EVL, LHS->getValueType(0));
1561 }
1562 } else {
1563 // Otherwise, SETCC for the given comparison type must be completely
1564 // illegal; expand it into a SELECT_CC.
1565 EVT VT = Node->getValueType(0);
1566 LHS =
1567 DAG.getNode(ISD::SELECT_CC, dl, VT, LHS, RHS,
1568 DAG.getBoolConstant(true, dl, VT, LHS.getValueType()),
1569 DAG.getBoolConstant(false, dl, VT, LHS.getValueType()), CC);
1570 LHS->setFlags(Node->getFlags());
1571 }
1572
1573 Results.push_back(LHS);
1574}
1575
1576void VectorLegalizer::ExpandUADDSUBO(SDNode *Node,
1578 SDValue Result, Overflow;
1579 TLI.expandUADDSUBO(Node, Result, Overflow, DAG);
1580 Results.push_back(Result);
1581 Results.push_back(Overflow);
1582}
1583
1584void VectorLegalizer::ExpandSADDSUBO(SDNode *Node,
1586 SDValue Result, Overflow;
1587 TLI.expandSADDSUBO(Node, Result, Overflow, DAG);
1588 Results.push_back(Result);
1589 Results.push_back(Overflow);
1590}
1591
1592void VectorLegalizer::ExpandMULO(SDNode *Node,
1594 SDValue Result, Overflow;
1595 if (!TLI.expandMULO(Node, Result, Overflow, DAG))
1596 std::tie(Result, Overflow) = DAG.UnrollVectorOverflowOp(Node);
1597
1598 Results.push_back(Result);
1599 Results.push_back(Overflow);
1600}
1601
1602void VectorLegalizer::ExpandFixedPointDiv(SDNode *Node,
1604 SDNode *N = Node;
1605 if (SDValue Expanded = TLI.expandFixedPointDiv(N->getOpcode(), SDLoc(N),
1606 N->getOperand(0), N->getOperand(1), N->getConstantOperandVal(2), DAG))
1607 Results.push_back(Expanded);
1608}
1609
1610void VectorLegalizer::ExpandStrictFPOp(SDNode *Node,
1612 if (Node->getOpcode() == ISD::STRICT_UINT_TO_FP) {
1613 ExpandUINT_TO_FLOAT(Node, Results);
1614 return;
1615 }
1616 if (Node->getOpcode() == ISD::STRICT_FP_TO_UINT) {
1617 ExpandFP_TO_UINT(Node, Results);
1618 return;
1619 }
1620
1621 UnrollStrictFPOp(Node, Results);
1622}
1623
1624void VectorLegalizer::ExpandREM(SDNode *Node,
1626 assert((Node->getOpcode() == ISD::SREM || Node->getOpcode() == ISD::UREM) &&
1627 "Expected REM node");
1628
1630 if (!TLI.expandREM(Node, Result, DAG))
1631 Result = DAG.UnrollVectorOp(Node);
1632 Results.push_back(Result);
1633}
1634
1635void VectorLegalizer::UnrollStrictFPOp(SDNode *Node,
1637 EVT VT = Node->getValueType(0);
1638 EVT EltVT = VT.getVectorElementType();
1639 unsigned NumElems = VT.getVectorNumElements();
1640 unsigned NumOpers = Node->getNumOperands();
1641 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
1642
1643 EVT TmpEltVT = EltVT;
1644 if (Node->getOpcode() == ISD::STRICT_FSETCC ||
1645 Node->getOpcode() == ISD::STRICT_FSETCCS)
1646 TmpEltVT = TLI.getSetCCResultType(DAG.getDataLayout(),
1647 *DAG.getContext(), TmpEltVT);
1648
1649 EVT ValueVTs[] = {TmpEltVT, MVT::Other};
1650 SDValue Chain = Node->getOperand(0);
1651 SDLoc dl(Node);
1652
1653 SmallVector<SDValue, 32> OpValues;
1654 SmallVector<SDValue, 32> OpChains;
1655 for (unsigned i = 0; i < NumElems; ++i) {
1657 SDValue Idx = DAG.getVectorIdxConstant(i, dl);
1658
1659 // The Chain is the first operand.
1660 Opers.push_back(Chain);
1661
1662 // Now process the remaining operands.
1663 for (unsigned j = 1; j < NumOpers; ++j) {
1664 SDValue Oper = Node->getOperand(j);
1665 EVT OperVT = Oper.getValueType();
1666
1667 if (OperVT.isVector())
1668 Oper = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
1669 OperVT.getVectorElementType(), Oper, Idx);
1670
1671 Opers.push_back(Oper);
1672 }
1673
1674 SDValue ScalarOp = DAG.getNode(Node->getOpcode(), dl, ValueVTs, Opers);
1675 SDValue ScalarResult = ScalarOp.getValue(0);
1676 SDValue ScalarChain = ScalarOp.getValue(1);
1677
1678 if (Node->getOpcode() == ISD::STRICT_FSETCC ||
1679 Node->getOpcode() == ISD::STRICT_FSETCCS)
1680 ScalarResult = DAG.getSelect(dl, EltVT, ScalarResult,
1681 DAG.getAllOnesConstant(dl, EltVT),
1682 DAG.getConstant(0, dl, EltVT));
1683
1684 OpValues.push_back(ScalarResult);
1685 OpChains.push_back(ScalarChain);
1686 }
1687
1688 SDValue Result = DAG.getBuildVector(VT, dl, OpValues);
1689 SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OpChains);
1690
1691 Results.push_back(Result);
1692 Results.push_back(NewChain);
1693}
1694
1695SDValue VectorLegalizer::UnrollVSETCC(SDNode *Node) {
1696 EVT VT = Node->getValueType(0);
1697 unsigned NumElems = VT.getVectorNumElements();
1698 EVT EltVT = VT.getVectorElementType();
1699 SDValue LHS = Node->getOperand(0);
1700 SDValue RHS = Node->getOperand(1);
1701 SDValue CC = Node->getOperand(2);
1702 EVT TmpEltVT = LHS.getValueType().getVectorElementType();
1703 SDLoc dl(Node);
1704 SmallVector<SDValue, 8> Ops(NumElems);
1705 for (unsigned i = 0; i < NumElems; ++i) {
1706 SDValue LHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS,
1707 DAG.getVectorIdxConstant(i, dl));
1708 SDValue RHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS,
1709 DAG.getVectorIdxConstant(i, dl));
1710 Ops[i] = DAG.getNode(ISD::SETCC, dl,
1711 TLI.getSetCCResultType(DAG.getDataLayout(),
1712 *DAG.getContext(), TmpEltVT),
1713 LHSElem, RHSElem, CC);
1714 Ops[i] = DAG.getSelect(dl, EltVT, Ops[i], DAG.getAllOnesConstant(dl, EltVT),
1715 DAG.getConstant(0, dl, EltVT));
1716 }
1717 return DAG.getBuildVector(VT, dl, Ops);
1718}
1719
1721 return VectorLegalizer(*this).Run();
1722}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis Results
BlockVerifier::State From
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
static void createBSWAPShuffleMask(EVT VT, SmallVectorImpl< int > &ShuffleMask)
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallVector class.
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
BinaryOperator * Mul
DEMANGLE_DUMP_METHOD void dump() const
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
size_t size() const
Definition: Function.h:761
This class is used to represent ISD::LOAD nodes.
Machine Value Type.
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:305
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:225
bool LegalizeVectors()
This transforms the SelectionDAG into a SelectionDAG that only uses vector math operations supported ...
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:478
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:577
void resize(size_type N)
Definition: SmallVector.h:642
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
This class is used to represent ISD::STORE nodes.
LegalizeAction
This enum indicates whether operations are valid for a target, and if not, what action should be used...
virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const
Return the ValueType of the result of SETCC operations.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Iterator for intrusive lists based on ilist_node.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:119
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:749
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:236
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:722
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:475
@ VECREDUCE_SEQ_FADD
Generic reduction nodes.
Definition: ISDOpcodes.h:1276
@ VECREDUCE_SMIN
Definition: ISDOpcodes.h:1303
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:250
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:558
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:713
@ SMULFIX
RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on 2 integers with the same...
Definition: ISDOpcodes.h:367
@ FMAXNUM_IEEE
Definition: ISDOpcodes.h:963
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:239
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:978
@ SMULFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition: ISDOpcodes.h:373
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:779
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:482
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:786
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
Definition: ISDOpcodes.h:1292
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:390
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:687
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition: ISDOpcodes.h:816
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:255
@ VECREDUCE_SMAX
Definition: ISDOpcodes.h:1302
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:476
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:898
@ SDIVFIX
RESULT = [US]DIVFIX(LHS, RHS, SCALE) - Perform fixed point division on 2 integers with the same width...
Definition: ISDOpcodes.h:380
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:773
@ STRICT_UINT_TO_FP
Definition: ISDOpcodes.h:449
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
Definition: ISDOpcodes.h:1289
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:721
@ VECREDUCE_FMIN
Definition: ISDOpcodes.h:1293
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:923
@ SSUBO
Same for subtraction.
Definition: ISDOpcodes.h:327
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
Definition: ISDOpcodes.h:645
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition: ISDOpcodes.h:499
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:349
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:726
@ VECREDUCE_UMAX
Definition: ISDOpcodes.h:1304
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:626
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:323
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1297
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:650
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:704
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimum or maximum on two values,...
Definition: ISDOpcodes.h:962
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:534
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:776
@ FP_TO_UINT_SAT
Definition: ISDOpcodes.h:852
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:741
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:955
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
Definition: ISDOpcodes.h:359
@ SMULO
Same for multiplication.
Definition: ISDOpcodes.h:331
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition: ISDOpcodes.h:805
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:794
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:673
@ SDIVFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition: ISDOpcodes.h:386
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:883
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:735
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition: ISDOpcodes.h:448
@ VECREDUCE_UMIN
Definition: ISDOpcodes.h:1305
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:442
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:441
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0.0...
Definition: ISDOpcodes.h:968
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:832
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:679
@ VECREDUCE_FMUL
Definition: ISDOpcodes.h:1290
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:400
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:865
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition: ISDOpcodes.h:827
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition: ISDOpcodes.h:851
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:782
@ VECREDUCE_SEQ_FMUL
Definition: ISDOpcodes.h:1277
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:61
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:492
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:340
@ AssertZext
Definition: ISDOpcodes.h:62
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:514
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1447
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1427
ManagedStatic< cl::opt< FnT >, OptCreatorT > Action
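This is an optimization pass for GlobalISel generic memory operations. (Note: this brief appears to be attached by Doxygen to the top-level llvm namespace from an unrelated file comment; it does not describe the entity defined at the location below.)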
Definition: AddressRanges.h:18
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1826
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
#define N
Extended Value Type.
Definition: ValueTypes.h:34
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:93
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:73
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:333
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:351
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:363
bool isFixedLengthVector() const
Definition: ValueTypes.h:170
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:160
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:306
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition: ValueTypes.h:166
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:311
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:319
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:291