Line data Source code
1 : //===- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ----===//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
9 : //
10 : // This file implements the SelectionDAG::LegalizeVectors method.
11 : //
12 : // The vector legalizer looks for vector operations which might need to be
13 : // scalarized and legalizes them. This is a separate step from Legalize because
14 : // scalarizing can introduce illegal types. For example, suppose we have an
15 : // ISD::SDIV of type v2i64 on x86-32. The type is legal (for example, addition
16 : // on a v2i64 is legal), but ISD::SDIV isn't legal, so we have to unroll the
17 : // operation, which introduces nodes with the illegal type i64 which must be
18 : // expanded. Similarly, suppose we have an ISD::SRA of type v16i8 on PowerPC;
19 : // the operation must be unrolled, which introduces nodes with the illegal
20 : // type i8 which must be promoted.
21 : //
22 : // This does not legalize vector manipulations like ISD::BUILD_VECTOR,
23 : // or operations that happen to take a vector which are custom-lowered;
24 : // the legalization for such operations never produces nodes
25 : // with illegal types, so it's okay to put off legalizing them until
26 : // SelectionDAG::Legalize runs.
27 : //
28 : //===----------------------------------------------------------------------===//
29 :
30 : #include "llvm/ADT/APInt.h"
31 : #include "llvm/ADT/DenseMap.h"
32 : #include "llvm/ADT/SmallVector.h"
33 : #include "llvm/CodeGen/ISDOpcodes.h"
34 : #include "llvm/CodeGen/MachineMemOperand.h"
35 : #include "llvm/CodeGen/SelectionDAG.h"
36 : #include "llvm/CodeGen/SelectionDAGNodes.h"
37 : #include "llvm/CodeGen/TargetLowering.h"
38 : #include "llvm/CodeGen/ValueTypes.h"
39 : #include "llvm/IR/DataLayout.h"
40 : #include "llvm/Support/Casting.h"
41 : #include "llvm/Support/Compiler.h"
42 : #include "llvm/Support/ErrorHandling.h"
43 : #include "llvm/Support/MachineValueType.h"
44 : #include "llvm/Support/MathExtras.h"
45 : #include <cassert>
46 : #include <cstdint>
47 : #include <iterator>
48 : #include <utility>
49 :
50 : using namespace llvm;
51 :
52 : #define DEBUG_TYPE "legalizevectorops"
53 :
54 : namespace {
55 :
56 : class VectorLegalizer { // Legalizes the vector operations of one SelectionDAG by promoting, custom-lowering, or expanding them.
57 : SelectionDAG& DAG; // The DAG whose nodes are being legalized.
58 : const TargetLowering &TLI; // Target lowering info, taken from the DAG in the constructor.
59 : bool Changed = false; // Tracks whether legalization changed anything; this is the value Run() returns.
60 :
61 : /// For nodes that are of legal width, and that have more than one use, this
62 : /// map indicates what regularized operand to use. This allows us to avoid
63 : /// legalizing the same thing more than once.
64 : SmallDenseMap<SDValue, SDValue, 64> LegalizedNodes;
65 :
66 : /// Adds a node to the translation cache: records that \p From legalizes to \p To.
67 0 : void AddLegalizedOperand(SDValue From, SDValue To) {
68 0 : LegalizedNodes.insert(std::make_pair(From, To));
69 : // If someone requests legalization of the new node, return itself.
70 : if (From != To)
71 0 : LegalizedNodes.insert(std::make_pair(To, To));
72 0 : }
73 :
74 : /// Legalizes the given node (after legalizing its operands) and returns the legalized value.
75 : SDValue LegalizeOp(SDValue Op);
76 :
77 : /// Assuming the node is legal, "legalize" the results: every value of \p Result is cached as the translation of the matching value of \p Op.
78 : SDValue TranslateLegalizeResults(SDValue Op, SDValue Result);
79 :
80 : /// Implements unrolling a VSETCC.
81 : SDValue UnrollVSETCC(SDValue Op);
82 :
83 : /// Implement expand-based legalization of vector operations.
84 : ///
85 : /// This is just a high-level routine to dispatch to specific code paths for
86 : /// operations to legalize them.
87 : SDValue Expand(SDValue Op);
88 :
89 : /// Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if
90 : /// SINT_TO_FLOAT and SHR on vectors aren't legal.
91 : ///
92 : /// (The note about FNEG that used to sit here belongs to ExpandFNEG, which is
93 : /// declared further down in this class.)
94 : SDValue ExpandUINT_TO_FLOAT(SDValue Op);
95 :
96 : /// Implement expansion for SIGN_EXTEND_INREG using SHL and SRA.
97 : SDValue ExpandSEXTINREG(SDValue Op);
98 :
99 : /// Implement expansion for ANY_EXTEND_VECTOR_INREG.
100 : ///
101 : /// Shuffles the low lanes of the operand into place and bitcasts to the proper
102 : /// type. The contents of the bits in the extended part of each element are
103 : /// undef.
104 : SDValue ExpandANY_EXTEND_VECTOR_INREG(SDValue Op);
105 :
106 : /// Implement expansion for SIGN_EXTEND_VECTOR_INREG.
107 : ///
108 : /// Shuffles the low lanes of the operand into place, bitcasts to the proper
109 : /// type, then shifts left and arithmetic shifts right to introduce a sign
110 : /// extension.
111 : SDValue ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op);
112 :
113 : /// Implement expansion for ZERO_EXTEND_VECTOR_INREG.
114 : ///
115 : /// Shuffles the low lanes of the operand into place and blends zeros into
116 : /// the remaining lanes, finally bitcasting to the proper type.
117 : SDValue ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op);
118 :
119 : /// Expand bswap of vectors into a shuffle if legal.
120 : SDValue ExpandBSWAP(SDValue Op);
121 :
122 : /// Implement vselect in terms of XOR, AND, OR when blend is not
123 : /// supported by the target.
124 : SDValue ExpandVSELECT(SDValue Op);
125 : SDValue ExpandSELECT(SDValue Op); // Expands a scalar-condition select of vectors using XOR, AND, OR on a splatted mask.
126 : SDValue ExpandLoad(SDValue Op); // Expands an extending vector load into scalar loads.
127 : SDValue ExpandStore(SDValue Op); // Expands a truncating vector store via TLI.scalarizeVectorStore.
128 : SDValue ExpandFNEG(SDValue Op); // Implements expansion for FNEG; falls back to UnrollVectorOp if FSUB isn't legal.
129 : SDValue ExpandFSUB(SDValue Op);
130 : SDValue ExpandBITREVERSE(SDValue Op);
131 : SDValue ExpandCTLZ(SDValue Op);
132 : SDValue ExpandCTTZ(SDValue Op);
133 : SDValue ExpandStrictFPOp(SDValue Op);
134 :
135 : /// Implements vector promotion.
136 : ///
137 : /// This is essentially just bitcasting the operands to a different type and
138 : /// bitcasting the result back to the original type.
139 : SDValue Promote(SDValue Op);
140 :
141 : /// Implements [SU]INT_TO_FP vector promotion.
142 : ///
143 : /// This is a [zs]ext of the input operand to a larger integer type.
144 : SDValue PromoteINT_TO_FP(SDValue Op);
145 :
146 : /// Implements FP_TO_[SU]INT vector promotion of the result type.
147 : ///
148 : /// It is promoted to a larger integer type. The result is then
149 : /// truncated back to the original type.
150 : SDValue PromoteFP_TO_INT(SDValue Op);
151 :
152 : public:
153 1269116 : VectorLegalizer(SelectionDAG& dag) : // Binds the legalizer to dag and caches its TargetLowering.
154 1269116 : DAG(dag), TLI(dag.getTargetLoweringInfo()) {}
155 :
156 : /// Begin legalizing the vector operations in the DAG. Returns false without doing any work when the DAG has no vector values; otherwise returns Changed.
157 : bool Run();
158 : };
159 :
160 : } // end anonymous namespace
161 :
162 1269116 : bool VectorLegalizer::Run() { // Legalizes all vector operations in the DAG; returns Changed, or false right away when no node produces a vector value.
163 : // Before we start legalizing vector nodes, check if there are any vectors.
164 : bool HasVectors = false;
165 1269116 : for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
166 28668143 : E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I) {
167 : // Check if the values of the nodes contain vectors. We don't need to check
168 : // the operands because we are going to check their values at some point.
169 27657639 : for (SDNode::value_iterator J = I->value_begin(), E = I->value_end();
170 62957864 : J != E; ++J) // Note: this inner E shadows the outer loop's E.
171 35300225 : HasVectors |= J->isVector();
172 :
173 : // If we found a vector node we can start the legalization.
174 27657639 : if (HasVectors)
175 : break;
176 : }
177 :
178 : // If this basic block has no vectors then no need to legalize vectors.
179 1269116 : if (!HasVectors)
180 : return false;
181 :
182 : // The legalize process is inherently a bottom-up recursive process (users
183 : // legalize their uses before themselves). Given infinite stack space, we
184 : // could just start legalizing on the root and traverse the whole graph. In
185 : // practice however, this causes us to run out of stack space on large basic
186 : // blocks. To avoid this problem, compute an ordering of the nodes where each
187 : // node is only legalized after all of its operands are legalized.
188 258612 : DAG.AssignTopologicalOrder();
189 258612 : for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
190 11967797 : E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I) // E is pinned to the last node before the walk starts. NOTE(review): presumably so nodes appended while legalizing are not visited here - confirm.
191 11709185 : LegalizeOp(SDValue(&*I, 0));
192 :
193 : // Finally, it's possible the root changed. Get the new root.
194 258612 : SDValue OldRoot = DAG.getRoot();
195 : assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?");
196 258612 : DAG.setRoot(LegalizedNodes[OldRoot]);
197 :
198 258612 : LegalizedNodes.clear(); // The translation cache is only needed for the duration of this run.
199 :
200 : // Remove dead nodes now.
201 258612 : DAG.RemoveDeadNodes();
202 :
203 258612 : return Changed;
204 : }
205 :
206 0 : SDValue VectorLegalizer::TranslateLegalizeResults(SDValue Op, SDValue Result) { // Caches Result as the legalized form of Op and returns the value of Result that matches Op's result number.
207 : // Generic legalization: just pass the operand through, caching every result value of the node (not only the one Op refers to).
208 27070050 : for (unsigned i = 0, e = Op.getNode()->getNumValues(); i != e; ++i)
209 15405287 : AddLegalizedOperand(Op.getValue(i), Result.getValue(i));
210 0 : return Result.getValue(Op.getResNo());
211 : }
212 :
213 36489037 : SDValue VectorLegalizer::LegalizeOp(SDValue Op) { // Legalizes Op's operands, then Op itself, and returns the (cached) legalized value.
214 : // Note that LegalizeOp may be reentered even from single-use nodes, which
215 : // means that we always must cache transformed nodes.
216 36489037 : DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
217 36489037 : if (I != LegalizedNodes.end()) return I->second;
218 :
219 11894555 : SDNode* Node = Op.getNode();
220 :
221 : // Legalize the operands
222 : SmallVector<SDValue, 8> Ops;
223 36629371 : for (const SDValue &Op : Node->op_values())
224 24734816 : Ops.push_back(LegalizeOp(Op));
225 :
226 11894555 : SDValue Result = SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops),
227 23789110 : Op.getResNo());
228 :
229 23789110 : if (Op.getOpcode() == ISD::LOAD) { // Extending vector loads are handled here, keyed on the target's load-extension action.
230 : LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
231 : ISD::LoadExtType ExtType = LD->getExtensionType();
232 2087428 : if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD) {
233 : LLVM_DEBUG(dbgs() << "\nLegalizing extending vector load: ";
234 : Node->dump(&DAG));
235 8138 : switch (TLI.getLoadExtAction(LD->getExtensionType(), LD->getValueType(0),
236 : LD->getMemoryVT())) {
237 0 : default: llvm_unreachable("This action is not supported yet!");
238 1501 : case TargetLowering::Legal:
239 1501 : return TranslateLegalizeResults(Op, Result);
240 932 : case TargetLowering::Custom:
241 932 : if (SDValue Lowered = TLI.LowerOperation(Result, DAG)) {
242 : assert(Lowered->getNumValues() == Op->getNumValues() &&
243 : "Unexpected number of results");
244 932 : Changed |= (Lowered != Result); // Accumulate: an unchanged lowering must not clear a change recorded for an earlier node.
245 932 : return TranslateLegalizeResults(Op, Lowered);
246 932 : }
247 : LLVM_FALLTHROUGH; // The target declined to custom-lower: expand instead.
248 : case TargetLowering::Expand:
249 1712 : Changed = true;
250 1712 : return LegalizeOp(ExpandLoad(Op));
251 : }
252 : }
253 10850841 : } else if (Op.getOpcode() == ISD::STORE) { // Truncating vector stores are handled here, keyed on the target's truncating-store action.
254 : StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
255 1391914 : EVT StVT = ST->getMemoryVT();
256 : MVT ValVT = ST->getValue().getSimpleValueType();
257 1391914 : if (StVT.isVector() && ST->isTruncatingStore()) {
258 : LLVM_DEBUG(dbgs() << "\nLegalizing truncating vector store: ";
259 : Node->dump(&DAG));
260 1410 : switch (TLI.getTruncStoreAction(ValVT, StVT)) {
261 0 : default: llvm_unreachable("This action is not supported yet!");
262 256 : case TargetLowering::Legal:
263 973 : return TranslateLegalizeResults(Op, Result);
264 244 : case TargetLowering::Custom: {
265 244 : SDValue Lowered = TLI.LowerOperation(Result, DAG);
266 244 : Changed |= (Lowered != Result); // Accumulate: an unchanged lowering must not clear a change recorded for an earlier node.
267 244 : return TranslateLegalizeResults(Op, Lowered);
268 : }
269 217 : case TargetLowering::Expand:
270 217 : Changed = true;
271 217 : return LegalizeOp(ExpandStore(Op));
272 : }
273 : }
274 : }
275 :
276 : bool HasVectorValue = false; // Nodes that produce no vector value need no vector legalization.
277 11889693 : for (SDNode::value_iterator J = Node->value_begin(), E = Node->value_end();
278 27517654 : J != E;
279 : ++J)
280 15627961 : HasVectorValue |= J->isVector();
281 11889693 : if (!HasVectorValue)
282 10654719 : return TranslateLegalizeResults(Op, Result);
283 :
284 : TargetLowering::LegalizeAction Action = TargetLowering::Legal; // Determine which action the target requests for this opcode; opcodes not listed below are passed through as legal.
285 1234974 : switch (Op.getOpcode()) {
286 1007111 : default:
287 1007111 : return TranslateLegalizeResults(Op, Result);
288 177 : case ISD::STRICT_FADD:
289 : case ISD::STRICT_FSUB:
290 : case ISD::STRICT_FMUL:
291 : case ISD::STRICT_FDIV:
292 : case ISD::STRICT_FREM:
293 : case ISD::STRICT_FSQRT:
294 : case ISD::STRICT_FMA:
295 : case ISD::STRICT_FPOW:
296 : case ISD::STRICT_FPOWI:
297 : case ISD::STRICT_FSIN:
298 : case ISD::STRICT_FCOS:
299 : case ISD::STRICT_FEXP:
300 : case ISD::STRICT_FEXP2:
301 : case ISD::STRICT_FLOG:
302 : case ISD::STRICT_FLOG10:
303 : case ISD::STRICT_FLOG2:
304 : case ISD::STRICT_FRINT:
305 : case ISD::STRICT_FNEARBYINT:
306 : // These pseudo-ops get legalized as if they were their non-strict
307 : // equivalent. For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT
308 : // is also legal, but if ISD::FSQRT requires expansion then so does
309 : // ISD::STRICT_FSQRT.
310 354 : Action = TLI.getStrictFPOperationAction(Node->getOpcode(),
311 : Node->getValueType(0));
312 : break;
313 225480 : case ISD::ADD:
314 : case ISD::SUB:
315 : case ISD::MUL:
316 : case ISD::SDIV:
317 : case ISD::UDIV:
318 : case ISD::SREM:
319 : case ISD::UREM:
320 : case ISD::SDIVREM:
321 : case ISD::UDIVREM:
322 : case ISD::FADD:
323 : case ISD::FSUB:
324 : case ISD::FMUL:
325 : case ISD::FDIV:
326 : case ISD::FREM:
327 : case ISD::AND:
328 : case ISD::OR:
329 : case ISD::XOR:
330 : case ISD::SHL:
331 : case ISD::SRA:
332 : case ISD::SRL:
333 : case ISD::ROTL:
334 : case ISD::ROTR:
335 : case ISD::BSWAP:
336 : case ISD::BITREVERSE:
337 : case ISD::CTLZ:
338 : case ISD::CTTZ:
339 : case ISD::CTLZ_ZERO_UNDEF:
340 : case ISD::CTTZ_ZERO_UNDEF:
341 : case ISD::CTPOP:
342 : case ISD::SELECT:
343 : case ISD::VSELECT:
344 : case ISD::SELECT_CC:
345 : case ISD::SETCC:
346 : case ISD::ZERO_EXTEND:
347 : case ISD::ANY_EXTEND:
348 : case ISD::TRUNCATE:
349 : case ISD::SIGN_EXTEND:
350 : case ISD::FP_TO_SINT:
351 : case ISD::FP_TO_UINT:
352 : case ISD::FNEG:
353 : case ISD::FABS:
354 : case ISD::FMINNUM:
355 : case ISD::FMAXNUM:
356 : case ISD::FMINNAN:
357 : case ISD::FMAXNAN:
358 : case ISD::FCOPYSIGN:
359 : case ISD::FSQRT:
360 : case ISD::FSIN:
361 : case ISD::FCOS:
362 : case ISD::FPOWI:
363 : case ISD::FPOW:
364 : case ISD::FLOG:
365 : case ISD::FLOG2:
366 : case ISD::FLOG10:
367 : case ISD::FEXP:
368 : case ISD::FEXP2:
369 : case ISD::FCEIL:
370 : case ISD::FTRUNC:
371 : case ISD::FRINT:
372 : case ISD::FNEARBYINT:
373 : case ISD::FROUND:
374 : case ISD::FFLOOR:
375 : case ISD::FP_ROUND:
376 : case ISD::FP_EXTEND:
377 : case ISD::FMA:
378 : case ISD::SIGN_EXTEND_INREG:
379 : case ISD::ANY_EXTEND_VECTOR_INREG:
380 : case ISD::SIGN_EXTEND_VECTOR_INREG:
381 : case ISD::ZERO_EXTEND_VECTOR_INREG:
382 : case ISD::SMIN:
383 : case ISD::SMAX:
384 : case ISD::UMIN:
385 : case ISD::UMAX:
386 : case ISD::SMUL_LOHI:
387 : case ISD::UMUL_LOHI:
388 : case ISD::FCANONICALIZE:
389 : case ISD::SADDSAT:
390 450960 : Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); // Keyed on the type of the node's first result.
391 225480 : break;
392 0 : case ISD::FP_ROUND_INREG:
393 0 : Action = TLI.getOperationAction(Node->getOpcode(),
394 : cast<VTSDNode>(Node->getOperand(1))->getVT()); // Keyed on the type carried by operand 1 rather than the result type.
395 0 : break;
396 2206 : case ISD::SINT_TO_FP:
397 : case ISD::UINT_TO_FP:
398 2206 : Action = TLI.getOperationAction(Node->getOpcode(),
399 2206 : Node->getOperand(0).getValueType()); // Keyed on the integer source type rather than the result type.
400 2206 : break;
401 : }
402 :
403 : LLVM_DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG));
404 :
405 227778 : switch (Action) {
406 0 : default: llvm_unreachable("This action is not supported yet!");
407 658 : case TargetLowering::Promote:
408 658 : Result = Promote(Op);
409 658 : Changed = true;
410 658 : break;
411 : case TargetLowering::Legal:
412 : LLVM_DEBUG(dbgs() << "Legal node: nothing to do\n");
413 : break;
414 57325 : case TargetLowering::Custom: {
415 : LLVM_DEBUG(dbgs() << "Trying custom legalization\n");
416 57325 : if (SDValue Tmp1 = TLI.LowerOperation(Op, DAG)) {
417 : LLVM_DEBUG(dbgs() << "Successfully custom legalized node\n");
418 55935 : Result = Tmp1;
419 55935 : break;
420 : }
421 : LLVM_DEBUG(dbgs() << "Could not custom legalize node\n");
422 : LLVM_FALLTHROUGH; // The target declined to custom-lower: expand instead.
423 : }
424 : case TargetLowering::Expand:
425 9205 : Result = Expand(Op);
426 : }
427 :
428 : // Make sure that the generated code is itself legal.
429 : if (Result != Op) {
430 43107 : Result = LegalizeOp(Result);
431 43107 : Changed = true;
432 : }
433 :
434 : // Note that LegalizeOp may be reentered even from single-use nodes, which
435 : // means that we always must cache transformed nodes.
436 227863 : AddLegalizedOperand(Op, Result);
437 227863 : return Result;
438 : }
439 :
440 658 : SDValue VectorLegalizer::Promote(SDValue Op) { // Re-issues Op in the type chosen by TLI.getTypeToPromoteTo and converts the result back to Op's original type.
441 : // For a few operations there is a specific concept for promotion based on
442 : // the operand's type.
443 658 : switch (Op.getOpcode()) {
444 22 : case ISD::SINT_TO_FP:
445 : case ISD::UINT_TO_FP:
446 : // "Promote" the operation by extending the operand.
447 22 : return PromoteINT_TO_FP(Op);
448 171 : case ISD::FP_TO_UINT:
449 : case ISD::FP_TO_SINT:
450 : // Promote the operation by widening the result type; the result is then truncated back.
451 171 : return PromoteFP_TO_INT(Op);
452 : }
453 :
454 : // There are currently two cases of vector promotion:
455 : // 1) Bitcasting a vector of integers to a different type to a vector of the
456 : // same overall length. For example, x86 promotes ISD::AND v2i32 to v1i64.
457 : // 2) Extending a vector of floats to a vector of the same number of larger
458 : // floats. For example, AArch64 promotes ISD::FADD on v4f16 to v4f32.
459 465 : MVT VT = Op.getSimpleValueType();
460 : assert(Op.getNode()->getNumValues() == 1 &&
461 : "Can't promote a vector with multiple results!");
462 465 : MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT);
463 : SDLoc dl(Op);
464 930 : SmallVector<SDValue, 4> Operands(Op.getNumOperands());
465 :
466 1461 : for (unsigned j = 0; j != Op.getNumOperands(); ++j) { // Convert every vector operand to NVT; non-vector operands are passed through unchanged.
467 2988 : if (Op.getOperand(j).getValueType().isVector())
468 : if (Op.getOperand(j)
469 1694 : .getValueType()
470 847 : .getVectorElementType()
471 263 : .isFloatingPoint() &&
472 847 : NVT.isVector() && NVT.getVectorElementType().isFloatingPoint())
473 282 : Operands[j] = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Op.getOperand(j)); // Case 2: float vector to a vector of wider floats.
474 : else
475 1412 : Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Op.getOperand(j)); // Case 1: reinterpret the bits as NVT.
476 : else
477 149 : Operands[j] = Op.getOperand(j);
478 : }
479 :
480 1395 : Op = DAG.getNode(Op.getOpcode(), dl, NVT, Operands, Op.getNode()->getFlags()); // Same opcode and node flags, now producing NVT.
481 930 : if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) ||
482 353 : (VT.isVector() && VT.getVectorElementType().isFloatingPoint() &&
483 61 : NVT.isVector() && NVT.getVectorElementType().isFloatingPoint()))
484 224 : return DAG.getNode(ISD::FP_ROUND, dl, VT, Op, DAG.getIntPtrConstant(0, dl)); // Float promotion: round the wide result back to VT.
485 : else
486 706 : return DAG.getNode(ISD::BITCAST, dl, VT, Op); // Bitcast promotion: cast the result back to VT.
487 : }
488 :
489 0 : SDValue VectorLegalizer::PromoteINT_TO_FP(SDValue Op) { // Promotes [SU]INT_TO_FP by extending its vector operands to the promoted integer type; the result type is unchanged.
490 : // INT_TO_FP operations may require the input operand be promoted even
491 : // when the type is otherwise legal.
492 0 : MVT VT = Op.getOperand(0).getSimpleValueType();
493 0 : MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT);
494 : assert(NVT.getVectorNumElements() == VT.getVectorNumElements() &&
495 : "Vectors have different number of elements!");
496 :
497 0 : SDLoc dl(Op);
498 0 : SmallVector<SDValue, 4> Operands(Op.getNumOperands());
499 :
500 0 : unsigned Opc = Op.getOpcode() == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : // Unsigned sources are zero-extended, signed sources are sign-extended.
501 : ISD::SIGN_EXTEND;
502 0 : for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
503 0 : if (Op.getOperand(j).getValueType().isVector())
504 0 : Operands[j] = DAG.getNode(Opc, dl, NVT, Op.getOperand(j));
505 : else
506 0 : Operands[j] = Op.getOperand(j); // Non-vector operands are passed through unchanged.
507 : }
508 :
509 0 : return DAG.getNode(Op.getOpcode(), dl, Op.getValueType(), Operands);
510 : }
511 :
512 : // For FP_TO_INT we promote the result type to a vector type with wider
513 : // elements and then truncate the result. This is different from the default
514 : // Promote path, which uses bitcast to promote, thus assuming that the
515 : // promoted vector type has the same overall size.
516 0 : SDValue VectorLegalizer::PromoteFP_TO_INT(SDValue Op) {
517 0 : MVT VT = Op.getSimpleValueType();
518 0 : MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT);
519 : assert(NVT.getVectorNumElements() == VT.getVectorNumElements() &&
520 : "Vectors have different number of elements!");
521 :
522 0 : unsigned NewOpc = Op->getOpcode();
523 : // Change FP_TO_UINT to FP_TO_SINT if possible.
524 : // TODO: Should we only do this if FP_TO_UINT itself isn't legal?
525 0 : if (NewOpc == ISD::FP_TO_UINT &&
526 0 : TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT))
527 : NewOpc = ISD::FP_TO_SINT;
528 :
529 0 : SDLoc dl(Op);
530 0 : SDValue Promoted = DAG.getNode(NewOpc, dl, NVT, Op.getOperand(0)); // Perform the conversion in the wider type NVT.
531 :
532 : // Assert that the converted value fits in the original type. If it doesn't
533 : // (eg: because the value being converted is too big), then the result of the
534 : // original operation was undefined anyway, so the assert is still correct.
535 0 : Promoted = DAG.getNode(Op->getOpcode() == ISD::FP_TO_UINT ? ISD::AssertZext // Chosen from the ORIGINAL opcode, not NewOpc.
536 : : ISD::AssertSext,
537 : dl, NVT, Promoted,
538 0 : DAG.getValueType(VT.getScalarType()));
539 0 : return DAG.getNode(ISD::TRUNCATE, dl, VT, Promoted);
540 : }
541 :
542 0 : SDValue VectorLegalizer::ExpandLoad(SDValue Op) { // Expands an extending vector load into scalar loads; caches both results (value and chain) and returns the one Op refers to.
543 : LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
544 :
545 0 : EVT SrcVT = LD->getMemoryVT();
546 0 : EVT SrcEltVT = SrcVT.getScalarType();
547 : unsigned NumElem = SrcVT.getVectorNumElements();
548 :
549 0 : SDValue NewChain;
550 0 : SDValue Value;
551 0 : if (SrcVT.getVectorNumElements() > 1 && !SrcEltVT.isByteSized()) { // Elements are packed at sub-byte granularity: load whole words and extract bit fields.
552 0 : SDLoc dl(Op);
553 :
554 : SmallVector<SDValue, 8> Vals;
555 : SmallVector<SDValue, 8> LoadChains;
556 :
557 0 : EVT DstEltVT = LD->getValueType(0).getScalarType();
558 0 : SDValue Chain = LD->getChain();
559 0 : SDValue BasePTR = LD->getBasePtr();
560 : ISD::LoadExtType ExtType = LD->getExtensionType();
561 :
562 : // When elements in a vector are not byte-addressable, we cannot directly
563 : // load each element by advancing pointer, which could only address bytes.
564 : // Instead, we load all significant words, mask bits off, and concatenate
565 : // them to form each element. Finally, they are extended to destination
566 : // scalar type to build the destination vector.
567 0 : EVT WideVT = TLI.getPointerTy(DAG.getDataLayout());
568 :
569 : assert(WideVT.isRound() &&
570 : "Could not handle the sophisticated case when the widest integer is"
571 : " not power of 2.");
572 : assert(WideVT.bitsGE(SrcEltVT) &&
573 : "Type is not legalized?");
574 :
575 : unsigned WideBytes = WideVT.getStoreSize();
576 : unsigned Offset = 0;
577 : unsigned RemainingBytes = SrcVT.getStoreSize();
578 : SmallVector<SDValue, 8> LoadVals;
579 0 : while (RemainingBytes > 0) { // Load the vector's storage as a sequence of WideVT-sized (or smaller, for the tail) scalars.
580 : SDValue ScalarLoad;
581 : unsigned LoadBytes = WideBytes;
582 :
583 0 : if (RemainingBytes >= LoadBytes) {
584 0 : ScalarLoad =
585 0 : DAG.getLoad(WideVT, dl, Chain, BasePTR,
586 0 : LD->getPointerInfo().getWithOffset(Offset),
587 0 : MinAlign(LD->getAlignment(), Offset),
588 0 : LD->getMemOperand()->getFlags(), LD->getAAInfo());
589 : } else {
590 0 : EVT LoadVT = WideVT;
591 0 : while (RemainingBytes < LoadBytes) { // Halve the load size until it fits in the bytes that are left.
592 0 : LoadBytes >>= 1; // Reduce the load size by half.
593 0 : LoadVT = EVT::getIntegerVT(*DAG.getContext(), LoadBytes << 3);
594 : }
595 0 : ScalarLoad =
596 0 : DAG.getExtLoad(ISD::EXTLOAD, dl, WideVT, Chain, BasePTR,
597 0 : LD->getPointerInfo().getWithOffset(Offset), LoadVT,
598 0 : MinAlign(LD->getAlignment(), Offset),
599 0 : LD->getMemOperand()->getFlags(), LD->getAAInfo());
600 : }
601 :
602 0 : RemainingBytes -= LoadBytes;
603 0 : Offset += LoadBytes;
604 :
605 0 : BasePTR = DAG.getObjectPtrOffset(dl, BasePTR, LoadBytes);
606 :
607 0 : LoadVals.push_back(ScalarLoad.getValue(0));
608 0 : LoadChains.push_back(ScalarLoad.getValue(1));
609 : }
610 :
611 : // Extract bits, pack and extend/trunc them into destination type.
612 0 : unsigned SrcEltBits = SrcEltVT.getSizeInBits();
613 0 : SDValue SrcEltBitMask = DAG.getConstant(APInt::getLowBitsSet(WideVT.getSizeInBits(), SrcEltBits), dl, WideVT); // Built as an APInt of WideVT's width: a 32-bit "1U << SrcEltBits" is undefined once an element is 32 bits or wider.
614 :
615 : unsigned BitOffset = 0;
616 : unsigned WideIdx = 0;
617 0 : unsigned WideBits = WideVT.getSizeInBits();
618 :
619 0 : for (unsigned Idx = 0; Idx != NumElem; ++Idx) {
620 0 : SDValue Lo, Hi, ShAmt;
621 :
622 0 : if (BitOffset < WideBits) { // Low part: the bits of this element that live in the current wide word.
623 0 : ShAmt = DAG.getConstant(
624 0 : BitOffset, dl, TLI.getShiftAmountTy(WideVT, DAG.getDataLayout()));
625 0 : Lo = DAG.getNode(ISD::SRL, dl, WideVT, LoadVals[WideIdx], ShAmt);
626 0 : Lo = DAG.getNode(ISD::AND, dl, WideVT, Lo, SrcEltBitMask);
627 : }
628 :
629 0 : BitOffset += SrcEltBits;
630 0 : if (BitOffset >= WideBits) { // The element ends at or past the word boundary: move on to the next wide word.
631 0 : WideIdx++;
632 0 : BitOffset -= WideBits;
633 0 : if (BitOffset > 0) { // The element straddles two words: fetch its remaining bits from the next word.
634 0 : ShAmt = DAG.getConstant(
635 0 : SrcEltBits - BitOffset, dl,
636 0 : TLI.getShiftAmountTy(WideVT, DAG.getDataLayout()));
637 0 : Hi = DAG.getNode(ISD::SHL, dl, WideVT, LoadVals[WideIdx], ShAmt);
638 0 : Hi = DAG.getNode(ISD::AND, dl, WideVT, Hi, SrcEltBitMask);
639 : }
640 : }
641 :
642 0 : if (Hi.getNode())
643 0 : Lo = DAG.getNode(ISD::OR, dl, WideVT, Lo, Hi);
644 :
645 0 : switch (ExtType) { // Convert the extracted field to the destination element type according to the load's extension kind.
646 0 : default: llvm_unreachable("Unknown extended-load op!");
647 0 : case ISD::EXTLOAD:
648 0 : Lo = DAG.getAnyExtOrTrunc(Lo, dl, DstEltVT);
649 0 : break;
650 0 : case ISD::ZEXTLOAD:
651 0 : Lo = DAG.getZExtOrTrunc(Lo, dl, DstEltVT);
652 0 : break;
653 0 : case ISD::SEXTLOAD: // Sign-extend within WideVT first: shift the field to the top, then arithmetic-shift it back down.
654 0 : ShAmt =
655 0 : DAG.getConstant(WideBits - SrcEltBits, dl,
656 0 : TLI.getShiftAmountTy(WideVT, DAG.getDataLayout()));
657 0 : Lo = DAG.getNode(ISD::SHL, dl, WideVT, Lo, ShAmt);
658 0 : Lo = DAG.getNode(ISD::SRA, dl, WideVT, Lo, ShAmt);
659 0 : Lo = DAG.getSExtOrTrunc(Lo, dl, DstEltVT);
660 0 : break;
661 : }
662 0 : Vals.push_back(Lo);
663 : }
664 :
665 0 : NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains); // Join the chains of all the scalar loads.
666 0 : Value = DAG.getBuildVector(Op.getNode()->getValueType(0), dl, Vals);
667 : } else {
668 0 : SDValue Scalarized = TLI.scalarizeVectorLoad(LD, DAG); // Byte-sized (or single) elements: let the target-lowering helper scalarize the load.
669 : // Skip past MERGE_VALUE node if known.
670 0 : if (Scalarized->getOpcode() == ISD::MERGE_VALUES) {
671 0 : NewChain = Scalarized.getOperand(1);
672 0 : Value = Scalarized.getOperand(0);
673 : } else {
674 0 : NewChain = Scalarized.getValue(1);
675 0 : Value = Scalarized.getValue(0);
676 : }
677 : }
678 :
679 0 : AddLegalizedOperand(Op.getValue(0), Value);
680 0 : AddLegalizedOperand(Op.getValue(1), NewChain);
681 :
682 0 : return (Op.getResNo() ? NewChain : Value); // Result 0 is the loaded value; any other result number yields the chain.
683 : }
684 :
685 0 : SDValue VectorLegalizer::ExpandStore(SDValue Op) { // Expands a vector store by scalarizing it through the target-lowering helper.
686 : StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
687 0 : SDValue TF = TLI.scalarizeVectorStore(ST, DAG);
688 0 : AddLegalizedOperand(Op, TF); // Cache the replacement so later requests for Op resolve to it.
689 0 : return TF;
690 : }
691 :
692 9205 : SDValue VectorLegalizer::Expand(SDValue Op) { // Dispatches an operation that must be expanded to its dedicated expander; opcodes without one are unrolled via DAG.UnrollVectorOp.
693 18410 : switch (Op->getOpcode()) {
694 717 : case ISD::SIGN_EXTEND_INREG:
695 717 : return ExpandSEXTINREG(Op);
696 292 : case ISD::ANY_EXTEND_VECTOR_INREG:
697 292 : return ExpandANY_EXTEND_VECTOR_INREG(Op);
698 12 : case ISD::SIGN_EXTEND_VECTOR_INREG:
699 12 : return ExpandSIGN_EXTEND_VECTOR_INREG(Op);
700 489 : case ISD::ZERO_EXTEND_VECTOR_INREG:
701 489 : return ExpandZERO_EXTEND_VECTOR_INREG(Op);
702 49 : case ISD::BSWAP:
703 49 : return ExpandBSWAP(Op);
704 1425 : case ISD::VSELECT:
705 1425 : return ExpandVSELECT(Op);
706 82 : case ISD::SELECT:
707 82 : return ExpandSELECT(Op);
708 155 : case ISD::UINT_TO_FP:
709 155 : return ExpandUINT_TO_FLOAT(Op);
710 40 : case ISD::FNEG:
711 40 : return ExpandFNEG(Op);
712 48 : case ISD::FSUB:
713 48 : return ExpandFSUB(Op);
714 93 : case ISD::SETCC:
715 93 : return UnrollVSETCC(Op);
716 132 : case ISD::BITREVERSE:
717 132 : return ExpandBITREVERSE(Op);
718 136 : case ISD::CTLZ:
719 : case ISD::CTLZ_ZERO_UNDEF:
720 136 : return ExpandCTLZ(Op);
721 248 : case ISD::CTTZ:
722 : case ISD::CTTZ_ZERO_UNDEF:
723 248 : return ExpandCTTZ(Op);
724 85 : case ISD::STRICT_FADD:
725 : case ISD::STRICT_FSUB:
726 : case ISD::STRICT_FMUL:
727 : case ISD::STRICT_FDIV:
728 : case ISD::STRICT_FREM:
729 : case ISD::STRICT_FSQRT:
730 : case ISD::STRICT_FMA:
731 : case ISD::STRICT_FPOW:
732 : case ISD::STRICT_FPOWI:
733 : case ISD::STRICT_FSIN:
734 : case ISD::STRICT_FCOS:
735 : case ISD::STRICT_FEXP:
736 : case ISD::STRICT_FEXP2:
737 : case ISD::STRICT_FLOG:
738 : case ISD::STRICT_FLOG10:
739 : case ISD::STRICT_FLOG2:
740 : case ISD::STRICT_FRINT:
741 : case ISD::STRICT_FNEARBYINT:
742 85 : return ExpandStrictFPOp(Op); // All strict FP opcodes share one expander.
743 5202 : default:
744 5202 : return DAG.UnrollVectorOp(Op.getNode()); // No dedicated expansion: unroll the vector operation.
745 : }
746 : }
747 :
748 0 : SDValue VectorLegalizer::ExpandSELECT(SDValue Op) {
749 : // Lower a select instruction where the condition is a scalar and the
750 : // operands are vectors. The scalar condition is turned into an all-ones or
751 : // all-zeros element that is broadcast into an integer mask vector, and the result is computed as (Op1 AND Mask) OR (Op2 AND NOT Mask).
752 0 : EVT VT = Op.getValueType();
753 0 : SDLoc DL(Op);
754 :
755 0 : SDValue Mask = Op.getOperand(0);
756 0 : SDValue Op1 = Op.getOperand(1);
757 0 : SDValue Op2 = Op.getOperand(2);
758 :
759 : assert(VT.isVector() && !Mask.getValueType().isVector()
760 : && Op1.getValueType() == Op2.getValueType() && "Invalid type");
761 :
762 : // If we can't even use the basic vector operations of
763 : // AND,OR,XOR, we will have to scalarize the op.
764 : // Notice that the operation may be 'promoted' which means that it is
765 : // 'bitcasted' to another type which is handled.
766 : // Also, we need to be able to construct a splat vector using BUILD_VECTOR.
767 0 : if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
768 0 : TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
769 0 : TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand ||
770 : TLI.getOperationAction(ISD::BUILD_VECTOR, VT) == TargetLowering::Expand)
771 0 : return DAG.UnrollVectorOp(Op.getNode());
772 :
773 : // Generate a mask operand.
774 0 : EVT MaskTy = VT.changeVectorElementTypeToInteger();
775 :
776 : // What is the size of each element in the vector mask.
777 0 : EVT BitTy = MaskTy.getScalarType();
778 :
779 0 : Mask = DAG.getSelect(DL, BitTy, Mask, // Scalar select: all-ones when the condition holds, zero otherwise.
780 0 : DAG.getConstant(APInt::getAllOnesValue(BitTy.getSizeInBits()), DL,
781 : BitTy),
782 0 : DAG.getConstant(0, DL, BitTy));
783 :
784 : // Broadcast the mask so that the entire vector is all-one or all zero.
785 0 : Mask = DAG.getSplatBuildVector(MaskTy, DL, Mask);
786 :
787 : // Bitcast the operands to be the same type as the mask.
788 : // This is needed when we select between FP types because
789 : // the mask is a vector of integers.
790 0 : Op1 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op1);
791 0 : Op2 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op2);
792 :
793 0 : SDValue AllOnes = DAG.getConstant(
794 0 : APInt::getAllOnesValue(BitTy.getSizeInBits()), DL, MaskTy);
795 0 : SDValue NotMask = DAG.getNode(ISD::XOR, DL, MaskTy, Mask, AllOnes); // NOT Mask, expressed as XOR with all-ones.
796 :
797 0 : Op1 = DAG.getNode(ISD::AND, DL, MaskTy, Op1, Mask);
798 0 : Op2 = DAG.getNode(ISD::AND, DL, MaskTy, Op2, NotMask);
799 0 : SDValue Val = DAG.getNode(ISD::OR, DL, MaskTy, Op1, Op2);
800 0 : return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val); // Cast the integer result back to the select's own type.
801 : }
802 :
803 0 : SDValue VectorLegalizer::ExpandSEXTINREG(SDValue Op) { // Expands SIGN_EXTEND_INREG as a left shift followed by an arithmetic right shift of the same amount.
804 0 : EVT VT = Op.getValueType();
805 :
806 : // Make sure that the SRA and SHL instructions are available; otherwise unroll the operation.
807 0 : if (TLI.getOperationAction(ISD::SRA, VT) == TargetLowering::Expand ||
808 : TLI.getOperationAction(ISD::SHL, VT) == TargetLowering::Expand)
809 0 : return DAG.UnrollVectorOp(Op.getNode());
810 :
811 0 : SDLoc DL(Op);
812 0 : EVT OrigTy = cast<VTSDNode>(Op->getOperand(1))->getVT(); // Operand 1 carries the type being extended from.
813 :
814 : unsigned BW = VT.getScalarSizeInBits();
815 : unsigned OrigBW = OrigTy.getScalarSizeInBits();
816 0 : SDValue ShiftSz = DAG.getConstant(BW - OrigBW, DL, VT); // Shift amount: element width minus the original width.
817 :
818 0 : Op = Op.getOperand(0);
819 0 : Op = DAG.getNode(ISD::SHL, DL, VT, Op, ShiftSz);
820 0 : return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz);
821 : }
822 :
823 : // Generically expand a vector anyext in register to a shuffle of the relevant
824 : // lanes into the appropriate locations, with other lanes left undef.
825 0 : SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDValue Op) {
826 0 : SDLoc DL(Op);
827 0 : EVT VT = Op.getValueType();
828 0 : int NumElements = VT.getVectorNumElements();
829 0 : SDValue Src = Op.getOperand(0);
830 0 : EVT SrcVT = Src.getValueType();
831 0 : int NumSrcElements = SrcVT.getVectorNumElements();
832 :
833 : // Build a base mask of undef shuffles.
834 : SmallVector<int, 16> ShuffleMask;
835 0 : ShuffleMask.resize(NumSrcElements, -1);
836 :
837 : // Place the extended lanes into the correct locations.
838 0 : int ExtLaneScale = NumSrcElements / NumElements;
839 0 : int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0;
840 0 : for (int i = 0; i < NumElements; ++i)
841 0 : ShuffleMask[i * ExtLaneScale + EndianOffset] = i;
842 :
843 0 : return DAG.getNode(
844 : ISD::BITCAST, DL, VT,
845 0 : DAG.getVectorShuffle(SrcVT, DL, Src, DAG.getUNDEF(SrcVT), ShuffleMask));
846 : }
847 :
848 0 : SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op) {
849 0 : SDLoc DL(Op);
850 0 : EVT VT = Op.getValueType();
851 0 : SDValue Src = Op.getOperand(0);
852 0 : EVT SrcVT = Src.getValueType();
853 :
854 : // First build an any-extend node which can be legalized above when we
855 : // recurse through it.
856 0 : Op = DAG.getAnyExtendVectorInReg(Src, DL, VT);
857 :
858 : // Now we need sign extend. Do this by shifting the elements. Even if these
859 : // aren't legal operations, they have a better chance of being legalized
860 : // without full scalarization than the sign extension does.
861 : unsigned EltWidth = VT.getScalarSizeInBits();
862 : unsigned SrcEltWidth = SrcVT.getScalarSizeInBits();
863 0 : SDValue ShiftAmount = DAG.getConstant(EltWidth - SrcEltWidth, DL, VT);
864 0 : return DAG.getNode(ISD::SRA, DL, VT,
865 : DAG.getNode(ISD::SHL, DL, VT, Op, ShiftAmount),
866 0 : ShiftAmount);
867 : }
868 :
869 : // Generically expand a vector zext in register to a shuffle of the relevant
870 : // lanes into the appropriate locations, a blend of zero into the high bits,
871 : // and a bitcast to the wider element type.
872 0 : SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op) {
873 0 : SDLoc DL(Op);
874 0 : EVT VT = Op.getValueType();
875 0 : int NumElements = VT.getVectorNumElements();
876 0 : SDValue Src = Op.getOperand(0);
877 0 : EVT SrcVT = Src.getValueType();
878 0 : int NumSrcElements = SrcVT.getVectorNumElements();
879 :
880 : // Build up a zero vector to blend into this one.
881 0 : SDValue Zero = DAG.getConstant(0, DL, SrcVT);
882 :
883 : // Shuffle the incoming lanes into the correct position, and pull all other
884 : // lanes from the zero vector.
885 : SmallVector<int, 16> ShuffleMask;
886 0 : ShuffleMask.reserve(NumSrcElements);
887 0 : for (int i = 0; i < NumSrcElements; ++i)
888 0 : ShuffleMask.push_back(i);
889 :
890 0 : int ExtLaneScale = NumSrcElements / NumElements;
891 0 : int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0;
892 0 : for (int i = 0; i < NumElements; ++i)
893 0 : ShuffleMask[i * ExtLaneScale + EndianOffset] = NumSrcElements + i;
894 :
895 0 : return DAG.getNode(ISD::BITCAST, DL, VT,
896 0 : DAG.getVectorShuffle(SrcVT, DL, Zero, Src, ShuffleMask));
897 : }
898 :
899 141 : static void createBSWAPShuffleMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) {
900 141 : int ScalarSizeInBytes = VT.getScalarSizeInBits() / 8;
901 983 : for (int I = 0, E = VT.getVectorNumElements(); I != E; ++I)
902 3866 : for (int J = ScalarSizeInBytes - 1; J >= 0; --J)
903 3024 : ShuffleMask.push_back((I * ScalarSizeInBytes) + J);
904 141 : }
905 :
906 0 : SDValue VectorLegalizer::ExpandBSWAP(SDValue Op) {
907 0 : EVT VT = Op.getValueType();
908 :
909 : // Generate a byte wise shuffle mask for the BSWAP.
910 : SmallVector<int, 16> ShuffleMask;
911 0 : createBSWAPShuffleMask(VT, ShuffleMask);
912 0 : EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, ShuffleMask.size());
913 :
914 : // Only emit a shuffle if the mask is legal.
915 0 : if (!TLI.isShuffleMaskLegal(ShuffleMask, ByteVT))
916 0 : return DAG.UnrollVectorOp(Op.getNode());
917 :
918 0 : SDLoc DL(Op);
919 0 : Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Op.getOperand(0));
920 0 : Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), ShuffleMask);
921 0 : return DAG.getNode(ISD::BITCAST, DL, VT, Op);
922 : }
923 :
924 0 : SDValue VectorLegalizer::ExpandBITREVERSE(SDValue Op) {
925 0 : EVT VT = Op.getValueType();
926 :
927 : // If we have the scalar operation, it's probably cheaper to unroll it.
928 0 : if (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, VT.getScalarType()))
929 0 : return DAG.UnrollVectorOp(Op.getNode());
930 :
931 : // If the vector element width is a whole number of bytes, test if its legal
932 : // to BSWAP shuffle the bytes and then perform the BITREVERSE on the byte
933 : // vector. This greatly reduces the number of bit shifts necessary.
934 : unsigned ScalarSizeInBits = VT.getScalarSizeInBits();
935 0 : if (ScalarSizeInBits > 8 && (ScalarSizeInBits % 8) == 0) {
936 : SmallVector<int, 16> BSWAPMask;
937 0 : createBSWAPShuffleMask(VT, BSWAPMask);
938 :
939 0 : EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, BSWAPMask.size());
940 0 : if (TLI.isShuffleMaskLegal(BSWAPMask, ByteVT) &&
941 0 : (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, ByteVT) ||
942 0 : (TLI.isOperationLegalOrCustom(ISD::SHL, ByteVT) &&
943 0 : TLI.isOperationLegalOrCustom(ISD::SRL, ByteVT) &&
944 0 : TLI.isOperationLegalOrCustomOrPromote(ISD::AND, ByteVT) &&
945 0 : TLI.isOperationLegalOrCustomOrPromote(ISD::OR, ByteVT)))) {
946 0 : SDLoc DL(Op);
947 0 : Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Op.getOperand(0));
948 0 : Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT),
949 0 : BSWAPMask);
950 0 : Op = DAG.getNode(ISD::BITREVERSE, DL, ByteVT, Op);
951 0 : return DAG.getNode(ISD::BITCAST, DL, VT, Op);
952 : }
953 : }
954 :
955 : // If we have the appropriate vector bit operations, it is better to use them
956 : // than unrolling and expanding each component.
957 0 : if (!TLI.isOperationLegalOrCustom(ISD::SHL, VT) ||
958 0 : !TLI.isOperationLegalOrCustom(ISD::SRL, VT) ||
959 0 : !TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) ||
960 0 : !TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT))
961 0 : return DAG.UnrollVectorOp(Op.getNode());
962 :
963 : // Let LegalizeDAG handle this later.
964 0 : return Op;
965 : }
966 :
967 0 : SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
968 : // Implement VSELECT in terms of XOR, AND, OR
969 : // on platforms which do not support blend natively.
970 0 : SDLoc DL(Op);
971 :
972 0 : SDValue Mask = Op.getOperand(0);
973 0 : SDValue Op1 = Op.getOperand(1);
974 0 : SDValue Op2 = Op.getOperand(2);
975 :
976 0 : EVT VT = Mask.getValueType();
977 :
978 : // If we can't even use the basic vector operations of
979 : // AND,OR,XOR, we will have to scalarize the op.
980 : // Notice that the operation may be 'promoted' which means that it is
981 : // 'bitcasted' to another type which is handled.
982 : // This operation also isn't safe with AND, OR, XOR when the boolean
983 : // type is 0/1 as we need an all ones vector constant to mask with.
984 : // FIXME: Sign extend 1 to all ones if thats legal on the target.
985 0 : if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
986 0 : TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
987 0 : TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand ||
988 0 : TLI.getBooleanContents(Op1.getValueType()) !=
989 : TargetLowering::ZeroOrNegativeOneBooleanContent)
990 0 : return DAG.UnrollVectorOp(Op.getNode());
991 :
992 : // If the mask and the type are different sizes, unroll the vector op. This
993 : // can occur when getSetCCResultType returns something that is different in
994 : // size from the operand types. For example, v4i8 = select v4i32, v4i8, v4i8.
995 0 : if (VT.getSizeInBits() != Op1.getValueSizeInBits())
996 0 : return DAG.UnrollVectorOp(Op.getNode());
997 :
998 : // Bitcast the operands to be the same type as the mask.
999 : // This is needed when we select between FP types because
1000 : // the mask is a vector of integers.
1001 0 : Op1 = DAG.getNode(ISD::BITCAST, DL, VT, Op1);
1002 0 : Op2 = DAG.getNode(ISD::BITCAST, DL, VT, Op2);
1003 :
1004 0 : SDValue AllOnes = DAG.getConstant(
1005 0 : APInt::getAllOnesValue(VT.getScalarSizeInBits()), DL, VT);
1006 0 : SDValue NotMask = DAG.getNode(ISD::XOR, DL, VT, Mask, AllOnes);
1007 :
1008 0 : Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask);
1009 0 : Op2 = DAG.getNode(ISD::AND, DL, VT, Op2, NotMask);
1010 0 : SDValue Val = DAG.getNode(ISD::OR, DL, VT, Op1, Op2);
1011 0 : return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val);
1012 : }
1013 :
1014 0 : SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) {
1015 0 : EVT VT = Op.getOperand(0).getValueType();
1016 0 : SDLoc DL(Op);
1017 :
1018 : // Make sure that the SINT_TO_FP and SRL instructions are available.
1019 0 : if (TLI.getOperationAction(ISD::SINT_TO_FP, VT) == TargetLowering::Expand ||
1020 : TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand)
1021 0 : return DAG.UnrollVectorOp(Op.getNode());
1022 :
1023 : unsigned BW = VT.getScalarSizeInBits();
1024 : assert((BW == 64 || BW == 32) &&
1025 : "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide");
1026 :
1027 0 : SDValue HalfWord = DAG.getConstant(BW / 2, DL, VT);
1028 :
1029 : // Constants to clear the upper part of the word.
1030 : // Notice that we can also use SHL+SHR, but using a constant is slightly
1031 : // faster on x86.
1032 0 : uint64_t HWMask = (BW == 64) ? 0x00000000FFFFFFFF : 0x0000FFFF;
1033 0 : SDValue HalfWordMask = DAG.getConstant(HWMask, DL, VT);
1034 :
1035 : // Two to the power of half-word-size.
1036 0 : SDValue TWOHW = DAG.getConstantFP(1ULL << (BW / 2), DL, Op.getValueType());
1037 :
1038 : // Clear upper part of LO, lower HI
1039 0 : SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Op.getOperand(0), HalfWord);
1040 0 : SDValue LO = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), HalfWordMask);
1041 :
1042 : // Convert hi and lo to floats
1043 : // Convert the hi part back to the upper values
1044 : // TODO: Can any fast-math-flags be set on these nodes?
1045 0 : SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), HI);
1046 0 : fHI = DAG.getNode(ISD::FMUL, DL, Op.getValueType(), fHI, TWOHW);
1047 0 : SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), LO);
1048 :
1049 : // Add the two halves
1050 0 : return DAG.getNode(ISD::FADD, DL, Op.getValueType(), fHI, fLO);
1051 : }
1052 :
1053 0 : SDValue VectorLegalizer::ExpandFNEG(SDValue Op) {
1054 0 : if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) {
1055 0 : SDLoc DL(Op);
1056 0 : SDValue Zero = DAG.getConstantFP(-0.0, DL, Op.getValueType());
1057 : // TODO: If FNEG had fast-math-flags, they'd get propagated to this FSUB.
1058 0 : return DAG.getNode(ISD::FSUB, DL, Op.getValueType(),
1059 0 : Zero, Op.getOperand(0));
1060 : }
1061 0 : return DAG.UnrollVectorOp(Op.getNode());
1062 : }
1063 :
1064 0 : SDValue VectorLegalizer::ExpandFSUB(SDValue Op) {
1065 : // For floating-point values, (a-b) is the same as a+(-b). If FNEG is legal,
1066 : // we can defer this to operation legalization where it will be lowered as
1067 : // a+(-b).
1068 : EVT VT = Op.getValueType();
1069 0 : if (TLI.isOperationLegalOrCustom(ISD::FNEG, VT) &&
1070 0 : TLI.isOperationLegalOrCustom(ISD::FADD, VT))
1071 0 : return Op; // Defer to LegalizeDAG
1072 :
1073 0 : return DAG.UnrollVectorOp(Op.getNode());
1074 : }
1075 :
1076 0 : SDValue VectorLegalizer::ExpandCTLZ(SDValue Op) {
1077 0 : EVT VT = Op.getValueType();
1078 : unsigned NumBitsPerElt = VT.getScalarSizeInBits();
1079 :
1080 : // If the non-ZERO_UNDEF version is supported we can use that instead.
1081 0 : if (Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
1082 0 : TLI.isOperationLegalOrCustom(ISD::CTLZ, VT)) {
1083 0 : SDLoc DL(Op);
1084 0 : return DAG.getNode(ISD::CTLZ, DL, VT, Op.getOperand(0));
1085 : }
1086 :
1087 : // If we have the appropriate vector bit operations, it is better to use them
1088 : // than unrolling and expanding each component.
1089 : if (isPowerOf2_32(NumBitsPerElt) &&
1090 0 : TLI.isOperationLegalOrCustom(ISD::CTPOP, VT) &&
1091 0 : TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
1092 0 : TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT))
1093 0 : return Op;
1094 :
1095 : // Otherwise go ahead and unroll.
1096 0 : return DAG.UnrollVectorOp(Op.getNode());
1097 : }
1098 :
1099 0 : SDValue VectorLegalizer::ExpandCTTZ(SDValue Op) {
1100 0 : EVT VT = Op.getValueType();
1101 : unsigned NumBitsPerElt = VT.getScalarSizeInBits();
1102 :
1103 : // If the non-ZERO_UNDEF version is supported we can use that instead.
1104 0 : if (TLI.isOperationLegalOrCustom(ISD::CTTZ, VT)) {
1105 0 : SDLoc DL(Op);
1106 0 : return DAG.getNode(ISD::CTTZ, DL, VT, Op.getOperand(0));
1107 : }
1108 :
1109 : // If we have the appropriate vector bit operations, it is better to use them
1110 : // than unrolling and expanding each component.
1111 : if (isPowerOf2_32(NumBitsPerElt) &&
1112 0 : (TLI.isOperationLegalOrCustom(ISD::CTPOP, VT) ||
1113 0 : TLI.isOperationLegalOrCustom(ISD::CTLZ, VT)) &&
1114 0 : TLI.isOperationLegalOrCustom(ISD::SUB, VT) &&
1115 0 : TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) &&
1116 0 : TLI.isOperationLegalOrCustomOrPromote(ISD::XOR, VT))
1117 0 : return Op;
1118 :
1119 : // Otherwise go ahead and unroll.
1120 0 : return DAG.UnrollVectorOp(Op.getNode());
1121 : }
1122 :
1123 0 : SDValue VectorLegalizer::ExpandStrictFPOp(SDValue Op) {
1124 0 : EVT VT = Op.getValueType();
1125 0 : EVT EltVT = VT.getVectorElementType();
1126 : unsigned NumElems = VT.getVectorNumElements();
1127 : unsigned NumOpers = Op.getNumOperands();
1128 0 : const TargetLowering &TLI = DAG.getTargetLoweringInfo();
1129 0 : EVT ValueVTs[] = {EltVT, MVT::Other};
1130 0 : SDValue Chain = Op.getOperand(0);
1131 0 : SDLoc dl(Op);
1132 :
1133 : SmallVector<SDValue, 32> OpValues;
1134 : SmallVector<SDValue, 32> OpChains;
1135 0 : for (unsigned i = 0; i < NumElems; ++i) {
1136 : SmallVector<SDValue, 4> Opers;
1137 0 : SDValue Idx = DAG.getConstant(i, dl,
1138 0 : TLI.getVectorIdxTy(DAG.getDataLayout()));
1139 :
1140 : // The Chain is the first operand.
1141 0 : Opers.push_back(Chain);
1142 :
1143 : // Now process the remaining operands.
1144 0 : for (unsigned j = 1; j < NumOpers; ++j) {
1145 0 : SDValue Oper = Op.getOperand(j);
1146 0 : EVT OperVT = Oper.getValueType();
1147 :
1148 0 : if (OperVT.isVector())
1149 0 : Oper = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
1150 0 : EltVT, Oper, Idx);
1151 :
1152 0 : Opers.push_back(Oper);
1153 : }
1154 :
1155 0 : SDValue ScalarOp = DAG.getNode(Op->getOpcode(), dl, ValueVTs, Opers);
1156 :
1157 0 : OpValues.push_back(ScalarOp.getValue(0));
1158 0 : OpChains.push_back(ScalarOp.getValue(1));
1159 : }
1160 :
1161 0 : SDValue Result = DAG.getBuildVector(VT, dl, OpValues);
1162 0 : SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OpChains);
1163 :
1164 0 : AddLegalizedOperand(Op.getValue(0), Result);
1165 0 : AddLegalizedOperand(Op.getValue(1), NewChain);
1166 :
1167 0 : return Op.getResNo() ? NewChain : Result;
1168 : }
1169 :
1170 0 : SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) {
1171 0 : EVT VT = Op.getValueType();
1172 : unsigned NumElems = VT.getVectorNumElements();
1173 0 : EVT EltVT = VT.getVectorElementType();
1174 0 : SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1), CC = Op.getOperand(2);
1175 0 : EVT TmpEltVT = LHS.getValueType().getVectorElementType();
1176 0 : SDLoc dl(Op);
1177 0 : SmallVector<SDValue, 8> Ops(NumElems);
1178 0 : for (unsigned i = 0; i < NumElems; ++i) {
1179 0 : SDValue LHSElem = DAG.getNode(
1180 : ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS,
1181 0 : DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
1182 0 : SDValue RHSElem = DAG.getNode(
1183 : ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS,
1184 0 : DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
1185 0 : Ops[i] = DAG.getNode(ISD::SETCC, dl,
1186 0 : TLI.getSetCCResultType(DAG.getDataLayout(),
1187 0 : *DAG.getContext(), TmpEltVT),
1188 0 : LHSElem, RHSElem, CC);
1189 0 : Ops[i] = DAG.getSelect(dl, EltVT, Ops[i],
1190 0 : DAG.getConstant(APInt::getAllOnesValue
1191 0 : (EltVT.getSizeInBits()), dl, EltVT),
1192 0 : DAG.getConstant(0, dl, EltVT));
1193 : }
1194 0 : return DAG.getBuildVector(VT, dl, Ops);
1195 : }
1196 :
1197 1269116 : bool SelectionDAG::LegalizeVectors() {
1198 1269116 : return VectorLegalizer(*this).Run();
1199 : }
|