Clang Static Analyzer report — File: lib/Target/AArch64/AArch64ISelDAGToDAG.cpp, line 634, column 67: "The result of the '<<' expression is undefined."
1 | //===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===// | |||
2 | // | |||
3 | // The LLVM Compiler Infrastructure | |||
4 | // | |||
5 | // This file is distributed under the University of Illinois Open Source | |||
6 | // License. See LICENSE.TXT for details. | |||
7 | // | |||
8 | //===----------------------------------------------------------------------===// | |||
9 | // | |||
10 | // This file defines an instruction selector for the AArch64 target. | |||
11 | // | |||
12 | //===----------------------------------------------------------------------===// | |||
13 | ||||
14 | #include "AArch64TargetMachine.h" | |||
15 | #include "MCTargetDesc/AArch64AddressingModes.h" | |||
16 | #include "llvm/ADT/APSInt.h" | |||
17 | #include "llvm/CodeGen/SelectionDAGISel.h" | |||
18 | #include "llvm/IR/Function.h" // To access function attributes. | |||
19 | #include "llvm/IR/GlobalValue.h" | |||
20 | #include "llvm/IR/Intrinsics.h" | |||
21 | #include "llvm/Support/Debug.h" | |||
22 | #include "llvm/Support/ErrorHandling.h" | |||
23 | #include "llvm/Support/MathExtras.h" | |||
24 | #include "llvm/Support/raw_ostream.h" | |||
25 | ||||
26 | using namespace llvm; | |||
27 | ||||
28 | #define DEBUG_TYPE"aarch64-isel" "aarch64-isel" | |||
29 | ||||
//===--------------------------------------------------------------------===//
/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
/// instructions for SelectionDAG operations.
///
namespace {

class AArch64DAGToDAGISel : public SelectionDAGISel {
  AArch64TargetMachine &TM;

  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;

  // Set per function in runOnMachineFunction from the optsize/minsize
  // attributes; isWorthFolding() folds more aggressively when this is true.
  bool ForCodeSize;

public:
  explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
                               CodeGenOpt::Level OptLevel)
      : SelectionDAGISel(tm, OptLevel), TM(tm), Subtarget(nullptr),
        ForCodeSize(false) {}

  const char *getPassName() const override {
    return "AArch64 Instruction Selection";
  }

  bool runOnMachineFunction(MachineFunction &MF) override {
    // Cache per-function state before delegating to the common ISel driver.
    ForCodeSize =
        MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize) ||
        MF.getFunction()->hasFnAttribute(Attribute::MinSize);
    Subtarget = &MF.getSubtarget<AArch64Subtarget>();
    return SelectionDAGISel::runOnMachineFunction(MF);
  }

  SDNode *Select(SDNode *Node) override;

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                    unsigned ConstraintID,
                                    std::vector<SDValue> &OutOps) override;

  SDNode *SelectMLAV64LaneV128(SDNode *N);
  SDNode *SelectMULLV64LaneV128(unsigned IntNo, SDNode *N);
  bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
  bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
  bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
  // Arithmetic instructions allow only LSL/LSR/ASR shifted registers; the
  // logical instructions additionally allow ROR.
  bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
    return SelectShiftedRegister(N, false, Reg, Shift);
  }
  bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
    return SelectShiftedRegister(N, true, Reg, Shift);
  }
  // "Register plus scaled unsigned 12-bit immediate" addressing; one entry
  // point per access size in bytes.
  bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 1, Base, OffImm);
  }
  bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 2, Base, OffImm);
  }
  bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 4, Base, OffImm);
  }
  bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 8, Base, OffImm);
  }
  bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 16, Base, OffImm);
  }
  // "Register plus unscaled signed 9-bit immediate" addressing; one entry
  // point per access size in bytes.
  bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 1, Base, OffImm);
  }
  bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 2, Base, OffImm);
  }
  bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 4, Base, OffImm);
  }
  bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 8, Base, OffImm);
  }
  bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 16, Base, OffImm);
  }

  // "Register plus (possibly extended/shifted) W-register offset" addressing;
  // Width is the access width in bits.
  template<int Width>
  bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
                         SDValue &SignExtend, SDValue &DoShift) {
    return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
  }

  // "Register plus (possibly shifted) X-register offset" addressing; Width is
  // the access width in bits.
  template<int Width>
  bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
                         SDValue &SignExtend, SDValue &DoShift) {
    return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
  }


  /// Form sequences of consecutive 64/128-bit registers for use in NEON
  /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
  /// between 1 and 4 elements. If it contains a single element that is returned
  /// unchanged; otherwise a REG_SEQUENCE value is returned.
  SDValue createDTuple(ArrayRef<SDValue> Vecs);
  SDValue createQTuple(ArrayRef<SDValue> Vecs);

  /// Generic helper for the createDTuple/createQTuple
  /// functions. Those should almost always be called instead.
  SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
                      const unsigned SubRegs[]);

  SDNode *SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);

  SDNode *SelectIndexedLoad(SDNode *N, bool &Done);

  // Selection for NEON multi-vector loads/stores (plain, post-indexed, and
  // single-lane variants). NumVecs is the register-tuple arity, Opc the
  // machine opcode to emit.
  SDNode *SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
                     unsigned SubRegIdx);
  SDNode *SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
                         unsigned SubRegIdx);
  SDNode *SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  SDNode *SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);

  SDNode *SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
  SDNode *SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
  SDNode *SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  SDNode *SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);

  SDNode *SelectBitfieldExtractOp(SDNode *N);
  SDNode *SelectBitfieldInsertOp(SDNode *N);

  SDNode *SelectLIBM(SDNode *N);

// Include the pieces autogenerated from the target description.
#include "AArch64GenDAGISel.inc"

private:
  bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
                             SDValue &Shift);
  bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
                             SDValue &OffImm);
  bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
                              SDValue &OffImm);
  bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
                         SDValue &Offset, SDValue &SignExtend,
                         SDValue &DoShift);
  bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
                         SDValue &Offset, SDValue &SignExtend,
                         SDValue &DoShift);
  bool isWorthFolding(SDValue V) const;
  bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
                         SDValue &Offset, SDValue &SignExtend);

  // Match a fixed-point conversion scale operand for a register of RegWidth
  // bits.
  template<unsigned RegWidth>
  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
    return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
  }

  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
};
} // end anonymous namespace
187 | ||||
188 | /// isIntImmediate - This method tests to see if the node is a constant | |||
189 | /// operand. If so Imm will receive the 32-bit value. | |||
190 | static bool isIntImmediate(const SDNode *N, uint64_t &Imm) { | |||
191 | if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) { | |||
192 | Imm = C->getZExtValue(); | |||
193 | return true; | |||
194 | } | |||
195 | return false; | |||
196 | } | |||
197 | ||||
198 | // isIntImmediate - This method tests to see if a constant operand. | |||
199 | // If so Imm will receive the value. | |||
200 | static bool isIntImmediate(SDValue N, uint64_t &Imm) { | |||
201 | return isIntImmediate(N.getNode(), Imm); | |||
202 | } | |||
203 | ||||
204 | // isOpcWithIntImmediate - This method tests to see if the node is a specific | |||
205 | // opcode and that it has a immediate integer right operand. | |||
206 | // If so Imm will receive the 32 bit value. | |||
207 | static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc, | |||
208 | uint64_t &Imm) { | |||
209 | return N->getOpcode() == Opc && | |||
210 | isIntImmediate(N->getOperand(1).getNode(), Imm); | |||
211 | } | |||
212 | ||||
213 | bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand( | |||
214 | const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) { | |||
215 | switch(ConstraintID) { | |||
216 | default: | |||
217 | llvm_unreachable("Unexpected asm memory constraint")::llvm::llvm_unreachable_internal("Unexpected asm memory constraint" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn235822/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 217); | |||
218 | case InlineAsm::Constraint_i: | |||
219 | case InlineAsm::Constraint_m: | |||
220 | case InlineAsm::Constraint_Q: | |||
221 | // Require the address to be in a register. That is safe for all AArch64 | |||
222 | // variants and it is hard to do anything much smarter without knowing | |||
223 | // how the operand is used. | |||
224 | OutOps.push_back(Op); | |||
225 | return false; | |||
226 | } | |||
227 | return true; | |||
228 | } | |||
229 | ||||
230 | /// SelectArithImmed - Select an immediate value that can be represented as | |||
231 | /// a 12-bit value shifted left by either 0 or 12. If so, return true with | |||
232 | /// Val set to the 12-bit value and Shift set to the shifter operand. | |||
233 | bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val, | |||
234 | SDValue &Shift) { | |||
235 | // This function is called from the addsub_shifted_imm ComplexPattern, | |||
236 | // which lists [imm] as the list of opcode it's interested in, however | |||
237 | // we still need to check whether the operand is actually an immediate | |||
238 | // here because the ComplexPattern opcode list is only used in | |||
239 | // root-level opcode matching. | |||
240 | if (!isa<ConstantSDNode>(N.getNode())) | |||
241 | return false; | |||
242 | ||||
243 | uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue(); | |||
244 | unsigned ShiftAmt; | |||
245 | ||||
246 | if (Immed >> 12 == 0) { | |||
247 | ShiftAmt = 0; | |||
248 | } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) { | |||
249 | ShiftAmt = 12; | |||
250 | Immed = Immed >> 12; | |||
251 | } else | |||
252 | return false; | |||
253 | ||||
254 | unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt); | |||
255 | Val = CurDAG->getTargetConstant(Immed, MVT::i32); | |||
256 | Shift = CurDAG->getTargetConstant(ShVal, MVT::i32); | |||
257 | return true; | |||
258 | } | |||
259 | ||||
260 | /// SelectNegArithImmed - As above, but negates the value before trying to | |||
261 | /// select it. | |||
262 | bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val, | |||
263 | SDValue &Shift) { | |||
264 | // This function is called from the addsub_shifted_imm ComplexPattern, | |||
265 | // which lists [imm] as the list of opcode it's interested in, however | |||
266 | // we still need to check whether the operand is actually an immediate | |||
267 | // here because the ComplexPattern opcode list is only used in | |||
268 | // root-level opcode matching. | |||
269 | if (!isa<ConstantSDNode>(N.getNode())) | |||
270 | return false; | |||
271 | ||||
272 | // The immediate operand must be a 24-bit zero-extended immediate. | |||
273 | uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue(); | |||
274 | ||||
275 | // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0" | |||
276 | // have the opposite effect on the C flag, so this pattern mustn't match under | |||
277 | // those circumstances. | |||
278 | if (Immed == 0) | |||
279 | return false; | |||
280 | ||||
281 | if (N.getValueType() == MVT::i32) | |||
282 | Immed = ~((uint32_t)Immed) + 1; | |||
283 | else | |||
284 | Immed = ~Immed + 1ULL; | |||
285 | if (Immed & 0xFFFFFFFFFF000000ULL) | |||
286 | return false; | |||
287 | ||||
288 | Immed &= 0xFFFFFFULL; | |||
289 | return SelectArithImmed(CurDAG->getConstant(Immed, MVT::i32), Val, Shift); | |||
290 | } | |||
291 | ||||
292 | /// getShiftTypeForNode - Translate a shift node to the corresponding | |||
293 | /// ShiftType value. | |||
294 | static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) { | |||
295 | switch (N.getOpcode()) { | |||
296 | default: | |||
297 | return AArch64_AM::InvalidShiftExtend; | |||
298 | case ISD::SHL: | |||
299 | return AArch64_AM::LSL; | |||
300 | case ISD::SRL: | |||
301 | return AArch64_AM::LSR; | |||
302 | case ISD::SRA: | |||
303 | return AArch64_AM::ASR; | |||
304 | case ISD::ROTR: | |||
305 | return AArch64_AM::ROR; | |||
306 | } | |||
307 | } | |||
308 | ||||
309 | /// \brief Determine whether it is worth to fold V into an extended register. | |||
310 | bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const { | |||
311 | // it hurts if the value is used at least twice, unless we are optimizing | |||
312 | // for code size. | |||
313 | if (ForCodeSize || V.hasOneUse()) | |||
314 | return true; | |||
315 | return false; | |||
316 | } | |||
317 | ||||
318 | /// SelectShiftedRegister - Select a "shifted register" operand. If the value | |||
319 | /// is not shifted, set the Shift operand to default of "LSL 0". The logical | |||
320 | /// instructions allow the shifted register to be rotated, but the arithmetic | |||
321 | /// instructions do not. The AllowROR parameter specifies whether ROR is | |||
322 | /// supported. | |||
323 | bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR, | |||
324 | SDValue &Reg, SDValue &Shift) { | |||
325 | AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N); | |||
326 | if (ShType == AArch64_AM::InvalidShiftExtend) | |||
327 | return false; | |||
328 | if (!AllowROR && ShType == AArch64_AM::ROR) | |||
329 | return false; | |||
330 | ||||
331 | if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { | |||
332 | unsigned BitSize = N.getValueType().getSizeInBits(); | |||
333 | unsigned Val = RHS->getZExtValue() & (BitSize - 1); | |||
334 | unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val); | |||
335 | ||||
336 | Reg = N.getOperand(0); | |||
337 | Shift = CurDAG->getTargetConstant(ShVal, MVT::i32); | |||
338 | return isWorthFolding(N); | |||
339 | } | |||
340 | ||||
341 | return false; | |||
342 | } | |||
343 | ||||
344 | /// getExtendTypeForNode - Translate an extend node to the corresponding | |||
345 | /// ExtendType value. | |||
346 | static AArch64_AM::ShiftExtendType | |||
347 | getExtendTypeForNode(SDValue N, bool IsLoadStore = false) { | |||
348 | if (N.getOpcode() == ISD::SIGN_EXTEND || | |||
349 | N.getOpcode() == ISD::SIGN_EXTEND_INREG) { | |||
350 | EVT SrcVT; | |||
351 | if (N.getOpcode() == ISD::SIGN_EXTEND_INREG) | |||
352 | SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT(); | |||
353 | else | |||
354 | SrcVT = N.getOperand(0).getValueType(); | |||
355 | ||||
356 | if (!IsLoadStore && SrcVT == MVT::i8) | |||
357 | return AArch64_AM::SXTB; | |||
358 | else if (!IsLoadStore && SrcVT == MVT::i16) | |||
359 | return AArch64_AM::SXTH; | |||
360 | else if (SrcVT == MVT::i32) | |||
361 | return AArch64_AM::SXTW; | |||
362 | assert(SrcVT != MVT::i64 && "extend from 64-bits?")((SrcVT != MVT::i64 && "extend from 64-bits?") ? static_cast <void> (0) : __assert_fail ("SrcVT != MVT::i64 && \"extend from 64-bits?\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn235822/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 362, __PRETTY_FUNCTION__)); | |||
363 | ||||
364 | return AArch64_AM::InvalidShiftExtend; | |||
365 | } else if (N.getOpcode() == ISD::ZERO_EXTEND || | |||
366 | N.getOpcode() == ISD::ANY_EXTEND) { | |||
367 | EVT SrcVT = N.getOperand(0).getValueType(); | |||
368 | if (!IsLoadStore && SrcVT == MVT::i8) | |||
369 | return AArch64_AM::UXTB; | |||
370 | else if (!IsLoadStore && SrcVT == MVT::i16) | |||
371 | return AArch64_AM::UXTH; | |||
372 | else if (SrcVT == MVT::i32) | |||
373 | return AArch64_AM::UXTW; | |||
374 | assert(SrcVT != MVT::i64 && "extend from 64-bits?")((SrcVT != MVT::i64 && "extend from 64-bits?") ? static_cast <void> (0) : __assert_fail ("SrcVT != MVT::i64 && \"extend from 64-bits?\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn235822/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 374, __PRETTY_FUNCTION__)); | |||
375 | ||||
376 | return AArch64_AM::InvalidShiftExtend; | |||
377 | } else if (N.getOpcode() == ISD::AND) { | |||
378 | ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1)); | |||
379 | if (!CSD) | |||
380 | return AArch64_AM::InvalidShiftExtend; | |||
381 | uint64_t AndMask = CSD->getZExtValue(); | |||
382 | ||||
383 | switch (AndMask) { | |||
384 | default: | |||
385 | return AArch64_AM::InvalidShiftExtend; | |||
386 | case 0xFF: | |||
387 | return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend; | |||
388 | case 0xFFFF: | |||
389 | return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend; | |||
390 | case 0xFFFFFFFF: | |||
391 | return AArch64_AM::UXTW; | |||
392 | } | |||
393 | } | |||
394 | ||||
395 | return AArch64_AM::InvalidShiftExtend; | |||
396 | } | |||
397 | ||||
398 | // Helper for SelectMLAV64LaneV128 - Recognize high lane extracts. | |||
399 | static bool checkHighLaneIndex(SDNode *DL, SDValue &LaneOp, int &LaneIdx) { | |||
400 | if (DL->getOpcode() != AArch64ISD::DUPLANE16 && | |||
401 | DL->getOpcode() != AArch64ISD::DUPLANE32) | |||
402 | return false; | |||
403 | ||||
404 | SDValue SV = DL->getOperand(0); | |||
405 | if (SV.getOpcode() != ISD::INSERT_SUBVECTOR) | |||
406 | return false; | |||
407 | ||||
408 | SDValue EV = SV.getOperand(1); | |||
409 | if (EV.getOpcode() != ISD::EXTRACT_SUBVECTOR) | |||
410 | return false; | |||
411 | ||||
412 | ConstantSDNode *DLidx = cast<ConstantSDNode>(DL->getOperand(1).getNode()); | |||
413 | ConstantSDNode *EVidx = cast<ConstantSDNode>(EV.getOperand(1).getNode()); | |||
414 | LaneIdx = DLidx->getSExtValue() + EVidx->getSExtValue(); | |||
415 | LaneOp = EV.getOperand(0); | |||
416 | ||||
417 | return true; | |||
418 | } | |||
419 | ||||
420 | // Helper for SelectOpcV64LaneV128 - Recogzine operatinos where one operand is a | |||
421 | // high lane extract. | |||
422 | static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp, | |||
423 | SDValue &LaneOp, int &LaneIdx) { | |||
424 | ||||
425 | if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) { | |||
426 | std::swap(Op0, Op1); | |||
427 | if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) | |||
428 | return false; | |||
429 | } | |||
430 | StdOp = Op1; | |||
431 | return true; | |||
432 | } | |||
433 | ||||
434 | /// SelectMLAV64LaneV128 - AArch64 supports vector MLAs where one multiplicand | |||
435 | /// is a lane in the upper half of a 128-bit vector. Recognize and select this | |||
436 | /// so that we don't emit unnecessary lane extracts. | |||
437 | SDNode *AArch64DAGToDAGISel::SelectMLAV64LaneV128(SDNode *N) { | |||
438 | SDValue Op0 = N->getOperand(0); | |||
439 | SDValue Op1 = N->getOperand(1); | |||
440 | SDValue MLAOp1; // Will hold ordinary multiplicand for MLA. | |||
441 | SDValue MLAOp2; // Will hold lane-accessed multiplicand for MLA. | |||
442 | int LaneIdx = -1; // Will hold the lane index. | |||
443 | ||||
444 | if (Op1.getOpcode() != ISD::MUL || | |||
445 | !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2, | |||
446 | LaneIdx)) { | |||
447 | std::swap(Op0, Op1); | |||
448 | if (Op1.getOpcode() != ISD::MUL || | |||
449 | !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2, | |||
450 | LaneIdx)) | |||
451 | return nullptr; | |||
452 | } | |||
453 | ||||
454 | SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, MVT::i64); | |||
455 | ||||
456 | SDValue Ops[] = { Op0, MLAOp1, MLAOp2, LaneIdxVal }; | |||
457 | ||||
458 | unsigned MLAOpc = ~0U; | |||
459 | ||||
460 | switch (N->getSimpleValueType(0).SimpleTy) { | |||
461 | default: | |||
462 | llvm_unreachable("Unrecognized MLA.")::llvm::llvm_unreachable_internal("Unrecognized MLA.", "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn235822/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 462); | |||
463 | case MVT::v4i16: | |||
464 | MLAOpc = AArch64::MLAv4i16_indexed; | |||
465 | break; | |||
466 | case MVT::v8i16: | |||
467 | MLAOpc = AArch64::MLAv8i16_indexed; | |||
468 | break; | |||
469 | case MVT::v2i32: | |||
470 | MLAOpc = AArch64::MLAv2i32_indexed; | |||
471 | break; | |||
472 | case MVT::v4i32: | |||
473 | MLAOpc = AArch64::MLAv4i32_indexed; | |||
474 | break; | |||
475 | } | |||
476 | ||||
477 | return CurDAG->getMachineNode(MLAOpc, SDLoc(N), N->getValueType(0), Ops); | |||
478 | } | |||
479 | ||||
480 | SDNode *AArch64DAGToDAGISel::SelectMULLV64LaneV128(unsigned IntNo, SDNode *N) { | |||
481 | SDValue SMULLOp0; | |||
482 | SDValue SMULLOp1; | |||
483 | int LaneIdx; | |||
484 | ||||
485 | if (!checkV64LaneV128(N->getOperand(1), N->getOperand(2), SMULLOp0, SMULLOp1, | |||
486 | LaneIdx)) | |||
487 | return nullptr; | |||
488 | ||||
489 | SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, MVT::i64); | |||
490 | ||||
491 | SDValue Ops[] = { SMULLOp0, SMULLOp1, LaneIdxVal }; | |||
492 | ||||
493 | unsigned SMULLOpc = ~0U; | |||
494 | ||||
495 | if (IntNo == Intrinsic::aarch64_neon_smull) { | |||
496 | switch (N->getSimpleValueType(0).SimpleTy) { | |||
497 | default: | |||
498 | llvm_unreachable("Unrecognized SMULL.")::llvm::llvm_unreachable_internal("Unrecognized SMULL.", "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn235822/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 498); | |||
499 | case MVT::v4i32: | |||
500 | SMULLOpc = AArch64::SMULLv4i16_indexed; | |||
501 | break; | |||
502 | case MVT::v2i64: | |||
503 | SMULLOpc = AArch64::SMULLv2i32_indexed; | |||
504 | break; | |||
505 | } | |||
506 | } else if (IntNo == Intrinsic::aarch64_neon_umull) { | |||
507 | switch (N->getSimpleValueType(0).SimpleTy) { | |||
508 | default: | |||
509 | llvm_unreachable("Unrecognized SMULL.")::llvm::llvm_unreachable_internal("Unrecognized SMULL.", "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn235822/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 509); | |||
510 | case MVT::v4i32: | |||
511 | SMULLOpc = AArch64::UMULLv4i16_indexed; | |||
512 | break; | |||
513 | case MVT::v2i64: | |||
514 | SMULLOpc = AArch64::UMULLv2i32_indexed; | |||
515 | break; | |||
516 | } | |||
517 | } else | |||
518 | llvm_unreachable("Unrecognized intrinsic.")::llvm::llvm_unreachable_internal("Unrecognized intrinsic.", "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn235822/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 518); | |||
519 | ||||
520 | return CurDAG->getMachineNode(SMULLOpc, SDLoc(N), N->getValueType(0), Ops); | |||
521 | } | |||
522 | ||||
523 | /// Instructions that accept extend modifiers like UXTW expect the register | |||
524 | /// being extended to be a GPR32, but the incoming DAG might be acting on a | |||
525 | /// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if | |||
526 | /// this is the case. | |||
527 | static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) { | |||
528 | if (N.getValueType() == MVT::i32) | |||
529 | return N; | |||
530 | ||||
531 | SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32); | |||
532 | MachineSDNode *Node = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, | |||
533 | SDLoc(N), MVT::i32, N, SubReg); | |||
534 | return SDValue(Node, 0); | |||
535 | } | |||
536 | ||||
537 | ||||
538 | /// SelectArithExtendedRegister - Select a "extended register" operand. This | |||
539 | /// operand folds in an extend followed by an optional left shift. | |||
540 | bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg, | |||
541 | SDValue &Shift) { | |||
542 | unsigned ShiftVal = 0; | |||
543 | AArch64_AM::ShiftExtendType Ext; | |||
544 | ||||
545 | if (N.getOpcode() == ISD::SHL) { | |||
546 | ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1)); | |||
547 | if (!CSD) | |||
548 | return false; | |||
549 | ShiftVal = CSD->getZExtValue(); | |||
550 | if (ShiftVal > 4) | |||
551 | return false; | |||
552 | ||||
553 | Ext = getExtendTypeForNode(N.getOperand(0)); | |||
554 | if (Ext == AArch64_AM::InvalidShiftExtend) | |||
555 | return false; | |||
556 | ||||
557 | Reg = N.getOperand(0).getOperand(0); | |||
558 | } else { | |||
559 | Ext = getExtendTypeForNode(N); | |||
560 | if (Ext == AArch64_AM::InvalidShiftExtend) | |||
561 | return false; | |||
562 | ||||
563 | Reg = N.getOperand(0); | |||
564 | } | |||
565 | ||||
566 | // AArch64 mandates that the RHS of the operation must use the smallest | |||
567 | // register classs that could contain the size being extended from. Thus, | |||
568 | // if we're folding a (sext i8), we need the RHS to be a GPR32, even though | |||
569 | // there might not be an actual 32-bit value in the program. We can | |||
570 | // (harmlessly) synthesize one by injected an EXTRACT_SUBREG here. | |||
571 | assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX)((Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX) ? static_cast<void> (0) : __assert_fail ("Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn235822/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 571, __PRETTY_FUNCTION__)); | |||
572 | Reg = narrowIfNeeded(CurDAG, Reg); | |||
573 | Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), MVT::i32); | |||
574 | return isWorthFolding(N); | |||
575 | } | |||
576 | ||||
577 | /// If there's a use of this ADDlow that's not itself a load/store then we'll | |||
578 | /// need to create a real ADD instruction from it anyway and there's no point in | |||
579 | /// folding it into the mem op. Theoretically, it shouldn't matter, but there's | |||
580 | /// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding | |||
581 | /// leads to duplaicated ADRP instructions. | |||
582 | static bool isWorthFoldingADDlow(SDValue N) { | |||
583 | for (auto Use : N->uses()) { | |||
584 | if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE && | |||
585 | Use->getOpcode() != ISD::ATOMIC_LOAD && | |||
586 | Use->getOpcode() != ISD::ATOMIC_STORE) | |||
587 | return false; | |||
588 | ||||
589 | // ldar and stlr have much more restrictive addressing modes (just a | |||
590 | // register). | |||
591 | if (cast<MemSDNode>(Use)->getOrdering() > Monotonic) | |||
592 | return false; | |||
593 | } | |||
594 | ||||
595 | return true; | |||
596 | } | |||
597 | ||||
598 | /// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit | |||
599 | /// immediate" address. The "Size" argument is the size in bytes of the memory | |||
600 | /// reference, which determines the scale. | |||
601 | bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size, | |||
602 | SDValue &Base, SDValue &OffImm) { | |||
603 | const TargetLowering *TLI = getTargetLowering(); | |||
604 | if (N.getOpcode() == ISD::FrameIndex) { | |||
| ||||
605 | int FI = cast<FrameIndexSDNode>(N)->getIndex(); | |||
606 | Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); | |||
607 | OffImm = CurDAG->getTargetConstant(0, MVT::i64); | |||
608 | return true; | |||
609 | } | |||
610 | ||||
611 | if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) { | |||
612 | GlobalAddressSDNode *GAN = | |||
613 | dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode()); | |||
614 | Base = N.getOperand(0); | |||
615 | OffImm = N.getOperand(1); | |||
616 | if (!GAN) | |||
617 | return true; | |||
618 | ||||
619 | const GlobalValue *GV = GAN->getGlobal(); | |||
620 | unsigned Alignment = GV->getAlignment(); | |||
621 | const DataLayout *DL = TLI->getDataLayout(); | |||
622 | Type *Ty = GV->getType()->getElementType(); | |||
623 | if (Alignment == 0 && Ty->isSized()) | |||
624 | Alignment = DL->getABITypeAlignment(Ty); | |||
625 | ||||
626 | if (Alignment >= Size) | |||
627 | return true; | |||
628 | } | |||
629 | ||||
630 | if (CurDAG->isBaseWithConstantOffset(N)) { | |||
631 | if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { | |||
632 | int64_t RHSC = (int64_t)RHS->getZExtValue(); | |||
633 | unsigned Scale = Log2_32(Size); | |||
634 | if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) { | |||
| ||||
635 | Base = N.getOperand(0); | |||
636 | if (Base.getOpcode() == ISD::FrameIndex) { | |||
637 | int FI = cast<FrameIndexSDNode>(Base)->getIndex(); | |||
638 | Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); | |||
639 | } | |||
640 | OffImm = CurDAG->getTargetConstant(RHSC >> Scale, MVT::i64); | |||
641 | return true; | |||
642 | } | |||
643 | } | |||
644 | } | |||
645 | ||||
646 | // Before falling back to our general case, check if the unscaled | |||
647 | // instructions can handle this. If so, that's preferable. | |||
648 | if (SelectAddrModeUnscaled(N, Size, Base, OffImm)) | |||
649 | return false; | |||
650 | ||||
651 | // Base only. The address will be materialized into a register before | |||
652 | // the memory is accessed. | |||
653 | // add x0, Xbase, #offset | |||
654 | // ldr x0, [x0] | |||
655 | Base = N; | |||
656 | OffImm = CurDAG->getTargetConstant(0, MVT::i64); | |||
657 | return true; | |||
658 | } | |||
659 | ||||
660 | /// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit | |||
661 | /// immediate" address. This should only match when there is an offset that | |||
662 | /// is not valid for a scaled immediate addressing mode. The "Size" argument | |||
663 | /// is the size in bytes of the memory reference, which is needed here to know | |||
664 | /// what is valid for a scaled immediate. | |||
665 | bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size, | |||
666 | SDValue &Base, | |||
667 | SDValue &OffImm) { | |||
668 | if (!CurDAG->isBaseWithConstantOffset(N)) | |||
669 | return false; | |||
670 | if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { | |||
671 | int64_t RHSC = RHS->getSExtValue(); | |||
672 | // If the offset is valid as a scaled immediate, don't match here. | |||
673 | if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && | |||
674 | RHSC < (0x1000 << Log2_32(Size))) | |||
675 | return false; | |||
676 | if (RHSC >= -256 && RHSC < 256) { | |||
677 | Base = N.getOperand(0); | |||
678 | if (Base.getOpcode() == ISD::FrameIndex) { | |||
679 | int FI = cast<FrameIndexSDNode>(Base)->getIndex(); | |||
680 | const TargetLowering *TLI = getTargetLowering(); | |||
681 | Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); | |||
682 | } | |||
683 | OffImm = CurDAG->getTargetConstant(RHSC, MVT::i64); | |||
684 | return true; | |||
685 | } | |||
686 | } | |||
687 | return false; | |||
688 | } | |||
689 | ||||
690 | static SDValue Widen(SelectionDAG *CurDAG, SDValue N) { | |||
691 | SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32); | |||
692 | SDValue ImpDef = SDValue( | |||
693 | CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, SDLoc(N), MVT::i64), | |||
694 | 0); | |||
695 | MachineSDNode *Node = CurDAG->getMachineNode( | |||
696 | TargetOpcode::INSERT_SUBREG, SDLoc(N), MVT::i64, ImpDef, N, SubReg); | |||
697 | return SDValue(Node, 0); | |||
698 | } | |||
699 | ||||
700 | /// \brief Check if the given SHL node (\p N), can be used to form an | |||
701 | /// extended register for an addressing mode. | |||
bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
                                            bool WantExtend, SDValue &Offset,
                                            SDValue &SignExtend) {
  assert(N.getOpcode() == ISD::SHL && "Invalid opcode.")((N.getOpcode() == ISD::SHL && "Invalid opcode.") ? static_cast <void> (0) : __assert_fail ("N.getOpcode() == ISD::SHL && \"Invalid opcode.\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn235822/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 705, __PRETTY_FUNCTION__));
  // The shift amount must be a constant that fits in 3 bits (0..7);
  // anything larger can never match an addressing-mode shift.
  ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
    return false;

  if (WantExtend) {
    // Classify the extension feeding the shift; bail out if it is not a
    // recognized extend. SignExtend is 1 exactly when the extend is SXTW.
    AArch64_AM::ShiftExtendType Ext =
        getExtendTypeForNode(N.getOperand(0), true);
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return false;

    Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
    SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, MVT::i32);
  } else {
    // No extend requested: the shifted value itself is the offset.
    Offset = N.getOperand(0);
    SignExtend = CurDAG->getTargetConstant(0, MVT::i32);
  }

  // Only a shift of 0 or exactly log2(access size) can be folded into the
  // addressing mode.
  unsigned LegalShiftVal = Log2_32(Size);
  unsigned ShiftVal = CSD->getZExtValue();

  if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
    return false;

  // Note: Offset/SignExtend may have been written even when we return
  // false below; callers only consume them on a true return.
  if (isWorthFolding(N))
    return true;

  return false;
}
734 | ||||
bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
                                            SDValue &Base, SDValue &Offset,
                                            SDValue &SignExtend,
                                            SDValue &DoShift) {
  // Select a "base register + extended 32-bit register (optionally shifted
  // by the access size)" address. On success, Base/Offset hold the two
  // registers, SignExtend selects SXTW vs UXTW, and DoShift records
  // whether the offset is scaled.
  if (N.getOpcode() != ISD::ADD)
    return false;
  SDValue LHS = N.getOperand(0);
  SDValue RHS = N.getOperand(1);

  // We don't want to match immediate adds here, because they are better lowered
  // to the register-immediate addressing modes.
  if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
    return false;

  // Check if this particular node is reused in any non-memory related
  // operation.  If yes, do not try to fold this node into the address
  // computation, since the computation will be kept.
  const SDNode *Node = N.getNode();
  for (SDNode *UI : Node->uses()) {
    if (!isa<MemSDNode>(*UI))
      return false;
  }

  // Remember if it is worth folding N when it produces extended register.
  bool IsExtendedRegisterWorthFolding = isWorthFolding(N);

  // Try to match a shifted extend on the RHS.
  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
    Base = LHS;
    DoShift = CurDAG->getTargetConstant(true, MVT::i32);
    return true;
  }

  // Try to match a shifted extend on the LHS.
  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
    Base = RHS;
    DoShift = CurDAG->getTargetConstant(true, MVT::i32);
    return true;
  }

  // There was no shift, whatever else we find.
  DoShift = CurDAG->getTargetConstant(false, MVT::i32);

  AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend;
  // Try to match an unshifted extend on the LHS. Note the condition both
  // tests for and captures the extend kind in Ext.
  if (IsExtendedRegisterWorthFolding &&
      (Ext = getExtendTypeForNode(LHS, true)) !=
          AArch64_AM::InvalidShiftExtend) {
    Base = RHS;
    Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
    SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, MVT::i32);
    // Only commit the match when duplicating the extend is profitable;
    // otherwise fall through and try the RHS (outputs are overwritten).
    if (isWorthFolding(LHS))
      return true;
  }

  // Try to match an unshifted extend on the RHS.
  if (IsExtendedRegisterWorthFolding &&
      (Ext = getExtendTypeForNode(RHS, true)) !=
          AArch64_AM::InvalidShiftExtend) {
    Base = LHS;
    Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
    SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, MVT::i32);
    if (isWorthFolding(RHS))
      return true;
  }

  return false;
}
805 | ||||
// Check if the given immediate is preferred by ADD. If an immediate can be
// encoded in an ADD, or it can be encoded in an "ADD LSL #12" and can not be
// encoded by one MOVZ, return true.
static bool isPreferredADD(int64_t ImmOff) {
  // Immediates in [0x0, 0xfff] fit directly in the ADD encoding.
  if ((ImmOff & ~0xfffLL) == 0)
    return true;
  // Otherwise the value must fit "ADD ..., LSL #12", i.e. only bits
  // [12, 23] may be set.
  if ((ImmOff & ~0xfff000LL) != 0)
    return false;
  // If all set bits fall in [12, 15] (MOVZ, shift 0) or all in [16, 23]
  // (MOVZ, shift 16), a single MOVZ is faster than ADD+LSL; reject those.
  bool HasLowBits = (ImmOff & 0xf000LL) != 0;
  bool HasHighBits = (ImmOff & 0xff0000LL) != 0;
  return HasLowBits && HasHighBits;
}
820 | ||||
821 | bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size, | |||
822 | SDValue &Base, SDValue &Offset, | |||
823 | SDValue &SignExtend, | |||
824 | SDValue &DoShift) { | |||
825 | if (N.getOpcode() != ISD::ADD) | |||
826 | return false; | |||
827 | SDValue LHS = N.getOperand(0); | |||
828 | SDValue RHS = N.getOperand(1); | |||
829 | ||||
830 | // Check if this particular node is reused in any non-memory related | |||
831 | // operation. If yes, do not try to fold this node into the address | |||
832 | // computation, since the computation will be kept. | |||
833 | const SDNode *Node = N.getNode(); | |||
834 | for (SDNode *UI : Node->uses()) { | |||
835 | if (!isa<MemSDNode>(*UI)) | |||
836 | return false; | |||
837 | } | |||
838 | ||||
839 | // Watch out if RHS is a wide immediate, it can not be selected into | |||
840 | // [BaseReg+Imm] addressing mode. Also it may not be able to be encoded into | |||
841 | // ADD/SUB. Instead it will use [BaseReg + 0] address mode and generate | |||
842 | // instructions like: | |||
843 | // MOV X0, WideImmediate | |||
844 | // ADD X1, BaseReg, X0 | |||
845 | // LDR X2, [X1, 0] | |||
846 | // For such situation, using [BaseReg, XReg] addressing mode can save one | |||
847 | // ADD/SUB: | |||
848 | // MOV X0, WideImmediate | |||
849 | // LDR X2, [BaseReg, X0] | |||
850 | if (isa<ConstantSDNode>(RHS)) { | |||
851 | int64_t ImmOff = (int64_t)cast<ConstantSDNode>(RHS)->getZExtValue(); | |||
852 | unsigned Scale = Log2_32(Size); | |||
853 | // Skip the immediate can be seleced by load/store addressing mode. | |||
854 | // Also skip the immediate can be encoded by a single ADD (SUB is also | |||
855 | // checked by using -ImmOff). | |||
856 | if ((ImmOff % Size == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale)) || | |||
857 | isPreferredADD(ImmOff) || isPreferredADD(-ImmOff)) | |||
858 | return false; | |||
859 | ||||
860 | SDLoc DL(N.getNode()); | |||
861 | SDValue Ops[] = { RHS }; | |||
862 | SDNode *MOVI = | |||
863 | CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops); | |||
864 | SDValue MOVIV = SDValue(MOVI, 0); | |||
865 | // This ADD of two X register will be selected into [Reg+Reg] mode. | |||
866 | N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV); | |||
867 | } | |||
868 | ||||
869 | // Remember if it is worth folding N when it produces extended register. | |||
870 | bool IsExtendedRegisterWorthFolding = isWorthFolding(N); | |||
871 | ||||
872 | // Try to match a shifted extend on the RHS. | |||
873 | if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL && | |||
874 | SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) { | |||
875 | Base = LHS; | |||
876 | DoShift = CurDAG->getTargetConstant(true, MVT::i32); | |||
877 | return true; | |||
878 | } | |||
879 | ||||
880 | // Try to match a shifted extend on the LHS. | |||
881 | if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL && | |||
882 | SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) { | |||
883 | Base = RHS; | |||
884 | DoShift = CurDAG->getTargetConstant(true, MVT::i32); | |||
885 | return true; | |||
886 | } | |||
887 | ||||
888 | // Match any non-shifted, non-extend, non-immediate add expression. | |||
889 | Base = LHS; | |||
890 | Offset = RHS; | |||
891 | SignExtend = CurDAG->getTargetConstant(false, MVT::i32); | |||
892 | DoShift = CurDAG->getTargetConstant(false, MVT::i32); | |||
893 | // Reg1 + Reg2 is free: no check needed. | |||
894 | return true; | |||
895 | } | |||
896 | ||||
897 | SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) { | |||
898 | static const unsigned RegClassIDs[] = { | |||
899 | AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID}; | |||
900 | static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1, | |||
901 | AArch64::dsub2, AArch64::dsub3}; | |||
902 | ||||
903 | return createTuple(Regs, RegClassIDs, SubRegs); | |||
904 | } | |||
905 | ||||
906 | SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) { | |||
907 | static const unsigned RegClassIDs[] = { | |||
908 | AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID}; | |||
909 | static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1, | |||
910 | AArch64::qsub2, AArch64::qsub3}; | |||
911 | ||||
912 | return createTuple(Regs, RegClassIDs, SubRegs); | |||
913 | } | |||
914 | ||||
915 | SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs, | |||
916 | const unsigned RegClassIDs[], | |||
917 | const unsigned SubRegs[]) { | |||
918 | // There's no special register-class for a vector-list of 1 element: it's just | |||
919 | // a vector. | |||
920 | if (Regs.size() == 1) | |||
921 | return Regs[0]; | |||
922 | ||||
923 | assert(Regs.size() >= 2 && Regs.size() <= 4)((Regs.size() >= 2 && Regs.size() <= 4) ? static_cast <void> (0) : __assert_fail ("Regs.size() >= 2 && Regs.size() <= 4" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn235822/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 923, __PRETTY_FUNCTION__)); | |||
924 | ||||
925 | SDLoc DL(Regs[0].getNode()); | |||
926 | ||||
927 | SmallVector<SDValue, 4> Ops; | |||
928 | ||||
929 | // First operand of REG_SEQUENCE is the desired RegClass. | |||
930 | Ops.push_back( | |||
931 | CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], MVT::i32)); | |||
932 | ||||
933 | // Then we get pairs of source & subregister-position for the components. | |||
934 | for (unsigned i = 0; i < Regs.size(); ++i) { | |||
935 | Ops.push_back(Regs[i]); | |||
936 | Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], MVT::i32)); | |||
937 | } | |||
938 | ||||
939 | SDNode *N = | |||
940 | CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops); | |||
941 | return SDValue(N, 0); | |||
942 | } | |||
943 | ||||
SDNode *AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs,
                                         unsigned Opc, bool isExt) {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);

  // When isExt is set there is one extra leading operand (presumably the
  // fallback values for out-of-range indices -- TODO confirm against the
  // intrinsic definition); it shifts all vector operands by one slot.
  unsigned ExtOff = isExt;

  // Form a REG_SEQUENCE to force register allocation.
  unsigned Vec0Off = ExtOff + 1;
  SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off,
                               N->op_begin() + Vec0Off + NumVecs);
  SDValue RegSeq = createQTuple(Regs);

  SmallVector<SDValue, 6> Ops;
  if (isExt)
    Ops.push_back(N->getOperand(1));
  Ops.push_back(RegSeq);
  // The operand following the vectors is the index vector.
  Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
  return CurDAG->getMachineNode(Opc, dl, VT, Ops);
}
964 | ||||
SDNode *AArch64DAGToDAGISel::SelectIndexedLoad(SDNode *N, bool &Done) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  // Only pre-/post-indexed loads are handled here.
  if (LD->isUnindexed())
    return nullptr;
  EVT VT = LD->getMemoryVT();
  EVT DstVT = N->getValueType(0);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;

  // We're not doing validity checking here. That was done when checking
  // if we should mark the load as indexed or not. We're just selecting
  // the right instruction.
  unsigned Opcode = 0;

  ISD::LoadExtType ExtType = LD->getExtensionType();
  // When set, the (at most 32-bit) load result is placed into the low 32
  // bits of a 64-bit register via SUBREG_TO_REG after the load.
  bool InsertTo64 = false;
  if (VT == MVT::i64)
    Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
  else if (VT == MVT::i32) {
    if (ExtType == ISD::NON_EXTLOAD)
      Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
    else if (ExtType == ISD::SEXTLOAD)
      Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
    else {
      // Zero-/any-extending i32 load: a plain 32-bit load suffices; the
      // SUBREG_TO_REG below produces the 64-bit value.
      Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
      InsertTo64 = true;
      // The result of the load is only i32. It's the subreg_to_reg that makes
      // it into an i64.
      DstVT = MVT::i32;
    }
  } else if (VT == MVT::i16) {
    if (ExtType == ISD::SEXTLOAD) {
      // Sign-extending halfword load: pick the X or W form based on the
      // destination width.
      if (DstVT == MVT::i64)
        Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
      else
        Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
    } else {
      Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
      InsertTo64 = DstVT == MVT::i64;
      // The result of the load is only i32. It's the subreg_to_reg that makes
      // it into an i64.
      DstVT = MVT::i32;
    }
  } else if (VT == MVT::i8) {
    if (ExtType == ISD::SEXTLOAD) {
      if (DstVT == MVT::i64)
        Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
      else
        Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
    } else {
      Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
      InsertTo64 = DstVT == MVT::i64;
      // The result of the load is only i32. It's the subreg_to_reg that makes
      // it into an i64.
      DstVT = MVT::i32;
    }
  } else if (VT == MVT::f32) {
    Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
  } else if (VT == MVT::f64 || VT.is64BitVector()) {
    Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
  } else if (VT.is128BitVector()) {
    Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
  } else
    return nullptr;
  SDValue Chain = LD->getChain();
  SDValue Base = LD->getBasePtr();
  ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
  int OffsetVal = (int)OffsetOp->getZExtValue();
  SDValue Offset = CurDAG->getTargetConstant(OffsetVal, MVT::i64);
  SDValue Ops[] = { Base, Offset, Chain };
  // Machine node results: written-back base (i64), loaded value, chain.
  SDNode *Res = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i64, DstVT,
                                       MVT::Other, Ops);
  // Either way, we're replacing the node, so tell the caller that.
  Done = true;
  SDValue LoadedVal = SDValue(Res, 1);
  if (InsertTo64) {
    SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32);
    LoadedVal =
        SDValue(CurDAG->getMachineNode(
                    AArch64::SUBREG_TO_REG, SDLoc(N), MVT::i64,
                    CurDAG->getTargetConstant(0, MVT::i64), LoadedVal, SubReg),
                0);
  }

  // Rewire the original results: value, write-back base, chain.
  ReplaceUses(SDValue(N, 0), LoadedVal);
  ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
  ReplaceUses(SDValue(N, 2), SDValue(Res, 2));

  return nullptr;
}
1055 | ||||
1056 | SDNode *AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, | |||
1057 | unsigned Opc, unsigned SubRegIdx) { | |||
1058 | SDLoc dl(N); | |||
1059 | EVT VT = N->getValueType(0); | |||
1060 | SDValue Chain = N->getOperand(0); | |||
1061 | ||||
1062 | SDValue Ops[] = {N->getOperand(2), // Mem operand; | |||
1063 | Chain}; | |||
1064 | ||||
1065 | const EVT ResTys[] = {MVT::Untyped, MVT::Other}; | |||
1066 | ||||
1067 | SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); | |||
1068 | SDValue SuperReg = SDValue(Ld, 0); | |||
1069 | for (unsigned i = 0; i < NumVecs; ++i) | |||
1070 | ReplaceUses(SDValue(N, i), | |||
1071 | CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg)); | |||
1072 | ||||
1073 | ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1)); | |||
1074 | return nullptr; | |||
1075 | } | |||
1076 | ||||
1077 | SDNode *AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs, | |||
1078 | unsigned Opc, unsigned SubRegIdx) { | |||
1079 | SDLoc dl(N); | |||
1080 | EVT VT = N->getValueType(0); | |||
1081 | SDValue Chain = N->getOperand(0); | |||
1082 | ||||
1083 | SDValue Ops[] = {N->getOperand(1), // Mem operand | |||
1084 | N->getOperand(2), // Incremental | |||
1085 | Chain}; | |||
1086 | ||||
1087 | const EVT ResTys[] = {MVT::i64, // Type of the write back register | |||
1088 | MVT::Untyped, MVT::Other}; | |||
1089 | ||||
1090 | SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); | |||
1091 | ||||
1092 | // Update uses of write back register | |||
1093 | ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0)); | |||
1094 | ||||
1095 | // Update uses of vector list | |||
1096 | SDValue SuperReg = SDValue(Ld, 1); | |||
1097 | if (NumVecs == 1) | |||
1098 | ReplaceUses(SDValue(N, 0), SuperReg); | |||
1099 | else | |||
1100 | for (unsigned i = 0; i < NumVecs; ++i) | |||
1101 | ReplaceUses(SDValue(N, i), | |||
1102 | CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg)); | |||
1103 | ||||
1104 | // Update the chain | |||
1105 | ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2)); | |||
1106 | return nullptr; | |||
1107 | } | |||
1108 | ||||
1109 | SDNode *AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs, | |||
1110 | unsigned Opc) { | |||
1111 | SDLoc dl(N); | |||
1112 | EVT VT = N->getOperand(2)->getValueType(0); | |||
1113 | ||||
1114 | // Form a REG_SEQUENCE to force register allocation. | |||
1115 | bool Is128Bit = VT.getSizeInBits() == 128; | |||
1116 | SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); | |||
1117 | SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs); | |||
1118 | ||||
1119 | SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)}; | |||
1120 | SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops); | |||
1121 | ||||
1122 | return St; | |||
1123 | } | |||
1124 | ||||
1125 | SDNode *AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs, | |||
1126 | unsigned Opc) { | |||
1127 | SDLoc dl(N); | |||
1128 | EVT VT = N->getOperand(2)->getValueType(0); | |||
1129 | const EVT ResTys[] = {MVT::i64, // Type of the write back register | |||
1130 | MVT::Other}; // Type for the Chain | |||
1131 | ||||
1132 | // Form a REG_SEQUENCE to force register allocation. | |||
1133 | bool Is128Bit = VT.getSizeInBits() == 128; | |||
1134 | SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); | |||
1135 | SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs); | |||
1136 | ||||
1137 | SDValue Ops[] = {RegSeq, | |||
1138 | N->getOperand(NumVecs + 1), // base register | |||
1139 | N->getOperand(NumVecs + 2), // Incremental | |||
1140 | N->getOperand(0)}; // Chain | |||
1141 | SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); | |||
1142 | ||||
1143 | return St; | |||
1144 | } | |||
1145 | ||||
namespace {
/// WidenVector - Given a value in the V64 register class, produce the
/// equivalent value in the V128 register class.
class WidenVector {
  SelectionDAG &DAG;

public:
  WidenVector(SelectionDAG &DAG) : DAG(DAG) {}

  /// Insert \p V64Reg into the dsub subregister of an undefined wide
  /// vector, producing a vector with the same element type and twice the
  /// element count whose low half is the original value.
  SDValue operator()(SDValue V64Reg) {
    EVT VT = V64Reg.getValueType();
    unsigned NarrowSize = VT.getVectorNumElements();
    MVT EltTy = VT.getVectorElementType().getSimpleVT();
    // Same element type, twice as many elements.
    MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
    SDLoc DL(V64Reg);

    SDValue Undef =
        SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
    return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
  }
};
} // namespace
1168 | ||||
1169 | /// NarrowVector - Given a value in the V128 register class, produce the | |||
1170 | /// equivalent value in the V64 register class. | |||
1171 | static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) { | |||
1172 | EVT VT = V128Reg.getValueType(); | |||
1173 | unsigned WideSize = VT.getVectorNumElements(); | |||
1174 | MVT EltTy = VT.getVectorElementType().getSimpleVT(); | |||
1175 | MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2); | |||
1176 | ||||
1177 | return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy, | |||
1178 | V128Reg); | |||
1179 | } | |||
1180 | ||||
SDNode *AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
                                            unsigned Opc) {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  // 64-bit vectors are widened to Q registers because the lane
  // instructions operate on Q register tuples.
  bool Narrow = VT.getSizeInBits() == 64;

  // Form a REG_SEQUENCE to force register allocation.
  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);

  if (Narrow)
    std::transform(Regs.begin(), Regs.end(), Regs.begin(),
                   WidenVector(*CurDAG));

  SDValue RegSeq = createQTuple(Regs);

  // Results: the tuple of updated vectors and the chain.
  const EVT ResTys[] = {MVT::Untyped, MVT::Other};

  unsigned LaneNo =
      cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();

  // Operands: tuple, lane number, address, chain.
  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, MVT::i64),
                   N->getOperand(NumVecs + 3), N->getOperand(0)};
  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  SDValue SuperReg = SDValue(Ld, 0);

  // Extract each result vector from the tuple, narrowing back to 64 bits
  // where the inputs were widened.
  EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
  static unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, AArch64::qsub2,
                              AArch64::qsub3 };
  for (unsigned i = 0; i < NumVecs; ++i) {
    SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
    if (Narrow)
      NV = NarrowVector(NV, *CurDAG);
    ReplaceUses(SDValue(N, i), NV);
  }

  // Rewire the chain.
  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));

  return Ld;
}
1220 | ||||
SDNode *AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
                                                unsigned Opc) {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  // 64-bit vectors are widened to Q registers because the lane
  // instructions operate on Q register tuples.
  bool Narrow = VT.getSizeInBits() == 64;

  // Form a REG_SEQUENCE to force register allocation.
  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);

  if (Narrow)
    std::transform(Regs.begin(), Regs.end(), Regs.begin(),
                   WidenVector(*CurDAG));

  SDValue RegSeq = createQTuple(Regs);

  const EVT ResTys[] = {MVT::i64, // Type of the write back register
                        RegSeq->getValueType(0), MVT::Other};

  unsigned LaneNo =
      cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();

  SDValue Ops[] = {RegSeq,
                   CurDAG->getTargetConstant(LaneNo, MVT::i64), // Lane Number
                   N->getOperand(NumVecs + 2), // Base register
                   N->getOperand(NumVecs + 3), // Incremental
                   N->getOperand(0)};
  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  // Update uses of the write back register
  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));

  // Update uses of the vector list
  SDValue SuperReg = SDValue(Ld, 1);
  if (NumVecs == 1) {
    // A single vector needs no tuple; use the result directly, narrowing
    // back to 64 bits if the input was widened.
    ReplaceUses(SDValue(N, 0),
                Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
  } else {
    // Extract each vector from the tuple result, narrowing where the
    // inputs were widened.
    EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
    static unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, AArch64::qsub2,
                                AArch64::qsub3 };
    for (unsigned i = 0; i < NumVecs; ++i) {
      SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
                                                  SuperReg);
      if (Narrow)
        NV = NarrowVector(NV, *CurDAG);
      ReplaceUses(SDValue(N, i), NV);
    }
  }

  // Update the Chain
  ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));

  return Ld;
}
1275 | ||||
1276 | SDNode *AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs, | |||
1277 | unsigned Opc) { | |||
1278 | SDLoc dl(N); | |||
1279 | EVT VT = N->getOperand(2)->getValueType(0); | |||
1280 | bool Narrow = VT.getSizeInBits() == 64; | |||
1281 | ||||
1282 | // Form a REG_SEQUENCE to force register allocation. | |||
1283 | SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); | |||
1284 | ||||
1285 | if (Narrow) | |||
1286 | std::transform(Regs.begin(), Regs.end(), Regs.begin(), | |||
1287 | WidenVector(*CurDAG)); | |||
1288 | ||||
1289 | SDValue RegSeq = createQTuple(Regs); | |||
1290 | ||||
1291 | unsigned LaneNo = | |||
1292 | cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue(); | |||
1293 | ||||
1294 | SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, MVT::i64), | |||
1295 | N->getOperand(NumVecs + 3), N->getOperand(0)}; | |||
1296 | SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops); | |||
1297 | ||||
1298 | // Transfer memoperands. | |||
1299 | MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); | |||
1300 | MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); | |||
1301 | cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1); | |||
1302 | ||||
1303 | return St; | |||
1304 | } | |||
1305 | ||||
1306 | SDNode *AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs, | |||
1307 | unsigned Opc) { | |||
1308 | SDLoc dl(N); | |||
1309 | EVT VT = N->getOperand(2)->getValueType(0); | |||
1310 | bool Narrow = VT.getSizeInBits() == 64; | |||
1311 | ||||
1312 | // Form a REG_SEQUENCE to force register allocation. | |||
1313 | SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); | |||
1314 | ||||
1315 | if (Narrow) | |||
1316 | std::transform(Regs.begin(), Regs.end(), Regs.begin(), | |||
1317 | WidenVector(*CurDAG)); | |||
1318 | ||||
1319 | SDValue RegSeq = createQTuple(Regs); | |||
1320 | ||||
1321 | const EVT ResTys[] = {MVT::i64, // Type of the write back register | |||
1322 | MVT::Other}; | |||
1323 | ||||
1324 | unsigned LaneNo = | |||
1325 | cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue(); | |||
1326 | ||||
1327 | SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, MVT::i64), | |||
1328 | N->getOperand(NumVecs + 2), // Base Register | |||
1329 | N->getOperand(NumVecs + 3), // Incremental | |||
1330 | N->getOperand(0)}; | |||
1331 | SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); | |||
1332 | ||||
1333 | // Transfer memoperands. | |||
1334 | MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); | |||
1335 | MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); | |||
1336 | cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1); | |||
1337 | ||||
1338 | return St; | |||
1339 | } | |||
1340 | ||||
1341 | static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N, | |||
1342 | unsigned &Opc, SDValue &Opd0, | |||
1343 | unsigned &LSB, unsigned &MSB, | |||
1344 | unsigned NumberOfIgnoredLowBits, | |||
1345 | bool BiggerPattern) { | |||
1346 | assert(N->getOpcode() == ISD::AND &&((N->getOpcode() == ISD::AND && "N must be a AND operation to call this function" ) ? static_cast<void> (0) : __assert_fail ("N->getOpcode() == ISD::AND && \"N must be a AND operation to call this function\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn235822/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 1347, __PRETTY_FUNCTION__)) | |||
1347 | "N must be a AND operation to call this function")((N->getOpcode() == ISD::AND && "N must be a AND operation to call this function" ) ? static_cast<void> (0) : __assert_fail ("N->getOpcode() == ISD::AND && \"N must be a AND operation to call this function\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn235822/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 1347, __PRETTY_FUNCTION__)); | |||
1348 | ||||
1349 | EVT VT = N->getValueType(0); | |||
1350 | ||||
1351 | // Here we can test the type of VT and return false when the type does not | |||
1352 | // match, but since it is done prior to that call in the current context | |||
1353 | // we turned that into an assert to avoid redundant code. | |||
1354 | assert((VT == MVT::i32 || VT == MVT::i64) &&(((VT == MVT::i32 || VT == MVT::i64) && "Type checking must have been done before calling this function" ) ? static_cast<void> (0) : __assert_fail ("(VT == MVT::i32 || VT == MVT::i64) && \"Type checking must have been done before calling this function\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn235822/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 1355, __PRETTY_FUNCTION__)) | |||
1355 | "Type checking must have been done before calling this function")(((VT == MVT::i32 || VT == MVT::i64) && "Type checking must have been done before calling this function" ) ? static_cast<void> (0) : __assert_fail ("(VT == MVT::i32 || VT == MVT::i64) && \"Type checking must have been done before calling this function\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn235822/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 1355, __PRETTY_FUNCTION__)); | |||
1356 | ||||
1357 | // FIXME: simplify-demanded-bits in DAGCombine will probably have | |||
1358 | // changed the AND node to a 32-bit mask operation. We'll have to | |||
1359 | // undo that as part of the transform here if we want to catch all | |||
1360 | // the opportunities. | |||
1361 | // Currently the NumberOfIgnoredLowBits argument helps to recover | |||
1362 | // form these situations when matching bigger pattern (bitfield insert). | |||
1363 | ||||
1364 | // For unsigned extracts, check for a shift right and mask | |||
1365 | uint64_t And_imm = 0; | |||
1366 | if (!isOpcWithIntImmediate(N, ISD::AND, And_imm)) | |||
1367 | return false; | |||
1368 | ||||
1369 | const SDNode *Op0 = N->getOperand(0).getNode(); | |||
1370 | ||||
1371 | // Because of simplify-demanded-bits in DAGCombine, the mask may have been | |||
1372 | // simplified. Try to undo that | |||
1373 | And_imm |= (1 << NumberOfIgnoredLowBits) - 1; | |||
1374 | ||||
1375 | // The immediate is a mask of the low bits iff imm & (imm+1) == 0 | |||
1376 | if (And_imm & (And_imm + 1)) | |||
1377 | return false; | |||
1378 | ||||
1379 | bool ClampMSB = false; | |||
1380 | uint64_t Srl_imm = 0; | |||
1381 | // Handle the SRL + ANY_EXTEND case. | |||
1382 | if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND && | |||
1383 | isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, Srl_imm)) { | |||
1384 | // Extend the incoming operand of the SRL to 64-bit. | |||
1385 | Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0)); | |||
1386 | // Make sure to clamp the MSB so that we preserve the semantics of the | |||
1387 | // original operations. | |||
1388 | ClampMSB = true; | |||
1389 | } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE && | |||
1390 | isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, | |||
1391 | Srl_imm)) { | |||
1392 | // If the shift result was truncated, we can still combine them. | |||
1393 | Opd0 = Op0->getOperand(0).getOperand(0); | |||
1394 | ||||
1395 | // Use the type of SRL node. | |||
1396 | VT = Opd0->getValueType(0); | |||
1397 | } else if (isOpcWithIntImmediate(Op0, ISD::SRL, Srl_imm)) { | |||
1398 | Opd0 = Op0->getOperand(0); | |||
1399 | } else if (BiggerPattern) { | |||
1400 | // Let's pretend a 0 shift right has been performed. | |||
1401 | // The resulting code will be at least as good as the original one | |||
1402 | // plus it may expose more opportunities for bitfield insert pattern. | |||
1403 | // FIXME: Currently we limit this to the bigger pattern, because | |||
1404 | // some optimizations expect AND and not UBFM | |||
1405 | Opd0 = N->getOperand(0); | |||
1406 | } else | |||
1407 | return false; | |||
1408 | ||||
1409 | // Bail out on large immediates. This happens when no proper | |||
1410 | // combining/constant folding was performed. | |||
1411 | if (!BiggerPattern && (Srl_imm <= 0 || Srl_imm >= VT.getSizeInBits())) { | |||
1412 | DEBUG((dbgs() << Ndo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { (dbgs() << N << ": Found large shift immediate, this should not happen\n" ); } } while (0) | |||
1413 | << ": Found large shift immediate, this should not happen\n"))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { (dbgs() << N << ": Found large shift immediate, this should not happen\n" ); } } while (0); | |||
1414 | return false; | |||
1415 | } | |||
1416 | ||||
1417 | LSB = Srl_imm; | |||
1418 | MSB = Srl_imm + (VT == MVT::i32 ? countTrailingOnes<uint32_t>(And_imm) | |||
1419 | : countTrailingOnes<uint64_t>(And_imm)) - | |||
1420 | 1; | |||
1421 | if (ClampMSB) | |||
1422 | // Since we're moving the extend before the right shift operation, we need | |||
1423 | // to clamp the MSB to make sure we don't shift in undefined bits instead of | |||
1424 | // the zeros which would get shifted in with the original right shift | |||
1425 | // operation. | |||
1426 | MSB = MSB > 31 ? 31 : MSB; | |||
1427 | ||||
1428 | Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri; | |||
1429 | return true; | |||
1430 | } | |||
1431 | ||||
1432 | static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc, | |||
1433 | SDValue &Opd0, unsigned &LSB, | |||
1434 | unsigned &MSB) { | |||
1435 | // We are looking for the following pattern which basically extracts several | |||
1436 | // continuous bits from the source value and places it from the LSB of the | |||
1437 | // destination value, all other bits of the destination value or set to zero: | |||
1438 | // | |||
1439 | // Value2 = AND Value, MaskImm | |||
1440 | // SRL Value2, ShiftImm | |||
1441 | // | |||
1442 | // with MaskImm >> ShiftImm to search for the bit width. | |||
1443 | // | |||
1444 | // This gets selected into a single UBFM: | |||
1445 | // | |||
1446 | // UBFM Value, ShiftImm, BitWide + Srl_imm -1 | |||
1447 | // | |||
1448 | ||||
1449 | if (N->getOpcode() != ISD::SRL) | |||
1450 | return false; | |||
1451 | ||||
1452 | uint64_t And_mask = 0; | |||
1453 | if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_mask)) | |||
1454 | return false; | |||
1455 | ||||
1456 | Opd0 = N->getOperand(0).getOperand(0); | |||
1457 | ||||
1458 | uint64_t Srl_imm = 0; | |||
1459 | if (!isIntImmediate(N->getOperand(1), Srl_imm)) | |||
1460 | return false; | |||
1461 | ||||
1462 | // Check whether we really have several bits extract here. | |||
1463 | unsigned BitWide = 64 - countLeadingOnes(~(And_mask >> Srl_imm)); | |||
1464 | if (BitWide && isMask_64(And_mask >> Srl_imm)) { | |||
1465 | if (N->getValueType(0) == MVT::i32) | |||
1466 | Opc = AArch64::UBFMWri; | |||
1467 | else | |||
1468 | Opc = AArch64::UBFMXri; | |||
1469 | ||||
1470 | LSB = Srl_imm; | |||
1471 | MSB = BitWide + Srl_imm - 1; | |||
1472 | return true; | |||
1473 | } | |||
1474 | ||||
1475 | return false; | |||
1476 | } | |||
1477 | ||||
1478 | static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, | |||
1479 | unsigned &LSB, unsigned &MSB, | |||
1480 | bool BiggerPattern) { | |||
1481 | assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&(((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD:: SRL) && "N must be a SHR/SRA operation to call this function" ) ? static_cast<void> (0) : __assert_fail ("(N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) && \"N must be a SHR/SRA operation to call this function\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn235822/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 1482, __PRETTY_FUNCTION__)) | |||
1482 | "N must be a SHR/SRA operation to call this function")(((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD:: SRL) && "N must be a SHR/SRA operation to call this function" ) ? static_cast<void> (0) : __assert_fail ("(N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) && \"N must be a SHR/SRA operation to call this function\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn235822/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 1482, __PRETTY_FUNCTION__)); | |||
1483 | ||||
1484 | EVT VT = N->getValueType(0); | |||
1485 | ||||
1486 | // Here we can test the type of VT and return false when the type does not | |||
1487 | // match, but since it is done prior to that call in the current context | |||
1488 | // we turned that into an assert to avoid redundant code. | |||
1489 | assert((VT == MVT::i32 || VT == MVT::i64) &&(((VT == MVT::i32 || VT == MVT::i64) && "Type checking must have been done before calling this function" ) ? static_cast<void> (0) : __assert_fail ("(VT == MVT::i32 || VT == MVT::i64) && \"Type checking must have been done before calling this function\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn235822/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 1490, __PRETTY_FUNCTION__)) | |||
1490 | "Type checking must have been done before calling this function")(((VT == MVT::i32 || VT == MVT::i64) && "Type checking must have been done before calling this function" ) ? static_cast<void> (0) : __assert_fail ("(VT == MVT::i32 || VT == MVT::i64) && \"Type checking must have been done before calling this function\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn235822/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 1490, __PRETTY_FUNCTION__)); | |||
1491 | ||||
1492 | // Check for AND + SRL doing several bits extract. | |||
1493 | if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, LSB, MSB)) | |||
1494 | return true; | |||
1495 | ||||
1496 | // we're looking for a shift of a shift | |||
1497 | uint64_t Shl_imm = 0; | |||
1498 | uint64_t Trunc_bits = 0; | |||
1499 | if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) { | |||
1500 | Opd0 = N->getOperand(0).getOperand(0); | |||
1501 | } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL && | |||
1502 | N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) { | |||
1503 | // We are looking for a shift of truncate. Truncate from i64 to i32 could | |||
1504 | // be considered as setting high 32 bits as zero. Our strategy here is to | |||
1505 | // always generate 64bit UBFM. This consistency will help the CSE pass | |||
1506 | // later find more redundancy. | |||
1507 | Opd0 = N->getOperand(0).getOperand(0); | |||
1508 | Trunc_bits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits(); | |||
1509 | VT = Opd0->getValueType(0); | |||
1510 | assert(VT == MVT::i64 && "the promoted type should be i64")((VT == MVT::i64 && "the promoted type should be i64" ) ? static_cast<void> (0) : __assert_fail ("VT == MVT::i64 && \"the promoted type should be i64\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn235822/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 1510, __PRETTY_FUNCTION__)); | |||
1511 | } else if (BiggerPattern) { | |||
1512 | // Let's pretend a 0 shift left has been performed. | |||
1513 | // FIXME: Currently we limit this to the bigger pattern case, | |||
1514 | // because some optimizations expect AND and not UBFM | |||
1515 | Opd0 = N->getOperand(0); | |||
1516 | } else | |||
1517 | return false; | |||
1518 | ||||
1519 | // Missing combines/constant folding may have left us with strange | |||
1520 | // constants. | |||
1521 | if (Shl_imm >= VT.getSizeInBits()) { | |||
1522 | DEBUG((dbgs() << Ndo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { (dbgs() << N << ": Found large shift immediate, this should not happen\n" ); } } while (0) | |||
1523 | << ": Found large shift immediate, this should not happen\n"))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { (dbgs() << N << ": Found large shift immediate, this should not happen\n" ); } } while (0); | |||
1524 | return false; | |||
1525 | } | |||
1526 | ||||
1527 | uint64_t Srl_imm = 0; | |||
1528 | if (!isIntImmediate(N->getOperand(1), Srl_imm)) | |||
1529 | return false; | |||
1530 | ||||
1531 | assert(Srl_imm > 0 && Srl_imm < VT.getSizeInBits() &&((Srl_imm > 0 && Srl_imm < VT.getSizeInBits() && "bad amount in shift node!") ? static_cast<void> (0) : __assert_fail ("Srl_imm > 0 && Srl_imm < VT.getSizeInBits() && \"bad amount in shift node!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn235822/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 1532, __PRETTY_FUNCTION__)) | |||
1532 | "bad amount in shift node!")((Srl_imm > 0 && Srl_imm < VT.getSizeInBits() && "bad amount in shift node!") ? static_cast<void> (0) : __assert_fail ("Srl_imm > 0 && Srl_imm < VT.getSizeInBits() && \"bad amount in shift node!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn235822/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 1532, __PRETTY_FUNCTION__)); | |||
1533 | // Note: The width operand is encoded as width-1. | |||
1534 | unsigned Width = VT.getSizeInBits() - Trunc_bits - Srl_imm - 1; | |||
1535 | int sLSB = Srl_imm - Shl_imm; | |||
1536 | if (sLSB < 0) | |||
1537 | return false; | |||
1538 | LSB = sLSB; | |||
1539 | MSB = LSB + Width; | |||
1540 | // SRA requires a signed extraction | |||
1541 | if (VT == MVT::i32) | |||
1542 | Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri; | |||
1543 | else | |||
1544 | Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri; | |||
1545 | return true; | |||
1546 | } | |||
1547 | ||||
1548 | static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, | |||
1549 | SDValue &Opd0, unsigned &LSB, unsigned &MSB, | |||
1550 | unsigned NumberOfIgnoredLowBits = 0, | |||
1551 | bool BiggerPattern = false) { | |||
1552 | if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64) | |||
1553 | return false; | |||
1554 | ||||
1555 | switch (N->getOpcode()) { | |||
1556 | default: | |||
1557 | if (!N->isMachineOpcode()) | |||
1558 | return false; | |||
1559 | break; | |||
1560 | case ISD::AND: | |||
1561 | return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, LSB, MSB, | |||
1562 | NumberOfIgnoredLowBits, BiggerPattern); | |||
1563 | case ISD::SRL: | |||
1564 | case ISD::SRA: | |||
1565 | return isBitfieldExtractOpFromShr(N, Opc, Opd0, LSB, MSB, BiggerPattern); | |||
1566 | } | |||
1567 | ||||
1568 | unsigned NOpc = N->getMachineOpcode(); | |||
1569 | switch (NOpc) { | |||
1570 | default: | |||
1571 | return false; | |||
1572 | case AArch64::SBFMWri: | |||
1573 | case AArch64::UBFMWri: | |||
1574 | case AArch64::SBFMXri: | |||
1575 | case AArch64::UBFMXri: | |||
1576 | Opc = NOpc; | |||
1577 | Opd0 = N->getOperand(0); | |||
1578 | LSB = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue(); | |||
1579 | MSB = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue(); | |||
1580 | return true; | |||
1581 | } | |||
1582 | // Unreachable | |||
1583 | return false; | |||
1584 | } | |||
1585 | ||||
1586 | SDNode *AArch64DAGToDAGISel::SelectBitfieldExtractOp(SDNode *N) { | |||
1587 | unsigned Opc, LSB, MSB; | |||
1588 | SDValue Opd0; | |||
1589 | if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, LSB, MSB)) | |||
1590 | return nullptr; | |||
1591 | ||||
1592 | EVT VT = N->getValueType(0); | |||
1593 | ||||
1594 | // If the bit extract operation is 64bit but the original type is 32bit, we | |||
1595 | // need to add one EXTRACT_SUBREG. | |||
1596 | if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) { | |||
1597 | SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(LSB, MVT::i64), | |||
1598 | CurDAG->getTargetConstant(MSB, MVT::i64)}; | |||
1599 | ||||
1600 | SDNode *BFM = CurDAG->getMachineNode(Opc, SDLoc(N), MVT::i64, Ops64); | |||
1601 | SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32); | |||
1602 | MachineSDNode *Node = | |||
1603 | CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SDLoc(N), MVT::i32, | |||
1604 | SDValue(BFM, 0), SubReg); | |||
1605 | return Node; | |||
1606 | } | |||
1607 | ||||
1608 | SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(LSB, VT), | |||
1609 | CurDAG->getTargetConstant(MSB, VT)}; | |||
1610 | return CurDAG->SelectNodeTo(N, Opc, VT, Ops); | |||
1611 | } | |||
1612 | ||||
1613 | /// Does DstMask form a complementary pair with the mask provided by | |||
1614 | /// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking, | |||
1615 | /// this asks whether DstMask zeroes precisely those bits that will be set by | |||
1616 | /// the other half. | |||
1617 | static bool isBitfieldDstMask(uint64_t DstMask, APInt BitsToBeInserted, | |||
1618 | unsigned NumberOfIgnoredHighBits, EVT VT) { | |||
1619 | assert((VT == MVT::i32 || VT == MVT::i64) &&(((VT == MVT::i32 || VT == MVT::i64) && "i32 or i64 mask type expected!" ) ? static_cast<void> (0) : __assert_fail ("(VT == MVT::i32 || VT == MVT::i64) && \"i32 or i64 mask type expected!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn235822/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 1620, __PRETTY_FUNCTION__)) | |||
1620 | "i32 or i64 mask type expected!")(((VT == MVT::i32 || VT == MVT::i64) && "i32 or i64 mask type expected!" ) ? static_cast<void> (0) : __assert_fail ("(VT == MVT::i32 || VT == MVT::i64) && \"i32 or i64 mask type expected!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn235822/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 1620, __PRETTY_FUNCTION__)); | |||
1621 | unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits; | |||
1622 | ||||
1623 | APInt SignificantDstMask = APInt(BitWidth, DstMask); | |||
1624 | APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth); | |||
1625 | ||||
1626 | return (SignificantDstMask & SignificantBitsToBeInserted) == 0 && | |||
1627 | (SignificantDstMask | SignificantBitsToBeInserted).isAllOnesValue(); | |||
1628 | } | |||
1629 | ||||
1630 | // Look for bits that will be useful for later uses. | |||
1631 | // A bit is consider useless as soon as it is dropped and never used | |||
1632 | // before it as been dropped. | |||
1633 | // E.g., looking for useful bit of x | |||
1634 | // 1. y = x & 0x7 | |||
1635 | // 2. z = y >> 2 | |||
1636 | // After #1, x useful bits are 0x7, then the useful bits of x, live through | |||
1637 | // y. | |||
1638 | // After #2, the useful bits of x are 0x4. | |||
1639 | // However, if x is used on an unpredicatable instruction, then all its bits | |||
1640 | // are useful. | |||
1641 | // E.g. | |||
1642 | // 1. y = x & 0x7 | |||
1643 | // 2. z = y >> 2 | |||
1644 | // 3. str x, [@x] | |||
1645 | static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0); | |||
1646 | ||||
1647 | static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits, | |||
1648 | unsigned Depth) { | |||
1649 | uint64_t Imm = | |||
1650 | cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue(); | |||
1651 | Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth()); | |||
1652 | UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm); | |||
1653 | getUsefulBits(Op, UsefulBits, Depth + 1); | |||
1654 | } | |||
1655 | ||||
1656 | static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits, | |||
1657 | uint64_t Imm, uint64_t MSB, | |||
1658 | unsigned Depth) { | |||
1659 | // inherit the bitwidth value | |||
1660 | APInt OpUsefulBits(UsefulBits); | |||
1661 | OpUsefulBits = 1; | |||
1662 | ||||
1663 | if (MSB >= Imm) { | |||
1664 | OpUsefulBits = OpUsefulBits.shl(MSB - Imm + 1); | |||
1665 | --OpUsefulBits; | |||
1666 | // The interesting part will be in the lower part of the result | |||
1667 | getUsefulBits(Op, OpUsefulBits, Depth + 1); | |||
1668 | // The interesting part was starting at Imm in the argument | |||
1669 | OpUsefulBits = OpUsefulBits.shl(Imm); | |||
1670 | } else { | |||
1671 | OpUsefulBits = OpUsefulBits.shl(MSB + 1); | |||
1672 | --OpUsefulBits; | |||
1673 | // The interesting part will be shifted in the result | |||
1674 | OpUsefulBits = OpUsefulBits.shl(OpUsefulBits.getBitWidth() - Imm); | |||
1675 | getUsefulBits(Op, OpUsefulBits, Depth + 1); | |||
1676 | // The interesting part was at zero in the argument | |||
1677 | OpUsefulBits = OpUsefulBits.lshr(OpUsefulBits.getBitWidth() - Imm); | |||
1678 | } | |||
1679 | ||||
1680 | UsefulBits &= OpUsefulBits; | |||
1681 | } | |||
1682 | ||||
1683 | static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits, | |||
1684 | unsigned Depth) { | |||
1685 | uint64_t Imm = | |||
1686 | cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue(); | |||
1687 | uint64_t MSB = | |||
1688 | cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue(); | |||
1689 | ||||
1690 | getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth); | |||
1691 | } | |||
1692 | ||||
1693 | static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits, | |||
1694 | unsigned Depth) { | |||
1695 | uint64_t ShiftTypeAndValue = | |||
1696 | cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue(); | |||
1697 | APInt Mask(UsefulBits); | |||
1698 | Mask.clearAllBits(); | |||
1699 | Mask.flipAllBits(); | |||
1700 | ||||
1701 | if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) { | |||
1702 | // Shift Left | |||
1703 | uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue); | |||
1704 | Mask = Mask.shl(ShiftAmt); | |||
1705 | getUsefulBits(Op, Mask, Depth + 1); | |||
1706 | Mask = Mask.lshr(ShiftAmt); | |||
1707 | } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) { | |||
1708 | // Shift Right | |||
1709 | // We do not handle AArch64_AM::ASR, because the sign will change the | |||
1710 | // number of useful bits | |||
1711 | uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue); | |||
1712 | Mask = Mask.lshr(ShiftAmt); | |||
1713 | getUsefulBits(Op, Mask, Depth + 1); | |||
1714 | Mask = Mask.shl(ShiftAmt); | |||
1715 | } else | |||
1716 | return; | |||
1717 | ||||
1718 | UsefulBits &= Mask; | |||
1719 | } | |||
1720 | ||||
1721 | static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits, | |||
1722 | unsigned Depth) { | |||
1723 | uint64_t Imm = | |||
1724 | cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue(); | |||
1725 | uint64_t MSB = | |||
1726 | cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue(); | |||
1727 | ||||
1728 | if (Op.getOperand(1) == Orig) | |||
1729 | return getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth); | |||
1730 | ||||
1731 | APInt OpUsefulBits(UsefulBits); | |||
1732 | OpUsefulBits = 1; | |||
1733 | ||||
1734 | if (MSB >= Imm) { | |||
1735 | OpUsefulBits = OpUsefulBits.shl(MSB - Imm + 1); | |||
1736 | --OpUsefulBits; | |||
1737 | UsefulBits &= ~OpUsefulBits; | |||
1738 | getUsefulBits(Op, UsefulBits, Depth + 1); | |||
1739 | } else { | |||
1740 | OpUsefulBits = OpUsefulBits.shl(MSB + 1); | |||
1741 | --OpUsefulBits; | |||
1742 | UsefulBits = ~(OpUsefulBits.shl(OpUsefulBits.getBitWidth() - Imm)); | |||
1743 | getUsefulBits(Op, UsefulBits, Depth + 1); | |||
1744 | } | |||
1745 | } | |||
1746 | ||||
1747 | static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits, | |||
1748 | SDValue Orig, unsigned Depth) { | |||
1749 | ||||
1750 | // Users of this node should have already been instruction selected | |||
1751 | // FIXME: Can we turn that into an assert? | |||
1752 | if (!UserNode->isMachineOpcode()) | |||
1753 | return; | |||
1754 | ||||
1755 | switch (UserNode->getMachineOpcode()) { | |||
1756 | default: | |||
1757 | return; | |||
1758 | case AArch64::ANDSWri: | |||
1759 | case AArch64::ANDSXri: | |||
1760 | case AArch64::ANDWri: | |||
1761 | case AArch64::ANDXri: | |||
1762 | // We increment Depth only when we call the getUsefulBits | |||
1763 | return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits, | |||
1764 | Depth); | |||
1765 | case AArch64::UBFMWri: | |||
1766 | case AArch64::UBFMXri: | |||
1767 | return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth); | |||
1768 | ||||
1769 | case AArch64::ORRWrs: | |||
1770 | case AArch64::ORRXrs: | |||
1771 | if (UserNode->getOperand(1) != Orig) | |||
1772 | return; | |||
1773 | return getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits, | |||
1774 | Depth); | |||
1775 | case AArch64::BFMWri: | |||
1776 | case AArch64::BFMXri: | |||
1777 | return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth); | |||
1778 | } | |||
1779 | } | |||
1780 | ||||
1781 | static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) { | |||
1782 | if (Depth >= 6) | |||
1783 | return; | |||
1784 | // Initialize UsefulBits | |||
1785 | if (!Depth) { | |||
1786 | unsigned Bitwidth = Op.getValueType().getScalarType().getSizeInBits(); | |||
1787 | // At the beginning, assume every produced bits is useful | |||
1788 | UsefulBits = APInt(Bitwidth, 0); | |||
1789 | UsefulBits.flipAllBits(); | |||
1790 | } | |||
1791 | APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0); | |||
1792 | ||||
1793 | for (SDNode *Node : Op.getNode()->uses()) { | |||
1794 | // A use cannot produce useful bits | |||
1795 | APInt UsefulBitsForUse = APInt(UsefulBits); | |||
1796 | getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth); | |||
1797 | UsersUsefulBits |= UsefulBitsForUse; | |||
1798 | } | |||
1799 | // UsefulBits contains the produced bits that are meaningful for the | |||
1800 | // current definition, thus a user cannot make a bit meaningful at | |||
1801 | // this point | |||
1802 | UsefulBits &= UsersUsefulBits; | |||
1803 | } | |||
1804 | ||||
1805 | /// Create a machine node performing a notional SHL of Op by ShlAmount. If | |||
1806 | /// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is | |||
1807 | /// 0, return Op unchanged. | |||
1808 | static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) { | |||
1809 | if (ShlAmount == 0) | |||
1810 | return Op; | |||
1811 | ||||
1812 | EVT VT = Op.getValueType(); | |||
1813 | unsigned BitWidth = VT.getSizeInBits(); | |||
1814 | unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri; | |||
1815 | ||||
1816 | SDNode *ShiftNode; | |||
1817 | if (ShlAmount > 0) { | |||
1818 | // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt | |||
1819 | ShiftNode = CurDAG->getMachineNode( | |||
1820 | UBFMOpc, SDLoc(Op), VT, Op, | |||
1821 | CurDAG->getTargetConstant(BitWidth - ShlAmount, VT), | |||
1822 | CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, VT)); | |||
1823 | } else { | |||
1824 | // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1 | |||
1825 | assert(ShlAmount < 0 && "expected right shift")((ShlAmount < 0 && "expected right shift") ? static_cast <void> (0) : __assert_fail ("ShlAmount < 0 && \"expected right shift\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn235822/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 1825, __PRETTY_FUNCTION__)); | |||
1826 | int ShrAmount = -ShlAmount; | |||
1827 | ShiftNode = CurDAG->getMachineNode( | |||
1828 | UBFMOpc, SDLoc(Op), VT, Op, CurDAG->getTargetConstant(ShrAmount, VT), | |||
1829 | CurDAG->getTargetConstant(BitWidth - 1, VT)); | |||
1830 | } | |||
1831 | ||||
1832 | return SDValue(ShiftNode, 0); | |||
1833 | } | |||
1834 | ||||
1835 | /// Does this tree qualify as an attempt to move a bitfield into position, | |||
1836 | /// essentially "(and (shl VAL, N), Mask)". | |||
1837 | static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op, | |||
1838 | SDValue &Src, int &ShiftAmount, | |||
1839 | int &MaskWidth) { | |||
1840 | EVT VT = Op.getValueType(); | |||
1841 | unsigned BitWidth = VT.getSizeInBits(); | |||
1842 | (void)BitWidth; | |||
1843 | assert(BitWidth == 32 || BitWidth == 64)((BitWidth == 32 || BitWidth == 64) ? static_cast<void> (0) : __assert_fail ("BitWidth == 32 || BitWidth == 64", "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn235822/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 1843, __PRETTY_FUNCTION__)); | |||
1844 | ||||
1845 | APInt KnownZero, KnownOne; | |||
1846 | CurDAG->computeKnownBits(Op, KnownZero, KnownOne); | |||
1847 | ||||
1848 | // Non-zero in the sense that they're not provably zero, which is the key | |||
1849 | // point if we want to use this value | |||
1850 | uint64_t NonZeroBits = (~KnownZero).getZExtValue(); | |||
1851 | ||||
1852 | // Discard a constant AND mask if present. It's safe because the node will | |||
1853 | // already have been factored into the computeKnownBits calculation above. | |||
1854 | uint64_t AndImm; | |||
1855 | if (isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm)) { | |||
1856 | assert((~APInt(BitWidth, AndImm) & ~KnownZero) == 0)(((~APInt(BitWidth, AndImm) & ~KnownZero) == 0) ? static_cast <void> (0) : __assert_fail ("(~APInt(BitWidth, AndImm) & ~KnownZero) == 0" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn235822/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 1856, __PRETTY_FUNCTION__)); | |||
1857 | Op = Op.getOperand(0); | |||
1858 | } | |||
1859 | ||||
1860 | uint64_t ShlImm; | |||
1861 | if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm)) | |||
1862 | return false; | |||
1863 | Op = Op.getOperand(0); | |||
1864 | ||||
1865 | if (!isShiftedMask_64(NonZeroBits)) | |||
1866 | return false; | |||
1867 | ||||
1868 | ShiftAmount = countTrailingZeros(NonZeroBits); | |||
1869 | MaskWidth = countTrailingOnes(NonZeroBits >> ShiftAmount); | |||
1870 | ||||
1871 | // BFI encompasses sufficiently many nodes that it's worth inserting an extra | |||
1872 | // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL | |||
1873 | // amount. | |||
1874 | Src = getLeftShift(CurDAG, Op, ShlImm - ShiftAmount); | |||
1875 | ||||
1876 | return true; | |||
1877 | } | |||
1878 | ||||
1879 | // Given a OR operation, check if we have the following pattern | |||
1880 | // ubfm c, b, imm, imm2 (or something that does the same jobs, see | |||
1881 | // isBitfieldExtractOp) | |||
1882 | // d = e & mask2 ; where mask is a binary sequence of 1..10..0 and | |||
1883 | // countTrailingZeros(mask2) == imm2 - imm + 1 | |||
1884 | // f = d | c | |||
1885 | // if yes, given reference arguments will be update so that one can replace | |||
1886 | // the OR instruction with: | |||
1887 | // f = Opc Opd0, Opd1, LSB, MSB ; where Opc is a BFM, LSB = imm, and MSB = imm2 | |||
1888 | static bool isBitfieldInsertOpFromOr(SDNode *N, unsigned &Opc, SDValue &Dst, | |||
1889 | SDValue &Src, unsigned &ImmR, | |||
1890 | unsigned &ImmS, SelectionDAG *CurDAG) { | |||
1891 | assert(N->getOpcode() == ISD::OR && "Expect a OR operation")((N->getOpcode() == ISD::OR && "Expect a OR operation" ) ? static_cast<void> (0) : __assert_fail ("N->getOpcode() == ISD::OR && \"Expect a OR operation\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn235822/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 1891, __PRETTY_FUNCTION__)); | |||
1892 | ||||
1893 | // Set Opc | |||
1894 | EVT VT = N->getValueType(0); | |||
1895 | if (VT == MVT::i32) | |||
1896 | Opc = AArch64::BFMWri; | |||
1897 | else if (VT == MVT::i64) | |||
1898 | Opc = AArch64::BFMXri; | |||
1899 | else | |||
1900 | return false; | |||
1901 | ||||
1902 | // Because of simplify-demanded-bits in DAGCombine, involved masks may not | |||
1903 | // have the expected shape. Try to undo that. | |||
1904 | APInt UsefulBits; | |||
1905 | getUsefulBits(SDValue(N, 0), UsefulBits); | |||
1906 | ||||
1907 | unsigned NumberOfIgnoredLowBits = UsefulBits.countTrailingZeros(); | |||
1908 | unsigned NumberOfIgnoredHighBits = UsefulBits.countLeadingZeros(); | |||
1909 | ||||
1910 | // OR is commutative, check both possibilities (does llvm provide a | |||
1911 | // way to do that directely, e.g., via code matcher?) | |||
1912 | SDValue OrOpd1Val = N->getOperand(1); | |||
1913 | SDNode *OrOpd0 = N->getOperand(0).getNode(); | |||
1914 | SDNode *OrOpd1 = N->getOperand(1).getNode(); | |||
1915 | for (int i = 0; i < 2; | |||
1916 | ++i, std::swap(OrOpd0, OrOpd1), OrOpd1Val = N->getOperand(0)) { | |||
1917 | unsigned BFXOpc; | |||
1918 | int DstLSB, Width; | |||
1919 | if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS, | |||
1920 | NumberOfIgnoredLowBits, true)) { | |||
1921 | // Check that the returned opcode is compatible with the pattern, | |||
1922 | // i.e., same type and zero extended (U and not S) | |||
1923 | if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) || | |||
1924 | (BFXOpc != AArch64::UBFMWri && VT == MVT::i32)) | |||
1925 | continue; | |||
1926 | ||||
1927 | // Compute the width of the bitfield insertion | |||
1928 | DstLSB = 0; | |||
1929 | Width = ImmS - ImmR + 1; | |||
1930 | // FIXME: This constraint is to catch bitfield insertion we may | |||
1931 | // want to widen the pattern if we want to grab general bitfied | |||
1932 | // move case | |||
1933 | if (Width <= 0) | |||
1934 | continue; | |||
1935 | ||||
1936 | // If the mask on the insertee is correct, we have a BFXIL operation. We | |||
1937 | // can share the ImmR and ImmS values from the already-computed UBFM. | |||
1938 | } else if (isBitfieldPositioningOp(CurDAG, SDValue(OrOpd0, 0), Src, | |||
1939 | DstLSB, Width)) { | |||
1940 | ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits(); | |||
1941 | ImmS = Width - 1; | |||
1942 | } else | |||
1943 | continue; | |||
1944 | ||||
1945 | // Check the second part of the pattern | |||
1946 | EVT VT = OrOpd1->getValueType(0); | |||
1947 | assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand")(((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand" ) ? static_cast<void> (0) : __assert_fail ("(VT == MVT::i32 || VT == MVT::i64) && \"unexpected OR operand\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn235822/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 1947, __PRETTY_FUNCTION__)); | |||
1948 | ||||
1949 | // Compute the Known Zero for the candidate of the first operand. | |||
1950 | // This allows to catch more general case than just looking for | |||
1951 | // AND with imm. Indeed, simplify-demanded-bits may have removed | |||
1952 | // the AND instruction because it proves it was useless. | |||
1953 | APInt KnownZero, KnownOne; | |||
1954 | CurDAG->computeKnownBits(OrOpd1Val, KnownZero, KnownOne); | |||
1955 | ||||
1956 | // Check if there is enough room for the second operand to appear | |||
1957 | // in the first one | |||
1958 | APInt BitsToBeInserted = | |||
1959 | APInt::getBitsSet(KnownZero.getBitWidth(), DstLSB, DstLSB + Width); | |||
1960 | ||||
1961 | if ((BitsToBeInserted & ~KnownZero) != 0) | |||
1962 | continue; | |||
1963 | ||||
1964 | // Set the first operand | |||
1965 | uint64_t Imm; | |||
1966 | if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) && | |||
1967 | isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT)) | |||
1968 | // In that case, we can eliminate the AND | |||
1969 | Dst = OrOpd1->getOperand(0); | |||
1970 | else | |||
1971 | // Maybe the AND has been removed by simplify-demanded-bits | |||
1972 | // or is useful because it discards more bits | |||
1973 | Dst = OrOpd1Val; | |||
1974 | ||||
1975 | // both parts match | |||
1976 | return true; | |||
1977 | } | |||
1978 | ||||
1979 | return false; | |||
1980 | } | |||
1981 | ||||
1982 | SDNode *AArch64DAGToDAGISel::SelectBitfieldInsertOp(SDNode *N) { | |||
1983 | if (N->getOpcode() != ISD::OR) | |||
1984 | return nullptr; | |||
1985 | ||||
1986 | unsigned Opc; | |||
1987 | unsigned LSB, MSB; | |||
1988 | SDValue Opd0, Opd1; | |||
1989 | ||||
1990 | if (!isBitfieldInsertOpFromOr(N, Opc, Opd0, Opd1, LSB, MSB, CurDAG)) | |||
1991 | return nullptr; | |||
1992 | ||||
1993 | EVT VT = N->getValueType(0); | |||
1994 | SDValue Ops[] = { Opd0, | |||
1995 | Opd1, | |||
1996 | CurDAG->getTargetConstant(LSB, VT), | |||
1997 | CurDAG->getTargetConstant(MSB, VT) }; | |||
1998 | return CurDAG->SelectNodeTo(N, Opc, VT, Ops); | |||
1999 | } | |||
2000 | ||||
2001 | SDNode *AArch64DAGToDAGISel::SelectLIBM(SDNode *N) { | |||
2002 | EVT VT = N->getValueType(0); | |||
2003 | unsigned Variant; | |||
2004 | unsigned Opc; | |||
2005 | unsigned FRINTXOpcs[] = { AArch64::FRINTXSr, AArch64::FRINTXDr }; | |||
2006 | ||||
2007 | if (VT == MVT::f32) { | |||
2008 | Variant = 0; | |||
2009 | } else if (VT == MVT::f64) { | |||
2010 | Variant = 1; | |||
2011 | } else | |||
2012 | return nullptr; // Unrecognized argument type. Fall back on default codegen. | |||
2013 | ||||
2014 | // Pick the FRINTX variant needed to set the flags. | |||
2015 | unsigned FRINTXOpc = FRINTXOpcs[Variant]; | |||
2016 | ||||
2017 | switch (N->getOpcode()) { | |||
2018 | default: | |||
2019 | return nullptr; // Unrecognized libm ISD node. Fall back on default codegen. | |||
2020 | case ISD::FCEIL: { | |||
2021 | unsigned FRINTPOpcs[] = { AArch64::FRINTPSr, AArch64::FRINTPDr }; | |||
2022 | Opc = FRINTPOpcs[Variant]; | |||
2023 | break; | |||
2024 | } | |||
2025 | case ISD::FFLOOR: { | |||
2026 | unsigned FRINTMOpcs[] = { AArch64::FRINTMSr, AArch64::FRINTMDr }; | |||
2027 | Opc = FRINTMOpcs[Variant]; | |||
2028 | break; | |||
2029 | } | |||
2030 | case ISD::FTRUNC: { | |||
2031 | unsigned FRINTZOpcs[] = { AArch64::FRINTZSr, AArch64::FRINTZDr }; | |||
2032 | Opc = FRINTZOpcs[Variant]; | |||
2033 | break; | |||
2034 | } | |||
2035 | case ISD::FROUND: { | |||
2036 | unsigned FRINTAOpcs[] = { AArch64::FRINTASr, AArch64::FRINTADr }; | |||
2037 | Opc = FRINTAOpcs[Variant]; | |||
2038 | break; | |||
2039 | } | |||
2040 | } | |||
2041 | ||||
2042 | SDLoc dl(N); | |||
2043 | SDValue In = N->getOperand(0); | |||
2044 | SmallVector<SDValue, 2> Ops; | |||
2045 | Ops.push_back(In); | |||
2046 | ||||
2047 | if (!TM.Options.UnsafeFPMath) { | |||
2048 | SDNode *FRINTX = CurDAG->getMachineNode(FRINTXOpc, dl, VT, MVT::Glue, In); | |||
2049 | Ops.push_back(SDValue(FRINTX, 1)); | |||
2050 | } | |||
2051 | ||||
2052 | return CurDAG->getMachineNode(Opc, dl, VT, Ops); | |||
2053 | } | |||
2054 | ||||
2055 | bool | |||
2056 | AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, | |||
2057 | unsigned RegWidth) { | |||
2058 | APFloat FVal(0.0); | |||
2059 | if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N)) | |||
2060 | FVal = CN->getValueAPF(); | |||
2061 | else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) { | |||
2062 | // Some otherwise illegal constants are allowed in this case. | |||
2063 | if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow || | |||
2064 | !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1))) | |||
2065 | return false; | |||
2066 | ||||
2067 | ConstantPoolSDNode *CN = | |||
2068 | dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)); | |||
2069 | FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF(); | |||
2070 | } else | |||
2071 | return false; | |||
2072 | ||||
2073 | // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits | |||
2074 | // is between 1 and 32 for a destination w-register, or 1 and 64 for an | |||
2075 | // x-register. | |||
2076 | // | |||
2077 | // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we | |||
2078 | // want THIS_NODE to be 2^fbits. This is much easier to deal with using | |||
2079 | // integers. | |||
2080 | bool IsExact; | |||
2081 | ||||
2082 | // fbits is between 1 and 64 in the worst-case, which means the fmul | |||
2083 | // could have 2^64 as an actual operand. Need 65 bits of precision. | |||
2084 | APSInt IntVal(65, true); | |||
2085 | FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact); | |||
2086 | ||||
2087 | // N.b. isPowerOf2 also checks for > 0. | |||
2088 | if (!IsExact || !IntVal.isPowerOf2()) return false; | |||
2089 | unsigned FBits = IntVal.logBase2(); | |||
2090 | ||||
2091 | // Checks above should have guaranteed that we haven't lost information in | |||
2092 | // finding FBits, but it must still be in range. | |||
2093 | if (FBits == 0 || FBits > RegWidth) return false; | |||
2094 | ||||
2095 | FixedPos = CurDAG->getTargetConstant(FBits, MVT::i32); | |||
2096 | return true; | |||
2097 | } | |||
2098 | ||||
2099 | SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { | |||
2100 | // Dump information about the Node being selected | |||
2101 | DEBUG(errs() << "Selecting: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { errs() << "Selecting: "; } } while ( 0); | |||
2102 | DEBUG(Node->dump(CurDAG))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { Node->dump(CurDAG); } } while (0); | |||
2103 | DEBUG(errs() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { errs() << "\n"; } } while (0); | |||
2104 | ||||
2105 | // If we have a custom node, we already have selected! | |||
2106 | if (Node->isMachineOpcode()) { | |||
2107 | DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { errs() << "== "; Node->dump(CurDAG ); errs() << "\n"; } } while (0); | |||
2108 | Node->setNodeId(-1); | |||
2109 | return nullptr; | |||
2110 | } | |||
2111 | ||||
2112 | // Few custom selection stuff. | |||
2113 | SDNode *ResNode = nullptr; | |||
2114 | EVT VT = Node->getValueType(0); | |||
2115 | ||||
2116 | switch (Node->getOpcode()) { | |||
2117 | default: | |||
2118 | break; | |||
2119 | ||||
2120 | case ISD::ADD: | |||
2121 | if (SDNode *I = SelectMLAV64LaneV128(Node)) | |||
2122 | return I; | |||
2123 | break; | |||
2124 | ||||
2125 | case ISD::LOAD: { | |||
2126 | // Try to select as an indexed load. Fall through to normal processing | |||
2127 | // if we can't. | |||
2128 | bool Done = false; | |||
2129 | SDNode *I = SelectIndexedLoad(Node, Done); | |||
2130 | if (Done) | |||
2131 | return I; | |||
2132 | break; | |||
2133 | } | |||
2134 | ||||
2135 | case ISD::SRL: | |||
2136 | case ISD::AND: | |||
2137 | case ISD::SRA: | |||
2138 | if (SDNode *I = SelectBitfieldExtractOp(Node)) | |||
2139 | return I; | |||
2140 | break; | |||
2141 | ||||
2142 | case ISD::OR: | |||
2143 | if (SDNode *I = SelectBitfieldInsertOp(Node)) | |||
2144 | return I; | |||
2145 | break; | |||
2146 | ||||
2147 | case ISD::EXTRACT_VECTOR_ELT: { | |||
2148 | // Extracting lane zero is a special case where we can just use a plain | |||
2149 | // EXTRACT_SUBREG instruction, which will become FMOV. This is easier for | |||
2150 | // the rest of the compiler, especially the register allocator and copyi | |||
2151 | // propagation, to reason about, so is preferred when it's possible to | |||
2152 | // use it. | |||
2153 | ConstantSDNode *LaneNode = cast<ConstantSDNode>(Node->getOperand(1)); | |||
2154 | // Bail and use the default Select() for non-zero lanes. | |||
2155 | if (LaneNode->getZExtValue() != 0) | |||
2156 | break; | |||
2157 | // If the element type is not the same as the result type, likewise | |||
2158 | // bail and use the default Select(), as there's more to do than just | |||
2159 | // a cross-class COPY. This catches extracts of i8 and i16 elements | |||
2160 | // since they will need an explicit zext. | |||
2161 | if (VT != Node->getOperand(0).getValueType().getVectorElementType()) | |||
2162 | break; | |||
2163 | unsigned SubReg; | |||
2164 | switch (Node->getOperand(0) | |||
2165 | .getValueType() | |||
2166 | .getVectorElementType() | |||
2167 | .getSizeInBits()) { | |||
2168 | default: | |||
2169 | llvm_unreachable("Unexpected vector element type!")::llvm::llvm_unreachable_internal("Unexpected vector element type!" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn235822/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 2169); | |||
2170 | case 64: | |||
2171 | SubReg = AArch64::dsub; | |||
2172 | break; | |||
2173 | case 32: | |||
2174 | SubReg = AArch64::ssub; | |||
2175 | break; | |||
2176 | case 16: | |||
2177 | SubReg = AArch64::hsub; | |||
2178 | break; | |||
2179 | case 8: | |||
2180 | llvm_unreachable("unexpected zext-requiring extract element!")::llvm::llvm_unreachable_internal("unexpected zext-requiring extract element!" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn235822/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 2180); | |||
2181 | } | |||
2182 | SDValue Extract = CurDAG->getTargetExtractSubreg(SubReg, SDLoc(Node), VT, | |||
2183 | Node->getOperand(0)); | |||
2184 | DEBUG(dbgs() << "ISEL: Custom selection!\n=> ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "ISEL: Custom selection!\n=> " ; } } while (0); | |||
2185 | DEBUG(Extract->dumpr(CurDAG))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { Extract->dumpr(CurDAG); } } while (0); | |||
2186 | DEBUG(dbgs() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "\n"; } } while (0); | |||
2187 | return Extract.getNode(); | |||
2188 | } | |||
2189 | case ISD::Constant: { | |||
2190 | // Materialize zero constants as copies from WZR/XZR. This allows | |||
2191 | // the coalescer to propagate these into other instructions. | |||
2192 | ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node); | |||
2193 | if (ConstNode->isNullValue()) { | |||
2194 | if (VT == MVT::i32) | |||
2195 | return CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(Node), | |||
2196 | AArch64::WZR, MVT::i32).getNode(); | |||
2197 | else if (VT == MVT::i64) | |||
2198 | return CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(Node), | |||
2199 | AArch64::XZR, MVT::i64).getNode(); | |||
2200 | } | |||
2201 | break; | |||
2202 | } | |||
2203 | ||||
2204 | case ISD::FrameIndex: { | |||
2205 | // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm. | |||
2206 | int FI = cast<FrameIndexSDNode>(Node)->getIndex(); | |||
2207 | unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0); | |||
2208 | const TargetLowering *TLI = getTargetLowering(); | |||
2209 | SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); | |||
2210 | SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32), | |||
2211 | CurDAG->getTargetConstant(Shifter, MVT::i32) }; | |||
2212 | return CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops); | |||
2213 | } | |||
2214 | case ISD::INTRINSIC_W_CHAIN: { | |||
2215 | unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); | |||
2216 | switch (IntNo) { | |||
2217 | default: | |||
2218 | break; | |||
2219 | case Intrinsic::aarch64_ldaxp: | |||
2220 | case Intrinsic::aarch64_ldxp: { | |||
2221 | unsigned Op = | |||
2222 | IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX; | |||
2223 | SDValue MemAddr = Node->getOperand(2); | |||
2224 | SDLoc DL(Node); | |||
2225 | SDValue Chain = Node->getOperand(0); | |||
2226 | ||||
2227 | SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64, | |||
2228 | MVT::Other, MemAddr, Chain); | |||
2229 | ||||
2230 | // Transfer memoperands. | |||
2231 | MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); | |||
2232 | MemOp[0] = cast<MemIntrinsicSDNode>(Node)->getMemOperand(); | |||
2233 | cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1); | |||
2234 | return Ld; | |||
2235 | } | |||
2236 | case Intrinsic::aarch64_stlxp: | |||
2237 | case Intrinsic::aarch64_stxp: { | |||
2238 | unsigned Op = | |||
2239 | IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX; | |||
2240 | SDLoc DL(Node); | |||
2241 | SDValue Chain = Node->getOperand(0); | |||
2242 | SDValue ValLo = Node->getOperand(2); | |||
2243 | SDValue ValHi = Node->getOperand(3); | |||
2244 | SDValue MemAddr = Node->getOperand(4); | |||
2245 | ||||
2246 | // Place arguments in the right order. | |||
2247 | SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain}; | |||
2248 | ||||
2249 | SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops); | |||
2250 | // Transfer memoperands. | |||
2251 | MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); | |||
2252 | MemOp[0] = cast<MemIntrinsicSDNode>(Node)->getMemOperand(); | |||
2253 | cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1); | |||
2254 | ||||
2255 | return St; | |||
2256 | } | |||
2257 | case Intrinsic::aarch64_neon_ld1x2: | |||
2258 | if (VT == MVT::v8i8) | |||
2259 | return SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0); | |||
2260 | else if (VT == MVT::v16i8) | |||
2261 | return SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0); | |||
2262 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2263 | return SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0); | |||
2264 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2265 | return SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0); | |||
2266 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2267 | return SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0); | |||
2268 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2269 | return SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0); | |||
2270 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2271 | return SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0); | |||
2272 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2273 | return SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0); | |||
2274 | break; | |||
2275 | case Intrinsic::aarch64_neon_ld1x3: | |||
2276 | if (VT == MVT::v8i8) | |||
2277 | return SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0); | |||
2278 | else if (VT == MVT::v16i8) | |||
2279 | return SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0); | |||
2280 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2281 | return SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0); | |||
2282 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2283 | return SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0); | |||
2284 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2285 | return SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0); | |||
2286 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2287 | return SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0); | |||
2288 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2289 | return SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0); | |||
2290 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2291 | return SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0); | |||
2292 | break; | |||
2293 | case Intrinsic::aarch64_neon_ld1x4: | |||
2294 | if (VT == MVT::v8i8) | |||
2295 | return SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0); | |||
2296 | else if (VT == MVT::v16i8) | |||
2297 | return SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0); | |||
2298 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2299 | return SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0); | |||
2300 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2301 | return SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0); | |||
2302 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2303 | return SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0); | |||
2304 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2305 | return SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0); | |||
2306 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2307 | return SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0); | |||
2308 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2309 | return SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0); | |||
2310 | break; | |||
2311 | case Intrinsic::aarch64_neon_ld2: | |||
2312 | if (VT == MVT::v8i8) | |||
2313 | return SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0); | |||
2314 | else if (VT == MVT::v16i8) | |||
2315 | return SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0); | |||
2316 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2317 | return SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0); | |||
2318 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2319 | return SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0); | |||
2320 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2321 | return SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0); | |||
2322 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2323 | return SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0); | |||
2324 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2325 | return SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0); | |||
2326 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2327 | return SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0); | |||
2328 | break; | |||
2329 | case Intrinsic::aarch64_neon_ld3: | |||
2330 | if (VT == MVT::v8i8) | |||
2331 | return SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0); | |||
2332 | else if (VT == MVT::v16i8) | |||
2333 | return SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0); | |||
2334 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2335 | return SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0); | |||
2336 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2337 | return SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0); | |||
2338 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2339 | return SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0); | |||
2340 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2341 | return SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0); | |||
2342 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2343 | return SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0); | |||
2344 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2345 | return SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0); | |||
2346 | break; | |||
2347 | case Intrinsic::aarch64_neon_ld4: | |||
2348 | if (VT == MVT::v8i8) | |||
2349 | return SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0); | |||
2350 | else if (VT == MVT::v16i8) | |||
2351 | return SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0); | |||
2352 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2353 | return SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0); | |||
2354 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2355 | return SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0); | |||
2356 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2357 | return SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0); | |||
2358 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2359 | return SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0); | |||
2360 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2361 | return SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0); | |||
2362 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2363 | return SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0); | |||
2364 | break; | |||
2365 | case Intrinsic::aarch64_neon_ld2r: | |||
2366 | if (VT == MVT::v8i8) | |||
2367 | return SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0); | |||
2368 | else if (VT == MVT::v16i8) | |||
2369 | return SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0); | |||
2370 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2371 | return SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0); | |||
2372 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2373 | return SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0); | |||
2374 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2375 | return SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0); | |||
2376 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2377 | return SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0); | |||
2378 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2379 | return SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0); | |||
2380 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2381 | return SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0); | |||
2382 | break; | |||
2383 | case Intrinsic::aarch64_neon_ld3r: | |||
2384 | if (VT == MVT::v8i8) | |||
2385 | return SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0); | |||
2386 | else if (VT == MVT::v16i8) | |||
2387 | return SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0); | |||
2388 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2389 | return SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0); | |||
2390 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2391 | return SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0); | |||
2392 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2393 | return SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0); | |||
2394 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2395 | return SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0); | |||
2396 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2397 | return SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0); | |||
2398 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2399 | return SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0); | |||
2400 | break; | |||
2401 | case Intrinsic::aarch64_neon_ld4r: | |||
2402 | if (VT == MVT::v8i8) | |||
2403 | return SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0); | |||
2404 | else if (VT == MVT::v16i8) | |||
2405 | return SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0); | |||
2406 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2407 | return SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0); | |||
2408 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2409 | return SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0); | |||
2410 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2411 | return SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0); | |||
2412 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2413 | return SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0); | |||
2414 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2415 | return SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0); | |||
2416 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2417 | return SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0); | |||
2418 | break; | |||
2419 | case Intrinsic::aarch64_neon_ld2lane: | |||
2420 | if (VT == MVT::v16i8 || VT == MVT::v8i8) | |||
2421 | return SelectLoadLane(Node, 2, AArch64::LD2i8); | |||
2422 | else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || | |||
2423 | VT == MVT::v8f16) | |||
2424 | return SelectLoadLane(Node, 2, AArch64::LD2i16); | |||
2425 | else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || | |||
2426 | VT == MVT::v2f32) | |||
2427 | return SelectLoadLane(Node, 2, AArch64::LD2i32); | |||
2428 | else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || | |||
2429 | VT == MVT::v1f64) | |||
2430 | return SelectLoadLane(Node, 2, AArch64::LD2i64); | |||
2431 | break; | |||
2432 | case Intrinsic::aarch64_neon_ld3lane: | |||
2433 | if (VT == MVT::v16i8 || VT == MVT::v8i8) | |||
2434 | return SelectLoadLane(Node, 3, AArch64::LD3i8); | |||
2435 | else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || | |||
2436 | VT == MVT::v8f16) | |||
2437 | return SelectLoadLane(Node, 3, AArch64::LD3i16); | |||
2438 | else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || | |||
2439 | VT == MVT::v2f32) | |||
2440 | return SelectLoadLane(Node, 3, AArch64::LD3i32); | |||
2441 | else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || | |||
2442 | VT == MVT::v1f64) | |||
2443 | return SelectLoadLane(Node, 3, AArch64::LD3i64); | |||
2444 | break; | |||
2445 | case Intrinsic::aarch64_neon_ld4lane: | |||
2446 | if (VT == MVT::v16i8 || VT == MVT::v8i8) | |||
2447 | return SelectLoadLane(Node, 4, AArch64::LD4i8); | |||
2448 | else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || | |||
2449 | VT == MVT::v8f16) | |||
2450 | return SelectLoadLane(Node, 4, AArch64::LD4i16); | |||
2451 | else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || | |||
2452 | VT == MVT::v2f32) | |||
2453 | return SelectLoadLane(Node, 4, AArch64::LD4i32); | |||
2454 | else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || | |||
2455 | VT == MVT::v1f64) | |||
2456 | return SelectLoadLane(Node, 4, AArch64::LD4i64); | |||
2457 | break; | |||
2458 | } | |||
2459 | } break; | |||
2460 | case ISD::INTRINSIC_WO_CHAIN: { | |||
2461 | unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue(); | |||
2462 | switch (IntNo) { | |||
2463 | default: | |||
2464 | break; | |||
2465 | case Intrinsic::aarch64_neon_tbl2: | |||
2466 | return SelectTable(Node, 2, VT == MVT::v8i8 ? AArch64::TBLv8i8Two | |||
2467 | : AArch64::TBLv16i8Two, | |||
2468 | false); | |||
2469 | case Intrinsic::aarch64_neon_tbl3: | |||
2470 | return SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three | |||
2471 | : AArch64::TBLv16i8Three, | |||
2472 | false); | |||
2473 | case Intrinsic::aarch64_neon_tbl4: | |||
2474 | return SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four | |||
2475 | : AArch64::TBLv16i8Four, | |||
2476 | false); | |||
2477 | case Intrinsic::aarch64_neon_tbx2: | |||
2478 | return SelectTable(Node, 2, VT == MVT::v8i8 ? AArch64::TBXv8i8Two | |||
2479 | : AArch64::TBXv16i8Two, | |||
2480 | true); | |||
2481 | case Intrinsic::aarch64_neon_tbx3: | |||
2482 | return SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three | |||
2483 | : AArch64::TBXv16i8Three, | |||
2484 | true); | |||
2485 | case Intrinsic::aarch64_neon_tbx4: | |||
2486 | return SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four | |||
2487 | : AArch64::TBXv16i8Four, | |||
2488 | true); | |||
2489 | case Intrinsic::aarch64_neon_smull: | |||
2490 | case Intrinsic::aarch64_neon_umull: | |||
2491 | if (SDNode *N = SelectMULLV64LaneV128(IntNo, Node)) | |||
2492 | return N; | |||
2493 | break; | |||
2494 | } | |||
2495 | break; | |||
2496 | } | |||
2497 | case ISD::INTRINSIC_VOID: { | |||
2498 | unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); | |||
2499 | if (Node->getNumOperands() >= 3) | |||
2500 | VT = Node->getOperand(2)->getValueType(0); | |||
2501 | switch (IntNo) { | |||
2502 | default: | |||
2503 | break; | |||
2504 | case Intrinsic::aarch64_neon_st1x2: { | |||
2505 | if (VT == MVT::v8i8) | |||
2506 | return SelectStore(Node, 2, AArch64::ST1Twov8b); | |||
2507 | else if (VT == MVT::v16i8) | |||
2508 | return SelectStore(Node, 2, AArch64::ST1Twov16b); | |||
2509 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2510 | return SelectStore(Node, 2, AArch64::ST1Twov4h); | |||
2511 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2512 | return SelectStore(Node, 2, AArch64::ST1Twov8h); | |||
2513 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2514 | return SelectStore(Node, 2, AArch64::ST1Twov2s); | |||
2515 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2516 | return SelectStore(Node, 2, AArch64::ST1Twov4s); | |||
2517 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2518 | return SelectStore(Node, 2, AArch64::ST1Twov2d); | |||
2519 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2520 | return SelectStore(Node, 2, AArch64::ST1Twov1d); | |||
2521 | break; | |||
2522 | } | |||
2523 | case Intrinsic::aarch64_neon_st1x3: { | |||
2524 | if (VT == MVT::v8i8) | |||
2525 | return SelectStore(Node, 3, AArch64::ST1Threev8b); | |||
2526 | else if (VT == MVT::v16i8) | |||
2527 | return SelectStore(Node, 3, AArch64::ST1Threev16b); | |||
2528 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2529 | return SelectStore(Node, 3, AArch64::ST1Threev4h); | |||
2530 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2531 | return SelectStore(Node, 3, AArch64::ST1Threev8h); | |||
2532 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2533 | return SelectStore(Node, 3, AArch64::ST1Threev2s); | |||
2534 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2535 | return SelectStore(Node, 3, AArch64::ST1Threev4s); | |||
2536 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2537 | return SelectStore(Node, 3, AArch64::ST1Threev2d); | |||
2538 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2539 | return SelectStore(Node, 3, AArch64::ST1Threev1d); | |||
2540 | break; | |||
2541 | } | |||
2542 | case Intrinsic::aarch64_neon_st1x4: { | |||
2543 | if (VT == MVT::v8i8) | |||
2544 | return SelectStore(Node, 4, AArch64::ST1Fourv8b); | |||
2545 | else if (VT == MVT::v16i8) | |||
2546 | return SelectStore(Node, 4, AArch64::ST1Fourv16b); | |||
2547 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2548 | return SelectStore(Node, 4, AArch64::ST1Fourv4h); | |||
2549 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2550 | return SelectStore(Node, 4, AArch64::ST1Fourv8h); | |||
2551 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2552 | return SelectStore(Node, 4, AArch64::ST1Fourv2s); | |||
2553 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2554 | return SelectStore(Node, 4, AArch64::ST1Fourv4s); | |||
2555 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2556 | return SelectStore(Node, 4, AArch64::ST1Fourv2d); | |||
2557 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2558 | return SelectStore(Node, 4, AArch64::ST1Fourv1d); | |||
2559 | break; | |||
2560 | } | |||
2561 | case Intrinsic::aarch64_neon_st2: { | |||
2562 | if (VT == MVT::v8i8) | |||
2563 | return SelectStore(Node, 2, AArch64::ST2Twov8b); | |||
2564 | else if (VT == MVT::v16i8) | |||
2565 | return SelectStore(Node, 2, AArch64::ST2Twov16b); | |||
2566 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2567 | return SelectStore(Node, 2, AArch64::ST2Twov4h); | |||
2568 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2569 | return SelectStore(Node, 2, AArch64::ST2Twov8h); | |||
2570 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2571 | return SelectStore(Node, 2, AArch64::ST2Twov2s); | |||
2572 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2573 | return SelectStore(Node, 2, AArch64::ST2Twov4s); | |||
2574 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2575 | return SelectStore(Node, 2, AArch64::ST2Twov2d); | |||
2576 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2577 | return SelectStore(Node, 2, AArch64::ST1Twov1d); | |||
2578 | break; | |||
2579 | } | |||
2580 | case Intrinsic::aarch64_neon_st3: { | |||
2581 | if (VT == MVT::v8i8) | |||
2582 | return SelectStore(Node, 3, AArch64::ST3Threev8b); | |||
2583 | else if (VT == MVT::v16i8) | |||
2584 | return SelectStore(Node, 3, AArch64::ST3Threev16b); | |||
2585 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2586 | return SelectStore(Node, 3, AArch64::ST3Threev4h); | |||
2587 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2588 | return SelectStore(Node, 3, AArch64::ST3Threev8h); | |||
2589 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2590 | return SelectStore(Node, 3, AArch64::ST3Threev2s); | |||
2591 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2592 | return SelectStore(Node, 3, AArch64::ST3Threev4s); | |||
2593 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2594 | return SelectStore(Node, 3, AArch64::ST3Threev2d); | |||
2595 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2596 | return SelectStore(Node, 3, AArch64::ST1Threev1d); | |||
2597 | break; | |||
2598 | } | |||
2599 | case Intrinsic::aarch64_neon_st4: { | |||
2600 | if (VT == MVT::v8i8) | |||
2601 | return SelectStore(Node, 4, AArch64::ST4Fourv8b); | |||
2602 | else if (VT == MVT::v16i8) | |||
2603 | return SelectStore(Node, 4, AArch64::ST4Fourv16b); | |||
2604 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2605 | return SelectStore(Node, 4, AArch64::ST4Fourv4h); | |||
2606 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2607 | return SelectStore(Node, 4, AArch64::ST4Fourv8h); | |||
2608 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2609 | return SelectStore(Node, 4, AArch64::ST4Fourv2s); | |||
2610 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2611 | return SelectStore(Node, 4, AArch64::ST4Fourv4s); | |||
2612 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2613 | return SelectStore(Node, 4, AArch64::ST4Fourv2d); | |||
2614 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2615 | return SelectStore(Node, 4, AArch64::ST1Fourv1d); | |||
2616 | break; | |||
2617 | } | |||
2618 | case Intrinsic::aarch64_neon_st2lane: { | |||
2619 | if (VT == MVT::v16i8 || VT == MVT::v8i8) | |||
2620 | return SelectStoreLane(Node, 2, AArch64::ST2i8); | |||
2621 | else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || | |||
2622 | VT == MVT::v8f16) | |||
2623 | return SelectStoreLane(Node, 2, AArch64::ST2i16); | |||
2624 | else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || | |||
2625 | VT == MVT::v2f32) | |||
2626 | return SelectStoreLane(Node, 2, AArch64::ST2i32); | |||
2627 | else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || | |||
2628 | VT == MVT::v1f64) | |||
2629 | return SelectStoreLane(Node, 2, AArch64::ST2i64); | |||
2630 | break; | |||
2631 | } | |||
2632 | case Intrinsic::aarch64_neon_st3lane: { | |||
2633 | if (VT == MVT::v16i8 || VT == MVT::v8i8) | |||
2634 | return SelectStoreLane(Node, 3, AArch64::ST3i8); | |||
2635 | else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || | |||
2636 | VT == MVT::v8f16) | |||
2637 | return SelectStoreLane(Node, 3, AArch64::ST3i16); | |||
2638 | else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || | |||
2639 | VT == MVT::v2f32) | |||
2640 | return SelectStoreLane(Node, 3, AArch64::ST3i32); | |||
2641 | else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || | |||
2642 | VT == MVT::v1f64) | |||
2643 | return SelectStoreLane(Node, 3, AArch64::ST3i64); | |||
2644 | break; | |||
2645 | } | |||
2646 | case Intrinsic::aarch64_neon_st4lane: { | |||
2647 | if (VT == MVT::v16i8 || VT == MVT::v8i8) | |||
2648 | return SelectStoreLane(Node, 4, AArch64::ST4i8); | |||
2649 | else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || | |||
2650 | VT == MVT::v8f16) | |||
2651 | return SelectStoreLane(Node, 4, AArch64::ST4i16); | |||
2652 | else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || | |||
2653 | VT == MVT::v2f32) | |||
2654 | return SelectStoreLane(Node, 4, AArch64::ST4i32); | |||
2655 | else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || | |||
2656 | VT == MVT::v1f64) | |||
2657 | return SelectStoreLane(Node, 4, AArch64::ST4i64); | |||
2658 | break; | |||
2659 | } | |||
2660 | } | |||
2661 | } | |||
2662 | case AArch64ISD::LD2post: { | |||
2663 | if (VT == MVT::v8i8) | |||
2664 | return SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0); | |||
2665 | else if (VT == MVT::v16i8) | |||
2666 | return SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0); | |||
2667 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2668 | return SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0); | |||
2669 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2670 | return SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0); | |||
2671 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2672 | return SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0); | |||
2673 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2674 | return SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0); | |||
2675 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2676 | return SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0); | |||
2677 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2678 | return SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0); | |||
2679 | break; | |||
2680 | } | |||
2681 | case AArch64ISD::LD3post: { | |||
2682 | if (VT == MVT::v8i8) | |||
2683 | return SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0); | |||
2684 | else if (VT == MVT::v16i8) | |||
2685 | return SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0); | |||
2686 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2687 | return SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0); | |||
2688 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2689 | return SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0); | |||
2690 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2691 | return SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0); | |||
2692 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2693 | return SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0); | |||
2694 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2695 | return SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0); | |||
2696 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2697 | return SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0); | |||
2698 | break; | |||
2699 | } | |||
2700 | case AArch64ISD::LD4post: { | |||
2701 | if (VT == MVT::v8i8) | |||
2702 | return SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0); | |||
2703 | else if (VT == MVT::v16i8) | |||
2704 | return SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0); | |||
2705 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2706 | return SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0); | |||
2707 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2708 | return SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0); | |||
2709 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2710 | return SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0); | |||
2711 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2712 | return SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0); | |||
2713 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2714 | return SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0); | |||
2715 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2716 | return SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0); | |||
2717 | break; | |||
2718 | } | |||
2719 | case AArch64ISD::LD1x2post: { | |||
2720 | if (VT == MVT::v8i8) | |||
2721 | return SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0); | |||
2722 | else if (VT == MVT::v16i8) | |||
2723 | return SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0); | |||
2724 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2725 | return SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0); | |||
2726 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2727 | return SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0); | |||
2728 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2729 | return SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0); | |||
2730 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2731 | return SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0); | |||
2732 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2733 | return SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0); | |||
2734 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2735 | return SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0); | |||
2736 | break; | |||
2737 | } | |||
2738 | case AArch64ISD::LD1x3post: { | |||
2739 | if (VT == MVT::v8i8) | |||
2740 | return SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0); | |||
2741 | else if (VT == MVT::v16i8) | |||
2742 | return SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0); | |||
2743 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2744 | return SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0); | |||
2745 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2746 | return SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0); | |||
2747 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2748 | return SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0); | |||
2749 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2750 | return SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0); | |||
2751 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2752 | return SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0); | |||
2753 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2754 | return SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0); | |||
2755 | break; | |||
2756 | } | |||
2757 | case AArch64ISD::LD1x4post: { | |||
2758 | if (VT == MVT::v8i8) | |||
2759 | return SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0); | |||
2760 | else if (VT == MVT::v16i8) | |||
2761 | return SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0); | |||
2762 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2763 | return SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0); | |||
2764 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2765 | return SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0); | |||
2766 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2767 | return SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0); | |||
2768 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2769 | return SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0); | |||
2770 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2771 | return SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0); | |||
2772 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2773 | return SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0); | |||
2774 | break; | |||
2775 | } | |||
2776 | case AArch64ISD::LD1DUPpost: { | |||
2777 | if (VT == MVT::v8i8) | |||
2778 | return SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0); | |||
2779 | else if (VT == MVT::v16i8) | |||
2780 | return SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0); | |||
2781 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2782 | return SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0); | |||
2783 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2784 | return SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0); | |||
2785 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2786 | return SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0); | |||
2787 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2788 | return SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0); | |||
2789 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2790 | return SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0); | |||
2791 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2792 | return SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0); | |||
2793 | break; | |||
2794 | } | |||
2795 | case AArch64ISD::LD2DUPpost: { | |||
2796 | if (VT == MVT::v8i8) | |||
2797 | return SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0); | |||
2798 | else if (VT == MVT::v16i8) | |||
2799 | return SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0); | |||
2800 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2801 | return SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0); | |||
2802 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2803 | return SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0); | |||
2804 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2805 | return SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0); | |||
2806 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2807 | return SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0); | |||
2808 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2809 | return SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0); | |||
2810 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2811 | return SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0); | |||
2812 | break; | |||
2813 | } | |||
2814 | case AArch64ISD::LD3DUPpost: { | |||
2815 | if (VT == MVT::v8i8) | |||
2816 | return SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0); | |||
2817 | else if (VT == MVT::v16i8) | |||
2818 | return SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0); | |||
2819 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2820 | return SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0); | |||
2821 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2822 | return SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0); | |||
2823 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2824 | return SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0); | |||
2825 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2826 | return SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0); | |||
2827 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2828 | return SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0); | |||
2829 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2830 | return SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0); | |||
2831 | break; | |||
2832 | } | |||
2833 | case AArch64ISD::LD4DUPpost: { | |||
2834 | if (VT == MVT::v8i8) | |||
2835 | return SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0); | |||
2836 | else if (VT == MVT::v16i8) | |||
2837 | return SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0); | |||
2838 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2839 | return SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0); | |||
2840 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2841 | return SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0); | |||
2842 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2843 | return SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0); | |||
2844 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2845 | return SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0); | |||
2846 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2847 | return SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0); | |||
2848 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2849 | return SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0); | |||
2850 | break; | |||
2851 | } | |||
2852 | case AArch64ISD::LD1LANEpost: { | |||
2853 | if (VT == MVT::v16i8 || VT == MVT::v8i8) | |||
2854 | return SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST); | |||
2855 | else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || | |||
2856 | VT == MVT::v8f16) | |||
2857 | return SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST); | |||
2858 | else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || | |||
2859 | VT == MVT::v2f32) | |||
2860 | return SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST); | |||
2861 | else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || | |||
2862 | VT == MVT::v1f64) | |||
2863 | return SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST); | |||
2864 | break; | |||
2865 | } | |||
2866 | case AArch64ISD::LD2LANEpost: { | |||
2867 | if (VT == MVT::v16i8 || VT == MVT::v8i8) | |||
2868 | return SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST); | |||
2869 | else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || | |||
2870 | VT == MVT::v8f16) | |||
2871 | return SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST); | |||
2872 | else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || | |||
2873 | VT == MVT::v2f32) | |||
2874 | return SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST); | |||
2875 | else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || | |||
2876 | VT == MVT::v1f64) | |||
2877 | return SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST); | |||
2878 | break; | |||
2879 | } | |||
2880 | case AArch64ISD::LD3LANEpost: { | |||
2881 | if (VT == MVT::v16i8 || VT == MVT::v8i8) | |||
2882 | return SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST); | |||
2883 | else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || | |||
2884 | VT == MVT::v8f16) | |||
2885 | return SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST); | |||
2886 | else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || | |||
2887 | VT == MVT::v2f32) | |||
2888 | return SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST); | |||
2889 | else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || | |||
2890 | VT == MVT::v1f64) | |||
2891 | return SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST); | |||
2892 | break; | |||
2893 | } | |||
2894 | case AArch64ISD::LD4LANEpost: { | |||
2895 | if (VT == MVT::v16i8 || VT == MVT::v8i8) | |||
2896 | return SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST); | |||
2897 | else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || | |||
2898 | VT == MVT::v8f16) | |||
2899 | return SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST); | |||
2900 | else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || | |||
2901 | VT == MVT::v2f32) | |||
2902 | return SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST); | |||
2903 | else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || | |||
2904 | VT == MVT::v1f64) | |||
2905 | return SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST); | |||
2906 | break; | |||
2907 | } | |||
2908 | case AArch64ISD::ST2post: { | |||
2909 | VT = Node->getOperand(1).getValueType(); | |||
2910 | if (VT == MVT::v8i8) | |||
2911 | return SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST); | |||
2912 | else if (VT == MVT::v16i8) | |||
2913 | return SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST); | |||
2914 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2915 | return SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST); | |||
2916 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2917 | return SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST); | |||
2918 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2919 | return SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST); | |||
2920 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2921 | return SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST); | |||
2922 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2923 | return SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST); | |||
2924 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2925 | return SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST); | |||
2926 | break; | |||
2927 | } | |||
2928 | case AArch64ISD::ST3post: { | |||
2929 | VT = Node->getOperand(1).getValueType(); | |||
2930 | if (VT == MVT::v8i8) | |||
2931 | return SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST); | |||
2932 | else if (VT == MVT::v16i8) | |||
2933 | return SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST); | |||
2934 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2935 | return SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST); | |||
2936 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2937 | return SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST); | |||
2938 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2939 | return SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST); | |||
2940 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2941 | return SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST); | |||
2942 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2943 | return SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST); | |||
2944 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2945 | return SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST); | |||
2946 | break; | |||
2947 | } | |||
2948 | case AArch64ISD::ST4post: { | |||
2949 | VT = Node->getOperand(1).getValueType(); | |||
2950 | if (VT == MVT::v8i8) | |||
2951 | return SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST); | |||
2952 | else if (VT == MVT::v16i8) | |||
2953 | return SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST); | |||
2954 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2955 | return SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST); | |||
2956 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2957 | return SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST); | |||
2958 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2959 | return SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST); | |||
2960 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2961 | return SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST); | |||
2962 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2963 | return SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST); | |||
2964 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2965 | return SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST); | |||
2966 | break; | |||
2967 | } | |||
2968 | case AArch64ISD::ST1x2post: { | |||
2969 | VT = Node->getOperand(1).getValueType(); | |||
2970 | if (VT == MVT::v8i8) | |||
2971 | return SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST); | |||
2972 | else if (VT == MVT::v16i8) | |||
2973 | return SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST); | |||
2974 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2975 | return SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST); | |||
2976 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2977 | return SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST); | |||
2978 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2979 | return SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST); | |||
2980 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2981 | return SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST); | |||
2982 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2983 | return SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST); | |||
2984 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2985 | return SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST); | |||
2986 | break; | |||
2987 | } | |||
2988 | case AArch64ISD::ST1x3post: { | |||
2989 | VT = Node->getOperand(1).getValueType(); | |||
2990 | if (VT == MVT::v8i8) | |||
2991 | return SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST); | |||
2992 | else if (VT == MVT::v16i8) | |||
2993 | return SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST); | |||
2994 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2995 | return SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST); | |||
2996 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2997 | return SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST); | |||
2998 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2999 | return SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST); | |||
3000 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
3001 | return SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST); | |||
3002 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
3003 | return SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST); | |||
3004 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
3005 | return SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST); | |||
3006 | break; | |||
3007 | } | |||
3008 | case AArch64ISD::ST1x4post: { | |||
3009 | VT = Node->getOperand(1).getValueType(); | |||
3010 | if (VT == MVT::v8i8) | |||
3011 | return SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST); | |||
3012 | else if (VT == MVT::v16i8) | |||
3013 | return SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST); | |||
3014 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
3015 | return SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST); | |||
3016 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
3017 | return SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST); | |||
3018 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
3019 | return SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST); | |||
3020 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
3021 | return SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST); | |||
3022 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
3023 | return SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST); | |||
3024 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
3025 | return SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST); | |||
3026 | break; | |||
3027 | } | |||
3028 | case AArch64ISD::ST2LANEpost: { | |||
3029 | VT = Node->getOperand(1).getValueType(); | |||
3030 | if (VT == MVT::v16i8 || VT == MVT::v8i8) | |||
3031 | return SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST); | |||
3032 | else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || | |||
3033 | VT == MVT::v8f16) | |||
3034 | return SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST); | |||
3035 | else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || | |||
3036 | VT == MVT::v2f32) | |||
3037 | return SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST); | |||
3038 | else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || | |||
3039 | VT == MVT::v1f64) | |||
3040 | return SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST); | |||
3041 | break; | |||
3042 | } | |||
3043 | case AArch64ISD::ST3LANEpost: { | |||
3044 | VT = Node->getOperand(1).getValueType(); | |||
3045 | if (VT == MVT::v16i8 || VT == MVT::v8i8) | |||
3046 | return SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST); | |||
3047 | else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || | |||
3048 | VT == MVT::v8f16) | |||
3049 | return SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST); | |||
3050 | else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || | |||
3051 | VT == MVT::v2f32) | |||
3052 | return SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST); | |||
3053 | else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || | |||
3054 | VT == MVT::v1f64) | |||
3055 | return SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST); | |||
3056 | break; | |||
3057 | } | |||
3058 | case AArch64ISD::ST4LANEpost: { | |||
3059 | VT = Node->getOperand(1).getValueType(); | |||
3060 | if (VT == MVT::v16i8 || VT == MVT::v8i8) | |||
3061 | return SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST); | |||
3062 | else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || | |||
3063 | VT == MVT::v8f16) | |||
3064 | return SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST); | |||
3065 | else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || | |||
3066 | VT == MVT::v2f32) | |||
3067 | return SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST); | |||
3068 | else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || | |||
3069 | VT == MVT::v1f64) | |||
3070 | return SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST); | |||
3071 | break; | |||
3072 | } | |||
3073 | ||||
3074 | case ISD::FCEIL: | |||
3075 | case ISD::FFLOOR: | |||
3076 | case ISD::FTRUNC: | |||
3077 | case ISD::FROUND: | |||
3078 | if (SDNode *I = SelectLIBM(Node)) | |||
3079 | return I; | |||
3080 | break; | |||
3081 | } | |||
3082 | ||||
3083 | // Select the default instruction | |||
3084 | ResNode = SelectCode(Node); | |||
3085 | ||||
3086 | DEBUG(errs() << "=> ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { errs() << "=> "; } } while (0); | |||
3087 | if (ResNode == nullptr || ResNode == Node) | |||
3088 | DEBUG(Node->dump(CurDAG))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { Node->dump(CurDAG); } } while (0); | |||
3089 | else | |||
3090 | DEBUG(ResNode->dump(CurDAG))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { ResNode->dump(CurDAG); } } while (0); | |||
3091 | DEBUG(errs() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { errs() << "\n"; } } while (0); | |||
3092 | ||||
3093 | return ResNode; | |||
3094 | } | |||
3095 | ||||
3096 | /// createAArch64ISelDag - This pass converts a legalized DAG into a | |||
3097 | /// AArch64-specific DAG, ready for instruction scheduling. | |||
3098 | FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM, | |||
3099 | CodeGenOpt::Level OptLevel) { | |||
3100 | return new AArch64DAGToDAGISel(TM, OptLevel); | |||
3101 | } |