File: | lib/Target/AArch64/AArch64ISelDAGToDAG.cpp |
Location: | line 702, column 67 |
Description: | The result of the '<<' expression is undefined |
1 | //===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===// | |||
2 | // | |||
3 | // The LLVM Compiler Infrastructure | |||
4 | // | |||
5 | // This file is distributed under the University of Illinois Open Source | |||
6 | // License. See LICENSE.TXT for details. | |||
7 | // | |||
8 | //===----------------------------------------------------------------------===// | |||
9 | // | |||
10 | // This file defines an instruction selector for the AArch64 target. | |||
11 | // | |||
12 | //===----------------------------------------------------------------------===// | |||
13 | ||||
14 | #include "AArch64TargetMachine.h" | |||
15 | #include "MCTargetDesc/AArch64AddressingModes.h" | |||
16 | #include "llvm/ADT/APSInt.h" | |||
17 | #include "llvm/CodeGen/SelectionDAGISel.h" | |||
18 | #include "llvm/IR/Function.h" // To access function attributes. | |||
19 | #include "llvm/IR/GlobalValue.h" | |||
20 | #include "llvm/IR/Intrinsics.h" | |||
21 | #include "llvm/Support/Debug.h" | |||
22 | #include "llvm/Support/ErrorHandling.h" | |||
23 | #include "llvm/Support/MathExtras.h" | |||
24 | #include "llvm/Support/raw_ostream.h" | |||
25 | ||||
26 | using namespace llvm; | |||
27 | ||||
// Tag used by this file's debug-output macros.
#define DEBUG_TYPE "aarch64-isel"
29 | ||||
30 | //===--------------------------------------------------------------------===// | |||
31 | /// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine | |||
32 | /// instructions for SelectionDAG operations. | |||
33 | /// | |||
34 | namespace { | |||
35 | ||||
36 | class AArch64DAGToDAGISel : public SelectionDAGISel { | |||
37 | ||||
38 | /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can | |||
39 | /// make the right decision when generating code for different targets. | |||
40 | const AArch64Subtarget *Subtarget; | |||
41 | ||||
42 | bool ForCodeSize; | |||
43 | ||||
44 | public: | |||
45 | explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm, | |||
46 | CodeGenOpt::Level OptLevel) | |||
47 | : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr), | |||
48 | ForCodeSize(false) {} | |||
49 | ||||
50 | const char *getPassName() const override { | |||
51 | return "AArch64 Instruction Selection"; | |||
52 | } | |||
53 | ||||
54 | bool runOnMachineFunction(MachineFunction &MF) override { | |||
55 | ForCodeSize = MF.getFunction()->optForSize(); | |||
56 | Subtarget = &MF.getSubtarget<AArch64Subtarget>(); | |||
57 | return SelectionDAGISel::runOnMachineFunction(MF); | |||
58 | } | |||
59 | ||||
60 | SDNode *Select(SDNode *Node) override; | |||
61 | ||||
62 | /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for | |||
63 | /// inline asm expressions. | |||
64 | bool SelectInlineAsmMemoryOperand(const SDValue &Op, | |||
65 | unsigned ConstraintID, | |||
66 | std::vector<SDValue> &OutOps) override; | |||
67 | ||||
68 | SDNode *SelectMLAV64LaneV128(SDNode *N); | |||
69 | SDNode *SelectMULLV64LaneV128(unsigned IntNo, SDNode *N); | |||
70 | bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift); | |||
71 | bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift); | |||
72 | bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift); | |||
73 | bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) { | |||
74 | return SelectShiftedRegister(N, false, Reg, Shift); | |||
75 | } | |||
76 | bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) { | |||
77 | return SelectShiftedRegister(N, true, Reg, Shift); | |||
78 | } | |||
79 | bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) { | |||
80 | return SelectAddrModeIndexed7S(N, 1, Base, OffImm); | |||
81 | } | |||
82 | bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) { | |||
83 | return SelectAddrModeIndexed7S(N, 2, Base, OffImm); | |||
84 | } | |||
85 | bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) { | |||
86 | return SelectAddrModeIndexed7S(N, 4, Base, OffImm); | |||
87 | } | |||
88 | bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) { | |||
89 | return SelectAddrModeIndexed7S(N, 8, Base, OffImm); | |||
90 | } | |||
91 | bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) { | |||
92 | return SelectAddrModeIndexed7S(N, 16, Base, OffImm); | |||
93 | } | |||
94 | bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) { | |||
95 | return SelectAddrModeIndexed(N, 1, Base, OffImm); | |||
96 | } | |||
97 | bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) { | |||
98 | return SelectAddrModeIndexed(N, 2, Base, OffImm); | |||
99 | } | |||
100 | bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) { | |||
101 | return SelectAddrModeIndexed(N, 4, Base, OffImm); | |||
102 | } | |||
103 | bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) { | |||
104 | return SelectAddrModeIndexed(N, 8, Base, OffImm); | |||
105 | } | |||
106 | bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) { | |||
107 | return SelectAddrModeIndexed(N, 16, Base, OffImm); | |||
108 | } | |||
109 | bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) { | |||
110 | return SelectAddrModeUnscaled(N, 1, Base, OffImm); | |||
111 | } | |||
112 | bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) { | |||
113 | return SelectAddrModeUnscaled(N, 2, Base, OffImm); | |||
114 | } | |||
115 | bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) { | |||
116 | return SelectAddrModeUnscaled(N, 4, Base, OffImm); | |||
117 | } | |||
118 | bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) { | |||
119 | return SelectAddrModeUnscaled(N, 8, Base, OffImm); | |||
120 | } | |||
121 | bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) { | |||
122 | return SelectAddrModeUnscaled(N, 16, Base, OffImm); | |||
123 | } | |||
124 | ||||
125 | template<int Width> | |||
126 | bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset, | |||
127 | SDValue &SignExtend, SDValue &DoShift) { | |||
128 | return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift); | |||
129 | } | |||
130 | ||||
131 | template<int Width> | |||
132 | bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset, | |||
133 | SDValue &SignExtend, SDValue &DoShift) { | |||
134 | return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift); | |||
135 | } | |||
136 | ||||
137 | ||||
138 | /// Form sequences of consecutive 64/128-bit registers for use in NEON | |||
139 | /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have | |||
140 | /// between 1 and 4 elements. If it contains a single element that is returned | |||
141 | /// unchanged; otherwise a REG_SEQUENCE value is returned. | |||
142 | SDValue createDTuple(ArrayRef<SDValue> Vecs); | |||
143 | SDValue createQTuple(ArrayRef<SDValue> Vecs); | |||
144 | ||||
145 | /// Generic helper for the createDTuple/createQTuple | |||
146 | /// functions. Those should almost always be called instead. | |||
147 | SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[], | |||
148 | const unsigned SubRegs[]); | |||
149 | ||||
150 | SDNode *SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt); | |||
151 | ||||
152 | SDNode *SelectIndexedLoad(SDNode *N, bool &Done); | |||
153 | ||||
154 | SDNode *SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc, | |||
155 | unsigned SubRegIdx); | |||
156 | SDNode *SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc, | |||
157 | unsigned SubRegIdx); | |||
158 | SDNode *SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc); | |||
159 | SDNode *SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc); | |||
160 | ||||
161 | SDNode *SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc); | |||
162 | SDNode *SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc); | |||
163 | SDNode *SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc); | |||
164 | SDNode *SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc); | |||
165 | ||||
166 | SDNode *SelectBitfieldExtractOp(SDNode *N); | |||
167 | SDNode *SelectBitfieldInsertOp(SDNode *N); | |||
168 | SDNode *SelectBitfieldInsertInZeroOp(SDNode *N); | |||
169 | ||||
170 | SDNode *SelectReadRegister(SDNode *N); | |||
171 | SDNode *SelectWriteRegister(SDNode *N); | |||
172 | ||||
173 | // Include the pieces autogenerated from the target description. | |||
174 | #include "AArch64GenDAGISel.inc" | |||
175 | ||||
176 | private: | |||
177 | bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg, | |||
178 | SDValue &Shift); | |||
179 | bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base, | |||
180 | SDValue &OffImm); | |||
181 | bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base, | |||
182 | SDValue &OffImm); | |||
183 | bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base, | |||
184 | SDValue &OffImm); | |||
185 | bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base, | |||
186 | SDValue &Offset, SDValue &SignExtend, | |||
187 | SDValue &DoShift); | |||
188 | bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base, | |||
189 | SDValue &Offset, SDValue &SignExtend, | |||
190 | SDValue &DoShift); | |||
191 | bool isWorthFolding(SDValue V) const; | |||
192 | bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend, | |||
193 | SDValue &Offset, SDValue &SignExtend); | |||
194 | ||||
195 | template<unsigned RegWidth> | |||
196 | bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) { | |||
197 | return SelectCVTFixedPosOperand(N, FixedPos, RegWidth); | |||
198 | } | |||
199 | ||||
200 | bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width); | |||
201 | }; | |||
202 | } // end anonymous namespace | |||
203 | ||||
204 | /// isIntImmediate - This method tests to see if the node is a constant | |||
205 | /// operand. If so Imm will receive the 32-bit value. | |||
206 | static bool isIntImmediate(const SDNode *N, uint64_t &Imm) { | |||
207 | if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) { | |||
208 | Imm = C->getZExtValue(); | |||
209 | return true; | |||
210 | } | |||
211 | return false; | |||
212 | } | |||
213 | ||||
214 | // isIntImmediate - This method tests to see if a constant operand. | |||
215 | // If so Imm will receive the value. | |||
216 | static bool isIntImmediate(SDValue N, uint64_t &Imm) { | |||
217 | return isIntImmediate(N.getNode(), Imm); | |||
218 | } | |||
219 | ||||
220 | // isOpcWithIntImmediate - This method tests to see if the node is a specific | |||
221 | // opcode and that it has a immediate integer right operand. | |||
222 | // If so Imm will receive the 32 bit value. | |||
223 | static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc, | |||
224 | uint64_t &Imm) { | |||
225 | return N->getOpcode() == Opc && | |||
226 | isIntImmediate(N->getOperand(1).getNode(), Imm); | |||
227 | } | |||
228 | ||||
229 | bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand( | |||
230 | const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) { | |||
231 | switch(ConstraintID) { | |||
232 | default: | |||
233 | llvm_unreachable("Unexpected asm memory constraint")::llvm::llvm_unreachable_internal("Unexpected asm memory constraint" , "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 233); | |||
234 | case InlineAsm::Constraint_i: | |||
235 | case InlineAsm::Constraint_m: | |||
236 | case InlineAsm::Constraint_Q: | |||
237 | // Require the address to be in a register. That is safe for all AArch64 | |||
238 | // variants and it is hard to do anything much smarter without knowing | |||
239 | // how the operand is used. | |||
240 | OutOps.push_back(Op); | |||
241 | return false; | |||
242 | } | |||
243 | return true; | |||
244 | } | |||
245 | ||||
246 | /// SelectArithImmed - Select an immediate value that can be represented as | |||
247 | /// a 12-bit value shifted left by either 0 or 12. If so, return true with | |||
248 | /// Val set to the 12-bit value and Shift set to the shifter operand. | |||
249 | bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val, | |||
250 | SDValue &Shift) { | |||
251 | // This function is called from the addsub_shifted_imm ComplexPattern, | |||
252 | // which lists [imm] as the list of opcode it's interested in, however | |||
253 | // we still need to check whether the operand is actually an immediate | |||
254 | // here because the ComplexPattern opcode list is only used in | |||
255 | // root-level opcode matching. | |||
256 | if (!isa<ConstantSDNode>(N.getNode())) | |||
257 | return false; | |||
258 | ||||
259 | uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue(); | |||
260 | unsigned ShiftAmt; | |||
261 | ||||
262 | if (Immed >> 12 == 0) { | |||
263 | ShiftAmt = 0; | |||
264 | } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) { | |||
265 | ShiftAmt = 12; | |||
266 | Immed = Immed >> 12; | |||
267 | } else | |||
268 | return false; | |||
269 | ||||
270 | unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt); | |||
271 | SDLoc dl(N); | |||
272 | Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32); | |||
273 | Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32); | |||
274 | return true; | |||
275 | } | |||
276 | ||||
277 | /// SelectNegArithImmed - As above, but negates the value before trying to | |||
278 | /// select it. | |||
279 | bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val, | |||
280 | SDValue &Shift) { | |||
281 | // This function is called from the addsub_shifted_imm ComplexPattern, | |||
282 | // which lists [imm] as the list of opcode it's interested in, however | |||
283 | // we still need to check whether the operand is actually an immediate | |||
284 | // here because the ComplexPattern opcode list is only used in | |||
285 | // root-level opcode matching. | |||
286 | if (!isa<ConstantSDNode>(N.getNode())) | |||
287 | return false; | |||
288 | ||||
289 | // The immediate operand must be a 24-bit zero-extended immediate. | |||
290 | uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue(); | |||
291 | ||||
292 | // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0" | |||
293 | // have the opposite effect on the C flag, so this pattern mustn't match under | |||
294 | // those circumstances. | |||
295 | if (Immed == 0) | |||
296 | return false; | |||
297 | ||||
298 | if (N.getValueType() == MVT::i32) | |||
299 | Immed = ~((uint32_t)Immed) + 1; | |||
300 | else | |||
301 | Immed = ~Immed + 1ULL; | |||
302 | if (Immed & 0xFFFFFFFFFF000000ULL) | |||
303 | return false; | |||
304 | ||||
305 | Immed &= 0xFFFFFFULL; | |||
306 | return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val, | |||
307 | Shift); | |||
308 | } | |||
309 | ||||
310 | /// getShiftTypeForNode - Translate a shift node to the corresponding | |||
311 | /// ShiftType value. | |||
312 | static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) { | |||
313 | switch (N.getOpcode()) { | |||
314 | default: | |||
315 | return AArch64_AM::InvalidShiftExtend; | |||
316 | case ISD::SHL: | |||
317 | return AArch64_AM::LSL; | |||
318 | case ISD::SRL: | |||
319 | return AArch64_AM::LSR; | |||
320 | case ISD::SRA: | |||
321 | return AArch64_AM::ASR; | |||
322 | case ISD::ROTR: | |||
323 | return AArch64_AM::ROR; | |||
324 | } | |||
325 | } | |||
326 | ||||
327 | /// \brief Determine whether it is worth to fold V into an extended register. | |||
328 | bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const { | |||
329 | // it hurts if the value is used at least twice, unless we are optimizing | |||
330 | // for code size. | |||
331 | if (ForCodeSize || V.hasOneUse()) | |||
332 | return true; | |||
333 | return false; | |||
334 | } | |||
335 | ||||
336 | /// SelectShiftedRegister - Select a "shifted register" operand. If the value | |||
337 | /// is not shifted, set the Shift operand to default of "LSL 0". The logical | |||
338 | /// instructions allow the shifted register to be rotated, but the arithmetic | |||
339 | /// instructions do not. The AllowROR parameter specifies whether ROR is | |||
340 | /// supported. | |||
341 | bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR, | |||
342 | SDValue &Reg, SDValue &Shift) { | |||
343 | AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N); | |||
344 | if (ShType == AArch64_AM::InvalidShiftExtend) | |||
345 | return false; | |||
346 | if (!AllowROR && ShType == AArch64_AM::ROR) | |||
347 | return false; | |||
348 | ||||
349 | if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { | |||
350 | unsigned BitSize = N.getValueType().getSizeInBits(); | |||
351 | unsigned Val = RHS->getZExtValue() & (BitSize - 1); | |||
352 | unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val); | |||
353 | ||||
354 | Reg = N.getOperand(0); | |||
355 | Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32); | |||
356 | return isWorthFolding(N); | |||
357 | } | |||
358 | ||||
359 | return false; | |||
360 | } | |||
361 | ||||
362 | /// getExtendTypeForNode - Translate an extend node to the corresponding | |||
363 | /// ExtendType value. | |||
364 | static AArch64_AM::ShiftExtendType | |||
365 | getExtendTypeForNode(SDValue N, bool IsLoadStore = false) { | |||
366 | if (N.getOpcode() == ISD::SIGN_EXTEND || | |||
367 | N.getOpcode() == ISD::SIGN_EXTEND_INREG) { | |||
368 | EVT SrcVT; | |||
369 | if (N.getOpcode() == ISD::SIGN_EXTEND_INREG) | |||
370 | SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT(); | |||
371 | else | |||
372 | SrcVT = N.getOperand(0).getValueType(); | |||
373 | ||||
374 | if (!IsLoadStore && SrcVT == MVT::i8) | |||
375 | return AArch64_AM::SXTB; | |||
376 | else if (!IsLoadStore && SrcVT == MVT::i16) | |||
377 | return AArch64_AM::SXTH; | |||
378 | else if (SrcVT == MVT::i32) | |||
379 | return AArch64_AM::SXTW; | |||
380 | assert(SrcVT != MVT::i64 && "extend from 64-bits?")((SrcVT != MVT::i64 && "extend from 64-bits?") ? static_cast <void> (0) : __assert_fail ("SrcVT != MVT::i64 && \"extend from 64-bits?\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 380, __PRETTY_FUNCTION__)); | |||
381 | ||||
382 | return AArch64_AM::InvalidShiftExtend; | |||
383 | } else if (N.getOpcode() == ISD::ZERO_EXTEND || | |||
384 | N.getOpcode() == ISD::ANY_EXTEND) { | |||
385 | EVT SrcVT = N.getOperand(0).getValueType(); | |||
386 | if (!IsLoadStore && SrcVT == MVT::i8) | |||
387 | return AArch64_AM::UXTB; | |||
388 | else if (!IsLoadStore && SrcVT == MVT::i16) | |||
389 | return AArch64_AM::UXTH; | |||
390 | else if (SrcVT == MVT::i32) | |||
391 | return AArch64_AM::UXTW; | |||
392 | assert(SrcVT != MVT::i64 && "extend from 64-bits?")((SrcVT != MVT::i64 && "extend from 64-bits?") ? static_cast <void> (0) : __assert_fail ("SrcVT != MVT::i64 && \"extend from 64-bits?\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 392, __PRETTY_FUNCTION__)); | |||
393 | ||||
394 | return AArch64_AM::InvalidShiftExtend; | |||
395 | } else if (N.getOpcode() == ISD::AND) { | |||
396 | ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1)); | |||
397 | if (!CSD) | |||
398 | return AArch64_AM::InvalidShiftExtend; | |||
399 | uint64_t AndMask = CSD->getZExtValue(); | |||
400 | ||||
401 | switch (AndMask) { | |||
402 | default: | |||
403 | return AArch64_AM::InvalidShiftExtend; | |||
404 | case 0xFF: | |||
405 | return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend; | |||
406 | case 0xFFFF: | |||
407 | return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend; | |||
408 | case 0xFFFFFFFF: | |||
409 | return AArch64_AM::UXTW; | |||
410 | } | |||
411 | } | |||
412 | ||||
413 | return AArch64_AM::InvalidShiftExtend; | |||
414 | } | |||
415 | ||||
416 | // Helper for SelectMLAV64LaneV128 - Recognize high lane extracts. | |||
417 | static bool checkHighLaneIndex(SDNode *DL, SDValue &LaneOp, int &LaneIdx) { | |||
418 | if (DL->getOpcode() != AArch64ISD::DUPLANE16 && | |||
419 | DL->getOpcode() != AArch64ISD::DUPLANE32) | |||
420 | return false; | |||
421 | ||||
422 | SDValue SV = DL->getOperand(0); | |||
423 | if (SV.getOpcode() != ISD::INSERT_SUBVECTOR) | |||
424 | return false; | |||
425 | ||||
426 | SDValue EV = SV.getOperand(1); | |||
427 | if (EV.getOpcode() != ISD::EXTRACT_SUBVECTOR) | |||
428 | return false; | |||
429 | ||||
430 | ConstantSDNode *DLidx = cast<ConstantSDNode>(DL->getOperand(1).getNode()); | |||
431 | ConstantSDNode *EVidx = cast<ConstantSDNode>(EV.getOperand(1).getNode()); | |||
432 | LaneIdx = DLidx->getSExtValue() + EVidx->getSExtValue(); | |||
433 | LaneOp = EV.getOperand(0); | |||
434 | ||||
435 | return true; | |||
436 | } | |||
437 | ||||
438 | // Helper for SelectOpcV64LaneV128 - Recognize operations where one operand is a | |||
439 | // high lane extract. | |||
440 | static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp, | |||
441 | SDValue &LaneOp, int &LaneIdx) { | |||
442 | ||||
443 | if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) { | |||
444 | std::swap(Op0, Op1); | |||
445 | if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) | |||
446 | return false; | |||
447 | } | |||
448 | StdOp = Op1; | |||
449 | return true; | |||
450 | } | |||
451 | ||||
452 | /// SelectMLAV64LaneV128 - AArch64 supports vector MLAs where one multiplicand | |||
453 | /// is a lane in the upper half of a 128-bit vector. Recognize and select this | |||
454 | /// so that we don't emit unnecessary lane extracts. | |||
455 | SDNode *AArch64DAGToDAGISel::SelectMLAV64LaneV128(SDNode *N) { | |||
456 | SDLoc dl(N); | |||
457 | SDValue Op0 = N->getOperand(0); | |||
458 | SDValue Op1 = N->getOperand(1); | |||
459 | SDValue MLAOp1; // Will hold ordinary multiplicand for MLA. | |||
460 | SDValue MLAOp2; // Will hold lane-accessed multiplicand for MLA. | |||
461 | int LaneIdx = -1; // Will hold the lane index. | |||
462 | ||||
463 | if (Op1.getOpcode() != ISD::MUL || | |||
464 | !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2, | |||
465 | LaneIdx)) { | |||
466 | std::swap(Op0, Op1); | |||
467 | if (Op1.getOpcode() != ISD::MUL || | |||
468 | !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2, | |||
469 | LaneIdx)) | |||
470 | return nullptr; | |||
471 | } | |||
472 | ||||
473 | SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64); | |||
474 | ||||
475 | SDValue Ops[] = { Op0, MLAOp1, MLAOp2, LaneIdxVal }; | |||
476 | ||||
477 | unsigned MLAOpc = ~0U; | |||
478 | ||||
479 | switch (N->getSimpleValueType(0).SimpleTy) { | |||
480 | default: | |||
481 | llvm_unreachable("Unrecognized MLA.")::llvm::llvm_unreachable_internal("Unrecognized MLA.", "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 481); | |||
482 | case MVT::v4i16: | |||
483 | MLAOpc = AArch64::MLAv4i16_indexed; | |||
484 | break; | |||
485 | case MVT::v8i16: | |||
486 | MLAOpc = AArch64::MLAv8i16_indexed; | |||
487 | break; | |||
488 | case MVT::v2i32: | |||
489 | MLAOpc = AArch64::MLAv2i32_indexed; | |||
490 | break; | |||
491 | case MVT::v4i32: | |||
492 | MLAOpc = AArch64::MLAv4i32_indexed; | |||
493 | break; | |||
494 | } | |||
495 | ||||
496 | return CurDAG->getMachineNode(MLAOpc, dl, N->getValueType(0), Ops); | |||
497 | } | |||
498 | ||||
499 | SDNode *AArch64DAGToDAGISel::SelectMULLV64LaneV128(unsigned IntNo, SDNode *N) { | |||
500 | SDLoc dl(N); | |||
501 | SDValue SMULLOp0; | |||
502 | SDValue SMULLOp1; | |||
503 | int LaneIdx; | |||
504 | ||||
505 | if (!checkV64LaneV128(N->getOperand(1), N->getOperand(2), SMULLOp0, SMULLOp1, | |||
506 | LaneIdx)) | |||
507 | return nullptr; | |||
508 | ||||
509 | SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64); | |||
510 | ||||
511 | SDValue Ops[] = { SMULLOp0, SMULLOp1, LaneIdxVal }; | |||
512 | ||||
513 | unsigned SMULLOpc = ~0U; | |||
514 | ||||
515 | if (IntNo == Intrinsic::aarch64_neon_smull) { | |||
516 | switch (N->getSimpleValueType(0).SimpleTy) { | |||
517 | default: | |||
518 | llvm_unreachable("Unrecognized SMULL.")::llvm::llvm_unreachable_internal("Unrecognized SMULL.", "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 518); | |||
519 | case MVT::v4i32: | |||
520 | SMULLOpc = AArch64::SMULLv4i16_indexed; | |||
521 | break; | |||
522 | case MVT::v2i64: | |||
523 | SMULLOpc = AArch64::SMULLv2i32_indexed; | |||
524 | break; | |||
525 | } | |||
526 | } else if (IntNo == Intrinsic::aarch64_neon_umull) { | |||
527 | switch (N->getSimpleValueType(0).SimpleTy) { | |||
528 | default: | |||
529 | llvm_unreachable("Unrecognized SMULL.")::llvm::llvm_unreachable_internal("Unrecognized SMULL.", "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 529); | |||
530 | case MVT::v4i32: | |||
531 | SMULLOpc = AArch64::UMULLv4i16_indexed; | |||
532 | break; | |||
533 | case MVT::v2i64: | |||
534 | SMULLOpc = AArch64::UMULLv2i32_indexed; | |||
535 | break; | |||
536 | } | |||
537 | } else | |||
538 | llvm_unreachable("Unrecognized intrinsic.")::llvm::llvm_unreachable_internal("Unrecognized intrinsic.", "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 538); | |||
539 | ||||
540 | return CurDAG->getMachineNode(SMULLOpc, dl, N->getValueType(0), Ops); | |||
541 | } | |||
542 | ||||
543 | /// Instructions that accept extend modifiers like UXTW expect the register | |||
544 | /// being extended to be a GPR32, but the incoming DAG might be acting on a | |||
545 | /// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if | |||
546 | /// this is the case. | |||
547 | static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) { | |||
548 | if (N.getValueType() == MVT::i32) | |||
549 | return N; | |||
550 | ||||
551 | SDLoc dl(N); | |||
552 | SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32); | |||
553 | MachineSDNode *Node = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, | |||
554 | dl, MVT::i32, N, SubReg); | |||
555 | return SDValue(Node, 0); | |||
556 | } | |||
557 | ||||
558 | ||||
559 | /// SelectArithExtendedRegister - Select a "extended register" operand. This | |||
560 | /// operand folds in an extend followed by an optional left shift. | |||
561 | bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg, | |||
562 | SDValue &Shift) { | |||
563 | unsigned ShiftVal = 0; | |||
564 | AArch64_AM::ShiftExtendType Ext; | |||
565 | ||||
566 | if (N.getOpcode() == ISD::SHL) { | |||
567 | ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1)); | |||
568 | if (!CSD) | |||
569 | return false; | |||
570 | ShiftVal = CSD->getZExtValue(); | |||
571 | if (ShiftVal > 4) | |||
572 | return false; | |||
573 | ||||
574 | Ext = getExtendTypeForNode(N.getOperand(0)); | |||
575 | if (Ext == AArch64_AM::InvalidShiftExtend) | |||
576 | return false; | |||
577 | ||||
578 | Reg = N.getOperand(0).getOperand(0); | |||
579 | } else { | |||
580 | Ext = getExtendTypeForNode(N); | |||
581 | if (Ext == AArch64_AM::InvalidShiftExtend) | |||
582 | return false; | |||
583 | ||||
584 | Reg = N.getOperand(0); | |||
585 | } | |||
586 | ||||
587 | // AArch64 mandates that the RHS of the operation must use the smallest | |||
588 | // register class that could contain the size being extended from. Thus, | |||
589 | // if we're folding a (sext i8), we need the RHS to be a GPR32, even though | |||
590 | // there might not be an actual 32-bit value in the program. We can | |||
591 | // (harmlessly) synthesize one by injected an EXTRACT_SUBREG here. | |||
592 | assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX)((Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX) ? static_cast<void> (0) : __assert_fail ("Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX" , "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 592, __PRETTY_FUNCTION__)); | |||
593 | Reg = narrowIfNeeded(CurDAG, Reg); | |||
594 | Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N), | |||
595 | MVT::i32); | |||
596 | return isWorthFolding(N); | |||
597 | } | |||
598 | ||||
599 | /// If there's a use of this ADDlow that's not itself a load/store then we'll | |||
600 | /// need to create a real ADD instruction from it anyway and there's no point in | |||
601 | /// folding it into the mem op. Theoretically, it shouldn't matter, but there's | |||
602 | /// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding | |||
603 | /// leads to duplicated ADRP instructions. | |||
604 | static bool isWorthFoldingADDlow(SDValue N) { | |||
605 | for (auto Use : N->uses()) { | |||
606 | if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE && | |||
607 | Use->getOpcode() != ISD::ATOMIC_LOAD && | |||
608 | Use->getOpcode() != ISD::ATOMIC_STORE) | |||
609 | return false; | |||
610 | ||||
611 | // ldar and stlr have much more restrictive addressing modes (just a | |||
612 | // register). | |||
613 | if (cast<MemSDNode>(Use)->getOrdering() > Monotonic) | |||
614 | return false; | |||
615 | } | |||
616 | ||||
617 | return true; | |||
618 | } | |||
619 | ||||
620 | /// SelectAddrModeIndexed7S - Select a "register plus scaled signed 7-bit | |||
621 | /// immediate" address. The "Size" argument is the size in bytes of the memory | |||
622 | /// reference, which determines the scale. | |||
623 | bool AArch64DAGToDAGISel::SelectAddrModeIndexed7S(SDValue N, unsigned Size, | |||
624 | SDValue &Base, | |||
625 | SDValue &OffImm) { | |||
626 | SDLoc dl(N); | |||
627 | const DataLayout &DL = CurDAG->getDataLayout(); | |||
628 | const TargetLowering *TLI = getTargetLowering(); | |||
629 | if (N.getOpcode() == ISD::FrameIndex) { | |||
630 | int FI = cast<FrameIndexSDNode>(N)->getIndex(); | |||
631 | Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); | |||
632 | OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); | |||
633 | return true; | |||
634 | } | |||
635 | ||||
636 | // As opposed to the (12-bit) Indexed addressing mode below, the 7-bit signed | |||
637 | // selected here doesn't support labels/immediates, only base+offset. | |||
638 | ||||
639 | if (CurDAG->isBaseWithConstantOffset(N)) { | |||
640 | if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { | |||
641 | int64_t RHSC = RHS->getSExtValue(); | |||
642 | unsigned Scale = Log2_32(Size); | |||
643 | if ((RHSC & (Size - 1)) == 0 && RHSC >= -(0x40 << Scale) && | |||
644 | RHSC < (0x40 << Scale)) { | |||
645 | Base = N.getOperand(0); | |||
646 | if (Base.getOpcode() == ISD::FrameIndex) { | |||
647 | int FI = cast<FrameIndexSDNode>(Base)->getIndex(); | |||
648 | Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); | |||
649 | } | |||
650 | OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64); | |||
651 | return true; | |||
652 | } | |||
653 | } | |||
654 | } | |||
655 | ||||
656 | // Base only. The address will be materialized into a register before | |||
657 | // the memory is accessed. | |||
658 | // add x0, Xbase, #offset | |||
659 | // stp x1, x2, [x0] | |||
660 | Base = N; | |||
661 | OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); | |||
662 | return true; | |||
663 | } | |||
664 | ||||
665 | /// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit | |||
666 | /// immediate" address. The "Size" argument is the size in bytes of the memory | |||
667 | /// reference, which determines the scale. | |||
668 | bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size, | |||
669 | SDValue &Base, SDValue &OffImm) { | |||
670 | SDLoc dl(N); | |||
671 | const DataLayout &DL = CurDAG->getDataLayout(); | |||
672 | const TargetLowering *TLI = getTargetLowering(); | |||
673 | if (N.getOpcode() == ISD::FrameIndex) { | |||
| ||||
674 | int FI = cast<FrameIndexSDNode>(N)->getIndex(); | |||
675 | Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); | |||
676 | OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); | |||
677 | return true; | |||
678 | } | |||
679 | ||||
680 | if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) { | |||
681 | GlobalAddressSDNode *GAN = | |||
682 | dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode()); | |||
683 | Base = N.getOperand(0); | |||
684 | OffImm = N.getOperand(1); | |||
685 | if (!GAN) | |||
686 | return true; | |||
687 | ||||
688 | const GlobalValue *GV = GAN->getGlobal(); | |||
689 | unsigned Alignment = GV->getAlignment(); | |||
690 | Type *Ty = GV->getType()->getElementType(); | |||
691 | if (Alignment == 0 && Ty->isSized()) | |||
692 | Alignment = DL.getABITypeAlignment(Ty); | |||
693 | ||||
694 | if (Alignment >= Size) | |||
695 | return true; | |||
696 | } | |||
697 | ||||
698 | if (CurDAG->isBaseWithConstantOffset(N)) { | |||
699 | if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { | |||
700 | int64_t RHSC = (int64_t)RHS->getZExtValue(); | |||
701 | unsigned Scale = Log2_32(Size); | |||
702 | if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) { | |||
| ||||
703 | Base = N.getOperand(0); | |||
704 | if (Base.getOpcode() == ISD::FrameIndex) { | |||
705 | int FI = cast<FrameIndexSDNode>(Base)->getIndex(); | |||
706 | Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); | |||
707 | } | |||
708 | OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64); | |||
709 | return true; | |||
710 | } | |||
711 | } | |||
712 | } | |||
713 | ||||
714 | // Before falling back to our general case, check if the unscaled | |||
715 | // instructions can handle this. If so, that's preferable. | |||
716 | if (SelectAddrModeUnscaled(N, Size, Base, OffImm)) | |||
717 | return false; | |||
718 | ||||
719 | // Base only. The address will be materialized into a register before | |||
720 | // the memory is accessed. | |||
721 | // add x0, Xbase, #offset | |||
722 | // ldr x0, [x0] | |||
723 | Base = N; | |||
724 | OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); | |||
725 | return true; | |||
726 | } | |||
727 | ||||
728 | /// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit | |||
729 | /// immediate" address. This should only match when there is an offset that | |||
730 | /// is not valid for a scaled immediate addressing mode. The "Size" argument | |||
731 | /// is the size in bytes of the memory reference, which is needed here to know | |||
732 | /// what is valid for a scaled immediate. | |||
733 | bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size, | |||
734 | SDValue &Base, | |||
735 | SDValue &OffImm) { | |||
736 | if (!CurDAG->isBaseWithConstantOffset(N)) | |||
737 | return false; | |||
738 | if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { | |||
739 | int64_t RHSC = RHS->getSExtValue(); | |||
740 | // If the offset is valid as a scaled immediate, don't match here. | |||
741 | if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && | |||
742 | RHSC < (0x1000 << Log2_32(Size))) | |||
743 | return false; | |||
744 | if (RHSC >= -256 && RHSC < 256) { | |||
745 | Base = N.getOperand(0); | |||
746 | if (Base.getOpcode() == ISD::FrameIndex) { | |||
747 | int FI = cast<FrameIndexSDNode>(Base)->getIndex(); | |||
748 | const TargetLowering *TLI = getTargetLowering(); | |||
749 | Base = CurDAG->getTargetFrameIndex( | |||
750 | FI, TLI->getPointerTy(CurDAG->getDataLayout())); | |||
751 | } | |||
752 | OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64); | |||
753 | return true; | |||
754 | } | |||
755 | } | |||
756 | return false; | |||
757 | } | |||
758 | ||||
759 | static SDValue Widen(SelectionDAG *CurDAG, SDValue N) { | |||
760 | SDLoc dl(N); | |||
761 | SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32); | |||
762 | SDValue ImpDef = SDValue( | |||
763 | CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0); | |||
764 | MachineSDNode *Node = CurDAG->getMachineNode( | |||
765 | TargetOpcode::INSERT_SUBREG, dl, MVT::i64, ImpDef, N, SubReg); | |||
766 | return SDValue(Node, 0); | |||
767 | } | |||
768 | ||||
769 | /// \brief Check if the given SHL node (\p N), can be used to form an | |||
770 | /// extended register for an addressing mode. | |||
771 | bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size, | |||
772 | bool WantExtend, SDValue &Offset, | |||
773 | SDValue &SignExtend) { | |||
774 | assert(N.getOpcode() == ISD::SHL && "Invalid opcode.")((N.getOpcode() == ISD::SHL && "Invalid opcode.") ? static_cast <void> (0) : __assert_fail ("N.getOpcode() == ISD::SHL && \"Invalid opcode.\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 774, __PRETTY_FUNCTION__)); | |||
775 | ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1)); | |||
776 | if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue()) | |||
777 | return false; | |||
778 | ||||
779 | SDLoc dl(N); | |||
780 | if (WantExtend) { | |||
781 | AArch64_AM::ShiftExtendType Ext = | |||
782 | getExtendTypeForNode(N.getOperand(0), true); | |||
783 | if (Ext == AArch64_AM::InvalidShiftExtend) | |||
784 | return false; | |||
785 | ||||
786 | Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0)); | |||
787 | SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl, | |||
788 | MVT::i32); | |||
789 | } else { | |||
790 | Offset = N.getOperand(0); | |||
791 | SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32); | |||
792 | } | |||
793 | ||||
794 | unsigned LegalShiftVal = Log2_32(Size); | |||
795 | unsigned ShiftVal = CSD->getZExtValue(); | |||
796 | ||||
797 | if (ShiftVal != 0 && ShiftVal != LegalShiftVal) | |||
798 | return false; | |||
799 | ||||
800 | if (isWorthFolding(N)) | |||
801 | return true; | |||
802 | ||||
803 | return false; | |||
804 | } | |||
805 | ||||
806 | bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size, | |||
807 | SDValue &Base, SDValue &Offset, | |||
808 | SDValue &SignExtend, | |||
809 | SDValue &DoShift) { | |||
810 | if (N.getOpcode() != ISD::ADD) | |||
811 | return false; | |||
812 | SDValue LHS = N.getOperand(0); | |||
813 | SDValue RHS = N.getOperand(1); | |||
814 | SDLoc dl(N); | |||
815 | ||||
816 | // We don't want to match immediate adds here, because they are better lowered | |||
817 | // to the register-immediate addressing modes. | |||
818 | if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS)) | |||
819 | return false; | |||
820 | ||||
821 | // Check if this particular node is reused in any non-memory related | |||
822 | // operation. If yes, do not try to fold this node into the address | |||
823 | // computation, since the computation will be kept. | |||
824 | const SDNode *Node = N.getNode(); | |||
825 | for (SDNode *UI : Node->uses()) { | |||
826 | if (!isa<MemSDNode>(*UI)) | |||
827 | return false; | |||
828 | } | |||
829 | ||||
830 | // Remember if it is worth folding N when it produces extended register. | |||
831 | bool IsExtendedRegisterWorthFolding = isWorthFolding(N); | |||
832 | ||||
833 | // Try to match a shifted extend on the RHS. | |||
834 | if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL && | |||
835 | SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) { | |||
836 | Base = LHS; | |||
837 | DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32); | |||
838 | return true; | |||
839 | } | |||
840 | ||||
841 | // Try to match a shifted extend on the LHS. | |||
842 | if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL && | |||
843 | SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) { | |||
844 | Base = RHS; | |||
845 | DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32); | |||
846 | return true; | |||
847 | } | |||
848 | ||||
849 | // There was no shift, whatever else we find. | |||
850 | DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32); | |||
851 | ||||
852 | AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend; | |||
853 | // Try to match an unshifted extend on the LHS. | |||
854 | if (IsExtendedRegisterWorthFolding && | |||
855 | (Ext = getExtendTypeForNode(LHS, true)) != | |||
856 | AArch64_AM::InvalidShiftExtend) { | |||
857 | Base = RHS; | |||
858 | Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0)); | |||
859 | SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl, | |||
860 | MVT::i32); | |||
861 | if (isWorthFolding(LHS)) | |||
862 | return true; | |||
863 | } | |||
864 | ||||
865 | // Try to match an unshifted extend on the RHS. | |||
866 | if (IsExtendedRegisterWorthFolding && | |||
867 | (Ext = getExtendTypeForNode(RHS, true)) != | |||
868 | AArch64_AM::InvalidShiftExtend) { | |||
869 | Base = LHS; | |||
870 | Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0)); | |||
871 | SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl, | |||
872 | MVT::i32); | |||
873 | if (isWorthFolding(RHS)) | |||
874 | return true; | |||
875 | } | |||
876 | ||||
877 | return false; | |||
878 | } | |||
879 | ||||
// Decide whether an immediate is best materialized by an ADD. That is the
// case when it fits the plain 12-bit ADD immediate, or when it fits the
// "ADD ... LSL #12" form and a single MOVZ could not produce it.
static bool isPreferredADD(int64_t ImmOff) {
  const uint64_t Bits = static_cast<uint64_t>(ImmOff);

  // Values in [0x0, 0xfff] are directly encodable in ADD.
  if ((Bits >> 12) == 0)
    return true;

  // Anything with bits outside [23:12] cannot use "ADD LSL #12".
  if ((Bits & ~0xfff000ULL) != 0)
    return false;

  // A single MOVZ is faster than an "ADD of LSL #12", so constants that live
  // entirely in bits [15:12] or entirely in bits [23:16] are left to MOVZ.
  const bool TouchesLowHalfword = (Bits & 0x00f000ULL) != 0;
  const bool TouchesHighHalfword = (Bits & 0xff0000ULL) != 0;
  return TouchesLowHalfword && TouchesHighHalfword;
}
894 | ||||
895 | bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size, | |||
896 | SDValue &Base, SDValue &Offset, | |||
897 | SDValue &SignExtend, | |||
898 | SDValue &DoShift) { | |||
899 | if (N.getOpcode() != ISD::ADD) | |||
900 | return false; | |||
901 | SDValue LHS = N.getOperand(0); | |||
902 | SDValue RHS = N.getOperand(1); | |||
903 | SDLoc DL(N); | |||
904 | ||||
905 | // Check if this particular node is reused in any non-memory related | |||
906 | // operation. If yes, do not try to fold this node into the address | |||
907 | // computation, since the computation will be kept. | |||
908 | const SDNode *Node = N.getNode(); | |||
909 | for (SDNode *UI : Node->uses()) { | |||
910 | if (!isa<MemSDNode>(*UI)) | |||
911 | return false; | |||
912 | } | |||
913 | ||||
914 | // Watch out if RHS is a wide immediate, it can not be selected into | |||
915 | // [BaseReg+Imm] addressing mode. Also it may not be able to be encoded into | |||
916 | // ADD/SUB. Instead it will use [BaseReg + 0] address mode and generate | |||
917 | // instructions like: | |||
918 | // MOV X0, WideImmediate | |||
919 | // ADD X1, BaseReg, X0 | |||
920 | // LDR X2, [X1, 0] | |||
921 | // For such situation, using [BaseReg, XReg] addressing mode can save one | |||
922 | // ADD/SUB: | |||
923 | // MOV X0, WideImmediate | |||
924 | // LDR X2, [BaseReg, X0] | |||
925 | if (isa<ConstantSDNode>(RHS)) { | |||
926 | int64_t ImmOff = (int64_t)cast<ConstantSDNode>(RHS)->getZExtValue(); | |||
927 | unsigned Scale = Log2_32(Size); | |||
928 | // Skip the immediate can be selected by load/store addressing mode. | |||
929 | // Also skip the immediate can be encoded by a single ADD (SUB is also | |||
930 | // checked by using -ImmOff). | |||
931 | if ((ImmOff % Size == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale)) || | |||
932 | isPreferredADD(ImmOff) || isPreferredADD(-ImmOff)) | |||
933 | return false; | |||
934 | ||||
935 | SDValue Ops[] = { RHS }; | |||
936 | SDNode *MOVI = | |||
937 | CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops); | |||
938 | SDValue MOVIV = SDValue(MOVI, 0); | |||
939 | // This ADD of two X register will be selected into [Reg+Reg] mode. | |||
940 | N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV); | |||
941 | } | |||
942 | ||||
943 | // Remember if it is worth folding N when it produces extended register. | |||
944 | bool IsExtendedRegisterWorthFolding = isWorthFolding(N); | |||
945 | ||||
946 | // Try to match a shifted extend on the RHS. | |||
947 | if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL && | |||
948 | SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) { | |||
949 | Base = LHS; | |||
950 | DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32); | |||
951 | return true; | |||
952 | } | |||
953 | ||||
954 | // Try to match a shifted extend on the LHS. | |||
955 | if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL && | |||
956 | SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) { | |||
957 | Base = RHS; | |||
958 | DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32); | |||
959 | return true; | |||
960 | } | |||
961 | ||||
962 | // Match any non-shifted, non-extend, non-immediate add expression. | |||
963 | Base = LHS; | |||
964 | Offset = RHS; | |||
965 | SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32); | |||
966 | DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32); | |||
967 | // Reg1 + Reg2 is free: no check needed. | |||
968 | return true; | |||
969 | } | |||
970 | ||||
971 | SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) { | |||
972 | static const unsigned RegClassIDs[] = { | |||
973 | AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID}; | |||
974 | static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1, | |||
975 | AArch64::dsub2, AArch64::dsub3}; | |||
976 | ||||
977 | return createTuple(Regs, RegClassIDs, SubRegs); | |||
978 | } | |||
979 | ||||
980 | SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) { | |||
981 | static const unsigned RegClassIDs[] = { | |||
982 | AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID}; | |||
983 | static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1, | |||
984 | AArch64::qsub2, AArch64::qsub3}; | |||
985 | ||||
986 | return createTuple(Regs, RegClassIDs, SubRegs); | |||
987 | } | |||
988 | ||||
989 | SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs, | |||
990 | const unsigned RegClassIDs[], | |||
991 | const unsigned SubRegs[]) { | |||
992 | // There's no special register-class for a vector-list of 1 element: it's just | |||
993 | // a vector. | |||
994 | if (Regs.size() == 1) | |||
995 | return Regs[0]; | |||
996 | ||||
997 | assert(Regs.size() >= 2 && Regs.size() <= 4)((Regs.size() >= 2 && Regs.size() <= 4) ? static_cast <void> (0) : __assert_fail ("Regs.size() >= 2 && Regs.size() <= 4" , "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 997, __PRETTY_FUNCTION__)); | |||
998 | ||||
999 | SDLoc DL(Regs[0]); | |||
1000 | ||||
1001 | SmallVector<SDValue, 4> Ops; | |||
1002 | ||||
1003 | // First operand of REG_SEQUENCE is the desired RegClass. | |||
1004 | Ops.push_back( | |||
1005 | CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32)); | |||
1006 | ||||
1007 | // Then we get pairs of source & subregister-position for the components. | |||
1008 | for (unsigned i = 0; i < Regs.size(); ++i) { | |||
1009 | Ops.push_back(Regs[i]); | |||
1010 | Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32)); | |||
1011 | } | |||
1012 | ||||
1013 | SDNode *N = | |||
1014 | CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops); | |||
1015 | return SDValue(N, 0); | |||
1016 | } | |||
1017 | ||||
1018 | SDNode *AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, | |||
1019 | unsigned Opc, bool isExt) { | |||
1020 | SDLoc dl(N); | |||
1021 | EVT VT = N->getValueType(0); | |||
1022 | ||||
1023 | unsigned ExtOff = isExt; | |||
1024 | ||||
1025 | // Form a REG_SEQUENCE to force register allocation. | |||
1026 | unsigned Vec0Off = ExtOff + 1; | |||
1027 | SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off, | |||
1028 | N->op_begin() + Vec0Off + NumVecs); | |||
1029 | SDValue RegSeq = createQTuple(Regs); | |||
1030 | ||||
1031 | SmallVector<SDValue, 6> Ops; | |||
1032 | if (isExt) | |||
1033 | Ops.push_back(N->getOperand(1)); | |||
1034 | Ops.push_back(RegSeq); | |||
1035 | Ops.push_back(N->getOperand(NumVecs + ExtOff + 1)); | |||
1036 | return CurDAG->getMachineNode(Opc, dl, VT, Ops); | |||
1037 | } | |||
1038 | ||||
1039 | SDNode *AArch64DAGToDAGISel::SelectIndexedLoad(SDNode *N, bool &Done) { | |||
1040 | LoadSDNode *LD = cast<LoadSDNode>(N); | |||
1041 | if (LD->isUnindexed()) | |||
1042 | return nullptr; | |||
1043 | EVT VT = LD->getMemoryVT(); | |||
1044 | EVT DstVT = N->getValueType(0); | |||
1045 | ISD::MemIndexedMode AM = LD->getAddressingMode(); | |||
1046 | bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC; | |||
1047 | ||||
1048 | // We're not doing validity checking here. That was done when checking | |||
1049 | // if we should mark the load as indexed or not. We're just selecting | |||
1050 | // the right instruction. | |||
1051 | unsigned Opcode = 0; | |||
1052 | ||||
1053 | ISD::LoadExtType ExtType = LD->getExtensionType(); | |||
1054 | bool InsertTo64 = false; | |||
1055 | if (VT == MVT::i64) | |||
1056 | Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost; | |||
1057 | else if (VT == MVT::i32) { | |||
1058 | if (ExtType == ISD::NON_EXTLOAD) | |||
1059 | Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost; | |||
1060 | else if (ExtType == ISD::SEXTLOAD) | |||
1061 | Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost; | |||
1062 | else { | |||
1063 | Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost; | |||
1064 | InsertTo64 = true; | |||
1065 | // The result of the load is only i32. It's the subreg_to_reg that makes | |||
1066 | // it into an i64. | |||
1067 | DstVT = MVT::i32; | |||
1068 | } | |||
1069 | } else if (VT == MVT::i16) { | |||
1070 | if (ExtType == ISD::SEXTLOAD) { | |||
1071 | if (DstVT == MVT::i64) | |||
1072 | Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost; | |||
1073 | else | |||
1074 | Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost; | |||
1075 | } else { | |||
1076 | Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost; | |||
1077 | InsertTo64 = DstVT == MVT::i64; | |||
1078 | // The result of the load is only i32. It's the subreg_to_reg that makes | |||
1079 | // it into an i64. | |||
1080 | DstVT = MVT::i32; | |||
1081 | } | |||
1082 | } else if (VT == MVT::i8) { | |||
1083 | if (ExtType == ISD::SEXTLOAD) { | |||
1084 | if (DstVT == MVT::i64) | |||
1085 | Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost; | |||
1086 | else | |||
1087 | Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost; | |||
1088 | } else { | |||
1089 | Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost; | |||
1090 | InsertTo64 = DstVT == MVT::i64; | |||
1091 | // The result of the load is only i32. It's the subreg_to_reg that makes | |||
1092 | // it into an i64. | |||
1093 | DstVT = MVT::i32; | |||
1094 | } | |||
1095 | } else if (VT == MVT::f16) { | |||
1096 | Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost; | |||
1097 | } else if (VT == MVT::f32) { | |||
1098 | Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost; | |||
1099 | } else if (VT == MVT::f64 || VT.is64BitVector()) { | |||
1100 | Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost; | |||
1101 | } else if (VT.is128BitVector()) { | |||
1102 | Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost; | |||
1103 | } else | |||
1104 | return nullptr; | |||
1105 | SDValue Chain = LD->getChain(); | |||
1106 | SDValue Base = LD->getBasePtr(); | |||
1107 | ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset()); | |||
1108 | int OffsetVal = (int)OffsetOp->getZExtValue(); | |||
1109 | SDLoc dl(N); | |||
1110 | SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64); | |||
1111 | SDValue Ops[] = { Base, Offset, Chain }; | |||
1112 | SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT, | |||
1113 | MVT::Other, Ops); | |||
1114 | // Either way, we're replacing the node, so tell the caller that. | |||
1115 | Done = true; | |||
1116 | SDValue LoadedVal = SDValue(Res, 1); | |||
1117 | if (InsertTo64) { | |||
1118 | SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32); | |||
1119 | LoadedVal = | |||
1120 | SDValue(CurDAG->getMachineNode( | |||
1121 | AArch64::SUBREG_TO_REG, dl, MVT::i64, | |||
1122 | CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal, | |||
1123 | SubReg), | |||
1124 | 0); | |||
1125 | } | |||
1126 | ||||
1127 | ReplaceUses(SDValue(N, 0), LoadedVal); | |||
1128 | ReplaceUses(SDValue(N, 1), SDValue(Res, 0)); | |||
1129 | ReplaceUses(SDValue(N, 2), SDValue(Res, 2)); | |||
1130 | ||||
1131 | return nullptr; | |||
1132 | } | |||
1133 | ||||
1134 | SDNode *AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, | |||
1135 | unsigned Opc, unsigned SubRegIdx) { | |||
1136 | SDLoc dl(N); | |||
1137 | EVT VT = N->getValueType(0); | |||
1138 | SDValue Chain = N->getOperand(0); | |||
1139 | ||||
1140 | SDValue Ops[] = {N->getOperand(2), // Mem operand; | |||
1141 | Chain}; | |||
1142 | ||||
1143 | const EVT ResTys[] = {MVT::Untyped, MVT::Other}; | |||
1144 | ||||
1145 | SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); | |||
1146 | SDValue SuperReg = SDValue(Ld, 0); | |||
1147 | for (unsigned i = 0; i < NumVecs; ++i) | |||
1148 | ReplaceUses(SDValue(N, i), | |||
1149 | CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg)); | |||
1150 | ||||
1151 | ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1)); | |||
1152 | return nullptr; | |||
1153 | } | |||
1154 | ||||
1155 | SDNode *AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs, | |||
1156 | unsigned Opc, unsigned SubRegIdx) { | |||
1157 | SDLoc dl(N); | |||
1158 | EVT VT = N->getValueType(0); | |||
1159 | SDValue Chain = N->getOperand(0); | |||
1160 | ||||
1161 | SDValue Ops[] = {N->getOperand(1), // Mem operand | |||
1162 | N->getOperand(2), // Incremental | |||
1163 | Chain}; | |||
1164 | ||||
1165 | const EVT ResTys[] = {MVT::i64, // Type of the write back register | |||
1166 | MVT::Untyped, MVT::Other}; | |||
1167 | ||||
1168 | SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); | |||
1169 | ||||
1170 | // Update uses of write back register | |||
1171 | ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0)); | |||
1172 | ||||
1173 | // Update uses of vector list | |||
1174 | SDValue SuperReg = SDValue(Ld, 1); | |||
1175 | if (NumVecs == 1) | |||
1176 | ReplaceUses(SDValue(N, 0), SuperReg); | |||
1177 | else | |||
1178 | for (unsigned i = 0; i < NumVecs; ++i) | |||
1179 | ReplaceUses(SDValue(N, i), | |||
1180 | CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg)); | |||
1181 | ||||
1182 | // Update the chain | |||
1183 | ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2)); | |||
1184 | return nullptr; | |||
1185 | } | |||
1186 | ||||
1187 | SDNode *AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs, | |||
1188 | unsigned Opc) { | |||
1189 | SDLoc dl(N); | |||
1190 | EVT VT = N->getOperand(2)->getValueType(0); | |||
1191 | ||||
1192 | // Form a REG_SEQUENCE to force register allocation. | |||
1193 | bool Is128Bit = VT.getSizeInBits() == 128; | |||
1194 | SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); | |||
1195 | SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs); | |||
1196 | ||||
1197 | SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)}; | |||
1198 | SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops); | |||
1199 | ||||
1200 | return St; | |||
1201 | } | |||
1202 | ||||
1203 | SDNode *AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs, | |||
1204 | unsigned Opc) { | |||
1205 | SDLoc dl(N); | |||
1206 | EVT VT = N->getOperand(2)->getValueType(0); | |||
1207 | const EVT ResTys[] = {MVT::i64, // Type of the write back register | |||
1208 | MVT::Other}; // Type for the Chain | |||
1209 | ||||
1210 | // Form a REG_SEQUENCE to force register allocation. | |||
1211 | bool Is128Bit = VT.getSizeInBits() == 128; | |||
1212 | SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); | |||
1213 | SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs); | |||
1214 | ||||
1215 | SDValue Ops[] = {RegSeq, | |||
1216 | N->getOperand(NumVecs + 1), // base register | |||
1217 | N->getOperand(NumVecs + 2), // Incremental | |||
1218 | N->getOperand(0)}; // Chain | |||
1219 | SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); | |||
1220 | ||||
1221 | return St; | |||
1222 | } | |||
1223 | ||||
1224 | namespace { | |||
1225 | /// WidenVector - Given a value in the V64 register class, produce the | |||
1226 | /// equivalent value in the V128 register class. | |||
1227 | class WidenVector { | |||
1228 | SelectionDAG &DAG; | |||
1229 | ||||
1230 | public: | |||
1231 | WidenVector(SelectionDAG &DAG) : DAG(DAG) {} | |||
1232 | ||||
1233 | SDValue operator()(SDValue V64Reg) { | |||
1234 | EVT VT = V64Reg.getValueType(); | |||
1235 | unsigned NarrowSize = VT.getVectorNumElements(); | |||
1236 | MVT EltTy = VT.getVectorElementType().getSimpleVT(); | |||
1237 | MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize); | |||
1238 | SDLoc DL(V64Reg); | |||
1239 | ||||
1240 | SDValue Undef = | |||
1241 | SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0); | |||
1242 | return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg); | |||
1243 | } | |||
1244 | }; | |||
1245 | } // namespace | |||
1246 | ||||
1247 | /// NarrowVector - Given a value in the V128 register class, produce the | |||
1248 | /// equivalent value in the V64 register class. | |||
1249 | static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) { | |||
1250 | EVT VT = V128Reg.getValueType(); | |||
1251 | unsigned WideSize = VT.getVectorNumElements(); | |||
1252 | MVT EltTy = VT.getVectorElementType().getSimpleVT(); | |||
1253 | MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2); | |||
1254 | ||||
1255 | return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy, | |||
1256 | V128Reg); | |||
1257 | } | |||
1258 | ||||
1259 | SDNode *AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs, | |||
1260 | unsigned Opc) { | |||
1261 | SDLoc dl(N); | |||
1262 | EVT VT = N->getValueType(0); | |||
1263 | bool Narrow = VT.getSizeInBits() == 64; | |||
1264 | ||||
1265 | // Form a REG_SEQUENCE to force register allocation. | |||
1266 | SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); | |||
1267 | ||||
1268 | if (Narrow) | |||
1269 | std::transform(Regs.begin(), Regs.end(), Regs.begin(), | |||
1270 | WidenVector(*CurDAG)); | |||
1271 | ||||
1272 | SDValue RegSeq = createQTuple(Regs); | |||
1273 | ||||
1274 | const EVT ResTys[] = {MVT::Untyped, MVT::Other}; | |||
1275 | ||||
1276 | unsigned LaneNo = | |||
1277 | cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue(); | |||
1278 | ||||
1279 | SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64), | |||
1280 | N->getOperand(NumVecs + 3), N->getOperand(0)}; | |||
1281 | SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); | |||
1282 | SDValue SuperReg = SDValue(Ld, 0); | |||
1283 | ||||
1284 | EVT WideVT = RegSeq.getOperand(1)->getValueType(0); | |||
1285 | static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, | |||
1286 | AArch64::qsub2, AArch64::qsub3 }; | |||
1287 | for (unsigned i = 0; i < NumVecs; ++i) { | |||
1288 | SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg); | |||
1289 | if (Narrow) | |||
1290 | NV = NarrowVector(NV, *CurDAG); | |||
1291 | ReplaceUses(SDValue(N, i), NV); | |||
1292 | } | |||
1293 | ||||
1294 | ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1)); | |||
1295 | ||||
1296 | return Ld; | |||
1297 | } | |||
1298 | ||||
1299 | SDNode *AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs, | |||
1300 | unsigned Opc) { | |||
1301 | SDLoc dl(N); | |||
1302 | EVT VT = N->getValueType(0); | |||
1303 | bool Narrow = VT.getSizeInBits() == 64; | |||
1304 | ||||
1305 | // Form a REG_SEQUENCE to force register allocation. | |||
1306 | SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); | |||
1307 | ||||
1308 | if (Narrow) | |||
1309 | std::transform(Regs.begin(), Regs.end(), Regs.begin(), | |||
1310 | WidenVector(*CurDAG)); | |||
1311 | ||||
1312 | SDValue RegSeq = createQTuple(Regs); | |||
1313 | ||||
1314 | const EVT ResTys[] = {MVT::i64, // Type of the write back register | |||
1315 | RegSeq->getValueType(0), MVT::Other}; | |||
1316 | ||||
1317 | unsigned LaneNo = | |||
1318 | cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue(); | |||
1319 | ||||
1320 | SDValue Ops[] = {RegSeq, | |||
1321 | CurDAG->getTargetConstant(LaneNo, dl, | |||
1322 | MVT::i64), // Lane Number | |||
1323 | N->getOperand(NumVecs + 2), // Base register | |||
1324 | N->getOperand(NumVecs + 3), // Incremental | |||
1325 | N->getOperand(0)}; | |||
1326 | SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); | |||
1327 | ||||
1328 | // Update uses of the write back register | |||
1329 | ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0)); | |||
1330 | ||||
1331 | // Update uses of the vector list | |||
1332 | SDValue SuperReg = SDValue(Ld, 1); | |||
1333 | if (NumVecs == 1) { | |||
1334 | ReplaceUses(SDValue(N, 0), | |||
1335 | Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg); | |||
1336 | } else { | |||
1337 | EVT WideVT = RegSeq.getOperand(1)->getValueType(0); | |||
1338 | static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, | |||
1339 | AArch64::qsub2, AArch64::qsub3 }; | |||
1340 | for (unsigned i = 0; i < NumVecs; ++i) { | |||
1341 | SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, | |||
1342 | SuperReg); | |||
1343 | if (Narrow) | |||
1344 | NV = NarrowVector(NV, *CurDAG); | |||
1345 | ReplaceUses(SDValue(N, i), NV); | |||
1346 | } | |||
1347 | } | |||
1348 | ||||
1349 | // Update the Chain | |||
1350 | ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2)); | |||
1351 | ||||
1352 | return Ld; | |||
1353 | } | |||
1354 | ||||
1355 | SDNode *AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs, | |||
1356 | unsigned Opc) { | |||
1357 | SDLoc dl(N); | |||
1358 | EVT VT = N->getOperand(2)->getValueType(0); | |||
1359 | bool Narrow = VT.getSizeInBits() == 64; | |||
1360 | ||||
1361 | // Form a REG_SEQUENCE to force register allocation. | |||
1362 | SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); | |||
1363 | ||||
1364 | if (Narrow) | |||
1365 | std::transform(Regs.begin(), Regs.end(), Regs.begin(), | |||
1366 | WidenVector(*CurDAG)); | |||
1367 | ||||
1368 | SDValue RegSeq = createQTuple(Regs); | |||
1369 | ||||
1370 | unsigned LaneNo = | |||
1371 | cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue(); | |||
1372 | ||||
1373 | SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64), | |||
1374 | N->getOperand(NumVecs + 3), N->getOperand(0)}; | |||
1375 | SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops); | |||
1376 | ||||
1377 | // Transfer memoperands. | |||
1378 | MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); | |||
1379 | MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); | |||
1380 | cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1); | |||
1381 | ||||
1382 | return St; | |||
1383 | } | |||
1384 | ||||
1385 | SDNode *AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs, | |||
1386 | unsigned Opc) { | |||
1387 | SDLoc dl(N); | |||
1388 | EVT VT = N->getOperand(2)->getValueType(0); | |||
1389 | bool Narrow = VT.getSizeInBits() == 64; | |||
1390 | ||||
1391 | // Form a REG_SEQUENCE to force register allocation. | |||
1392 | SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); | |||
1393 | ||||
1394 | if (Narrow) | |||
1395 | std::transform(Regs.begin(), Regs.end(), Regs.begin(), | |||
1396 | WidenVector(*CurDAG)); | |||
1397 | ||||
1398 | SDValue RegSeq = createQTuple(Regs); | |||
1399 | ||||
1400 | const EVT ResTys[] = {MVT::i64, // Type of the write back register | |||
1401 | MVT::Other}; | |||
1402 | ||||
1403 | unsigned LaneNo = | |||
1404 | cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue(); | |||
1405 | ||||
1406 | SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64), | |||
1407 | N->getOperand(NumVecs + 2), // Base Register | |||
1408 | N->getOperand(NumVecs + 3), // Incremental | |||
1409 | N->getOperand(0)}; | |||
1410 | SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); | |||
1411 | ||||
1412 | // Transfer memoperands. | |||
1413 | MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); | |||
1414 | MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); | |||
1415 | cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1); | |||
1416 | ||||
1417 | return St; | |||
1418 | } | |||
1419 | ||||
1420 | static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N, | |||
1421 | unsigned &Opc, SDValue &Opd0, | |||
1422 | unsigned &LSB, unsigned &MSB, | |||
1423 | unsigned NumberOfIgnoredLowBits, | |||
1424 | bool BiggerPattern) { | |||
1425 | assert(N->getOpcode() == ISD::AND &&((N->getOpcode() == ISD::AND && "N must be a AND operation to call this function" ) ? static_cast<void> (0) : __assert_fail ("N->getOpcode() == ISD::AND && \"N must be a AND operation to call this function\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 1426, __PRETTY_FUNCTION__)) | |||
1426 | "N must be a AND operation to call this function")((N->getOpcode() == ISD::AND && "N must be a AND operation to call this function" ) ? static_cast<void> (0) : __assert_fail ("N->getOpcode() == ISD::AND && \"N must be a AND operation to call this function\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 1426, __PRETTY_FUNCTION__)); | |||
1427 | ||||
1428 | EVT VT = N->getValueType(0); | |||
1429 | ||||
1430 | // Here we can test the type of VT and return false when the type does not | |||
1431 | // match, but since it is done prior to that call in the current context | |||
1432 | // we turned that into an assert to avoid redundant code. | |||
1433 | assert((VT == MVT::i32 || VT == MVT::i64) &&(((VT == MVT::i32 || VT == MVT::i64) && "Type checking must have been done before calling this function" ) ? static_cast<void> (0) : __assert_fail ("(VT == MVT::i32 || VT == MVT::i64) && \"Type checking must have been done before calling this function\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 1434, __PRETTY_FUNCTION__)) | |||
1434 | "Type checking must have been done before calling this function")(((VT == MVT::i32 || VT == MVT::i64) && "Type checking must have been done before calling this function" ) ? static_cast<void> (0) : __assert_fail ("(VT == MVT::i32 || VT == MVT::i64) && \"Type checking must have been done before calling this function\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 1434, __PRETTY_FUNCTION__)); | |||
1435 | ||||
1436 | // FIXME: simplify-demanded-bits in DAGCombine will probably have | |||
1437 | // changed the AND node to a 32-bit mask operation. We'll have to | |||
1438 | // undo that as part of the transform here if we want to catch all | |||
1439 | // the opportunities. | |||
1440 | // Currently the NumberOfIgnoredLowBits argument helps to recover | |||
1441 | // form these situations when matching bigger pattern (bitfield insert). | |||
1442 | ||||
1443 | // For unsigned extracts, check for a shift right and mask | |||
1444 | uint64_t And_imm = 0; | |||
1445 | if (!isOpcWithIntImmediate(N, ISD::AND, And_imm)) | |||
1446 | return false; | |||
1447 | ||||
1448 | const SDNode *Op0 = N->getOperand(0).getNode(); | |||
1449 | ||||
1450 | // Because of simplify-demanded-bits in DAGCombine, the mask may have been | |||
1451 | // simplified. Try to undo that | |||
1452 | And_imm |= (1 << NumberOfIgnoredLowBits) - 1; | |||
1453 | ||||
1454 | // The immediate is a mask of the low bits iff imm & (imm+1) == 0 | |||
1455 | if (And_imm & (And_imm + 1)) | |||
1456 | return false; | |||
1457 | ||||
1458 | bool ClampMSB = false; | |||
1459 | uint64_t Srl_imm = 0; | |||
1460 | // Handle the SRL + ANY_EXTEND case. | |||
1461 | if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND && | |||
1462 | isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, Srl_imm)) { | |||
1463 | // Extend the incoming operand of the SRL to 64-bit. | |||
1464 | Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0)); | |||
1465 | // Make sure to clamp the MSB so that we preserve the semantics of the | |||
1466 | // original operations. | |||
1467 | ClampMSB = true; | |||
1468 | } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE && | |||
1469 | isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, | |||
1470 | Srl_imm)) { | |||
1471 | // If the shift result was truncated, we can still combine them. | |||
1472 | Opd0 = Op0->getOperand(0).getOperand(0); | |||
1473 | ||||
1474 | // Use the type of SRL node. | |||
1475 | VT = Opd0->getValueType(0); | |||
1476 | } else if (isOpcWithIntImmediate(Op0, ISD::SRL, Srl_imm)) { | |||
1477 | Opd0 = Op0->getOperand(0); | |||
1478 | } else if (BiggerPattern) { | |||
1479 | // Let's pretend a 0 shift right has been performed. | |||
1480 | // The resulting code will be at least as good as the original one | |||
1481 | // plus it may expose more opportunities for bitfield insert pattern. | |||
1482 | // FIXME: Currently we limit this to the bigger pattern, because | |||
1483 | // some optimizations expect AND and not UBFM. | |||
1484 | Opd0 = N->getOperand(0); | |||
1485 | } else | |||
1486 | return false; | |||
1487 | ||||
1488 | // Bail out on large immediates. This happens when no proper | |||
1489 | // combining/constant folding was performed. | |||
1490 | if (!BiggerPattern && (Srl_imm <= 0 || Srl_imm >= VT.getSizeInBits())) { | |||
1491 | DEBUG((dbgs() << Ndo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { (dbgs() << N << ": Found large shift immediate, this should not happen\n" ); } } while (0) | |||
1492 | << ": Found large shift immediate, this should not happen\n"))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { (dbgs() << N << ": Found large shift immediate, this should not happen\n" ); } } while (0); | |||
1493 | return false; | |||
1494 | } | |||
1495 | ||||
1496 | LSB = Srl_imm; | |||
1497 | MSB = Srl_imm + (VT == MVT::i32 ? countTrailingOnes<uint32_t>(And_imm) | |||
1498 | : countTrailingOnes<uint64_t>(And_imm)) - | |||
1499 | 1; | |||
1500 | if (ClampMSB) | |||
1501 | // Since we're moving the extend before the right shift operation, we need | |||
1502 | // to clamp the MSB to make sure we don't shift in undefined bits instead of | |||
1503 | // the zeros which would get shifted in with the original right shift | |||
1504 | // operation. | |||
1505 | MSB = MSB > 31 ? 31 : MSB; | |||
1506 | ||||
1507 | Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri; | |||
1508 | return true; | |||
1509 | } | |||
1510 | ||||
1511 | static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc, | |||
1512 | SDValue &Opd0, unsigned &LSB, | |||
1513 | unsigned &MSB) { | |||
1514 | // We are looking for the following pattern which basically extracts several | |||
1515 | // continuous bits from the source value and places it from the LSB of the | |||
1516 | // destination value, all other bits of the destination value or set to zero: | |||
1517 | // | |||
1518 | // Value2 = AND Value, MaskImm | |||
1519 | // SRL Value2, ShiftImm | |||
1520 | // | |||
1521 | // with MaskImm >> ShiftImm to search for the bit width. | |||
1522 | // | |||
1523 | // This gets selected into a single UBFM: | |||
1524 | // | |||
1525 | // UBFM Value, ShiftImm, BitWide + Srl_imm -1 | |||
1526 | // | |||
1527 | ||||
1528 | if (N->getOpcode() != ISD::SRL) | |||
1529 | return false; | |||
1530 | ||||
1531 | uint64_t And_mask = 0; | |||
1532 | if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_mask)) | |||
1533 | return false; | |||
1534 | ||||
1535 | Opd0 = N->getOperand(0).getOperand(0); | |||
1536 | ||||
1537 | uint64_t Srl_imm = 0; | |||
1538 | if (!isIntImmediate(N->getOperand(1), Srl_imm)) | |||
1539 | return false; | |||
1540 | ||||
1541 | // Check whether we really have several bits extract here. | |||
1542 | unsigned BitWide = 64 - countLeadingOnes(~(And_mask >> Srl_imm)); | |||
1543 | if (BitWide && isMask_64(And_mask >> Srl_imm)) { | |||
1544 | if (N->getValueType(0) == MVT::i32) | |||
1545 | Opc = AArch64::UBFMWri; | |||
1546 | else | |||
1547 | Opc = AArch64::UBFMXri; | |||
1548 | ||||
1549 | LSB = Srl_imm; | |||
1550 | MSB = BitWide + Srl_imm - 1; | |||
1551 | return true; | |||
1552 | } | |||
1553 | ||||
1554 | return false; | |||
1555 | } | |||
1556 | ||||
1557 | static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, | |||
1558 | unsigned &Immr, unsigned &Imms, | |||
1559 | bool BiggerPattern) { | |||
1560 | assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&(((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD:: SRL) && "N must be a SHR/SRA operation to call this function" ) ? static_cast<void> (0) : __assert_fail ("(N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) && \"N must be a SHR/SRA operation to call this function\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 1561, __PRETTY_FUNCTION__)) | |||
1561 | "N must be a SHR/SRA operation to call this function")(((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD:: SRL) && "N must be a SHR/SRA operation to call this function" ) ? static_cast<void> (0) : __assert_fail ("(N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) && \"N must be a SHR/SRA operation to call this function\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 1561, __PRETTY_FUNCTION__)); | |||
1562 | ||||
1563 | EVT VT = N->getValueType(0); | |||
1564 | ||||
1565 | // Here we can test the type of VT and return false when the type does not | |||
1566 | // match, but since it is done prior to that call in the current context | |||
1567 | // we turned that into an assert to avoid redundant code. | |||
1568 | assert((VT == MVT::i32 || VT == MVT::i64) &&(((VT == MVT::i32 || VT == MVT::i64) && "Type checking must have been done before calling this function" ) ? static_cast<void> (0) : __assert_fail ("(VT == MVT::i32 || VT == MVT::i64) && \"Type checking must have been done before calling this function\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 1569, __PRETTY_FUNCTION__)) | |||
1569 | "Type checking must have been done before calling this function")(((VT == MVT::i32 || VT == MVT::i64) && "Type checking must have been done before calling this function" ) ? static_cast<void> (0) : __assert_fail ("(VT == MVT::i32 || VT == MVT::i64) && \"Type checking must have been done before calling this function\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 1569, __PRETTY_FUNCTION__)); | |||
1570 | ||||
1571 | // Check for AND + SRL doing several bits extract. | |||
1572 | if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms)) | |||
1573 | return true; | |||
1574 | ||||
1575 | // we're looking for a shift of a shift | |||
1576 | uint64_t Shl_imm = 0; | |||
1577 | uint64_t Trunc_bits = 0; | |||
1578 | if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) { | |||
1579 | Opd0 = N->getOperand(0).getOperand(0); | |||
1580 | } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL && | |||
1581 | N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) { | |||
1582 | // We are looking for a shift of truncate. Truncate from i64 to i32 could | |||
1583 | // be considered as setting high 32 bits as zero. Our strategy here is to | |||
1584 | // always generate 64bit UBFM. This consistency will help the CSE pass | |||
1585 | // later find more redundancy. | |||
1586 | Opd0 = N->getOperand(0).getOperand(0); | |||
1587 | Trunc_bits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits(); | |||
1588 | VT = Opd0->getValueType(0); | |||
1589 | assert(VT == MVT::i64 && "the promoted type should be i64")((VT == MVT::i64 && "the promoted type should be i64" ) ? static_cast<void> (0) : __assert_fail ("VT == MVT::i64 && \"the promoted type should be i64\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 1589, __PRETTY_FUNCTION__)); | |||
1590 | } else if (BiggerPattern) { | |||
1591 | // Let's pretend a 0 shift left has been performed. | |||
1592 | // FIXME: Currently we limit this to the bigger pattern case, | |||
1593 | // because some optimizations expect AND and not UBFM | |||
1594 | Opd0 = N->getOperand(0); | |||
1595 | } else | |||
1596 | return false; | |||
1597 | ||||
1598 | // Missing combines/constant folding may have left us with strange | |||
1599 | // constants. | |||
1600 | if (Shl_imm >= VT.getSizeInBits()) { | |||
1601 | DEBUG((dbgs() << Ndo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { (dbgs() << N << ": Found large shift immediate, this should not happen\n" ); } } while (0) | |||
1602 | << ": Found large shift immediate, this should not happen\n"))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { (dbgs() << N << ": Found large shift immediate, this should not happen\n" ); } } while (0); | |||
1603 | return false; | |||
1604 | } | |||
1605 | ||||
1606 | uint64_t Srl_imm = 0; | |||
1607 | if (!isIntImmediate(N->getOperand(1), Srl_imm)) | |||
1608 | return false; | |||
1609 | ||||
1610 | assert(Srl_imm > 0 && Srl_imm < VT.getSizeInBits() &&((Srl_imm > 0 && Srl_imm < VT.getSizeInBits() && "bad amount in shift node!") ? static_cast<void> (0) : __assert_fail ("Srl_imm > 0 && Srl_imm < VT.getSizeInBits() && \"bad amount in shift node!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 1611, __PRETTY_FUNCTION__)) | |||
1611 | "bad amount in shift node!")((Srl_imm > 0 && Srl_imm < VT.getSizeInBits() && "bad amount in shift node!") ? static_cast<void> (0) : __assert_fail ("Srl_imm > 0 && Srl_imm < VT.getSizeInBits() && \"bad amount in shift node!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 1611, __PRETTY_FUNCTION__)); | |||
1612 | int immr = Srl_imm - Shl_imm; | |||
1613 | Immr = immr < 0 ? immr + VT.getSizeInBits() : immr; | |||
1614 | Imms = VT.getSizeInBits() - Shl_imm - Trunc_bits - 1; | |||
1615 | // SRA requires a signed extraction | |||
1616 | if (VT == MVT::i32) | |||
1617 | Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri; | |||
1618 | else | |||
1619 | Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri; | |||
1620 | return true; | |||
1621 | } | |||
1622 | ||||
1623 | static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, | |||
1624 | SDValue &Opd0, unsigned &Immr, unsigned &Imms, | |||
1625 | unsigned NumberOfIgnoredLowBits = 0, | |||
1626 | bool BiggerPattern = false) { | |||
1627 | if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64) | |||
1628 | return false; | |||
1629 | ||||
1630 | switch (N->getOpcode()) { | |||
1631 | default: | |||
1632 | if (!N->isMachineOpcode()) | |||
1633 | return false; | |||
1634 | break; | |||
1635 | case ISD::AND: | |||
1636 | return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms, | |||
1637 | NumberOfIgnoredLowBits, BiggerPattern); | |||
1638 | case ISD::SRL: | |||
1639 | case ISD::SRA: | |||
1640 | return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern); | |||
1641 | } | |||
1642 | ||||
1643 | unsigned NOpc = N->getMachineOpcode(); | |||
1644 | switch (NOpc) { | |||
1645 | default: | |||
1646 | return false; | |||
1647 | case AArch64::SBFMWri: | |||
1648 | case AArch64::UBFMWri: | |||
1649 | case AArch64::SBFMXri: | |||
1650 | case AArch64::UBFMXri: | |||
1651 | Opc = NOpc; | |||
1652 | Opd0 = N->getOperand(0); | |||
1653 | Immr = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue(); | |||
1654 | Imms = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue(); | |||
1655 | return true; | |||
1656 | } | |||
1657 | // Unreachable | |||
1658 | return false; | |||
1659 | } | |||
1660 | ||||
1661 | SDNode *AArch64DAGToDAGISel::SelectBitfieldExtractOp(SDNode *N) { | |||
1662 | unsigned Opc, Immr, Imms; | |||
1663 | SDValue Opd0; | |||
1664 | if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms)) | |||
1665 | return nullptr; | |||
1666 | ||||
1667 | EVT VT = N->getValueType(0); | |||
1668 | SDLoc dl(N); | |||
1669 | ||||
1670 | // If the bit extract operation is 64bit but the original type is 32bit, we | |||
1671 | // need to add one EXTRACT_SUBREG. | |||
1672 | if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) { | |||
1673 | SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64), | |||
1674 | CurDAG->getTargetConstant(Imms, dl, MVT::i64)}; | |||
1675 | ||||
1676 | SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64); | |||
1677 | SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32); | |||
1678 | MachineSDNode *Node = | |||
1679 | CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i32, | |||
1680 | SDValue(BFM, 0), SubReg); | |||
1681 | return Node; | |||
1682 | } | |||
1683 | ||||
1684 | SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT), | |||
1685 | CurDAG->getTargetConstant(Imms, dl, VT)}; | |||
1686 | return CurDAG->SelectNodeTo(N, Opc, VT, Ops); | |||
1687 | } | |||
1688 | ||||
1689 | /// Does DstMask form a complementary pair with the mask provided by | |||
1690 | /// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking, | |||
1691 | /// this asks whether DstMask zeroes precisely those bits that will be set by | |||
1692 | /// the other half. | |||
1693 | static bool isBitfieldDstMask(uint64_t DstMask, APInt BitsToBeInserted, | |||
1694 | unsigned NumberOfIgnoredHighBits, EVT VT) { | |||
1695 | assert((VT == MVT::i32 || VT == MVT::i64) &&(((VT == MVT::i32 || VT == MVT::i64) && "i32 or i64 mask type expected!" ) ? static_cast<void> (0) : __assert_fail ("(VT == MVT::i32 || VT == MVT::i64) && \"i32 or i64 mask type expected!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 1696, __PRETTY_FUNCTION__)) | |||
1696 | "i32 or i64 mask type expected!")(((VT == MVT::i32 || VT == MVT::i64) && "i32 or i64 mask type expected!" ) ? static_cast<void> (0) : __assert_fail ("(VT == MVT::i32 || VT == MVT::i64) && \"i32 or i64 mask type expected!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 1696, __PRETTY_FUNCTION__)); | |||
1697 | unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits; | |||
1698 | ||||
1699 | APInt SignificantDstMask = APInt(BitWidth, DstMask); | |||
1700 | APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth); | |||
1701 | ||||
1702 | return (SignificantDstMask & SignificantBitsToBeInserted) == 0 && | |||
1703 | (SignificantDstMask | SignificantBitsToBeInserted).isAllOnesValue(); | |||
1704 | } | |||
1705 | ||||
1706 | // Look for bits that will be useful for later uses. | |||
1707 | // A bit is considered useless as soon as it is dropped, provided it is never | |||
1708 | // used before it has been dropped. | |||
1709 | // E.g., looking for the useful bits of x | |||
1710 | // 1. y = x & 0x7 | |||
1711 | // 2. z = y >> 2 | |||
1712 | // After #1, the useful bits of x are 0x7; these bits then live on through | |||
1713 | // y. | |||
1714 | // After #2, the useful bits of x are 0x4. | |||
1715 | // However, if x is used by an unpredictable instruction, then all its bits | |||
1716 | // are useful. | |||
1717 | // E.g. | |||
1718 | // 1. y = x & 0x7 | |||
1719 | // 2. z = y >> 2 | |||
1720 | // 3. str x, [@x] | |||
1721 | static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0); | |||
1722 | ||||
1723 | static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits, | |||
1724 | unsigned Depth) { | |||
1725 | uint64_t Imm = | |||
1726 | cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue(); | |||
1727 | Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth()); | |||
1728 | UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm); | |||
1729 | getUsefulBits(Op, UsefulBits, Depth + 1); | |||
1730 | } | |||
1731 | ||||
1732 | static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits, | |||
1733 | uint64_t Imm, uint64_t MSB, | |||
1734 | unsigned Depth) { | |||
1735 | // inherit the bitwidth value | |||
1736 | APInt OpUsefulBits(UsefulBits); | |||
1737 | OpUsefulBits = 1; | |||
1738 | ||||
1739 | if (MSB >= Imm) { | |||
1740 | OpUsefulBits = OpUsefulBits.shl(MSB - Imm + 1); | |||
1741 | --OpUsefulBits; | |||
1742 | // The interesting part will be in the lower part of the result | |||
1743 | getUsefulBits(Op, OpUsefulBits, Depth + 1); | |||
1744 | // The interesting part was starting at Imm in the argument | |||
1745 | OpUsefulBits = OpUsefulBits.shl(Imm); | |||
1746 | } else { | |||
1747 | OpUsefulBits = OpUsefulBits.shl(MSB + 1); | |||
1748 | --OpUsefulBits; | |||
1749 | // The interesting part will be shifted in the result | |||
1750 | OpUsefulBits = OpUsefulBits.shl(OpUsefulBits.getBitWidth() - Imm); | |||
1751 | getUsefulBits(Op, OpUsefulBits, Depth + 1); | |||
1752 | // The interesting part was at zero in the argument | |||
1753 | OpUsefulBits = OpUsefulBits.lshr(OpUsefulBits.getBitWidth() - Imm); | |||
1754 | } | |||
1755 | ||||
1756 | UsefulBits &= OpUsefulBits; | |||
1757 | } | |||
1758 | ||||
1759 | static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits, | |||
1760 | unsigned Depth) { | |||
1761 | uint64_t Imm = | |||
1762 | cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue(); | |||
1763 | uint64_t MSB = | |||
1764 | cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue(); | |||
1765 | ||||
1766 | getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth); | |||
1767 | } | |||
1768 | ||||
1769 | static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits, | |||
1770 | unsigned Depth) { | |||
1771 | uint64_t ShiftTypeAndValue = | |||
1772 | cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue(); | |||
1773 | APInt Mask(UsefulBits); | |||
1774 | Mask.clearAllBits(); | |||
1775 | Mask.flipAllBits(); | |||
1776 | ||||
1777 | if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) { | |||
1778 | // Shift Left | |||
1779 | uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue); | |||
1780 | Mask = Mask.shl(ShiftAmt); | |||
1781 | getUsefulBits(Op, Mask, Depth + 1); | |||
1782 | Mask = Mask.lshr(ShiftAmt); | |||
1783 | } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) { | |||
1784 | // Shift Right | |||
1785 | // We do not handle AArch64_AM::ASR, because the sign will change the | |||
1786 | // number of useful bits | |||
1787 | uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue); | |||
1788 | Mask = Mask.lshr(ShiftAmt); | |||
1789 | getUsefulBits(Op, Mask, Depth + 1); | |||
1790 | Mask = Mask.shl(ShiftAmt); | |||
1791 | } else | |||
1792 | return; | |||
1793 | ||||
1794 | UsefulBits &= Mask; | |||
1795 | } | |||
1796 | ||||
1797 | static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits, | |||
1798 | unsigned Depth) { | |||
1799 | uint64_t Imm = | |||
1800 | cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue(); | |||
1801 | uint64_t MSB = | |||
1802 | cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue(); | |||
1803 | ||||
1804 | if (Op.getOperand(1) == Orig) | |||
1805 | return getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth); | |||
1806 | ||||
1807 | APInt OpUsefulBits(UsefulBits); | |||
1808 | OpUsefulBits = 1; | |||
1809 | ||||
1810 | if (MSB >= Imm) { | |||
1811 | OpUsefulBits = OpUsefulBits.shl(MSB - Imm + 1); | |||
1812 | --OpUsefulBits; | |||
1813 | UsefulBits &= ~OpUsefulBits; | |||
1814 | getUsefulBits(Op, UsefulBits, Depth + 1); | |||
1815 | } else { | |||
1816 | OpUsefulBits = OpUsefulBits.shl(MSB + 1); | |||
1817 | --OpUsefulBits; | |||
1818 | UsefulBits = ~(OpUsefulBits.shl(OpUsefulBits.getBitWidth() - Imm)); | |||
1819 | getUsefulBits(Op, UsefulBits, Depth + 1); | |||
1820 | } | |||
1821 | } | |||
1822 | ||||
1823 | static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits, | |||
1824 | SDValue Orig, unsigned Depth) { | |||
1825 | ||||
1826 | // Users of this node should have already been instruction selected | |||
1827 | // FIXME: Can we turn that into an assert? | |||
1828 | if (!UserNode->isMachineOpcode()) | |||
1829 | return; | |||
1830 | ||||
1831 | switch (UserNode->getMachineOpcode()) { | |||
1832 | default: | |||
1833 | return; | |||
1834 | case AArch64::ANDSWri: | |||
1835 | case AArch64::ANDSXri: | |||
1836 | case AArch64::ANDWri: | |||
1837 | case AArch64::ANDXri: | |||
1838 | // We increment Depth only when we call the getUsefulBits | |||
1839 | return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits, | |||
1840 | Depth); | |||
1841 | case AArch64::UBFMWri: | |||
1842 | case AArch64::UBFMXri: | |||
1843 | return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth); | |||
1844 | ||||
1845 | case AArch64::ORRWrs: | |||
1846 | case AArch64::ORRXrs: | |||
1847 | if (UserNode->getOperand(1) != Orig) | |||
1848 | return; | |||
1849 | return getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits, | |||
1850 | Depth); | |||
1851 | case AArch64::BFMWri: | |||
1852 | case AArch64::BFMXri: | |||
1853 | return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth); | |||
1854 | } | |||
1855 | } | |||
1856 | ||||
1857 | static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) { | |||
1858 | if (Depth >= 6) | |||
1859 | return; | |||
1860 | // Initialize UsefulBits | |||
1861 | if (!Depth) { | |||
1862 | unsigned Bitwidth = Op.getValueType().getScalarType().getSizeInBits(); | |||
1863 | // At the beginning, assume every produced bits is useful | |||
1864 | UsefulBits = APInt(Bitwidth, 0); | |||
1865 | UsefulBits.flipAllBits(); | |||
1866 | } | |||
1867 | APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0); | |||
1868 | ||||
1869 | for (SDNode *Node : Op.getNode()->uses()) { | |||
1870 | // A use cannot produce useful bits | |||
1871 | APInt UsefulBitsForUse = APInt(UsefulBits); | |||
1872 | getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth); | |||
1873 | UsersUsefulBits |= UsefulBitsForUse; | |||
1874 | } | |||
1875 | // UsefulBits contains the produced bits that are meaningful for the | |||
1876 | // current definition, thus a user cannot make a bit meaningful at | |||
1877 | // this point | |||
1878 | UsefulBits &= UsersUsefulBits; | |||
1879 | } | |||
1880 | ||||
1881 | /// Create a machine node performing a notional SHL of Op by ShlAmount. If | |||
1882 | /// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is | |||
1883 | /// 0, return Op unchanged. | |||
1884 | static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) { | |||
1885 | if (ShlAmount == 0) | |||
1886 | return Op; | |||
1887 | ||||
1888 | EVT VT = Op.getValueType(); | |||
1889 | SDLoc dl(Op); | |||
1890 | unsigned BitWidth = VT.getSizeInBits(); | |||
1891 | unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri; | |||
1892 | ||||
1893 | SDNode *ShiftNode; | |||
1894 | if (ShlAmount > 0) { | |||
1895 | // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt | |||
1896 | ShiftNode = CurDAG->getMachineNode( | |||
1897 | UBFMOpc, dl, VT, Op, | |||
1898 | CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT), | |||
1899 | CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT)); | |||
1900 | } else { | |||
1901 | // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1 | |||
1902 | assert(ShlAmount < 0 && "expected right shift")((ShlAmount < 0 && "expected right shift") ? static_cast <void> (0) : __assert_fail ("ShlAmount < 0 && \"expected right shift\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 1902, __PRETTY_FUNCTION__)); | |||
1903 | int ShrAmount = -ShlAmount; | |||
1904 | ShiftNode = CurDAG->getMachineNode( | |||
1905 | UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT), | |||
1906 | CurDAG->getTargetConstant(BitWidth - 1, dl, VT)); | |||
1907 | } | |||
1908 | ||||
1909 | return SDValue(ShiftNode, 0); | |||
1910 | } | |||
1911 | ||||
1912 | /// Does this tree qualify as an attempt to move a bitfield into position, | |||
1913 | /// essentially "(and (shl VAL, N), Mask)". | |||
1914 | static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op, | |||
1915 | bool BiggerPattern, | |||
1916 | SDValue &Src, int &ShiftAmount, | |||
1917 | int &MaskWidth) { | |||
1918 | EVT VT = Op.getValueType(); | |||
1919 | unsigned BitWidth = VT.getSizeInBits(); | |||
1920 | (void)BitWidth; | |||
1921 | assert(BitWidth == 32 || BitWidth == 64)((BitWidth == 32 || BitWidth == 64) ? static_cast<void> (0) : __assert_fail ("BitWidth == 32 || BitWidth == 64", "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 1921, __PRETTY_FUNCTION__)); | |||
1922 | ||||
1923 | APInt KnownZero, KnownOne; | |||
1924 | CurDAG->computeKnownBits(Op, KnownZero, KnownOne); | |||
1925 | ||||
1926 | // Non-zero in the sense that they're not provably zero, which is the key | |||
1927 | // point if we want to use this value | |||
1928 | uint64_t NonZeroBits = (~KnownZero).getZExtValue(); | |||
1929 | ||||
1930 | // Discard a constant AND mask if present. It's safe because the node will | |||
1931 | // already have been factored into the computeKnownBits calculation above. | |||
1932 | uint64_t AndImm; | |||
1933 | if (isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm)) { | |||
1934 | assert((~APInt(BitWidth, AndImm) & ~KnownZero) == 0)(((~APInt(BitWidth, AndImm) & ~KnownZero) == 0) ? static_cast <void> (0) : __assert_fail ("(~APInt(BitWidth, AndImm) & ~KnownZero) == 0" , "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 1934, __PRETTY_FUNCTION__)); | |||
1935 | Op = Op.getOperand(0); | |||
1936 | } | |||
1937 | ||||
1938 | // Don't match if the SHL has more than one use, since then we'll end up | |||
1939 | // generating SHL+UBFIZ instead of just keeping SHL+AND. | |||
1940 | if (!BiggerPattern && !Op.hasOneUse()) | |||
1941 | return false; | |||
1942 | ||||
1943 | uint64_t ShlImm; | |||
1944 | if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm)) | |||
1945 | return false; | |||
1946 | Op = Op.getOperand(0); | |||
1947 | ||||
1948 | if (!isShiftedMask_64(NonZeroBits)) | |||
1949 | return false; | |||
1950 | ||||
1951 | ShiftAmount = countTrailingZeros(NonZeroBits); | |||
1952 | MaskWidth = countTrailingOnes(NonZeroBits >> ShiftAmount); | |||
1953 | ||||
1954 | // BFI encompasses sufficiently many nodes that it's worth inserting an extra | |||
1955 | // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL | |||
1956 | // amount. BiggerPattern is true when this pattern is being matched for BFI, | |||
1957 | // BiggerPattern is false when this pattern is being matched for UBFIZ, in | |||
1958 | // which case it is not profitable to insert an extra shift. | |||
1959 | if (ShlImm - ShiftAmount != 0 && !BiggerPattern) | |||
1960 | return false; | |||
1961 | Src = getLeftShift(CurDAG, Op, ShlImm - ShiftAmount); | |||
1962 | ||||
1963 | return true; | |||
1964 | } | |||
1965 | ||||
1966 | // Given a OR operation, check if we have the following pattern | |||
1967 | // ubfm c, b, imm, imm2 (or something that does the same jobs, see | |||
1968 | // isBitfieldExtractOp) | |||
1969 | // d = e & mask2 ; where mask is a binary sequence of 1..10..0 and | |||
1970 | // countTrailingZeros(mask2) == imm2 - imm + 1 | |||
1971 | // f = d | c | |||
1972 | // if yes, given reference arguments will be update so that one can replace | |||
1973 | // the OR instruction with: | |||
1974 | // f = Opc Opd0, Opd1, LSB, MSB ; where Opc is a BFM, LSB = imm, and MSB = imm2 | |||
1975 | static bool isBitfieldInsertOpFromOr(SDNode *N, unsigned &Opc, SDValue &Dst, | |||
1976 | SDValue &Src, unsigned &ImmR, | |||
1977 | unsigned &ImmS, const APInt &UsefulBits, | |||
1978 | SelectionDAG *CurDAG) { | |||
1979 | assert(N->getOpcode() == ISD::OR && "Expect a OR operation")((N->getOpcode() == ISD::OR && "Expect a OR operation" ) ? static_cast<void> (0) : __assert_fail ("N->getOpcode() == ISD::OR && \"Expect a OR operation\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 1979, __PRETTY_FUNCTION__)); | |||
1980 | ||||
1981 | // Set Opc | |||
1982 | EVT VT = N->getValueType(0); | |||
1983 | if (VT == MVT::i32) | |||
1984 | Opc = AArch64::BFMWri; | |||
1985 | else if (VT == MVT::i64) | |||
1986 | Opc = AArch64::BFMXri; | |||
1987 | else | |||
1988 | return false; | |||
1989 | ||||
1990 | // Because of simplify-demanded-bits in DAGCombine, involved masks may not | |||
1991 | // have the expected shape. Try to undo that. | |||
1992 | ||||
1993 | unsigned NumberOfIgnoredLowBits = UsefulBits.countTrailingZeros(); | |||
1994 | unsigned NumberOfIgnoredHighBits = UsefulBits.countLeadingZeros(); | |||
1995 | ||||
1996 | // OR is commutative, check all combinations of operand order and values of | |||
1997 | // BiggerPattern, i.e. | |||
1998 | // Opd0, Opd1, BiggerPattern=false | |||
1999 | // Opd1, Opd0, BiggerPattern=false | |||
2000 | // Opd0, Opd1, BiggerPattern=true | |||
2001 | // Opd1, Opd0, BiggerPattern=true | |||
2002 | // Several of these combinations may match, so check with BiggerPattern=false | |||
2003 | // first since that will produce better results by matching more instructions | |||
2004 | // and/or inserting fewer extra instructions. | |||
2005 | for (int I = 0; I < 4; ++I) { | |||
2006 | ||||
2007 | bool BiggerPattern = I / 2; | |||
2008 | SDNode *OrOpd0 = N->getOperand(I % 2).getNode(); | |||
2009 | SDValue OrOpd1Val = N->getOperand((I + 1) % 2); | |||
2010 | SDNode *OrOpd1 = OrOpd1Val.getNode(); | |||
2011 | ||||
2012 | unsigned BFXOpc; | |||
2013 | int DstLSB, Width; | |||
2014 | if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS, | |||
2015 | NumberOfIgnoredLowBits, BiggerPattern)) { | |||
2016 | // Check that the returned opcode is compatible with the pattern, | |||
2017 | // i.e., same type and zero extended (U and not S) | |||
2018 | if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) || | |||
2019 | (BFXOpc != AArch64::UBFMWri && VT == MVT::i32)) | |||
2020 | continue; | |||
2021 | ||||
2022 | // Compute the width of the bitfield insertion | |||
2023 | DstLSB = 0; | |||
2024 | Width = ImmS - ImmR + 1; | |||
2025 | // FIXME: This constraint is to catch bitfield insertion we may | |||
2026 | // want to widen the pattern if we want to grab general bitfied | |||
2027 | // move case | |||
2028 | if (Width <= 0) | |||
2029 | continue; | |||
2030 | ||||
2031 | // If the mask on the insertee is correct, we have a BFXIL operation. We | |||
2032 | // can share the ImmR and ImmS values from the already-computed UBFM. | |||
2033 | } else if (isBitfieldPositioningOp(CurDAG, SDValue(OrOpd0, 0), | |||
2034 | BiggerPattern, | |||
2035 | Src, DstLSB, Width)) { | |||
2036 | ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits(); | |||
2037 | ImmS = Width - 1; | |||
2038 | } else | |||
2039 | continue; | |||
2040 | ||||
2041 | // Check the second part of the pattern | |||
2042 | EVT VT = OrOpd1->getValueType(0); | |||
2043 | assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand")(((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand" ) ? static_cast<void> (0) : __assert_fail ("(VT == MVT::i32 || VT == MVT::i64) && \"unexpected OR operand\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 2043, __PRETTY_FUNCTION__)); | |||
2044 | ||||
2045 | // Compute the Known Zero for the candidate of the first operand. | |||
2046 | // This allows to catch more general case than just looking for | |||
2047 | // AND with imm. Indeed, simplify-demanded-bits may have removed | |||
2048 | // the AND instruction because it proves it was useless. | |||
2049 | APInt KnownZero, KnownOne; | |||
2050 | CurDAG->computeKnownBits(OrOpd1Val, KnownZero, KnownOne); | |||
2051 | ||||
2052 | // Check if there is enough room for the second operand to appear | |||
2053 | // in the first one | |||
2054 | APInt BitsToBeInserted = | |||
2055 | APInt::getBitsSet(KnownZero.getBitWidth(), DstLSB, DstLSB + Width); | |||
2056 | ||||
2057 | if ((BitsToBeInserted & ~KnownZero) != 0) | |||
2058 | continue; | |||
2059 | ||||
2060 | // Set the first operand | |||
2061 | uint64_t Imm; | |||
2062 | if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) && | |||
2063 | isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT)) | |||
2064 | // In that case, we can eliminate the AND | |||
2065 | Dst = OrOpd1->getOperand(0); | |||
2066 | else | |||
2067 | // Maybe the AND has been removed by simplify-demanded-bits | |||
2068 | // or is useful because it discards more bits | |||
2069 | Dst = OrOpd1Val; | |||
2070 | ||||
2071 | // both parts match | |||
2072 | return true; | |||
2073 | } | |||
2074 | ||||
2075 | return false; | |||
2076 | } | |||
2077 | ||||
2078 | SDNode *AArch64DAGToDAGISel::SelectBitfieldInsertOp(SDNode *N) { | |||
2079 | if (N->getOpcode() != ISD::OR) | |||
2080 | return nullptr; | |||
2081 | ||||
2082 | unsigned Opc; | |||
2083 | unsigned LSB, MSB; | |||
2084 | SDValue Opd0, Opd1; | |||
2085 | EVT VT = N->getValueType(0); | |||
2086 | APInt NUsefulBits; | |||
2087 | getUsefulBits(SDValue(N, 0), NUsefulBits); | |||
2088 | ||||
2089 | // If all bits are not useful, just return UNDEF. | |||
2090 | if (!NUsefulBits) | |||
2091 | return CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, VT); | |||
2092 | ||||
2093 | if (!isBitfieldInsertOpFromOr(N, Opc, Opd0, Opd1, LSB, MSB, NUsefulBits, | |||
2094 | CurDAG)) | |||
2095 | return nullptr; | |||
2096 | ||||
2097 | SDLoc dl(N); | |||
2098 | SDValue Ops[] = { Opd0, | |||
2099 | Opd1, | |||
2100 | CurDAG->getTargetConstant(LSB, dl, VT), | |||
2101 | CurDAG->getTargetConstant(MSB, dl, VT) }; | |||
2102 | return CurDAG->SelectNodeTo(N, Opc, VT, Ops); | |||
2103 | } | |||
2104 | ||||
2105 | /// SelectBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the | |||
2106 | /// equivalent of a left shift by a constant amount followed by an and masking | |||
2107 | /// out a contiguous set of bits. | |||
2108 | SDNode *AArch64DAGToDAGISel::SelectBitfieldInsertInZeroOp(SDNode *N) { | |||
2109 | if (N->getOpcode() != ISD::AND) | |||
2110 | return nullptr; | |||
2111 | ||||
2112 | EVT VT = N->getValueType(0); | |||
2113 | unsigned Opc; | |||
2114 | if (VT == MVT::i32) | |||
2115 | Opc = AArch64::UBFMWri; | |||
2116 | else if (VT == MVT::i64) | |||
2117 | Opc = AArch64::UBFMXri; | |||
2118 | else | |||
2119 | return nullptr; | |||
2120 | ||||
2121 | SDValue Op0; | |||
2122 | int DstLSB, Width; | |||
2123 | if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false, | |||
2124 | Op0, DstLSB, Width)) | |||
2125 | return nullptr; | |||
2126 | ||||
2127 | // ImmR is the rotate right amount. | |||
2128 | unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits(); | |||
2129 | // ImmS is the most significant bit of the source to be moved. | |||
2130 | unsigned ImmS = Width - 1; | |||
2131 | ||||
2132 | SDLoc DL(N); | |||
2133 | SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT), | |||
2134 | CurDAG->getTargetConstant(ImmS, DL, VT)}; | |||
2135 | return CurDAG->SelectNodeTo(N, Opc, VT, Ops); | |||
2136 | } | |||
2137 | ||||
2138 | bool | |||
2139 | AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, | |||
2140 | unsigned RegWidth) { | |||
2141 | APFloat FVal(0.0); | |||
2142 | if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N)) | |||
2143 | FVal = CN->getValueAPF(); | |||
2144 | else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) { | |||
2145 | // Some otherwise illegal constants are allowed in this case. | |||
2146 | if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow || | |||
2147 | !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1))) | |||
2148 | return false; | |||
2149 | ||||
2150 | ConstantPoolSDNode *CN = | |||
2151 | dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)); | |||
2152 | FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF(); | |||
2153 | } else | |||
2154 | return false; | |||
2155 | ||||
2156 | // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits | |||
2157 | // is between 1 and 32 for a destination w-register, or 1 and 64 for an | |||
2158 | // x-register. | |||
2159 | // | |||
2160 | // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we | |||
2161 | // want THIS_NODE to be 2^fbits. This is much easier to deal with using | |||
2162 | // integers. | |||
2163 | bool IsExact; | |||
2164 | ||||
2165 | // fbits is between 1 and 64 in the worst-case, which means the fmul | |||
2166 | // could have 2^64 as an actual operand. Need 65 bits of precision. | |||
2167 | APSInt IntVal(65, true); | |||
2168 | FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact); | |||
2169 | ||||
2170 | // N.b. isPowerOf2 also checks for > 0. | |||
2171 | if (!IsExact || !IntVal.isPowerOf2()) return false; | |||
2172 | unsigned FBits = IntVal.logBase2(); | |||
2173 | ||||
2174 | // Checks above should have guaranteed that we haven't lost information in | |||
2175 | // finding FBits, but it must still be in range. | |||
2176 | if (FBits == 0 || FBits > RegWidth) return false; | |||
2177 | ||||
2178 | FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32); | |||
2179 | return true; | |||
2180 | } | |||
2181 | ||||
2182 | // Inspects a register string of the form o0:op1:CRn:CRm:op2 gets the fields | |||
2183 | // of the string and obtains the integer values from them and combines these | |||
2184 | // into a single value to be used in the MRS/MSR instruction. | |||
2185 | static int getIntOperandFromRegisterString(StringRef RegString) { | |||
2186 | SmallVector<StringRef, 5> Fields; | |||
2187 | RegString.split(Fields, ':'); | |||
2188 | ||||
2189 | if (Fields.size() == 1) | |||
2190 | return -1; | |||
2191 | ||||
2192 | assert(Fields.size() == 5((Fields.size() == 5 && "Invalid number of fields in read register string" ) ? static_cast<void> (0) : __assert_fail ("Fields.size() == 5 && \"Invalid number of fields in read register string\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 2193, __PRETTY_FUNCTION__)) | |||
2193 | && "Invalid number of fields in read register string")((Fields.size() == 5 && "Invalid number of fields in read register string" ) ? static_cast<void> (0) : __assert_fail ("Fields.size() == 5 && \"Invalid number of fields in read register string\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 2193, __PRETTY_FUNCTION__)); | |||
2194 | ||||
2195 | SmallVector<int, 5> Ops; | |||
2196 | bool AllIntFields = true; | |||
2197 | ||||
2198 | for (StringRef Field : Fields) { | |||
2199 | unsigned IntField; | |||
2200 | AllIntFields &= !Field.getAsInteger(10, IntField); | |||
2201 | Ops.push_back(IntField); | |||
2202 | } | |||
2203 | ||||
2204 | assert(AllIntFields &&((AllIntFields && "Unexpected non-integer value in special register string." ) ? static_cast<void> (0) : __assert_fail ("AllIntFields && \"Unexpected non-integer value in special register string.\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 2205, __PRETTY_FUNCTION__)) | |||
2205 | "Unexpected non-integer value in special register string.")((AllIntFields && "Unexpected non-integer value in special register string." ) ? static_cast<void> (0) : __assert_fail ("AllIntFields && \"Unexpected non-integer value in special register string.\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 2205, __PRETTY_FUNCTION__)); | |||
2206 | ||||
2207 | // Need to combine the integer fields of the string into a single value | |||
2208 | // based on the bit encoding of MRS/MSR instruction. | |||
2209 | return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) | | |||
2210 | (Ops[3] << 3) | (Ops[4]); | |||
2211 | } | |||
2212 | ||||
2213 | // Lower the read_register intrinsic to an MRS instruction node if the special | |||
2214 | // register string argument is either of the form detailed in the ALCE (the | |||
2215 | // form described in getIntOperandsFromRegsterString) or is a named register | |||
2216 | // known by the MRS SysReg mapper. | |||
2217 | SDNode *AArch64DAGToDAGISel::SelectReadRegister(SDNode *N) { | |||
2218 | const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1)); | |||
2219 | const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0)); | |||
2220 | SDLoc DL(N); | |||
2221 | ||||
2222 | int Reg = getIntOperandFromRegisterString(RegString->getString()); | |||
2223 | if (Reg != -1) | |||
2224 | return CurDAG->getMachineNode(AArch64::MRS, DL, N->getSimpleValueType(0), | |||
2225 | MVT::Other, | |||
2226 | CurDAG->getTargetConstant(Reg, DL, MVT::i32), | |||
2227 | N->getOperand(0)); | |||
2228 | ||||
2229 | // Use the sysreg mapper to map the remaining possible strings to the | |||
2230 | // value for the register to be used for the instruction operand. | |||
2231 | AArch64SysReg::MRSMapper mapper; | |||
2232 | bool IsValidSpecialReg; | |||
2233 | Reg = mapper.fromString(RegString->getString(), | |||
2234 | Subtarget->getFeatureBits(), | |||
2235 | IsValidSpecialReg); | |||
2236 | if (IsValidSpecialReg) | |||
2237 | return CurDAG->getMachineNode(AArch64::MRS, DL, N->getSimpleValueType(0), | |||
2238 | MVT::Other, | |||
2239 | CurDAG->getTargetConstant(Reg, DL, MVT::i32), | |||
2240 | N->getOperand(0)); | |||
2241 | ||||
2242 | return nullptr; | |||
2243 | } | |||
2244 | ||||
2245 | // Lower the write_register intrinsic to an MSR instruction node if the special | |||
2246 | // register string argument is either of the form detailed in the ALCE (the | |||
2247 | // form described in getIntOperandsFromRegsterString) or is a named register | |||
2248 | // known by the MSR SysReg mapper. | |||
2249 | SDNode *AArch64DAGToDAGISel::SelectWriteRegister(SDNode *N) { | |||
2250 | const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1)); | |||
2251 | const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0)); | |||
2252 | SDLoc DL(N); | |||
2253 | ||||
2254 | int Reg = getIntOperandFromRegisterString(RegString->getString()); | |||
2255 | if (Reg != -1) | |||
2256 | return CurDAG->getMachineNode(AArch64::MSR, DL, MVT::Other, | |||
2257 | CurDAG->getTargetConstant(Reg, DL, MVT::i32), | |||
2258 | N->getOperand(2), N->getOperand(0)); | |||
2259 | ||||
2260 | // Check if the register was one of those allowed as the pstatefield value in | |||
2261 | // the MSR (immediate) instruction. To accept the values allowed in the | |||
2262 | // pstatefield for the MSR (immediate) instruction, we also require that an | |||
2263 | // immediate value has been provided as an argument, we know that this is | |||
2264 | // the case as it has been ensured by semantic checking. | |||
2265 | AArch64PState::PStateMapper PMapper; | |||
2266 | bool IsValidSpecialReg; | |||
2267 | Reg = PMapper.fromString(RegString->getString(), | |||
2268 | Subtarget->getFeatureBits(), | |||
2269 | IsValidSpecialReg); | |||
2270 | if (IsValidSpecialReg) { | |||
2271 | assert (isa<ConstantSDNode>(N->getOperand(2))((isa<ConstantSDNode>(N->getOperand(2)) && "Expected a constant integer expression." ) ? static_cast<void> (0) : __assert_fail ("isa<ConstantSDNode>(N->getOperand(2)) && \"Expected a constant integer expression.\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 2272, __PRETTY_FUNCTION__)) | |||
2272 | && "Expected a constant integer expression.")((isa<ConstantSDNode>(N->getOperand(2)) && "Expected a constant integer expression." ) ? static_cast<void> (0) : __assert_fail ("isa<ConstantSDNode>(N->getOperand(2)) && \"Expected a constant integer expression.\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 2272, __PRETTY_FUNCTION__)); | |||
2273 | uint64_t Immed = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(); | |||
2274 | unsigned State; | |||
2275 | if (Reg == AArch64PState::PAN || Reg == AArch64PState::UAO) { | |||
2276 | assert(Immed < 2 && "Bad imm")((Immed < 2 && "Bad imm") ? static_cast<void> (0) : __assert_fail ("Immed < 2 && \"Bad imm\"", "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 2276, __PRETTY_FUNCTION__)); | |||
2277 | State = AArch64::MSRpstateImm1; | |||
2278 | } else { | |||
2279 | assert(Immed < 16 && "Bad imm")((Immed < 16 && "Bad imm") ? static_cast<void> (0) : __assert_fail ("Immed < 16 && \"Bad imm\"", "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 2279, __PRETTY_FUNCTION__)); | |||
2280 | State = AArch64::MSRpstateImm4; | |||
2281 | } | |||
2282 | return CurDAG->getMachineNode(State, DL, MVT::Other, | |||
2283 | CurDAG->getTargetConstant(Reg, DL, MVT::i32), | |||
2284 | CurDAG->getTargetConstant(Immed, DL, MVT::i16), | |||
2285 | N->getOperand(0)); | |||
2286 | } | |||
2287 | ||||
2288 | // Use the sysreg mapper to attempt to map the remaining possible strings | |||
2289 | // to the value for the register to be used for the MSR (register) | |||
2290 | // instruction operand. | |||
2291 | AArch64SysReg::MSRMapper Mapper; | |||
2292 | Reg = Mapper.fromString(RegString->getString(), | |||
2293 | Subtarget->getFeatureBits(), | |||
2294 | IsValidSpecialReg); | |||
2295 | ||||
2296 | if (IsValidSpecialReg) | |||
2297 | return CurDAG->getMachineNode(AArch64::MSR, DL, MVT::Other, | |||
2298 | CurDAG->getTargetConstant(Reg, DL, MVT::i32), | |||
2299 | N->getOperand(2), N->getOperand(0)); | |||
2300 | ||||
2301 | return nullptr; | |||
2302 | } | |||
2303 | ||||
2304 | SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { | |||
2305 | // Dump information about the Node being selected | |||
2306 | DEBUG(errs() << "Selecting: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { errs() << "Selecting: "; } } while ( 0); | |||
2307 | DEBUG(Node->dump(CurDAG))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { Node->dump(CurDAG); } } while (0); | |||
2308 | DEBUG(errs() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { errs() << "\n"; } } while (0); | |||
2309 | ||||
2310 | // If we have a custom node, we already have selected! | |||
2311 | if (Node->isMachineOpcode()) { | |||
2312 | DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { errs() << "== "; Node->dump(CurDAG ); errs() << "\n"; } } while (0); | |||
2313 | Node->setNodeId(-1); | |||
2314 | return nullptr; | |||
2315 | } | |||
2316 | ||||
2317 | // Few custom selection stuff. | |||
2318 | SDNode *ResNode = nullptr; | |||
2319 | EVT VT = Node->getValueType(0); | |||
2320 | ||||
2321 | switch (Node->getOpcode()) { | |||
2322 | default: | |||
2323 | break; | |||
2324 | ||||
2325 | case ISD::READ_REGISTER: | |||
2326 | if (SDNode *Res = SelectReadRegister(Node)) | |||
2327 | return Res; | |||
2328 | break; | |||
2329 | ||||
2330 | case ISD::WRITE_REGISTER: | |||
2331 | if (SDNode *Res = SelectWriteRegister(Node)) | |||
2332 | return Res; | |||
2333 | break; | |||
2334 | ||||
2335 | case ISD::ADD: | |||
2336 | if (SDNode *I = SelectMLAV64LaneV128(Node)) | |||
2337 | return I; | |||
2338 | break; | |||
2339 | ||||
2340 | case ISD::LOAD: { | |||
2341 | // Try to select as an indexed load. Fall through to normal processing | |||
2342 | // if we can't. | |||
2343 | bool Done = false; | |||
2344 | SDNode *I = SelectIndexedLoad(Node, Done); | |||
2345 | if (Done) | |||
2346 | return I; | |||
2347 | break; | |||
2348 | } | |||
2349 | ||||
2350 | case ISD::SRL: | |||
2351 | case ISD::AND: | |||
2352 | case ISD::SRA: | |||
2353 | if (SDNode *I = SelectBitfieldExtractOp(Node)) | |||
2354 | return I; | |||
2355 | if (SDNode *I = SelectBitfieldInsertInZeroOp(Node)) | |||
2356 | return I; | |||
2357 | break; | |||
2358 | ||||
2359 | case ISD::OR: | |||
2360 | if (SDNode *I = SelectBitfieldInsertOp(Node)) | |||
2361 | return I; | |||
2362 | break; | |||
2363 | ||||
2364 | case ISD::EXTRACT_VECTOR_ELT: { | |||
2365 | // Extracting lane zero is a special case where we can just use a plain | |||
2366 | // EXTRACT_SUBREG instruction, which will become FMOV. This is easier for | |||
2367 | // the rest of the compiler, especially the register allocator and copyi | |||
2368 | // propagation, to reason about, so is preferred when it's possible to | |||
2369 | // use it. | |||
2370 | ConstantSDNode *LaneNode = cast<ConstantSDNode>(Node->getOperand(1)); | |||
2371 | // Bail and use the default Select() for non-zero lanes. | |||
2372 | if (LaneNode->getZExtValue() != 0) | |||
2373 | break; | |||
2374 | // If the element type is not the same as the result type, likewise | |||
2375 | // bail and use the default Select(), as there's more to do than just | |||
2376 | // a cross-class COPY. This catches extracts of i8 and i16 elements | |||
2377 | // since they will need an explicit zext. | |||
2378 | if (VT != Node->getOperand(0).getValueType().getVectorElementType()) | |||
2379 | break; | |||
2380 | unsigned SubReg; | |||
2381 | switch (Node->getOperand(0) | |||
2382 | .getValueType() | |||
2383 | .getVectorElementType() | |||
2384 | .getSizeInBits()) { | |||
2385 | default: | |||
2386 | llvm_unreachable("Unexpected vector element type!")::llvm::llvm_unreachable_internal("Unexpected vector element type!" , "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 2386); | |||
2387 | case 64: | |||
2388 | SubReg = AArch64::dsub; | |||
2389 | break; | |||
2390 | case 32: | |||
2391 | SubReg = AArch64::ssub; | |||
2392 | break; | |||
2393 | case 16: | |||
2394 | SubReg = AArch64::hsub; | |||
2395 | break; | |||
2396 | case 8: | |||
2397 | llvm_unreachable("unexpected zext-requiring extract element!")::llvm::llvm_unreachable_internal("unexpected zext-requiring extract element!" , "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 2397); | |||
2398 | } | |||
2399 | SDValue Extract = CurDAG->getTargetExtractSubreg(SubReg, SDLoc(Node), VT, | |||
2400 | Node->getOperand(0)); | |||
2401 | DEBUG(dbgs() << "ISEL: Custom selection!\n=> ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "ISEL: Custom selection!\n=> " ; } } while (0); | |||
2402 | DEBUG(Extract->dumpr(CurDAG))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { Extract->dumpr(CurDAG); } } while (0); | |||
2403 | DEBUG(dbgs() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "\n"; } } while (0); | |||
2404 | return Extract.getNode(); | |||
2405 | } | |||
2406 | case ISD::Constant: { | |||
2407 | // Materialize zero constants as copies from WZR/XZR. This allows | |||
2408 | // the coalescer to propagate these into other instructions. | |||
2409 | ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node); | |||
2410 | if (ConstNode->isNullValue()) { | |||
2411 | if (VT == MVT::i32) | |||
2412 | return CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(Node), | |||
2413 | AArch64::WZR, MVT::i32).getNode(); | |||
2414 | else if (VT == MVT::i64) | |||
2415 | return CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(Node), | |||
2416 | AArch64::XZR, MVT::i64).getNode(); | |||
2417 | } | |||
2418 | break; | |||
2419 | } | |||
2420 | ||||
2421 | case ISD::FrameIndex: { | |||
2422 | // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm. | |||
2423 | int FI = cast<FrameIndexSDNode>(Node)->getIndex(); | |||
2424 | unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0); | |||
2425 | const TargetLowering *TLI = getTargetLowering(); | |||
2426 | SDValue TFI = CurDAG->getTargetFrameIndex( | |||
2427 | FI, TLI->getPointerTy(CurDAG->getDataLayout())); | |||
2428 | SDLoc DL(Node); | |||
2429 | SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32), | |||
2430 | CurDAG->getTargetConstant(Shifter, DL, MVT::i32) }; | |||
2431 | return CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops); | |||
2432 | } | |||
2433 | case ISD::INTRINSIC_W_CHAIN: { | |||
2434 | unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); | |||
2435 | switch (IntNo) { | |||
2436 | default: | |||
2437 | break; | |||
2438 | case Intrinsic::aarch64_ldaxp: | |||
2439 | case Intrinsic::aarch64_ldxp: { | |||
2440 | unsigned Op = | |||
2441 | IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX; | |||
2442 | SDValue MemAddr = Node->getOperand(2); | |||
2443 | SDLoc DL(Node); | |||
2444 | SDValue Chain = Node->getOperand(0); | |||
2445 | ||||
2446 | SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64, | |||
2447 | MVT::Other, MemAddr, Chain); | |||
2448 | ||||
2449 | // Transfer memoperands. | |||
2450 | MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); | |||
2451 | MemOp[0] = cast<MemIntrinsicSDNode>(Node)->getMemOperand(); | |||
2452 | cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1); | |||
2453 | return Ld; | |||
2454 | } | |||
2455 | case Intrinsic::aarch64_stlxp: | |||
2456 | case Intrinsic::aarch64_stxp: { | |||
2457 | unsigned Op = | |||
2458 | IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX; | |||
2459 | SDLoc DL(Node); | |||
2460 | SDValue Chain = Node->getOperand(0); | |||
2461 | SDValue ValLo = Node->getOperand(2); | |||
2462 | SDValue ValHi = Node->getOperand(3); | |||
2463 | SDValue MemAddr = Node->getOperand(4); | |||
2464 | ||||
2465 | // Place arguments in the right order. | |||
2466 | SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain}; | |||
2467 | ||||
2468 | SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops); | |||
2469 | // Transfer memoperands. | |||
2470 | MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); | |||
2471 | MemOp[0] = cast<MemIntrinsicSDNode>(Node)->getMemOperand(); | |||
2472 | cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1); | |||
2473 | ||||
2474 | return St; | |||
2475 | } | |||
2476 | case Intrinsic::aarch64_neon_ld1x2: | |||
2477 | if (VT == MVT::v8i8) | |||
2478 | return SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0); | |||
2479 | else if (VT == MVT::v16i8) | |||
2480 | return SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0); | |||
2481 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2482 | return SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0); | |||
2483 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2484 | return SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0); | |||
2485 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2486 | return SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0); | |||
2487 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2488 | return SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0); | |||
2489 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2490 | return SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0); | |||
2491 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2492 | return SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0); | |||
2493 | break; | |||
2494 | case Intrinsic::aarch64_neon_ld1x3: | |||
2495 | if (VT == MVT::v8i8) | |||
2496 | return SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0); | |||
2497 | else if (VT == MVT::v16i8) | |||
2498 | return SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0); | |||
2499 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2500 | return SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0); | |||
2501 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2502 | return SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0); | |||
2503 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2504 | return SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0); | |||
2505 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2506 | return SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0); | |||
2507 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2508 | return SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0); | |||
2509 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2510 | return SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0); | |||
2511 | break; | |||
2512 | case Intrinsic::aarch64_neon_ld1x4: | |||
2513 | if (VT == MVT::v8i8) | |||
2514 | return SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0); | |||
2515 | else if (VT == MVT::v16i8) | |||
2516 | return SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0); | |||
2517 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2518 | return SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0); | |||
2519 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2520 | return SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0); | |||
2521 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2522 | return SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0); | |||
2523 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2524 | return SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0); | |||
2525 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2526 | return SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0); | |||
2527 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2528 | return SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0); | |||
2529 | break; | |||
2530 | case Intrinsic::aarch64_neon_ld2: | |||
2531 | if (VT == MVT::v8i8) | |||
2532 | return SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0); | |||
2533 | else if (VT == MVT::v16i8) | |||
2534 | return SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0); | |||
2535 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2536 | return SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0); | |||
2537 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2538 | return SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0); | |||
2539 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2540 | return SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0); | |||
2541 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2542 | return SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0); | |||
2543 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2544 | return SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0); | |||
2545 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2546 | return SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0); | |||
2547 | break; | |||
2548 | case Intrinsic::aarch64_neon_ld3: | |||
2549 | if (VT == MVT::v8i8) | |||
2550 | return SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0); | |||
2551 | else if (VT == MVT::v16i8) | |||
2552 | return SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0); | |||
2553 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2554 | return SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0); | |||
2555 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2556 | return SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0); | |||
2557 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2558 | return SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0); | |||
2559 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2560 | return SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0); | |||
2561 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2562 | return SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0); | |||
2563 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2564 | return SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0); | |||
2565 | break; | |||
2566 | case Intrinsic::aarch64_neon_ld4: | |||
2567 | if (VT == MVT::v8i8) | |||
2568 | return SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0); | |||
2569 | else if (VT == MVT::v16i8) | |||
2570 | return SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0); | |||
2571 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2572 | return SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0); | |||
2573 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2574 | return SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0); | |||
2575 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2576 | return SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0); | |||
2577 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2578 | return SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0); | |||
2579 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2580 | return SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0); | |||
2581 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2582 | return SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0); | |||
2583 | break; | |||
2584 | case Intrinsic::aarch64_neon_ld2r: | |||
2585 | if (VT == MVT::v8i8) | |||
2586 | return SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0); | |||
2587 | else if (VT == MVT::v16i8) | |||
2588 | return SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0); | |||
2589 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2590 | return SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0); | |||
2591 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2592 | return SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0); | |||
2593 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2594 | return SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0); | |||
2595 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2596 | return SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0); | |||
2597 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2598 | return SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0); | |||
2599 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2600 | return SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0); | |||
2601 | break; | |||
2602 | case Intrinsic::aarch64_neon_ld3r: | |||
2603 | if (VT == MVT::v8i8) | |||
2604 | return SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0); | |||
2605 | else if (VT == MVT::v16i8) | |||
2606 | return SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0); | |||
2607 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2608 | return SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0); | |||
2609 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2610 | return SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0); | |||
2611 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2612 | return SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0); | |||
2613 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2614 | return SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0); | |||
2615 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2616 | return SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0); | |||
2617 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2618 | return SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0); | |||
2619 | break; | |||
2620 | case Intrinsic::aarch64_neon_ld4r: | |||
2621 | if (VT == MVT::v8i8) | |||
2622 | return SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0); | |||
2623 | else if (VT == MVT::v16i8) | |||
2624 | return SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0); | |||
2625 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2626 | return SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0); | |||
2627 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2628 | return SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0); | |||
2629 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2630 | return SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0); | |||
2631 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2632 | return SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0); | |||
2633 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2634 | return SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0); | |||
2635 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2636 | return SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0); | |||
2637 | break; | |||
2638 | case Intrinsic::aarch64_neon_ld2lane: | |||
2639 | if (VT == MVT::v16i8 || VT == MVT::v8i8) | |||
2640 | return SelectLoadLane(Node, 2, AArch64::LD2i8); | |||
2641 | else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || | |||
2642 | VT == MVT::v8f16) | |||
2643 | return SelectLoadLane(Node, 2, AArch64::LD2i16); | |||
2644 | else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || | |||
2645 | VT == MVT::v2f32) | |||
2646 | return SelectLoadLane(Node, 2, AArch64::LD2i32); | |||
2647 | else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || | |||
2648 | VT == MVT::v1f64) | |||
2649 | return SelectLoadLane(Node, 2, AArch64::LD2i64); | |||
2650 | break; | |||
2651 | case Intrinsic::aarch64_neon_ld3lane: | |||
2652 | if (VT == MVT::v16i8 || VT == MVT::v8i8) | |||
2653 | return SelectLoadLane(Node, 3, AArch64::LD3i8); | |||
2654 | else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || | |||
2655 | VT == MVT::v8f16) | |||
2656 | return SelectLoadLane(Node, 3, AArch64::LD3i16); | |||
2657 | else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || | |||
2658 | VT == MVT::v2f32) | |||
2659 | return SelectLoadLane(Node, 3, AArch64::LD3i32); | |||
2660 | else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || | |||
2661 | VT == MVT::v1f64) | |||
2662 | return SelectLoadLane(Node, 3, AArch64::LD3i64); | |||
2663 | break; | |||
2664 | case Intrinsic::aarch64_neon_ld4lane: | |||
2665 | if (VT == MVT::v16i8 || VT == MVT::v8i8) | |||
2666 | return SelectLoadLane(Node, 4, AArch64::LD4i8); | |||
2667 | else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || | |||
2668 | VT == MVT::v8f16) | |||
2669 | return SelectLoadLane(Node, 4, AArch64::LD4i16); | |||
2670 | else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || | |||
2671 | VT == MVT::v2f32) | |||
2672 | return SelectLoadLane(Node, 4, AArch64::LD4i32); | |||
2673 | else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || | |||
2674 | VT == MVT::v1f64) | |||
2675 | return SelectLoadLane(Node, 4, AArch64::LD4i64); | |||
2676 | break; | |||
2677 | } | |||
2678 | } break; | |||
2679 | case ISD::INTRINSIC_WO_CHAIN: { | |||
2680 | unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue(); | |||
2681 | switch (IntNo) { | |||
2682 | default: | |||
2683 | break; | |||
2684 | case Intrinsic::aarch64_neon_tbl2: | |||
2685 | return SelectTable(Node, 2, VT == MVT::v8i8 ? AArch64::TBLv8i8Two | |||
2686 | : AArch64::TBLv16i8Two, | |||
2687 | false); | |||
2688 | case Intrinsic::aarch64_neon_tbl3: | |||
2689 | return SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three | |||
2690 | : AArch64::TBLv16i8Three, | |||
2691 | false); | |||
2692 | case Intrinsic::aarch64_neon_tbl4: | |||
2693 | return SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four | |||
2694 | : AArch64::TBLv16i8Four, | |||
2695 | false); | |||
2696 | case Intrinsic::aarch64_neon_tbx2: | |||
2697 | return SelectTable(Node, 2, VT == MVT::v8i8 ? AArch64::TBXv8i8Two | |||
2698 | : AArch64::TBXv16i8Two, | |||
2699 | true); | |||
2700 | case Intrinsic::aarch64_neon_tbx3: | |||
2701 | return SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three | |||
2702 | : AArch64::TBXv16i8Three, | |||
2703 | true); | |||
2704 | case Intrinsic::aarch64_neon_tbx4: | |||
2705 | return SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four | |||
2706 | : AArch64::TBXv16i8Four, | |||
2707 | true); | |||
2708 | case Intrinsic::aarch64_neon_smull: | |||
2709 | case Intrinsic::aarch64_neon_umull: | |||
2710 | if (SDNode *N = SelectMULLV64LaneV128(IntNo, Node)) | |||
2711 | return N; | |||
2712 | break; | |||
2713 | } | |||
2714 | break; | |||
2715 | } | |||
2716 | case ISD::INTRINSIC_VOID: { | |||
2717 | unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); | |||
2718 | if (Node->getNumOperands() >= 3) | |||
2719 | VT = Node->getOperand(2)->getValueType(0); | |||
2720 | switch (IntNo) { | |||
2721 | default: | |||
2722 | break; | |||
2723 | case Intrinsic::aarch64_neon_st1x2: { | |||
2724 | if (VT == MVT::v8i8) | |||
2725 | return SelectStore(Node, 2, AArch64::ST1Twov8b); | |||
2726 | else if (VT == MVT::v16i8) | |||
2727 | return SelectStore(Node, 2, AArch64::ST1Twov16b); | |||
2728 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2729 | return SelectStore(Node, 2, AArch64::ST1Twov4h); | |||
2730 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2731 | return SelectStore(Node, 2, AArch64::ST1Twov8h); | |||
2732 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2733 | return SelectStore(Node, 2, AArch64::ST1Twov2s); | |||
2734 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2735 | return SelectStore(Node, 2, AArch64::ST1Twov4s); | |||
2736 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2737 | return SelectStore(Node, 2, AArch64::ST1Twov2d); | |||
2738 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2739 | return SelectStore(Node, 2, AArch64::ST1Twov1d); | |||
2740 | break; | |||
2741 | } | |||
2742 | case Intrinsic::aarch64_neon_st1x3: { | |||
2743 | if (VT == MVT::v8i8) | |||
2744 | return SelectStore(Node, 3, AArch64::ST1Threev8b); | |||
2745 | else if (VT == MVT::v16i8) | |||
2746 | return SelectStore(Node, 3, AArch64::ST1Threev16b); | |||
2747 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2748 | return SelectStore(Node, 3, AArch64::ST1Threev4h); | |||
2749 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2750 | return SelectStore(Node, 3, AArch64::ST1Threev8h); | |||
2751 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2752 | return SelectStore(Node, 3, AArch64::ST1Threev2s); | |||
2753 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2754 | return SelectStore(Node, 3, AArch64::ST1Threev4s); | |||
2755 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2756 | return SelectStore(Node, 3, AArch64::ST1Threev2d); | |||
2757 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2758 | return SelectStore(Node, 3, AArch64::ST1Threev1d); | |||
2759 | break; | |||
2760 | } | |||
2761 | case Intrinsic::aarch64_neon_st1x4: { | |||
2762 | if (VT == MVT::v8i8) | |||
2763 | return SelectStore(Node, 4, AArch64::ST1Fourv8b); | |||
2764 | else if (VT == MVT::v16i8) | |||
2765 | return SelectStore(Node, 4, AArch64::ST1Fourv16b); | |||
2766 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2767 | return SelectStore(Node, 4, AArch64::ST1Fourv4h); | |||
2768 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2769 | return SelectStore(Node, 4, AArch64::ST1Fourv8h); | |||
2770 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2771 | return SelectStore(Node, 4, AArch64::ST1Fourv2s); | |||
2772 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2773 | return SelectStore(Node, 4, AArch64::ST1Fourv4s); | |||
2774 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2775 | return SelectStore(Node, 4, AArch64::ST1Fourv2d); | |||
2776 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2777 | return SelectStore(Node, 4, AArch64::ST1Fourv1d); | |||
2778 | break; | |||
2779 | } | |||
2780 | case Intrinsic::aarch64_neon_st2: { | |||
2781 | if (VT == MVT::v8i8) | |||
2782 | return SelectStore(Node, 2, AArch64::ST2Twov8b); | |||
2783 | else if (VT == MVT::v16i8) | |||
2784 | return SelectStore(Node, 2, AArch64::ST2Twov16b); | |||
2785 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2786 | return SelectStore(Node, 2, AArch64::ST2Twov4h); | |||
2787 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2788 | return SelectStore(Node, 2, AArch64::ST2Twov8h); | |||
2789 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2790 | return SelectStore(Node, 2, AArch64::ST2Twov2s); | |||
2791 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2792 | return SelectStore(Node, 2, AArch64::ST2Twov4s); | |||
2793 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2794 | return SelectStore(Node, 2, AArch64::ST2Twov2d); | |||
2795 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2796 | return SelectStore(Node, 2, AArch64::ST1Twov1d); | |||
2797 | break; | |||
2798 | } | |||
2799 | case Intrinsic::aarch64_neon_st3: { | |||
2800 | if (VT == MVT::v8i8) | |||
2801 | return SelectStore(Node, 3, AArch64::ST3Threev8b); | |||
2802 | else if (VT == MVT::v16i8) | |||
2803 | return SelectStore(Node, 3, AArch64::ST3Threev16b); | |||
2804 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2805 | return SelectStore(Node, 3, AArch64::ST3Threev4h); | |||
2806 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2807 | return SelectStore(Node, 3, AArch64::ST3Threev8h); | |||
2808 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2809 | return SelectStore(Node, 3, AArch64::ST3Threev2s); | |||
2810 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2811 | return SelectStore(Node, 3, AArch64::ST3Threev4s); | |||
2812 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2813 | return SelectStore(Node, 3, AArch64::ST3Threev2d); | |||
2814 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2815 | return SelectStore(Node, 3, AArch64::ST1Threev1d); | |||
2816 | break; | |||
2817 | } | |||
2818 | case Intrinsic::aarch64_neon_st4: { | |||
2819 | if (VT == MVT::v8i8) | |||
2820 | return SelectStore(Node, 4, AArch64::ST4Fourv8b); | |||
2821 | else if (VT == MVT::v16i8) | |||
2822 | return SelectStore(Node, 4, AArch64::ST4Fourv16b); | |||
2823 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2824 | return SelectStore(Node, 4, AArch64::ST4Fourv4h); | |||
2825 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2826 | return SelectStore(Node, 4, AArch64::ST4Fourv8h); | |||
2827 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2828 | return SelectStore(Node, 4, AArch64::ST4Fourv2s); | |||
2829 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2830 | return SelectStore(Node, 4, AArch64::ST4Fourv4s); | |||
2831 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2832 | return SelectStore(Node, 4, AArch64::ST4Fourv2d); | |||
2833 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2834 | return SelectStore(Node, 4, AArch64::ST1Fourv1d); | |||
2835 | break; | |||
2836 | } | |||
2837 | case Intrinsic::aarch64_neon_st2lane: { | |||
2838 | if (VT == MVT::v16i8 || VT == MVT::v8i8) | |||
2839 | return SelectStoreLane(Node, 2, AArch64::ST2i8); | |||
2840 | else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || | |||
2841 | VT == MVT::v8f16) | |||
2842 | return SelectStoreLane(Node, 2, AArch64::ST2i16); | |||
2843 | else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || | |||
2844 | VT == MVT::v2f32) | |||
2845 | return SelectStoreLane(Node, 2, AArch64::ST2i32); | |||
2846 | else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || | |||
2847 | VT == MVT::v1f64) | |||
2848 | return SelectStoreLane(Node, 2, AArch64::ST2i64); | |||
2849 | break; | |||
2850 | } | |||
2851 | case Intrinsic::aarch64_neon_st3lane: { | |||
2852 | if (VT == MVT::v16i8 || VT == MVT::v8i8) | |||
2853 | return SelectStoreLane(Node, 3, AArch64::ST3i8); | |||
2854 | else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || | |||
2855 | VT == MVT::v8f16) | |||
2856 | return SelectStoreLane(Node, 3, AArch64::ST3i16); | |||
2857 | else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || | |||
2858 | VT == MVT::v2f32) | |||
2859 | return SelectStoreLane(Node, 3, AArch64::ST3i32); | |||
2860 | else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || | |||
2861 | VT == MVT::v1f64) | |||
2862 | return SelectStoreLane(Node, 3, AArch64::ST3i64); | |||
2863 | break; | |||
2864 | } | |||
2865 | case Intrinsic::aarch64_neon_st4lane: { | |||
2866 | if (VT == MVT::v16i8 || VT == MVT::v8i8) | |||
2867 | return SelectStoreLane(Node, 4, AArch64::ST4i8); | |||
2868 | else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || | |||
2869 | VT == MVT::v8f16) | |||
2870 | return SelectStoreLane(Node, 4, AArch64::ST4i16); | |||
2871 | else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || | |||
2872 | VT == MVT::v2f32) | |||
2873 | return SelectStoreLane(Node, 4, AArch64::ST4i32); | |||
2874 | else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || | |||
2875 | VT == MVT::v1f64) | |||
2876 | return SelectStoreLane(Node, 4, AArch64::ST4i64); | |||
2877 | break; | |||
2878 | } | |||
2879 | } | |||
2880 | break; | |||
2881 | } | |||
2882 | case AArch64ISD::LD2post: { | |||
2883 | if (VT == MVT::v8i8) | |||
2884 | return SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0); | |||
2885 | else if (VT == MVT::v16i8) | |||
2886 | return SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0); | |||
2887 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2888 | return SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0); | |||
2889 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2890 | return SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0); | |||
2891 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2892 | return SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0); | |||
2893 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2894 | return SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0); | |||
2895 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2896 | return SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0); | |||
2897 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2898 | return SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0); | |||
2899 | break; | |||
2900 | } | |||
2901 | case AArch64ISD::LD3post: { | |||
2902 | if (VT == MVT::v8i8) | |||
2903 | return SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0); | |||
2904 | else if (VT == MVT::v16i8) | |||
2905 | return SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0); | |||
2906 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2907 | return SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0); | |||
2908 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2909 | return SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0); | |||
2910 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2911 | return SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0); | |||
2912 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2913 | return SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0); | |||
2914 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2915 | return SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0); | |||
2916 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2917 | return SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0); | |||
2918 | break; | |||
2919 | } | |||
2920 | case AArch64ISD::LD4post: { | |||
2921 | if (VT == MVT::v8i8) | |||
2922 | return SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0); | |||
2923 | else if (VT == MVT::v16i8) | |||
2924 | return SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0); | |||
2925 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2926 | return SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0); | |||
2927 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2928 | return SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0); | |||
2929 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2930 | return SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0); | |||
2931 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2932 | return SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0); | |||
2933 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2934 | return SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0); | |||
2935 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2936 | return SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0); | |||
2937 | break; | |||
2938 | } | |||
2939 | case AArch64ISD::LD1x2post: { | |||
2940 | if (VT == MVT::v8i8) | |||
2941 | return SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0); | |||
2942 | else if (VT == MVT::v16i8) | |||
2943 | return SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0); | |||
2944 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2945 | return SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0); | |||
2946 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2947 | return SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0); | |||
2948 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2949 | return SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0); | |||
2950 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2951 | return SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0); | |||
2952 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2953 | return SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0); | |||
2954 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2955 | return SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0); | |||
2956 | break; | |||
2957 | } | |||
2958 | case AArch64ISD::LD1x3post: { | |||
2959 | if (VT == MVT::v8i8) | |||
2960 | return SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0); | |||
2961 | else if (VT == MVT::v16i8) | |||
2962 | return SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0); | |||
2963 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2964 | return SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0); | |||
2965 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2966 | return SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0); | |||
2967 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2968 | return SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0); | |||
2969 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2970 | return SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0); | |||
2971 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2972 | return SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0); | |||
2973 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2974 | return SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0); | |||
2975 | break; | |||
2976 | } | |||
2977 | case AArch64ISD::LD1x4post: { | |||
2978 | if (VT == MVT::v8i8) | |||
2979 | return SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0); | |||
2980 | else if (VT == MVT::v16i8) | |||
2981 | return SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0); | |||
2982 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2983 | return SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0); | |||
2984 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2985 | return SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0); | |||
2986 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2987 | return SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0); | |||
2988 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2989 | return SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0); | |||
2990 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2991 | return SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0); | |||
2992 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2993 | return SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0); | |||
2994 | break; | |||
2995 | } | |||
2996 | case AArch64ISD::LD1DUPpost: { | |||
2997 | if (VT == MVT::v8i8) | |||
2998 | return SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0); | |||
2999 | else if (VT == MVT::v16i8) | |||
3000 | return SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0); | |||
3001 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
3002 | return SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0); | |||
3003 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
3004 | return SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0); | |||
3005 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
3006 | return SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0); | |||
3007 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
3008 | return SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0); | |||
3009 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
3010 | return SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0); | |||
3011 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
3012 | return SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0); | |||
3013 | break; | |||
3014 | } | |||
3015 | case AArch64ISD::LD2DUPpost: { | |||
3016 | if (VT == MVT::v8i8) | |||
3017 | return SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0); | |||
3018 | else if (VT == MVT::v16i8) | |||
3019 | return SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0); | |||
3020 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
3021 | return SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0); | |||
3022 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
3023 | return SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0); | |||
3024 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
3025 | return SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0); | |||
3026 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
3027 | return SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0); | |||
3028 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
3029 | return SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0); | |||
3030 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
3031 | return SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0); | |||
3032 | break; | |||
3033 | } | |||
3034 | case AArch64ISD::LD3DUPpost: { | |||
3035 | if (VT == MVT::v8i8) | |||
3036 | return SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0); | |||
3037 | else if (VT == MVT::v16i8) | |||
3038 | return SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0); | |||
3039 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
3040 | return SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0); | |||
3041 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
3042 | return SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0); | |||
3043 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
3044 | return SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0); | |||
3045 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
3046 | return SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0); | |||
3047 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
3048 | return SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0); | |||
3049 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
3050 | return SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0); | |||
3051 | break; | |||
3052 | } | |||
3053 | case AArch64ISD::LD4DUPpost: { | |||
3054 | if (VT == MVT::v8i8) | |||
3055 | return SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0); | |||
3056 | else if (VT == MVT::v16i8) | |||
3057 | return SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0); | |||
3058 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
3059 | return SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0); | |||
3060 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
3061 | return SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0); | |||
3062 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
3063 | return SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0); | |||
3064 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
3065 | return SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0); | |||
3066 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
3067 | return SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0); | |||
3068 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
3069 | return SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0); | |||
3070 | break; | |||
3071 | } | |||
3072 | case AArch64ISD::LD1LANEpost: { | |||
3073 | if (VT == MVT::v16i8 || VT == MVT::v8i8) | |||
3074 | return SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST); | |||
3075 | else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || | |||
3076 | VT == MVT::v8f16) | |||
3077 | return SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST); | |||
3078 | else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || | |||
3079 | VT == MVT::v2f32) | |||
3080 | return SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST); | |||
3081 | else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || | |||
3082 | VT == MVT::v1f64) | |||
3083 | return SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST); | |||
3084 | break; | |||
3085 | } | |||
3086 | case AArch64ISD::LD2LANEpost: { | |||
3087 | if (VT == MVT::v16i8 || VT == MVT::v8i8) | |||
3088 | return SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST); | |||
3089 | else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || | |||
3090 | VT == MVT::v8f16) | |||
3091 | return SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST); | |||
3092 | else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || | |||
3093 | VT == MVT::v2f32) | |||
3094 | return SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST); | |||
3095 | else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || | |||
3096 | VT == MVT::v1f64) | |||
3097 | return SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST); | |||
3098 | break; | |||
3099 | } | |||
3100 | case AArch64ISD::LD3LANEpost: { | |||
3101 | if (VT == MVT::v16i8 || VT == MVT::v8i8) | |||
3102 | return SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST); | |||
3103 | else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || | |||
3104 | VT == MVT::v8f16) | |||
3105 | return SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST); | |||
3106 | else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || | |||
3107 | VT == MVT::v2f32) | |||
3108 | return SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST); | |||
3109 | else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || | |||
3110 | VT == MVT::v1f64) | |||
3111 | return SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST); | |||
3112 | break; | |||
3113 | } | |||
3114 | case AArch64ISD::LD4LANEpost: { | |||
3115 | if (VT == MVT::v16i8 || VT == MVT::v8i8) | |||
3116 | return SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST); | |||
3117 | else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || | |||
3118 | VT == MVT::v8f16) | |||
3119 | return SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST); | |||
3120 | else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || | |||
3121 | VT == MVT::v2f32) | |||
3122 | return SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST); | |||
3123 | else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || | |||
3124 | VT == MVT::v1f64) | |||
3125 | return SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST); | |||
3126 | break; | |||
3127 | } | |||
3128 | case AArch64ISD::ST2post: { | |||
3129 | VT = Node->getOperand(1).getValueType(); | |||
3130 | if (VT == MVT::v8i8) | |||
3131 | return SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST); | |||
3132 | else if (VT == MVT::v16i8) | |||
3133 | return SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST); | |||
3134 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
3135 | return SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST); | |||
3136 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
3137 | return SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST); | |||
3138 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
3139 | return SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST); | |||
3140 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
3141 | return SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST); | |||
3142 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
3143 | return SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST); | |||
3144 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
3145 | return SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST); | |||
3146 | break; | |||
3147 | } | |||
3148 | case AArch64ISD::ST3post: { | |||
3149 | VT = Node->getOperand(1).getValueType(); | |||
3150 | if (VT == MVT::v8i8) | |||
3151 | return SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST); | |||
3152 | else if (VT == MVT::v16i8) | |||
3153 | return SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST); | |||
3154 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
3155 | return SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST); | |||
3156 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
3157 | return SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST); | |||
3158 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
3159 | return SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST); | |||
3160 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
3161 | return SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST); | |||
3162 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
3163 | return SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST); | |||
3164 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
3165 | return SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST); | |||
3166 | break; | |||
3167 | } | |||
3168 | case AArch64ISD::ST4post: { | |||
3169 | VT = Node->getOperand(1).getValueType(); | |||
3170 | if (VT == MVT::v8i8) | |||
3171 | return SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST); | |||
3172 | else if (VT == MVT::v16i8) | |||
3173 | return SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST); | |||
3174 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
3175 | return SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST); | |||
3176 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
3177 | return SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST); | |||
3178 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
3179 | return SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST); | |||
3180 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
3181 | return SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST); | |||
3182 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
3183 | return SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST); | |||
3184 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
3185 | return SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST); | |||
3186 | break; | |||
3187 | } | |||
3188 | case AArch64ISD::ST1x2post: { | |||
3189 | VT = Node->getOperand(1).getValueType(); | |||
3190 | if (VT == MVT::v8i8) | |||
3191 | return SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST); | |||
3192 | else if (VT == MVT::v16i8) | |||
3193 | return SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST); | |||
3194 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
3195 | return SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST); | |||
3196 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
3197 | return SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST); | |||
3198 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
3199 | return SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST); | |||
3200 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
3201 | return SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST); | |||
3202 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
3203 | return SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST); | |||
3204 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
3205 | return SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST); | |||
3206 | break; | |||
3207 | } | |||
3208 | case AArch64ISD::ST1x3post: { | |||
3209 | VT = Node->getOperand(1).getValueType(); | |||
3210 | if (VT == MVT::v8i8) | |||
3211 | return SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST); | |||
3212 | else if (VT == MVT::v16i8) | |||
3213 | return SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST); | |||
3214 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
3215 | return SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST); | |||
3216 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
3217 | return SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST); | |||
3218 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
3219 | return SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST); | |||
3220 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
3221 | return SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST); | |||
3222 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
3223 | return SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST); | |||
3224 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
3225 | return SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST); | |||
3226 | break; | |||
3227 | } | |||
3228 | case AArch64ISD::ST1x4post: { | |||
3229 | VT = Node->getOperand(1).getValueType(); | |||
3230 | if (VT == MVT::v8i8) | |||
3231 | return SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST); | |||
3232 | else if (VT == MVT::v16i8) | |||
3233 | return SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST); | |||
3234 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
3235 | return SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST); | |||
3236 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
3237 | return SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST); | |||
3238 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
3239 | return SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST); | |||
3240 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
3241 | return SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST); | |||
3242 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
3243 | return SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST); | |||
3244 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
3245 | return SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST); | |||
3246 | break; | |||
3247 | } | |||
3248 | case AArch64ISD::ST2LANEpost: { | |||
3249 | VT = Node->getOperand(1).getValueType(); | |||
3250 | if (VT == MVT::v16i8 || VT == MVT::v8i8) | |||
3251 | return SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST); | |||
3252 | else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || | |||
3253 | VT == MVT::v8f16) | |||
3254 | return SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST); | |||
3255 | else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || | |||
3256 | VT == MVT::v2f32) | |||
3257 | return SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST); | |||
3258 | else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || | |||
3259 | VT == MVT::v1f64) | |||
3260 | return SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST); | |||
3261 | break; | |||
3262 | } | |||
3263 | case AArch64ISD::ST3LANEpost: { | |||
3264 | VT = Node->getOperand(1).getValueType(); | |||
3265 | if (VT == MVT::v16i8 || VT == MVT::v8i8) | |||
3266 | return SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST); | |||
3267 | else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || | |||
3268 | VT == MVT::v8f16) | |||
3269 | return SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST); | |||
3270 | else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || | |||
3271 | VT == MVT::v2f32) | |||
3272 | return SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST); | |||
3273 | else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || | |||
3274 | VT == MVT::v1f64) | |||
3275 | return SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST); | |||
3276 | break; | |||
3277 | } | |||
3278 | case AArch64ISD::ST4LANEpost: { | |||
3279 | VT = Node->getOperand(1).getValueType(); | |||
3280 | if (VT == MVT::v16i8 || VT == MVT::v8i8) | |||
3281 | return SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST); | |||
3282 | else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || | |||
3283 | VT == MVT::v8f16) | |||
3284 | return SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST); | |||
3285 | else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || | |||
3286 | VT == MVT::v2f32) | |||
3287 | return SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST); | |||
3288 | else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || | |||
3289 | VT == MVT::v1f64) | |||
3290 | return SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST); | |||
3291 | break; | |||
3292 | } | |||
3293 | } | |||
3294 | ||||
3295 | // Select the default instruction | |||
3296 | ResNode = SelectCode(Node); | |||
3297 | ||||
3298 | DEBUG(errs() << "=> ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { errs() << "=> "; } } while (0); | |||
3299 | if (ResNode == nullptr || ResNode == Node) | |||
3300 | DEBUG(Node->dump(CurDAG))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { Node->dump(CurDAG); } } while (0); | |||
3301 | else | |||
3302 | DEBUG(ResNode->dump(CurDAG))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { ResNode->dump(CurDAG); } } while (0); | |||
3303 | DEBUG(errs() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { errs() << "\n"; } } while (0); | |||
3304 | ||||
3305 | return ResNode; | |||
3306 | } | |||
3307 | ||||
3308 | /// createAArch64ISelDag - This pass converts a legalized DAG into a | |||
3309 | /// AArch64-specific DAG, ready for instruction scheduling. | |||
3310 | FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM, | |||
3311 | CodeGenOpt::Level OptLevel) { | |||
3312 | return new AArch64DAGToDAGISel(TM, OptLevel); | |||
3313 | } |