Line data Source code
1 : //===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
9 : //
10 : // This file defines an instruction selector for the ARM target.
11 : //
12 : //===----------------------------------------------------------------------===//
13 :
14 : #include "ARM.h"
15 : #include "ARMBaseInstrInfo.h"
16 : #include "ARMTargetMachine.h"
17 : #include "MCTargetDesc/ARMAddressingModes.h"
18 : #include "Utils/ARMBaseInfo.h"
19 : #include "llvm/ADT/StringSwitch.h"
20 : #include "llvm/CodeGen/MachineFrameInfo.h"
21 : #include "llvm/CodeGen/MachineFunction.h"
22 : #include "llvm/CodeGen/MachineInstrBuilder.h"
23 : #include "llvm/CodeGen/MachineRegisterInfo.h"
24 : #include "llvm/CodeGen/SelectionDAG.h"
25 : #include "llvm/CodeGen/SelectionDAGISel.h"
26 : #include "llvm/CodeGen/TargetLowering.h"
27 : #include "llvm/IR/CallingConv.h"
28 : #include "llvm/IR/Constants.h"
29 : #include "llvm/IR/DerivedTypes.h"
30 : #include "llvm/IR/Function.h"
31 : #include "llvm/IR/Intrinsics.h"
32 : #include "llvm/IR/LLVMContext.h"
33 : #include "llvm/Support/CommandLine.h"
34 : #include "llvm/Support/Debug.h"
35 : #include "llvm/Support/ErrorHandling.h"
36 : #include "llvm/Target/TargetOptions.h"
37 :
38 : using namespace llvm;
39 :
40 : #define DEBUG_TYPE "arm-isel"
41 :
// Debugging escape hatch: when set, instruction selection will not fold
// shifter operands ("so_reg" forms) into their using instructions.
static cl::opt<bool>
DisableShifterOp("disable-shifter-op", cl::Hidden,
                 cl::desc("Disable isel of shifter-op"),
                 cl::init(false));
46 :
//===--------------------------------------------------------------------===//
/// ARMDAGToDAGISel - ARM specific code to select ARM machine
/// instructions for SelectionDAG operations.
///
namespace {

class ARMDAGToDAGISel : public SelectionDAGISel {
  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  /// make the right decision when generating code for different targets.
  const ARMSubtarget *Subtarget;

public:
  explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel)
      : SelectionDAGISel(tm, OptLevel) {}

  bool runOnMachineFunction(MachineFunction &MF) override {
    // Reset the subtarget each time through.
    Subtarget = &MF.getSubtarget<ARMSubtarget>();
    SelectionDAGISel::runOnMachineFunction(MF);
    return true;
  }

  StringRef getPassName() const override { return "ARM Instruction Selection"; }

  void PreprocessISelDAG() override;

  /// getI32Imm - Return a target constant of type i32 with the specified
  /// value.
  inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  }

  void Select(SDNode *N) override;

  bool hasNoVMLxHazardUse(SDNode *N) const;
  bool isShifterOpProfitable(const SDValue &Shift,
                             ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
  bool SelectRegShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, SDValue &C,
                               bool CheckProfitability = true);
  bool SelectImmShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, bool CheckProfitability = true);
  bool SelectShiftRegShifterOperand(SDValue N, SDValue &A,
                                    SDValue &B, SDValue &C) {
    // Don't apply the profitability check
    return SelectRegShifterOperand(N, A, B, C, false);
  }
  bool SelectShiftImmShifterOperand(SDValue N, SDValue &A,
                                    SDValue &B) {
    // Don't apply the profitability check
    return SelectImmShifterOperand(N, A, B, false);
  }

  bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out);

  bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);

  // Build the two predicate operands for a CMOV from the condition-code
  // constant N: the condition as a target constant plus the CPSR register.
  bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
    const ConstantSDNode *CN = cast<ConstantSDNode>(N);
    Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
    Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
    return true;
  }

  bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                                SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                                   SDValue &Offset, SDValue &Opc);
  bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
  bool SelectAddrMode3(SDValue N, SDValue &Base,
                       SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset,
                         int Lwb, int Upb, bool FP16);
  bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
  bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);

  bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);

  // Thumb Addressing Modes:
  bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
                                SDValue &OffImm);
  bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);

  // Thumb 2 Addressing Modes:
  bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
                            SDValue &OffImm);
  bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                  SDValue &OffImm);
  bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
                             SDValue &OffReg, SDValue &ShImm);
  bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);

  // Immediate predicates referenced from the .td patterns: test whether Imm
  // (or its bitwise complement) is encodable as an ARM / Thumb-2 modified
  // immediate (getSOImmVal / getT2SOImmVal return -1 when not encodable).
  inline bool is_so_imm(unsigned Imm) const {
    return ARM_AM::getSOImmVal(Imm) != -1;
  }

  inline bool is_so_imm_not(unsigned Imm) const {
    return ARM_AM::getSOImmVal(~Imm) != -1;
  }

  inline bool is_t2_so_imm(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(Imm) != -1;
  }

  inline bool is_t2_so_imm_not(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(~Imm) != -1;
  }

  // Include the pieces autogenerated from the target description.
#include "ARMGenDAGISel.inc"

private:
  void transferMemOperands(SDNode *Src, SDNode *Dst);

  /// Indexed (pre/post inc/dec) load matching code for ARM.
  bool tryARMIndexedLoad(SDNode *N);
  bool tryT1IndexedLoad(SDNode *N);
  bool tryT2IndexedLoad(SDNode *N);

  /// SelectVLD - Select NEON load intrinsics.  NumVecs should be
  /// 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// loads of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVST - Select NEON store intrinsics.  NumVecs should
  /// be 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// stores of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVLDSTLane - Select NEON load/store lane intrinsics.  NumVecs should
  /// be 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// load/store of D registers and Q registers.
  void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
                       unsigned NumVecs, const uint16_t *DOpcodes,
                       const uint16_t *QOpcodes);

  /// SelectVLDDup - Select NEON load-duplicate intrinsics.  NumVecs
  /// should be 1, 2, 3 or 4.  The opcode array specifies the instructions used
  /// for loading D registers.
  void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating,
                    unsigned NumVecs, const uint16_t *DOpcodes,
                    const uint16_t *QOpcodes0 = nullptr,
                    const uint16_t *QOpcodes1 = nullptr);

  /// Try to select SBFX/UBFX instructions for ARM.
  bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);

  // Select special operations if node forms integer ABS pattern
  bool tryABSOp(SDNode *N);

  bool tryReadRegister(SDNode *N);
  bool tryWriteRegister(SDNode *N);

  bool tryInlineAsm(SDNode *N);

  void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);

  void SelectCMP_SWAP(SDNode *N);

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
                                    std::vector<SDValue> &OutOps) override;

  // Form pairs of consecutive R, S, D, or Q registers.
  SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);

  // Form sequences of 4 consecutive S, D, or Q registers.
  SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
  SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
  SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);

  // Get the alignment operand for a NEON VLD or VST instruction.
  SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
                        bool is64BitVector);

  /// Returns the number of instructions required to materialize the given
  /// constant in a register, or 3 if a literal pool load is needed.
  unsigned ConstantMaterializationCost(unsigned Val) const;

  /// Checks if N is a multiplication by a constant where we can extract out a
  /// power of two from the constant so that it can be used in a shift, but only
  /// if it simplifies the materialization of the constant. Returns true if it
  /// is, and assigns to PowerOfTwo the power of two that should be extracted
  /// out and to NewMulConst the new constant to be multiplied by.
  bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
                              unsigned &PowerOfTwo, SDValue &NewMulConst) const;

  /// Replace N with M in CurDAG, in a way that also ensures that M gets
  /// selected when N would have been selected.
  void replaceDAGValue(const SDValue &N, SDValue M);
};
} // end anonymous namespace
264 :
265 : /// isInt32Immediate - This method tests to see if the node is a 32-bit constant
266 : /// operand. If so Imm will receive the 32-bit value.
267 : static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
268 853 : if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
269 975 : Imm = cast<ConstantSDNode>(N)->getZExtValue();
270 : return true;
271 : }
272 : return false;
273 : }
274 :
// isInt32Immediate - This method tests to see if the operand is a 32-bit
// constant. If so Imm will receive the 32 bit value.
static bool isInt32Immediate(SDValue N, unsigned &Imm) {
  // Convenience overload: unwrap the SDValue to its defining node.
  return isInt32Immediate(N.getNode(), Imm);
}
280 :
281 : // isOpcWithIntImmediate - This method tests to see if the node is a specific
282 : // opcode and that it has a immediate integer right operand.
283 : // If so Imm will receive the 32 bit value.
284 : static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
285 7550 : return N->getOpcode() == Opc &&
286 998 : isInt32Immediate(N->getOperand(1).getNode(), Imm);
287 : }
288 :
289 : /// Check whether a particular node is a constant value representable as
290 : /// (N * Scale) where (N in [\p RangeMin, \p RangeMax).
291 : ///
292 : /// \param ScaledConstant [out] - On success, the pre-scaled constant value.
293 0 : static bool isScaledConstantInRange(SDValue Node, int Scale,
294 : int RangeMin, int RangeMax,
295 : int &ScaledConstant) {
296 : assert(Scale > 0 && "Invalid scale!");
297 :
298 : // Check that this is a constant.
299 : const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
300 : if (!C)
301 0 : return false;
302 :
303 3349 : ScaledConstant = (int) C->getZExtValue();
304 264 : if ((ScaledConstant % Scale) != 0)
305 0 : return false;
306 :
307 264 : ScaledConstant /= Scale;
308 3349 : return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
309 : }
310 :
void ARMDAGToDAGISel::PreprocessISelDAG() {
  // The rewrite below targets the UBFX bit-extraction instruction, which only
  // exists on v6t2 and later.
  if (!Subtarget->hasV6T2Ops())
    return;

  // Since hasV6T2Ops() holds here, a Thumb target is necessarily Thumb2.
  bool isThumb2 = Subtarget->isThumb();
  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
       E = CurDAG->allnodes_end(); I != E; ) {
    SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.

    if (N->getOpcode() != ISD::ADD)
      continue;

    // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
    // leading zeros, followed by consecutive set bits, followed by 1 or 2
    // trailing zeros, e.g. 1020.
    // Transform the expression to
    // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
    // of trailing zeros of c2. The left shift would be folded as an shifter
    // operand of 'add' and the 'and' and 'srl' would become a bits extraction
    // node (UBFX).

    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
    unsigned And_imm = 0;
    // Find the AND-with-immediate operand; commute it into N1 if necessary.
    if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
      if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
        std::swap(N0, N1);
    }
    if (!And_imm)
      continue;

    // Check if the AND mask is an immediate of the form: 000.....1111111100
    unsigned TZ = countTrailingZeros(And_imm);
    if (TZ != 1 && TZ != 2)
      // Be conservative here. Shifter operands aren't always free. e.g. On
      // Swift, left shifter operand of 1 / 2 for free but others are not.
      // e.g.
      //  ubfx   r3, r1, #16, #8
      //  ldr.w  r3, [r0, r3, lsl #2]
      // vs.
      //  mov.w  r9, #1020
      //  and.w  r2, r9, r1, lsr #14
      //  ldr    r2, [r0, r2]
      continue;
    And_imm >>= TZ;
    // After stripping the trailing zeros, the mask must be contiguous ones.
    if (And_imm & (And_imm + 1))
      continue;

    // Look for (and (srl X, c1), c2).
    SDValue Srl = N1.getOperand(0);
    unsigned Srl_imm = 0;
    if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
        (Srl_imm <= 2))
      continue;

    // Make sure first operand is not a shifter operand which would prevent
    // folding of the left shift.
    SDValue CPTmp0;
    SDValue CPTmp1;
    SDValue CPTmp2;
    if (isThumb2) {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
        continue;
    } else {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
          SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
        continue;
    }

    // Now make the transformation.
    Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
                          Srl.getOperand(0),
                          CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
                                              MVT::i32));
    N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
                         Srl,
                         CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
    N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
                         N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
    CurDAG->UpdateNodeOperands(N, N0, N1);
  }
}
393 :
/// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
/// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
/// least on current ARM implementations) which should be avoided.
bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
  // At -O0 we do not care about the hazard; always allow the fused form.
  if (OptLevel == CodeGenOpt::None)
    return true;

  // Subtargets without the VMLx hazard can always use VMLA/VMLS.
  if (!Subtarget->hasVMLxHazards())
    return true;

  // The heuristic below only reasons about a single consumer.
  if (!N->hasOneUse())
    return false;

  SDNode *Use = *N->use_begin();
  if (Use->getOpcode() == ISD::CopyToReg)
    return true;
  if (Use->isMachineOpcode()) {
    const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
        CurDAG->getSubtarget().getInstrInfo());

    const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
    // Stores and FP->GPR moves do not trigger the hazard.
    if (MCID.mayStore())
      return true;
    unsigned Opcode = MCID.getOpcode();
    if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
      return true;
    // vmlx feeding into another vmlx. We actually want to unfold
    // the use later in the MLxExpansion pass. e.g.
    // vmla
    // vmla (stall 8 cycles)
    //
    // vmul (5 cycles)
    // vadd (5 cycles)
    // vmla
    // This adds up to about 18 - 19 cycles.
    //
    // vmla
    // vmul (stall 4 cycles)
    // vadd adds up to about 14 cycles.
    return TII->isFpMLxInstruction(Opcode);
  }

  return false;
}
438 :
439 0 : bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
440 : ARM_AM::ShiftOpc ShOpcVal,
441 : unsigned ShAmt) {
442 0 : if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
443 0 : return true;
444 0 : if (Shift.hasOneUse())
445 0 : return true;
446 : // R << 2 is free.
447 0 : return ShOpcVal == ARM_AM::lsl &&
448 0 : (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
449 : }
450 :
/// Returns the number of instructions required to materialize the given
/// constant in a register, or 3 if a literal pool load is needed.
/// Checks are ordered cheapest-first, so the first match yields the
/// minimal cost for the current subtarget.
unsigned ARMDAGToDAGISel::ConstantMaterializationCost(unsigned Val) const {
  if (Subtarget->isThumb()) {
    if (Val <= 255) return 1;                               // MOV
    if (Subtarget->hasV6T2Ops() &&
        (Val <= 0xffff || ARM_AM::getT2SOImmValSplatVal(Val) != -1))
      return 1; // MOVW
    if (Val <= 510) return 2;                               // MOV + ADDi8
    if (~Val <= 255) return 2;                              // MOV + MVN
    if (ARM_AM::isThumbImmShiftedVal(Val)) return 2;        // MOV + LSL
  } else {
    if (ARM_AM::getSOImmVal(Val) != -1) return 1;           // MOV
    if (ARM_AM::getSOImmVal(~Val) != -1) return 1;          // MVN
    if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
    if (ARM_AM::isSOImmTwoPartVal(Val)) return 2;           // two instrs
  }
  if (Subtarget->useMovt(*MF)) return 2;                    // MOVW + MOVT
  return 3; // Literal pool load
}
469 :
470 196 : bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
471 : unsigned MaxShift,
472 : unsigned &PowerOfTwo,
473 : SDValue &NewMulConst) const {
474 : assert(N.getOpcode() == ISD::MUL);
475 : assert(MaxShift > 0);
476 :
477 : // If the multiply is used in more than one place then changing the constant
478 : // will make other uses incorrect, so don't.
479 196 : if (!N.hasOneUse()) return false;
480 : // Check if the multiply is by a constant
481 160 : ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
482 : if (!MulConst) return false;
483 : // If the constant is used in more than one place then modifying it will mean
484 : // we need to materialize two constants instead of one, which is a bad idea.
485 : if (!MulConst->hasOneUse()) return false;
486 60 : unsigned MulConstVal = MulConst->getZExtValue();
487 60 : if (MulConstVal == 0) return false;
488 :
489 : // Find the largest power of 2 that MulConstVal is a multiple of
490 60 : PowerOfTwo = MaxShift;
491 1427 : while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
492 1377 : --PowerOfTwo;
493 1377 : if (PowerOfTwo == 0) return false;
494 : }
495 :
496 : // Only optimise if the new cost is better
497 50 : unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
498 50 : NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
499 50 : unsigned OldCost = ConstantMaterializationCost(MulConstVal);
500 50 : unsigned NewCost = ConstantMaterializationCost(NewMulConstVal);
501 50 : return NewCost < OldCost;
502 : }
503 :
/// Replace N with M in CurDAG, in a way that also ensures that M gets
/// selected when N would have been selected.
void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
  // Reposition M's node at N's place in the DAG node list so selection visits
  // it where it would have visited N, then redirect all uses of N to M.
  CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
  ReplaceUses(N, M);
}
508 :
/// Match a register shifted by a constant amount (an immediate shifter
/// operand). On success BaseReg receives the shifted register and Opc the
/// encoded shift opcode + amount as a target constant.
bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  // If N is a multiply-by-constant and it's profitable to extract a shift and
  // use it in a shifted operand do so.
  if (N.getOpcode() == ISD::MUL) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
      // Guard N across the DAG mutation performed by replaceDAGValue.
      HandleSDNode Handle(N);
      SDLoc Loc(N);
      replaceDAGValue(N.getOperand(1), NewMulConst);
      BaseReg = Handle.getValue();
      Opc = CurDAG->getTargetConstant(
          ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32);
      return true;
    }
  }

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  BaseReg = N.getOperand(0);
  unsigned ShImmVal = 0;
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  // Only constant shift amounts are handled here; register shift amounts are
  // matched by SelectRegShifterOperand instead.
  if (!RHS) return false;
  ShImmVal = RHS->getZExtValue() & 31;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                  SDLoc(N), MVT::i32);
  return true;
}
547 :
/// Match a register shifted by a register amount (a register shifter
/// operand). On success BaseReg/ShReg receive the two registers and Opc the
/// encoded shift opcode as a target constant.
bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &ShReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  BaseReg = N.getOperand(0);
  unsigned ShImmVal = 0;
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  // A constant shift amount is handled by SelectImmShifterOperand instead.
  if (RHS) return false;

  ShReg = N.getOperand(1);
  if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
    return false;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                  SDLoc(N), MVT::i32);
  return true;
}
574 :
// Determine whether an ISD::OR's operands are suitable to turn the operation
// into an addition, which often has more compact encodings.
bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) {
  assert(Parent->getOpcode() == ISD::OR && "unexpected parent");
  Out = N;
  // OR is equivalent to ADD when the operands share no set bits.
  return CurDAG->haveNoCommonBitsSet(N, Parent->getOperand(1));
}
582 :
583 :
/// Match a [base register + 12-bit signed immediate] address. Always
/// succeeds: anything that does not fold becomes base-only with offset 0.
bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
                                          SDValue &Base,
                                          SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      // Unwrap plain wrapped addresses; global/TLS addresses are left
      // wrapped for other addressing forms to handle.
      Base = N.getOperand(0);
    } else
      Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    // SUB folds as a negated offset.
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}
634 :
635 :
636 :
637 6443 : bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
638 : SDValue &Opc) {
639 6443 : if (N.getOpcode() == ISD::MUL &&
640 0 : ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
641 : if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
642 : // X * [3,5,9] -> X + X * [2,4,8] etc.
643 0 : int RHSC = (int)RHS->getZExtValue();
644 0 : if (RHSC & 1) {
645 0 : RHSC = RHSC & ~1;
646 : ARM_AM::AddrOpc AddSub = ARM_AM::add;
647 0 : if (RHSC < 0) {
648 : AddSub = ARM_AM::sub;
649 0 : RHSC = - RHSC;
650 : }
651 0 : if (isPowerOf2_32(RHSC)) {
652 : unsigned ShAmt = Log2_32(RHSC);
653 0 : Base = Offset = N.getOperand(0);
654 0 : Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
655 : ARM_AM::lsl),
656 0 : SDLoc(N), MVT::i32);
657 0 : return true;
658 : }
659 : }
660 : }
661 : }
662 :
663 9831 : if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
664 : // ISD::OR that is equivalent to an ISD::ADD.
665 3388 : !CurDAG->isBaseWithConstantOffset(N))
666 : return false;
667 :
668 : // Leave simple R +/- imm12 operands for LDRi12
669 3219 : if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
670 : int RHSC;
671 3214 : if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
672 : -0x1000+1, 0x1000, RHSC)) // 12 bits.
673 : return false;
674 : }
675 :
676 : // Otherwise this is R +/- [possibly shifted] R.
677 147 : ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
678 : ARM_AM::ShiftOpc ShOpcVal =
679 147 : ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
680 : unsigned ShAmt = 0;
681 :
682 147 : Base = N.getOperand(0);
683 147 : Offset = N.getOperand(1);
684 :
685 147 : if (ShOpcVal != ARM_AM::no_shift) {
686 : // Check to see if the RHS of the shift is a constant, if not, we can't fold
687 : // it.
688 : if (ConstantSDNode *Sh =
689 71 : dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
690 71 : ShAmt = Sh->getZExtValue();
691 71 : if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
692 142 : Offset = N.getOperand(1).getOperand(0);
693 : else {
694 : ShAmt = 0;
695 : ShOpcVal = ARM_AM::no_shift;
696 : }
697 : } else {
698 : ShOpcVal = ARM_AM::no_shift;
699 : }
700 : }
701 :
702 : // Try matching (R shl C) + (R).
703 147 : if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
704 193 : !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
705 60 : N.getOperand(0).hasOneUse())) {
706 5 : ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
707 0 : if (ShOpcVal != ARM_AM::no_shift) {
708 : // Check to see if the RHS of the shift is a constant, if not, we can't
709 : // fold it.
710 : if (ConstantSDNode *Sh =
711 : dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
712 0 : ShAmt = Sh->getZExtValue();
713 0 : if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
714 0 : Offset = N.getOperand(0).getOperand(0);
715 0 : Base = N.getOperand(1);
716 : } else {
717 : ShAmt = 0;
718 : ShOpcVal = ARM_AM::no_shift;
719 : }
720 : } else {
721 : ShOpcVal = ARM_AM::no_shift;
722 : }
723 : }
724 : }
725 :
726 : // If Offset is a multiply-by-constant and it's profitable to extract a shift
727 : // and use it in a shifted operand do so.
728 311 : if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
729 17 : unsigned PowerOfTwo = 0;
730 17 : SDValue NewMulConst;
731 17 : if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
732 11 : HandleSDNode Handle(Offset);
733 22 : replaceDAGValue(Offset.getOperand(1), NewMulConst);
734 11 : Offset = Handle.getValue();
735 11 : ShAmt = PowerOfTwo;
736 : ShOpcVal = ARM_AM::lsl;
737 : }
738 : }
739 :
740 147 : Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
741 147 : SDLoc(N), MVT::i32);
742 147 : return true;
743 : }
744 :
745 25 : bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
746 : SDValue &Offset, SDValue &Opc) {
747 25 : unsigned Opcode = Op->getOpcode();
748 : ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
749 25 : ? cast<LoadSDNode>(Op)->getAddressingMode()
750 : : cast<StoreSDNode>(Op)->getAddressingMode();
751 25 : ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
752 25 : ? ARM_AM::add : ARM_AM::sub;
753 : int Val;
754 25 : if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
755 : return false;
756 :
757 19 : Offset = N;
758 : ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
759 : unsigned ShAmt = 0;
760 7 : if (ShOpcVal != ARM_AM::no_shift) {
761 : // Check to see if the RHS of the shift is a constant, if not, we can't fold
762 : // it.
763 : if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
764 7 : ShAmt = Sh->getZExtValue();
765 7 : if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
766 10 : Offset = N.getOperand(0);
767 : else {
768 : ShAmt = 0;
769 : ShOpcVal = ARM_AM::no_shift;
770 : }
771 : } else {
772 : ShOpcVal = ARM_AM::no_shift;
773 : }
774 : }
775 :
776 19 : Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
777 19 : SDLoc(N), MVT::i32);
778 19 : return true;
779 : }
780 :
781 0 : bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
782 : SDValue &Offset, SDValue &Opc) {
783 0 : unsigned Opcode = Op->getOpcode();
784 : ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
785 0 : ? cast<LoadSDNode>(Op)->getAddressingMode()
786 : : cast<StoreSDNode>(Op)->getAddressingMode();
787 0 : ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
788 0 : ? ARM_AM::add : ARM_AM::sub;
789 : int Val;
790 0 : if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
791 0 : if (AddSub == ARM_AM::sub) Val *= -1;
792 0 : Offset = CurDAG->getRegister(0, MVT::i32);
793 0 : Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
794 0 : return true;
795 : }
796 :
797 : return false;
798 : }
799 :
800 :
/// Match a 12-bit immediate offset for an indexed load/store; unlike the
/// pre-indexed variant above, the offset is packed into an AM2 opcode
/// immediate together with the add/sub direction.
bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                                               SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  // Increment modes add the offset; decrement modes subtract it.
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
    // No offset register is used; register 0 marks it absent.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
                                                      ARM_AM::no_shift),
                                    SDLoc(Op), MVT::i32);
    return true;
  }

  return false;
}
820 :
/// SelectAddrOffsetNone - Match a plain address with no offset: the operand
/// itself is used directly as the base register.  Always succeeds.
bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
  Base = N;
  return true;
}
825 :
/// SelectAddrMode3 - Match ARM addressing mode 3 (register base plus either a
/// register offset or a +/- 8-bit immediate).  Always succeeds; the fallback
/// is reg+reg with an add operation.
bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
                                      SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::SUB) {
    // X - C is canonicalize to X + -C, no need to handle it here.
    Base = N.getOperand(0);
    Offset = N.getOperand(1);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    // No foldable constant offset: use N itself as the base with a zero
    // immediate, materializing frame indices as target frame indices.
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                              -256 + 1, 256, RHSC)) { // 8 bits.
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);

    // Negative offsets are encoded as (sub, |imm|).
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  // Constant offset out of range: fall back to reg+reg addition.
  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                  MVT::i32);
  return true;
}
879 :
/// SelectAddrMode3Offset - Match the offset operand of a pre/post-indexed
/// addrmode3 load/store.  Prefers an 8-bit immediate encoding; otherwise
/// falls back to a register offset.  Always succeeds.
bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  // The indexed-addressing kind is recorded on the memory node itself.
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 8 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
                                    MVT::i32);
    return true;
  }

  // Not an imm8: use N itself as a register offset.
  Offset = N;
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
                                  MVT::i32);
  return true;
}
901 :
/// IsAddressingMode5 - Shared matcher for VFP addressing mode 5: a base
/// register plus a scaled 8-bit immediate.  \p Lwb/\p Upb bound the scaled
/// constant; \p FP16 selects the half-precision variant (scale 2 and the
/// AM5FP16 encoding) instead of scale 4 / AM5.  Always succeeds.
bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset,
                                        int Lwb, int Upb, bool FP16) {
  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      // Materialize a frame index as a target frame index base.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
               N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      // Strip the wrapper around suitable target nodes.
      Base = N.getOperand(0);
    }
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  const int Scale = FP16 ? 2 : 4;

  if (isScaledConstantInRange(N.getOperand(1), Scale, Lwb, Upb, RHSC)) {
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }

    // Negative offsets are encoded as (sub, |imm|).
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }

    if (FP16)
      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub, RHSC),
                                         SDLoc(N), MVT::i32);
    else
      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
                                         SDLoc(N), MVT::i32);

    return true;
  }

  // Offset not representable: use the whole expression as the base with a
  // zero immediate.
  Base = N;

  if (FP16)
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
  else
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);

  return true;
}
960 :
961 : bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
962 : SDValue &Base, SDValue &Offset) {
963 : int Lwb = -256 + 1;
964 : int Upb = 256;
965 3338 : return IsAddressingMode5(N, Base, Offset, Lwb, Upb, /*FP16=*/ false);
966 : }
967 :
968 : bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N,
969 : SDValue &Base, SDValue &Offset) {
970 : int Lwb = -512 + 1;
971 : int Upb = 512;
972 486 : return IsAddressingMode5(N, Base, Offset, Lwb, Upb, /*FP16=*/ true);
973 : }
974 :
/// SelectAddrMode6 - Match NEON addressing mode 6 (address register plus an
/// alignment operand).  \p Parent is the memory node using the address; the
/// alignment operand is derived from its MachineMemOperand.  Always succeeds.
bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
                                      SDValue &Align) {
  Addr = N;

  unsigned Alignment = 0;

  MemSDNode *MemN = cast<MemSDNode>(Parent);

  if (isa<LSBaseSDNode>(MemN) ||
      ((MemN->getOpcode() == ARMISD::VST1_UPD ||
        MemN->getOpcode() == ARMISD::VLD1_UPD) &&
       MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
    // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
    // The maximum alignment is equal to the memory size being referenced.
    unsigned MMOAlign = MemN->getAlignment();
    unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
    if (MMOAlign >= MemSize && MemSize > 1)
      Alignment = MemSize;
  } else {
    // All other uses of addrmode6 are for intrinsics. For now just record
    // the raw alignment value; it will be refined later based on the legal
    // alignment operands for the intrinsic.
    Alignment = MemN->getAlignment();
  }

  Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
  return true;
}
1003 :
1004 0 : bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
1005 : SDValue &Offset) {
1006 : LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
1007 : ISD::MemIndexedMode AM = LdSt->getAddressingMode();
1008 0 : if (AM != ISD::POST_INC)
1009 0 : return false;
1010 0 : Offset = N;
1011 : if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
1012 0 : if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
1013 0 : Offset = CurDAG->getRegister(0, MVT::i32);
1014 : }
1015 : return true;
1016 : }
1017 :
/// SelectAddrModePC - Match a PIC_ADD node with a single use, splitting it
/// into its address operand and its constant label-id operand.
bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
                                       SDValue &Offset, SDValue &Label) {
  // hasOneUse: folding a multiply-used PIC_ADD would duplicate it.
  if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
    Offset = N.getOperand(0);
    SDValue N1 = N.getOperand(1);
    Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
                                      SDLoc(N), MVT::i32);
    return true;
  }

  return false;
}
1030 :
1031 :
1032 : //===----------------------------------------------------------------------===//
1033 : // Thumb Addressing Modes
1034 : //===----------------------------------------------------------------------===//
1035 :
/// SelectThumbAddrModeRR - Match a Thumb reg+reg address.  A literal zero
/// address is also accepted, with the same node used for both operands.
bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N,
                                            SDValue &Base, SDValue &Offset){
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
    // Only the constant 0 is accepted as a non-add address.
    ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
    if (!NC || !NC->isNullValue())
      return false;

    Base = Offset = N;
    return true;
  }

  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  return true;
}
1051 :
/// SelectThumbAddrModeImm5S - Match a Thumb base + scaled-imm5 address.
/// \p Scale is the access size in bytes (1/2/4); the immediate must be a
/// non-negative multiple of Scale below 32*Scale.  Fails for plain ADDs and
/// for out-of-range offsets so that the register-offset form is used instead.
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
                                          SDValue &Base, SDValue &OffImm) {
  if (!CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::ADD) {
      return false; // We want to select register offset instead
    } else if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetConstantPool &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      // Strip the wrapper around suitable target nodes.
      Base = N.getOperand(0);
    } else {
      Base = N;
    }

    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is + imm5 * scale, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
    Base = N.getOperand(0);
    OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
    return true;
  }

  // Offset is too large, so use register offset instead.
  return false;
}
1083 :
/// SelectThumbAddrModeImm5S4 - imm5-scaled addressing for 4-byte accesses.
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, /*Scale=*/4, Base, OffImm);
}
1089 :
/// SelectThumbAddrModeImm5S2 - imm5-scaled addressing for 2-byte accesses.
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, /*Scale=*/2, Base, OffImm);
}
1095 :
/// SelectThumbAddrModeImm5S1 - imm5-scaled addressing for 1-byte accesses.
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, /*Scale=*/1, Base, OffImm);
}
1101 :
/// SelectThumbAddrModeSP - Match an SP- or frame-index-relative Thumb address
/// with a word-scaled 8-bit immediate offset.  Bumps frame-object alignment
/// to 4 where needed so the offset stays a multiple of 4.
bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    // Only multiples of 4 are allowed for the offset, so the frame object
    // alignment must be at least 4.
    MachineFrameInfo &MFI = MF->getFrameInfo();
    if (MFI.getObjectAlignment(FI) < 4)
      MFI.setObjectAlignment(FI, 4);
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N))
    return false;

  // The base must be a frame index or the SP register.
  RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
  if (N.getOperand(0).getOpcode() == ISD::FrameIndex ||
      (LHSR && LHSR->getReg() == ARM::SP)) {
    // If the RHS is + imm8 * scale, fold into addr mode.
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        // For LHS+RHS to result in an offset that's a multiple of 4 the object
        // indexed by the LHS must be 4-byte aligned.
        MachineFrameInfo &MFI = MF->getFrameInfo();
        if (MFI.getObjectAlignment(FI) < 4)
          MFI.setObjectAlignment(FI, 4);
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  return false;
}
1144 :
1145 :
1146 : //===----------------------------------------------------------------------===//
1147 : // Thumb 2 Addressing Modes
1148 : //===----------------------------------------------------------------------===//
1149 :
1150 :
/// SelectT2AddrModeImm12 - Match a Thumb2 base + unsigned 12-bit immediate
/// address (t2LDRi12-style).  Deliberately fails for (R - imm8) so t2LDRi8
/// can handle it, and for constant-pool bases so t2LDRpci is used instead.
bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::TargetConstantPool)
        return false; // We want to select t2LDRpci instead.
    } else
      Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    if (SelectT2AddrModeImm8(N, Base, OffImm))
      // Let t2LDRi8 handle (R - imm8).
      return false;

    int RHSC = (int)RHS->getZExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
      Base   = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}
1206 :
/// SelectT2AddrModeImm8 - Match a Thumb2 base minus 8-bit immediate address
/// (t2LDRi8-style).  Only strictly-negative offsets in [-255, 0) are
/// accepted; non-negative offsets belong to the imm12 form.
bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
                                           SDValue &Base, SDValue &OffImm) {
  // Match simple R - imm8 operands.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  return false;
}
1233 :
/// SelectT2AddrModeImm8Offset - Match the 8-bit offset operand of a
/// pre/post-indexed Thumb2 load/store; the immediate is negated for
/// decrementing forms.
bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm){
  unsigned Opcode = Op->getOpcode();
  // The indexed-addressing kind is recorded on the memory node itself.
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  int RHSC;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
    OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
      ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32)
      : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32);
    return true;
  }

  return false;
}
1250 :
/// SelectT2AddrModeSoReg - Match a Thumb2 base + (reg << [0-3]) address.
/// Constant offsets are explicitly rejected so the imm12/imm8 forms win.
bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
                                            SDValue &Base,
                                            SDValue &OffReg, SDValue &ShImm) {
  // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getZExtValue();
    if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
      return false;
    else if (RHSC < 0 && RHSC >= -255) // 8 bits
      return false;
  }

  // Look for (R + R) or (R + (R << [1,2,3])).
  unsigned ShAmt = 0;
  Base   = N.getOperand(0);
  OffReg = N.getOperand(1);

  // Swap if it is ((R << c) + R).
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
  if (ShOpcVal != ARM_AM::lsl) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
    if (ShOpcVal == ARM_AM::lsl)
      std::swap(Base, OffReg);
  }

  if (ShOpcVal == ARM_AM::lsl) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      // Only shifts of 0-3 are encodable in this addressing mode.
      if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
        OffReg = OffReg.getOperand(0);
      else {
        ShAmt = 0;
      }
    }
  }

  // If OffReg is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
      // Rewrite the multiply in place; the Handle tracks OffReg across the
      // DAG mutation.
      HandleSDNode Handle(OffReg);
      replaceDAGValue(OffReg.getOperand(1), NewMulConst);
      OffReg = Handle.getValue();
      ShAmt = PowerOfTwo;
    }
  }

  ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);

  return true;
}
1310 :
/// SelectT2AddrModeExclusive - Match the base + word-scaled imm8 address of
/// ldrex/strex.  Always succeeds (falling back to base + 0) because those
/// instructions have no alternative addressing form.  The emitted immediate
/// is the byte offset divided by 4.
bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
                                                SDValue &OffImm) {
  // This *must* succeed since it's used for the irreplaceable ldrex and strex
  // instructions.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);

  if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
    return true;

  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHS)
    return true;

  // Only offsets that are multiples of 4 up to 1020 are encodable.
  uint32_t RHSC = (int)RHS->getZExtValue();
  if (RHSC > 1020 || RHSC % 4 != 0)
    return true;

  Base = N.getOperand(0);
  if (Base.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(Base)->getIndex();
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
  }

  OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);
  return true;
}
1339 :
1340 : //===--------------------------------------------------------------------===//
1341 :
1342 : /// getAL - Returns a ARMCC::AL immediate node.
1343 3791 : static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
1344 3791 : return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
1345 : }
1346 :
/// transferMemOperands - Copy the MachineMemOperand from the original memory
/// node \p N onto the newly created machine node \p Result so memory-access
/// metadata is preserved through selection.
void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
}
1351 :
/// tryARMIndexedLoad - Attempt to select a pre/post-indexed ARM-mode load for
/// \p N, trying immediate forms before register forms for each loaded type.
/// Returns true and replaces N on success.
bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  EVT LoadedVT = LD->getMemoryVT();
  SDValue Offset, AMOpc;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  if (LoadedVT == MVT::i32 && isPre &&
      SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_PRE_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 && !isPre &&
      SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_POST_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 &&
      SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
    Match = true;

  } else if (LoadedVT == MVT::i16 &&
             SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
    // Halfword loads use addrmode3; pick signed/unsigned extend variant.
    Match = true;
    Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
      ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
      : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
  } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
    if (LD->getExtensionType() == ISD::SEXTLOAD) {
      // Sign-extending byte loads only exist in addrmode3.
      if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
      }
    } else {
      // Zero-extending byte loads mirror the i32 selection order:
      // imm-pre, imm-post, then register forms.
      if (isPre &&
          SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_PRE_IMM;
      } else if (!isPre &&
          SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_POST_IMM;
      } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
      }
    }
  }

  if (Match) {
    if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
      // The *_PRE_IMM forms take no separate offset-register operand.
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
                       CurDAG->getRegister(0, MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                           MVT::Other, Ops);
      transferMemOperands(N, New);
      ReplaceNode(N, New);
      return true;
    } else {
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
                       CurDAG->getRegister(0, MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                           MVT::Other, Ops);
      transferMemOperands(N, New);
      ReplaceNode(N, New);
      return true;
    }
  }

  return false;
}
1430 :
/// tryT1IndexedLoad - Attempt to select a Thumb1 post-incremented i32 load.
/// Only a non-extending post-inc load whose increment is exactly 4 matches.
bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  EVT LoadedVT = LD->getMemoryVT();
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD ||
      LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
    return false;

  auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset());
  if (!COffs || COffs->getZExtValue() != 4)
    return false;

  // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
  // The encoding of LDM is not how the rest of ISel expects a post-inc load to
  // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after
  // ISel.
  SDValue Chain = LD->getChain();
  SDValue Base = LD->getBasePtr();
  SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)),
                   CurDAG->getRegister(0, MVT::i32), Chain };
  SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32,
                                       MVT::i32, MVT::Other, Ops);
  transferMemOperands(N, New);
  ReplaceNode(N, New);
  return true;
}
1457 :
/// tryT2IndexedLoad - Attempt to select a pre/post-indexed Thumb2 load for
/// \p N using an 8-bit immediate offset.  Returns true and replaces N on
/// success.
bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  EVT LoadedVT = LD->getMemoryVT();
  bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
  SDValue Offset;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
    // Pick the opcode by memory width, extension kind and pre/post mode.
    switch (LoadedVT.getSimpleVT().SimpleTy) {
    case MVT::i32:
      Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
      break;
    case MVT::i16:
      if (isSExtLd)
        Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
      else
        Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
      break;
    case MVT::i8:
    case MVT::i1:
      if (isSExtLd)
        Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
      else
        Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
      break;
    default:
      return false;
    }
    Match = true;
  }

  if (Match) {
    SDValue Chain = LD->getChain();
    SDValue Base = LD->getBasePtr();
    SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
                     CurDAG->getRegister(0, MVT::i32), Chain };
    SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                         MVT::Other, Ops);
    transferMemOperands(N, New);
    ReplaceNode(N, New);
    return true;
  }

  return false;
}
1508 :
1509 : /// Form a GPRPair pseudo register from a pair of GPR regs.
/// Form a GPRPair pseudo register from a pair of GPR regs.
SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
  SDLoc dl(V0.getNode());
  SDValue RegClass =
    CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
  // REG_SEQUENCE operand layout: register class, then (value, subreg) pairs.
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}
1519 :
1520 : /// Form a D register from a pair of S registers.
/// Form a D register from a pair of S registers.
SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
  SDLoc dl(V0.getNode());
  SDValue RegClass =
    CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
  // REG_SEQUENCE operand layout: register class, then (value, subreg) pairs.
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}
1530 :
1531 : /// Form a quad register from a pair of D registers.
/// Form a quad register from a pair of D registers.
SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
  SDLoc dl(V0.getNode());
  SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
                                               MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
  // REG_SEQUENCE operand layout: register class, then (value, subreg) pairs.
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}
1541 :
1542 : /// Form 4 consecutive D registers from a pair of Q registers.
/// Form 4 consecutive D registers from a pair of Q registers.
SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
  SDLoc dl(V0.getNode());
  SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
                                               MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
  // REG_SEQUENCE operand layout: register class, then (value, subreg) pairs.
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}
1552 :
1553 : /// Form 4 consecutive S registers.
/// Form 4 consecutive S registers.
SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
                                   SDValue V2, SDValue V3) {
  SDLoc dl(V0.getNode());
  SDValue RegClass =
    CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
  SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
  SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
  // REG_SEQUENCE operand layout: register class, then (value, subreg) pairs.
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
                                    V2, SubReg2, V3, SubReg3 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}
1567 :
1568 : /// Form 4 consecutive D registers.
/// Form 4 consecutive D registers.
SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
                                   SDValue V2, SDValue V3) {
  SDLoc dl(V0.getNode());
  SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
                                               MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
  SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);
  SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);
  // REG_SEQUENCE operand layout: register class, then (value, subreg) pairs.
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
                                    V2, SubReg2, V3, SubReg3 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}
1582 :
1583 : /// Form 4 consecutive Q registers.
/// Form 4 consecutive Q registers.
SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
                                   SDValue V2, SDValue V3) {
  SDLoc dl(V0.getNode());
  SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl,
                                               MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
  SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);
  SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);
  // REG_SEQUENCE operand layout: register class, then (value, subreg) pairs.
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
                                    V2, SubReg2, V3, SubReg3 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}
1597 :
1598 : /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
1599 : /// of a NEON VLD or VST instruction. The supported values depend on the
1600 : /// number of registers being loaded.
1601 558 : SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl,
1602 : unsigned NumVecs, bool is64BitVector) {
1603 : unsigned NumRegs = NumVecs;
1604 558 : if (!is64BitVector && NumVecs < 3)
1605 299 : NumRegs *= 2;
1606 :
1607 558 : unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
1608 558 : if (Alignment >= 32 && NumRegs == 4)
1609 : Alignment = 32;
1610 524 : else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
1611 : Alignment = 16;
1612 437 : else if (Alignment >= 8)
1613 : Alignment = 8;
1614 : else
1615 : Alignment = 0;
1616 :
1617 558 : return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
1618 : }
1619 :
1620 : static bool isVLDfixed(unsigned Opc)
1621 : {
1622 : switch (Opc) {
1623 : default: return false;
1624 : case ARM::VLD1d8wb_fixed : return true;
1625 : case ARM::VLD1d16wb_fixed : return true;
1626 : case ARM::VLD1d64Qwb_fixed : return true;
1627 : case ARM::VLD1d32wb_fixed : return true;
1628 : case ARM::VLD1d64wb_fixed : return true;
1629 : case ARM::VLD1d64TPseudoWB_fixed : return true;
1630 : case ARM::VLD1d64QPseudoWB_fixed : return true;
1631 : case ARM::VLD1q8wb_fixed : return true;
1632 : case ARM::VLD1q16wb_fixed : return true;
1633 : case ARM::VLD1q32wb_fixed : return true;
1634 : case ARM::VLD1q64wb_fixed : return true;
1635 : case ARM::VLD1DUPd8wb_fixed : return true;
1636 : case ARM::VLD1DUPd16wb_fixed : return true;
1637 : case ARM::VLD1DUPd32wb_fixed : return true;
1638 : case ARM::VLD1DUPq8wb_fixed : return true;
1639 : case ARM::VLD1DUPq16wb_fixed : return true;
1640 : case ARM::VLD1DUPq32wb_fixed : return true;
1641 : case ARM::VLD2d8wb_fixed : return true;
1642 : case ARM::VLD2d16wb_fixed : return true;
1643 : case ARM::VLD2d32wb_fixed : return true;
1644 : case ARM::VLD2q8PseudoWB_fixed : return true;
1645 : case ARM::VLD2q16PseudoWB_fixed : return true;
1646 : case ARM::VLD2q32PseudoWB_fixed : return true;
1647 : case ARM::VLD2DUPd8wb_fixed : return true;
1648 : case ARM::VLD2DUPd16wb_fixed : return true;
1649 : case ARM::VLD2DUPd32wb_fixed : return true;
1650 : }
1651 : }
1652 :
1653 : static bool isVSTfixed(unsigned Opc)
1654 : {
1655 : switch (Opc) {
1656 : default: return false;
1657 : case ARM::VST1d8wb_fixed : return true;
1658 : case ARM::VST1d16wb_fixed : return true;
1659 : case ARM::VST1d32wb_fixed : return true;
1660 : case ARM::VST1d64wb_fixed : return true;
1661 : case ARM::VST1q8wb_fixed : return true;
1662 : case ARM::VST1q16wb_fixed : return true;
1663 : case ARM::VST1q32wb_fixed : return true;
1664 : case ARM::VST1q64wb_fixed : return true;
1665 : case ARM::VST1d64TPseudoWB_fixed : return true;
1666 : case ARM::VST1d64QPseudoWB_fixed : return true;
1667 : case ARM::VST2d8wb_fixed : return true;
1668 : case ARM::VST2d16wb_fixed : return true;
1669 : case ARM::VST2d32wb_fixed : return true;
1670 : case ARM::VST2q8PseudoWB_fixed : return true;
1671 : case ARM::VST2q16PseudoWB_fixed : return true;
1672 : case ARM::VST2q32PseudoWB_fixed : return true;
1673 : }
1674 : }
1675 :
1676 : // Get the register stride update opcode of a VLD/VST instruction that
1677 : // is otherwise equivalent to the given fixed stride updating instruction.
// Used when the post-increment amount is a register (or a constant that does
// not equal the access size), so the "_register" form with an explicit
// increment operand must be selected instead of the "_fixed" form.
1678 139 : static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
1679 : assert((isVLDfixed(Opc) || isVSTfixed(Opc))
1680 : && "Incorrect fixed stride updating instruction.");
1681 139 : switch (Opc) {
1682 : default: break;
1683 : case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
1684 0 : case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
1685 2 : case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
1686 8 : case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
1687 21 : case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
1688 0 : case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
1689 9 : case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
1690 6 : case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
// NOTE(review): ARM::VLD1d64Twb_fixed is mapped here, but isVLDfixed() does
// not return true for it, so the assert above would reject it before this
// case is reached -- confirm whether it should be added to isVLDfixed().
1691 0 : case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
1692 0 : case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
1693 2 : case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
1694 1 : case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
1695 1 : case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register;
1696 0 : case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register;
1697 0 : case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register;
1698 1 : case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register;
1699 0 : case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register;
1700 0 : case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register;
1701 :
1702 10 : case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
1703 4 : case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
1704 10 : case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
1705 4 : case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
1706 14 : case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
1707 5 : case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
1708 9 : case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
1709 13 : case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
1710 1 : case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
1711 1 : case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;
1712 :
1713 0 : case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
1714 0 : case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
1715 0 : case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
1716 1 : case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
1717 0 : case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
1718 0 : case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;
1719 :
1720 1 : case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
1721 0 : case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
1722 0 : case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
1723 0 : case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
1724 0 : case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
1725 0 : case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
1726 :
1727 1 : case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
1728 1 : case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
1729 0 : case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
1730 : }
1731 0 : return Opc; // If not one we handle, return it unchanged.
1732 : }
1733 :
1734 : /// Returns true if the given increment is a Constant known to be equal to the
1735 : /// access size performed by a NEON load/store. This means the "[rN]!" form can
1736 : /// be used.
1737 0 : static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) {
1738 : auto C = dyn_cast<ConstantSDNode>(Inc);
1739 0 : return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs;
1740 : }
1741 :
// Select a NEON vector load (VLD1-VLD4, with or without post-increment
// writeback) into machine node(s) and replace N.
//
//   N          - a memory intrinsic (address at operand 2) or an updating
//                load (address at operand 1).
//   isUpdating - post-increment form; adds an i32 writeback result.
//   NumVecs    - number of vectors loaded (1-4).
//   DOpcodes   - opcode table for 64-bit vectors, indexed by element size.
//   QOpcodes0  - opcodes for 128-bit vectors (used directly for VLD1/VLD2,
//                or as the first half of a split VLD3/VLD4).
//   QOpcodes1  - second half of a split quad-register VLD3/VLD4.
1742 288 : void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
1743 : const uint16_t *DOpcodes,
1744 : const uint16_t *QOpcodes0,
1745 : const uint16_t *QOpcodes1) {
1746 : assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
1747 : SDLoc dl(N);
1748 :
1749 288 : SDValue MemAddr, Align;
1750 : bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
1751 : // nodes are not intrinsics.
1752 288 : unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
1753 576 : if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
1754 : return;
1755 :
1756 288 : SDValue Chain = N->getOperand(0);
1757 288 : EVT VT = N->getValueType(0);
1758 288 : bool is64BitVector = VT.is64BitVector();
1759 288 : Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
1760 :
// Opcode tables are indexed by element size: 8 -> 0, 16 -> 1, 32 -> 2,
// 64 -> 3.
1761 : unsigned OpcodeIndex;
1762 288 : switch (VT.getSimpleVT().SimpleTy) {
1763 0 : default: llvm_unreachable("unhandled vld type");
1764 : // Double-register operations:
1765 : case MVT::v8i8: OpcodeIndex = 0; break;
1766 13 : case MVT::v4i16: OpcodeIndex = 1; break;
1767 25 : case MVT::v2f32:
1768 25 : case MVT::v2i32: OpcodeIndex = 2; break;
1769 23 : case MVT::v1i64: OpcodeIndex = 3; break;
1770 : // Quad-register operations:
1771 : case MVT::v16i8: OpcodeIndex = 0; break;
1772 20 : case MVT::v8i16: OpcodeIndex = 1; break;
1773 85 : case MVT::v4f32:
1774 85 : case MVT::v4i32: OpcodeIndex = 2; break;
1775 28 : case MVT::v2f64:
1776 28 : case MVT::v2i64: OpcodeIndex = 3; break;
1777 : }
1778 :
// A multi-vector result is modeled as one wide MVT::i64 vector covering all
// loaded registers; a 3-vector load is padded out to 4 elements.
1779 288 : EVT ResTy;
1780 288 : if (NumVecs == 1)
1781 180 : ResTy = VT;
1782 : else {
1783 108 : unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
1784 108 : if (!is64BitVector)
1785 48 : ResTyElts *= 2;
1786 108 : ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
1787 : }
1788 : std::vector<EVT> ResTys;
1789 288 : ResTys.push_back(ResTy);
1790 288 : if (isUpdating)
1791 132 : ResTys.push_back(MVT::i32);
1792 288 : ResTys.push_back(MVT::Other);
1793 :
1794 288 : SDValue Pred = getAL(CurDAG, dl);
1795 576 : SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
1796 : SDNode *VLd;
1797 : SmallVector<SDValue, 7> Ops;
1798 :
1799 : // Double registers and VLD1/VLD2 quad registers are directly supported.
1800 288 : if (is64BitVector || NumVecs <= 2) {
1801 260 : unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
1802 156 : QOpcodes0[OpcodeIndex]);
1803 260 : Ops.push_back(MemAddr);
1804 260 : Ops.push_back(Align);
1805 260 : if (isUpdating) {
1806 128 : SDValue Inc = N->getOperand(AddrOpIdx + 1);
1807 128 : bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
1808 128 : if (!IsImmUpdate) {
1809 : // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
1810 : // check for the opcode rather than the number of vector elements.
1811 63 : if (isVLDfixed(Opc))
1812 63 : Opc = getVLDSTRegisterUpdateOpcode(Opc);
1813 66 : Ops.push_back(Inc);
1814 : // VLD1/VLD2 fixed increment does not need Reg0 so only include it in
1815 : // the operands if not such an opcode.
1816 62 : } else if (!isVLDfixed(Opc))
1817 0 : Ops.push_back(Reg0);
1818 : }
1819 260 : Ops.push_back(Pred);
1820 260 : Ops.push_back(Reg0);
1821 260 : Ops.push_back(Chain);
1822 520 : VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1823 :
1824 : } else {
1825 : // Otherwise, quad registers are loaded with two separate instructions,
1826 : // where one loads the even registers and the other loads the odd registers.
1827 28 : EVT AddrTy = MemAddr.getValueType();
1828 :
1829 : // Load the even subregs. This is always an updating load, so that it
1830 : // provides the address to the second load for the odd subregs.
1831 : SDValue ImplDef =
1832 28 : SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
1833 28 : const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
1834 56 : SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
1835 : ResTy, AddrTy, MVT::Other, OpsA);
1836 28 : Chain = SDValue(VLdA, 2);
1837 :
// The second instruction takes the updated address (result 1 of VLdA) and
// the partially-written super-register (result 0) as inputs.
1838 : // Load the odd subregs.
1839 28 : Ops.push_back(SDValue(VLdA, 1));
1840 28 : Ops.push_back(Align);
1841 28 : if (isUpdating) {
1842 : SDValue Inc = N->getOperand(AddrOpIdx + 1);
1843 : assert(isa<ConstantSDNode>(Inc.getNode()) &&
1844 : "only constant post-increment update allowed for VLD3/4");
1845 : (void)Inc;
1846 4 : Ops.push_back(Reg0);
1847 : }
1848 28 : Ops.push_back(SDValue(VLdA, 0));
1849 28 : Ops.push_back(Pred);
1850 28 : Ops.push_back(Reg0);
1851 28 : Ops.push_back(Chain);
1852 56 : VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
1853 : }
1854 :
1855 : // Transfer memoperands.
1856 288 : MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1857 576 : CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLd), {MemOp});
1858 :
1859 288 : if (NumVecs == 1) {
1860 180 : ReplaceNode(N, VLd);
1861 : return;
1862 : }
1863 :
// Multi-vector case: peel each loaded vector back out of the wide result so
// the original node's per-vector results can be replaced.
1864 : // Extract out the subregisters.
1865 : SDValue SuperReg = SDValue(VLd, 0);
1866 : static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
1867 : ARM::qsub_3 == ARM::qsub_0 + 3,
1868 : "Unexpected subreg numbering");
1869 108 : unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
1870 424 : for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
1871 632 : ReplaceUses(SDValue(N, Vec),
1872 316 : CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
1873 108 : ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
1874 108 : if (isUpdating)
1875 40 : ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
1876 108 : CurDAG->RemoveDeadNode(N);
1877 : }
1878 :
// Select a NEON vector store (VST1-VST4, with or without post-increment
// writeback) into machine node(s) and replace N.
//
//   N          - a memory intrinsic (address at operand 2) or an updating
//                store (address at operand 1); vectors start at operand 3.
//   isUpdating - post-increment form; adds an i32 writeback result.
//   NumVecs    - number of vectors stored (1-4).
//   DOpcodes   - opcode table for 64-bit vectors, indexed by element size.
//   QOpcodes0  - opcodes for 128-bit vectors (used directly for VST1/VST2,
//                or as the first half of a split VST3/VST4).
//   QOpcodes1  - second half of a split quad-register VST3/VST4.
1879 270 : void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
1880 : const uint16_t *DOpcodes,
1881 : const uint16_t *QOpcodes0,
1882 : const uint16_t *QOpcodes1) {
1883 : assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
1884 : SDLoc dl(N);
1885 :
1886 270 : SDValue MemAddr, Align;
1887 : bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
1888 : // nodes are not intrinsics.
1889 270 : unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
1890 : unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
1891 540 : if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
1892 : return;
1893 :
1894 270 : MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1895 :
1896 270 : SDValue Chain = N->getOperand(0);
1897 270 : EVT VT = N->getOperand(Vec0Idx).getValueType();
1898 270 : bool is64BitVector = VT.is64BitVector();
1899 270 : Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
1900 :
// Opcode tables are indexed by element size: 8 -> 0, 16 -> 1, 32 -> 2,
// 64 -> 3.
1901 : unsigned OpcodeIndex;
1902 270 : switch (VT.getSimpleVT().SimpleTy) {
1903 0 : default: llvm_unreachable("unhandled vst type");
1904 : // Double-register operations:
1905 : case MVT::v8i8: OpcodeIndex = 0; break;
1906 13 : case MVT::v4f16:
1907 13 : case MVT::v4i16: OpcodeIndex = 1; break;
1908 31 : case MVT::v2f32:
1909 31 : case MVT::v2i32: OpcodeIndex = 2; break;
1910 17 : case MVT::v1i64: OpcodeIndex = 3; break;
1911 : // Quad-register operations:
1912 : case MVT::v16i8: OpcodeIndex = 0; break;
1913 31 : case MVT::v8f16:
1914 31 : case MVT::v8i16: OpcodeIndex = 1; break;
1915 73 : case MVT::v4f32:
1916 73 : case MVT::v4i32: OpcodeIndex = 2; break;
1917 27 : case MVT::v2f64:
1918 27 : case MVT::v2i64: OpcodeIndex = 3; break;
1919 : }
1920 :
// Stores produce only the optional writeback value and the chain.
1921 : std::vector<EVT> ResTys;
1922 270 : if (isUpdating)
1923 151 : ResTys.push_back(MVT::i32);
1924 270 : ResTys.push_back(MVT::Other);
1925 :
1926 270 : SDValue Pred = getAL(CurDAG, dl);
1927 540 : SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
1928 : SmallVector<SDValue, 7> Ops;
1929 :
1930 : // Double registers and VST1/VST2 quad registers are directly supported.
1931 270 : if (is64BitVector || NumVecs <= 2) {
// Collect the source vectors into a single super-register via REG_SEQUENCE
// so the register allocator assigns consecutive registers.
1932 244 : SDValue SrcReg;
1933 244 : if (NumVecs == 1) {
1934 175 : SrcReg = N->getOperand(Vec0Idx);
1935 69 : } else if (is64BitVector) {
1936 : // Form a REG_SEQUENCE to force register allocation.
1937 50 : SDValue V0 = N->getOperand(Vec0Idx + 0);
1938 50 : SDValue V1 = N->getOperand(Vec0Idx + 1);
1939 50 : if (NumVecs == 2)
1940 14 : SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
1941 : else {
1942 36 : SDValue V2 = N->getOperand(Vec0Idx + 2);
1943 : // If it's a vst3, form a quad D-register and leave the last part as
1944 : // an undef.
1945 : SDValue V3 = (NumVecs == 3)
1946 22 : ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
1947 36 : : N->getOperand(Vec0Idx + 3);
1948 36 : SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
1949 : }
1950 : } else {
1951 : // Form a QQ register.
1952 19 : SDValue Q0 = N->getOperand(Vec0Idx);
1953 19 : SDValue Q1 = N->getOperand(Vec0Idx + 1);
1954 19 : SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
1955 : }
1956 :
1957 244 : unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
1958 143 : QOpcodes0[OpcodeIndex]);
1959 244 : Ops.push_back(MemAddr);
1960 244 : Ops.push_back(Align);
1961 244 : if (isUpdating) {
1962 149 : SDValue Inc = N->getOperand(AddrOpIdx + 1);
1963 149 : bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
1964 149 : if (!IsImmUpdate) {
1965 : // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so
1966 : // check for the opcode rather than the number of vector elements.
1967 72 : if (isVSTfixed(Opc))
1968 72 : Opc = getVLDSTRegisterUpdateOpcode(Opc);
1969 73 : Ops.push_back(Inc);
1970 : }
1971 : // VST1/VST2 fixed increment does not need Reg0 so only include it in
1972 : // the operands if not such an opcode.
1973 75 : else if (!isVSTfixed(Opc))
1974 1 : Ops.push_back(Reg0);
1975 : }
1976 244 : Ops.push_back(SrcReg);
1977 244 : Ops.push_back(Pred);
1978 244 : Ops.push_back(Reg0);
1979 244 : Ops.push_back(Chain);
1980 488 : SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1981 :
1982 : // Transfer memoperands.
1983 488 : CurDAG->setNodeMemRefs(cast<MachineSDNode>(VSt), {MemOp});
1984 :
1985 244 : ReplaceNode(N, VSt);
1986 : return;
1987 : }
1988 :
1989 : // Otherwise, quad registers are stored with two separate instructions,
1990 : // where one stores the even registers and the other stores the odd registers.
1991 :
1992 : // Form the QQQQ REG_SEQUENCE.
1993 26 : SDValue V0 = N->getOperand(Vec0Idx + 0);
1994 26 : SDValue V1 = N->getOperand(Vec0Idx + 1);
1995 26 : SDValue V2 = N->getOperand(Vec0Idx + 2);
1996 : SDValue V3 = (NumVecs == 3)
1997 9 : ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
1998 26 : : N->getOperand(Vec0Idx + 3);
1999 26 : SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2000 :
2001 : // Store the even D registers. This is always an updating store, so that it
2002 : // provides the address to the second store for the odd subregs.
2003 26 : const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
2004 78 : SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
2005 : MemAddr.getValueType(),
2006 : MVT::Other, OpsA);
2007 52 : CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStA), {MemOp});
2008 26 : Chain = SDValue(VStA, 1);
2009 :
2010 : // Store the odd D registers.
2011 26 : Ops.push_back(SDValue(VStA, 0));
2012 26 : Ops.push_back(Align);
2013 26 : if (isUpdating) {
2014 : SDValue Inc = N->getOperand(AddrOpIdx + 1);
2015 : assert(isa<ConstantSDNode>(Inc.getNode()) &&
2016 : "only constant post-increment update allowed for VST3/4");
2017 : (void)Inc;
2018 2 : Ops.push_back(Reg0);
2019 : }
2020 26 : Ops.push_back(RegSeq);
2021 26 : Ops.push_back(Pred);
2022 26 : Ops.push_back(Reg0);
2023 26 : Ops.push_back(Chain);
2024 52 : SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
2025 : Ops);
2026 52 : CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStB), {MemOp});
2027 26 : ReplaceNode(N, VStB);
2028 : }
2029 :
// Select a NEON per-lane load/store (VLD2-4LN / VST2-4LN, optionally
// post-incrementing) and replace N.
//
//   IsLoad     - choose load-lane vs. store-lane result handling.
//   isUpdating - post-increment form; adds an i32 writeback result.
//   NumVecs    - number of vectors (2-4); the lane number is the constant
//                operand at index Vec0Idx + NumVecs.
//   DOpcodes/QOpcodes - opcode tables for 64-/128-bit vectors, indexed by
//                element size.
2030 87 : void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
2031 : unsigned NumVecs,
2032 : const uint16_t *DOpcodes,
2033 : const uint16_t *QOpcodes) {
2034 : assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
2035 : SDLoc dl(N);
2036 :
2037 87 : SDValue MemAddr, Align;
2038 : bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
2039 : // nodes are not intrinsics.
2040 87 : unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2041 : unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2042 174 : if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2043 : return;
2044 :
2045 87 : MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2046 :
2047 87 : SDValue Chain = N->getOperand(0);
2048 : unsigned Lane =
2049 174 : cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
2050 87 : EVT VT = N->getOperand(Vec0Idx).getValueType();
2051 87 : bool is64BitVector = VT.is64BitVector();
2052 :
// Reduce the requested alignment to a power of two no larger than the total
// bytes accessed; the 3-vector lane forms always take alignment 0.
2053 : unsigned Alignment = 0;
2054 87 : if (NumVecs != 3) {
2055 116 : Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
2056 58 : unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
2057 58 : if (Alignment > NumBytes)
2058 : Alignment = NumBytes;
2059 58 : if (Alignment < 8 && Alignment < NumBytes)
2060 : Alignment = 0;
2061 : // Alignment must be a power of two; make sure of that.
2062 58 : Alignment = (Alignment & -Alignment);
2063 58 : if (Alignment == 1)
2064 : Alignment = 0;
2065 : }
2066 87 : Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2067 :
// Opcode tables are indexed by element size (per register width).
2068 : unsigned OpcodeIndex;
2069 87 : switch (VT.getSimpleVT().SimpleTy) {
2070 0 : default: llvm_unreachable("unhandled vld/vst lane type");
2071 : // Double-register operations:
2072 : case MVT::v8i8: OpcodeIndex = 0; break;
2073 10 : case MVT::v4i16: OpcodeIndex = 1; break;
2074 28 : case MVT::v2f32:
2075 28 : case MVT::v2i32: OpcodeIndex = 2; break;
2076 : // Quad-register operations:
2077 : case MVT::v8i16: OpcodeIndex = 0; break;
2078 19 : case MVT::v4f32:
2079 19 : case MVT::v4i32: OpcodeIndex = 1; break;
2080 : }
2081 :
// Loads produce one wide super-register result (3 vectors padded to 4);
// both forms may add an i32 writeback, and always end with the chain.
2082 : std::vector<EVT> ResTys;
2083 87 : if (IsLoad) {
2084 60 : unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2085 60 : if (!is64BitVector)
2086 25 : ResTyElts *= 2;
2087 120 : ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
2088 120 : MVT::i64, ResTyElts));
2089 : }
2090 87 : if (isUpdating)
2091 11 : ResTys.push_back(MVT::i32);
2092 87 : ResTys.push_back(MVT::Other);
2093 :
2094 87 : SDValue Pred = getAL(CurDAG, dl);
2095 174 : SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2096 :
2097 : SmallVector<SDValue, 8> Ops;
2098 87 : Ops.push_back(MemAddr);
2099 87 : Ops.push_back(Align);
2100 87 : if (isUpdating) {
2101 11 : SDValue Inc = N->getOperand(AddrOpIdx + 1);
2102 : bool IsImmUpdate =
2103 11 : isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
2104 16 : Ops.push_back(IsImmUpdate ? Reg0 : Inc);
2105 : }
2106 :
// Gather the input vectors into one consecutive super-register.
2107 87 : SDValue SuperReg;
2108 87 : SDValue V0 = N->getOperand(Vec0Idx + 0);
2109 87 : SDValue V1 = N->getOperand(Vec0Idx + 1);
2110 87 : if (NumVecs == 2) {
2111 32 : if (is64BitVector)
2112 21 : SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2113 : else
2114 11 : SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
2115 : } else {
2116 55 : SDValue V2 = N->getOperand(Vec0Idx + 2);
2117 : SDValue V3 = (NumVecs == 3)
2118 29 : ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2119 55 : : N->getOperand(Vec0Idx + 3);
2120 55 : if (is64BitVector)
2121 31 : SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2122 : else
2123 24 : SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2124 : }
2125 87 : Ops.push_back(SuperReg);
2126 87 : Ops.push_back(getI32Imm(Lane, dl));
2127 87 : Ops.push_back(Pred);
2128 87 : Ops.push_back(Reg0);
2129 87 : Ops.push_back(Chain);
2130 :
2131 87 : unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2132 35 : QOpcodes[OpcodeIndex]);
2133 174 : SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2134 174 : CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdLn), {MemOp});
2135 87 : if (!IsLoad) {
2136 27 : ReplaceNode(N, VLdLn);
2137 : return;
2138 : }
2139 :
2140 : // Extract the subregisters.
2141 60 : SuperReg = SDValue(VLdLn, 0);
2142 : static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
2143 : ARM::qsub_3 == ARM::qsub_0 + 3,
2144 : "Unexpected subreg numbering");
2145 60 : unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
2146 234 : for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2147 348 : ReplaceUses(SDValue(N, Vec),
2148 174 : CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
2149 60 : ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
2150 60 : if (isUpdating)
2151 16 : ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
2152 60 : CurDAG->RemoveDeadNode(N);
2153 : }
2154 :
// Select a NEON load-and-duplicate node (VLD1DUP-VLD4DUP, optionally
// post-incrementing) and replace N.
//
//   IsIntrinsic - address operand is at index 2 instead of 1.
//   isUpdating  - post-increment form; adds an i32 writeback result.
//   NumVecs     - number of vectors (1-4).
//   DOpcodes    - opcodes for 64-bit vectors.
//   QOpcodes0   - opcodes for 128-bit single-vector forms, or the first half
//                 of a quad multi-vector dup split into two instructions.
//   QOpcodes1   - second half of such a split dup.
2155 70 : void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
2156 : bool isUpdating, unsigned NumVecs,
2157 : const uint16_t *DOpcodes,
2158 : const uint16_t *QOpcodes0,
2159 : const uint16_t *QOpcodes1) {
2160 : assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
2161 : SDLoc dl(N);
2162 :
2163 70 : SDValue MemAddr, Align;
2164 70 : unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2165 140 : if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2166 : return;
2167 :
2168 70 : SDValue Chain = N->getOperand(0);
2169 70 : EVT VT = N->getValueType(0);
2170 70 : bool is64BitVector = VT.is64BitVector();
2171 :
// Reduce the requested alignment to a power of two no larger than the total
// bytes accessed; the 3-vector dup forms always take alignment 0.
2172 : unsigned Alignment = 0;
2173 70 : if (NumVecs != 3) {
2174 120 : Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
2175 60 : unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
2176 60 : if (Alignment > NumBytes)
2177 : Alignment = NumBytes;
2178 60 : if (Alignment < 8 && Alignment < NumBytes)
2179 : Alignment = 0;
2180 : // Alignment must be a power of two; make sure of that.
2181 60 : Alignment = (Alignment & -Alignment);
2182 60 : if (Alignment == 1)
2183 : Alignment = 0;
2184 : }
2185 70 : Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2186 :
// Opcode tables are indexed by element size: 8 -> 0, 16 -> 1, 32 -> 2,
// 64 -> 3.
2187 : unsigned OpcodeIndex;
2188 70 : switch (VT.getSimpleVT().SimpleTy) {
2189 0 : default: llvm_unreachable("unhandled vld-dup type");
2190 : case MVT::v8i8:
2191 : case MVT::v16i8: OpcodeIndex = 0; break;
2192 20 : case MVT::v4i16:
2193 20 : case MVT::v8i16: OpcodeIndex = 1; break;
2194 25 : case MVT::v2f32:
2195 : case MVT::v2i32:
2196 : case MVT::v4f32:
2197 25 : case MVT::v4i32: OpcodeIndex = 2; break;
2198 3 : case MVT::v1f64:
2199 3 : case MVT::v1i64: OpcodeIndex = 3; break;
2200 : }
2201 :
// The result is modeled as one wide MVT::i64 vector spanning every written
// register (3-vector forms padded to 4).
2202 70 : unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2203 70 : if (!is64BitVector)
2204 21 : ResTyElts *= 2;
2205 70 : EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
2206 :
2207 : std::vector<EVT> ResTys;
2208 70 : ResTys.push_back(ResTy);
2209 70 : if (isUpdating)
2210 12 : ResTys.push_back(MVT::i32);
2211 70 : ResTys.push_back(MVT::Other);
2212 :
2213 70 : SDValue Pred = getAL(CurDAG, dl);
2214 140 : SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2215 :
2216 : SDNode *VLdDup;
2217 70 : if (is64BitVector || NumVecs == 1) {
2218 : SmallVector<SDValue, 6> Ops;
2219 61 : Ops.push_back(MemAddr);
2220 61 : Ops.push_back(Align);
2221 61 : unsigned Opc = is64BitVector ? DOpcodes[OpcodeIndex] :
2222 12 : QOpcodes0[OpcodeIndex];
2223 61 : if (isUpdating) {
2224 : // fixed-stride update instructions don't have an explicit writeback
2225 : // operand. It's implicit in the opcode itself.
2226 12 : SDValue Inc = N->getOperand(2);
2227 : bool IsImmUpdate =
2228 12 : isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
2229 12 : if (NumVecs <= 2 && !IsImmUpdate)
2230 4 : Opc = getVLDSTRegisterUpdateOpcode(Opc);
2231 12 : if (!IsImmUpdate)
2232 5 : Ops.push_back(Inc);
2233 : // FIXME: VLD3 and VLD4 haven't been updated to that form yet.
2234 7 : else if (NumVecs > 2)
2235 1 : Ops.push_back(Reg0);
2236 : }
2237 61 : Ops.push_back(Pred);
2238 61 : Ops.push_back(Reg0);
2239 61 : Ops.push_back(Chain);
2240 122 : VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2241 9 : } else if (NumVecs == 2) {
// Quad VLD2DUP: two instructions at the same address; the second is chained
// after the first.
2242 3 : const SDValue OpsA[] = { MemAddr, Align, Pred, Reg0, Chain };
2243 6 : SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex],
2244 : dl, ResTys, OpsA);
2245 :
2246 3 : Chain = SDValue(VLdA, 1);
2247 3 : const SDValue OpsB[] = { MemAddr, Align, Pred, Reg0, Chain };
2248 6 : VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB);
2249 : } else {
// Quad VLD3/VLD4 dup: the second instruction takes the first's partial
// super-register result as an input operand.
2250 : SDValue ImplDef =
2251 6 : SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
2252 6 : const SDValue OpsA[] = { MemAddr, Align, ImplDef, Pred, Reg0, Chain };
2253 12 : SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex],
2254 : dl, ResTys, OpsA);
2255 :
2256 : SDValue SuperReg = SDValue(VLdA, 0);
2257 6 : Chain = SDValue(VLdA, 1);
2258 6 : const SDValue OpsB[] = { MemAddr, Align, SuperReg, Pred, Reg0, Chain };
2259 12 : VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB);
2260 : }
2261 :
2262 : // Transfer memoperands.
2263 70 : MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2264 140 : CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdDup), {MemOp});
2265 :
2266 : // Extract the subregisters.
2267 70 : if (NumVecs == 1) {
2268 68 : ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0));
2269 : } else {
2270 : SDValue SuperReg = SDValue(VLdDup, 0);
2271 : static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
2272 36 : unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
2273 138 : for (unsigned Vec = 0; Vec != NumVecs; ++Vec) {
2274 204 : ReplaceUses(SDValue(N, Vec),
2275 102 : CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
2276 : }
2277 : }
2278 70 : ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
2279 70 : if (isUpdating)
2280 24 : ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
2281 70 : CurDAG->RemoveDeadNode(N);
2282 : }
2283 :
// Try to fold N into a single bitfield-extract instruction (UBFX/SBFX, or
// the Thumb2 t2UBFX/t2SBFX), available from ARMv6T2 on. Patterns handled:
//   - AND with a low-bit mask of a SRL (unsigned extract);
//   - a right shift of a SHL (extract from the middle of a value);
//   - a right shift of an AND with a shifted mask;
//   - SIGN_EXTEND_INREG of a SRL/SRA.
// Returns true and replaces N on success; false leaves N untouched.
2284 2499 : bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
2285 2499 : if (!Subtarget->hasV6T2Ops())
2286 : return false;
2287 :
2288 : unsigned Opc = isSigned
2289 1321 : ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
2290 1091 : : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX)
2291 : : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
2292 : SDLoc dl(N);
2293 :
2294 : // For unsigned extracts, check for a shift right and mask
2295 : unsigned And_imm = 0;
2296 2642 : if (N->getOpcode() == ISD::AND) {
2297 : if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {
2298 :
2299 : // The immediate is a mask of the low bits iff imm & (imm+1) == 0
2300 643 : if (And_imm & (And_imm + 1))
2301 : return false;
2302 :
2303 : unsigned Srl_imm = 0;
2304 479 : if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
2305 : Srl_imm)) {
2306 : assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2307 :
2308 : // Mask off the unnecessary bits of the AND immediate; normally
2309 : // DAGCombine will do this, but that might not happen if
2310 : // targetShrinkDemandedConstant chooses a different immediate.
2311 59 : And_imm &= -1U >> Srl_imm;
2312 :
2313 : // Note: The width operand is encoded as width-1.
2314 59 : unsigned Width = countTrailingOnes(And_imm) - 1;
2315 : unsigned LSB = Srl_imm;
2316 :
2317 118 : SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2318 :
// If the field reaches the top bit, a plain right shift is cheaper than a
// bitfield extract.
2319 118 : if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
2320 : // It's cheaper to use a right shift to extract the top bits.
2321 1 : if (Subtarget->isThumb()) {
2322 0 : Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
2323 0 : SDValue Ops[] = { N->getOperand(0).getOperand(0),
2324 0 : CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2325 0 : getAL(CurDAG, dl), Reg0, Reg0 };
2326 0 : CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2327 : return true;
2328 : }
2329 :
2330 : // ARM models shift instructions as MOVsi with shifter operand.
2331 : ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
2332 : SDValue ShOpc =
2333 1 : CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
2334 1 : MVT::i32);
2335 1 : SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
2336 1 : getAL(CurDAG, dl), Reg0, Reg0 };
2337 2 : CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
2338 1 : return true;
2339 : }
2340 :
2341 : assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
2342 58 : SDValue Ops[] = { N->getOperand(0).getOperand(0),
2343 116 : CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2344 58 : CurDAG->getTargetConstant(Width, dl, MVT::i32),
2345 116 : getAL(CurDAG, dl), Reg0 };
2346 116 : CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2347 58 : return true;
2348 : }
2349 : }
2350 : return false;
2351 : }
2352 :
2353 : // Otherwise, we're looking for a shift of a shift
2354 : unsigned Shl_imm = 0;
2355 516 : if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
2356 : assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
2357 : unsigned Srl_imm = 0;
2358 12 : if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
2359 : assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2360 : // Note: The width operand is encoded as width-1.
2361 12 : unsigned Width = 32 - Srl_imm - 1;
2362 12 : int LSB = Srl_imm - Shl_imm;
// A negative LSB would mean the field starts below bit 0 -- not extractable.
2363 12 : if (LSB < 0)
2364 : return false;
2365 22 : SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2366 : assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
2367 11 : SDValue Ops[] = { N->getOperand(0).getOperand(0),
2368 22 : CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2369 11 : CurDAG->getTargetConstant(Width, dl, MVT::i32),
2370 22 : getAL(CurDAG, dl), Reg0 };
2371 22 : CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2372 11 : return true;
2373 : }
2374 : }
2375 :
2376 : // Or we are looking for a shift of an and, with a mask operand
2377 : if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) &&
2378 : isShiftedMask_32(And_imm)) {
2379 : unsigned Srl_imm = 0;
2380 5 : unsigned LSB = countTrailingZeros(And_imm);
2381 : // Shift must be the same as the ands lsb
2382 10 : if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) {
2383 : assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2384 3 : unsigned MSB = 31 - countLeadingZeros(And_imm);
2385 : // Note: The width operand is encoded as width-1.
2386 3 : unsigned Width = MSB - LSB;
2387 6 : SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2388 : assert(Srl_imm + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
2389 3 : SDValue Ops[] = { N->getOperand(0).getOperand(0),
2390 6 : CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32),
2391 3 : CurDAG->getTargetConstant(Width, dl, MVT::i32),
2392 6 : getAL(CurDAG, dl), Reg0 };
2393 6 : CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2394 : return true;
2395 : }
2396 : }
2397 :
2398 501 : if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
2399 168 : unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2400 : unsigned LSB = 0;
2401 323 : if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
2402 : !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
2403 : return false;
2404 :
2405 13 : if (LSB + Width > 32)
2406 : return false;
2407 :
2408 26 : SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2409 : assert(LSB + Width <= 32 && "Shouldn't create an invalid ubfx");
2410 13 : SDValue Ops[] = { N->getOperand(0).getOperand(0),
2411 26 : CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2412 13 : CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
2413 26 : getAL(CurDAG, dl), Reg0 };
2414 26 : CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2415 13 : return true;
2416 : }
2417 :
2418 : return false;
2419 : }
2419 :
2420 : /// Target-specific DAG combining for ISD::XOR.
2421 : /// Target-independent combining lowers SELECT_CC nodes of the form
2422 : /// select_cc setg[ge] X, 0, X, -X
2423 : /// select_cc setgt X, -1, X, -X
2424 : /// select_cc setl[te] X, 0, -X, X
2425 : /// select_cc setlt X, 1, -X, X
2426 : /// which represent Integer ABS into:
2427 : /// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
2428 : /// ARM instruction selection detects the latter and matches it to
2429 : /// ARM::ABS or ARM::t2ABS machine node.
2430 0 : bool ARMDAGToDAGISel::tryABSOp(SDNode *N){
2431 0 : SDValue XORSrc0 = N->getOperand(0);
2432 0 : SDValue XORSrc1 = N->getOperand(1);
2433 0 : EVT VT = N->getValueType(0);
2434 :
2435 0 : if (Subtarget->isThumb1Only())
2436 0 : return false;
2437 :
2438 0 : if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
2439 0 : return false;
2440 :
2441 0 : SDValue ADDSrc0 = XORSrc0.getOperand(0);
2442 0 : SDValue ADDSrc1 = XORSrc0.getOperand(1);
2443 0 : SDValue SRASrc0 = XORSrc1.getOperand(0);
2444 0 : SDValue SRASrc1 = XORSrc1.getOperand(1);
2445 : ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1);
2446 0 : EVT XType = SRASrc0.getValueType();
2447 0 : unsigned Size = XType.getSizeInBits() - 1;
2448 :
2449 0 : if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
2450 0 : XType.isInteger() && SRAConstant != nullptr &&
2451 0 : Size == SRAConstant->getZExtValue()) {
2452 : unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
2453 0 : CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
2454 0 : return true;
2455 : }
2456 :
2457 : return false;
2458 : }
2459 :
2460 : /// We've got special pseudo-instructions for these
2461 6 : void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
2462 : unsigned Opcode;
2463 : EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
2464 : if (MemTy == MVT::i8)
2465 : Opcode = ARM::CMP_SWAP_8;
2466 : else if (MemTy == MVT::i16)
2467 : Opcode = ARM::CMP_SWAP_16;
2468 : else if (MemTy == MVT::i32)
2469 : Opcode = ARM::CMP_SWAP_32;
2470 : else
2471 0 : llvm_unreachable("Unknown AtomicCmpSwap type");
2472 :
2473 6 : SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
2474 6 : N->getOperand(0)};
2475 18 : SDNode *CmpSwap = CurDAG->getMachineNode(
2476 6 : Opcode, SDLoc(N),
2477 : CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);
2478 :
2479 6 : MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
2480 12 : CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
2481 :
2482 6 : ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
2483 6 : ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
2484 6 : CurDAG->RemoveDeadNode(N);
2485 6 : }
2486 :
2487 : static Optional<std::pair<unsigned, unsigned>>
2488 91 : getContiguousRangeOfSetBits(const APInt &A) {
2489 91 : unsigned FirstOne = A.getBitWidth() - A.countLeadingZeros() - 1;
2490 91 : unsigned LastOne = A.countTrailingZeros();
2491 91 : if (A.countPopulation() != (FirstOne - LastOne + 1))
2492 : return Optional<std::pair<unsigned,unsigned>>();
2493 : return std::make_pair(FirstOne, LastOne);
2494 : }
2495 :
/// Try to fold (and X, C) feeding a CMPZ-against-zero into a single
/// flag-setting shift when C is one contiguous run of set bits, so the AND
/// and the compare collapse into LSLS/LSRS (or a shift pair). On success the
/// AND node is replaced in place. \p SwitchEQNEToPLMI is set to true when the
/// caller must rewrite an EQ/NE condition into PL/MI, because the single
/// tested bit has been moved into the sign bit (case 3 below).
void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
  assert(N->getOpcode() == ARMISD::CMPZ);
  SwitchEQNEToPLMI = false;

  if (!Subtarget->isThumb())
    // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and
    // LSR don't exist as standalone instructions - they need the barrel shifter.
    return;

  // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X))
  SDValue And = N->getOperand(0);
  // Only fold when the AND has no other users; it is replaced wholesale below.
  if (!And->hasOneUse())
    return;

  // The compare must be against constant zero and its LHS must be an AND.
  SDValue Zero = N->getOperand(1);
  if (!isa<ConstantSDNode>(Zero) || !cast<ConstantSDNode>(Zero)->isNullValue() ||
      And->getOpcode() != ISD::AND)
    return;
  SDValue X = And.getOperand(0);
  auto C = dyn_cast<ConstantSDNode>(And.getOperand(1));

  if (!C)
    return;
  // The mask must be a single contiguous run of set bits. Range->first is the
  // index of the highest set bit, Range->second the index of the lowest.
  auto Range = getContiguousRangeOfSetBits(C->getAPIntValue());
  if (!Range)
    return;

  // There are several ways to lower this:
  SDNode *NewN;
  SDLoc dl(N);

  // Emit one flag-setting immediate shift. Thumb-1 tLSLri/tLSRri take CPSR as
  // an explicit first (def) operand; for Thumb-2 the opcode is rewritten to
  // t2LSLri/t2LSRri, which instead carry predicate and optional-def register
  // operands after the shift amount.
  auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* {
    if (Subtarget->isThumb2()) {
      Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri;
      SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32),
                        getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
                        CurDAG->getRegister(0, MVT::i32) };
      return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
    } else {
      SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src,
                       CurDAG->getTargetConstant(Imm, dl, MVT::i32),
                       getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
      return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
    }
  };

  if (Range->second == 0) {
    // 1. Mask includes the LSB -> Simply shift the top N bits off
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    ReplaceNode(And.getNode(), NewN);
  } else if (Range->first == 31) {
    // 2. Mask includes the MSB -> Simply shift the bottom N bits off
    NewN = EmitShift(ARM::tLSRri, X, Range->second);
    ReplaceNode(And.getNode(), NewN);
  } else if (Range->first == Range->second) {
    // 3. Only one bit is set. We can shift this into the sign bit and use a
    // PL/MI comparison.
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    ReplaceNode(And.getNode(), NewN);

    SwitchEQNEToPLMI = true;
  } else if (!Subtarget->hasV6T2Ops()) {
    // 4. Do a double shift to clear bottom and top bits, but only in
    // thumb-1 mode as in thumb-2 we can use UBFX.
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0),
                     Range->second + (31 - Range->first));
    ReplaceNode(And.getNode(), NewN);
  }

}
2567 :
2568 302193 : void ARMDAGToDAGISel::Select(SDNode *N) {
2569 : SDLoc dl(N);
2570 :
2571 302193 : if (N->isMachineOpcode()) {
2572 : N->setNodeId(-1);
2573 87 : return; // Already selected.
2574 : }
2575 :
2576 302106 : switch (N->getOpcode()) {
2577 : default: break;
2578 135 : case ISD::WRITE_REGISTER:
2579 135 : if (tryWriteRegister(N))
2580 : return;
2581 : break;
2582 95 : case ISD::READ_REGISTER:
2583 95 : if (tryReadRegister(N))
2584 : return;
2585 : break;
2586 3566 : case ISD::INLINEASM:
2587 3566 : if (tryInlineAsm(N))
2588 : return;
2589 : break;
2590 471 : case ISD::XOR:
2591 : // Select special operations if XOR node forms integer ABS pattern
2592 471 : if (tryABSOp(N))
2593 : return;
2594 : // Other cases are autogenerated.
2595 : break;
2596 : case ISD::Constant: {
2597 5944 : unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
2598 : // If we can't materialize the constant we need to use a literal pool
2599 5944 : if (ConstantMaterializationCost(Val) > 2) {
2600 331 : SDValue CPIdx = CurDAG->getTargetConstantPool(
2601 331 : ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
2602 331 : TLI->getPointerTy(CurDAG->getDataLayout()));
2603 :
2604 : SDNode *ResNode;
2605 331 : if (Subtarget->isThumb()) {
2606 : SDValue Ops[] = {
2607 : CPIdx,
2608 221 : getAL(CurDAG, dl),
2609 442 : CurDAG->getRegister(0, MVT::i32),
2610 442 : CurDAG->getEntryNode()
2611 221 : };
2612 221 : ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
2613 : Ops);
2614 : } else {
2615 : SDValue Ops[] = {
2616 : CPIdx,
2617 220 : CurDAG->getTargetConstant(0, dl, MVT::i32),
2618 110 : getAL(CurDAG, dl),
2619 220 : CurDAG->getRegister(0, MVT::i32),
2620 220 : CurDAG->getEntryNode()
2621 110 : };
2622 110 : ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
2623 : Ops);
2624 : }
2625 : // Annotate the Node with memory operand information so that MachineInstr
2626 : // queries work properly. This e.g. gives the register allocation the
2627 : // required information for rematerialization.
2628 331 : MachineFunction& MF = CurDAG->getMachineFunction();
2629 : MachineMemOperand *MemOp =
2630 331 : MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
2631 : MachineMemOperand::MOLoad, 4, 4);
2632 :
2633 662 : CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
2634 :
2635 331 : ReplaceNode(N, ResNode);
2636 : return;
2637 : }
2638 :
2639 : // Other cases are autogenerated.
2640 : break;
2641 : }
2642 : case ISD::FrameIndex: {
2643 : // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
2644 2084 : int FI = cast<FrameIndexSDNode>(N)->getIndex();
2645 2084 : SDValue TFI = CurDAG->getTargetFrameIndex(
2646 2084 : FI, TLI->getPointerTy(CurDAG->getDataLayout()));
2647 2084 : if (Subtarget->isThumb1Only()) {
2648 : // Set the alignment of the frame object to 4, to avoid having to generate
2649 : // more than one ADD
2650 285 : MachineFrameInfo &MFI = MF->getFrameInfo();
2651 285 : if (MFI.getObjectAlignment(FI) < 4)
2652 : MFI.setObjectAlignment(FI, 4);
2653 285 : CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
2654 : CurDAG->getTargetConstant(0, dl, MVT::i32));
2655 285 : return;
2656 : } else {
2657 1799 : unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
2658 : ARM::t2ADDri : ARM::ADDri);
2659 3598 : SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),
2660 1799 : getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2661 5397 : CurDAG->getRegister(0, MVT::i32) };
2662 3598 : CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2663 : return;
2664 : }
2665 : }
2666 476 : case ISD::SRL:
2667 476 : if (tryV6T2BitfieldExtractOp(N, false))
2668 : return;
2669 : break;
2670 496 : case ISD::SIGN_EXTEND_INREG:
2671 : case ISD::SRA:
2672 496 : if (tryV6T2BitfieldExtractOp(N, true))
2673 : return;
2674 : break;
2675 308 : case ISD::MUL:
2676 308 : if (Subtarget->isThumb1Only())
2677 : break;
2678 261 : if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
2679 55 : unsigned RHSV = C->getZExtValue();
2680 55 : if (!RHSV) break;
2681 55 : if (isPowerOf2_32(RHSV-1)) { // 2^n+1?
2682 : unsigned ShImm = Log2_32(RHSV-1);
2683 : if (ShImm >= 32)
2684 : break;
2685 0 : SDValue V = N->getOperand(0);
2686 : ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2687 0 : SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2688 0 : SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2689 0 : if (Subtarget->isThumb()) {
2690 0 : SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2691 0 : CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
2692 : return;
2693 : } else {
2694 0 : SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2695 0 : Reg0 };
2696 0 : CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
2697 : return;
2698 : }
2699 : }
2700 55 : if (isPowerOf2_32(RHSV+1)) { // 2^n-1?
2701 : unsigned ShImm = Log2_32(RHSV+1);
2702 : if (ShImm >= 32)
2703 : break;
2704 0 : SDValue V = N->getOperand(0);
2705 : ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2706 0 : SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2707 0 : SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2708 0 : if (Subtarget->isThumb()) {
2709 0 : SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2710 0 : CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
2711 : return;
2712 : } else {
2713 0 : SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2714 0 : Reg0 };
2715 0 : CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
2716 : return;
2717 : }
2718 : }
2719 : }
2720 : break;
2721 1527 : case ISD::AND: {
2722 : // Check for unsigned bitfield extract
2723 1527 : if (tryV6T2BitfieldExtractOp(N, false))
2724 90 : return;
2725 :
2726 : // If an immediate is used in an AND node, it is possible that the immediate
2727 : // can be more optimally materialized when negated. If this is the case we
2728 : // can negate the immediate and use a BIC instead.
2729 1468 : auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
2730 867 : if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {
2731 528 : uint32_t Imm = (uint32_t) N1C->getZExtValue();
2732 :
2733 : // In Thumb2 mode, an AND can take a 12-bit immediate. If this
2734 : // immediate can be negated and fit in the immediate operand of
2735 : // a t2BIC, don't do any manual transform here as this can be
2736 : // handled by the generic ISel machinery.
2737 : bool PreferImmediateEncoding =
2738 528 : Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));
2739 271 : if (!PreferImmediateEncoding &&
2740 271 : ConstantMaterializationCost(Imm) >
2741 271 : ConstantMaterializationCost(~Imm)) {
2742 : // The current immediate costs more to materialize than a negated
2743 : // immediate, so negate the immediate and use a BIC.
2744 : SDValue NewImm =
2745 87 : CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32);
2746 : // If the new constant didn't exist before, reposition it in the topological
2747 : // ordering so it is just before N. Otherwise, don't touch its location.
2748 29 : if (NewImm->getNodeId() == -1)
2749 19 : CurDAG->RepositionNode(N->getIterator(), NewImm.getNode());
2750 :
2751 29 : if (!Subtarget->hasThumb2()) {
2752 44 : SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32),
2753 44 : N->getOperand(0), NewImm, getAL(CurDAG, dl),
2754 22 : CurDAG->getRegister(0, MVT::i32)};
2755 44 : ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops));
2756 : return;
2757 : } else {
2758 14 : SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl),
2759 14 : CurDAG->getRegister(0, MVT::i32),
2760 21 : CurDAG->getRegister(0, MVT::i32)};
2761 7 : ReplaceNode(N,
2762 21 : CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops));
2763 : return;
2764 : }
2765 : }
2766 : }
2767 :
2768 : // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
2769 : // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
2770 : // are entirely contributed by c2 and lower 16-bits are entirely contributed
2771 : // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
2772 : // Select it to: "movt x, ((c1 & 0xffff) >> 16)
2773 2878 : EVT VT = N->getValueType(0);
2774 : if (VT != MVT::i32)
2775 : break;
2776 795 : unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
2777 1385 : ? ARM::t2MOVTi16
2778 852 : : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
2779 : if (!Opc)
2780 : break;
2781 694 : SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
2782 : N1C = dyn_cast<ConstantSDNode>(N1);
2783 : if (!N1C)
2784 : break;
2785 577 : if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
2786 15 : SDValue N2 = N0.getOperand(1);
2787 : ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
2788 : if (!N2C)
2789 : break;
2790 6 : unsigned N1CVal = N1C->getZExtValue();
2791 6 : unsigned N2CVal = N2C->getZExtValue();
2792 6 : if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
2793 6 : (N1CVal & 0xffffU) == 0xffffU &&
2794 6 : (N2CVal & 0xffffU) == 0x0U) {
2795 2 : SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
2796 2 : dl, MVT::i32);
2797 : SDValue Ops[] = { N0.getOperand(0), Imm16,
2798 2 : getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
2799 4 : ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
2800 : return;
2801 : }
2802 : }
2803 :
2804 : break;
2805 : }
2806 12 : case ARMISD::UMAAL: {
2807 12 : unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
2808 12 : SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2809 : N->getOperand(2), N->getOperand(3),
2810 12 : getAL(CurDAG, dl),
2811 12 : CurDAG->getRegister(0, MVT::i32) };
2812 24 : ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops));
2813 : return;
2814 : }
2815 31 : case ARMISD::UMLAL:{
2816 31 : if (Subtarget->isThumb()) {
2817 15 : SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2818 15 : N->getOperand(3), getAL(CurDAG, dl),
2819 15 : CurDAG->getRegister(0, MVT::i32)};
2820 15 : ReplaceNode(
2821 45 : N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops));
2822 : return;
2823 : }else{
2824 16 : SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2825 16 : N->getOperand(3), getAL(CurDAG, dl),
2826 32 : CurDAG->getRegister(0, MVT::i32),
2827 48 : CurDAG->getRegister(0, MVT::i32) };
2828 28 : ReplaceNode(N, CurDAG->getMachineNode(
2829 16 : Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,
2830 : MVT::i32, MVT::i32, Ops));
2831 : return;
2832 : }
2833 : }
2834 37 : case ARMISD::SMLAL:{
2835 37 : if (Subtarget->isThumb()) {
2836 22 : SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2837 22 : N->getOperand(3), getAL(CurDAG, dl),
2838 22 : CurDAG->getRegister(0, MVT::i32)};
2839 22 : ReplaceNode(
2840 66 : N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops));
2841 : return;
2842 : }else{
2843 15 : SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2844 15 : N->getOperand(3), getAL(CurDAG, dl),
2845 30 : CurDAG->getRegister(0, MVT::i32),
2846 45 : CurDAG->getRegister(0, MVT::i32) };
2847 24 : ReplaceNode(N, CurDAG->getMachineNode(
2848 15 : Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl,
2849 : MVT::i32, MVT::i32, Ops));
2850 : return;
2851 : }
2852 : }
2853 149 : case ARMISD::SUBE: {
2854 149 : if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
2855 : break;
2856 : // Look for a pattern to match SMMLS
2857 : // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b))))
2858 82 : if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI ||
2859 94 : N->getOperand(2).getOpcode() != ARMISD::SUBC ||
2860 : !SDValue(N, 1).use_empty())
2861 : break;
2862 :
2863 7 : if (Subtarget->isThumb())
2864 : assert(Subtarget->hasThumb2() &&
2865 : "This pattern should not be generated for Thumb");
2866 :
2867 7 : SDValue SmulLoHi = N->getOperand(1);
2868 7 : SDValue Subc = N->getOperand(2);
2869 : auto *Zero = dyn_cast<ConstantSDNode>(Subc.getOperand(0));
2870 :
2871 14 : if (!Zero || Zero->getZExtValue() != 0 ||
2872 : Subc.getOperand(1) != SmulLoHi.getValue(0) ||
2873 : N->getOperand(1) != SmulLoHi.getValue(1) ||
2874 : N->getOperand(2) != Subc.getValue(1))
2875 : break;
2876 :
2877 : unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS;
2878 : SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1),
2879 7 : N->getOperand(0), getAL(CurDAG, dl),
2880 7 : CurDAG->getRegister(0, MVT::i32) };
2881 14 : ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops));
2882 7 : return;
2883 : }
2884 13389 : case ISD::LOAD: {
2885 13389 : if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
2886 4214 : if (tryT2IndexedLoad(N))
2887 : return;
2888 9175 : } else if (Subtarget->isThumb()) {
2889 1711 : if (tryT1IndexedLoad(N))
2890 : return;
2891 7464 : } else if (tryARMIndexedLoad(N))
2892 : return;
2893 : // Other cases are autogenerated.
2894 : break;
2895 : }
2896 1962 : case ARMISD::BRCOND: {
2897 : // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
2898 : // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
2899 : // Pattern complexity = 6 cost = 1 size = 0
2900 :
2901 : // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
2902 : // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
2903 : // Pattern complexity = 6 cost = 1 size = 0
2904 :
2905 : // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
2906 : // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
2907 : // Pattern complexity = 6 cost = 1 size = 0
2908 :
2909 1962 : unsigned Opc = Subtarget->isThumb() ?
2910 1230 : ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
2911 1962 : SDValue Chain = N->getOperand(0);
2912 1962 : SDValue N1 = N->getOperand(1);
2913 1962 : SDValue N2 = N->getOperand(2);
2914 1962 : SDValue N3 = N->getOperand(3);
2915 1962 : SDValue InFlag = N->getOperand(4);
2916 : assert(N1.getOpcode() == ISD::BasicBlock);
2917 : assert(N2.getOpcode() == ISD::Constant);
2918 : assert(N3.getOpcode() == ISD::Register);
2919 :
2920 3924 : unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue();
2921 :
2922 1962 : if (InFlag.getOpcode() == ARMISD::CMPZ) {
2923 : bool SwitchEQNEToPLMI;
2924 1602 : SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
2925 1602 : InFlag = N->getOperand(4);
2926 :
2927 1602 : if (SwitchEQNEToPLMI) {
2928 11 : switch ((ARMCC::CondCodes)CC) {
2929 0 : default: llvm_unreachable("CMPZ must be either NE or EQ!");
2930 : case ARMCC::NE:
2931 : CC = (unsigned)ARMCC::MI;
2932 : break;
2933 9 : case ARMCC::EQ:
2934 : CC = (unsigned)ARMCC::PL;
2935 9 : break;
2936 : }
2937 : }
2938 : }
2939 :
2940 1962 : SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32);
2941 1962 : SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
2942 3924 : SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
2943 : MVT::Glue, Ops);
2944 : Chain = SDValue(ResNode, 0);
2945 1962 : if (N->getNumValues() == 2) {
2946 : InFlag = SDValue(ResNode, 1);
2947 106 : ReplaceUses(SDValue(N, 1), InFlag);
2948 : }
2949 1962 : ReplaceUses(SDValue(N, 0),
2950 : SDValue(Chain.getNode(), Chain.getResNo()));
2951 1962 : CurDAG->RemoveDeadNode(N);
2952 : return;
2953 : }
2954 :
2955 2151 : case ARMISD::CMPZ: {
2956 : // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
2957 : // This allows us to avoid materializing the expensive negative constant.
2958 : // The CMPZ #0 is useless and will be peepholed away but we need to keep it
2959 : // for its glue output.
2960 2151 : SDValue X = N->getOperand(0);
2961 2151 : auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode());
2962 3900 : if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) {
2963 27 : int64_t Addend = -C->getSExtValue();
2964 :
2965 : SDNode *Add = nullptr;
2966 : // ADDS can be better than CMN if the immediate fits in a
2967 : // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3.
2968 : // Outside that range we can just use a CMN which is 32-bit but has a
2969 : // 12-bit immediate range.
2970 27 : if (Addend < 1<<8) {
2971 : if (Subtarget->isThumb2()) {
2972 42 : SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32),
2973 21 : getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2974 63 : CurDAG->getRegister(0, MVT::i32) };
2975 42 : Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops);
2976 : } else {
2977 2 : unsigned Opc = (Addend < 1<<3) ? ARM::tADDi3 : ARM::tADDi8;
2978 4 : SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
2979 4 : CurDAG->getTargetConstant(Addend, dl, MVT::i32),
2980 2 : getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
2981 4 : Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
2982 : }
2983 : }
2984 23 : if (Add) {
2985 46 : SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)};
2986 46 : CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2);
2987 : }
2988 : }
2989 : // Other cases are autogenerated.
2990 : break;
2991 : }
2992 :
2993 1757 : case ARMISD::CMOV: {
2994 1757 : SDValue InFlag = N->getOperand(4);
2995 :
2996 1757 : if (InFlag.getOpcode() == ARMISD::CMPZ) {
2997 : bool SwitchEQNEToPLMI;
2998 549 : SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
2999 :
3000 549 : if (SwitchEQNEToPLMI) {
3001 4 : SDValue ARMcc = N->getOperand(2);
3002 : ARMCC::CondCodes CC =
3003 4 : (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
3004 :
3005 4 : switch (CC) {
3006 0 : default: llvm_unreachable("CMPZ must be either NE or EQ!");
3007 : case ARMCC::NE:
3008 : CC = ARMCC::MI;
3009 : break;
3010 4 : case ARMCC::EQ:
3011 : CC = ARMCC::PL;
3012 4 : break;
3013 : }
3014 8 : SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32);
3015 4 : SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc,
3016 4 : N->getOperand(3), N->getOperand(4)};
3017 12 : CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops);
3018 : }
3019 :
3020 : }
3021 : // Other cases are autogenerated.
3022 : break;
3023 : }
3024 :
3025 28 : case ARMISD::VZIP: {
3026 : unsigned Opc = 0;
3027 56 : EVT VT = N->getValueType(0);
3028 : switch (VT.getSimpleVT().SimpleTy) {
3029 : default: return;
3030 : case MVT::v8i8: Opc = ARM::VZIPd8; break;
3031 8 : case MVT::v4f16:
3032 8 : case MVT::v4i16: Opc = ARM::VZIPd16; break;
3033 0 : case MVT::v2f32:
3034 : // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
3035 0 : case MVT::v2i32: Opc = ARM::VTRNd32; break;
3036 4 : case MVT::v16i8: Opc = ARM::VZIPq8; break;
3037 3 : case MVT::v8f16:
3038 3 : case MVT::v8i16: Opc = ARM::VZIPq16; break;
3039 8 : case MVT::v4f32:
3040 8 : case MVT::v4i32: Opc = ARM::VZIPq32; break;
3041 : }
3042 28 : SDValue Pred = getAL(CurDAG, dl);
3043 56 : SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3044 28 : SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3045 56 : ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3046 28 : return;
3047 : }
3048 50 : case ARMISD::VUZP: {
3049 : unsigned Opc = 0;
3050 100 : EVT VT = N->getValueType(0);
3051 : switch (VT.getSimpleVT().SimpleTy) {
3052 : default: return;
3053 : case MVT::v8i8: Opc = ARM::VUZPd8; break;
3054 15 : case MVT::v4f16:
3055 15 : case MVT::v4i16: Opc = ARM::VUZPd16; break;
3056 0 : case MVT::v2f32:
3057 : // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
3058 0 : case MVT::v2i32: Opc = ARM::VTRNd32; break;
3059 2 : case MVT::v16i8: Opc = ARM::VUZPq8; break;
3060 7 : case MVT::v8f16:
3061 7 : case MVT::v8i16: Opc = ARM::VUZPq16; break;
3062 8 : case MVT::v4f32:
3063 8 : case MVT::v4i32: Opc = ARM::VUZPq32; break;
3064 : }
3065 50 : SDValue Pred = getAL(CurDAG, dl);
3066 100 : SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3067 50 : SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3068 100 : ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3069 50 : return;
3070 : }
3071 33 : case ARMISD::VTRN: {
3072 : unsigned Opc = 0;
3073 66 : EVT VT = N->getValueType(0);
3074 : switch (VT.getSimpleVT().SimpleTy) {
3075 : default: return;
3076 : case MVT::v8i8: Opc = ARM::VTRNd8; break;
3077 8 : case MVT::v4f16:
3078 8 : case MVT::v4i16: Opc = ARM::VTRNd16; break;
3079 5 : case MVT::v2f32:
3080 5 : case MVT::v2i32: Opc = ARM::VTRNd32; break;
3081 2 : case MVT::v16i8: Opc = ARM::VTRNq8; break;
3082 7 : case MVT::v8f16:
3083 7 : case MVT::v8i16: Opc = ARM::VTRNq16; break;
3084 5 : case MVT::v4f32:
3085 5 : case MVT::v4i32: Opc = ARM::VTRNq32; break;
3086 : }
3087 33 : SDValue Pred = getAL(CurDAG, dl);
3088 66 : SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3089 33 : SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3090 66 : ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3091 33 : return;
3092 : }
3093 408 : case ARMISD::BUILD_VECTOR: {
3094 408 : EVT VecVT = N->getValueType(0);
3095 408 : EVT EltVT = VecVT.getVectorElementType();
3096 : unsigned NumElts = VecVT.getVectorNumElements();
3097 : if (EltVT == MVT::f64) {
3098 : assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
3099 319 : ReplaceNode(
3100 319 : N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
3101 319 : return;
3102 : }
3103 : assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
3104 89 : if (NumElts == 2) {
3105 32 : ReplaceNode(
3106 32 : N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
3107 32 : return;
3108 : }
3109 : assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
3110 57 : ReplaceNode(N,
3111 : createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
3112 57 : N->getOperand(2), N->getOperand(3)));
3113 57 : return;
3114 : }
3115 :
3116 28 : case ARMISD::VLD1DUP: {
3117 : static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16,
3118 : ARM::VLD1DUPd32 };
3119 : static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16,
3120 : ARM::VLD1DUPq32 };
3121 28 : SelectVLDDup(N, /* IsIntrinsic= */ false, false, 1, DOpcodes, QOpcodes);
3122 28 : return;
3123 : }
3124 :
3125 4 : case ARMISD::VLD2DUP: {
3126 : static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
3127 : ARM::VLD2DUPd32 };
3128 4 : SelectVLDDup(N, /* IsIntrinsic= */ false, false, 2, Opcodes);
3129 4 : return;
3130 : }
3131 :
3132 1 : case ARMISD::VLD3DUP: {
3133 : static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
3134 : ARM::VLD3DUPd16Pseudo,
3135 : ARM::VLD3DUPd32Pseudo };
3136 1 : SelectVLDDup(N, /* IsIntrinsic= */ false, false, 3, Opcodes);
3137 1 : return;
3138 : }
3139 :
3140 1 : case ARMISD::VLD4DUP: {
3141 : static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
3142 : ARM::VLD4DUPd16Pseudo,
3143 : ARM::VLD4DUPd32Pseudo };
3144 1 : SelectVLDDup(N, /* IsIntrinsic= */ false, false, 4, Opcodes);
3145 1 : return;
3146 : }
3147 :
3148 6 : case ARMISD::VLD1DUP_UPD: {
3149 : static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed,
3150 : ARM::VLD1DUPd16wb_fixed,
3151 : ARM::VLD1DUPd32wb_fixed };
3152 : static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed,
3153 : ARM::VLD1DUPq16wb_fixed,
3154 : ARM::VLD1DUPq32wb_fixed };
3155 6 : SelectVLDDup(N, /* IsIntrinsic= */ false, true, 1, DOpcodes, QOpcodes);
3156 6 : return;
3157 : }
3158 :
3159 4 : case ARMISD::VLD2DUP_UPD: {
3160 : static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed,
3161 : ARM::VLD2DUPd16wb_fixed,
3162 : ARM::VLD2DUPd32wb_fixed };
3163 4 : SelectVLDDup(N, /* IsIntrinsic= */ false, true, 2, Opcodes);
3164 4 : return;
3165 : }
3166 :
3167 1 : case ARMISD::VLD3DUP_UPD: {
3168 : static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
3169 : ARM::VLD3DUPd16Pseudo_UPD,
3170 : ARM::VLD3DUPd32Pseudo_UPD };
3171 1 : SelectVLDDup(N, /* IsIntrinsic= */ false, true, 3, Opcodes);
3172 1 : return;
3173 : }
3174 :
3175 1 : case ARMISD::VLD4DUP_UPD: {
3176 : static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
3177 : ARM::VLD4DUPd16Pseudo_UPD,
3178 : ARM::VLD4DUPd32Pseudo_UPD };
3179 1 : SelectVLDDup(N, /* IsIntrinsic= */ false, true, 4, Opcodes);
3180 1 : return;
3181 : }
3182 :
3183 112 : case ARMISD::VLD1_UPD: {
3184 : static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
3185 : ARM::VLD1d16wb_fixed,
3186 : ARM::VLD1d32wb_fixed,
3187 : ARM::VLD1d64wb_fixed };
3188 : static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
3189 : ARM::VLD1q16wb_fixed,
3190 : ARM::VLD1q32wb_fixed,
3191 : ARM::VLD1q64wb_fixed };
3192 112 : SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
3193 112 : return;
3194 : }
3195 :
3196 7 : case ARMISD::VLD2_UPD: {
3197 : static const uint16_t DOpcodes[] = { ARM::VLD2d8wb_fixed,
3198 : ARM::VLD2d16wb_fixed,
3199 : ARM::VLD2d32wb_fixed,
3200 : ARM::VLD1q64wb_fixed};
3201 : static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed,
3202 : ARM::VLD2q16PseudoWB_fixed,
3203 : ARM::VLD2q32PseudoWB_fixed };
3204 7 : SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
3205 7 : return;
3206 : }
3207 :
3208 9 : case ARMISD::VLD3_UPD: {
3209 : static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
3210 : ARM::VLD3d16Pseudo_UPD,
3211 : ARM::VLD3d32Pseudo_UPD,
3212 : ARM::VLD1d64TPseudoWB_fixed};
3213 : static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
3214 : ARM::VLD3q16Pseudo_UPD,
3215 : ARM::VLD3q32Pseudo_UPD };
3216 : static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
3217 : ARM::VLD3q16oddPseudo_UPD,
3218 : ARM::VLD3q32oddPseudo_UPD };
3219 9 : SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
3220 9 : return;
3221 : }
3222 :
3223 4 : case ARMISD::VLD4_UPD: {
3224 : static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD,
3225 : ARM::VLD4d16Pseudo_UPD,
3226 : ARM::VLD4d32Pseudo_UPD,
3227 : ARM::VLD1d64QPseudoWB_fixed};
3228 : static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
3229 : ARM::VLD4q16Pseudo_UPD,
3230 : ARM::VLD4q32Pseudo_UPD };
3231 : static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
3232 : ARM::VLD4q16oddPseudo_UPD,
3233 : ARM::VLD4q32oddPseudo_UPD };
3234 4 : SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
3235 4 : return;
3236 : }
3237 :
3238 4 : case ARMISD::VLD2LN_UPD: {
3239 : static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
3240 : ARM::VLD2LNd16Pseudo_UPD,
3241 : ARM::VLD2LNd32Pseudo_UPD };
3242 : static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
3243 : ARM::VLD2LNq32Pseudo_UPD };
3244 4 : SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
3245 4 : return;
3246 : }
3247 :
3248 2 : case ARMISD::VLD3LN_UPD: {
3249 : static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
3250 : ARM::VLD3LNd16Pseudo_UPD,
3251 : ARM::VLD3LNd32Pseudo_UPD };
3252 : static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
3253 : ARM::VLD3LNq32Pseudo_UPD };
3254 2 : SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
3255 2 : return;
3256 : }
3257 :
3258 2 : case ARMISD::VLD4LN_UPD: {
3259 : static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
3260 : ARM::VLD4LNd16Pseudo_UPD,
3261 : ARM::VLD4LNd32Pseudo_UPD };
3262 : static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
3263 : ARM::VLD4LNq32Pseudo_UPD };
3264 2 : SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
3265 2 : return;
3266 : }
3267 :
3268 138 : case ARMISD::VST1_UPD: {
3269 : static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
3270 : ARM::VST1d16wb_fixed,
3271 : ARM::VST1d32wb_fixed,
3272 : ARM::VST1d64wb_fixed };
3273 : static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
3274 : ARM::VST1q16wb_fixed,
3275 : ARM::VST1q32wb_fixed,
3276 : ARM::VST1q64wb_fixed };
3277 138 : SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
3278 138 : return;
3279 : }
3280 :
3281 5 : case ARMISD::VST2_UPD: {
3282 : static const uint16_t DOpcodes[] = { ARM::VST2d8wb_fixed,
3283 : ARM::VST2d16wb_fixed,
3284 : ARM::VST2d32wb_fixed,
3285 : ARM::VST1q64wb_fixed};
3286 : static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
3287 : ARM::VST2q16PseudoWB_fixed,
3288 : ARM::VST2q32PseudoWB_fixed };
3289 5 : SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
3290 5 : return;
3291 : }
3292 :
3293 4 : case ARMISD::VST3_UPD: {
3294 : static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
3295 : ARM::VST3d16Pseudo_UPD,
3296 : ARM::VST3d32Pseudo_UPD,
3297 : ARM::VST1d64TPseudoWB_fixed};
3298 : static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3299 : ARM::VST3q16Pseudo_UPD,
3300 : ARM::VST3q32Pseudo_UPD };
3301 : static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
3302 : ARM::VST3q16oddPseudo_UPD,
3303 : ARM::VST3q32oddPseudo_UPD };
3304 4 : SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
3305 4 : return;
3306 : }
3307 :
3308 4 : case ARMISD::VST4_UPD: {
3309 : static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo_UPD,
3310 : ARM::VST4d16Pseudo_UPD,
3311 : ARM::VST4d32Pseudo_UPD,
3312 : ARM::VST1d64QPseudoWB_fixed};
3313 : static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3314 : ARM::VST4q16Pseudo_UPD,
3315 : ARM::VST4q32Pseudo_UPD };
3316 : static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
3317 : ARM::VST4q16oddPseudo_UPD,
3318 : ARM::VST4q32oddPseudo_UPD };
3319 4 : SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
3320 4 : return;
3321 : }
3322 :
3323 1 : case ARMISD::VST2LN_UPD: {
3324 : static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
3325 : ARM::VST2LNd16Pseudo_UPD,
3326 : ARM::VST2LNd32Pseudo_UPD };
3327 : static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
3328 : ARM::VST2LNq32Pseudo_UPD };
3329 1 : SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
3330 1 : return;
3331 : }
3332 :
3333 1 : case ARMISD::VST3LN_UPD: {
3334 : static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
3335 : ARM::VST3LNd16Pseudo_UPD,
3336 : ARM::VST3LNd32Pseudo_UPD };
3337 : static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
3338 : ARM::VST3LNq32Pseudo_UPD };
3339 1 : SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
3340 1 : return;
3341 : }
3342 :
3343 1 : case ARMISD::VST4LN_UPD: {
3344 : static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
3345 : ARM::VST4LNd16Pseudo_UPD,
3346 : ARM::VST4LNd32Pseudo_UPD };
3347 : static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
3348 : ARM::VST4LNq32Pseudo_UPD };
3349 1 : SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
3350 1 : return;
3351 : }
3352 :
3353 1658 : case ISD::INTRINSIC_VOID:
3354 : case ISD::INTRINSIC_W_CHAIN: {
3355 4974 : unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
3356 : switch (IntNo) {
3357 : default:
3358 : break;
3359 :
3360 : case Intrinsic::arm_mrrc:
3361 : case Intrinsic::arm_mrrc2: {
3362 : SDLoc dl(N);
3363 4 : SDValue Chain = N->getOperand(0);
3364 : unsigned Opc;
3365 :
3366 4 : if (Subtarget->isThumb())
3367 2 : Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2);
3368 : else
3369 2 : Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);
3370 :
3371 : SmallVector<SDValue, 5> Ops;
3372 8 : Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(), dl)); /* coproc */
3373 12 : Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(), dl)); /* opc */
3374 12 : Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(), dl)); /* CRm */
3375 :
3376 : // The mrrc2 instruction in ARM doesn't allow predicates, the top 4 bits of the encoded
3377 : // instruction will always be '1111' but it is possible in assembly language to specify
3378 : // AL as a predicate to mrrc2 but it doesn't make any difference to the encoded instruction.
3379 4 : if (Opc != ARM::MRRC2) {
3380 3 : Ops.push_back(getAL(CurDAG, dl));
3381 6 : Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3382 : }
3383 :
3384 4 : Ops.push_back(Chain);
3385 :
3386 : // Writes to two registers.
3387 4 : const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other};
3388 :
3389 8 : ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops));
3390 : return;
3391 : }
3392 : case Intrinsic::arm_ldaexd:
3393 : case Intrinsic::arm_ldrexd: {
3394 : SDLoc dl(N);
3395 127 : SDValue Chain = N->getOperand(0);
3396 127 : SDValue MemAddr = N->getOperand(2);
3397 127 : bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps();
3398 :
3399 : bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
3400 127 : unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
3401 : : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);
3402 :
3403 : // arm_ldrexd returns a i64 value in {i32, i32}
3404 : std::vector<EVT> ResTys;
3405 127 : if (isThumb) {
3406 58 : ResTys.push_back(MVT::i32);
3407 58 : ResTys.push_back(MVT::i32);
3408 : } else
3409 69 : ResTys.push_back(MVT::Untyped);
3410 127 : ResTys.push_back(MVT::Other);
3411 :
3412 : // Place arguments in the right order.
3413 127 : SDValue Ops[] = {MemAddr, getAL(CurDAG, dl),
3414 127 : CurDAG->getRegister(0, MVT::i32), Chain};
3415 254 : SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3416 : // Transfer memoperands.
3417 127 : MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3418 254 : CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
3419 :
3420 : // Remap uses.
3421 127 : SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
3422 127 : if (!SDValue(N, 0).use_empty()) {
3423 : SDValue Result;
3424 109 : if (isThumb)
3425 : Result = SDValue(Ld, 0);
3426 : else {
3427 : SDValue SubRegIdx =
3428 56 : CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
3429 112 : SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3430 : dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3431 : Result = SDValue(ResNode,0);
3432 : }
3433 218 : ReplaceUses(SDValue(N, 0), Result);
3434 : }
3435 127 : if (!SDValue(N, 1).use_empty()) {
3436 : SDValue Result;
3437 108 : if (isThumb)
3438 : Result = SDValue(Ld, 1);
3439 : else {
3440 : SDValue SubRegIdx =
3441 55 : CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
3442 110 : SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3443 : dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3444 : Result = SDValue(ResNode,0);
3445 : }
3446 216 : ReplaceUses(SDValue(N, 1), Result);
3447 : }
3448 127 : ReplaceUses(SDValue(N, 2), OutChain);
3449 127 : CurDAG->RemoveDeadNode(N);
3450 : return;
3451 : }
3452 : case Intrinsic::arm_stlexd:
3453 : case Intrinsic::arm_strexd: {
3454 : SDLoc dl(N);
3455 109 : SDValue Chain = N->getOperand(0);
3456 109 : SDValue Val0 = N->getOperand(2);
3457 109 : SDValue Val1 = N->getOperand(3);
3458 109 : SDValue MemAddr = N->getOperand(4);
3459 :
3460 : // Store exclusive double return a i32 value which is the return status
3461 : // of the issued store.
3462 109 : const EVT ResTys[] = {MVT::i32, MVT::Other};
3463 :
3464 109 : bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
3465 : // Place arguments in the right order.
3466 : SmallVector<SDValue, 7> Ops;
3467 109 : if (isThumb) {
3468 51 : Ops.push_back(Val0);
3469 51 : Ops.push_back(Val1);
3470 : } else
3471 : // arm_strexd uses GPRPair.
3472 58 : Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
3473 109 : Ops.push_back(MemAddr);
3474 109 : Ops.push_back(getAL(CurDAG, dl));
3475 218 : Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3476 109 : Ops.push_back(Chain);
3477 :
3478 : bool IsRelease = IntNo == Intrinsic::arm_stlexd;
3479 109 : unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
3480 : : (IsRelease ? ARM::STLEXD : ARM::STREXD);
3481 :
3482 218 : SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3483 : // Transfer memoperands.
3484 109 : MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3485 218 : CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
3486 :
3487 109 : ReplaceNode(N, St);
3488 : return;
3489 : }
3490 :
3491 68 : case Intrinsic::arm_neon_vld1: {
3492 : static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
3493 : ARM::VLD1d32, ARM::VLD1d64 };
3494 : static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
3495 : ARM::VLD1q32, ARM::VLD1q64};
3496 68 : SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
3497 68 : return;
3498 : }
3499 :
3500 8 : case Intrinsic::arm_neon_vld1x2: {
3501 : static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
3502 : ARM::VLD1q32, ARM::VLD1q64 };
3503 : static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo,
3504 : ARM::VLD1d16QPseudo,
3505 : ARM::VLD1d32QPseudo,
3506 : ARM::VLD1d64QPseudo };
3507 8 : SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
3508 8 : return;
3509 : }
3510 :
3511 8 : case Intrinsic::arm_neon_vld1x3: {
3512 : static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo,
3513 : ARM::VLD1d16TPseudo,
3514 : ARM::VLD1d32TPseudo,
3515 : ARM::VLD1d64TPseudo };
3516 : static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowTPseudo_UPD,
3517 : ARM::VLD1q16LowTPseudo_UPD,
3518 : ARM::VLD1q32LowTPseudo_UPD,
3519 : ARM::VLD1q64LowTPseudo_UPD };
3520 : static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo,
3521 : ARM::VLD1q16HighTPseudo,
3522 : ARM::VLD1q32HighTPseudo,
3523 : ARM::VLD1q64HighTPseudo };
3524 8 : SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3525 8 : return;
3526 : }
3527 :
3528 8 : case Intrinsic::arm_neon_vld1x4: {
3529 : static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo,
3530 : ARM::VLD1d16QPseudo,
3531 : ARM::VLD1d32QPseudo,
3532 : ARM::VLD1d64QPseudo };
3533 : static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD,
3534 : ARM::VLD1q16LowQPseudo_UPD,
3535 : ARM::VLD1q32LowQPseudo_UPD,
3536 : ARM::VLD1q64LowQPseudo_UPD };
3537 : static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo,
3538 : ARM::VLD1q16HighQPseudo,
3539 : ARM::VLD1q32HighQPseudo,
3540 : ARM::VLD1q64HighQPseudo };
3541 8 : SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3542 8 : return;
3543 : }
3544 :
3545 16 : case Intrinsic::arm_neon_vld2: {
3546 : static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
3547 : ARM::VLD2d32, ARM::VLD1q64 };
3548 : static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
3549 : ARM::VLD2q32Pseudo };
3550 16 : SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
3551 16 : return;
3552 : }
3553 :
3554 37 : case Intrinsic::arm_neon_vld3: {
3555 : static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
3556 : ARM::VLD3d16Pseudo,
3557 : ARM::VLD3d32Pseudo,
3558 : ARM::VLD1d64TPseudo };
3559 : static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
3560 : ARM::VLD3q16Pseudo_UPD,
3561 : ARM::VLD3q32Pseudo_UPD };
3562 : static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
3563 : ARM::VLD3q16oddPseudo,
3564 : ARM::VLD3q32oddPseudo };
3565 37 : SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3566 37 : return;
3567 : }
3568 :
3569 11 : case Intrinsic::arm_neon_vld4: {
3570 : static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
3571 : ARM::VLD4d16Pseudo,
3572 : ARM::VLD4d32Pseudo,
3573 : ARM::VLD1d64QPseudo };
3574 : static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
3575 : ARM::VLD4q16Pseudo_UPD,
3576 : ARM::VLD4q32Pseudo_UPD };
3577 : static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
3578 : ARM::VLD4q16oddPseudo,
3579 : ARM::VLD4q32oddPseudo };
3580 11 : SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3581 11 : return;
3582 : }
3583 :
3584 8 : case Intrinsic::arm_neon_vld2dup: {
3585 : static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
3586 : ARM::VLD2DUPd32, ARM::VLD1q64 };
3587 : static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
3588 : ARM::VLD2DUPq16EvenPseudo,
3589 : ARM::VLD2DUPq32EvenPseudo };
3590 : static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudo,
3591 : ARM::VLD2DUPq16OddPseudo,
3592 : ARM::VLD2DUPq32OddPseudo };
3593 8 : SelectVLDDup(N, /* IsIntrinsic= */ true, false, 2,
3594 : DOpcodes, QOpcodes0, QOpcodes1);
3595 8 : return;
3596 : }
3597 :
3598 8 : case Intrinsic::arm_neon_vld3dup: {
3599 : static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo,
3600 : ARM::VLD3DUPd16Pseudo,
3601 : ARM::VLD3DUPd32Pseudo,
3602 : ARM::VLD1d64TPseudo };
3603 : static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
3604 : ARM::VLD3DUPq16EvenPseudo,
3605 : ARM::VLD3DUPq32EvenPseudo };
3606 : static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo,
3607 : ARM::VLD3DUPq16OddPseudo,
3608 : ARM::VLD3DUPq32OddPseudo };
3609 8 : SelectVLDDup(N, /* IsIntrinsic= */ true, false, 3,
3610 : DOpcodes, QOpcodes0, QOpcodes1);
3611 8 : return;
3612 : }
3613 :
3614 8 : case Intrinsic::arm_neon_vld4dup: {
3615 : static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo,
3616 : ARM::VLD4DUPd16Pseudo,
3617 : ARM::VLD4DUPd32Pseudo,
3618 : ARM::VLD1d64QPseudo };
3619 : static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
3620 : ARM::VLD4DUPq16EvenPseudo,
3621 : ARM::VLD4DUPq32EvenPseudo };
3622 : static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo,
3623 : ARM::VLD4DUPq16OddPseudo,
3624 : ARM::VLD4DUPq32OddPseudo };
3625 8 : SelectVLDDup(N, /* IsIntrinsic= */ true, false, 4,
3626 : DOpcodes, QOpcodes0, QOpcodes1);
3627 8 : return;
3628 : }
3629 :
3630 19 : case Intrinsic::arm_neon_vld2lane: {
3631 : static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
3632 : ARM::VLD2LNd16Pseudo,
3633 : ARM::VLD2LNd32Pseudo };
3634 : static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
3635 : ARM::VLD2LNq32Pseudo };
3636 19 : SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
3637 19 : return;
3638 : }
3639 :
3640 18 : case Intrinsic::arm_neon_vld3lane: {
3641 : static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
3642 : ARM::VLD3LNd16Pseudo,
3643 : ARM::VLD3LNd32Pseudo };
3644 : static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
3645 : ARM::VLD3LNq32Pseudo };
3646 18 : SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
3647 18 : return;
3648 : }
3649 :
3650 15 : case Intrinsic::arm_neon_vld4lane: {
3651 : static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
3652 : ARM::VLD4LNd16Pseudo,
3653 : ARM::VLD4LNd32Pseudo };
3654 : static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
3655 : ARM::VLD4LNq32Pseudo };
3656 15 : SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
3657 15 : return;
3658 : }
3659 :
3660 37 : case Intrinsic::arm_neon_vst1: {
3661 : static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
3662 : ARM::VST1d32, ARM::VST1d64 };
3663 : static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
3664 : ARM::VST1q32, ARM::VST1q64 };
3665 37 : SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
3666 37 : return;
3667 : }
3668 :
3669 8 : case Intrinsic::arm_neon_vst1x2: {
3670 : static const uint16_t DOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
3671 : ARM::VST1q32, ARM::VST1q64 };
3672 : static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudo,
3673 : ARM::VST1d16QPseudo,
3674 : ARM::VST1d32QPseudo,
3675 : ARM::VST1d64QPseudo };
3676 8 : SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
3677 8 : return;
3678 : }
3679 :
3680 8 : case Intrinsic::arm_neon_vst1x3: {
3681 : static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudo,
3682 : ARM::VST1d16TPseudo,
3683 : ARM::VST1d32TPseudo,
3684 : ARM::VST1d64TPseudo };
3685 : static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
3686 : ARM::VST1q16LowTPseudo_UPD,
3687 : ARM::VST1q32LowTPseudo_UPD,
3688 : ARM::VST1q64LowTPseudo_UPD };
3689 : static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo,
3690 : ARM::VST1q16HighTPseudo,
3691 : ARM::VST1q32HighTPseudo,
3692 : ARM::VST1q64HighTPseudo };
3693 8 : SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3694 8 : return;
3695 : }
3696 :
3697 8 : case Intrinsic::arm_neon_vst1x4: {
3698 : static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudo,
3699 : ARM::VST1d16QPseudo,
3700 : ARM::VST1d32QPseudo,
3701 : ARM::VST1d64QPseudo };
3702 : static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
3703 : ARM::VST1q16LowQPseudo_UPD,
3704 : ARM::VST1q32LowQPseudo_UPD,
3705 : ARM::VST1q64LowQPseudo_UPD };
3706 : static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo,
3707 : ARM::VST1q16HighQPseudo,
3708 : ARM::VST1q32HighQPseudo,
3709 : ARM::VST1q64HighQPseudo };
3710 8 : SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3711 8 : return;
3712 : }
3713 :
3714 20 : case Intrinsic::arm_neon_vst2: {
3715 : static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
3716 : ARM::VST2d32, ARM::VST1q64 };
3717 : static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
3718 : ARM::VST2q32Pseudo };
3719 20 : SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
3720 20 : return;
3721 : }
3722 :
3723 19 : case Intrinsic::arm_neon_vst3: {
3724 : static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
3725 : ARM::VST3d16Pseudo,
3726 : ARM::VST3d32Pseudo,
3727 : ARM::VST1d64TPseudo };
3728 : static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3729 : ARM::VST3q16Pseudo_UPD,
3730 : ARM::VST3q32Pseudo_UPD };
3731 : static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
3732 : ARM::VST3q16oddPseudo,
3733 : ARM::VST3q32oddPseudo };
3734 19 : SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3735 19 : return;
3736 : }
3737 :
3738 19 : case Intrinsic::arm_neon_vst4: {
3739 : static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
3740 : ARM::VST4d16Pseudo,
3741 : ARM::VST4d32Pseudo,
3742 : ARM::VST1d64QPseudo };
3743 : static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3744 : ARM::VST4q16Pseudo_UPD,
3745 : ARM::VST4q32Pseudo_UPD };
3746 : static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
3747 : ARM::VST4q16oddPseudo,
3748 : ARM::VST4q32oddPseudo };
3749 19 : SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3750 19 : return;
3751 : }
3752 :
3753 8 : case Intrinsic::arm_neon_vst2lane: {
3754 : static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
3755 : ARM::VST2LNd16Pseudo,
3756 : ARM::VST2LNd32Pseudo };
3757 : static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
3758 : ARM::VST2LNq32Pseudo };
3759 8 : SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
3760 8 : return;
3761 : }
3762 :
3763 8 : case Intrinsic::arm_neon_vst3lane: {
3764 : static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
3765 : ARM::VST3LNd16Pseudo,
3766 : ARM::VST3LNd32Pseudo };
3767 : static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
3768 : ARM::VST3LNq32Pseudo };
3769 8 : SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
3770 8 : return;
3771 : }
3772 :
3773 8 : case Intrinsic::arm_neon_vst4lane: {
3774 : static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
3775 : ARM::VST4LNd16Pseudo,
3776 : ARM::VST4LNd32Pseudo };
3777 : static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
3778 : ARM::VST4LNq32Pseudo };
3779 8 : SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
3780 8 : return;
3781 : }
3782 : }
3783 : break;
3784 : }
3785 :
3786 6 : case ISD::ATOMIC_CMP_SWAP:
3787 6 : SelectCMP_SWAP(N);
3788 6 : return;
3789 : }
3790 :
3791 : SelectCode(N);
3792 : }
3793 :
3794 : // Inspect a register string of the form
3795 : // cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or
3796 : // cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string
3797 : // and obtain the integer operands from them, adding these operands to the
3798 : // provided vector.
3799 230 : static void getIntOperandsFromRegisterString(StringRef RegString,
3800 : SelectionDAG *CurDAG,
3801 : const SDLoc &DL,
3802 : std::vector<SDValue> &Ops) {
3803 : SmallVector<StringRef, 5> Fields;
3804 230 : RegString.split(Fields, ':');
3805 :
3806 460 : if (Fields.size() > 1) {
3807 : bool AllIntFields = true;
3808 :
3809 40 : for (StringRef Field : Fields) {
3810 : // Need to trim out leading 'cp' characters and get the integer field.
3811 : unsigned IntField;
3812 32 : AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
3813 32 : Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
3814 : }
3815 :
3816 : assert(AllIntFields &&
3817 : "Unexpected non-integer value in special register string.");
3818 : }
3819 230 : }
3820 :
3821 : // Maps a Banked Register string to its mask value. The mask value returned is
3822 : // for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
3823 : // mask operand, which expresses which register is to be used, e.g. r8, and in
3824 : // which mode it is to be used, e.g. usr. Returns -1 to signify that the string
3825 : // was invalid.
3826 222 : static inline int getBankedRegisterMask(StringRef RegString) {
3827 222 : auto TheReg = ARMBankedReg::lookupBankedRegByName(RegString.lower());
3828 222 : if (!TheReg)
3829 : return -1;
3830 0 : return TheReg->Encoding;
3831 : }
3832 :
3833 : // The flags here are common to those allowed for apsr in the A class cores and
3834 : // those allowed for the special registers in the M class cores. Returns a
3835 : // value representing which flags were present, -1 if invalid.
3836 12 : static inline int getMClassFlagsMask(StringRef Flags) {
3837 12 : return StringSwitch<int>(Flags)
3838 12 : .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is
3839 : // correct when flags are not permitted
3840 12 : .Case("g", 0x1)
3841 12 : .Case("nzcvq", 0x2)
3842 12 : .Case("nzcvqg", 0x3)
3843 12 : .Default(-1);
3844 : }
3845 :
3846 : // Maps MClass special registers string to its value for use in the
3847 : // t2MRS_M/t2MSR_M instruction nodes as the SYSm value operand.
3848 : // Returns -1 to signify that the string was invalid.
3849 182 : static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget) {
3850 182 : auto TheReg = ARMSysReg::lookupMClassSysRegByName(Reg);
3851 : const FeatureBitset &FeatureBits = Subtarget->getFeatureBits();
3852 363 : if (!TheReg || !TheReg->hasRequiredFeatures(FeatureBits))
3853 4 : return -1;
3854 178 : return (int)(TheReg->Encoding & 0xFFF); // SYSm value
3855 : }
3856 :
3857 24 : static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
3858 : // The mask operand contains the special register (R Bit) in bit 4, whether
3859 : // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
3860 : // bits 3-0 contains the fields to be accessed in the special register, set by
3861 : // the flags provided with the register.
3862 : int Mask = 0;
3863 : if (Reg == "apsr") {
3864 : // The flags permitted for apsr are the same flags that are allowed in
3865 : // M class registers. We get the flag value and then shift the flags into
3866 : // the correct place to combine with the mask.
3867 12 : Mask = getMClassFlagsMask(Flags);
3868 12 : if (Mask == -1)
3869 : return -1;
3870 12 : return Mask << 2;
3871 : }
3872 :
3873 : if (Reg != "cpsr" && Reg != "spsr") {
3874 : return -1;
3875 : }
3876 :
3877 : // This is the same as if the flags were "fc"
3878 10 : if (Flags.empty() || Flags == "all")
3879 : return Mask | 0x9;
3880 :
3881 : // Inspect the supplied flags string and set the bits in the mask for
3882 : // the relevant and valid flags allowed for cpsr and spsr.
3883 26 : for (char Flag : Flags) {
3884 : int FlagVal;
3885 16 : switch (Flag) {
3886 : case 'c':
3887 : FlagVal = 0x1;
3888 : break;
3889 : case 'x':
3890 : FlagVal = 0x2;
3891 : break;
3892 : case 's':
3893 : FlagVal = 0x4;
3894 : break;
3895 : case 'f':
3896 : FlagVal = 0x8;
3897 : break;
3898 : default:
3899 : FlagVal = 0;
3900 : }
3901 :
3902 : // This avoids allowing strings where the same flag bit appears twice.
3903 16 : if (!FlagVal || (Mask & FlagVal))
3904 : return -1;
3905 16 : Mask |= FlagVal;
3906 : }
3907 :
3908 : // If the register is spsr then we need to set the R bit.
3909 : if (Reg == "spsr")
3910 5 : Mask |= 0x10;
3911 :
3912 : return Mask;
3913 : }
3914 :
// Lower the read_register intrinsic to ARM specific DAG nodes
// using the supplied metadata string to select the instruction node to use
// and the registers/masks to construct as operands for the node.
// Returns true if the node was replaced, false if the register string was
// not recognised (so ordinary selection should handle it).
bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){
  // Operand 1 carries the register name as metadata.
  // NOTE(review): both dyn_casts are dereferenced without a null check —
  // presumably the IR verifier guarantees this shape; confirm before
  // relying on it with hand-constructed nodes.
  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  // Try to parse the string as an ACLE-style coprocessor description
  // (cp<n>:<opc1>:c<CRn>[:c<CRm>:<opc2>]); on success Ops receives the
  // integer fields as target constants.
  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE) then need to lower to MRC node (32 bit) or
    // MRRC node(64 bit), we can make the distinction based on the number of
    // operands we have.
    unsigned Opcode;
    SmallVector<EVT, 3> ResTypes;
    if (Ops.size() == 5){
      // Five fields: 32-bit coprocessor read -> MRC, one i32 result.
      Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
      ResTypes.append({ MVT::i32, MVT::Other });
    } else {
      assert(Ops.size() == 3 &&
             "Invalid number of fields in special register string.");
      // Three fields: 64-bit coprocessor read -> MRRC, two i32 results.
      Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
      ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
    }

    // Append the AL predicate, its (zero) predicate register, and the
    // incoming chain.
    Ops.push_back(getAL(CurDAG, DL));
    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
    Ops.push_back(N->getOperand(0));
    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops));
    return true;
  }

  // Not a coprocessor description: treat the string as a named special
  // register (case-insensitively).
  std::string SpecialReg = RegString->getString().lower();

  // Banked registers (e.g. r8_usr) lower to MRSbanked with an encoded mask.
  int BankedReg = getBankedRegisterMask(SpecialReg);
  if (BankedReg != -1) {
    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
                                  DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // The VFP registers are read by creating SelectionDAG nodes with opcodes
  // corresponding to the register that is being read from. So we switch on the
  // string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
                        .Case("fpscr", ARM::VMRS)
                        .Case("fpexc", ARM::VMRS_FPEXC)
                        .Case("fpsid", ARM::VMRS_FPSID)
                        .Case("mvfr0", ARM::VMRS_MVFR0)
                        .Case("mvfr1", ARM::VMRS_MVFR1)
                        .Case("mvfr2", ARM::VMRS_MVFR2)
                        .Case("fpinst", ARM::VMRS_FPINST)
                        .Case("fpinst2", ARM::VMRS_FPINST2)
                        .Default(0);

  // If an opcode was found then we can lower the read to a VFP instruction.
  if (Opcode) {
    // VFP reads require VFP2; mvfr2 additionally requires FP-ARMv8.
    if (!Subtarget->hasVFP2())
      return false;
    if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8())
      return false;

    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N,
                CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // If the target is M Class then need to validate that the register string
  // is an acceptable value, so check that a mask can be constructed from the
  // string.
  if (Subtarget->isMClass()) {
    int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
    if (SYSmValue == -1)
      return false;

    // M-class special register reads always use t2MRS_M with the SYSm value.
    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
                      getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
                      N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // Here we know the target is not M Class so we need to check if it is one
  // of the remaining possible values which are apsr, cpsr or spsr.
  if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS,
                                          DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  if (SpecialReg == "spsr") {
    // spsr uses the "sys" variant of MRS, which reads the banked SPSR.
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, DL,
                                  MVT::i32, MVT::Other, Ops));
    return true;
  }

  // Unrecognised register string: leave the node for generic handling.
  return false;
}
4029 :
// Lower the write_register intrinsic to ARM specific DAG nodes
// using the supplied metadata string to select the instruction node to use
// and the registers/masks to use in the nodes.
//
// Operand layout of N (a WRITE_REGISTER node, as this code reads it):
//   operand 0 - chain
//   operand 1 - metadata node whose first operand is the register string
//   operand 2 - value to write (and operand 3 as well for 64-bit writes)
// Returns true if N was replaced with a machine node, false if the register
// string matched nothing this routine can lower.
bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){
  // NOTE(review): both dyn_casts are assumed non-null; malformed metadata
  // would crash here. Presumably the intrinsic's IR form guarantees the
  // shape — confirm against the frontend/verifier.
  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  // Try to parse the string as a colon-separated coprocessor field list;
  // on success Ops holds the encoded immediate operands.
  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE) then need to lower to MCR node (32 bit) or
    // MCRR node(64 bit), we can make the distinction based on the number of
    // operands we have.
    unsigned Opcode;
    if (Ops.size() == 5) {
      // 32-bit write: splice the value to write in after the first two
      // parsed fields.
      Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
      Ops.insert(Ops.begin()+2, N->getOperand(2));
    } else {
      assert(Ops.size() == 3 &&
             "Invalid number of fields in special register string.");
      // 64-bit write: operands 2 and 3 of N carry the two halves.
      Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
      SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
      Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);
    }

    // Append the always-execute predicate (AL + no predicate register)
    // and the incoming chain.
    Ops.push_back(getAL(CurDAG, DL));
    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
    Ops.push_back(N->getOperand(0));

    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
    return true;
  }

  // Not a field list: match against named special registers,
  // case-insensitively.
  std::string SpecialReg = RegString->getString().lower();
  int BankedReg = getBankedRegisterMask(SpecialReg);
  if (BankedReg != -1) {
    // Banked register write: the encoded bank mask becomes an immediate
    // operand of the MSRbanked instruction.
    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
                                  DL, MVT::Other, Ops));
    return true;
  }

  // The VFP registers are written to by creating SelectionDAG nodes with
  // opcodes corresponding to the register that is being written. So we switch
  // on the string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
      .Case("fpscr", ARM::VMSR)
      .Case("fpexc", ARM::VMSR_FPEXC)
      .Case("fpsid", ARM::VMSR_FPSID)
      .Case("fpinst", ARM::VMSR_FPINST)
      .Case("fpinst2", ARM::VMSR_FPINST2)
      .Default(0);

  if (Opcode) {
    // VFP system registers are only writable with VFP2 or later.
    if (!Subtarget->hasVFP2())
      return false;
    Ops = { N->getOperand(2), getAL(CurDAG, DL),
            CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
    return true;
  }

  // Split an optional trailing "_<flags>" suffix (e.g. "cpsr_fc") off the
  // register name for the A/R-class mask lookup below.
  std::pair<StringRef, StringRef> Fields;
  Fields = StringRef(SpecialReg).rsplit('_');
  std::string Reg = Fields.first.str();
  StringRef Flags = Fields.second;

  // If the target was M Class then need to validate the special register value
  // and retrieve the mask for use in the instruction node.
  if (Subtarget->isMClass()) {
    int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
    if (SYSmValue == -1)
      return false;

    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
                      N->getOperand(2), getAL(CurDAG, DL),
                      CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops));
    return true;
  }

  // We then check to see if a valid mask can be constructed for one of the
  // register string values permitted for the A and R class cores. These values
  // are apsr, spsr and cpsr; these are also valid on older cores.
  int Mask = getARClassRegisterMask(Reg, Flags);
  if (Mask != -1) {
    Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
                                          DL, MVT::Other, Ops));
    return true;
  }

  return false;
}
4133 :
// Rewrite an INLINEASM node so that 64-bit "r"-constrained operands, which
// the DAG models as two separate GPR register operands, are replaced by a
// single GPRPair virtual register, inserting the copies needed to pack the
// pair on input and unpack it on output. Returns true if N was replaced.
bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){
  std::vector<SDValue> AsmNodeOperands;
  unsigned Flag, Kind;
  bool Changed = false;
  unsigned NumOps = N->getNumOperands();

  // Normally, i64 data is bounded to two arbitrary GRPs for "%r" constraint.
  // However, some instrstions (e.g. ldrexd/strexd in ARM mode) require
  // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
  // respectively. Since there is no constraint to explicitly specify a
  // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb,
  // the 64-bit data may be referred by H, Q, R modifiers, so we still pack
  // them into a GPRPair.

  SDLoc dl(N);
  // If the node is glued, the glue is its last operand; it is re-appended
  // at the end after all rewriting is done.
  SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1)
                                   : SDValue(nullptr,0);

  // OpChanged[k] records whether the k-th register operand group was
  // rewritten to a GPRPair; consulted for tied-use operands below.
  SmallVector<bool, 8> OpChanged;
  // Glue node will be appended late.
  for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) {
    SDValue op = N->getOperand(i);
    AsmNodeOperands.push_back(op);

    // The fixed leading operands (chain, asm string, ...) pass through.
    if (i < InlineAsm::Op_FirstOperand)
      continue;

    // Each operand group begins with a constant flag word; non-constant
    // operands (the values themselves) are copied through unchanged.
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
      Flag = C->getZExtValue();
      Kind = InlineAsm::getKind(Flag);
    }
    else
      continue;

    // Immediate operands to inline asm in the SelectionDAG are modeled with
    // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
    // the second is a constant with the value of the immediate. If we get here
    // and we have a Kind_Imm, skip the next operand, and continue.
    if (Kind == InlineAsm::Kind_Imm) {
      SDValue op = N->getOperand(++i);
      AsmNodeOperands.push_back(op);
      continue;
    }

    unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
    if (NumRegs)
      OpChanged.push_back(false);

    unsigned DefIdx = 0;
    bool IsTiedToChangedOp = false;
    // If it's a use that is tied with a previous def, it has no
    // reg class constraint.
    if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
      IsTiedToChangedOp = OpChanged[DefIdx];

    // Memory operands to inline asm in the SelectionDAG are modeled with two
    // operands: a constant of value InlineAsm::Kind_Mem followed by the input
    // operand. If we get here and we have a Kind_Mem, skip the next operand (so
    // it doesn't get misinterpreted), and continue. We do this here because
    // it's important to update the OpChanged array correctly before moving on.
    if (Kind == InlineAsm::Kind_Mem) {
      SDValue op = N->getOperand(++i);
      AsmNodeOperands.push_back(op);
      continue;
    }

    // Only register defs/uses can need the GPRPair rewrite.
    if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef
        && Kind != InlineAsm::Kind_RegDefEarlyClobber)
      continue;

    // Rewrite only applies to two-register GPR groups (64-bit "r"), or to
    // uses tied to a def that was already rewritten.
    unsigned RC;
    bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
    if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
        || NumRegs != 2)
      continue;

    assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
    SDValue V0 = N->getOperand(i+1);
    SDValue V1 = N->getOperand(i+2);
    unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg();
    unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg();
    SDValue PairedReg;
    MachineRegisterInfo &MRI = MF->getRegInfo();

    if (Kind == InlineAsm::Kind_RegDef ||
        Kind == InlineAsm::Kind_RegDefEarlyClobber) {
      // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
      // the original GPRs.

      unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      SDValue Chain = SDValue(N,0);

      SDNode *GU = N->getGluedUser();
      SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
                                               Chain.getValue(1));

      // Extract values from a GPRPair reg and copy to the original GPR reg.
      SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
                                                    RegCopy);
      SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
                                                    RegCopy);
      SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
                                        RegCopy.getValue(1));
      SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));

      // Update the original glue user.
      std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
      Ops.push_back(T1.getValue(1));
      CurDAG->UpdateNodeOperands(GU, Ops);
    }
    else {
      // For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a
      // GPRPair and then pass the GPRPair to the inline asm.
      SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];

      // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
      SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
                                          Chain.getValue(1));
      SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
                                          T0.getValue(1));
      SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);

      // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
      // i32 VRs of inline asm with it.
      unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));

      AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
      Glue = Chain.getValue(1);
    }

    Changed = true;

    if(PairedReg.getNode()) {
      // Mark this group as rewritten and emit a flag word describing a
      // single GPRPair register instead of the original two GPRs.
      OpChanged[OpChanged.size() -1 ] = true;
      Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/);
      if (IsTiedToChangedOp)
        Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx);
      else
        Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID);
      // Replace the current flag.
      AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant(
          Flag, dl, MVT::i32);
      // Add the new register node and skip the original two GPRs.
      AsmNodeOperands.push_back(PairedReg);
      // Skip the next two GPRs.
      i += 2;
    }
  }

  if (Glue.getNode())
    AsmNodeOperands.push_back(Glue);
  if (!Changed)
    return false;

  // Build the replacement INLINEASM node from the rewritten operand list.
  SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N),
      CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
  New->setNodeId(-1);
  ReplaceNode(N, New.getNode());
  return true;
}
4297 :
4298 :
4299 18 : bool ARMDAGToDAGISel::
4300 : SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
4301 : std::vector<SDValue> &OutOps) {
4302 : switch(ConstraintID) {
4303 0 : default:
4304 0 : llvm_unreachable("Unexpected asm memory constraint");
4305 18 : case InlineAsm::Constraint_i:
4306 : // FIXME: It seems strange that 'i' is needed here since it's supposed to
4307 : // be an immediate and not a memory constraint.
4308 : LLVM_FALLTHROUGH;
4309 : case InlineAsm::Constraint_m:
4310 : case InlineAsm::Constraint_o:
4311 : case InlineAsm::Constraint_Q:
4312 : case InlineAsm::Constraint_Um:
4313 : case InlineAsm::Constraint_Un:
4314 : case InlineAsm::Constraint_Uq:
4315 : case InlineAsm::Constraint_Us:
4316 : case InlineAsm::Constraint_Ut:
4317 : case InlineAsm::Constraint_Uv:
4318 : case InlineAsm::Constraint_Uy:
4319 : // Require the address to be in a register. That is safe for all ARM
4320 : // variants and it is hard to do anything much smarter without knowing
4321 : // how the operand is used.
4322 18 : OutOps.push_back(Op);
4323 : return false;
4324 : }
4325 : return true;
4326 : }
4327 :
4328 : /// createARMISelDag - This pass converts a legalized DAG into a
4329 : /// ARM-specific DAG, ready for instruction scheduling.
4330 : ///
4331 2801 : FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
4332 : CodeGenOpt::Level OptLevel) {
4333 2801 : return new ARMDAGToDAGISel(TM, OptLevel);
4334 : }
|