LLVM 19.0.0git
ARMISelDAGToDAG.cpp
Go to the documentation of this file.
1//===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the ARM target.
10//
11//===----------------------------------------------------------------------===//
12
13#include "ARM.h"
14#include "ARMBaseInstrInfo.h"
15#include "ARMTargetMachine.h"
17#include "Utils/ARMBaseInfo.h"
18#include "llvm/ADT/APSInt.h"
27#include "llvm/IR/CallingConv.h"
28#include "llvm/IR/Constants.h"
30#include "llvm/IR/Function.h"
31#include "llvm/IR/Intrinsics.h"
32#include "llvm/IR/IntrinsicsARM.h"
33#include "llvm/IR/LLVMContext.h"
35#include "llvm/Support/Debug.h"
38#include <optional>
39
40using namespace llvm;
41
42#define DEBUG_TYPE "arm-isel"
43#define PASS_NAME "ARM Instruction Selection"
44
45static cl::opt<bool>
46DisableShifterOp("disable-shifter-op", cl::Hidden,
47 cl::desc("Disable isel of shifter-op"),
48 cl::init(false));
49
50//===--------------------------------------------------------------------===//
51/// ARMDAGToDAGISel - ARM specific code to select ARM machine
52/// instructions for SelectionDAG operations.
53///
54namespace {
55
56class ARMDAGToDAGISel : public SelectionDAGISel {
57 /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
58 /// make the right decision when generating code for different targets.
59 const ARMSubtarget *Subtarget;
60
61public:
62 static char ID;
63
64 ARMDAGToDAGISel() = delete;
65
66 explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOptLevel OptLevel)
67 : SelectionDAGISel(ID, tm, OptLevel) {}
68
69 bool runOnMachineFunction(MachineFunction &MF) override {
70 // Reset the subtarget each time through.
71 Subtarget = &MF.getSubtarget<ARMSubtarget>();
73 return true;
74 }
75
76 void PreprocessISelDAG() override;
77
78 /// getI32Imm - Return a target constant of type i32 with the specified
79 /// value.
80 inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
81 return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
82 }
83
84 void Select(SDNode *N) override;
85
86 /// Return true as some complex patterns, like those that call
87 /// canExtractShiftFromMul can modify the DAG inplace.
88 bool ComplexPatternFuncMutatesDAG() const override { return true; }
89
90 bool hasNoVMLxHazardUse(SDNode *N) const;
91 bool isShifterOpProfitable(const SDValue &Shift,
92 ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
93 bool SelectRegShifterOperand(SDValue N, SDValue &A,
94 SDValue &B, SDValue &C,
95 bool CheckProfitability = true);
96 bool SelectImmShifterOperand(SDValue N, SDValue &A,
97 SDValue &B, bool CheckProfitability = true);
98 bool SelectShiftRegShifterOperand(SDValue N, SDValue &A, SDValue &B,
99 SDValue &C) {
100 // Don't apply the profitability check
101 return SelectRegShifterOperand(N, A, B, C, false);
102 }
103 bool SelectShiftImmShifterOperand(SDValue N, SDValue &A, SDValue &B) {
104 // Don't apply the profitability check
105 return SelectImmShifterOperand(N, A, B, false);
106 }
107 bool SelectShiftImmShifterOperandOneUse(SDValue N, SDValue &A, SDValue &B) {
108 if (!N.hasOneUse())
109 return false;
110 return SelectImmShifterOperand(N, A, B, false);
111 }
112
113 bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out);
114
115 bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
116 bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);
117
118 bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
119 const ConstantSDNode *CN = cast<ConstantSDNode>(N);
120 Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
121 Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
122 return true;
123 }
124
125 bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
126 SDValue &Offset, SDValue &Opc);
127 bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
128 SDValue &Offset, SDValue &Opc);
129 bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
130 SDValue &Offset, SDValue &Opc);
131 bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
132 bool SelectAddrMode3(SDValue N, SDValue &Base,
133 SDValue &Offset, SDValue &Opc);
134 bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
135 SDValue &Offset, SDValue &Opc);
136 bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, bool FP16);
137 bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset);
138 bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset);
139 bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
140 bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);
141
142 bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);
143
144 // Thumb Addressing Modes:
145 bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
146 bool SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, SDValue &Offset);
147 bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
148 SDValue &OffImm);
149 bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
150 SDValue &OffImm);
151 bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
152 SDValue &OffImm);
153 bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
154 SDValue &OffImm);
155 bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
156 template <unsigned Shift>
157 bool SelectTAddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);
158
159 // Thumb 2 Addressing Modes:
160 bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
161 template <unsigned Shift>
162 bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, SDValue &OffImm);
163 bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
164 SDValue &OffImm);
165 bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
166 SDValue &OffImm);
167 template <unsigned Shift>
168 bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm);
169 bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm,
170 unsigned Shift);
171 template <unsigned Shift>
172 bool SelectT2AddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);
173 bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
174 SDValue &OffReg, SDValue &ShImm);
175 bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);
176
177 template<int Min, int Max>
178 bool SelectImmediateInRange(SDValue N, SDValue &OffImm);
179
180 inline bool is_so_imm(unsigned Imm) const {
181 return ARM_AM::getSOImmVal(Imm) != -1;
182 }
183
184 inline bool is_so_imm_not(unsigned Imm) const {
185 return ARM_AM::getSOImmVal(~Imm) != -1;
186 }
187
188 inline bool is_t2_so_imm(unsigned Imm) const {
189 return ARM_AM::getT2SOImmVal(Imm) != -1;
190 }
191
192 inline bool is_t2_so_imm_not(unsigned Imm) const {
193 return ARM_AM::getT2SOImmVal(~Imm) != -1;
194 }
195
196 // Include the pieces autogenerated from the target description.
197#include "ARMGenDAGISel.inc"
198
199private:
200 void transferMemOperands(SDNode *Src, SDNode *Dst);
201
202 /// Indexed (pre/post inc/dec) load matching code for ARM.
203 bool tryARMIndexedLoad(SDNode *N);
204 bool tryT1IndexedLoad(SDNode *N);
205 bool tryT2IndexedLoad(SDNode *N);
206 bool tryMVEIndexedLoad(SDNode *N);
207 bool tryFMULFixed(SDNode *N, SDLoc dl);
208 bool tryFP_TO_INT(SDNode *N, SDLoc dl);
209 bool transformFixedFloatingPointConversion(SDNode *N, SDNode *FMul,
210 bool IsUnsigned,
211 bool FixedToFloat);
212
213 /// SelectVLD - Select NEON load intrinsics. NumVecs should be
214 /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for
215 /// loads of D registers and even subregs and odd subregs of Q registers.
216 /// For NumVecs <= 2, QOpcodes1 is not used.
217 void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
218 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
219 const uint16_t *QOpcodes1);
220
221 /// SelectVST - Select NEON store intrinsics. NumVecs should
222 /// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for
223 /// stores of D registers and even subregs and odd subregs of Q registers.
224 /// For NumVecs <= 2, QOpcodes1 is not used.
225 void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
226 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
227 const uint16_t *QOpcodes1);
228
229 /// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should
230 /// be 2, 3 or 4. The opcode arrays specify the instructions used for
231 /// load/store of D registers and Q registers.
232 void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
233 unsigned NumVecs, const uint16_t *DOpcodes,
234 const uint16_t *QOpcodes);
235
236 /// Helper functions for setting up clusters of MVE predication operands.
237 template <typename SDValueVector>
238 void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
239 SDValue PredicateMask);
240 template <typename SDValueVector>
241 void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
242 SDValue PredicateMask, SDValue Inactive);
243
244 template <typename SDValueVector>
245 void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc);
246 template <typename SDValueVector>
247 void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, EVT InactiveTy);
248
249 /// SelectMVE_WB - Select MVE writeback load/store intrinsics.
250 void SelectMVE_WB(SDNode *N, const uint16_t *Opcodes, bool Predicated);
251
252 /// SelectMVE_LongShift - Select MVE 64-bit scalar shift intrinsics.
253 void SelectMVE_LongShift(SDNode *N, uint16_t Opcode, bool Immediate,
254 bool HasSaturationOperand);
255
256 /// SelectMVE_VADCSBC - Select MVE vector add/sub-with-carry intrinsics.
257 void SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
258 uint16_t OpcodeWithNoCarry, bool Add, bool Predicated);
259
260 /// SelectMVE_VSHLC - Select MVE intrinsics for a shift that carries between
261 /// vector lanes.
262 void SelectMVE_VSHLC(SDNode *N, bool Predicated);
263
264 /// Select long MVE vector reductions with two vector operands
265 /// Stride is the number of vector element widths the instruction can operate
266 /// on:
267 /// 2 for long non-rounding variants, vml{a,s}ldav[a][x]: [i16, i32]
268 /// 1 for long rounding variants: vrml{a,s}ldavh[a][x]: [i32]
269 /// Stride is used when addressing the OpcodesS array which contains multiple
270 /// opcodes for each element width.
271 /// TySize is the index into the list of element types listed above
272 void SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
273 const uint16_t *OpcodesS, const uint16_t *OpcodesU,
274 size_t Stride, size_t TySize);
275
276 /// Select a 64-bit MVE vector reduction with two vector operands
277 /// arm_mve_vmlldava_[predicated]
278 void SelectMVE_VMLLDAV(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
279 const uint16_t *OpcodesU);
280 /// Select a 72-bit MVE vector rounding reduction with two vector operands
281 /// int_arm_mve_vrmlldavha[_predicated]
282 void SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
283 const uint16_t *OpcodesU);
284
285 /// SelectMVE_VLD - Select MVE interleaving load intrinsics. NumVecs
286 /// should be 2 or 4. The opcode array specifies the instructions
287 /// used for 8, 16 and 32-bit lane sizes respectively, and each
288 /// pointer points to a set of NumVecs sub-opcodes used for the
289 /// different stages (e.g. VLD20 versus VLD21) of each load family.
290 void SelectMVE_VLD(SDNode *N, unsigned NumVecs,
291 const uint16_t *const *Opcodes, bool HasWriteback);
292
293 /// SelectMVE_VxDUP - Select MVE incrementing-dup instructions. Opcodes is an
294 /// array of 3 elements for the 8, 16 and 32-bit lane sizes.
295 void SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes,
296 bool Wrapping, bool Predicated);
297
298 /// Select SelectCDE_CXxD - Select CDE dual-GPR instruction (one of CX1D,
299 /// CX1DA, CX2D, CX2DA, CX3, CX3DA).
300 /// \arg \c NumExtraOps number of extra operands besides the coprocossor,
301 /// the accumulator and the immediate operand, i.e. 0
302 /// for CX1*, 1 for CX2*, 2 for CX3*
303 /// \arg \c HasAccum whether the instruction has an accumulator operand
304 void SelectCDE_CXxD(SDNode *N, uint16_t Opcode, size_t NumExtraOps,
305 bool HasAccum);
306
307 /// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs
308 /// should be 1, 2, 3 or 4. The opcode array specifies the instructions used
309 /// for loading D registers.
310 void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating,
311 unsigned NumVecs, const uint16_t *DOpcodes,
312 const uint16_t *QOpcodes0 = nullptr,
313 const uint16_t *QOpcodes1 = nullptr);
314
315 /// Try to select SBFX/UBFX instructions for ARM.
316 bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
317
318 bool tryInsertVectorElt(SDNode *N);
319
320 // Select special operations if node forms integer ABS pattern
321 bool tryABSOp(SDNode *N);
322
323 bool tryReadRegister(SDNode *N);
324 bool tryWriteRegister(SDNode *N);
325
326 bool tryInlineAsm(SDNode *N);
327
328 void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);
329
330 void SelectCMP_SWAP(SDNode *N);
331
332 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
333 /// inline asm expressions.
335 InlineAsm::ConstraintCode ConstraintID,
336 std::vector<SDValue> &OutOps) override;
337
338 // Form pairs of consecutive R, S, D, or Q registers.
340 SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
341 SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
342 SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);
343
344 // Form sequences of 4 consecutive S, D, or Q registers.
345 SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
346 SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
347 SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
348
349 // Get the alignment operand for a NEON VLD or VST instruction.
350 SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
351 bool is64BitVector);
352
353 /// Checks if N is a multiplication by a constant where we can extract out a
354 /// power of two from the constant so that it can be used in a shift, but only
355 /// if it simplifies the materialization of the constant. Returns true if it
356 /// is, and assigns to PowerOfTwo the power of two that should be extracted
357 /// out and to NewMulConst the new constant to be multiplied by.
358 bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
359 unsigned &PowerOfTwo, SDValue &NewMulConst) const;
360
361 /// Replace N with M in CurDAG, in a way that also ensures that M gets
362 /// selected when N would have been selected.
363 void replaceDAGValue(const SDValue &N, SDValue M);
364};
365}
366
367char ARMDAGToDAGISel::ID = 0;
368
369INITIALIZE_PASS(ARMDAGToDAGISel, DEBUG_TYPE, PASS_NAME, false, false)
370
371/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
372/// operand. If so Imm will receive the 32-bit value.
373static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
374 if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
375 Imm = N->getAsZExtVal();
376 return true;
377 }
378 return false;
379}
380
381// isInt32Immediate - This method tests to see if a constant operand.
382// If so Imm will receive the 32 bit value.
383static bool isInt32Immediate(SDValue N, unsigned &Imm) {
384 return isInt32Immediate(N.getNode(), Imm);
385}
386
387// isOpcWithIntImmediate - This method tests to see if the node is a specific
388// opcode and that it has a immediate integer right operand.
389// If so Imm will receive the 32 bit value.
390static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
391 return N->getOpcode() == Opc &&
392 isInt32Immediate(N->getOperand(1).getNode(), Imm);
393}
394
395/// Check whether a particular node is a constant value representable as
396/// (N * Scale) where (N in [\p RangeMin, \p RangeMax).
397///
398/// \param ScaledConstant [out] - On success, the pre-scaled constant value.
399static bool isScaledConstantInRange(SDValue Node, int Scale,
400 int RangeMin, int RangeMax,
401 int &ScaledConstant) {
402 assert(Scale > 0 && "Invalid scale!");
403
404 // Check that this is a constant.
405 const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
406 if (!C)
407 return false;
408
409 ScaledConstant = (int) C->getZExtValue();
410 if ((ScaledConstant % Scale) != 0)
411 return false;
412
413 ScaledConstant /= Scale;
414 return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
415}
416
417void ARMDAGToDAGISel::PreprocessISelDAG() {
418 if (!Subtarget->hasV6T2Ops())
419 return;
420
421 bool isThumb2 = Subtarget->isThumb();
422 // We use make_early_inc_range to avoid invalidation issues.
423 for (SDNode &N : llvm::make_early_inc_range(CurDAG->allnodes())) {
424 if (N.getOpcode() != ISD::ADD)
425 continue;
426
427 // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
428 // leading zeros, followed by consecutive set bits, followed by 1 or 2
429 // trailing zeros, e.g. 1020.
430 // Transform the expression to
431 // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
432 // of trailing zeros of c2. The left shift would be folded as an shifter
433 // operand of 'add' and the 'and' and 'srl' would become a bits extraction
434 // node (UBFX).
435
436 SDValue N0 = N.getOperand(0);
437 SDValue N1 = N.getOperand(1);
438 unsigned And_imm = 0;
439 if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
440 if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
441 std::swap(N0, N1);
442 }
443 if (!And_imm)
444 continue;
445
446 // Check if the AND mask is an immediate of the form: 000.....1111111100
447 unsigned TZ = llvm::countr_zero(And_imm);
448 if (TZ != 1 && TZ != 2)
449 // Be conservative here. Shifter operands aren't always free. e.g. On
450 // Swift, left shifter operand of 1 / 2 for free but others are not.
451 // e.g.
452 // ubfx r3, r1, #16, #8
453 // ldr.w r3, [r0, r3, lsl #2]
454 // vs.
455 // mov.w r9, #1020
456 // and.w r2, r9, r1, lsr #14
457 // ldr r2, [r0, r2]
458 continue;
459 And_imm >>= TZ;
460 if (And_imm & (And_imm + 1))
461 continue;
462
463 // Look for (and (srl X, c1), c2).
464 SDValue Srl = N1.getOperand(0);
465 unsigned Srl_imm = 0;
466 if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
467 (Srl_imm <= 2))
468 continue;
469
470 // Make sure first operand is not a shifter operand which would prevent
471 // folding of the left shift.
472 SDValue CPTmp0;
473 SDValue CPTmp1;
474 SDValue CPTmp2;
475 if (isThumb2) {
476 if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
477 continue;
478 } else {
479 if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
480 SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
481 continue;
482 }
483
484 // Now make the transformation.
485 Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
486 Srl.getOperand(0),
487 CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
488 MVT::i32));
489 N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
490 Srl,
491 CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
492 N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
493 N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
494 CurDAG->UpdateNodeOperands(&N, N0, N1);
495 }
496}
497
498/// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
499/// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
500/// least on current ARM implementations) which should be avoidded.
501bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
502 if (OptLevel == CodeGenOptLevel::None)
503 return true;
504
505 if (!Subtarget->hasVMLxHazards())
506 return true;
507
508 if (!N->hasOneUse())
509 return false;
510
511 SDNode *Use = *N->use_begin();
512 if (Use->getOpcode() == ISD::CopyToReg)
513 return true;
514 if (Use->isMachineOpcode()) {
515 const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
516 CurDAG->getSubtarget().getInstrInfo());
517
518 const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
519 if (MCID.mayStore())
520 return true;
521 unsigned Opcode = MCID.getOpcode();
522 if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
523 return true;
524 // vmlx feeding into another vmlx. We actually want to unfold
525 // the use later in the MLxExpansion pass. e.g.
526 // vmla
527 // vmla (stall 8 cycles)
528 //
529 // vmul (5 cycles)
530 // vadd (5 cycles)
531 // vmla
532 // This adds up to about 18 - 19 cycles.
533 //
534 // vmla
535 // vmul (stall 4 cycles)
536 // vadd adds up to about 14 cycles.
537 return TII->isFpMLxInstruction(Opcode);
538 }
539
540 return false;
541}
542
543bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
544 ARM_AM::ShiftOpc ShOpcVal,
545 unsigned ShAmt) {
546 if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
547 return true;
548 if (Shift.hasOneUse())
549 return true;
550 // R << 2 is free.
551 return ShOpcVal == ARM_AM::lsl &&
552 (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
553}
554
555bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
556 unsigned MaxShift,
557 unsigned &PowerOfTwo,
558 SDValue &NewMulConst) const {
559 assert(N.getOpcode() == ISD::MUL);
560 assert(MaxShift > 0);
561
562 // If the multiply is used in more than one place then changing the constant
563 // will make other uses incorrect, so don't.
564 if (!N.hasOneUse()) return false;
565 // Check if the multiply is by a constant
566 ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
567 if (!MulConst) return false;
568 // If the constant is used in more than one place then modifying it will mean
569 // we need to materialize two constants instead of one, which is a bad idea.
570 if (!MulConst->hasOneUse()) return false;
571 unsigned MulConstVal = MulConst->getZExtValue();
572 if (MulConstVal == 0) return false;
573
574 // Find the largest power of 2 that MulConstVal is a multiple of
575 PowerOfTwo = MaxShift;
576 while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
577 --PowerOfTwo;
578 if (PowerOfTwo == 0) return false;
579 }
580
581 // Only optimise if the new cost is better
582 unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
583 NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
584 unsigned OldCost = ConstantMaterializationCost(MulConstVal, Subtarget);
585 unsigned NewCost = ConstantMaterializationCost(NewMulConstVal, Subtarget);
586 return NewCost < OldCost;
587}
588
589void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
590 CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
591 ReplaceUses(N, M);
592}
593
594bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
595 SDValue &BaseReg,
596 SDValue &Opc,
597 bool CheckProfitability) {
599 return false;
600
601 // If N is a multiply-by-constant and it's profitable to extract a shift and
602 // use it in a shifted operand do so.
603 if (N.getOpcode() == ISD::MUL) {
604 unsigned PowerOfTwo = 0;
605 SDValue NewMulConst;
606 if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
607 HandleSDNode Handle(N);
608 SDLoc Loc(N);
609 replaceDAGValue(N.getOperand(1), NewMulConst);
610 BaseReg = Handle.getValue();
611 Opc = CurDAG->getTargetConstant(
612 ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32);
613 return true;
614 }
615 }
616
617 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
618
619 // Don't match base register only case. That is matched to a separate
620 // lower complexity pattern with explicit register operand.
621 if (ShOpcVal == ARM_AM::no_shift) return false;
622
623 BaseReg = N.getOperand(0);
624 unsigned ShImmVal = 0;
625 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
626 if (!RHS) return false;
627 ShImmVal = RHS->getZExtValue() & 31;
628 Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
629 SDLoc(N), MVT::i32);
630 return true;
631}
632
633bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
634 SDValue &BaseReg,
635 SDValue &ShReg,
636 SDValue &Opc,
637 bool CheckProfitability) {
639 return false;
640
641 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
642
643 // Don't match base register only case. That is matched to a separate
644 // lower complexity pattern with explicit register operand.
645 if (ShOpcVal == ARM_AM::no_shift) return false;
646
647 BaseReg = N.getOperand(0);
648 unsigned ShImmVal = 0;
649 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
650 if (RHS) return false;
651
652 ShReg = N.getOperand(1);
653 if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
654 return false;
655 Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
656 SDLoc(N), MVT::i32);
657 return true;
658}
659
660// Determine whether an ISD::OR's operands are suitable to turn the operation
661// into an addition, which often has more compact encodings.
662bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) {
663 assert(Parent->getOpcode() == ISD::OR && "unexpected parent");
664 Out = N;
665 return CurDAG->haveNoCommonBitsSet(N, Parent->getOperand(1));
666}
667
668
669bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
670 SDValue &Base,
671 SDValue &OffImm) {
672 // Match simple R + imm12 operands.
673
674 // Base only.
675 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
676 !CurDAG->isBaseWithConstantOffset(N)) {
677 if (N.getOpcode() == ISD::FrameIndex) {
678 // Match frame index.
679 int FI = cast<FrameIndexSDNode>(N)->getIndex();
680 Base = CurDAG->getTargetFrameIndex(
681 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
682 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
683 return true;
684 }
685
686 if (N.getOpcode() == ARMISD::Wrapper &&
687 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
688 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
689 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
690 Base = N.getOperand(0);
691 } else
692 Base = N;
693 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
694 return true;
695 }
696
697 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
698 int RHSC = (int)RHS->getSExtValue();
699 if (N.getOpcode() == ISD::SUB)
700 RHSC = -RHSC;
701
702 if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
703 Base = N.getOperand(0);
704 if (Base.getOpcode() == ISD::FrameIndex) {
705 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
706 Base = CurDAG->getTargetFrameIndex(
707 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
708 }
709 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
710 return true;
711 }
712 }
713
714 // Base only.
715 Base = N;
716 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
717 return true;
718}
719
720
721
722bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
723 SDValue &Opc) {
724 if (N.getOpcode() == ISD::MUL &&
725 ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
726 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
727 // X * [3,5,9] -> X + X * [2,4,8] etc.
728 int RHSC = (int)RHS->getZExtValue();
729 if (RHSC & 1) {
730 RHSC = RHSC & ~1;
732 if (RHSC < 0) {
734 RHSC = - RHSC;
735 }
736 if (isPowerOf2_32(RHSC)) {
737 unsigned ShAmt = Log2_32(RHSC);
738 Base = Offset = N.getOperand(0);
739 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
741 SDLoc(N), MVT::i32);
742 return true;
743 }
744 }
745 }
746 }
747
748 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
749 // ISD::OR that is equivalent to an ISD::ADD.
750 !CurDAG->isBaseWithConstantOffset(N))
751 return false;
752
753 // Leave simple R +/- imm12 operands for LDRi12
754 if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
755 int RHSC;
756 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
757 -0x1000+1, 0x1000, RHSC)) // 12 bits.
758 return false;
759 }
760
761 // Otherwise this is R +/- [possibly shifted] R.
763 ARM_AM::ShiftOpc ShOpcVal =
764 ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
765 unsigned ShAmt = 0;
766
767 Base = N.getOperand(0);
768 Offset = N.getOperand(1);
769
770 if (ShOpcVal != ARM_AM::no_shift) {
771 // Check to see if the RHS of the shift is a constant, if not, we can't fold
772 // it.
773 if (ConstantSDNode *Sh =
774 dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
775 ShAmt = Sh->getZExtValue();
776 if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
777 Offset = N.getOperand(1).getOperand(0);
778 else {
779 ShAmt = 0;
780 ShOpcVal = ARM_AM::no_shift;
781 }
782 } else {
783 ShOpcVal = ARM_AM::no_shift;
784 }
785 }
786
787 // Try matching (R shl C) + (R).
788 if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
789 !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
790 N.getOperand(0).hasOneUse())) {
791 ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
792 if (ShOpcVal != ARM_AM::no_shift) {
793 // Check to see if the RHS of the shift is a constant, if not, we can't
794 // fold it.
795 if (ConstantSDNode *Sh =
796 dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
797 ShAmt = Sh->getZExtValue();
798 if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
799 Offset = N.getOperand(0).getOperand(0);
800 Base = N.getOperand(1);
801 } else {
802 ShAmt = 0;
803 ShOpcVal = ARM_AM::no_shift;
804 }
805 } else {
806 ShOpcVal = ARM_AM::no_shift;
807 }
808 }
809 }
810
811 // If Offset is a multiply-by-constant and it's profitable to extract a shift
812 // and use it in a shifted operand do so.
813 if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
814 unsigned PowerOfTwo = 0;
815 SDValue NewMulConst;
816 if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
817 HandleSDNode Handle(Offset);
818 replaceDAGValue(Offset.getOperand(1), NewMulConst);
819 Offset = Handle.getValue();
820 ShAmt = PowerOfTwo;
821 ShOpcVal = ARM_AM::lsl;
822 }
823 }
824
825 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
826 SDLoc(N), MVT::i32);
827 return true;
828}
829
830bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
831 SDValue &Offset, SDValue &Opc) {
832 unsigned Opcode = Op->getOpcode();
833 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
834 ? cast<LoadSDNode>(Op)->getAddressingMode()
835 : cast<StoreSDNode>(Op)->getAddressingMode();
838 int Val;
839 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
840 return false;
841
842 Offset = N;
843 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
844 unsigned ShAmt = 0;
845 if (ShOpcVal != ARM_AM::no_shift) {
846 // Check to see if the RHS of the shift is a constant, if not, we can't fold
847 // it.
848 if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
849 ShAmt = Sh->getZExtValue();
850 if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
851 Offset = N.getOperand(0);
852 else {
853 ShAmt = 0;
854 ShOpcVal = ARM_AM::no_shift;
855 }
856 } else {
857 ShOpcVal = ARM_AM::no_shift;
858 }
859 }
860
861 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
862 SDLoc(N), MVT::i32);
863 return true;
864}
865
866bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
867 SDValue &Offset, SDValue &Opc) {
868 unsigned Opcode = Op->getOpcode();
869 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
870 ? cast<LoadSDNode>(Op)->getAddressingMode()
871 : cast<StoreSDNode>(Op)->getAddressingMode();
874 int Val;
875 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
876 if (AddSub == ARM_AM::sub) Val *= -1;
877 Offset = CurDAG->getRegister(0, MVT::i32);
878 Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
879 return true;
880 }
881
882 return false;
883}
884
885
886bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
887 SDValue &Offset, SDValue &Opc) {
888 unsigned Opcode = Op->getOpcode();
889 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
890 ? cast<LoadSDNode>(Op)->getAddressingMode()
891 : cast<StoreSDNode>(Op)->getAddressingMode();
894 int Val;
895 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
896 Offset = CurDAG->getRegister(0, MVT::i32);
897 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
899 SDLoc(Op), MVT::i32);
900 return true;
901 }
902
903 return false;
904}
905
906bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
907 Base = N;
908 return true;
909}
910
911bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
913 SDValue &Opc) {
914 if (N.getOpcode() == ISD::SUB) {
915 // X - C is canonicalize to X + -C, no need to handle it here.
916 Base = N.getOperand(0);
917 Offset = N.getOperand(1);
918 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
919 MVT::i32);
920 return true;
921 }
922
923 if (!CurDAG->isBaseWithConstantOffset(N)) {
924 Base = N;
925 if (N.getOpcode() == ISD::FrameIndex) {
926 int FI = cast<FrameIndexSDNode>(N)->getIndex();
927 Base = CurDAG->getTargetFrameIndex(
928 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
929 }
930 Offset = CurDAG->getRegister(0, MVT::i32);
931 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
932 MVT::i32);
933 return true;
934 }
935
936 // If the RHS is +/- imm8, fold into addr mode.
937 int RHSC;
938 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
939 -256 + 1, 256, RHSC)) { // 8 bits.
940 Base = N.getOperand(0);
941 if (Base.getOpcode() == ISD::FrameIndex) {
942 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
943 Base = CurDAG->getTargetFrameIndex(
944 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
945 }
946 Offset = CurDAG->getRegister(0, MVT::i32);
947
949 if (RHSC < 0) {
951 RHSC = -RHSC;
952 }
953 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
954 MVT::i32);
955 return true;
956 }
957
958 Base = N.getOperand(0);
959 Offset = N.getOperand(1);
960 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
961 MVT::i32);
962 return true;
963}
964
965bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
966 SDValue &Offset, SDValue &Opc) {
967 unsigned Opcode = Op->getOpcode();
968 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
969 ? cast<LoadSDNode>(Op)->getAddressingMode()
970 : cast<StoreSDNode>(Op)->getAddressingMode();
973 int Val;
974 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 12 bits.
975 Offset = CurDAG->getRegister(0, MVT::i32);
976 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
977 MVT::i32);
978 return true;
979 }
980
981 Offset = N;
982 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
983 MVT::i32);
984 return true;
985}
986
987bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset,
988 bool FP16) {
989 if (!CurDAG->isBaseWithConstantOffset(N)) {
990 Base = N;
991 if (N.getOpcode() == ISD::FrameIndex) {
992 int FI = cast<FrameIndexSDNode>(N)->getIndex();
993 Base = CurDAG->getTargetFrameIndex(
994 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
995 } else if (N.getOpcode() == ARMISD::Wrapper &&
996 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
997 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
998 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
999 Base = N.getOperand(0);
1000 }
1001 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
1002 SDLoc(N), MVT::i32);
1003 return true;
1004 }
1005
1006 // If the RHS is +/- imm8, fold into addr mode.
1007 int RHSC;
1008 const int Scale = FP16 ? 2 : 4;
1009
1010 if (isScaledConstantInRange(N.getOperand(1), Scale, -255, 256, RHSC)) {
1011 Base = N.getOperand(0);
1012 if (Base.getOpcode() == ISD::FrameIndex) {
1013 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1014 Base = CurDAG->getTargetFrameIndex(
1015 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1016 }
1017
1019 if (RHSC < 0) {
1021 RHSC = -RHSC;
1022 }
1023
1024 if (FP16)
1025 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub, RHSC),
1026 SDLoc(N), MVT::i32);
1027 else
1028 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
1029 SDLoc(N), MVT::i32);
1030
1031 return true;
1032 }
1033
1034 Base = N;
1035
1036 if (FP16)
1037 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add, 0),
1038 SDLoc(N), MVT::i32);
1039 else
1040 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
1041 SDLoc(N), MVT::i32);
1042
1043 return true;
1044}
1045
1046bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
1048 return IsAddressingMode5(N, Base, Offset, /*FP16=*/ false);
1049}
1050
1051bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N,
1053 return IsAddressingMode5(N, Base, Offset, /*FP16=*/ true);
1054}
1055
1056bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
1057 SDValue &Align) {
1058 Addr = N;
1059
1060 unsigned Alignment = 0;
1061
1062 MemSDNode *MemN = cast<MemSDNode>(Parent);
1063
1064 if (isa<LSBaseSDNode>(MemN) ||
1065 ((MemN->getOpcode() == ARMISD::VST1_UPD ||
1066 MemN->getOpcode() == ARMISD::VLD1_UPD) &&
1067 MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
1068 // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
1069 // The maximum alignment is equal to the memory size being referenced.
1070 llvm::Align MMOAlign = MemN->getAlign();
1071 unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
1072 if (MMOAlign.value() >= MemSize && MemSize > 1)
1073 Alignment = MemSize;
1074 } else {
1075 // All other uses of addrmode6 are for intrinsics. For now just record
1076 // the raw alignment value; it will be refined later based on the legal
1077 // alignment operands for the intrinsic.
1078 Alignment = MemN->getAlign().value();
1079 }
1080
1081 Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
1082 return true;
1083}
1084
1085bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
1086 SDValue &Offset) {
1087 LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
1089 if (AM != ISD::POST_INC)
1090 return false;
1091 Offset = N;
1092 if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
1093 if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
1094 Offset = CurDAG->getRegister(0, MVT::i32);
1095 }
1096 return true;
1097}
1098
1099bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
1100 SDValue &Offset, SDValue &Label) {
1101 if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
1102 Offset = N.getOperand(0);
1103 SDValue N1 = N.getOperand(1);
1104 Label = CurDAG->getTargetConstant(N1->getAsZExtVal(), SDLoc(N), MVT::i32);
1105 return true;
1106 }
1107
1108 return false;
1109}
1110
1111
1112//===----------------------------------------------------------------------===//
1113// Thumb Addressing Modes
1114//===----------------------------------------------------------------------===//
1115
1117 // Negative numbers are difficult to materialise in thumb1. If we are
1118 // selecting the add of a negative, instead try to select ri with a zero
1119 // offset, so create the add node directly which will become a sub.
1120 if (N.getOpcode() != ISD::ADD)
1121 return false;
1122
1123 // Look for an imm which is not legal for ld/st, but is legal for sub.
1124 if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1)))
1125 return C->getSExtValue() < 0 && C->getSExtValue() >= -255;
1126
1127 return false;
1128}
1129
1130bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base,
1131 SDValue &Offset) {
1132 if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
1133 if (!isNullConstant(N))
1134 return false;
1135
1136 Base = Offset = N;
1137 return true;
1138 }
1139
1140 Base = N.getOperand(0);
1141 Offset = N.getOperand(1);
1142 return true;
1143}
1144
1145bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, SDValue &Base,
1146 SDValue &Offset) {
1148 return false; // Select ri instead
1149 return SelectThumbAddrModeRRSext(N, Base, Offset);
1150}
1151
1152bool
1153ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
1154 SDValue &Base, SDValue &OffImm) {
1156 Base = N;
1157 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1158 return true;
1159 }
1160
1161 if (!CurDAG->isBaseWithConstantOffset(N)) {
1162 if (N.getOpcode() == ISD::ADD) {
1163 return false; // We want to select register offset instead
1164 } else if (N.getOpcode() == ARMISD::Wrapper &&
1165 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1166 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1167 N.getOperand(0).getOpcode() != ISD::TargetConstantPool &&
1168 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1169 Base = N.getOperand(0);
1170 } else {
1171 Base = N;
1172 }
1173
1174 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1175 return true;
1176 }
1177
1178 // If the RHS is + imm5 * scale, fold into addr mode.
1179 int RHSC;
1180 if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
1181 Base = N.getOperand(0);
1182 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1183 return true;
1184 }
1185
1186 // Offset is too large, so use register offset instead.
1187 return false;
1188}
1189
1190bool
1191ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
1192 SDValue &OffImm) {
1193 return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
1194}
1195
1196bool
1197ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
1198 SDValue &OffImm) {
1199 return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
1200}
1201
1202bool
1203ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
1204 SDValue &OffImm) {
1205 return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
1206}
1207
1208bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
1209 SDValue &Base, SDValue &OffImm) {
1210 if (N.getOpcode() == ISD::FrameIndex) {
1211 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1212 // Only multiples of 4 are allowed for the offset, so the frame object
1213 // alignment must be at least 4.
1214 MachineFrameInfo &MFI = MF->getFrameInfo();
1215 if (MFI.getObjectAlign(FI) < Align(4))
1216 MFI.setObjectAlignment(FI, Align(4));
1217 Base = CurDAG->getTargetFrameIndex(
1218 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1219 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1220 return true;
1221 }
1222
1223 if (!CurDAG->isBaseWithConstantOffset(N))
1224 return false;
1225
1226 if (N.getOperand(0).getOpcode() == ISD::FrameIndex) {
1227 // If the RHS is + imm8 * scale, fold into addr mode.
1228 int RHSC;
1229 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
1230 Base = N.getOperand(0);
1231 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1232 // Make sure the offset is inside the object, or we might fail to
1233 // allocate an emergency spill slot. (An out-of-range access is UB, but
1234 // it could show up anyway.)
1235 MachineFrameInfo &MFI = MF->getFrameInfo();
1236 if (RHSC * 4 < MFI.getObjectSize(FI)) {
1237 // For LHS+RHS to result in an offset that's a multiple of 4 the object
1238 // indexed by the LHS must be 4-byte aligned.
1239 if (!MFI.isFixedObjectIndex(FI) && MFI.getObjectAlign(FI) < Align(4))
1240 MFI.setObjectAlignment(FI, Align(4));
1241 if (MFI.getObjectAlign(FI) >= Align(4)) {
1242 Base = CurDAG->getTargetFrameIndex(
1243 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1244 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1245 return true;
1246 }
1247 }
1248 }
1249 }
1250
1251 return false;
1252}
1253
1254template <unsigned Shift>
1255bool ARMDAGToDAGISel::SelectTAddrModeImm7(SDValue N, SDValue &Base,
1256 SDValue &OffImm) {
1257 if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
1258 int RHSC;
1259 if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
1260 RHSC)) {
1261 Base = N.getOperand(0);
1262 if (N.getOpcode() == ISD::SUB)
1263 RHSC = -RHSC;
1264 OffImm =
1265 CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
1266 return true;
1267 }
1268 }
1269
1270 // Base only.
1271 Base = N;
1272 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1273 return true;
1274}
1275
1276
1277//===----------------------------------------------------------------------===//
1278// Thumb 2 Addressing Modes
1279//===----------------------------------------------------------------------===//
1280
1281
1282bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
1283 SDValue &Base, SDValue &OffImm) {
1284 // Match simple R + imm12 operands.
1285
1286 // Base only.
1287 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1288 !CurDAG->isBaseWithConstantOffset(N)) {
1289 if (N.getOpcode() == ISD::FrameIndex) {
1290 // Match frame index.
1291 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1292 Base = CurDAG->getTargetFrameIndex(
1293 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1294 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1295 return true;
1296 }
1297
1298 if (N.getOpcode() == ARMISD::Wrapper &&
1299 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1300 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1301 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1302 Base = N.getOperand(0);
1303 if (Base.getOpcode() == ISD::TargetConstantPool)
1304 return false; // We want to select t2LDRpci instead.
1305 } else
1306 Base = N;
1307 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1308 return true;
1309 }
1310
1311 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1312 if (SelectT2AddrModeImm8(N, Base, OffImm))
1313 // Let t2LDRi8 handle (R - imm8).
1314 return false;
1315
1316 int RHSC = (int)RHS->getZExtValue();
1317 if (N.getOpcode() == ISD::SUB)
1318 RHSC = -RHSC;
1319
1320 if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
1321 Base = N.getOperand(0);
1322 if (Base.getOpcode() == ISD::FrameIndex) {
1323 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1324 Base = CurDAG->getTargetFrameIndex(
1325 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1326 }
1327 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1328 return true;
1329 }
1330 }
1331
1332 // Base only.
1333 Base = N;
1334 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1335 return true;
1336}
1337
1338template <unsigned Shift>
1339bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, SDValue &Base,
1340 SDValue &OffImm) {
1341 if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
1342 int RHSC;
1343 if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -255, 256, RHSC)) {
1344 Base = N.getOperand(0);
1345 if (Base.getOpcode() == ISD::FrameIndex) {
1346 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1347 Base = CurDAG->getTargetFrameIndex(
1348 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1349 }
1350
1351 if (N.getOpcode() == ISD::SUB)
1352 RHSC = -RHSC;
1353 OffImm =
1354 CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
1355 return true;
1356 }
1357 }
1358
1359 // Base only.
1360 Base = N;
1361 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1362 return true;
1363}
1364
1365bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
1366 SDValue &Base, SDValue &OffImm) {
1367 // Match simple R - imm8 operands.
1368 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1369 !CurDAG->isBaseWithConstantOffset(N))
1370 return false;
1371
1372 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1373 int RHSC = (int)RHS->getSExtValue();
1374 if (N.getOpcode() == ISD::SUB)
1375 RHSC = -RHSC;
1376
1377 if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
1378 Base = N.getOperand(0);
1379 if (Base.getOpcode() == ISD::FrameIndex) {
1380 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1381 Base = CurDAG->getTargetFrameIndex(
1382 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1383 }
1384 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1385 return true;
1386 }
1387 }
1388
1389 return false;
1390}
1391
1392bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
1393 SDValue &OffImm){
1394 unsigned Opcode = Op->getOpcode();
1395 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1396 ? cast<LoadSDNode>(Op)->getAddressingMode()
1397 : cast<StoreSDNode>(Op)->getAddressingMode();
1398 int RHSC;
1399 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
1400 OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
1401 ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32)
1402 : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32);
1403 return true;
1404 }
1405
1406 return false;
1407}
1408
1409template <unsigned Shift>
1410bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N, SDValue &Base,
1411 SDValue &OffImm) {
1412 if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
1413 int RHSC;
1414 if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
1415 RHSC)) {
1416 Base = N.getOperand(0);
1417 if (Base.getOpcode() == ISD::FrameIndex) {
1418 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1419 Base = CurDAG->getTargetFrameIndex(
1420 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1421 }
1422
1423 if (N.getOpcode() == ISD::SUB)
1424 RHSC = -RHSC;
1425 OffImm =
1426 CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
1427 return true;
1428 }
1429 }
1430
1431 // Base only.
1432 Base = N;
1433 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1434 return true;
1435}
1436
1437template <unsigned Shift>
1438bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
1439 SDValue &OffImm) {
1440 return SelectT2AddrModeImm7Offset(Op, N, OffImm, Shift);
1441}
1442
1443bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
1444 SDValue &OffImm,
1445 unsigned Shift) {
1446 unsigned Opcode = Op->getOpcode();
1448 switch (Opcode) {
1449 case ISD::LOAD:
1450 AM = cast<LoadSDNode>(Op)->getAddressingMode();
1451 break;
1452 case ISD::STORE:
1453 AM = cast<StoreSDNode>(Op)->getAddressingMode();
1454 break;
1455 case ISD::MLOAD:
1456 AM = cast<MaskedLoadSDNode>(Op)->getAddressingMode();
1457 break;
1458 case ISD::MSTORE:
1459 AM = cast<MaskedStoreSDNode>(Op)->getAddressingMode();
1460 break;
1461 default:
1462 llvm_unreachable("Unexpected Opcode for Imm7Offset");
1463 }
1464
1465 int RHSC;
1466 // 7 bit constant, shifted by Shift.
1467 if (isScaledConstantInRange(N, 1 << Shift, 0, 0x80, RHSC)) {
1468 OffImm =
1469 ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
1470 ? CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32)
1471 : CurDAG->getTargetConstant(-RHSC * (1 << Shift), SDLoc(N),
1472 MVT::i32);
1473 return true;
1474 }
1475 return false;
1476}
1477
1478template <int Min, int Max>
1479bool ARMDAGToDAGISel::SelectImmediateInRange(SDValue N, SDValue &OffImm) {
1480 int Val;
1481 if (isScaledConstantInRange(N, 1, Min, Max, Val)) {
1482 OffImm = CurDAG->getTargetConstant(Val, SDLoc(N), MVT::i32);
1483 return true;
1484 }
1485 return false;
1486}
1487
1488bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
1489 SDValue &Base,
1490 SDValue &OffReg, SDValue &ShImm) {
1491 // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
1492 if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
1493 return false;
1494
1495 // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
1496 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1497 int RHSC = (int)RHS->getZExtValue();
1498 if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
1499 return false;
1500 else if (RHSC < 0 && RHSC >= -255) // 8 bits
1501 return false;
1502 }
1503
1504 // Look for (R + R) or (R + (R << [1,2,3])).
1505 unsigned ShAmt = 0;
1506 Base = N.getOperand(0);
1507 OffReg = N.getOperand(1);
1508
1509 // Swap if it is ((R << c) + R).
1511 if (ShOpcVal != ARM_AM::lsl) {
1512 ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
1513 if (ShOpcVal == ARM_AM::lsl)
1514 std::swap(Base, OffReg);
1515 }
1516
1517 if (ShOpcVal == ARM_AM::lsl) {
1518 // Check to see if the RHS of the shift is a constant, if not, we can't fold
1519 // it.
1520 if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
1521 ShAmt = Sh->getZExtValue();
1522 if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
1523 OffReg = OffReg.getOperand(0);
1524 else {
1525 ShAmt = 0;
1526 }
1527 }
1528 }
1529
1530 // If OffReg is a multiply-by-constant and it's profitable to extract a shift
1531 // and use it in a shifted operand do so.
1532 if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
1533 unsigned PowerOfTwo = 0;
1534 SDValue NewMulConst;
1535 if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
1536 HandleSDNode Handle(OffReg);
1537 replaceDAGValue(OffReg.getOperand(1), NewMulConst);
1538 OffReg = Handle.getValue();
1539 ShAmt = PowerOfTwo;
1540 }
1541 }
1542
1543 ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);
1544
1545 return true;
1546}
1547
1548bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
1549 SDValue &OffImm) {
1550 // This *must* succeed since it's used for the irreplaceable ldrex and strex
1551 // instructions.
1552 Base = N;
1553 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1554
1555 if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
1556 return true;
1557
1558 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
1559 if (!RHS)
1560 return true;
1561
1562 uint32_t RHSC = (int)RHS->getZExtValue();
1563 if (RHSC > 1020 || RHSC % 4 != 0)
1564 return true;
1565
1566 Base = N.getOperand(0);
1567 if (Base.getOpcode() == ISD::FrameIndex) {
1568 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1569 Base = CurDAG->getTargetFrameIndex(
1570 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1571 }
1572
1573 OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);
1574 return true;
1575}
1576
1577//===--------------------------------------------------------------------===//
1578
1579/// getAL - Returns a ARMCC::AL immediate node.
1580static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
1581 return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
1582}
1583
1584void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
1585 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
1586 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
1587}
1588
1589bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
1590 LoadSDNode *LD = cast<LoadSDNode>(N);
1591 ISD::MemIndexedMode AM = LD->getAddressingMode();
1592 if (AM == ISD::UNINDEXED)
1593 return false;
1594
1595 EVT LoadedVT = LD->getMemoryVT();
1596 SDValue Offset, AMOpc;
1597 bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1598 unsigned Opcode = 0;
1599 bool Match = false;
1600 if (LoadedVT == MVT::i32 && isPre &&
1601 SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1602 Opcode = ARM::LDR_PRE_IMM;
1603 Match = true;
1604 } else if (LoadedVT == MVT::i32 && !isPre &&
1605 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1606 Opcode = ARM::LDR_POST_IMM;
1607 Match = true;
1608 } else if (LoadedVT == MVT::i32 &&
1609 SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1610 Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
1611 Match = true;
1612
1613 } else if (LoadedVT == MVT::i16 &&
1614 SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1615 Match = true;
1616 Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
1617 ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
1618 : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
1619 } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
1620 if (LD->getExtensionType() == ISD::SEXTLOAD) {
1621 if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1622 Match = true;
1623 Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
1624 }
1625 } else {
1626 if (isPre &&
1627 SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1628 Match = true;
1629 Opcode = ARM::LDRB_PRE_IMM;
1630 } else if (!isPre &&
1631 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1632 Match = true;
1633 Opcode = ARM::LDRB_POST_IMM;
1634 } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1635 Match = true;
1636 Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
1637 }
1638 }
1639 }
1640
1641 if (Match) {
1642 if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
1643 SDValue Chain = LD->getChain();
1644 SDValue Base = LD->getBasePtr();
1645 SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
1646 CurDAG->getRegister(0, MVT::i32), Chain };
1647 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1648 MVT::Other, Ops);
1649 transferMemOperands(N, New);
1650 ReplaceNode(N, New);
1651 return true;
1652 } else {
1653 SDValue Chain = LD->getChain();
1654 SDValue Base = LD->getBasePtr();
1655 SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
1656 CurDAG->getRegister(0, MVT::i32), Chain };
1657 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1658 MVT::Other, Ops);
1659 transferMemOperands(N, New);
1660 ReplaceNode(N, New);
1661 return true;
1662 }
1663 }
1664
1665 return false;
1666}
1667
1668bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
1669 LoadSDNode *LD = cast<LoadSDNode>(N);
1670 EVT LoadedVT = LD->getMemoryVT();
1671 ISD::MemIndexedMode AM = LD->getAddressingMode();
1672 if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD ||
1673 LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
1674 return false;
1675
1676 auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset());
1677 if (!COffs || COffs->getZExtValue() != 4)
1678 return false;
1679
1680 // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
1681 // The encoding of LDM is not how the rest of ISel expects a post-inc load to
1682 // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after
1683 // ISel.
1684 SDValue Chain = LD->getChain();
1685 SDValue Base = LD->getBasePtr();
1686 SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)),
1687 CurDAG->getRegister(0, MVT::i32), Chain };
1688 SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32,
1689 MVT::i32, MVT::Other, Ops);
1690 transferMemOperands(N, New);
1691 ReplaceNode(N, New);
1692 return true;
1693}
1694
1695bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
1696 LoadSDNode *LD = cast<LoadSDNode>(N);
1697 ISD::MemIndexedMode AM = LD->getAddressingMode();
1698 if (AM == ISD::UNINDEXED)
1699 return false;
1700
1701 EVT LoadedVT = LD->getMemoryVT();
1702 bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1704 bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1705 unsigned Opcode = 0;
1706 bool Match = false;
1707 if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
1708 switch (LoadedVT.getSimpleVT().SimpleTy) {
1709 case MVT::i32:
1710 Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
1711 break;
1712 case MVT::i16:
1713 if (isSExtLd)
1714 Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
1715 else
1716 Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
1717 break;
1718 case MVT::i8:
1719 case MVT::i1:
1720 if (isSExtLd)
1721 Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
1722 else
1723 Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
1724 break;
1725 default:
1726 return false;
1727 }
1728 Match = true;
1729 }
1730
1731 if (Match) {
1732 SDValue Chain = LD->getChain();
1733 SDValue Base = LD->getBasePtr();
1734 SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
1735 CurDAG->getRegister(0, MVT::i32), Chain };
1736 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1737 MVT::Other, Ops);
1738 transferMemOperands(N, New);
1739 ReplaceNode(N, New);
1740 return true;
1741 }
1742
1743 return false;
1744}
1745
1746bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {
1747 EVT LoadedVT;
1748 unsigned Opcode = 0;
1749 bool isSExtLd, isPre;
1750 Align Alignment;
1751 ARMVCC::VPTCodes Pred;
1752 SDValue PredReg;
1753 SDValue Chain, Base, Offset;
1754
1755 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
1756 ISD::MemIndexedMode AM = LD->getAddressingMode();
1757 if (AM == ISD::UNINDEXED)
1758 return false;
1759 LoadedVT = LD->getMemoryVT();
1760 if (!LoadedVT.isVector())
1761 return false;
1762
1763 Chain = LD->getChain();
1764 Base = LD->getBasePtr();
1765 Offset = LD->getOffset();
1766 Alignment = LD->getAlign();
1767 isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1768 isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1769 Pred = ARMVCC::None;
1770 PredReg = CurDAG->getRegister(0, MVT::i32);
1771 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
1772 ISD::MemIndexedMode AM = LD->getAddressingMode();
1773 if (AM == ISD::UNINDEXED)
1774 return false;
1775 LoadedVT = LD->getMemoryVT();
1776 if (!LoadedVT.isVector())
1777 return false;
1778
1779 Chain = LD->getChain();
1780 Base = LD->getBasePtr();
1781 Offset = LD->getOffset();
1782 Alignment = LD->getAlign();
1783 isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1784 isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1785 Pred = ARMVCC::Then;
1786 PredReg = LD->getMask();
1787 } else
1788 llvm_unreachable("Expected a Load or a Masked Load!");
1789
1790 // We allow LE non-masked loads to change the type (for example use a vldrb.8
1791 // as opposed to a vldrw.32). This can allow extra addressing modes or
1792 // alignments for what is otherwise an equivalent instruction.
1793 bool CanChangeType = Subtarget->isLittle() && !isa<MaskedLoadSDNode>(N);
1794
1795 SDValue NewOffset;
1796 if (Alignment >= Align(2) && LoadedVT == MVT::v4i16 &&
1797 SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1)) {
1798 if (isSExtLd)
1799 Opcode = isPre ? ARM::MVE_VLDRHS32_pre : ARM::MVE_VLDRHS32_post;
1800 else
1801 Opcode = isPre ? ARM::MVE_VLDRHU32_pre : ARM::MVE_VLDRHU32_post;
1802 } else if (LoadedVT == MVT::v8i8 &&
1803 SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
1804 if (isSExtLd)
1805 Opcode = isPre ? ARM::MVE_VLDRBS16_pre : ARM::MVE_VLDRBS16_post;
1806 else
1807 Opcode = isPre ? ARM::MVE_VLDRBU16_pre : ARM::MVE_VLDRBU16_post;
1808 } else if (LoadedVT == MVT::v4i8 &&
1809 SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
1810 if (isSExtLd)
1811 Opcode = isPre ? ARM::MVE_VLDRBS32_pre : ARM::MVE_VLDRBS32_post;
1812 else
1813 Opcode = isPre ? ARM::MVE_VLDRBU32_pre : ARM::MVE_VLDRBU32_post;
1814 } else if (Alignment >= Align(4) &&
1815 (CanChangeType || LoadedVT == MVT::v4i32 ||
1816 LoadedVT == MVT::v4f32) &&
1817 SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 2))
1818 Opcode = isPre ? ARM::MVE_VLDRWU32_pre : ARM::MVE_VLDRWU32_post;
1819 else if (Alignment >= Align(2) &&
1820 (CanChangeType || LoadedVT == MVT::v8i16 ||
1821 LoadedVT == MVT::v8f16) &&
1822 SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1))
1823 Opcode = isPre ? ARM::MVE_VLDRHU16_pre : ARM::MVE_VLDRHU16_post;
1824 else if ((CanChangeType || LoadedVT == MVT::v16i8) &&
1825 SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0))
1826 Opcode = isPre ? ARM::MVE_VLDRBU8_pre : ARM::MVE_VLDRBU8_post;
1827 else
1828 return false;
1829
1830 SDValue Ops[] = {Base,
1831 NewOffset,
1832 CurDAG->getTargetConstant(Pred, SDLoc(N), MVT::i32),
1833 PredReg,
1834 CurDAG->getRegister(0, MVT::i32), // tp_reg
1835 Chain};
1836 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
1837 N->getValueType(0), MVT::Other, Ops);
1838 transferMemOperands(N, New);
1839 ReplaceUses(SDValue(N, 0), SDValue(New, 1));
1840 ReplaceUses(SDValue(N, 1), SDValue(New, 0));
1841 ReplaceUses(SDValue(N, 2), SDValue(New, 2));
1842 CurDAG->RemoveDeadNode(N);
1843 return true;
1844}
1845
1846/// Form a GPRPair pseudo register from a pair of GPR regs.
1847SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
1848 SDLoc dl(V0.getNode());
1849 SDValue RegClass =
1850 CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
1851 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
1852 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
1853 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1854 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1855}
1856
1857/// Form a D register from a pair of S registers.
1858SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1859 SDLoc dl(V0.getNode());
1860 SDValue RegClass =
1861 CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
1862 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1863 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1864 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1865 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1866}
1867
1868/// Form a quad register from a pair of D registers.
1869SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1870 SDLoc dl(V0.getNode());
1871 SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
1872 MVT::i32);
1873 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1874 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1875 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1876 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1877}
1878
1879/// Form 4 consecutive D registers from a pair of Q registers.
1880SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1881 SDLoc dl(V0.getNode());
1882 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1883 MVT::i32);
1884 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1885 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1886 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1887 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1888}
1889
1890/// Form 4 consecutive S registers.
1891SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
1892 SDValue V2, SDValue V3) {
1893 SDLoc dl(V0.getNode());
1894 SDValue RegClass =
1895 CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
1896 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1897 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1898 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
1899 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
1900 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1901 V2, SubReg2, V3, SubReg3 };
1902 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1903}
1904
1905/// Form 4 consecutive D registers.
1906SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
1907 SDValue V2, SDValue V3) {
1908 SDLoc dl(V0.getNode());
1909 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1910 MVT::i32);
1911 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1912 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1913 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);
1914 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);
1915 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1916 V2, SubReg2, V3, SubReg3 };
1917 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1918}
1919
1920/// Form 4 consecutive Q registers.
1921SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
1922 SDValue V2, SDValue V3) {
1923 SDLoc dl(V0.getNode());
1924 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl,
1925 MVT::i32);
1926 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1927 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1928 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);
1929 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);
1930 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1931 V2, SubReg2, V3, SubReg3 };
1932 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1933}
1934
1935/// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
1936/// of a NEON VLD or VST instruction. The supported values depend on the
1937/// number of registers being loaded.
1938SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl,
1939 unsigned NumVecs, bool is64BitVector) {
1940 unsigned NumRegs = NumVecs;
1941 if (!is64BitVector && NumVecs < 3)
1942 NumRegs *= 2;
1943
1944 unsigned Alignment = Align->getAsZExtVal();
1945 if (Alignment >= 32 && NumRegs == 4)
1946 Alignment = 32;
1947 else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
1948 Alignment = 16;
1949 else if (Alignment >= 8)
1950 Alignment = 8;
1951 else
1952 Alignment = 0;
1953
1954 return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
1955}
1956
1957static bool isVLDfixed(unsigned Opc)
1958{
1959 switch (Opc) {
1960 default: return false;
1961 case ARM::VLD1d8wb_fixed : return true;
1962 case ARM::VLD1d16wb_fixed : return true;
1963 case ARM::VLD1d64Qwb_fixed : return true;
1964 case ARM::VLD1d32wb_fixed : return true;
1965 case ARM::VLD1d64wb_fixed : return true;
1966 case ARM::VLD1d8TPseudoWB_fixed : return true;
1967 case ARM::VLD1d16TPseudoWB_fixed : return true;
1968 case ARM::VLD1d32TPseudoWB_fixed : return true;
1969 case ARM::VLD1d64TPseudoWB_fixed : return true;
1970 case ARM::VLD1d8QPseudoWB_fixed : return true;
1971 case ARM::VLD1d16QPseudoWB_fixed : return true;
1972 case ARM::VLD1d32QPseudoWB_fixed : return true;
1973 case ARM::VLD1d64QPseudoWB_fixed : return true;
1974 case ARM::VLD1q8wb_fixed : return true;
1975 case ARM::VLD1q16wb_fixed : return true;
1976 case ARM::VLD1q32wb_fixed : return true;
1977 case ARM::VLD1q64wb_fixed : return true;
1978 case ARM::VLD1DUPd8wb_fixed : return true;
1979 case ARM::VLD1DUPd16wb_fixed : return true;
1980 case ARM::VLD1DUPd32wb_fixed : return true;
1981 case ARM::VLD1DUPq8wb_fixed : return true;
1982 case ARM::VLD1DUPq16wb_fixed : return true;
1983 case ARM::VLD1DUPq32wb_fixed : return true;
1984 case ARM::VLD2d8wb_fixed : return true;
1985 case ARM::VLD2d16wb_fixed : return true;
1986 case ARM::VLD2d32wb_fixed : return true;
1987 case ARM::VLD2q8PseudoWB_fixed : return true;
1988 case ARM::VLD2q16PseudoWB_fixed : return true;
1989 case ARM::VLD2q32PseudoWB_fixed : return true;
1990 case ARM::VLD2DUPd8wb_fixed : return true;
1991 case ARM::VLD2DUPd16wb_fixed : return true;
1992 case ARM::VLD2DUPd32wb_fixed : return true;
1993 case ARM::VLD2DUPq8OddPseudoWB_fixed: return true;
1994 case ARM::VLD2DUPq16OddPseudoWB_fixed: return true;
1995 case ARM::VLD2DUPq32OddPseudoWB_fixed: return true;
1996 }
1997}
1998
1999static bool isVSTfixed(unsigned Opc)
2000{
2001 switch (Opc) {
2002 default: return false;
2003 case ARM::VST1d8wb_fixed : return true;
2004 case ARM::VST1d16wb_fixed : return true;
2005 case ARM::VST1d32wb_fixed : return true;
2006 case ARM::VST1d64wb_fixed : return true;
2007 case ARM::VST1q8wb_fixed : return true;
2008 case ARM::VST1q16wb_fixed : return true;
2009 case ARM::VST1q32wb_fixed : return true;
2010 case ARM::VST1q64wb_fixed : return true;
2011 case ARM::VST1d8TPseudoWB_fixed : return true;
2012 case ARM::VST1d16TPseudoWB_fixed : return true;
2013 case ARM::VST1d32TPseudoWB_fixed : return true;
2014 case ARM::VST1d64TPseudoWB_fixed : return true;
2015 case ARM::VST1d8QPseudoWB_fixed : return true;
2016 case ARM::VST1d16QPseudoWB_fixed : return true;
2017 case ARM::VST1d32QPseudoWB_fixed : return true;
2018 case ARM::VST1d64QPseudoWB_fixed : return true;
2019 case ARM::VST2d8wb_fixed : return true;
2020 case ARM::VST2d16wb_fixed : return true;
2021 case ARM::VST2d32wb_fixed : return true;
2022 case ARM::VST2q8PseudoWB_fixed : return true;
2023 case ARM::VST2q16PseudoWB_fixed : return true;
2024 case ARM::VST2q32PseudoWB_fixed : return true;
2025 }
2026}
2027
2028// Get the register stride update opcode of a VLD/VST instruction that
2029// is otherwise equivalent to the given fixed stride updating instruction.
2030static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
2031 assert((isVLDfixed(Opc) || isVSTfixed(Opc))
2032 && "Incorrect fixed stride updating instruction.");
2033 switch (Opc) {
2034 default: break;
2035 case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
2036 case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
2037 case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
2038 case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
2039 case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
2040 case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
2041 case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
2042 case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
2043 case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
2044 case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
2045 case ARM::VLD1d8TPseudoWB_fixed: return ARM::VLD1d8TPseudoWB_register;
2046 case ARM::VLD1d16TPseudoWB_fixed: return ARM::VLD1d16TPseudoWB_register;
2047 case ARM::VLD1d32TPseudoWB_fixed: return ARM::VLD1d32TPseudoWB_register;
2048 case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
2049 case ARM::VLD1d8QPseudoWB_fixed: return ARM::VLD1d8QPseudoWB_register;
2050 case ARM::VLD1d16QPseudoWB_fixed: return ARM::VLD1d16QPseudoWB_register;
2051 case ARM::VLD1d32QPseudoWB_fixed: return ARM::VLD1d32QPseudoWB_register;
2052 case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
2053 case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register;
2054 case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register;
2055 case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register;
2056 case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register;
2057 case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register;
2058 case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register;
2059 case ARM::VLD2DUPq8OddPseudoWB_fixed: return ARM::VLD2DUPq8OddPseudoWB_register;
2060 case ARM::VLD2DUPq16OddPseudoWB_fixed: return ARM::VLD2DUPq16OddPseudoWB_register;
2061 case ARM::VLD2DUPq32OddPseudoWB_fixed: return ARM::VLD2DUPq32OddPseudoWB_register;
2062
2063 case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
2064 case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
2065 case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
2066 case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
2067 case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
2068 case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
2069 case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
2070 case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
2071 case ARM::VST1d8TPseudoWB_fixed: return ARM::VST1d8TPseudoWB_register;
2072 case ARM::VST1d16TPseudoWB_fixed: return ARM::VST1d16TPseudoWB_register;
2073 case ARM::VST1d32TPseudoWB_fixed: return ARM::VST1d32TPseudoWB_register;
2074 case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
2075 case ARM::VST1d8QPseudoWB_fixed: return ARM::VST1d8QPseudoWB_register;
2076 case ARM::VST1d16QPseudoWB_fixed: return ARM::VST1d16QPseudoWB_register;
2077 case ARM::VST1d32QPseudoWB_fixed: return ARM::VST1d32QPseudoWB_register;
2078 case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;
2079
2080 case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
2081 case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
2082 case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
2083 case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
2084 case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
2085 case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;
2086
2087 case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
2088 case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
2089 case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
2090 case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
2091 case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
2092 case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
2093
2094 case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
2095 case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
2096 case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
2097 }
2098 return Opc; // If not one we handle, return it unchanged.
2099}
2100
2101/// Returns true if the given increment is a Constant known to be equal to the
2102/// access size performed by a NEON load/store. This means the "[rN]!" form can
2103/// be used.
2104static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) {
2105 auto C = dyn_cast<ConstantSDNode>(Inc);
2106 return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs;
2107}
2108
2109void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
2110 const uint16_t *DOpcodes,
2111 const uint16_t *QOpcodes0,
2112 const uint16_t *QOpcodes1) {
2113 assert(Subtarget->hasNEON());
2114 assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
2115 SDLoc dl(N);
2116
2117 SDValue MemAddr, Align;
2118 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
2119 // nodes are not intrinsics.
2120 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2121 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2122 return;
2123
2124 SDValue Chain = N->getOperand(0);
2125 EVT VT = N->getValueType(0);
2126 bool is64BitVector = VT.is64BitVector();
2127 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
2128
2129 unsigned OpcodeIndex;
2130 switch (VT.getSimpleVT().SimpleTy) {
2131 default: llvm_unreachable("unhandled vld type");
2132 // Double-register operations:
2133 case MVT::v8i8: OpcodeIndex = 0; break;
2134 case MVT::v4f16:
2135 case MVT::v4bf16:
2136 case MVT::v4i16: OpcodeIndex = 1; break;
2137 case MVT::v2f32:
2138 case MVT::v2i32: OpcodeIndex = 2; break;
2139 case MVT::v1i64: OpcodeIndex = 3; break;
2140 // Quad-register operations:
2141 case MVT::v16i8: OpcodeIndex = 0; break;
2142 case MVT::v8f16:
2143 case MVT::v8bf16:
2144 case MVT::v8i16: OpcodeIndex = 1; break;
2145 case MVT::v4f32:
2146 case MVT::v4i32: OpcodeIndex = 2; break;
2147 case MVT::v2f64:
2148 case MVT::v2i64: OpcodeIndex = 3; break;
2149 }
2150
2151 EVT ResTy;
2152 if (NumVecs == 1)
2153 ResTy = VT;
2154 else {
2155 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2156 if (!is64BitVector)
2157 ResTyElts *= 2;
2158 ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
2159 }
2160 std::vector<EVT> ResTys;
2161 ResTys.push_back(ResTy);
2162 if (isUpdating)
2163 ResTys.push_back(MVT::i32);
2164 ResTys.push_back(MVT::Other);
2165
2166 SDValue Pred = getAL(CurDAG, dl);
2167 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2168 SDNode *VLd;
2170
2171 // Double registers and VLD1/VLD2 quad registers are directly supported.
2172 if (is64BitVector || NumVecs <= 2) {
2173 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2174 QOpcodes0[OpcodeIndex]);
2175 Ops.push_back(MemAddr);
2176 Ops.push_back(Align);
2177 if (isUpdating) {
2178 SDValue Inc = N->getOperand(AddrOpIdx + 1);
2179 bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
2180 if (!IsImmUpdate) {
2181 // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
2182 // check for the opcode rather than the number of vector elements.
2183 if (isVLDfixed(Opc))
2185 Ops.push_back(Inc);
2186 // VLD1/VLD2 fixed increment does not need Reg0 so only include it in
2187 // the operands if not such an opcode.
2188 } else if (!isVLDfixed(Opc))
2189 Ops.push_back(Reg0);
2190 }
2191 Ops.push_back(Pred);
2192 Ops.push_back(Reg0);
2193 Ops.push_back(Chain);
2194 VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2195
2196 } else {
2197 // Otherwise, quad registers are loaded with two separate instructions,
2198 // where one loads the even registers and the other loads the odd registers.
2199 EVT AddrTy = MemAddr.getValueType();
2200
2201 // Load the even subregs. This is always an updating load, so that it
2202 // provides the address to the second load for the odd subregs.
2203 SDValue ImplDef =
2204 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
2205 const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
2206 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
2207 ResTy, AddrTy, MVT::Other, OpsA);
2208 Chain = SDValue(VLdA, 2);
2209
2210 // Load the odd subregs.
2211 Ops.push_back(SDValue(VLdA, 1));
2212 Ops.push_back(Align);
2213 if (isUpdating) {
2214 SDValue Inc = N->getOperand(AddrOpIdx + 1);
2215 assert(isa<ConstantSDNode>(Inc.getNode()) &&
2216 "only constant post-increment update allowed for VLD3/4");
2217 (void)Inc;
2218 Ops.push_back(Reg0);
2219 }
2220 Ops.push_back(SDValue(VLdA, 0));
2221 Ops.push_back(Pred);
2222 Ops.push_back(Reg0);
2223 Ops.push_back(Chain);
2224 VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
2225 }
2226
2227 // Transfer memoperands.
2228 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2229 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLd), {MemOp});
2230
2231 if (NumVecs == 1) {
2232 ReplaceNode(N, VLd);
2233 return;
2234 }
2235
2236 // Extract out the subregisters.
2237 SDValue SuperReg = SDValue(VLd, 0);
2238 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
2239 ARM::qsub_3 == ARM::qsub_0 + 3,
2240 "Unexpected subreg numbering");
2241 unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
2242 for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2243 ReplaceUses(SDValue(N, Vec),
2244 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
2245 ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
2246 if (isUpdating)
2247 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
2248 CurDAG->RemoveDeadNode(N);
2249}
2250
2251void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
2252 const uint16_t *DOpcodes,
2253 const uint16_t *QOpcodes0,
2254 const uint16_t *QOpcodes1) {
2255 assert(Subtarget->hasNEON());
2256 assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
2257 SDLoc dl(N);
2258
2259 SDValue MemAddr, Align;
2260 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
2261 // nodes are not intrinsics.
2262 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2263 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2264 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2265 return;
2266
2267 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2268
2269 SDValue Chain = N->getOperand(0);
2270 EVT VT = N->getOperand(Vec0Idx).getValueType();
2271 bool is64BitVector = VT.is64BitVector();
2272 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
2273
2274 unsigned OpcodeIndex;
2275 switch (VT.getSimpleVT().SimpleTy) {
2276 default: llvm_unreachable("unhandled vst type");
2277 // Double-register operations:
2278 case MVT::v8i8: OpcodeIndex = 0; break;
2279 case MVT::v4f16:
2280 case MVT::v4bf16:
2281 case MVT::v4i16: OpcodeIndex = 1; break;
2282 case MVT::v2f32:
2283 case MVT::v2i32: OpcodeIndex = 2; break;
2284 case MVT::v1i64: OpcodeIndex = 3; break;
2285 // Quad-register operations:
2286 case MVT::v16i8: OpcodeIndex = 0; break;
2287 case MVT::v8f16:
2288 case MVT::v8bf16:
2289 case MVT::v8i16: OpcodeIndex = 1; break;
2290 case MVT::v4f32:
2291 case MVT::v4i32: OpcodeIndex = 2; break;
2292 case MVT::v2f64:
2293 case MVT::v2i64: OpcodeIndex = 3; break;
2294 }
2295
2296 std::vector<EVT> ResTys;
2297 if (isUpdating)
2298 ResTys.push_back(MVT::i32);
2299 ResTys.push_back(MVT::Other);
2300
2301 SDValue Pred = getAL(CurDAG, dl);
2302 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2304
2305 // Double registers and VST1/VST2 quad registers are directly supported.
2306 if (is64BitVector || NumVecs <= 2) {
2307 SDValue SrcReg;
2308 if (NumVecs == 1) {
2309 SrcReg = N->getOperand(Vec0Idx);
2310 } else if (is64BitVector) {
2311 // Form a REG_SEQUENCE to force register allocation.
2312 SDValue V0 = N->getOperand(Vec0Idx + 0);
2313 SDValue V1 = N->getOperand(Vec0Idx + 1);
2314 if (NumVecs == 2)
2315 SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2316 else {
2317 SDValue V2 = N->getOperand(Vec0Idx + 2);
2318 // If it's a vst3, form a quad D-register and leave the last part as
2319 // an undef.
2320 SDValue V3 = (NumVecs == 3)
2321 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
2322 : N->getOperand(Vec0Idx + 3);
2323 SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2324 }
2325 } else {
2326 // Form a QQ register.
2327 SDValue Q0 = N->getOperand(Vec0Idx);
2328 SDValue Q1 = N->getOperand(Vec0Idx + 1);
2329 SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
2330 }
2331
2332 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2333 QOpcodes0[OpcodeIndex]);
2334 Ops.push_back(MemAddr);
2335 Ops.push_back(Align);
2336 if (isUpdating) {
2337 SDValue Inc = N->getOperand(AddrOpIdx + 1);
2338 bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
2339 if (!IsImmUpdate) {
2340 // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so
2341 // check for the opcode rather than the number of vector elements.
2342 if (isVSTfixed(Opc))
2344 Ops.push_back(Inc);
2345 }
2346 // VST1/VST2 fixed increment does not need Reg0 so only include it in
2347 // the operands if not such an opcode.
2348 else if (!isVSTfixed(Opc))
2349 Ops.push_back(Reg0);
2350 }
2351 Ops.push_back(SrcReg);
2352 Ops.push_back(Pred);
2353 Ops.push_back(Reg0);
2354 Ops.push_back(Chain);
2355 SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2356
2357 // Transfer memoperands.
2358 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VSt), {MemOp});
2359
2360 ReplaceNode(N, VSt);
2361 return;
2362 }
2363
2364 // Otherwise, quad registers are stored with two separate instructions,
2365 // where one stores the even registers and the other stores the odd registers.
2366
2367 // Form the QQQQ REG_SEQUENCE.
2368 SDValue V0 = N->getOperand(Vec0Idx + 0);
2369 SDValue V1 = N->getOperand(Vec0Idx + 1);
2370 SDValue V2 = N->getOperand(Vec0Idx + 2);
2371 SDValue V3 = (NumVecs == 3)
2372 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2373 : N->getOperand(Vec0Idx + 3);
2374 SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2375
2376 // Store the even D registers. This is always an updating store, so that it
2377 // provides the address to the second store for the odd subregs.
2378 const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
2379 SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
2380 MemAddr.getValueType(),
2381 MVT::Other, OpsA);
2382 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStA), {MemOp});
2383 Chain = SDValue(VStA, 1);
2384
2385 // Store the odd D registers.
2386 Ops.push_back(SDValue(VStA, 0));
2387 Ops.push_back(Align);
2388 if (isUpdating) {
2389 SDValue Inc = N->getOperand(AddrOpIdx + 1);
2390 assert(isa<ConstantSDNode>(Inc.getNode()) &&
2391 "only constant post-increment update allowed for VST3/4");
2392 (void)Inc;
2393 Ops.push_back(Reg0);
2394 }
2395 Ops.push_back(RegSeq);
2396 Ops.push_back(Pred);
2397 Ops.push_back(Reg0);
2398 Ops.push_back(Chain);
2399 SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
2400 Ops);
2401 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStB), {MemOp});
2402 ReplaceNode(N, VStB);
2403}
2404
2405void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
2406 unsigned NumVecs,
2407 const uint16_t *DOpcodes,
2408 const uint16_t *QOpcodes) {
2409 assert(Subtarget->hasNEON());
2410 assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
2411 SDLoc dl(N);
2412
2413 SDValue MemAddr, Align;
2414 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
2415 // nodes are not intrinsics.
2416 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2417 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2418 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2419 return;
2420
2421 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2422
2423 SDValue Chain = N->getOperand(0);
2424 unsigned Lane = N->getConstantOperandVal(Vec0Idx + NumVecs);
2425 EVT VT = N->getOperand(Vec0Idx).getValueType();
2426 bool is64BitVector = VT.is64BitVector();
2427
2428 unsigned Alignment = 0;
2429 if (NumVecs != 3) {
2430 Alignment = Align->getAsZExtVal();
2431 unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
2432 if (Alignment > NumBytes)
2433 Alignment = NumBytes;
2434 if (Alignment < 8 && Alignment < NumBytes)
2435 Alignment = 0;
2436 // Alignment must be a power of two; make sure of that.
2437 Alignment = (Alignment & -Alignment);
2438 if (Alignment == 1)
2439 Alignment = 0;
2440 }
2441 Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2442
2443 unsigned OpcodeIndex;
2444 switch (VT.getSimpleVT().SimpleTy) {
2445 default: llvm_unreachable("unhandled vld/vst lane type");
2446 // Double-register operations:
2447 case MVT::v8i8: OpcodeIndex = 0; break;
2448 case MVT::v4f16:
2449 case MVT::v4bf16:
2450 case MVT::v4i16: OpcodeIndex = 1; break;
2451 case MVT::v2f32:
2452 case MVT::v2i32: OpcodeIndex = 2; break;
2453 // Quad-register operations:
2454 case MVT::v8f16:
2455 case MVT::v8bf16:
2456 case MVT::v8i16: OpcodeIndex = 0; break;
2457 case MVT::v4f32:
2458 case MVT::v4i32: OpcodeIndex = 1; break;
2459 }
2460
2461 std::vector<EVT> ResTys;
2462 if (IsLoad) {
2463 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2464 if (!is64BitVector)
2465 ResTyElts *= 2;
2466 ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
2467 MVT::i64, ResTyElts));
2468 }
2469 if (isUpdating)
2470 ResTys.push_back(MVT::i32);
2471 ResTys.push_back(MVT::Other);
2472
2473 SDValue Pred = getAL(CurDAG, dl);
2474 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2475
2477 Ops.push_back(MemAddr);
2478 Ops.push_back(Align);
2479 if (isUpdating) {
2480 SDValue Inc = N->getOperand(AddrOpIdx + 1);
2481 bool IsImmUpdate =
2482 isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
2483 Ops.push_back(IsImmUpdate ? Reg0 : Inc);
2484 }
2485
2486 SDValue SuperReg;
2487 SDValue V0 = N->getOperand(Vec0Idx + 0);
2488 SDValue V1 = N->getOperand(Vec0Idx + 1);
2489 if (NumVecs == 2) {
2490 if (is64BitVector)
2491 SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2492 else
2493 SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
2494 } else {
2495 SDValue V2 = N->getOperand(Vec0Idx + 2);
2496 SDValue V3 = (NumVecs == 3)
2497 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2498 : N->getOperand(Vec0Idx + 3);
2499 if (is64BitVector)
2500 SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2501 else
2502 SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2503 }
2504 Ops.push_back(SuperReg);
2505 Ops.push_back(getI32Imm(Lane, dl));
2506 Ops.push_back(Pred);
2507 Ops.push_back(Reg0);
2508 Ops.push_back(Chain);
2509
2510 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2511 QOpcodes[OpcodeIndex]);
2512 SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2513 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdLn), {MemOp});
2514 if (!IsLoad) {
2515 ReplaceNode(N, VLdLn);
2516 return;
2517 }
2518
2519 // Extract the subregisters.
2520 SuperReg = SDValue(VLdLn, 0);
2521 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
2522 ARM::qsub_3 == ARM::qsub_0 + 3,
2523 "Unexpected subreg numbering");
2524 unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
2525 for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2526 ReplaceUses(SDValue(N, Vec),
2527 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
2528 ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
2529 if (isUpdating)
2530 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
2531 CurDAG->RemoveDeadNode(N);
2532}
2533
2534template <typename SDValueVector>
2535void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2536 SDValue PredicateMask) {
2537 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32));
2538 Ops.push_back(PredicateMask);
2539 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
2540}
2541
2542template <typename SDValueVector>
2543void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2544 SDValue PredicateMask,
2545 SDValue Inactive) {
2546 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32));
2547 Ops.push_back(PredicateMask);
2548 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
2549 Ops.push_back(Inactive);
2550}
2551
2552template <typename SDValueVector>
2553void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc) {
2554 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32));
2555 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
2556 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
2557}
2558
2559template <typename SDValueVector>
2560void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2561 EVT InactiveTy) {
2562 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32));
2563 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
2564 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
2565 Ops.push_back(SDValue(
2566 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, InactiveTy), 0));
2567}
2568
2569void ARMDAGToDAGISel::SelectMVE_WB(SDNode *N, const uint16_t *Opcodes,
2570 bool Predicated) {
2571 SDLoc Loc(N);
2573
2574 uint16_t Opcode;
2575 switch (N->getValueType(1).getVectorElementType().getSizeInBits()) {
2576 case 32:
2577 Opcode = Opcodes[0];
2578 break;
2579 case 64:
2580 Opcode = Opcodes[1];
2581 break;
2582 default:
2583 llvm_unreachable("bad vector element size in SelectMVE_WB");
2584 }
2585
2586 Ops.push_back(N->getOperand(2)); // vector of base addresses
2587
2588 int32_t ImmValue = N->getConstantOperandVal(3);
2589 Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate offset
2590
2591 if (Predicated)
2592 AddMVEPredicateToOps(Ops, Loc, N->getOperand(4));
2593 else
2594 AddEmptyMVEPredicateToOps(Ops, Loc);
2595
2596 Ops.push_back(N->getOperand(0)); // chain
2597
2599 VTs.push_back(N->getValueType(1));
2600 VTs.push_back(N->getValueType(0));
2601 VTs.push_back(N->getValueType(2));
2602
2603 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), VTs, Ops);
2604 ReplaceUses(SDValue(N, 0), SDValue(New, 1));
2605 ReplaceUses(SDValue(N, 1), SDValue(New, 0));
2606 ReplaceUses(SDValue(N, 2), SDValue(New, 2));
2607 transferMemOperands(N, New);
2608 CurDAG->RemoveDeadNode(N);
2609}
2610
2611void ARMDAGToDAGISel::SelectMVE_LongShift(SDNode *N, uint16_t Opcode,
2612 bool Immediate,
2613 bool HasSaturationOperand) {
2614 SDLoc Loc(N);
2616
2617 // Two 32-bit halves of the value to be shifted
2618 Ops.push_back(N->getOperand(1));
2619 Ops.push_back(N->getOperand(2));
2620
2621 // The shift count
2622 if (Immediate) {
2623 int32_t ImmValue = N->getConstantOperandVal(3);
2624 Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count
2625 } else {
2626 Ops.push_back(N->getOperand(3));
2627 }
2628
2629 // The immediate saturation operand, if any
2630 if (HasSaturationOperand) {
2631 int32_t SatOp = N->getConstantOperandVal(4);
2632 int SatBit = (SatOp == 64 ? 0 : 1);
2633 Ops.push_back(getI32Imm(SatBit, Loc));
2634 }
2635
2636 // MVE scalar shifts are IT-predicable, so include the standard
2637 // predicate arguments.
2638 Ops.push_back(getAL(CurDAG, Loc));
2639 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
2640
2641 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops));
2642}
2643
2644void ARMDAGToDAGISel::SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
2645 uint16_t OpcodeWithNoCarry,
2646 bool Add, bool Predicated) {
2647 SDLoc Loc(N);
2649 uint16_t Opcode;
2650
2651 unsigned FirstInputOp = Predicated ? 2 : 1;
2652
2653 // Two input vectors and the input carry flag
2654 Ops.push_back(N->getOperand(FirstInputOp));
2655 Ops.push_back(N->getOperand(FirstInputOp + 1));
2656 SDValue CarryIn = N->getOperand(FirstInputOp + 2);
2657 ConstantSDNode *CarryInConstant = dyn_cast<ConstantSDNode>(CarryIn);
2658 uint32_t CarryMask = 1 << 29;
2659 uint32_t CarryExpected = Add ? 0 : CarryMask;
2660 if (CarryInConstant &&
2661 (CarryInConstant->getZExtValue() & CarryMask) == CarryExpected) {
2662 Opcode = OpcodeWithNoCarry;
2663 } else {
2664 Ops.push_back(CarryIn);
2665 Opcode = OpcodeWithCarry;
2666 }
2667
2668 if (Predicated)
2669 AddMVEPredicateToOps(Ops, Loc,
2670 N->getOperand(FirstInputOp + 3), // predicate
2671 N->getOperand(FirstInputOp - 1)); // inactive
2672 else
2673 AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0));
2674
2675 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops));
2676}
2677
2678void ARMDAGToDAGISel::SelectMVE_VSHLC(SDNode *N, bool Predicated) {
2679 SDLoc Loc(N);
2681
2682 // One vector input, followed by a 32-bit word of bits to shift in
2683 // and then an immediate shift count
2684 Ops.push_back(N->getOperand(1));
2685 Ops.push_back(N->getOperand(2));
2686 int32_t ImmValue = N->getConstantOperandVal(3);
2687 Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count
2688
2689 if (Predicated)
2690 AddMVEPredicateToOps(Ops, Loc, N->getOperand(4));
2691 else
2692 AddEmptyMVEPredicateToOps(Ops, Loc);
2693
2694 CurDAG->SelectNodeTo(N, ARM::MVE_VSHLC, N->getVTList(), ArrayRef(Ops));
2695}
2696
2697static bool SDValueToConstBool(SDValue SDVal) {
2698 assert(isa<ConstantSDNode>(SDVal) && "expected a compile-time constant");
2699 ConstantSDNode *SDValConstant = dyn_cast<ConstantSDNode>(SDVal);
2700 uint64_t Value = SDValConstant->getZExtValue();
2701 assert((Value == 0 || Value == 1) && "expected value 0 or 1");
2702 return Value;
2703}
2704
2705void ARMDAGToDAGISel::SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
2706 const uint16_t *OpcodesS,
2707 const uint16_t *OpcodesU,
2708 size_t Stride, size_t TySize) {
2709 assert(TySize < Stride && "Invalid TySize");
2710 bool IsUnsigned = SDValueToConstBool(N->getOperand(1));
2711 bool IsSub = SDValueToConstBool(N->getOperand(2));
2712 bool IsExchange = SDValueToConstBool(N->getOperand(3));
2713 if (IsUnsigned) {
2714 assert(!IsSub &&
2715 "Unsigned versions of vmlsldav[a]/vrmlsldavh[a] do not exist");
2716 assert(!IsExchange &&
2717 "Unsigned versions of vmlaldav[a]x/vrmlaldavh[a]x do not exist");
2718 }
2719
2720 auto OpIsZero = [N](size_t OpNo) {
2721 return isNullConstant(N->getOperand(OpNo));
2722 };
2723
2724 // If the input accumulator value is not zero, select an instruction with
2725 // accumulator, otherwise select an instruction without accumulator
2726 bool IsAccum = !(OpIsZero(4) && OpIsZero(5));
2727
2728 const uint16_t *Opcodes = IsUnsigned ? OpcodesU : OpcodesS;
2729 if (IsSub)
2730 Opcodes += 4 * Stride;
2731 if (IsExchange)
2732 Opcodes += 2 * Stride;
2733 if (IsAccum)
2734 Opcodes += Stride;
2735 uint16_t Opcode = Opcodes[TySize];
2736
2737 SDLoc Loc(N);
2739 // Push the accumulator operands, if they are used
2740 if (IsAccum) {
2741 Ops.push_back(N->getOperand(4));
2742 Ops.push_back(N->getOperand(5));
2743 }
2744 // Push the two vector operands
2745 Ops.push_back(N->getOperand(6));
2746 Ops.push_back(N->getOperand(7));
2747
2748 if (Predicated)
2749 AddMVEPredicateToOps(Ops, Loc, N->getOperand(8));
2750 else
2751 AddEmptyMVEPredicateToOps(Ops, Loc);
2752
2753 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops));
2754}
2755
2756void ARMDAGToDAGISel::SelectMVE_VMLLDAV(SDNode *N, bool Predicated,
2757 const uint16_t *OpcodesS,
2758 const uint16_t *OpcodesU) {
2759 EVT VecTy = N->getOperand(6).getValueType();
2760 size_t SizeIndex;
2761 switch (VecTy.getVectorElementType().getSizeInBits()) {
2762 case 16:
2763 SizeIndex = 0;
2764 break;
2765 case 32:
2766 SizeIndex = 1;
2767 break;
2768 default:
2769 llvm_unreachable("bad vector element size");
2770 }
2771
2772 SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 2, SizeIndex);
2773}
2774
2775void ARMDAGToDAGISel::SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated,
2776 const uint16_t *OpcodesS,
2777 const uint16_t *OpcodesU) {
2778 assert(
2779 N->getOperand(6).getValueType().getVectorElementType().getSizeInBits() ==
2780 32 &&
2781 "bad vector element size");
2782 SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 1, 0);
2783}
2784
2785void ARMDAGToDAGISel::SelectMVE_VLD(SDNode *N, unsigned NumVecs,
2786 const uint16_t *const *Opcodes,
2787 bool HasWriteback) {
2788 EVT VT = N->getValueType(0);
2789 SDLoc Loc(N);
2790
2791 const uint16_t *OurOpcodes;
2792 switch (VT.getVectorElementType().getSizeInBits()) {
2793 case 8:
2794 OurOpcodes = Opcodes[0];
2795 break;
2796 case 16:
2797 OurOpcodes = Opcodes[1];
2798 break;
2799 case 32:
2800 OurOpcodes = Opcodes[2];
2801 break;
2802 default:
2803 llvm_unreachable("bad vector element size in SelectMVE_VLD");
2804 }
2805
2806 EVT DataTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, NumVecs * 2);
2807 SmallVector<EVT, 4> ResultTys = {DataTy, MVT::Other};
2808 unsigned PtrOperand = HasWriteback ? 1 : 2;
2809
2810 auto Data = SDValue(
2811 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, DataTy), 0);
2812 SDValue Chain = N->getOperand(0);
2813 // Add a MVE_VLDn instruction for each Vec, except the last
2814 for (unsigned Stage = 0; Stage < NumVecs - 1; ++Stage) {
2815 SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain};
2816 auto LoadInst =
2817 CurDAG->getMachineNode(OurOpcodes[Stage], Loc, ResultTys, Ops);
2818 Data = SDValue(LoadInst, 0);
2819 Chain = SDValue(LoadInst, 1);
2820 transferMemOperands(N, LoadInst);
2821 }
2822 // The last may need a writeback on it
2823 if (HasWriteback)
2824 ResultTys = {DataTy, MVT::i32, MVT::Other};
2825 SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain};
2826 auto LoadInst =
2827 CurDAG->getMachineNode(OurOpcodes[NumVecs - 1], Loc, ResultTys, Ops);
2828 transferMemOperands(N, LoadInst);
2829
2830 unsigned i;
2831 for (i = 0; i < NumVecs; i++)
2832 ReplaceUses(SDValue(N, i),
2833 CurDAG->getTargetExtractSubreg(ARM::qsub_0 + i, Loc, VT,
2834 SDValue(LoadInst, 0)));
2835 if (HasWriteback)
2836 ReplaceUses(SDValue(N, i++), SDValue(LoadInst, 1));
2837 ReplaceUses(SDValue(N, i), SDValue(LoadInst, HasWriteback ? 2 : 1));
2838 CurDAG->RemoveDeadNode(N);
2839}
2840
2841void ARMDAGToDAGISel::SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes,
2842 bool Wrapping, bool Predicated) {
2843 EVT VT = N->getValueType(0);
2844 SDLoc Loc(N);
2845
2846 uint16_t Opcode;
2847 switch (VT.getScalarSizeInBits()) {
2848 case 8:
2849 Opcode = Opcodes[0];
2850 break;
2851 case 16:
2852 Opcode = Opcodes[1];
2853 break;
2854 case 32:
2855 Opcode = Opcodes[2];
2856 break;
2857 default:
2858 llvm_unreachable("bad vector element size in SelectMVE_VxDUP");
2859 }
2860
2862 unsigned OpIdx = 1;
2863
2864 SDValue Inactive;
2865 if (Predicated)
2866 Inactive = N->getOperand(OpIdx++);
2867
2868 Ops.push_back(N->getOperand(OpIdx++)); // base
2869 if (Wrapping)
2870 Ops.push_back(N->getOperand(OpIdx++)); // limit
2871
2872 SDValue ImmOp = N->getOperand(OpIdx++); // step
2873 int ImmValue = ImmOp->getAsZExtVal();
2874 Ops.push_back(getI32Imm(ImmValue, Loc));
2875
2876 if (Predicated)
2877 AddMVEPredicateToOps(Ops, Loc, N->getOperand(OpIdx), Inactive);
2878 else
2879 AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0));
2880
2881 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops));
2882}
2883
2884void ARMDAGToDAGISel::SelectCDE_CXxD(SDNode *N, uint16_t Opcode,
2885 size_t NumExtraOps, bool HasAccum) {
2886 bool IsBigEndian = CurDAG->getDataLayout().isBigEndian();
2887 SDLoc Loc(N);
2889
2890 unsigned OpIdx = 1;
2891
2892 // Convert and append the immediate operand designating the coprocessor.
2893 SDValue ImmCorpoc = N->getOperand(OpIdx++);
2894 uint32_t ImmCoprocVal = ImmCorpoc->getAsZExtVal();
2895 Ops.push_back(getI32Imm(ImmCoprocVal, Loc));
2896
2897 // For accumulating variants copy the low and high order parts of the
2898 // accumulator into a register pair and add it to the operand vector.
2899 if (HasAccum) {
2900 SDValue AccLo = N->getOperand(OpIdx++);
2901 SDValue AccHi = N->getOperand(OpIdx++);
2902 if (IsBigEndian)
2903 std::swap(AccLo, AccHi);
2904 Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, AccLo, AccHi), 0));
2905 }
2906
2907 // Copy extra operands as-is.
2908 for (size_t I = 0; I < NumExtraOps; I++)
2909 Ops.push_back(N->getOperand(OpIdx++));
2910
2911 // Convert and append the immediate operand
2912 SDValue Imm = N->getOperand(OpIdx);
2913 uint32_t ImmVal = Imm->getAsZExtVal();
2914 Ops.push_back(getI32Imm(ImmVal, Loc));
2915
2916 // Accumulating variants are IT-predicable, add predicate operands.
2917 if (HasAccum) {
2918 SDValue Pred = getAL(CurDAG, Loc);
2919 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
2920 Ops.push_back(Pred);
2921 Ops.push_back(PredReg);
2922 }
2923
2924 // Create the CDE intruction
2925 SDNode *InstrNode = CurDAG->getMachineNode(Opcode, Loc, MVT::Untyped, Ops);
2926 SDValue ResultPair = SDValue(InstrNode, 0);
2927
2928 // The original intrinsic had two outputs, and the output of the dual-register
2929 // CDE instruction is a register pair. We need to extract the two subregisters
2930 // and replace all uses of the original outputs with the extracted
2931 // subregisters.
2932 uint16_t SubRegs[2] = {ARM::gsub_0, ARM::gsub_1};
2933 if (IsBigEndian)
2934 std::swap(SubRegs[0], SubRegs[1]);
2935
2936 for (size_t ResIdx = 0; ResIdx < 2; ResIdx++) {
2937 if (SDValue(N, ResIdx).use_empty())
2938 continue;
2939 SDValue SubReg = CurDAG->getTargetExtractSubreg(SubRegs[ResIdx], Loc,
2940 MVT::i32, ResultPair);
2941 ReplaceUses(SDValue(N, ResIdx), SubReg);
2942 }
2943
2944 CurDAG->RemoveDeadNode(N);
2945}
2946
2947void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
2948 bool isUpdating, unsigned NumVecs,
2949 const uint16_t *DOpcodes,
2950 const uint16_t *QOpcodes0,
2951 const uint16_t *QOpcodes1) {
2952 assert(Subtarget->hasNEON());
2953 assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
2954 SDLoc dl(N);
2955
2956 SDValue MemAddr, Align;
2957 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2958 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2959 return;
2960
2961 SDValue Chain = N->getOperand(0);
2962 EVT VT = N->getValueType(0);
2963 bool is64BitVector = VT.is64BitVector();
2964
2965 unsigned Alignment = 0;
2966 if (NumVecs != 3) {
2967 Alignment = Align->getAsZExtVal();
2968 unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
2969 if (Alignment > NumBytes)
2970 Alignment = NumBytes;
2971 if (Alignment < 8 && Alignment < NumBytes)
2972 Alignment = 0;
2973 // Alignment must be a power of two; make sure of that.
2974 Alignment = (Alignment & -Alignment);
2975 if (Alignment == 1)
2976 Alignment = 0;
2977 }
2978 Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2979
2980 unsigned OpcodeIndex;
2981 switch (VT.getSimpleVT().SimpleTy) {
2982 default: llvm_unreachable("unhandled vld-dup type");
2983 case MVT::v8i8:
2984 case MVT::v16i8: OpcodeIndex = 0; break;
2985 case MVT::v4i16:
2986 case MVT::v8i16:
2987 case MVT::v4f16:
2988 case MVT::v8f16:
2989 case MVT::v4bf16:
2990 case MVT::v8bf16:
2991 OpcodeIndex = 1; break;
2992 case MVT::v2f32:
2993 case MVT::v2i32:
2994 case MVT::v4f32:
2995 case MVT::v4i32: OpcodeIndex = 2; break;
2996 case MVT::v1f64:
2997 case MVT::v1i64: OpcodeIndex = 3; break;
2998 }
2999
3000 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
3001 if (!is64BitVector)
3002 ResTyElts *= 2;
3003 EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
3004
3005 std::vector<EVT> ResTys;
3006 ResTys.push_back(ResTy);
3007 if (isUpdating)
3008 ResTys.push_back(MVT::i32);
3009 ResTys.push_back(MVT::Other);
3010
3011 SDValue Pred = getAL(CurDAG, dl);
3012 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3013
3015 Ops.push_back(MemAddr);
3016 Ops.push_back(Align);
3017 unsigned Opc = is64BitVector ? DOpcodes[OpcodeIndex]
3018 : (NumVecs == 1) ? QOpcodes0[OpcodeIndex]
3019 : QOpcodes1[OpcodeIndex];
3020 if (isUpdating) {
3021 SDValue Inc = N->getOperand(2);
3022 bool IsImmUpdate =
3023 isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
3024 if (IsImmUpdate) {
3025 if (!isVLDfixed(Opc))
3026 Ops.push_back(Reg0);
3027 } else {
3028 if (isVLDfixed(Opc))
3030 Ops.push_back(Inc);
3031 }
3032 }
3033 if (is64BitVector || NumVecs == 1) {
3034 // Double registers and VLD1 quad registers are directly supported.
3035 } else {
3036 SDValue ImplDef = SDValue(
3037 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
3038 const SDValue OpsA[] = {MemAddr, Align, ImplDef, Pred, Reg0, Chain};
3039 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, ResTy,
3040 MVT::Other, OpsA);
3041 Ops.push_back(SDValue(VLdA, 0));
3042 Chain = SDValue(VLdA, 1);
3043 }
3044
3045 Ops.push_back(Pred);
3046 Ops.push_back(Reg0);
3047 Ops.push_back(Chain);
3048
3049 SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
3050
3051 // Transfer memoperands.
3052 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3053 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdDup), {MemOp});
3054
3055 // Extract the subregisters.
3056 if (NumVecs == 1) {
3057 ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0));
3058 } else {
3059 SDValue SuperReg = SDValue(VLdDup, 0);
3060 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
3061 unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
3062 for (unsigned Vec = 0; Vec != NumVecs; ++Vec) {
3063 ReplaceUses(SDValue(N, Vec),
3064 CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
3065 }
3066 }
3067 ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
3068 if (isUpdating)
3069 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
3070 CurDAG->RemoveDeadNode(N);
3071}
3072
3073bool ARMDAGToDAGISel::tryInsertVectorElt(SDNode *N) {
3074 if (!Subtarget->hasMVEIntegerOps())
3075 return false;
3076
3077 SDLoc dl(N);
3078
3079 // We are trying to use VMOV/VMOVX/VINS to more efficiently lower insert and
3080 // extracts of v8f16 and v8i16 vectors. Check that we have two adjacent
3081 // inserts of the correct type:
3082 SDValue Ins1 = SDValue(N, 0);
3083 SDValue Ins2 = N->getOperand(0);
3084 EVT VT = Ins1.getValueType();
3085 if (Ins2.getOpcode() != ISD::INSERT_VECTOR_ELT || !Ins2.hasOneUse() ||
3086 !isa<ConstantSDNode>(Ins1.getOperand(2)) ||
3087 !isa<ConstantSDNode>(Ins2.getOperand(2)) ||
3088 (VT != MVT::v8f16 && VT != MVT::v8i16) || (Ins2.getValueType() != VT))
3089 return false;
3090
3091 unsigned Lane1 = Ins1.getConstantOperandVal(2);
3092 unsigned Lane2 = Ins2.getConstantOperandVal(2);
3093 if (Lane2 % 2 != 0 || Lane1 != Lane2 + 1)
3094 return false;
3095
3096 // If the inserted values will be able to use T/B already, leave it to the
3097 // existing tablegen patterns. For example VCVTT/VCVTB.
3098 SDValue Val1 = Ins1.getOperand(1);
3099 SDValue Val2 = Ins2.getOperand(1);
3100 if (Val1.getOpcode() == ISD::FP_ROUND || Val2.getOpcode() == ISD::FP_ROUND)
3101 return false;
3102
3103 // Check if the inserted values are both extracts.
3104 if ((Val1.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
3105 Val1.getOpcode() == ARMISD::VGETLANEu) &&
3107 Val2.getOpcode() == ARMISD::VGETLANEu) &&
3108 isa<ConstantSDNode>(Val1.getOperand(1)) &&
3109 isa<ConstantSDNode>(Val2.getOperand(1)) &&
3110 (Val1.getOperand(0).getValueType() == MVT::v8f16 ||
3111 Val1.getOperand(0).getValueType() == MVT::v8i16) &&
3112 (Val2.getOperand(0).getValueType() == MVT::v8f16 ||
3113 Val2.getOperand(0).getValueType() == MVT::v8i16)) {
3114 unsigned ExtractLane1 = Val1.getConstantOperandVal(1);
3115 unsigned ExtractLane2 = Val2.getConstantOperandVal(1);
3116
3117 // If the two extracted lanes are from the same place and adjacent, this
3118 // simplifies into a f32 lane move.
3119 if (Val1.getOperand(0) == Val2.getOperand(0) && ExtractLane2 % 2 == 0 &&
3120 ExtractLane1 == ExtractLane2 + 1) {
3121 SDValue NewExt = CurDAG->getTargetExtractSubreg(
3122 ARM::ssub_0 + ExtractLane2 / 2, dl, MVT::f32, Val1.getOperand(0));
3123 SDValue NewIns = CurDAG->getTargetInsertSubreg(
3124 ARM::ssub_0 + Lane2 / 2, dl, VT, Ins2.getOperand(0),
3125 NewExt);
3126 ReplaceUses(Ins1, NewIns);
3127 return true;
3128 }
3129
3130 // Else v8i16 pattern of an extract and an insert, with a optional vmovx for
3131 // extracting odd lanes.
3132 if (VT == MVT::v8i16 && Subtarget->hasFullFP16()) {
3133 SDValue Inp1 = CurDAG->getTargetExtractSubreg(
3134 ARM::ssub_0 + ExtractLane1 / 2, dl, MVT::f32, Val1.getOperand(0));
3135 SDValue Inp2 = CurDAG->getTargetExtractSubreg(
3136 ARM::ssub_0 + ExtractLane2 / 2, dl, MVT::f32, Val2.getOperand(0));
3137 if (ExtractLane1 % 2 != 0)
3138 Inp1 = SDValue(CurDAG->getMachineNode(ARM::VMOVH, dl, MVT::f32, Inp1), 0);
3139 if (ExtractLane2 % 2 != 0)
3140 Inp2 = SDValue(CurDAG->getMachineNode(ARM::VMOVH, dl, MVT::f32, Inp2), 0);
3141 SDNode *VINS = CurDAG->getMachineNode(ARM::VINSH, dl, MVT::f32, Inp2, Inp1);
3142 SDValue NewIns =
3143 CurDAG->getTargetInsertSubreg(ARM::ssub_0 + Lane2 / 2, dl, MVT::v4f32,
3144 Ins2.getOperand(0), SDValue(VINS, 0));
3145 ReplaceUses(Ins1, NewIns);
3146 return true;
3147 }
3148 }
3149
3150 // The inserted values are not extracted - if they are f16 then insert them
3151 // directly using a VINS.
3152 if (VT == MVT::v8f16 && Subtarget->hasFullFP16()) {
3153 SDNode *VINS = CurDAG->getMachineNode(ARM::VINSH, dl, MVT::f32, Val2, Val1);
3154 SDValue NewIns =
3155 CurDAG->getTargetInsertSubreg(ARM::ssub_0 + Lane2 / 2, dl, MVT::v4f32,
3156 Ins2.getOperand(0), SDValue(VINS, 0));
3157 ReplaceUses(Ins1, NewIns);
3158 return true;
3159 }
3160
3161 return false;
3162}
3163
3164bool ARMDAGToDAGISel::transformFixedFloatingPointConversion(SDNode *N,
3165 SDNode *FMul,
3166 bool IsUnsigned,
3167 bool FixedToFloat) {
3168 auto Type = N->getValueType(0);
3169 unsigned ScalarBits = Type.getScalarSizeInBits();
3170 if (ScalarBits > 32)
3171 return false;
3172
3173 SDNodeFlags FMulFlags = FMul->getFlags();
3174 // The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is
3175 // allowed in 16 bit unsigned floats
3176 if (ScalarBits == 16 && !FMulFlags.hasNoInfs() && IsUnsigned)
3177 return false;
3178
3179 SDValue ImmNode = FMul->getOperand(1);
3180 SDValue VecVal = FMul->getOperand(0);
3181 if (VecVal->getOpcode() == ISD::UINT_TO_FP ||
3182 VecVal->getOpcode() == ISD::SINT_TO_FP)
3183 VecVal = VecVal->getOperand(0);
3184
3185 if (VecVal.getValueType().getScalarSizeInBits() != ScalarBits)
3186 return false;
3187
3188 if (ImmNode.getOpcode() == ISD::BITCAST) {
3189 if (ImmNode.getValueType().getScalarSizeInBits() != ScalarBits)
3190 return false;
3191 ImmNode = ImmNode.getOperand(0);
3192 }
3193
3194 if (ImmNode.getValueType().getScalarSizeInBits() != ScalarBits)
3195 return false;
3196
3197 APFloat ImmAPF(0.0f);
3198 switch (ImmNode.getOpcode()) {
3199 case ARMISD::VMOVIMM:
3200 case ARMISD::VDUP: {
3201 if (!isa<ConstantSDNode>(ImmNode.getOperand(0)))
3202 return false;
3203 unsigned Imm = ImmNode.getConstantOperandVal(0);
3204 if (ImmNode.getOpcode() == ARMISD::VMOVIMM)
3205 Imm = ARM_AM::decodeVMOVModImm(Imm, ScalarBits);
3206 ImmAPF =
3207 APFloat(ScalarBits == 32 ? APFloat::IEEEsingle() : APFloat::IEEEhalf(),
3208 APInt(ScalarBits, Imm));
3209 break;
3210 }
3211 case ARMISD::VMOVFPIMM: {
3213 break;
3214 }
3215 default:
3216 return false;
3217 }
3218
3219 // Where n is the number of fractional bits, multiplying by 2^n will convert
3220 // from float to fixed and multiplying by 2^-n will convert from fixed to
3221 // float. Taking log2 of the factor (after taking the inverse in the case of
3222 // float to fixed) will give n.
3223 APFloat ToConvert = ImmAPF;
3224 if (FixedToFloat) {
3225 if (!ImmAPF.getExactInverse(&ToConvert))
3226 return false;
3227 }
3228 APSInt Converted(64, false);
3229 bool IsExact;
3231 &IsExact);
3232 if (!IsExact || !Converted.isPowerOf2())
3233 return false;
3234
3235 unsigned FracBits = Converted.logBase2();
3236 if (FracBits > ScalarBits)
3237 return false;
3238
3240 VecVal, CurDAG->getConstant(FracBits, SDLoc(N), MVT::i32)};
3241 AddEmptyMVEPredicateToOps(Ops, SDLoc(N), Type);
3242
3243 unsigned int Opcode;
3244 switch (ScalarBits) {
3245 case 16:
3246 if (FixedToFloat)
3247 Opcode = IsUnsigned ? ARM::MVE_VCVTf16u16_fix : ARM::MVE_VCVTf16s16_fix;
3248 else
3249 Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix;
3250 break;
3251 case 32:
3252 if (FixedToFloat)
3253 Opcode = IsUnsigned ? ARM::MVE_VCVTf32u32_fix : ARM::MVE_VCVTf32s32_fix;
3254 else
3255 Opcode = IsUnsigned ? ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix;
3256 break;
3257 default:
3258 llvm_unreachable("unexpected number of scalar bits");
3259 break;
3260 }
3261
3262 ReplaceNode(N, CurDAG->getMachineNode(Opcode, SDLoc(N), Type, Ops));
3263 return true;
3264}
3265
3266bool ARMDAGToDAGISel::tryFP_TO_INT(SDNode *N, SDLoc dl) {
3267 // Transform a floating-point to fixed-point conversion to a VCVT
3268 if (!Subtarget->hasMVEFloatOps())
3269 return false;
3270 EVT Type = N->getValueType(0);
3271 if (!Type.isVector())
3272 return false;
3273 unsigned int ScalarBits = Type.getScalarSizeInBits();
3274
3275 bool IsUnsigned = N->getOpcode() == ISD::FP_TO_UINT ||
3276 N->getOpcode() == ISD::FP_TO_UINT_SAT;
3277 SDNode *Node = N->getOperand(0).getNode();
3278
3279 // floating-point to fixed-point with one fractional bit gets turned into an
3280 // FP_TO_[U|S]INT(FADD (x, x)) rather than an FP_TO_[U|S]INT(FMUL (x, y))
3281 if (Node->getOpcode() == ISD::FADD) {
3282 if (Node->getOperand(0) != Node->getOperand(1))
3283 return false;
3284 SDNodeFlags Flags = Node->getFlags();
3285 // The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is
3286 // allowed in 16 bit unsigned floats
3287 if (ScalarBits == 16 && !Flags.hasNoInfs() && IsUnsigned)
3288 return false;
3289
3290 unsigned Opcode;
3291 switch (ScalarBits) {
3292 case 16:
3293 Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix;
3294 break;
3295 case 32:
3296 Opcode = IsUnsigned ? ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix;
3297 break;
3298 }
3299 SmallVector<SDValue, 3> Ops{Node->getOperand(0),
3300 CurDAG->getConstant(1, dl, MVT::i32)};
3301 AddEmptyMVEPredicateToOps(Ops, dl, Type);
3302
3303 ReplaceNode(N, CurDAG->getMachineNode(Opcode, dl, Type, Ops));
3304 return true;
3305 }
3306
3307 if (Node->getOpcode() != ISD::FMUL)
3308 return false;
3309
3310 return transformFixedFloatingPointConversion(N, Node, IsUnsigned, false);
3311}
3312
3313bool ARMDAGToDAGISel::tryFMULFixed(SDNode *N, SDLoc dl) {
3314 // Transform a fixed-point to floating-point conversion to a VCVT
3315 if (!Subtarget->hasMVEFloatOps())
3316 return false;
3317 auto Type = N->getValueType(0);
3318 if (!Type.isVector())
3319 return false;
3320
3321 auto LHS = N->getOperand(0);
3322 if (LHS.getOpcode() != ISD::SINT_TO_FP && LHS.getOpcode() != ISD::UINT_TO_FP)
3323 return false;
3324
3325 return transformFixedFloatingPointConversion(
3326 N, N, LHS.getOpcode() == ISD::UINT_TO_FP, true);
3327}
3328
3329bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
3330 if (!Subtarget->hasV6T2Ops())
3331 return false;
3332
3333 unsigned Opc = isSigned
3334 ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
3335 : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
3336 SDLoc dl(N);
3337
3338 // For unsigned extracts, check for a shift right and mask
3339 unsigned And_imm = 0;
3340 if (N->getOpcode() == ISD::AND) {
3341 if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {
3342
3343 // The immediate is a mask of the low bits iff imm & (imm+1) == 0
3344 if (And_imm & (And_imm + 1))
3345 return false;
3346
3347 unsigned Srl_imm = 0;
3348 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
3349 Srl_imm)) {
3350 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
3351
3352 // Mask off the unnecessary bits of the AND immediate; normally
3353 // DAGCombine will do this, but that might not happen if
3354 // targetShrinkDemandedConstant chooses a different immediate.
3355 And_imm &= -1U >> Srl_imm;
3356
3357 // Note: The width operand is encoded as width-1.
3358 unsigned Width = llvm::countr_one(And_imm) - 1;
3359 unsigned LSB = Srl_imm;
3360
3361 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3362
3363 if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
3364 // It's cheaper to use a right shift to extract the top bits.
3365 if (Subtarget->isThumb()) {
3366 Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
3367 SDValue Ops[] = { N->getOperand(0).getOperand(0),
3368 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
3369 getAL(CurDAG, dl), Reg0, Reg0 };
3370 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3371 return true;
3372 }
3373
3374 // ARM models shift instructions as MOVsi with shifter operand.
3376 SDValue ShOpc =
3377 CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
3378 MVT::i32);
3379 SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
3380 getAL(CurDAG, dl), Reg0, Reg0 };
3381 CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
3382 return true;
3383 }
3384
3385 assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
3386 SDValue Ops[] = { N->getOperand(0).getOperand(0),
3387 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
3388 CurDAG->getTargetConstant(Width, dl, MVT::i32),
3389 getAL(CurDAG, dl), Reg0 };
3390 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3391 return true;
3392 }
3393 }
3394 return false;
3395 }
3396
3397 // Otherwise, we're looking for a shift of a shift
3398 unsigned Shl_imm = 0;
3399 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
3400 assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
3401 unsigned Srl_imm = 0;
3402 if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
3403 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
3404 // Note: The width operand is encoded as width-1.
3405 unsigned Width = 32 - Srl_imm - 1;
3406 int LSB = Srl_imm - Shl_imm;
3407 if (LSB < 0)
3408 return false;
3409 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3410 assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
3411 SDValue Ops[] = { N->getOperand(0).getOperand(0),
3412 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
3413 CurDAG->getTargetConstant(Width, dl, MVT::i32),
3414 getAL(CurDAG, dl), Reg0 };
3415 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3416 return true;
3417 }
3418 }
3419
3420 // Or we are looking for a shift of an and, with a mask operand
3421 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) &&
3422 isShiftedMask_32(And_imm)) {
3423 unsigned Srl_imm = 0;
3424 unsigned LSB = llvm::countr_zero(And_imm);
3425 // Shift must be the same as the ands lsb
3426 if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) {
3427 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
3428 unsigned MSB = llvm::Log2_32(And_imm);
3429 // Note: The width operand is encoded as width-1.
3430 unsigned Width = MSB - LSB;
3431 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3432 assert(Srl_imm + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
3433 SDValue Ops[] = { N->getOperand(0).getOperand(0),
3434 CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32),
3435 CurDAG->getTargetConstant(Width, dl, MVT::i32),
3436 getAL(CurDAG, dl), Reg0 };
3437 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3438 return true;
3439 }
3440 }
3441
3442 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
3443 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
3444 unsigned LSB = 0;
3445 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
3446 !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
3447 return false;
3448
3449 if (LSB + Width > 32)
3450 return false;
3451
3452 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3453 assert(LSB + Width <= 32 && "Shouldn't create an invalid ubfx");
3454 SDValue Ops[] = { N->getOperand(0).getOperand(0),
3455 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
3456 CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
3457 getAL(CurDAG, dl), Reg0 };
3458 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3459 return true;
3460 }
3461
3462 return false;
3463}
3464
3465/// Target-specific DAG combining for ISD::SUB.
3466/// Target-independent combining lowers SELECT_CC nodes of the form
3467/// select_cc setg[ge] X, 0, X, -X
3468/// select_cc setgt X, -1, X, -X
3469/// select_cc setl[te] X, 0, -X, X
3470/// select_cc setlt X, 1, -X, X
3471/// which represent Integer ABS into:
3472/// Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
3473/// ARM instruction selection detects the latter and matches it to
3474/// ARM::ABS or ARM::t2ABS machine node.
3475bool ARMDAGToDAGISel::tryABSOp(SDNode *N){
3476 SDValue SUBSrc0 = N->getOperand(0);
3477 SDValue SUBSrc1 = N->getOperand(1);
3478 EVT VT = N->getValueType(0);
3479
3480 if (Subtarget->isThumb1Only())
3481 return false;
3482
3483 if (SUBSrc0.getOpcode() != ISD::XOR || SUBSrc1.getOpcode() != ISD::SRA)
3484 return false;
3485
3486 SDValue XORSrc0 = SUBSrc0.getOperand(0);
3487 SDValue XORSrc1 = SUBSrc0.getOperand(1);
3488 SDValue SRASrc0 = SUBSrc1.getOperand(0);
3489 SDValue SRASrc1 = SUBSrc1.getOperand(1);
3490 ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1);
3491 EVT XType = SRASrc0.getValueType();
3492 unsigned Size = XType.getSizeInBits() - 1;
3493
3494 if (XORSrc1 == SUBSrc1 && XORSrc0 == SRASrc0 && XType.isInteger() &&
3495 SRAConstant != nullptr && Size == SRAConstant->getZExtValue()) {
3496 unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
3497 CurDAG->SelectNodeTo(N, Opcode, VT, XORSrc0);
3498 return true;
3499 }
3500
3501 return false;
3502}
3503
3504/// We've got special pseudo-instructions for these
3505void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
3506 unsigned Opcode;
3507 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
3508 if (MemTy == MVT::i8)
3509 Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_8 : ARM::CMP_SWAP_8;
3510 else if (MemTy == MVT::i16)
3511 Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_16 : ARM::CMP_SWAP_16;
3512 else if (MemTy == MVT::i32)
3513 Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_32 : ARM::CMP_SWAP_32;
3514 else
3515 llvm_unreachable("Unknown AtomicCmpSwap type");
3516
3517 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
3518 N->getOperand(0)};
3519 SDNode *CmpSwap = CurDAG->getMachineNode(
3520 Opcode, SDLoc(N),
3521 CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);
3522
3523 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
3524 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
3525
3526 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
3527 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
3528 CurDAG->RemoveDeadNode(N);
3529}
3530
3531static std::optional<std::pair<unsigned, unsigned>>
3533 unsigned FirstOne = A.getBitWidth() - A.countl_zero() - 1;
3534 unsigned LastOne = A.countr_zero();
3535 if (A.popcount() != (FirstOne - LastOne + 1))
3536 return std::nullopt;
3537 return std::make_pair(FirstOne, LastOne);
3538}
3539
3540void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
3541 assert(N->getOpcode() == ARMISD::CMPZ);
3542 SwitchEQNEToPLMI = false;
3543
3544 if (!Subtarget->isThumb())
3545 // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and
3546 // LSR don't exist as standalone instructions - they need the barrel shifter.
3547 return;
3548
3549 // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X))
3550 SDValue And = N->getOperand(0);
3551 if (!And->hasOneUse())
3552 return;
3553
3554 SDValue Zero = N->getOperand(1);
3555 if (!isNullConstant(Zero) || And->getOpcode() != ISD::AND)
3556 return;
3557 SDValue X = And.getOperand(0);
3558 auto C = dyn_cast<ConstantSDNode>(And.getOperand(1));
3559
3560 if (!C)
3561 return;
3562 auto Range = getContiguousRangeOfSetBits(C->getAPIntValue());
3563 if (!Range)
3564 return;
3565
3566 // There are several ways to lower this:
3567 SDNode *NewN;
3568 SDLoc dl(N);
3569
3570 auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* {
3571 if (Subtarget->isThumb2()) {
3572 Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri;
3573 SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32),
3574 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
3575 CurDAG->getRegister(0, MVT::i32) };
3576 return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
3577 } else {
3578 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src,
3579 CurDAG->getTargetConstant(Imm, dl, MVT::i32),
3580 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
3581 return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
3582 }
3583 };
3584
3585 if (Range->second == 0) {
3586 // 1. Mask includes the LSB -> Simply shift the top N bits off
3587 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
3588 ReplaceNode(And.getNode(), NewN);
3589 } else if (Range->first == 31) {
3590 // 2. Mask includes the MSB -> Simply shift the bottom N bits off
3591 NewN = EmitShift(ARM::tLSRri, X, Range->second);
3592 ReplaceNode(And.getNode(), NewN);
3593 } else if (Range->first == Range->second) {
3594 // 3. Only one bit is set. We can shift this into the sign bit and use a
3595 // PL/MI comparison.
3596 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
3597 ReplaceNode(And.getNode(), NewN);
3598
3599 SwitchEQNEToPLMI = true;
3600 } else if (!Subtarget->hasV6T2Ops()) {
3601 // 4. Do a double shift to clear bottom and top bits, but only in
3602 // thumb-1 mode as in thumb-2 we can use UBFX.
3603 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
3604 NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0),
3605 Range->second + (31 - Range->first));
3606 ReplaceNode(And.getNode(), NewN);
3607 }
3608}
3609
3610static unsigned getVectorShuffleOpcode(EVT VT, unsigned Opc64[3],
3611 unsigned Opc128[3]) {
3612 assert((VT.is64BitVector() || VT.is128BitVector()) &&
3613 "Unexpected vector shuffle length");
3614 switch (VT.getScalarSizeInBits()) {
3615 default:
3616 llvm_unreachable("Unexpected vector shuffle element size");
3617 case 8:
3618 return VT.is64BitVector() ? Opc64[0] : Opc128[0];
3619 case 16:
3620 return VT.is64BitVector() ? Opc64[1] : Opc128[1];
3621 case 32:
3622 return VT.is64BitVector() ? Opc64[2] : Opc128[2];
3623 }
3624}
3625
3626void ARMDAGToDAGISel::Select(SDNode *N) {
3627 SDLoc dl(N);
3628
3629 if (N->isMachineOpcode()) {
3630 N->setNodeId(-1);
3631 return; // Already selected.
3632 }
3633
3634 switch (N->getOpcode()) {
3635 default: break;
3636 case ISD::STORE: {
3637 // For Thumb1, match an sp-relative store in C++. This is a little
3638 // unfortunate, but I don't think I can make the chain check work
3639 // otherwise. (The chain of the store has to be the same as the chain
3640 // of the CopyFromReg, or else we can't replace the CopyFromReg with
3641 // a direct reference to "SP".)
3642 //
3643 // This is only necessary on Thumb1 because Thumb1 sp-relative stores use
3644 // a different addressing mode from other four-byte stores.
3645 //
3646 // This pattern usually comes up with call arguments.
3647 StoreSDNode *ST = cast<StoreSDNode>(N);
3648 SDValue Ptr = ST->getBasePtr();
3649 if (Subtarget->isThumb1Only() && ST->isUnindexed()) {
3650 int RHSC = 0;
3651 if (Ptr.getOpcode() == ISD::ADD &&
3652 isScaledConstantInRange(Ptr.getOperand(1), /*Scale=*/4, 0, 256, RHSC))
3653 Ptr = Ptr.getOperand(0);
3654
3655 if (Ptr.getOpcode() == ISD::CopyFromReg &&
3656 cast<RegisterSDNode>(Ptr.getOperand(1))->getReg() == ARM::SP &&
3657 Ptr.getOperand(0) == ST->getChain()) {
3658 SDValue Ops[] = {ST->getValue(),
3659 CurDAG->getRegister(ARM::SP, MVT::i32),
3660 CurDAG->getTargetConstant(RHSC, dl, MVT::i32),
3661 getAL(CurDAG, dl),
3662 CurDAG->getRegister(0, MVT::i32),
3663 ST->getChain()};
3664 MachineSDNode *ResNode =
3665 CurDAG->getMachineNode(ARM::tSTRspi, dl, MVT::Other, Ops);
3666 MachineMemOperand *MemOp = ST->getMemOperand();
3667 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
3668 ReplaceNode(N, ResNode);
3669 return;
3670 }
3671 }
3672 break;
3673 }
3675 if (tryWriteRegister(N))
3676 return;
3677 break;
3678 case ISD::READ_REGISTER:
3679 if (tryReadRegister(N))
3680 return;
3681 break;
3682 case ISD::INLINEASM:
3683 case ISD::INLINEASM_BR:
3684 if (tryInlineAsm(N))
3685 return;
3686 break;
3687 case ISD::SUB:
3688 // Select special operations if SUB node forms integer ABS pattern
3689 if (tryABSOp(N))
3690 return;
3691 // Other cases are autogenerated.
3692 break;
3693 case ISD::Constant: {
3694 unsigned Val = N->getAsZExtVal();
3695 // If we can't materialize the constant we need to use a literal pool
3696 if (ConstantMaterializationCost(Val, Subtarget) > 2 &&
3697 !Subtarget->genExecuteOnly()) {
3698 SDValue CPIdx = CurDAG->getTargetConstantPool(
3699 ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
3700 TLI->getPointerTy(CurDAG->getDataLayout()));
3701
3702 SDNode *ResNode;
3703 if (Subtarget->isThumb()) {
3704 SDValue Ops[] = {
3705 CPIdx,
3706 getAL(CurDAG, dl),
3707 CurDAG->getRegister(0, MVT::i32),
3708 CurDAG->getEntryNode()
3709 };
3710 ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
3711 Ops);
3712 } else {
3713 SDValue Ops[] = {
3714 CPIdx,
3715 CurDAG->getTargetConstant(0, dl, MVT::i32),
3716 getAL(CurDAG, dl),
3717 CurDAG->getRegister(0, MVT::i32),
3718 CurDAG->getEntryNode()
3719 };
3720 ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
3721 Ops);
3722 }
3723 // Annotate the Node with memory operand information so that MachineInstr
3724 // queries work properly. This e.g. gives the register allocation the
3725 // required information for rematerialization.
3726 MachineFunction& MF = CurDAG->getMachineFunction();
3730
3731 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
3732
3733 ReplaceNode(N, ResNode);
3734 return;
3735 }
3736
3737 // Other cases are autogenerated.
3738 break;
3739 }
3740 case ISD::FrameIndex: {
3741 // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
3742 int FI = cast<FrameIndexSDNode>(N)->getIndex();
3743 SDValue TFI = CurDAG->getTargetFrameIndex(
3744 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
3745 if (Subtarget->isThumb1Only()) {
3746 // Set the alignment of the frame object to 4, to avoid having to generate
3747 // more than one ADD
3748 MachineFrameInfo &MFI = MF->getFrameInfo();
3749 if (MFI.getObjectAlign(FI) < Align(4))
3750 MFI.setObjectAlignment(FI, Align(4));
3751 CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
3752 CurDAG->getTargetConstant(0, dl, MVT::i32));
3753 return;
3754 } else {
3755 unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
3756 ARM::t2ADDri : ARM::ADDri);
3757 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),
3758 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
3759 CurDAG->getRegister(0, MVT::i32) };
3760 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3761 return;
3762 }
3763 }
3765 if (tryInsertVectorElt(N))
3766 return;
3767 break;
3768 }
3769 case ISD::SRL:
3770 if (tryV6T2BitfieldExtractOp(N, false))
3771 return;
3772 break;
3774 case ISD::SRA:
3775 if (tryV6T2BitfieldExtractOp(N, true))
3776 return;
3777 break;
3778 case ISD::FP_TO_UINT:
3779 case ISD::FP_TO_SINT:
3782 if (tryFP_TO_INT(N, dl))
3783 return;
3784 break;
3785 case ISD::FMUL:
3786 if (tryFMULFixed(N, dl))
3787 return;
3788 break;
3789 case ISD::MUL:
3790 if (Subtarget->isThumb1Only())
3791 break;
3792 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
3793 unsigned RHSV = C->getZExtValue();
3794 if (!RHSV) break;
3795 if (isPowerOf2_32(RHSV-1)) { // 2^n+1?
3796 unsigned ShImm = Log2_32(RHSV-1);
3797 if (ShImm >= 32)
3798 break;
3799 SDValue V = N->getOperand(0);
3800 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
3801 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
3802 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3803 if (Subtarget->isThumb()) {
3804 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
3805 CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
3806 return;
3807 } else {
3808 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
3809 Reg0 };
3810 CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
3811 return;
3812 }
3813 }
3814 if (isPowerOf2_32(RHSV+1)) { // 2^n-1?
3815 unsigned ShImm = Log2_32(RHSV+1);
3816 if (ShImm >= 32)
3817 break;
3818 SDValue V = N->getOperand(0);
3819 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
3820 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
3821 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3822 if (Subtarget->isThumb()) {
3823 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
3824 CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
3825 return;
3826 } else {
3827 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
3828 Reg0 };
3829 CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
3830 return;
3831 }
3832 }
3833 }
3834 break;
3835 case ISD::AND: {
3836 // Check for unsigned bitfield extract
3837 if (tryV6T2BitfieldExtractOp(N, false))
3838 return;
3839
3840 // If an immediate is used in an AND node, it is possible that the immediate
3841 // can be more optimally materialized when negated. If this is the case we
3842 // can negate the immediate and use a BIC instead.
3843 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
3844 if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {
3845 uint32_t Imm = (uint32_t) N1C->getZExtValue();
3846
3847 // In Thumb2 mode, an AND can take a 12-bit immediate. If this
3848 // immediate can be negated and fit in the immediate operand of
3849 // a t2BIC, don't do any manual transform here as this can be
3850 // handled by the generic ISel machinery.
3851 bool PreferImmediateEncoding =
3852 Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));
3853 if (!PreferImmediateEncoding &&
3854 ConstantMaterializationCost(Imm, Subtarget) >
3855 ConstantMaterializationCost(~Imm, Subtarget)) {
3856 // The current immediate costs more to materialize than a negated
3857 // immediate, so negate the immediate and use a BIC.
3858 SDValue NewImm =
3859 CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32);
3860 // If the new constant didn't exist before, reposition it in the topological
3861 // ordering so it is just before N. Otherwise, don't touch its location.
3862 if (NewImm->getNodeId() == -1)
3863 CurDAG->RepositionNode(N->getIterator(), NewImm.getNode());
3864
3865 if (!Subtarget->hasThumb2()) {
3866 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32),
3867 N->getOperand(0), NewImm, getAL(CurDAG, dl),
3868 CurDAG->getRegister(0, MVT::i32)};
3869 ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops));
3870 return;
3871 } else {
3872 SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl),
3873 CurDAG->getRegister(0, MVT::i32),
3874 CurDAG->getRegister(0, MVT::i32)};
3875 ReplaceNode(N,
3876 CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops));
3877 return;
3878 }
3879 }
3880 }
3881
3882 // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
3883 // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
3884 // are entirely contributed by c2 and lower 16-bits are entirely contributed
3885 // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
3886 // Select it to: "movt x, ((c1 & 0xffff) >> 16)
3887 EVT VT = N->getValueType(0);
3888 if (VT != MVT::i32)
3889 break;
3890 unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
3891 ? ARM::t2MOVTi16
3892 : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
3893 if (!Opc)
3894 break;
3895 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
3896 N1C = dyn_cast<ConstantSDNode>(N1);
3897 if (!N1C)
3898 break;
3899 if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
3900 SDValue N2 = N0.getOperand(1);
3901 ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
3902 if (!N2C)
3903 break;
3904 unsigned N1CVal = N1C->getZExtValue();
3905 unsigned N2CVal = N2C->getZExtValue();
3906 if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
3907 (N1CVal & 0xffffU) == 0xffffU &&
3908 (N2CVal & 0xffffU) == 0x0U) {
3909 SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
3910 dl, MVT::i32);
3911 SDValue Ops[] = { N0.getOperand(0), Imm16,
3912 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
3913 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
3914 return;
3915 }
3916 }
3917
3918 break;
3919 }
3920 case ARMISD::UMAAL: {
3921 unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
3922 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
3923 N->getOperand(2), N->getOperand(3),
3924 getAL(CurDAG, dl),
3925 CurDAG->getRegister(0, MVT::i32) };
3926 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops));
3927 return;
3928 }
3929 case ARMISD::UMLAL:{
3930 if (Subtarget->isThumb()) {
3931 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3932 N->getOperand(3), getAL(CurDAG, dl),
3933 CurDAG->getRegister(0, MVT::i32)};
3934 ReplaceNode(
3935 N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops));
3936 return;
3937 }else{
3938 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3939 N->getOperand(3), getAL(CurDAG, dl),
3940 CurDAG->getRegister(0, MVT::i32),
3941 CurDAG->getRegister(0, MVT::i32) };
3942 ReplaceNode(N, CurDAG->getMachineNode(
3943 Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,
3944 MVT::i32, MVT::i32, Ops));
3945 return;
3946 }
3947 }
3948 case ARMISD::SMLAL:{
3949 if (Subtarget->isThumb()) {
3950 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3951 N->getOperand(3), getAL(CurDAG, dl),
3952 CurDAG->getRegister(0, MVT::i32)};
3953 ReplaceNode(
3954 N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops));
3955 return;
3956 }else{
3957 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3958 N->getOperand(3), getAL(CurDAG, dl),
3959 CurDAG->getRegister(0, MVT::i32),
3960 CurDAG->getRegister(0, MVT::i32) };
3961 ReplaceNode(N, CurDAG->getMachineNode(
3962 Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl,
3963 MVT::i32, MVT::i32, Ops));
3964 return;
3965 }
3966 }
3967 case ARMISD::SUBE: {
3968 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
3969 break;
3970 // Look for a pattern to match SMMLS
3971 // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b))))
3972 if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI ||
3973 N->getOperand(2).getOpcode() != ARMISD::SUBC ||
3974 !SDValue(N, 1).use_empty())
3975 break;
3976
3977 if (Subtarget->isThumb())
3978 assert(Subtarget->hasThumb2() &&
3979 "This pattern should not be generated for Thumb");
3980
3981 SDValue SmulLoHi = N->getOperand(1);
3982 SDValue Subc = N->getOperand(2);
3983 SDValue Zero = Subc.getOperand(0);
3984
3985 if (!isNullConstant(Zero) || Subc.getOperand(1) != SmulLoHi.getValue(0) ||
3986 N->getOperand(1) != SmulLoHi.getValue(1) ||
3987 N->getOperand(2) != Subc.getValue(1))
3988 break;
3989
3990 unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS;
3991 SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1),
3992 N->getOperand(0), getAL(CurDAG, dl),
3993 CurDAG->getRegister(0, MVT::i32) };
3994 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops));
3995 return;
3996 }
3997 case ISD::LOAD: {
3998 if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
3999 return;
4000 if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
4001 if (tryT2IndexedLoad(N))
4002 return;
4003 } else if (Subtarget->isThumb()) {
4004 if (tryT1IndexedLoad(N))
4005 return;
4006 } else if (tryARMIndexedLoad(N))
4007 return;
4008 // Other cases are autogenerated.
4009 break;
4010 }
4011 case ISD::MLOAD:
4012 if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
4013 return;
4014 // Other cases are autogenerated.
4015 break;
4016 case ARMISD::WLSSETUP: {
4017 SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopSetup, dl, MVT::i32,
4018 N->getOperand(0));
4019 ReplaceUses(N, New);
4020 CurDAG->RemoveDeadNode(N);
4021 return;
4022 }
4023 case ARMISD::WLS: {
4024 SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopStart, dl, MVT::Other,
4025 N->getOperand(1), N->getOperand(2),
4026 N->getOperand(0));
4027 ReplaceUses(N, New);
4028 CurDAG->RemoveDeadNode(N);
4029 return;
4030 }
4031 case ARMISD::LE: {
4032 SDValue Ops[] = { N->getOperand(1),
4033 N->getOperand(2),
4034 N->getOperand(0) };
4035 unsigned Opc = ARM::t2LoopEnd;
4036 SDNode *New = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
4037 ReplaceUses(N, New);
4038 CurDAG->RemoveDeadNode(N);
4039 return;
4040 }
4041 case ARMISD::LDRD: {
4042 if (Subtarget->isThumb2())
4043 break; // TableGen handles isel in this case.
4044 SDValue Base, RegOffset, ImmOffset;
4045 const SDValue &Chain = N->getOperand(0);
4046 const SDValue &Addr = N->getOperand(1);
4047 SelectAddrMode3(Addr, Base, RegOffset, ImmOffset);
4048 if (RegOffset != CurDAG->getRegister(0, MVT::i32)) {
4049 // The register-offset variant of LDRD mandates that the register
4050 // allocated to RegOffset is not reused in any of the remaining operands.
4051 // This restriction is currently not enforced. Therefore emitting this
4052 // variant is explicitly avoided.
4053 Base = Addr;
4054 RegOffset = CurDAG->getRegister(0, MVT::i32);
4055 }
4056 SDValue Ops[] = {Base, RegOffset, ImmOffset, Chain};
4057 SDNode *New = CurDAG->getMachineNode(ARM::LOADDUAL, dl,
4058 {MVT::Untyped, MVT::Other}, Ops);
4059 SDValue Lo = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
4060 SDValue(New, 0));
4061 SDValue Hi = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
4062 SDValue(New, 0));
4063 transferMemOperands(N, New);
4064 ReplaceUses(SDValue(N, 0), Lo);
4065 ReplaceUses(SDValue(N, 1), Hi);
4066 ReplaceUses(SDValue(N, 2), SDValue(New, 1));
4067 CurDAG->RemoveDeadNode(N);
4068 return;
4069 }
4070 case ARMISD::STRD: {
4071 if (Subtarget->isThumb2())
4072 break; // TableGen handles isel in this case.
4073 SDValue Base, RegOffset, ImmOffset;
4074 const SDValue &Chain = N->getOperand(0);
4075 const SDValue &Addr = N->getOperand(3);
4076 SelectAddrMode3(Addr, Base, RegOffset, ImmOffset);
4077 if (RegOffset != CurDAG->getRegister(0, MVT::i32)) {
4078 // The register-offset variant of STRD mandates that the register
4079 // allocated to RegOffset is not reused in any of the remaining operands.
4080 // This restriction is currently not enforced. Therefore emitting this
4081 // variant is explicitly avoided.
4082 Base = Addr;
4083 RegOffset = CurDAG->getRegister(0, MVT::i32);
4084 }
4085 SDNode *RegPair =
4086 createGPRPairNode(MVT::Untyped, N->getOperand(1), N->getOperand(2));
4087 SDValue Ops[] = {SDValue(RegPair, 0), Base, RegOffset, ImmOffset, Chain};
4088 SDNode *New = CurDAG->getMachineNode(ARM::STOREDUAL, dl, MVT::Other, Ops);
4089 transferMemOperands(N, New);
4090 ReplaceUses(SDValue(N, 0), SDValue(New, 0));
4091 CurDAG->RemoveDeadNode(N);
4092 return;
4093 }
4094 case ARMISD::LOOP_DEC: {
4095 SDValue Ops[] = { N->getOperand(1),
4096 N->getOperand(2),
4097 N->getOperand(0) };
4098 SDNode *Dec =
4099 CurDAG->getMachineNode(ARM::t2LoopDec, dl,
4100 CurDAG->getVTList(MVT::i32, MVT::Other), Ops);
4101 ReplaceUses(N, Dec);
4102 CurDAG->RemoveDeadNode(N);
4103 return;
4104 }
4105 case ARMISD::BRCOND: {
4106 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
4107 // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
4108 // Pattern complexity = 6 cost = 1 size = 0
4109
4110 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
4111 // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
4112 // Pattern complexity = 6 cost = 1 size = 0
4113
4114 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
4115 // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
4116 // Pattern complexity = 6 cost = 1 size = 0
4117
4118 unsigned Opc = Subtarget->isThumb() ?
4119 ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
4120 SDValue Chain = N->getOperand(0);
4121 SDValue N1 = N->getOperand(1);
4122 SDValue N2 = N->getOperand(2);
4123 SDValue N3 = N->getOperand(3);
4124 SDValue InGlue = N->getOperand(4);
4128
4129 unsigned CC = (unsigned)N2->getAsZExtVal();
4130
4131 if (InGlue.getOpcode() == ARMISD::CMPZ) {
4132 if (InGlue.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) {
4133 SDValue Int = InGlue.getOperand(0);
4134 uint64_t ID = Int->getConstantOperandVal(1);
4135
4136 // Handle low-overhead loops.
4137 if (ID == Intrinsic::loop_decrement_reg) {
4138 SDValue Elements = Int.getOperand(2);
4139 SDValue Size = CurDAG->getTargetConstant(Int.getConstantOperandVal(3),
4140 dl, MVT::i32);
4141
4142 SDValue Args[] = { Elements, Size, Int.getOperand(0) };
4143 SDNode *LoopDec =
4144 CurDAG->getMachineNode(ARM::t2LoopDec, dl,
4145 CurDAG->getVTList(MVT::i32, MVT::Other),
4146 Args);
4147 ReplaceUses(Int.getNode(), LoopDec);
4148
4149 SDValue EndArgs[] = { SDValue(LoopDec, 0), N1, Chain };
4150 SDNode *LoopEnd =
4151 CurDAG->getMachineNode(ARM::t2LoopEnd, dl, MVT::Other, EndArgs);
4152
4153 ReplaceUses(N, LoopEnd);
4154 CurDAG->RemoveDeadNode(N);
4155 CurDAG->RemoveDeadNode(InGlue.getNode());
4156 CurDAG->RemoveDeadNode(Int.getNode());
4157 return;
4158 }
4159 }
4160
4161 bool SwitchEQNEToPLMI;
4162 SelectCMPZ(InGlue.getNode(), SwitchEQNEToPLMI);
4163 InGlue = N->getOperand(4);
4164
4165 if (SwitchEQNEToPLMI) {
4166 switch ((ARMCC::CondCodes)CC) {
4167 default: llvm_unreachable("CMPZ must be either NE or EQ!");
4168 case ARMCC::NE:
4170 break;
4171 case ARMCC::EQ:
4173 break;
4174 }
4175 }
4176 }
4177
4178 SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32);
4179 SDValue Ops[] = { N1, Tmp2, N3, Chain, InGlue };
4180 SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
4181 MVT::Glue, Ops);
4182 Chain = SDValue(ResNode, 0);
4183 if (N->getNumValues() == 2) {
4184 InGlue = SDValue(ResNode, 1);
4185 ReplaceUses(SDValue(N, 1), InGlue);
4186 }
4187 ReplaceUses(SDValue(N, 0),
4188 SDValue(Chain.getNode(), Chain.getResNo()));
4189 CurDAG->RemoveDeadNode(N);
4190 return;
4191 }
4192
4193 case ARMISD::CMPZ: {
4194 // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
4195 // This allows us to avoid materializing the expensive negative constant.
4196 // The CMPZ #0 is useless and will be peepholed away but we need to keep it
4197 // for its glue output.
4198 SDValue X = N->getOperand(0);
4199 auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode());
4200 if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) {
4201 int64_t Addend = -C->getSExtValue();
4202
4203 SDNode *Add = nullptr;
4204 // ADDS can be better than CMN if the immediate fits in a
4205 // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3.
4206 // Outside that range we can just use a CMN which is 32-bit but has a
4207 // 12-bit immediate range.
4208 if (Addend < 1<<8) {
4209 if (Subtarget->isThumb2()) {
4210 SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32),
4211 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
4212 CurDAG->getRegister(0, MVT::i32) };
4213 Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops);
4214 } else {
4215 unsigned Opc = (Addend < 1<<3) ? ARM::tADDi3 : ARM::tADDi8;
4216 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
4217 CurDAG->getTargetConstant(Addend, dl, MVT::i32),
4218 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
4219 Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
4220 }
4221 }
4222 if (Add) {
4223 SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)};
4224 CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2);
4225 }
4226 }
4227 // Other cases are autogenerated.
4228 break;
4229 }
4230
4231 case ARMISD::CMOV: {
4232 SDValue InGlue = N->getOperand(4);
4233
4234 if (InGlue.getOpcode() == ARMISD::CMPZ) {
4235 bool SwitchEQNEToPLMI;
4236 SelectCMPZ(InGlue.getNode(), SwitchEQNEToPLMI);
4237
4238 if (SwitchEQNEToPLMI) {
4239 SDValue ARMcc = N->getOperand(2);
4241
4242 switch (CC) {
4243 default: llvm_unreachable("CMPZ must be either NE or EQ!");
4244 case ARMCC::NE:
4245 CC = ARMCC::MI;
4246 break;
4247 case ARMCC::EQ:
4248 CC = ARMCC::PL;
4249 break;
4250 }
4251 SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32);
4252 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc,
4253 N->getOperand(3), N->getOperand(4)};
4254 CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops);
4255 }
4256
4257 }
4258 // Other cases are autogenerated.
4259 break;
4260 }
4261 case ARMISD::VZIP: {
4262 EVT VT = N->getValueType(0);
4263 // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
4264 unsigned Opc64[] = {ARM::VZIPd8, ARM::VZIPd16, ARM::VTRNd32};
4265 unsigned Opc128[] = {ARM::VZIPq8, ARM::VZIPq16, ARM::VZIPq32};
4266 unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128);
4267 SDValue Pred = getAL(CurDAG, dl);
4268 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
4269 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Pred, PredReg};
4270 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
4271 return;
4272 }
4273 case ARMISD::VUZP: {
4274 EVT VT = N->getValueType(0);
4275 // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
4276 unsigned Opc64[] = {ARM::VUZPd8, ARM::VUZPd16, ARM::VTRNd32};
4277 unsigned Opc128[] = {ARM::VUZPq8, ARM::VUZPq16, ARM::VUZPq32};
4278 unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128);
4279 SDValue Pred = getAL(CurDAG, dl);
4280 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
4281 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Pred, PredReg};
4282 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
4283 return;
4284 }
4285 case ARMISD::VTRN: {
4286 EVT VT = N->getValueType(0);
4287 unsigned Opc64[] = {ARM::VTRNd8, ARM::VTRNd16, ARM::VTRNd32};
4288 unsigned Opc128[] = {ARM::VTRNq8, ARM::VTRNq16, ARM::VTRNq32};
4289 unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128);
4290 SDValue Pred = getAL(CurDAG, dl);
4291 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
4292 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Pred, PredReg};
4293 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
4294 return;
4295 }
4296 case ARMISD::BUILD_VECTOR: {
4297 EVT VecVT = N->getValueType(0);
4298 EVT EltVT = VecVT.getVectorElementType();
4299 unsigned NumElts = VecVT.getVectorNumElements();
4300 if (EltVT == MVT::f64) {
4301 assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
4302 ReplaceNode(
4303 N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
4304 return;
4305 }
4306 assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
4307 if (NumElts == 2) {
4308 ReplaceNode(
4309 N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
4310 return;
4311 }
4312 assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
4313 ReplaceNode(N,
4314 createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
4315 N->getOperand(2), N->getOperand(3)));
4316 return;
4317 }
4318
4319 case ARMISD::VLD1DUP: {
4320 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16,
4321 ARM::VLD1DUPd32 };
4322 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16,
4323 ARM::VLD1DUPq32 };
4324 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 1, DOpcodes, QOpcodes);
4325 return;
4326 }
4327
4328 case ARMISD::VLD2DUP: {
4329 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
4330 ARM::VLD2DUPd32 };
4331 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 2, Opcodes);
4332 return;
4333 }
4334
4335 case ARMISD::VLD3DUP: {
4336 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
4337 ARM::VLD3DUPd16Pseudo,
4338 ARM::VLD3DUPd32Pseudo };
4339 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 3, Opcodes);
4340 return;
4341 }
4342
4343 case ARMISD::VLD4DUP: {
4344 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
4345 ARM::VLD4DUPd16Pseudo,
4346 ARM::VLD4DUPd32Pseudo };
4347 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 4, Opcodes);
4348 return;
4349 }
4350
4351 case ARMISD::VLD1DUP_UPD: {
4352 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed,
4353 ARM::VLD1DUPd16wb_fixed,
4354 ARM::VLD1DUPd32wb_fixed };
4355 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed,
4356 ARM::VLD1DUPq16wb_fixed,
4357 ARM::VLD1DUPq32wb_fixed };
4358 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 1, DOpcodes, QOpcodes);
4359 return;
4360 }
4361
4362 case ARMISD::VLD2DUP_UPD: {
4363 static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8wb_fixed,
4364 ARM::VLD2DUPd16wb_fixed,
4365 ARM::VLD2DUPd32wb_fixed,
4366 ARM::VLD1q64wb_fixed };
4367 static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
4368 ARM::VLD2DUPq16EvenPseudo,
4369 ARM::VLD2DUPq32EvenPseudo };
4370 static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudoWB_fixed,
4371 ARM::VLD2DUPq16OddPseudoWB_fixed,
4372 ARM::VLD2DUPq32OddPseudoWB_fixed };
4373 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 2, DOpcodes, QOpcodes0, QOpcodes1);
4374 return;
4375 }
4376
4377 case ARMISD::VLD3DUP_UPD: {
4378 static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
4379 ARM::VLD3DUPd16Pseudo_UPD,
4380 ARM::VLD3DUPd32Pseudo_UPD,
4381 ARM::VLD1d64TPseudoWB_fixed };
4382 static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
4383 ARM::VLD3DUPq16EvenPseudo,
4384 ARM::VLD3DUPq32EvenPseudo };
4385 static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo_UPD,
4386 ARM::VLD3DUPq16OddPseudo_UPD,
4387 ARM::VLD3DUPq32OddPseudo_UPD };
4388 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
4389 return;
4390 }
4391
4392 case ARMISD::VLD4DUP_UPD: {
4393 static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
4394 ARM::VLD4DUPd16Pseudo_UPD,
4395 ARM::VLD4DUPd32Pseudo_UPD,
4396 ARM::VLD1d64QPseudoWB_fixed };
4397 static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
4398 ARM::VLD4DUPq16EvenPseudo,
4399 ARM::VLD4DUPq32EvenPseudo };
4400 static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo_UPD,
4401 ARM::VLD4DUPq16OddPseudo_UPD,
4402 ARM::VLD4DUPq32OddPseudo_UPD };
4403 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
4404 return;
4405 }
4406
4407 case ARMISD::VLD1_UPD: {
4408 static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
4409 ARM::VLD1d16wb_fixed,
4410 ARM::VLD1d32wb_fixed,
4411 ARM::VLD1d64wb_fixed };
4412 static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
4413 ARM::VLD1q16wb_fixed,
4414 ARM::VLD1q32wb_fixed,
4415 ARM::VLD1q64wb_fixed };
4416 SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
4417 return;
4418 }
4419
4420 case ARMISD::VLD2_UPD: {
4421 if (Subtarget->hasNEON()) {
4422 static const uint16_t DOpcodes[] = {
4423 ARM::VLD2d8wb_fixed, ARM::VLD2d16wb_fixed, ARM::VLD2d32wb_fixed,
4424 ARM::VLD1q64wb_fixed};
4425 static const uint16_t QOpcodes[] = {ARM::VLD2q8PseudoWB_fixed,
4426 ARM::VLD2q16PseudoWB_fixed,
4427 ARM::VLD2q32PseudoWB_fixed};
4428 SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
4429 } else {
4430 static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8,
4431 ARM::MVE_VLD21_8_wb};
4432 static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16,
4433 ARM::MVE_VLD21_16_wb};
4434 static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32,
4435 ARM::MVE_VLD21_32_wb};
4436 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
4437 SelectMVE_VLD(N, 2, Opcodes, true);
4438 }
4439 return;
4440 }
4441
4442 case ARMISD::VLD3_UPD: {
4443 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
4444 ARM::VLD3d16Pseudo_UPD,
4445 ARM::VLD3d32Pseudo_UPD,
4446 ARM::VLD1d64TPseudoWB_fixed};
4447 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
4448 ARM::VLD3q16Pseudo_UPD,
4449 ARM::VLD3q32Pseudo_UPD };
4450 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
4451 ARM::VLD3q16oddPseudo_UPD,
4452 ARM::VLD3q32oddPseudo_UPD };
4453 SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
4454 return;
4455 }
4456
4457 case ARMISD::VLD4_UPD: {
4458 if (Subtarget->hasNEON()) {
4459 static const uint16_t DOpcodes[] = {
4460 ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD, ARM::VLD4d32Pseudo_UPD,
4461 ARM::VLD1d64QPseudoWB_fixed};
4462 static const uint16_t QOpcodes0[] = {ARM::VLD4q8Pseudo_UPD,
4463 ARM::VLD4q16Pseudo_UPD,
4464 ARM::VLD4q32Pseudo_UPD};
4465 static const uint16_t QOpcodes1[] = {ARM::VLD4q8oddPseudo_UPD,
4466 ARM::VLD4q16oddPseudo_UPD,
4467 ARM::VLD4q32oddPseudo_UPD};
4468 SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
4469 } else {
4470 static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8,
4471 ARM::MVE_VLD42_8,
4472 ARM::MVE_VLD43_8_wb};
4473 static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16,
4474 ARM::MVE_VLD42_16,
4475 ARM::MVE_VLD43_16_wb};
4476 static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32,
4477 ARM::MVE_VLD42_32,
4478 ARM::MVE_VLD43_32_wb};
4479 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
4480 SelectMVE_VLD(N, 4, Opcodes, true);
4481 }
4482 return;
4483 }
4484
4485 case ARMISD::VLD1x2_UPD: {
4486 if (Subtarget->hasNEON()) {
4487 static const uint16_t DOpcodes[] = {
4488 ARM::VLD1q8wb_fixed, ARM::VLD1q16wb_fixed, ARM::VLD1q32wb_fixed,
4489 ARM::VLD1q64wb_fixed};
4490 static const uint16_t QOpcodes[] = {
4491 ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed,
4492 ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed};
4493 SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
4494 return;
4495 }
4496 break;
4497 }
4498
4499 case ARMISD::VLD1x3_UPD: {
4500 if (Subtarget->hasNEON()) {
4501 static const uint16_t DOpcodes[] = {
4502 ARM::VLD1d8TPseudoWB_fixed, ARM::VLD1d16TPseudoWB_fixed,
4503 ARM::VLD1d32TPseudoWB_fixed, ARM::VLD1d64TPseudoWB_fixed};
4504 static const uint16_t QOpcodes0[] = {
4505 ARM::VLD1q8LowTPseudo_UPD, ARM::VLD1q16LowTPseudo_UPD,
4506 ARM::VLD1q32LowTPseudo_UPD, ARM::VLD1q64LowTPseudo_UPD};
4507 static const uint16_t QOpcodes1[] = {
4508 ARM::VLD1q8HighTPseudo_UPD, ARM::VLD1q16HighTPseudo_UPD,
4509 ARM::VLD1q32HighTPseudo_UPD, ARM::VLD1q64HighTPseudo_UPD};
4510 SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
4511 return;
4512 }
4513 break;
4514 }
4515
4516 case ARMISD::VLD1x4_UPD: {
4517 if (Subtarget->hasNEON()) {
4518 static const uint16_t DOpcodes[] = {
4519 ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed,
4520 ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed};
4521 static const uint16_t QOpcodes0[] = {
4522 ARM::VLD1q8LowQPseudo_UPD, ARM::VLD1q16LowQPseudo_UPD,
4523 ARM::VLD1q32LowQPseudo_UPD, ARM::VLD1q64LowQPseudo_UPD};
4524 static const uint16_t QOpcodes1[] = {
4525 ARM::VLD1q8HighQPseudo_UPD, ARM::VLD1q16HighQPseudo_UPD,
4526 ARM::VLD1q32HighQPseudo_UPD, ARM::VLD1q64HighQPseudo_UPD};
4527 SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
4528 return;
4529 }
4530 break;
4531 }
4532
4533 case ARMISD::VLD2LN_UPD: {
4534 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
4535 ARM::VLD2LNd16Pseudo_UPD,
4536 ARM::VLD2LNd32Pseudo_UPD };
4537 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
4538 ARM::VLD2LNq32Pseudo_UPD };
4539 SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
4540 return;
4541 }
4542
4543 case ARMISD::VLD3LN_UPD: {
4544 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
4545 ARM::VLD3LNd16Pseudo_UPD,
4546 ARM::VLD3LNd32Pseudo_UPD };
4547 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
4548 ARM::VLD3LNq32Pseudo_UPD };
4549 SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
4550 return;
4551 }
4552
4553 case ARMISD::VLD4LN_UPD: {
4554 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
4555 ARM::VLD4LNd16Pseudo_UPD,
4556 ARM::VLD4LNd32Pseudo_UPD };
4557 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
4558 ARM::VLD4LNq32Pseudo_UPD };
4559 SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
4560 return;
4561 }
4562
4563 case ARMISD::VST1_UPD: {
4564 static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
4565 ARM::VST1d16wb_fixed,
4566 ARM::VST1d32wb_fixed,
4567 ARM::VST1d64wb_fixed };
4568 static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
4569 ARM::VST1q16wb_fixed,
4570 ARM::VST1q32wb_fixed,
4571 ARM::VST1q64wb_fixed };
4572 SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
4573 return;
4574 }
4575
4576 case ARMISD::VST2_UPD: {
4577 if (Subtarget->hasNEON()) {
4578 static const uint16_t DOpcodes[] = {
4579 ARM::VST2d8wb_fixed, ARM::VST2d16wb_fixed, ARM::VST2d32wb_fixed,
4580 ARM::VST1q64wb_fixed};
4581 static const uint16_t QOpcodes[] = {ARM::VST2q8PseudoWB_fixed,
4582 ARM::VST2q16PseudoWB_fixed,
4583 ARM::VST2q32PseudoWB_fixed};
4584 SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
4585 return;
4586 }
4587 break;
4588 }
4589
4590 case ARMISD::VST3_UPD: {
4591 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
4592 ARM::VST3d16Pseudo_UPD,
4593 ARM::VST3d32Pseudo_UPD,
4594 ARM::VST1d64TPseudoWB_fixed};
4595 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
4596 ARM::VST3q16Pseudo_UPD,
4597 ARM::VST3q32Pseudo_UPD };
4598 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
4599 ARM::VST3q16oddPseudo_UPD,
4600 ARM::VST3q32oddPseudo_UPD };
4601 SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
4602 return;
4603 }
4604
4605 case ARMISD::VST4_UPD: {
4606 if (Subtarget->hasNEON()) {
4607 static const uint16_t DOpcodes[] = {
4608 ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD, ARM::VST4d32Pseudo_UPD,
4609 ARM::VST1d64QPseudoWB_fixed};
4610 static const uint16_t QOpcodes0[] = {ARM::VST4q8Pseudo_UPD,
4611 ARM::VST4q16Pseudo_UPD,
4612 ARM::VST4q32Pseudo_UPD};
4613 static const uint16_t QOpcodes1[] = {ARM::VST4q8oddPseudo_UPD,
4614 ARM::VST4q16oddPseudo_UPD,
4615 ARM::VST4q32oddPseudo_UPD};
4616 SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
4617 return;
4618 }
4619 break;
4620 }
4621
4622 case ARMISD::VST1x2_UPD: {
4623 if (Subtarget->hasNEON()) {
4624 static const uint16_t DOpcodes[] = { ARM::VST1q8wb_fixed,
4625 ARM::VST1q16wb_fixed,
4626 ARM::VST1q32wb_fixed,
4627 ARM::VST1q64wb_fixed};
4628 static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudoWB_fixed,
4629 ARM::VST1d16QPseudoWB_fixed,
4630 ARM::VST1d32QPseudoWB_fixed,
4631 ARM::VST1d64QPseudoWB_fixed };
4632 SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
4633 return;
4634 }
4635 break;
4636 }
4637
4638 case ARMISD::VST1x3_UPD: {
4639 if (Subtarget->hasNEON()) {
4640 static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudoWB_fixed,
4641 ARM::VST1d16TPseudoWB_fixed,
4642 ARM::VST1d32TPseudoWB_fixed,
4643 ARM::VST1d64TPseudoWB_fixed };
4644 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
4645 ARM::VST1q16LowTPseudo_UPD,
4646 ARM::VST1q32LowTPseudo_UPD,
4647 ARM::VST1q64LowTPseudo_UPD };
4648 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo_UPD,
4649 ARM::VST1q16HighTPseudo_UPD,
4650 ARM::VST1q32HighTPseudo_UPD,
4651 ARM::VST1q64HighTPseudo_UPD };
4652 SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
4653 return;
4654 }
4655 break;
4656 }
4657
4658 case ARMISD::VST1x4_UPD: {
4659 if (Subtarget->hasNEON()) {
4660 static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudoWB_fixed,
4661 ARM::VST1d16QPseudoWB_fixed,
4662 ARM::VST1d32QPseudoWB_fixed,
4663 ARM::VST1d64QPseudoWB_fixed };
4664 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
4665 ARM::VST1q16LowQPseudo_UPD,
4666 ARM::VST1q32LowQPseudo_UPD,
4667 ARM::VST1q64LowQPseudo_UPD };
4668 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo_UPD,
4669 ARM::VST1q16HighQPseudo_UPD,
4670 ARM::VST1q32HighQPseudo_UPD,
4671 ARM::VST1q64HighQPseudo_UPD };
4672 SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
4673 return;
4674 }
4675 break;
4676 }
4677 case ARMISD::VST2LN_UPD: {
4678 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
4679 ARM::VST2LNd16Pseudo_UPD,
4680 ARM::VST2LNd32Pseudo_UPD };
4681 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
4682 ARM::VST2LNq32Pseudo_UPD };
4683 SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
4684 return;
4685 }
4686
4687 case ARMISD::VST3LN_UPD: {
4688 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
4689 ARM::VST3LNd16Pseudo_UPD,
4690 ARM::VST3LNd32Pseudo_UPD };
4691 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
4692 ARM::VST3LNq32Pseudo_UPD };
4693 SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
4694 return;
4695 }
4696
4697 case ARMISD::VST4LN_UPD: {
4698 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
4699 ARM::VST4LNd16Pseudo_UPD,
4700 ARM::VST4LNd32Pseudo_UPD };
4701 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
4702 ARM::VST4LNq32Pseudo_UPD };
4703 SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
4704 return;
4705 }
4706
4709 unsigned IntNo = N->getConstantOperandVal(1);
4710 switch (IntNo) {
4711 default:
4712 break;
4713
4714 case Intrinsic::arm_mrrc:
4715 case Intrinsic::arm_mrrc2: {
4716 SDLoc dl(N);
4717 SDValue Chain = N->getOperand(0);
4718 unsigned Opc;
4719
4720 if (Subtarget->isThumb())
4721 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2);
4722 else
4723 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);
4724
4726 Ops.push_back(getI32Imm(N->getConstantOperandVal(2), dl)); /* coproc */
4727 Ops.push_back(getI32Imm(N->getConstantOperandVal(3), dl)); /* opc */
4728 Ops.push_back(getI32Imm(N->getConstantOperandVal(4), dl)); /* CRm */
4729
4730 // The mrrc2 instruction in ARM doesn't allow predicates, the top 4 bits of the encoded
4731 // instruction will always be '1111' but it is possible in assembly language to specify
4732 // AL as a predicate to mrrc2 but it doesn't make any difference to the encoded instruction.
4733 if (Opc != ARM::MRRC2) {
4734 Ops.push_back(getAL(CurDAG, dl));
4735 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
4736 }
4737
4738 Ops.push_back(Chain);
4739
4740 // Writes to two registers.
4741 const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other};
4742
4743 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops));
4744 return;
4745 }
4746 case Intrinsic::arm_ldaexd:
4747 case Intrinsic::arm_ldrexd: {
4748 SDLoc dl(N);
4749 SDValue Chain = N->getOperand(0);
4750 SDValue MemAddr = N->getOperand(2);
4751 bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps();
4752
4753 bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
4754 unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
4755 : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);
4756
4757 // arm_ldrexd returns a i64 value in {i32, i32}
4758 std::vector<EVT> ResTys;
4759 if (isThumb) {
4760 ResTys.push_back(MVT::i32);
4761 ResTys.push_back(MVT::i32);
4762 } else
4763 ResTys.push_back(MVT::Untyped);
4764 ResTys.push_back(MVT::Other);
4765
4766 // Place arguments in the right order.
4767 SDValue Ops[] = {MemAddr, getAL(CurDAG, dl),
4768 CurDAG->getRegister(0, MVT::i32), Chain};
4769 SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
4770 // Transfer memoperands.
4771 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
4772 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
4773
4774 // Remap uses.
4775 SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
4776 if (!SDValue(N, 0).use_empty()) {
4778 if (isThumb)
4779 Result = SDValue(Ld, 0);
4780 else {
4781 SDValue SubRegIdx =
4782 CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
4783 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
4784 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
4785 Result = SDValue(ResNode,0);
4786 }
4787 ReplaceUses(SDValue(N, 0), Result);
4788 }
4789 if (!SDValue(N, 1).use_empty()) {
4791 if (isThumb)
4792 Result = SDValue(Ld, 1);
4793 else {
4794 SDValue SubRegIdx =
4795 CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
4796 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
4797 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
4798 Result = SDValue(ResNode,0);
4799 }
4800 ReplaceUses(SDValue(N, 1), Result);
4801 }
4802 ReplaceUses(SDValue(N, 2), OutChain);
4803 CurDAG->RemoveDeadNode(N);
4804 return;
4805 }
4806 case Intrinsic::arm_stlexd:
4807 case Intrinsic::arm_strexd: {
4808 SDLoc dl(N);
4809 SDValue Chain = N->getOperand(0);
4810 SDValue Val0 = N->getOperand(2);
4811 SDValue Val1 = N->getOperand(3);
4812 SDValue MemAddr = N->getOperand(4);
4813
4814 // Store exclusive double return a i32 value which is the return status
4815 // of the issued store.
4816 const EVT ResTys[] = {MVT::i32, MVT::Other};
4817
4818 bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
4819 // Place arguments in the right order.
4821 if (isThumb) {
4822 Ops.push_back(Val0);
4823 Ops.push_back(Val1);
4824 } else
4825 // arm_strexd uses GPRPair.
4826 Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
4827 Ops.push_back(MemAddr);
4828 Ops.push_back(getAL(CurDAG, dl));
4829 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
4830 Ops.push_back(Chain);
4831
4832 bool IsRelease = IntNo == Intrinsic::arm_stlexd;
4833 unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
4834 : (IsRelease ? ARM::STLEXD : ARM::STREXD);
4835
4836 SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
4837 // Transfer memoperands.
4838 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
4839 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
4840
4841 ReplaceNode(N, St);
4842 return;
4843 }
4844
4845 case Intrinsic::arm_neon_vld1: {
4846 static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
4847 ARM::VLD1d32, ARM::VLD1d64 };
4848 static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
4849 ARM::VLD1q32, ARM::VLD1q64};
4850 SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
4851 return;
4852 }
4853
4854 case Intrinsic::arm_neon_vld1x2: {
4855 static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
4856 ARM::VLD1q32, ARM::VLD1q64 };
4857 static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo,
4858 ARM::VLD1d16QPseudo,
4859 ARM::VLD1d32QPseudo,
4860 ARM::VLD1d64QPseudo };
4861 SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
4862 return;
4863 }
4864
4865 case Intrinsic::arm_neon_vld1x3: {
4866 static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo,
4867 ARM::VLD1d16TPseudo,
4868 ARM::VLD1d32TPseudo,
4869 ARM::VLD1d64TPseudo };
4870 static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowTPseudo_UPD,
4871 ARM::VLD1q16LowTPseudo_UPD,
4872 ARM::VLD1q32LowTPseudo_UPD,
4873 ARM::VLD1q64LowTPseudo_UPD };
4874 static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo,
4875 ARM::VLD1q16HighTPseudo,
4876 ARM::VLD1q32HighTPseudo,
4877 ARM::VLD1q64HighTPseudo };
4878 SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
4879 return;
4880 }
4881
4882 case Intrinsic::arm_neon_vld1x4: {
4883 static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo,
4884 ARM::VLD1d16QPseudo,
4885 ARM::VLD1d32QPseudo,
4886 ARM::VLD1d64QPseudo };
4887 static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD,
4888 ARM::VLD1q16LowQPseudo_UPD,
4889 ARM::VLD1q32LowQPseudo_UPD,
4890 ARM::VLD1q64LowQPseudo_UPD };
4891 static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo,
4892 ARM::VLD1q16HighQPseudo,
4893 ARM::VLD1q32HighQPseudo,
4894 ARM::VLD1q64HighQPseudo };
4895 SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
4896 return;
4897 }
4898
4899 case Intrinsic::arm_neon_vld2: {
4900 static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
4901 ARM::VLD2d32, ARM::VLD1q64 };
4902 static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
4903 ARM::VLD2q32Pseudo };
4904 SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
4905 return;
4906 }
4907
4908 case Intrinsic::arm_neon_vld3: {
4909 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
4910 ARM::VLD3d16Pseudo,
4911 ARM::VLD3d32Pseudo,
4912 ARM::VLD1d64TPseudo };
4913 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
4914 ARM::VLD3q16Pseudo_UPD,
4915 ARM::VLD3q32Pseudo_UPD };
4916 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
4917 ARM::VLD3q16oddPseudo,
4918 ARM::VLD3q32oddPseudo };
4919 SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
4920 return;
4921 }
4922
4923 case Intrinsic::arm_neon_vld4: {
4924 static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
4925 ARM::VLD4d16Pseudo,
4926 ARM::VLD4d32Pseudo,
4927 ARM::VLD1d64QPseudo };
4928 static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
4929 ARM::VLD4q16Pseudo_UPD,
4930 ARM::VLD4q32Pseudo_UPD };
4931 static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
4932 ARM::VLD4q16oddPseudo,
4933 ARM::VLD4q32oddPseudo };
4934 SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
4935 return;
4936 }
4937
4938 case Intrinsic::arm_neon_vld2dup: {
4939 static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
4940 ARM::VLD2DUPd32, ARM::VLD1q64 };
4941 static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
4942 ARM::VLD2DUPq16EvenPseudo,
4943 ARM::VLD2DUPq32EvenPseudo };
4944 static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudo,
4945 ARM::VLD2DUPq16OddPseudo,
4946 ARM::VLD2DUPq32OddPseudo };
4947 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 2,
4948 DOpcodes, QOpcodes0, QOpcodes1);
4949 return;
4950 }
4951
4952 case Intrinsic::arm_neon_vld3dup: {
4953 static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo,
4954 ARM::VLD3DUPd16Pseudo,
4955 ARM::VLD3DUPd32Pseudo,
4956 ARM::VLD1d64TPseudo };
4957 static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
4958 ARM::VLD3DUPq16EvenPseudo,
4959 ARM::VLD3DUPq32EvenPseudo };
4960 static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo,
4961 ARM::VLD3DUPq16OddPseudo,
4962 ARM::VLD3DUPq32OddPseudo };
4963 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 3,
4964 DOpcodes, QOpcodes0, QOpcodes1);
4965 return;
4966 }
4967
4968 case Intrinsic::arm_neon_vld4dup: {
4969 static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo,
4970 ARM::VLD4DUPd16Pseudo,
4971 ARM::VLD4DUPd32Pseudo,
4972 ARM::VLD1d64QPseudo };
4973 static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
4974 ARM::VLD4DUPq16EvenPseudo,
4975 ARM::VLD4DUPq32EvenPseudo };
4976 static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo,
4977 ARM::VLD4DUPq16OddPseudo,
4978 ARM::VLD4DUPq32OddPseudo };
4979 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 4,
4980 DOpcodes, QOpcodes0, QOpcodes1);
4981 return;
4982 }
4983
4984 case Intrinsic::arm_neon_vld2lane: {
4985 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
4986 ARM::VLD2LNd16Pseudo,
4987 ARM::VLD2LNd32Pseudo };
4988 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
4989 ARM::VLD2LNq32Pseudo };
4990 SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
4991 return;
4992 }
4993
4994 case Intrinsic::arm_neon_vld3lane: {
4995 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
4996 ARM::VLD3LNd16Pseudo,
4997 ARM::VLD3LNd32Pseudo };
4998 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
4999 ARM::VLD3LNq32Pseudo };
5000 SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
5001 return;
5002 }
5003
5004 case Intrinsic::arm_neon_vld4lane: {
5005 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
5006 ARM::VLD4LNd16Pseudo,
5007 ARM::VLD4LNd32Pseudo };
5008 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
5009 ARM::VLD4LNq32Pseudo };
5010 SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
5011 return;
5012 }
5013
5014 case Intrinsic::arm_neon_vst1: {
5015 static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
5016 ARM::VST1d32, ARM::VST1d64 };
5017 static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
5018 ARM::VST1q32, ARM::VST1q64 };
5019 SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
5020 return;
5021 }
5022
5023 case Intrinsic::arm_neon_vst1x2: {
5024 static const uint16_t DOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
5025 ARM::VST1q32, ARM::VST1q64 };
5026 static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudo,
5027 ARM::VST1d16QPseudo,
5028 ARM::VST1d32QPseudo,
5029 ARM::VST1d64QPseudo };
5030 SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
5031 return;
5032 }
5033
5034 case Intrinsic::arm_neon_vst1x3: {
5035 static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudo,
5036 ARM::VST1d16TPseudo,
5037 ARM::VST1d32TPseudo,
5038 ARM::VST1d64TPseudo };
5039 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
5040 ARM::VST1q16LowTPseudo_UPD,
5041 ARM::VST1q32LowTPseudo_UPD,
5042 ARM::VST1q64LowTPseudo_UPD };
5043 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo,
5044 ARM::VST1q16HighTPseudo,
5045 ARM::VST1q32HighTPseudo,
5046 ARM::VST1q64HighTPseudo };
5047 SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
5048 return;
5049 }
5050
5051 case Intrinsic::arm_neon_vst1x4: {
5052 static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudo,
5053 ARM::VST1d16QPseudo,
5054 ARM::VST1d32QPseudo,
5055 ARM::VST1d64QPseudo };
5056 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
5057 ARM::VST1q16LowQPseudo_UPD,
5058 ARM::VST1q32LowQPseudo_UPD,
5059 ARM::VST1q64LowQPseudo_UPD };
5060 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo,
5061 ARM::VST1q16HighQPseudo,
5062 ARM::VST1q32HighQPseudo,
5063 ARM::VST1q64HighQPseudo };
5064 SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
5065 return;
5066 }
5067
5068 case Intrinsic::arm_neon_vst2: {
5069 static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
5070 ARM::VST2d32, ARM::VST1q64 };
5071 static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
5072 ARM::VST2q32Pseudo };
5073 SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
5074 return;
5075 }
5076
5077 case Intrinsic::arm_neon_vst3: {
5078 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
5079 ARM::VST3d16Pseudo,
5080 ARM::VST3d32Pseudo,
5081 ARM::VST1d64TPseudo };
5082 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
5083 ARM::VST3q16Pseudo_UPD,
5084 ARM::VST3q32Pseudo_UPD };
5085 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
5086 ARM::VST3q16oddPseudo,
5087 ARM::VST3q32oddPseudo };
5088 SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
5089 return;
5090 }
5091
5092 case Intrinsic::arm_neon_vst4: {
5093 static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
5094 ARM::VST4d16Pseudo,
5095 ARM::VST4d32Pseudo,
5096 ARM::VST1d64QPseudo };
5097 static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
5098 ARM::VST4q16Pseudo_UPD,
5099 ARM::VST4q32Pseudo_UPD };
5100 static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
5101 ARM::VST4q16oddPseudo,
5102 ARM::VST4q32oddPseudo };
5103 SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
5104 return;
5105 }
5106
5107 case Intrinsic::arm_neon_vst2lane: {
5108 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
5109 ARM::VST2LNd16Pseudo,
5110 ARM::VST2LNd32Pseudo };
5111 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
5112 ARM::VST2LNq32Pseudo };
5113 SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
5114 return;
5115 }
5116
5117 case Intrinsic::arm_neon_vst3lane: {
5118 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
5119 ARM::VST3LNd16Pseudo,
5120 ARM::VST3LNd32Pseudo };
5121 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
5122 ARM::VST3LNq32Pseudo };
5123 SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
5124 return;
5125 }
5126
5127 case Intrinsic::arm_neon_vst4lane: {
5128 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
5129 ARM::VST4LNd16Pseudo,
5130 ARM::VST4LNd32Pseudo };
5131 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
5132 ARM::VST4LNq32Pseudo };
5133 SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
5134 return;
5135 }
5136
5137 case Intrinsic::arm_mve_vldr_gather_base_wb:
5138 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated: {
5139 static const uint16_t Opcodes[] = {ARM::MVE_VLDRWU32_qi_pre,
5140 ARM::MVE_VLDRDU64_qi_pre};
5141 SelectMVE_WB(N, Opcodes,
5142 IntNo == Intrinsic::arm_mve_vldr_gather_base_wb_predicated);
5143 return;
5144 }
5145
5146 case Intrinsic::arm_mve_vld2q: {
5147 static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8, ARM::MVE_VLD21_8};
5148 static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16,
5149 ARM::MVE_VLD21_16};
5150 static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32,
5151 ARM::MVE_VLD21_32};
5152 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
5153 SelectMVE_VLD(N, 2, Opcodes, false);
5154 return;
5155 }
5156
5157 case Intrinsic::arm_mve_vld4q: {
5158 static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8,
5159 ARM::MVE_VLD42_8, ARM::MVE_VLD43_8};
5160 static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16,
5161 ARM::MVE_VLD42_16,
5162 ARM::MVE_VLD43_16};
5163 static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32,
5164 ARM::MVE_VLD42_32,
5165 ARM::MVE_VLD43_32};
5166 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
5167 SelectMVE_VLD(N, 4, Opcodes, false);
5168 return;
5169 }
5170 }
5171 break;
5172 }
5173
5175 unsigned IntNo = N->getConstantOperandVal(0);
5176 switch (IntNo) {
5177 default:
5178 break;
5179
5180 // Scalar f32 -> bf16
5181 case Intrinsic::arm_neon_vcvtbfp2bf: {
5182 SDLoc dl(N);
5183 const SDValue &Src = N->getOperand(1);
5184 llvm::EVT DestTy = N->getValueType(0);
5185 SDValue Pred = getAL(CurDAG, dl);
5186 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
5187 SDValue Ops[] = { Src, Src, Pred, Reg0 };
5188 CurDAG->SelectNodeTo(N, ARM::BF16_VCVTB, DestTy, Ops);
5189 return;
5190 }
5191
5192 // Vector v4f32 -> v4bf16
5193 case Intrinsic::arm_neon_vcvtfp2bf: {
5194 SDLoc dl(N);
5195 const SDValue &Src = N->getOperand(1);
5196 SDValue Pred = getAL(CurDAG, dl);
5197 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
5198 SDValue Ops[] = { Src, Pred, Reg0 };
5199 CurDAG->SelectNodeTo(N, ARM::BF16_VCVT, MVT::v4bf16, Ops);
5200 return;
5201 }
5202
5203 case Intrinsic::arm_mve_urshrl:
5204 SelectMVE_LongShift(N, ARM::MVE_URSHRL, true, false);
5205 return;
5206 case Intrinsic::arm_mve_uqshll:
5207 SelectMVE_LongShift(N, ARM::MVE_UQSHLL, true, false);
5208 return;
5209 case Intrinsic::arm_mve_srshrl:
5210 SelectMVE_LongShift(N, ARM::MVE_SRSHRL, true, false);
5211 return;
5212 case Intrinsic::arm_mve_sqshll:
5213 SelectMVE_LongShift(N, ARM::MVE_SQSHLL, true, false);
5214 return;
5215 case Intrinsic::arm_mve_uqrshll:
5216 SelectMVE_LongShift(N, ARM::MVE_UQRSHLL, false, true);
5217 return;
5218 case Intrinsic::arm_mve_sqrshrl:
5219 SelectMVE_LongShift(N, ARM::MVE_SQRSHRL, false, true);
5220 return;
5221
5222 case Intrinsic::arm_mve_vadc:
5223 case Intrinsic::arm_mve_vadc_predicated:
5224 SelectMVE_VADCSBC(N, ARM::MVE_VADC, ARM::MVE_VADCI, true,
5225 IntNo == Intrinsic::arm_mve_vadc_predicated);
5226 return;
5227 case Intrinsic::arm_mve_vsbc:
5228 case Intrinsic::arm_mve_vsbc_predicated:
5229 SelectMVE_VADCSBC(N, ARM::MVE_VSBC, ARM::MVE_VSBCI, true,
5230 IntNo == Intrinsic::arm_mve_vsbc_predicated);
5231 return;
5232 case Intrinsic::arm_mve_vshlc:
5233 case Intrinsic::arm_mve_vshlc_predicated:
5234 SelectMVE_VSHLC(N, IntNo == Intrinsic::arm_mve_vshlc_predicated);
5235 return;
5236
5237 case Intrinsic::arm_mve_vmlldava:
5238 case Intrinsic::arm_mve_vmlldava_predicated: {
5239 static const uint16_t OpcodesU[] = {
5240 ARM::MVE_VMLALDAVu16, ARM::MVE_VMLALDAVu32,
5241 ARM::MVE_VMLALDAVau16, ARM::MVE_VMLALDAVau32,
5242 };
5243 static const uint16_t OpcodesS[] = {
5244 ARM::MVE_VMLALDAVs16, ARM::MVE_VMLALDAVs32,
5245 ARM::MVE_VMLALDAVas16, ARM::MVE_VMLALDAVas32,
5246 ARM::MVE_VMLALDAVxs16, ARM::MVE_VMLALDAVxs32,
5247 ARM::MVE_VMLALDAVaxs16, ARM::MVE_VMLALDAVaxs32,
5248 ARM::MVE_VMLSLDAVs16, ARM::MVE_VMLSLDAVs32,
5249 ARM::MVE_VMLSLDAVas16, ARM::MVE_VMLSLDAVas32,
5250 ARM::MVE_VMLSLDAVxs16, ARM::MVE_VMLSLDAVxs32,
5251 ARM::MVE_VMLSLDAVaxs16, ARM::MVE_VMLSLDAVaxs32,
5252 };
5253 SelectMVE_VMLLDAV(N, IntNo == Intrinsic::arm_mve_vmlldava_predicated,
5254 OpcodesS, OpcodesU);
5255 return;
5256 }
5257
5258 case Intrinsic::arm_mve_vrmlldavha:
5259 case Intrinsic::arm_mve_vrmlldavha_predicated: {
5260 static const uint16_t OpcodesU[] = {
5261 ARM::MVE_VRMLALDAVHu32, ARM::MVE_VRMLALDAVHau32,
5262 };
5263 static const uint16_t OpcodesS[] = {
5264 ARM::MVE_VRMLALDAVHs32, ARM::MVE_VRMLALDAVHas32,
5265 ARM::MVE_VRMLALDAVHxs32, ARM::MVE_VRMLALDAVHaxs32,
5266 ARM::MVE_VRMLSLDAVHs32, ARM::MVE_VRMLSLDAVHas32,
5267 ARM::MVE_VRMLSLDAVHxs32, ARM::MVE_VRMLSLDAVHaxs32,
5268 };
5269 SelectMVE_VRMLLDAVH(N, IntNo == Intrinsic::arm_mve_vrmlldavha_predicated,
5270 OpcodesS, OpcodesU);
5271 return;
5272 }
5273
5274 case Intrinsic::arm_mve_vidup:
5275 case Intrinsic::arm_mve_vidup_predicated: {
5276 static const uint16_t Opcodes[] = {
5277 ARM::MVE_VIDUPu8, ARM::MVE_VIDUPu16, ARM::MVE_VIDUPu32,
5278 };
5279 SelectMVE_VxDUP(N, Opcodes, false,
5280 IntNo == Intrinsic::arm_mve_vidup_predicated);
5281 return;
5282 }
5283
5284 case Intrinsic::arm_mve_vddup:
5285 case Intrinsic::arm_mve_vddup_predicated: {
5286 static const uint16_t Opcodes[] = {
5287 ARM::MVE_VDDUPu8, ARM::MVE_VDDUPu16, ARM::MVE_VDDUPu32,
5288 };
5289 SelectMVE_VxDUP(N, Opcodes, false,
5290 IntNo == Intrinsic::arm_mve_vddup_predicated);
5291 return;
5292 }
5293
5294 case Intrinsic::arm_mve_viwdup:
5295 case Intrinsic::arm_mve_viwdup_predicated: {
5296 static const uint16_t Opcodes[] = {
5297 ARM::MVE_VIWDUPu8, ARM::MVE_VIWDUPu16, ARM::MVE_VIWDUPu32,
5298 };
5299 SelectMVE_VxDUP(N, Opcodes, true,
5300 IntNo == Intrinsic::arm_mve_viwdup_predicated);
5301 return;
5302 }
5303
5304 case Intrinsic::arm_mve_vdwdup:
5305 case Intrinsic::arm_mve_vdwdup_predicated: {
5306 static const uint16_t Opcodes[] = {
5307 ARM::MVE_VDWDUPu8, ARM::MVE_VDWDUPu16, ARM::MVE_VDWDUPu32,
5308 };
5309 SelectMVE_VxDUP(N, Opcodes, true,
5310 IntNo == Intrinsic::arm_mve_vdwdup_predicated);
5311 return;
5312 }
5313
5314 case Intrinsic::arm_cde_cx1d:
5315 case Intrinsic::arm_cde_cx1da:
5316 case Intrinsic::arm_cde_cx2d:
5317 case Intrinsic::arm_cde_cx2da:
5318 case Intrinsic::arm_cde_cx3d:
5319 case Intrinsic::arm_cde_cx3da: {
5320 bool HasAccum = IntNo == Intrinsic::arm_cde_cx1da ||
5321 IntNo == Intrinsic::arm_cde_cx2da ||
5322 IntNo == Intrinsic::arm_cde_cx3da;
5323 size_t NumExtraOps;
5324 uint16_t Opcode;
5325 switch (IntNo) {
5326 case Intrinsic::arm_cde_cx1d:
5327 case Intrinsic::arm_cde_cx1da:
5328 NumExtraOps = 0;
5329 Opcode = HasAccum ? ARM::CDE_CX1DA : ARM::CDE_CX1D;
5330 break;
5331 case Intrinsic::arm_cde_cx2d:
5332 case Intrinsic::arm_cde_cx2da:
5333 NumExtraOps = 1;
5334 Opcode = HasAccum ? ARM::CDE_CX2DA : ARM::CDE_CX2D;
5335 break;
5336 case Intrinsic::arm_cde_cx3d:
5337 case Intrinsic::arm_cde_cx3da:
5338 NumExtraOps = 2;
5339 Opcode = HasAccum ? ARM::CDE_CX3DA : ARM::CDE_CX3D;
5340 break;
5341 default:
5342 llvm_unreachable("Unexpected opcode");
5343 }
5344 SelectCDE_CXxD(N, Opcode, NumExtraOps, HasAccum);
5345 return;
5346 }
5347 }
5348 break;
5349 }
5350
5352 SelectCMP_SWAP(N);
5353 return;
5354 }
5355
5356 SelectCode(N);
5357}
5358
5359// Inspect a register string of the form
5360// cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or
5361// cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string
5362// and obtain the integer operands from them, adding these operands to the
5363// provided vector.
5365 SelectionDAG *CurDAG,
5366 const SDLoc &DL,
5367 std::vector<SDValue> &Ops) {
5369 RegString.split(Fields, ':');
5370
5371 if (Fields.size() > 1) {
5372 bool AllIntFields = true;
5373
5374 for (StringRef Field : Fields) {
5375 // Need to trim out leading 'cp' characters and get the integer field.
5376 unsigned IntField;
5377 AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
5378 Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
5379 }
5380
5381 assert(AllIntFields &&
5382 "Unexpected non-integer value in special register string.");
5383 (void)AllIntFields;
5384 }
5385}
5386
5387// Maps a Banked Register string to its mask value. The mask value returned is
5388// for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
5389// mask operand, which expresses which register is to be used, e.g. r8, and in
5390// which mode it is to be used, e.g. usr. Returns -1 to signify that the string
5391// was invalid.
5392static inline int getBankedRegisterMask(StringRef RegString) {
5393 auto TheReg = ARMBankedReg::lookupBankedRegByName(RegString.lower());
5394 if (!TheReg)
5395 return -1;
5396 return TheReg->Encoding;
5397}
5398
5399// The flags here are common to those allowed for apsr in the A class cores and
5400// those allowed for the special registers in the M class cores. Returns a
5401// value representing which flags were present, -1 if invalid.
5402static inline int getMClassFlagsMask(StringRef Flags) {
5403 return StringSwitch<int>(Flags)
5404 .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is
5405 // correct when flags are not permitted
5406 .Case("g", 0x1)
5407 .Case("nzcvq", 0x2)
5408 .Case("nzcvqg", 0x3)
5409 .Default(-1);
5410}
5411
5412// Maps MClass special registers string to its value for use in the
5413// t2MRS_M/t2MSR_M instruction nodes as the SYSm value operand.
5414// Returns -1 to signify that the string was invalid.
5415static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget) {
5416 auto TheReg = ARMSysReg::lookupMClassSysRegByName(Reg);
5417 const FeatureBitset &FeatureBits = Subtarget->getFeatureBits();
5418 if (!TheReg || !TheReg->hasRequiredFeatures(FeatureBits))
5419 return -1;
5420 return (int)(TheReg->Encoding & 0xFFF); // SYSm value
5421}
5422
5424 // The mask operand contains the special register (R Bit) in bit 4, whether
5425 // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
5426 // bits 3-0 contains the fields to be accessed in the special register, set by
5427 // the flags provided with the register.
5428 int Mask = 0;
5429 if (Reg == "apsr") {
5430 // The flags permitted for apsr are the same flags that are allowed in
5431 // M class registers. We get the flag value and then shift the flags into
5432 // the correct place to combine with the mask.
5433 Mask = getMClassFlagsMask(Flags);
5434 if (Mask == -1)
5435 return -1;
5436 return Mask << 2;
5437 }
5438
5439 if (Reg != "cpsr" && Reg != "spsr") {
5440 return -1;
5441 }
5442
5443 // This is the same as if the flags were "fc"
5444 if (Flags.empty() || Flags == "all")
5445 return Mask | 0x9;
5446
5447 // Inspect the supplied flags string and set the bits in the mask for
5448 // the relevant and valid flags allowed for cpsr and spsr.
5449 for (char Flag : Flags) {
5450 int FlagVal;
5451 switch (Flag) {
5452 case 'c':
5453 FlagVal = 0x1;
5454 break;
5455 case 'x':
5456 FlagVal = 0x2;
5457 break;
5458 case 's':
5459 FlagVal = 0x4;
5460 break;
5461 case 'f':
5462 FlagVal = 0x8;
5463 break;
5464 default:
5465 FlagVal = 0;
5466 }
5467
5468 // This avoids allowing strings where the same flag bit appears twice.
5469 if (!FlagVal || (Mask & FlagVal))
5470 return -1;
5471 Mask |= FlagVal;
5472 }
5473
5474 // If the register is spsr then we need to set the R bit.
5475 if (Reg == "spsr")
5476 Mask |= 0x10;
5477
5478 return Mask;
5479}
5480
5481// Lower the read_register intrinsic to ARM specific DAG nodes
5482// using the supplied metadata string to select the instruction node to use
5483// and the registers/masks to construct as operands for the node.
5484bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){
5485 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
5486 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
5487 bool IsThumb2 = Subtarget->isThumb2();
5488 SDLoc DL(N);
5489
5490 std::vector<SDValue> Ops;
5491 getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
5492
5493 if (!Ops.empty()) {
5494 // If the special register string was constructed of fields (as defined
5495 // in the ACLE) then need to lower to MRC node (32 bit) or
5496 // MRRC node(64 bit), we can make the distinction based on the number of
5497 // operands we have.
5498 unsigned Opcode;
5499 SmallVector<EVT, 3> ResTypes;
5500 if (Ops.size() == 5){
5501 Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
5502 ResTypes.append({ MVT::i32, MVT::Other });
5503 } else {
5504 assert(Ops.size() == 3 &&
5505 "Invalid number of fields in special register string.");
5506 Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
5507 ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
5508 }
5509
5510 Ops.push_back(getAL(CurDAG, DL));
5511 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
5512 Ops.push_back(N->getOperand(0));
5513 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops));
5514 return true;
5515 }
5516
5517 std::string SpecialReg = RegString->getString().lower();
5518
5519 int BankedReg = getBankedRegisterMask(SpecialReg);
5520 if (BankedReg != -1) {
5521 Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
5522 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
5523 N->getOperand(0) };
5524 ReplaceNode(
5525 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
5526 DL, MVT::i32, MVT::Other, Ops));
5527 return true;
5528 }
5529
5530 // The VFP registers are read by creating SelectionDAG nodes with opcodes
5531 // corresponding to the register that is being read from. So we switch on the
5532 // string to find which opcode we need to use.
5533 unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
5534 .Case("fpscr", ARM::VMRS)
5535 .Case("fpexc", ARM::VMRS_FPEXC)
5536 .Case("fpsid", ARM::VMRS_FPSID)
5537 .Case("mvfr0", ARM::VMRS_MVFR0)
5538 .Case("mvfr1", ARM::VMRS_MVFR1)
5539 .Case("mvfr2", ARM::VMRS_MVFR2)
5540 .Case("fpinst", ARM::VMRS_FPINST)
5541 .Case("fpinst2", ARM::VMRS_FPINST2)
5542 .Default(0);
5543
5544 // If an opcode was found then we can lower the read to a VFP instruction.
5545 if (Opcode) {
5546 if (!Subtarget->hasVFP2Base())
5547 return false;
5548 if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8Base())
5549 return false;
5550
5551 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
5552 N->getOperand(0) };
5553 ReplaceNode(N,
5554 CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops));
5555 return true;
5556 }
5557
5558 // If the target is M Class then need to validate that the register string
5559 // is an acceptable value, so check that a mask can be constructed from the
5560 // string.
5561 if (Subtarget->isMClass()) {
5562 int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
5563 if (SYSmValue == -1)
5564 return false;
5565
5566 SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
5567 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
5568 N->getOperand(0) };
5569 ReplaceNode(
5570 N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops));
5571 return true;
5572 }
5573
5574 // Here we know the target is not M Class so we need to check if it is one
5575 // of the remaining possible values which are apsr, cpsr or spsr.
5576 if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
5577 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
5578 N->getOperand(0) };
5579 ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS,
5580 DL, MVT::i32, MVT::Other, Ops));
5581 return true;
5582 }
5583
5584 if (SpecialReg == "spsr") {
5585 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
5586 N->getOperand(0) };
5587 ReplaceNode(
5588 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, DL,
5589 MVT::i32, MVT::Other, Ops));
5590 return true;
5591 }
5592
5593 return false;
5594}
5595
5596// Lower the write_register intrinsic to ARM specific DAG nodes
5597// using the supplied metadata string to select the instruction node to use
5598// and the registers/masks to use in the nodes
5599bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){
5600 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
5601 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
5602 bool IsThumb2 = Subtarget->isThumb2();
5603 SDLoc DL(N);
5604
5605 std::vector<SDValue> Ops;
5606 getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
5607
5608 if (!Ops.empty()) {
5609 // If the special register string was constructed of fields (as defined
5610 // in the ACLE) then need to lower to MCR node (32 bit) or
5611 // MCRR node(64 bit), we can make the distinction based on the number of
5612 // operands we have.
5613 unsigned Opcode;
5614 if (Ops.size() == 5) {
5615 Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
5616 Ops.insert(Ops.begin()+2, N->getOperand(2));
5617 } else {
5618 assert(Ops.size() == 3 &&
5619 "Invalid number of fields in special register string.");
5620 Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
5621 SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
5622 Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);
5623 }
5624
5625 Ops.push_back(getAL(CurDAG, DL));
5626 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
5627 Ops.push_back(N->getOperand(0));
5628
5629 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
5630 return true;
5631 }
5632
5633 std::string SpecialReg = RegString->getString().lower();
5634 int BankedReg = getBankedRegisterMask(SpecialReg);
5635 if (BankedReg != -1) {
5636 Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2),
5637 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
5638 N->getOperand(0) };
5639 ReplaceNode(
5640 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
5641 DL, MVT::Other, Ops));
5642 return true;
5643 }
5644
5645 // The VFP registers are written to by creating SelectionDAG nodes with
5646 // opcodes corresponding to the register that is being written. So we switch
5647 // on the string to find which opcode we need to use.
5648 unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
5649 .Case("fpscr", ARM::VMSR)
5650 .Case("fpexc", ARM::VMSR_FPEXC)
5651 .Case("fpsid", ARM::VMSR_FPSID)
5652 .Case("fpinst", ARM::VMSR_FPINST)
5653 .Case("fpinst2", ARM::VMSR_FPINST2)
5654 .Default(0);
5655
5656 if (Opcode) {
5657 if (!Subtarget->hasVFP2Base())
5658 return false;
5659 Ops = { N->getOperand(2), getAL(CurDAG, DL),
5660 CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
5661 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
5662 return true;
5663 }
5664
5665 std::pair<StringRef, StringRef> Fields;
5666 Fields = StringRef(SpecialReg).rsplit('_');
5667 std::string Reg = Fields.first.str();
5668 StringRef Flags = Fields.second;
5669
5670 // If the target was M Class then need to validate the special register value
5671 // and retrieve the mask for use in the instruction node.
5672 if (Subtarget->isMClass()) {
5673 int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
5674 if (SYSmValue == -1)
5675 return false;
5676
5677 SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
5678 N->getOperand(2), getAL(CurDAG, DL),
5679 CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
5680 ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops));
5681 return true;
5682 }
5683
5684 // We then check to see if a valid mask can be constructed for one of the
5685 // register string values permitted for the A and R class cores. These values
5686 // are apsr, spsr and cpsr; these are also valid on older cores.
5687 int Mask = getARClassRegisterMask(Reg, Flags);
5688 if (Mask != -1) {
5689 Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
5690 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
5691 N->getOperand(0) };
5692 ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
5693 DL, MVT::Other, Ops));
5694 return true;
5695 }
5696
5697 return false;
5698}
5699
5700bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){
5701 std::vector<SDValue> AsmNodeOperands;
5703 bool Changed = false;
5704 unsigned NumOps = N->getNumOperands();
5705
5706 // Normally, i64 data is bounded to two arbitrary GRPs for "%r" constraint.
5707 // However, some instrstions (e.g. ldrexd/strexd in ARM mode) require
5708 // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
5709 // respectively. Since there is no constraint to explicitly specify a
5710 // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb,
5711 // the 64-bit data may be referred by H, Q, R modifiers, so we still pack
5712 // them into a GPRPair.
5713
5714 SDLoc dl(N);
5715 SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps - 1) : SDValue();
5716
5717 SmallVector<bool, 8> OpChanged;
5718 // Glue node will be appended late.
5719 for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) {
5720 SDValue op = N->getOperand(i);
5721 AsmNodeOperands.push_back(op);
5722
5724 continue;
5725
5726 if (const auto *C = dyn_cast<ConstantSDNode>(N->getOperand(i)))
5727 Flag = InlineAsm::Flag(C->getZExtValue());
5728 else
5729 continue;
5730
5731 // Immediate operands to inline asm in the SelectionDAG are modeled with
5732 // two operands. The first is a constant of value InlineAsm::Kind::Imm, and
5733 // the second is a constant with the value of the immediate. If we get here
5734 // and we have a Kind::Imm, skip the next operand, and continue.
5735 if (Flag.isImmKind()) {
5736 SDValue op = N->getOperand(++i);
5737 AsmNodeOperands.push_back(op);
5738 continue;
5739 }
5740
5741 const unsigned NumRegs = Flag.getNumOperandRegisters();
5742 if (NumRegs)
5743 OpChanged.push_back(false);
5744
5745 unsigned DefIdx = 0;
5746 bool IsTiedToChangedOp = false;
5747 // If it's a use that is tied with a previous def, it has no
5748 // reg class constraint.
5749 if (Changed && Flag.isUseOperandTiedToDef(DefIdx))
5750 IsTiedToChangedOp = OpChanged[DefIdx];
5751
5752 // Memory operands to inline asm in the SelectionDAG are modeled with two
5753 // operands: a constant of value InlineAsm::Kind::Mem followed by the input
5754 // operand. If we get here and we have a Kind::Mem, skip the next operand
5755 // (so it doesn't get misinterpreted), and continue. We do this here because
5756 // it's important to update the OpChanged array correctly before moving on.
5757 if (Flag.isMemKind()) {
5758 SDValue op = N->getOperand(++i);
5759 AsmNodeOperands.push_back(op);
5760 continue;
5761 }
5762
5763 if (!Flag.isRegUseKind() && !Flag.isRegDefKind() &&
5764 !Flag.isRegDefEarlyClobberKind())
5765 continue;
5766
5767 unsigned RC;
5768 const bool HasRC = Flag.hasRegClassConstraint(RC);
5769 if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
5770 || NumRegs != 2)
5771 continue;
5772
5773 assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
5774 SDValue V0 = N->getOperand(i+1);
5775 SDValue V1 = N->getOperand(i+2);
5776 Register Reg0 = cast<RegisterSDNode>(V0)->getReg();
5777 Register Reg1 = cast<RegisterSDNode>(V1)->getReg();
5778 SDValue PairedReg;
5780
5781 if (Flag.isRegDefKind() || Flag.isRegDefEarlyClobberKind()) {
5782 // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
5783 // the original GPRs.
5784
5785 Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
5786 PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
5787 SDValue Chain = SDValue(N,0);
5788
5789 SDNode *GU = N->getGluedUser();
5790 SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
5791 Chain.getValue(1));
5792
5793 // Extract values from a GPRPair reg and copy to the original GPR reg.
5794 SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
5795 RegCopy);
5796 SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
5797 RegCopy);
5798 SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
5799 RegCopy.getValue(1));
5800 SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));
5801
5802 // Update the original glue user.
5803 std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
5804 Ops.push_back(T1.getValue(1));
5805 CurDAG->UpdateNodeOperands(GU, Ops);
5806 } else {
5807 // For Kind == InlineAsm::Kind::RegUse, we first copy two GPRs into a
5808 // GPRPair and then pass the GPRPair to the inline asm.
5809 SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];
5810
5811 // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
5812 SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
5813 Chain.getValue(1));
5814 SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
5815 T0.getValue(1));
5816 SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);
5817
5818 // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
5819 // i32 VRs of inline asm with it.
5820 Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
5821 PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
5822 Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));
5823
5824 AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
5825 Glue = Chain.getValue(1);
5826 }
5827
5828 Changed = true;
5829
5830 if(PairedReg.getNode()) {
5831 OpChanged[OpChanged.size() -1 ] = true;
5832 Flag = InlineAsm::Flag(Flag.getKind(), 1 /* RegNum*/);
5833 if (IsTiedToChangedOp)
5834 Flag.setMatchingOp(DefIdx);
5835 else
5836 Flag.setRegClass(ARM::GPRPairRegClassID);
5837 // Replace the current flag.
5838 AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant(
5839 Flag, dl, MVT::i32);
5840 // Add the new register node and skip the original two GPRs.
5841 AsmNodeOperands.push_back(PairedReg);
5842 // Skip the next two GPRs.
5843 i += 2;
5844 }
5845 }
5846
5847 if (Glue.getNode())
5848 AsmNodeOperands.push_back(Glue);
5849 if (!Changed)
5850 return false;
5851
5852 SDValue New = CurDAG->getNode(N->getOpcode(), SDLoc(N),
5853 CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
5854 New->setNodeId(-1);
5855 ReplaceNode(N, New.getNode());
5856 return true;
5857}
5858
5859bool ARMDAGToDAGISel::SelectInlineAsmMemoryOperand(
5860 const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
5861 std::vector<SDValue> &OutOps) {
5862 switch(ConstraintID) {
5863 default:
5864 llvm_unreachable("Unexpected asm memory constraint");
5865 case InlineAsm::ConstraintCode::m:
5866 case InlineAsm::ConstraintCode::o:
5867 case InlineAsm::ConstraintCode::Q:
5868 case InlineAsm::ConstraintCode::Um:
5869 case InlineAsm::ConstraintCode::Un:
5870 case InlineAsm::ConstraintCode::Uq:
5871 case InlineAsm::ConstraintCode::Us:
5872 case InlineAsm::ConstraintCode::Ut:
5873 case InlineAsm::ConstraintCode::Uv:
5874 case InlineAsm::ConstraintCode::Uy:
5875 // Require the address to be in a register. That is safe for all ARM
5876 // variants and it is hard to do anything much smarter without knowing
5877 // how the operand is used.
5878 OutOps.push_back(Op);
5879 return false;
5880 }
5881 return true;
5882}
5883
5884/// createARMISelDag - This pass converts a legalized DAG into a
5885/// ARM-specific DAG, ready for instruction scheduling.
5886///
5888 CodeGenOptLevel OptLevel) {
5889 return new ARMDAGToDAGISel(TM, OptLevel);
5890}
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
amdgpu AMDGPU Register Bank Select
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
static bool isThumb(const MCSubtargetInfo &STI)
static unsigned getVectorShuffleOpcode(EVT VT, unsigned Opc64[3], unsigned Opc128[3])
static int getBankedRegisterMask(StringRef RegString)
static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs)
Returns true if the given increment is a Constant known to be equal to the access size performed by a...
static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc)
static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned &Imm)
static bool isVSTfixed(unsigned Opc)
static bool isVLDfixed(unsigned Opc)
static bool isInt32Immediate(SDNode *N, unsigned &Imm)
isInt32Immediate - This method tests to see if the node is a 32-bit constant operand.
static std::optional< std::pair< unsigned, unsigned > > getContiguousRangeOfSetBits(const APInt &A)
static void getIntOperandsFromRegisterString(StringRef RegString, SelectionDAG *CurDAG, const SDLoc &DL, std::vector< SDValue > &Ops)
static int getARClassRegisterMask(StringRef Reg, StringRef Flags)
static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget)
static cl::opt< bool > DisableShifterOp("disable-shifter-op", cl::Hidden, cl::desc("Disable isel of shifter-op"), cl::init(false))
#define PASS_NAME
#define DEBUG_TYPE
static SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl)
getAL - Returns a ARMCC::AL immediate node.
static bool shouldUseZeroOffsetLdSt(SDValue N)
static int getMClassFlagsMask(StringRef Flags)
static bool SDValueToConstBool(SDValue SDVal)
static bool isScaledConstantInRange(SDValue Node, int Scale, int RangeMin, int RangeMax, int &ScaledConstant)
Check whether a particular node is a constant value representable as (N * Scale) where (N in [RangeMi...
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
uint64_t Addr
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static bool isSigned(unsigned int Opcode)
#define op(i)
const HexagonInstrInfo * TII
#define I(x, y, z)
Definition: MD5.cpp:58
#define T1
const char LLVMTargetMachineRef TM
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
support::ulittle16_t & Lo
Definition: aarch32.cpp:206
support::ulittle16_t & Hi
Definition: aarch32.cpp:205
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition: APFloat.h:1185
Class for arbitrary precision integers.
Definition: APInt.h:76
An arbitrary precision integer that knows its signedness.
Definition: APSInt.h:23
bool isSwift() const
Definition: ARMSubtarget.h:327
bool isThumb1Only() const
Definition: ARMSubtarget.h:434
bool hasFPARMv8Base() const
Definition: ARMSubtarget.h:344
bool isThumb2() const
Definition: ARMSubtarget.h:435
bool isLikeA9() const
Definition: ARMSubtarget.h:330
bool hasVFP2Base() const
Definition: ARMSubtarget.h:341
bool isLittle() const
Definition: ARMSubtarget.h:477
bool isMClass() const
Definition: ARMSubtarget.h:436
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
uint64_t getZExtValue() const
This class represents an Operation in the Expression.
Container class for subtarget features.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:311
This class is used to form a handle around another node that is persistent and is updated across invo...
Base class for LoadSDNode and StoreSDNode.
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc,...
An instruction for reading from memory.
Definition: Instructions.h:184
This class is used to represent ISD::LOAD nodes.
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
bool mayStore() const
Return true if this instruction could possibly modify memory.
Definition: MCInstrDesc.h:444
unsigned getOpcode() const
Return the opcode number for this descriptor.
Definition: MCInstrDesc.h:230
SimpleValueType SimpleTy
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
An SDNode that represents everything that will be needed to construct a MachineInstr.
This class is used to represent an MLOAD node.
This is an abstract virtual class for memory operations.
Align getAlign() const
EVT getMemoryVT() const
Return the type of the in-memory value.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
int getNodeId() const
Return the unique node id.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
op_iterator op_end() const
op_iterator op_begin() const
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
unsigned getOpcode() const
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, InlineAsm::ConstraintCode ConstraintID, std::vector< SDValue > &OutOps)
SelectInlineAsmMemoryOperand - Select the specified address as a target addressing mode,...
virtual bool ComplexPatternFuncMutatesDAG() const
Return true if complex patterns for this target can mutate the DAG.
virtual void PreprocessISelDAG()
PreprocessISelDAG - This hook allows targets to hack on the graph before instruction selection starts...
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:225
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:676
size_t size() const
Definition: SmallVector.h:91
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
This class is used to represent ISD::STORE nodes.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition: StringRef.h:696
std::string lower() const
Definition: StringRef.cpp:111
std::pair< StringRef, StringRef > rsplit(StringRef Separator) const
Split into two substrings around the last occurrence of a separator string.
Definition: StringRef.h:729
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static IntegerType * getInt32Ty(LLVMContext &C)
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
LLVM Value Representation.
Definition: Value.h:74
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
static ShiftOpc getShiftOpcForNode(unsigned Opcode)
int getSOImmVal(unsigned Arg)
getSOImmVal - Given a 32-bit immediate, if it is something that can fit into an shifter_operand immed...
uint64_t decodeVMOVModImm(unsigned ModImm, unsigned &EltBits)
decodeVMOVModImm - Decode a NEON/MVE modified immediate value into the element value and the element ...
float getFPImmFloat(unsigned Imm)
int getT2SOImmVal(unsigned Arg)
getT2SOImmVal - Given a 32-bit immediate, if it is something that can fit into a Thumb-2 shifter_oper...
unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO, unsigned IdxMode=0)
unsigned getAM5Opc(AddrOpc Opc, unsigned char Offset)
getAM5Opc - This function encodes the addrmode5 opc field.
unsigned getAM5FP16Opc(AddrOpc Opc, unsigned char Offset)
getAM5FP16Opc - This function encodes the addrmode5fp16 opc field.
unsigned getAM3Opc(AddrOpc Opc, unsigned char Offset, unsigned IdxMode=0)
getAM3Opc - This function encodes the addrmode3 opc field.
unsigned getSORegOpc(ShiftOpc ShOp, unsigned Imm)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ TargetConstantPool
Definition: ISDOpcodes.h:168
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:250
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:239
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1031
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:199
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:790
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:390
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:903
@ FrameIndex
Definition: ISDOpcodes.h:80
@ TargetExternalSymbol
Definition: ISDOpcodes.h:169
@ WRITE_REGISTER
Definition: ISDOpcodes.h:119
@ BasicBlock
Various leaf nodes.
Definition: ISDOpcodes.h:71
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:208
@ TargetGlobalAddress
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition: ISDOpcodes.h:164
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:705
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition: ISDOpcodes.h:118
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:535
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:203
@ FP_TO_UINT_SAT
Definition: ISDOpcodes.h:856
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1248
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:798
@ INLINEASM_BR
INLINEASM_BR - Branching version of inline asm. Used by asm-goto.
Definition: ISDOpcodes.h:1097
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:836
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:680
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:184
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:524
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:869
@ INLINEASM
INLINEASM - Represents an inline asm block.
Definition: ISDOpcodes.h:1094
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition: ISDOpcodes.h:855
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:192
@ TargetGlobalTLSAddress
Definition: ISDOpcodes.h:165
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1472
Flag
These should be considered private to the implementation of the MCInstrDesc class.
Definition: MCInstrDesc.h:148
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:456
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition: bit.h:307
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:665
FunctionPass * createARMISelDag(ARMBaseTargetMachine &TM, CodeGenOptLevel OptLevel)
createARMISelDag - This pass converts a legalized DAG into an ARM-specific DAG, ready for instruction ...
constexpr bool isShiftedMask_32(uint32_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (32 bit ver...
Definition: MathExtras.h:252
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant, stopping at the first 1.
Definition: bit.h:215
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:313
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:264
CodeGenOptLevel
Code generation optimization level.
Definition: CodeGen.h:54
unsigned ConstantMaterializationCost(unsigned Val, const ARMSubtarget *Subtarget, bool ForCodesize=false)
Returns the number of instructions required to materialize the given constant in a register,...
@ FMul
Product of floats.
@ And
Bitwise or logical AND of integers.
@ Add
Sum of integers.
@ NearestTiesToEven
roundTiesToEven.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
#define NC
Definition: regutils.h:42
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Extended Value Type.
Definition: ValueTypes.h:34
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:73
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:358
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:370
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:306
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition: ValueTypes.h:203
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:167
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:318
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:326
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:151
bool is64BitVector() const
Return true if this is a 64-bit vector type.
Definition: ValueTypes.h:198
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasNoInfs() const