LLVM 20.0.0git
ARMISelDAGToDAG.cpp
Go to the documentation of this file.
1//===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the ARM target.
10//
11//===----------------------------------------------------------------------===//
12
13#include "ARM.h"
14#include "ARMBaseInstrInfo.h"
15#include "ARMTargetMachine.h"
17#include "Utils/ARMBaseInfo.h"
18#include "llvm/ADT/APSInt.h"
27#include "llvm/IR/CallingConv.h"
28#include "llvm/IR/Constants.h"
30#include "llvm/IR/Function.h"
31#include "llvm/IR/Intrinsics.h"
32#include "llvm/IR/IntrinsicsARM.h"
33#include "llvm/IR/LLVMContext.h"
35#include "llvm/Support/Debug.h"
38#include <optional>
39
40using namespace llvm;
41
42#define DEBUG_TYPE "arm-isel"
43#define PASS_NAME "ARM Instruction Selection"
44
45static cl::opt<bool>
46DisableShifterOp("disable-shifter-op", cl::Hidden,
47 cl::desc("Disable isel of shifter-op"),
48 cl::init(false));
49
50//===--------------------------------------------------------------------===//
51/// ARMDAGToDAGISel - ARM specific code to select ARM machine
52/// instructions for SelectionDAG operations.
53///
54namespace {
55
56class ARMDAGToDAGISel : public SelectionDAGISel {
57 /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
58 /// make the right decision when generating code for different targets.
59 const ARMSubtarget *Subtarget;
60
61public:
62 ARMDAGToDAGISel() = delete;
63
64 explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOptLevel OptLevel)
65 : SelectionDAGISel(tm, OptLevel) {}
66
67 bool runOnMachineFunction(MachineFunction &MF) override {
68 // Reset the subtarget each time through.
69 Subtarget = &MF.getSubtarget<ARMSubtarget>();
71 return true;
72 }
73
74 void PreprocessISelDAG() override;
75
76 /// getI32Imm - Return a target constant of type i32 with the specified
77 /// value.
78 inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
79 return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
80 }
81
82 void Select(SDNode *N) override;
83
84 /// Return true as some complex patterns, like those that call
85 /// canExtractShiftFromMul can modify the DAG inplace.
86 bool ComplexPatternFuncMutatesDAG() const override { return true; }
87
88 bool hasNoVMLxHazardUse(SDNode *N) const;
89 bool isShifterOpProfitable(const SDValue &Shift,
90 ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
91 bool SelectRegShifterOperand(SDValue N, SDValue &A,
92 SDValue &B, SDValue &C,
93 bool CheckProfitability = true);
94 bool SelectImmShifterOperand(SDValue N, SDValue &A,
95 SDValue &B, bool CheckProfitability = true);
96 bool SelectShiftRegShifterOperand(SDValue N, SDValue &A, SDValue &B,
97 SDValue &C) {
98 // Don't apply the profitability check
99 return SelectRegShifterOperand(N, A, B, C, false);
100 }
101 bool SelectShiftImmShifterOperand(SDValue N, SDValue &A, SDValue &B) {
102 // Don't apply the profitability check
103 return SelectImmShifterOperand(N, A, B, false);
104 }
105 bool SelectShiftImmShifterOperandOneUse(SDValue N, SDValue &A, SDValue &B) {
106 if (!N.hasOneUse())
107 return false;
108 return SelectImmShifterOperand(N, A, B, false);
109 }
110
111 bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out);
112
113 bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
114 bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);
115
116 bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
117 const ConstantSDNode *CN = cast<ConstantSDNode>(N);
118 Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
119 Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
120 return true;
121 }
122
123 bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
124 SDValue &Offset, SDValue &Opc);
125 bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
126 SDValue &Offset, SDValue &Opc);
127 bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
128 SDValue &Offset, SDValue &Opc);
129 bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
130 bool SelectAddrMode3(SDValue N, SDValue &Base,
131 SDValue &Offset, SDValue &Opc);
132 bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
133 SDValue &Offset, SDValue &Opc);
134 bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, bool FP16);
135 bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset);
136 bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset);
137 bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
138 bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);
139
140 bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);
141
142 // Thumb Addressing Modes:
143 bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
144 bool SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, SDValue &Offset);
145 bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
146 SDValue &OffImm);
147 bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
148 SDValue &OffImm);
149 bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
150 SDValue &OffImm);
151 bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
152 SDValue &OffImm);
153 bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
154 template <unsigned Shift>
155 bool SelectTAddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);
156
157 // Thumb 2 Addressing Modes:
158 bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
159 template <unsigned Shift>
160 bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, SDValue &OffImm);
161 bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
162 SDValue &OffImm);
163 bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
164 SDValue &OffImm);
165 template <unsigned Shift>
166 bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm);
167 bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm,
168 unsigned Shift);
169 template <unsigned Shift>
170 bool SelectT2AddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);
171 bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
172 SDValue &OffReg, SDValue &ShImm);
173 bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);
174
175 template<int Min, int Max>
176 bool SelectImmediateInRange(SDValue N, SDValue &OffImm);
177
178 inline bool is_so_imm(unsigned Imm) const {
179 return ARM_AM::getSOImmVal(Imm) != -1;
180 }
181
182 inline bool is_so_imm_not(unsigned Imm) const {
183 return ARM_AM::getSOImmVal(~Imm) != -1;
184 }
185
186 inline bool is_t2_so_imm(unsigned Imm) const {
187 return ARM_AM::getT2SOImmVal(Imm) != -1;
188 }
189
190 inline bool is_t2_so_imm_not(unsigned Imm) const {
191 return ARM_AM::getT2SOImmVal(~Imm) != -1;
192 }
193
194 // Include the pieces autogenerated from the target description.
195#include "ARMGenDAGISel.inc"
196
197private:
198 void transferMemOperands(SDNode *Src, SDNode *Dst);
199
200 /// Indexed (pre/post inc/dec) load matching code for ARM.
201 bool tryARMIndexedLoad(SDNode *N);
202 bool tryT1IndexedLoad(SDNode *N);
203 bool tryT2IndexedLoad(SDNode *N);
204 bool tryMVEIndexedLoad(SDNode *N);
205 bool tryFMULFixed(SDNode *N, SDLoc dl);
206 bool tryFP_TO_INT(SDNode *N, SDLoc dl);
207 bool transformFixedFloatingPointConversion(SDNode *N, SDNode *FMul,
208 bool IsUnsigned,
209 bool FixedToFloat);
210
211 /// SelectVLD - Select NEON load intrinsics. NumVecs should be
212 /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for
213 /// loads of D registers and even subregs and odd subregs of Q registers.
214 /// For NumVecs <= 2, QOpcodes1 is not used.
215 void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
216 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
217 const uint16_t *QOpcodes1);
218
219 /// SelectVST - Select NEON store intrinsics. NumVecs should
220 /// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for
221 /// stores of D registers and even subregs and odd subregs of Q registers.
222 /// For NumVecs <= 2, QOpcodes1 is not used.
223 void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
224 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
225 const uint16_t *QOpcodes1);
226
227 /// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should
228 /// be 2, 3 or 4. The opcode arrays specify the instructions used for
229 /// load/store of D registers and Q registers.
230 void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
231 unsigned NumVecs, const uint16_t *DOpcodes,
232 const uint16_t *QOpcodes);
233
234 /// Helper functions for setting up clusters of MVE predication operands.
235 template <typename SDValueVector>
236 void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
237 SDValue PredicateMask);
238 template <typename SDValueVector>
239 void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
240 SDValue PredicateMask, SDValue Inactive);
241
242 template <typename SDValueVector>
243 void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc);
244 template <typename SDValueVector>
245 void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, EVT InactiveTy);
246
247 /// SelectMVE_WB - Select MVE writeback load/store intrinsics.
248 void SelectMVE_WB(SDNode *N, const uint16_t *Opcodes, bool Predicated);
249
250 /// SelectMVE_LongShift - Select MVE 64-bit scalar shift intrinsics.
251 void SelectMVE_LongShift(SDNode *N, uint16_t Opcode, bool Immediate,
252 bool HasSaturationOperand);
253
254 /// SelectMVE_VADCSBC - Select MVE vector add/sub-with-carry intrinsics.
255 void SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
256 uint16_t OpcodeWithNoCarry, bool Add, bool Predicated);
257
258 /// SelectMVE_VSHLC - Select MVE intrinsics for a shift that carries between
259 /// vector lanes.
260 void SelectMVE_VSHLC(SDNode *N, bool Predicated);
261
262 /// Select long MVE vector reductions with two vector operands
263 /// Stride is the number of vector element widths the instruction can operate
264 /// on:
265 /// 2 for long non-rounding variants, vml{a,s}ldav[a][x]: [i16, i32]
266 /// 1 for long rounding variants: vrml{a,s}ldavh[a][x]: [i32]
267 /// Stride is used when addressing the OpcodesS array which contains multiple
268 /// opcodes for each element width.
269 /// TySize is the index into the list of element types listed above
270 void SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
271 const uint16_t *OpcodesS, const uint16_t *OpcodesU,
272 size_t Stride, size_t TySize);
273
274 /// Select a 64-bit MVE vector reduction with two vector operands
275 /// arm_mve_vmlldava_[predicated]
276 void SelectMVE_VMLLDAV(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
277 const uint16_t *OpcodesU);
278 /// Select a 72-bit MVE vector rounding reduction with two vector operands
279 /// int_arm_mve_vrmlldavha[_predicated]
280 void SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
281 const uint16_t *OpcodesU);
282
283 /// SelectMVE_VLD - Select MVE interleaving load intrinsics. NumVecs
284 /// should be 2 or 4. The opcode array specifies the instructions
285 /// used for 8, 16 and 32-bit lane sizes respectively, and each
286 /// pointer points to a set of NumVecs sub-opcodes used for the
287 /// different stages (e.g. VLD20 versus VLD21) of each load family.
288 void SelectMVE_VLD(SDNode *N, unsigned NumVecs,
289 const uint16_t *const *Opcodes, bool HasWriteback);
290
291 /// SelectMVE_VxDUP - Select MVE incrementing-dup instructions. Opcodes is an
292 /// array of 3 elements for the 8, 16 and 32-bit lane sizes.
293 void SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes,
294 bool Wrapping, bool Predicated);
295
296 /// Select SelectCDE_CXxD - Select CDE dual-GPR instruction (one of CX1D,
297 /// CX1DA, CX2D, CX2DA, CX3, CX3DA).
298 /// \arg \c NumExtraOps number of extra operands besides the coprocossor,
299 /// the accumulator and the immediate operand, i.e. 0
300 /// for CX1*, 1 for CX2*, 2 for CX3*
301 /// \arg \c HasAccum whether the instruction has an accumulator operand
302 void SelectCDE_CXxD(SDNode *N, uint16_t Opcode, size_t NumExtraOps,
303 bool HasAccum);
304
305 /// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs
306 /// should be 1, 2, 3 or 4. The opcode array specifies the instructions used
307 /// for loading D registers.
308 void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating,
309 unsigned NumVecs, const uint16_t *DOpcodes,
310 const uint16_t *QOpcodes0 = nullptr,
311 const uint16_t *QOpcodes1 = nullptr);
312
313 /// Try to select SBFX/UBFX instructions for ARM.
314 bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
315
316 bool tryInsertVectorElt(SDNode *N);
317
318 // Select special operations if node forms integer ABS pattern
319 bool tryABSOp(SDNode *N);
320
321 bool tryReadRegister(SDNode *N);
322 bool tryWriteRegister(SDNode *N);
323
324 bool tryInlineAsm(SDNode *N);
325
326 void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);
327
328 void SelectCMP_SWAP(SDNode *N);
329
330 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
331 /// inline asm expressions.
333 InlineAsm::ConstraintCode ConstraintID,
334 std::vector<SDValue> &OutOps) override;
335
336 // Form pairs of consecutive R, S, D, or Q registers.
338 SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
339 SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
340 SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);
341
342 // Form sequences of 4 consecutive S, D, or Q registers.
343 SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
344 SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
345 SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
346
347 // Get the alignment operand for a NEON VLD or VST instruction.
348 SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
349 bool is64BitVector);
350
351 /// Checks if N is a multiplication by a constant where we can extract out a
352 /// power of two from the constant so that it can be used in a shift, but only
353 /// if it simplifies the materialization of the constant. Returns true if it
354 /// is, and assigns to PowerOfTwo the power of two that should be extracted
355 /// out and to NewMulConst the new constant to be multiplied by.
356 bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
357 unsigned &PowerOfTwo, SDValue &NewMulConst) const;
358
359 /// Replace N with M in CurDAG, in a way that also ensures that M gets
360 /// selected when N would have been selected.
361 void replaceDAGValue(const SDValue &N, SDValue M);
362};
363
364class ARMDAGToDAGISelLegacy : public SelectionDAGISelLegacy {
365public:
366 static char ID;
367 ARMDAGToDAGISelLegacy(ARMBaseTargetMachine &tm, CodeGenOptLevel OptLevel)
369 ID, std::make_unique<ARMDAGToDAGISel>(tm, OptLevel)) {}
370};
371}
372
// Unique address used by the legacy pass framework to identify this pass.
char ARMDAGToDAGISelLegacy::ID = 0;

// Register the pass under DEBUG_TYPE ("arm-isel") with PASS_NAME
// ("ARM Instruction Selection").
INITIALIZE_PASS(ARMDAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
376
377/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
378/// operand. If so Imm will receive the 32-bit value.
379static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
380 if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
381 Imm = N->getAsZExtVal();
382 return true;
383 }
384 return false;
385}
386
387// isInt32Immediate - This method tests to see if a constant operand.
388// If so Imm will receive the 32 bit value.
389static bool isInt32Immediate(SDValue N, unsigned &Imm) {
390 return isInt32Immediate(N.getNode(), Imm);
391}
392
393// isOpcWithIntImmediate - This method tests to see if the node is a specific
394// opcode and that it has a immediate integer right operand.
395// If so Imm will receive the 32 bit value.
396static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
397 return N->getOpcode() == Opc &&
398 isInt32Immediate(N->getOperand(1).getNode(), Imm);
399}
400
401/// Check whether a particular node is a constant value representable as
402/// (N * Scale) where (N in [\p RangeMin, \p RangeMax).
403///
404/// \param ScaledConstant [out] - On success, the pre-scaled constant value.
405static bool isScaledConstantInRange(SDValue Node, int Scale,
406 int RangeMin, int RangeMax,
407 int &ScaledConstant) {
408 assert(Scale > 0 && "Invalid scale!");
409
410 // Check that this is a constant.
411 const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
412 if (!C)
413 return false;
414
415 ScaledConstant = (int) C->getZExtValue();
416 if ((ScaledConstant % Scale) != 0)
417 return false;
418
419 ScaledConstant /= Scale;
420 return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
421}
422
void ARMDAGToDAGISel::PreprocessISelDAG() {
  // The rewrite below produces a bits-extraction (UBFX) pattern, which needs
  // the v6T2 bitfield instructions.
  if (!Subtarget->hasV6T2Ops())
    return;

  bool isThumb2 = Subtarget->isThumb();
  // We use make_early_inc_range to avoid invalidation issues.
  for (SDNode &N : llvm::make_early_inc_range(CurDAG->allnodes())) {
    if (N.getOpcode() != ISD::ADD)
      continue;

    // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
    // leading zeros, followed by consecutive set bits, followed by 1 or 2
    // trailing zeros, e.g. 1020.
    // Transform the expression to
    // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
    // of trailing zeros of c2. The left shift would be folded as an shifter
    // operand of 'add' and the 'and' and 'srl' would become a bits extraction
    // node (UBFX).

    SDValue N0 = N.getOperand(0);
    SDValue N1 = N.getOperand(1);
    unsigned And_imm = 0;
    // The AND may sit on either side of the ADD; canonicalize it into N1.
    if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
      if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
        std::swap(N0, N1);
    }
    if (!And_imm)
      continue;

    // Check if the AND mask is an immediate of the form: 000.....1111111100
    unsigned TZ = llvm::countr_zero(And_imm);
    if (TZ != 1 && TZ != 2)
      // Be conservative here. Shifter operands aren't always free. e.g. On
      // Swift, left shifter operand of 1 / 2 for free but others are not.
      // e.g.
      //  ubfx   r3, r1, #16, #8
      //  ldr.w  r3, [r0, r3, lsl #2]
      // vs.
      //  mov.w  r9, #1020
      //  and.w  r2, r9, r1, lsr #14
      //  ldr    r2, [r0, r2]
      continue;
    And_imm >>= TZ;
    // After dropping the trailing zeros the remaining mask must be a run of
    // consecutive ones: x & (x + 1) == 0 holds exactly for such masks.
    if (And_imm & (And_imm + 1))
      continue;

    // Look for (and (srl X, c1), c2).
    SDValue Srl = N1.getOperand(0);
    unsigned Srl_imm = 0;
    if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
        (Srl_imm <= 2))
      continue;

    // Make sure first operand is not a shifter operand which would prevent
    // folding of the left shift.
    SDValue CPTmp0;
    SDValue CPTmp1;
    SDValue CPTmp2;
    if (isThumb2) {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
        continue;
    } else {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
          SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
        continue;
    }

    // Now make the transformation: fold the two trailing zeros into the
    // right-shift amount, shrink the mask accordingly, and re-introduce them
    // as an explicit left shift that 'add' can fold as a shifter operand.
    Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
                          Srl.getOperand(0),
                          CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
                                              MVT::i32));
    N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
                         Srl,
                         CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
    N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
                         N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
    CurDAG->UpdateNodeOperands(&N, N0, N1);
  }
}
503
504/// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
505/// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
506/// least on current ARM implementations) which should be avoidded.
507bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
508 if (OptLevel == CodeGenOptLevel::None)
509 return true;
510
511 if (!Subtarget->hasVMLxHazards())
512 return true;
513
514 if (!N->hasOneUse())
515 return false;
516
517 SDNode *Use = *N->use_begin();
518 if (Use->getOpcode() == ISD::CopyToReg)
519 return true;
520 if (Use->isMachineOpcode()) {
521 const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
522 CurDAG->getSubtarget().getInstrInfo());
523
524 const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
525 if (MCID.mayStore())
526 return true;
527 unsigned Opcode = MCID.getOpcode();
528 if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
529 return true;
530 // vmlx feeding into another vmlx. We actually want to unfold
531 // the use later in the MLxExpansion pass. e.g.
532 // vmla
533 // vmla (stall 8 cycles)
534 //
535 // vmul (5 cycles)
536 // vadd (5 cycles)
537 // vmla
538 // This adds up to about 18 - 19 cycles.
539 //
540 // vmla
541 // vmul (stall 4 cycles)
542 // vadd adds up to about 14 cycles.
543 return TII->isFpMLxInstruction(Opcode);
544 }
545
546 return false;
547}
548
549bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
550 ARM_AM::ShiftOpc ShOpcVal,
551 unsigned ShAmt) {
552 if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
553 return true;
554 if (Shift.hasOneUse())
555 return true;
556 // R << 2 is free.
557 return ShOpcVal == ARM_AM::lsl &&
558 (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
559}
560
561bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
562 unsigned MaxShift,
563 unsigned &PowerOfTwo,
564 SDValue &NewMulConst) const {
565 assert(N.getOpcode() == ISD::MUL);
566 assert(MaxShift > 0);
567
568 // If the multiply is used in more than one place then changing the constant
569 // will make other uses incorrect, so don't.
570 if (!N.hasOneUse()) return false;
571 // Check if the multiply is by a constant
572 ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
573 if (!MulConst) return false;
574 // If the constant is used in more than one place then modifying it will mean
575 // we need to materialize two constants instead of one, which is a bad idea.
576 if (!MulConst->hasOneUse()) return false;
577 unsigned MulConstVal = MulConst->getZExtValue();
578 if (MulConstVal == 0) return false;
579
580 // Find the largest power of 2 that MulConstVal is a multiple of
581 PowerOfTwo = MaxShift;
582 while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
583 --PowerOfTwo;
584 if (PowerOfTwo == 0) return false;
585 }
586
587 // Only optimise if the new cost is better
588 unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
589 NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
590 unsigned OldCost = ConstantMaterializationCost(MulConstVal, Subtarget);
591 unsigned NewCost = ConstantMaterializationCost(NewMulConstVal, Subtarget);
592 return NewCost < OldCost;
593}
594
void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
  // Move M to N's position in the DAG's node list before replacing the uses,
  // so that M still gets selected where N would have been (see the
  // declaration comment in the class above).
  CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
  ReplaceUses(N, M);
}
599
600bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
601 SDValue &BaseReg,
602 SDValue &Opc,
603 bool CheckProfitability) {
605 return false;
606
607 // If N is a multiply-by-constant and it's profitable to extract a shift and
608 // use it in a shifted operand do so.
609 if (N.getOpcode() == ISD::MUL) {
610 unsigned PowerOfTwo = 0;
611 SDValue NewMulConst;
612 if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
613 HandleSDNode Handle(N);
614 SDLoc Loc(N);
615 replaceDAGValue(N.getOperand(1), NewMulConst);
616 BaseReg = Handle.getValue();
617 Opc = CurDAG->getTargetConstant(
618 ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32);
619 return true;
620 }
621 }
622
623 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
624
625 // Don't match base register only case. That is matched to a separate
626 // lower complexity pattern with explicit register operand.
627 if (ShOpcVal == ARM_AM::no_shift) return false;
628
629 BaseReg = N.getOperand(0);
630 unsigned ShImmVal = 0;
631 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
632 if (!RHS) return false;
633 ShImmVal = RHS->getZExtValue() & 31;
634 Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
635 SDLoc(N), MVT::i32);
636 return true;
637}
638
639bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
640 SDValue &BaseReg,
641 SDValue &ShReg,
642 SDValue &Opc,
643 bool CheckProfitability) {
645 return false;
646
647 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
648
649 // Don't match base register only case. That is matched to a separate
650 // lower complexity pattern with explicit register operand.
651 if (ShOpcVal == ARM_AM::no_shift) return false;
652
653 BaseReg = N.getOperand(0);
654 unsigned ShImmVal = 0;
655 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
656 if (RHS) return false;
657
658 ShReg = N.getOperand(1);
659 if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
660 return false;
661 Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
662 SDLoc(N), MVT::i32);
663 return true;
664}
665
// Determine whether an ISD::OR's operands are suitable to turn the operation
// into an addition, which often has more compact encodings.
bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) {
  assert(Parent->getOpcode() == ISD::OR && "unexpected parent");
  // N is passed through unchanged; the caller only needs the yes/no answer.
  Out = N;
  // An OR is equivalent to an ADD exactly when the operands share no set bits.
  return CurDAG->haveNoCommonBitsSet(N, Parent->getOperand(1));
}
673
674
bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
                                          SDValue &Base,
                                          SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    // Look through an ARMISD::Wrapper, but NOT around global / external /
    // TLS addresses — those wrappers are excluded here; presumably their
    // lowering needs the wrapper kept intact (TODO confirm).
    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    } else
      Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    // Fold the SUB into a negated offset.
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getSignedConstant(RHSC, SDLoc(N), MVT::i32,
                                         /*isTarget=*/true);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}
726
727
728
729bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
730 SDValue &Opc) {
731 if (N.getOpcode() == ISD::MUL &&
732 ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
733 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
734 // X * [3,5,9] -> X + X * [2,4,8] etc.
735 int RHSC = (int)RHS->getZExtValue();
736 if (RHSC & 1) {
737 RHSC = RHSC & ~1;
739 if (RHSC < 0) {
741 RHSC = - RHSC;
742 }
743 if (isPowerOf2_32(RHSC)) {
744 unsigned ShAmt = Log2_32(RHSC);
745 Base = Offset = N.getOperand(0);
746 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
748 SDLoc(N), MVT::i32);
749 return true;
750 }
751 }
752 }
753 }
754
755 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
756 // ISD::OR that is equivalent to an ISD::ADD.
757 !CurDAG->isBaseWithConstantOffset(N))
758 return false;
759
760 // Leave simple R +/- imm12 operands for LDRi12
761 if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
762 int RHSC;
763 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
764 -0x1000+1, 0x1000, RHSC)) // 12 bits.
765 return false;
766 }
767
768 // Otherwise this is R +/- [possibly shifted] R.
770 ARM_AM::ShiftOpc ShOpcVal =
771 ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
772 unsigned ShAmt = 0;
773
774 Base = N.getOperand(0);
775 Offset = N.getOperand(1);
776
777 if (ShOpcVal != ARM_AM::no_shift) {
778 // Check to see if the RHS of the shift is a constant, if not, we can't fold
779 // it.
780 if (ConstantSDNode *Sh =
781 dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
782 ShAmt = Sh->getZExtValue();
783 if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
784 Offset = N.getOperand(1).getOperand(0);
785 else {
786 ShAmt = 0;
787 ShOpcVal = ARM_AM::no_shift;
788 }
789 } else {
790 ShOpcVal = ARM_AM::no_shift;
791 }
792 }
793
794 // Try matching (R shl C) + (R).
795 if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
796 !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
797 N.getOperand(0).hasOneUse())) {
798 ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
799 if (ShOpcVal != ARM_AM::no_shift) {
800 // Check to see if the RHS of the shift is a constant, if not, we can't
801 // fold it.
802 if (ConstantSDNode *Sh =
803 dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
804 ShAmt = Sh->getZExtValue();
805 if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
806 Offset = N.getOperand(0).getOperand(0);
807 Base = N.getOperand(1);
808 } else {
809 ShAmt = 0;
810 ShOpcVal = ARM_AM::no_shift;
811 }
812 } else {
813 ShOpcVal = ARM_AM::no_shift;
814 }
815 }
816 }
817
818 // If Offset is a multiply-by-constant and it's profitable to extract a shift
819 // and use it in a shifted operand do so.
820 if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
821 unsigned PowerOfTwo = 0;
822 SDValue NewMulConst;
823 if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
824 HandleSDNode Handle(Offset);
825 replaceDAGValue(Offset.getOperand(1), NewMulConst);
826 Offset = Handle.getValue();
827 ShAmt = PowerOfTwo;
828 ShOpcVal = ARM_AM::lsl;
829 }
830 }
831
832 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
833 SDLoc(N), MVT::i32);
834 return true;
835}
836
837bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
838 SDValue &Offset, SDValue &Opc) {
839 unsigned Opcode = Op->getOpcode();
840 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
841 ? cast<LoadSDNode>(Op)->getAddressingMode()
842 : cast<StoreSDNode>(Op)->getAddressingMode();
845 int Val;
846 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
847 return false;
848
849 Offset = N;
850 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
851 unsigned ShAmt = 0;
852 if (ShOpcVal != ARM_AM::no_shift) {
853 // Check to see if the RHS of the shift is a constant, if not, we can't fold
854 // it.
855 if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
856 ShAmt = Sh->getZExtValue();
857 if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
858 Offset = N.getOperand(0);
859 else {
860 ShAmt = 0;
861 ShOpcVal = ARM_AM::no_shift;
862 }
863 } else {
864 ShOpcVal = ARM_AM::no_shift;
865 }
866 }
867
868 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
869 SDLoc(N), MVT::i32);
870 return true;
871}
872
// Select a 12-bit immediate offset for an addrmode2 *pre*-indexed
// load/store.  On success Offset is the register-0 placeholder and Opc
// carries the signed immediate.
bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                                                  SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  // NOTE(review): the line(s) deriving AddSub from AM are not visible in
  // this excerpt.
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
    if (AddSub == ARM_AM::sub) Val *= -1; // Decrementing: encode as negative.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc =
        CurDAG->getSignedConstant(Val, SDLoc(Op), MVT::i32, /*isTarget*/ true);
    return true;
  }

  return false;
}
892
893
// Select a 12-bit immediate offset for an addrmode2 *post*-indexed
// load/store.  Direction and value are packed into an AM2 opcode constant.
bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                                               SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  // NOTE(review): the line deriving AddSub from AM and part of the
  // getAM2Opc argument list are missing from this excerpt.
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
                                    SDLoc(Op), MVT::i32);
    return true;
  }

  return false;
}
913
// Trivial selector: use the pointer N itself as the base with no offset.
// Always matches.
bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
  Base = N;
  return true;
}
918
// Select an addrmode3 address: base + register offset, or base +/- imm8.
// NOTE(review): the parameter line declaring &Base and &Offset is not
// visible in this excerpt; both are used as outputs below.
bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::SUB) {
    // X - C is canonicalize to X + -C, no need to handle it here.
    Base = N.getOperand(0);
    Offset = N.getOperand(1);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    // No constant addend: base-only with a zero register offset.
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                              -256 + 1, 256, RHSC)) { // 8 bits.
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);

    // NOTE(review): the declaration of AddSub (add vs sub, presumably from
    // the sign of RHSC) is on line(s) not visible in this excerpt.
    if (RHSC < 0) {
      RHSC = -RHSC;
    }
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  // Constant addend out of imm8 range: fall back to register offset.
  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                  MVT::i32);
  return true;
}
972
// Select an addrmode3 pre/post-indexed offset.  An 8-bit constant becomes
// an immediate offset; anything else is used as a register offset.
bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  // NOTE(review): the line(s) deriving AddSub from AM are not visible in
  // this excerpt.
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 8 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
                                    MVT::i32);
    return true;
  }

  Offset = N;
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
                                  MVT::i32);
  return true;
}
994
// Shared worker for addrmode5 / addrmode5fp16: base register plus an imm8
// offset scaled by 4 (or by 2 when FP16 is set).  Always succeeds; when no
// constant can be folded the whole node becomes the base with offset 0.
bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset,
                                        bool FP16) {
  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
               N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      // Strip the wrapper except for target addresses/symbols, which need it.
      Base = N.getOperand(0);
    }
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  const int Scale = FP16 ? 2 : 4;

  if (isScaledConstantInRange(N.getOperand(1), Scale, -255, 256, RHSC)) {
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }

    // NOTE(review): the declaration of AddSub (add vs sub, presumably from
    // the sign of RHSC) is on line(s) not visible in this excerpt.
    if (RHSC < 0) {
      RHSC = -RHSC;
    }

    if (FP16)
      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub, RHSC),
                                         SDLoc(N), MVT::i32);
    else
      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
                                         SDLoc(N), MVT::i32);

    return true;
  }

  // Constant out of range: base-only with a zero "add" offset.
  Base = N;

  if (FP16)
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
  else
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);

  return true;
}
1053
// addrmode5 (imm8 scaled by 4) wrapper around IsAddressingMode5.
// NOTE(review): the &Base/&Offset parameter line is not visible in this
// excerpt.
bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
  return IsAddressingMode5(N, Base, Offset, /*FP16=*/ false);
}
1058
// addrmode5fp16 (imm8 scaled by 2) wrapper around IsAddressingMode5.
// NOTE(review): the &Base/&Offset parameter line is not visible in this
// excerpt.
bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N,
  return IsAddressingMode5(N, Base, Offset, /*FP16=*/ true);
}
1063
1064bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
1065 SDValue &Align) {
1066 Addr = N;
1067
1068 unsigned Alignment = 0;
1069
1070 MemSDNode *MemN = cast<MemSDNode>(Parent);
1071
1072 if (isa<LSBaseSDNode>(MemN) ||
1073 ((MemN->getOpcode() == ARMISD::VST1_UPD ||
1074 MemN->getOpcode() == ARMISD::VLD1_UPD) &&
1075 MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
1076 // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
1077 // The maximum alignment is equal to the memory size being referenced.
1078 llvm::Align MMOAlign = MemN->getAlign();
1079 unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
1080 if (MMOAlign.value() >= MemSize && MemSize > 1)
1081 Alignment = MemSize;
1082 } else {
1083 // All other uses of addrmode6 are for intrinsics. For now just record
1084 // the raw alignment value; it will be refined later based on the legal
1085 // alignment operands for the intrinsic.
1086 Alignment = MemN->getAlign().value();
1087 }
1088
1089 Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
1090 return true;
1091}
1092
// Select the post-increment offset operand for an addrmode6 load/store.
// An increment equal to the access size selects the register-0 placeholder
// (the fixed-increment instruction form).
bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset) {
  LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
  // NOTE(review): the line reading the addressing mode into AM is not
  // visible in this excerpt.
  if (AM != ISD::POST_INC)
    return false;
  Offset = N;
  if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
    // Increment matching the memory size in bytes -> register-less form.
    if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
      Offset = CurDAG->getRegister(0, MVT::i32);
  }
  return true;
}
1106
1107bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
1108 SDValue &Offset, SDValue &Label) {
1109 if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
1110 Offset = N.getOperand(0);
1111 SDValue N1 = N.getOperand(1);
1112 Label = CurDAG->getTargetConstant(N1->getAsZExtVal(), SDLoc(N), MVT::i32);
1113 return true;
1114 }
1115
1116 return false;
1117}
1118
1119
1120//===----------------------------------------------------------------------===//
1121// Thumb Addressing Modes
1122//===----------------------------------------------------------------------===//
1123
// NOTE(review): the signature line of this helper is not visible in this
// excerpt; from the body it examines an SDValue N and returns bool
// (true when the add-of-negative should be selected as a sub with a
// zero-offset ld/st).
  // Negative numbers are difficult to materialise in thumb1. If we are
  // selecting the add of a negative, instead try to select ri with a zero
  // offset, so create the add node directly which will become a sub.
  if (N.getOpcode() != ISD::ADD)
    return false;

  // Look for an imm which is not legal for ld/st, but is legal for sub.
  if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1)))
    return C->getSExtValue() < 0 && C->getSExtValue() >= -255;

  return false;
}
1137
1138bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base,
1139 SDValue &Offset) {
1140 if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
1141 if (!isNullConstant(N))
1142 return false;
1143
1144 Base = Offset = N;
1145 return true;
1146 }
1147
1148 Base = N.getOperand(0);
1149 Offset = N.getOperand(1);
1150 return true;
1151}
1152
// Thumb1 reg+reg addressing; defers to the sext variant.
// NOTE(review): the guard condition preceding the first return is on a
// line not visible in this excerpt.
bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, SDValue &Base,
                                            SDValue &Offset) {
    return false; // Select ri instead
  return SelectThumbAddrModeRRSext(N, Base, Offset);
}
1159
// Thumb1 base + (imm5 * Scale) addressing.  Returns false when a register
// offset should be selected instead.
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
                                          SDValue &Base, SDValue &OffImm) {
  // NOTE(review): the guard opening this first block is on a line not
  // visible in this excerpt.
    Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::ADD) {
      return false; // We want to select register offset instead
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
               N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
               N.getOperand(0).getOpcode() != ISD::TargetConstantPool &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      // Strip the wrapper except for target addresses, which need it.
      Base = N.getOperand(0);
    } else {
      Base = N;
    }

    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is + imm5 * scale, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
    Base = N.getOperand(0);
    OffImm =
        CurDAG->getSignedConstant(RHSC, SDLoc(N), MVT::i32, /*isTarget=*/true);
    return true;
  }

  // Offset is too large, so use register offset instead.
  return false;
}
1198
// Word-sized variant (scale 4) of SelectThumbAddrModeImm5S.
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
}
1204
// Halfword variant (scale 2) of SelectThumbAddrModeImm5S.
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
}
1210
// Byte variant (scale 1) of SelectThumbAddrModeImm5S.
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
}
1216
// Thumb1 SP-relative addressing: a frame index plus an optional imm8*4
// offset.  Note the side effect: the frame object's alignment may be
// raised to 4 so the final offset stays a multiple of 4.
bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    // Only multiples of 4 are allowed for the offset, so the frame object
    // alignment must be at least 4.
    MachineFrameInfo &MFI = MF->getFrameInfo();
    if (MFI.getObjectAlign(FI) < Align(4))
      MFI.setObjectAlignment(FI, Align(4));
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N))
    return false;

  if (N.getOperand(0).getOpcode() == ISD::FrameIndex) {
    // If the RHS is + imm8 * scale, fold into addr mode.
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
      Base = N.getOperand(0);
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      // Make sure the offset is inside the object, or we might fail to
      // allocate an emergency spill slot. (An out-of-range access is UB, but
      // it could show up anyway.)
      MachineFrameInfo &MFI = MF->getFrameInfo();
      if (RHSC * 4 < MFI.getObjectSize(FI)) {
        // For LHS+RHS to result in an offset that's a multiple of 4 the object
        // indexed by the LHS must be 4-byte aligned.
        if (!MFI.isFixedObjectIndex(FI) && MFI.getObjectAlign(FI) < Align(4))
          MFI.setObjectAlignment(FI, Align(4));
        if (MFI.getObjectAlign(FI) >= Align(4)) {
          Base = CurDAG->getTargetFrameIndex(
              FI, TLI->getPointerTy(CurDAG->getDataLayout()));
          OffImm = CurDAG->getSignedConstant(RHSC, SDLoc(N), MVT::i32,
                                             /*isTarget=*/true);
          return true;
        }
      }
    }
  }

  return false;
}
1263
1264template <unsigned Shift>
1265bool ARMDAGToDAGISel::SelectTAddrModeImm7(SDValue N, SDValue &Base,
1266 SDValue &OffImm) {
1267 if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
1268 int RHSC;
1269 if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
1270 RHSC)) {
1271 Base = N.getOperand(0);
1272 if (N.getOpcode() == ISD::SUB)
1273 RHSC = -RHSC;
1274 OffImm = CurDAG->getSignedConstant(RHSC * (1 << Shift), SDLoc(N),
1275 MVT::i32, /*isTarget=*/true);
1276 return true;
1277 }
1278 }
1279
1280 // Base only.
1281 Base = N;
1282 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1283 return true;
1284}
1285
1286
1287//===----------------------------------------------------------------------===//
1288// Thumb 2 Addressing Modes
1289//===----------------------------------------------------------------------===//
1290
1291
1292bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
1293 SDValue &Base, SDValue &OffImm) {
1294 // Match simple R + imm12 operands.
1295
1296 // Base only.
1297 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1298 !CurDAG->isBaseWithConstantOffset(N)) {
1299 if (N.getOpcode() == ISD::FrameIndex) {
1300 // Match frame index.
1301 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1302 Base = CurDAG->getTargetFrameIndex(
1303 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1304 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1305 return true;
1306 }
1307
1308 if (N.getOpcode() == ARMISD::Wrapper &&
1309 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1310 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1311 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1312 Base = N.getOperand(0);
1313 if (Base.getOpcode() == ISD::TargetConstantPool)
1314 return false; // We want to select t2LDRpci instead.
1315 } else
1316 Base = N;
1317 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1318 return true;
1319 }
1320
1321 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1322 if (SelectT2AddrModeImm8(N, Base, OffImm))
1323 // Let t2LDRi8 handle (R - imm8).
1324 return false;
1325
1326 int RHSC = (int)RHS->getZExtValue();
1327 if (N.getOpcode() == ISD::SUB)
1328 RHSC = -RHSC;
1329
1330 if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
1331 Base = N.getOperand(0);
1332 if (Base.getOpcode() == ISD::FrameIndex) {
1333 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1334 Base = CurDAG->getTargetFrameIndex(
1335 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1336 }
1337 OffImm = CurDAG->getSignedConstant(RHSC, SDLoc(N), MVT::i32,
1338 /*isTarget=*/true);
1339 return true;
1340 }
1341 }
1342
1343 // Base only.
1344 Base = N;
1345 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1346 return true;
1347}
1348
1349template <unsigned Shift>
1350bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, SDValue &Base,
1351 SDValue &OffImm) {
1352 if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
1353 int RHSC;
1354 if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -255, 256, RHSC)) {
1355 Base = N.getOperand(0);
1356 if (Base.getOpcode() == ISD::FrameIndex) {
1357 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1358 Base = CurDAG->getTargetFrameIndex(
1359 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1360 }
1361
1362 if (N.getOpcode() == ISD::SUB)
1363 RHSC = -RHSC;
1364 OffImm =
1365 CurDAG->getSignedConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32,
1366 /*isTarget=*/true);
1367 return true;
1368 }
1369 }
1370
1371 // Base only.
1372 Base = N;
1373 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1374 return true;
1375}
1376
1377bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
1378 SDValue &Base, SDValue &OffImm) {
1379 // Match simple R - imm8 operands.
1380 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1381 !CurDAG->isBaseWithConstantOffset(N))
1382 return false;
1383
1384 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1385 int RHSC = (int)RHS->getSExtValue();
1386 if (N.getOpcode() == ISD::SUB)
1387 RHSC = -RHSC;
1388
1389 if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
1390 Base = N.getOperand(0);
1391 if (Base.getOpcode() == ISD::FrameIndex) {
1392 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1393 Base = CurDAG->getTargetFrameIndex(
1394 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1395 }
1396 OffImm = CurDAG->getSignedConstant(RHSC, SDLoc(N), MVT::i32,
1397 /*isTarget=*/true);
1398 return true;
1399 }
1400 }
1401
1402 return false;
1403}
1404
1405bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
1406 SDValue &OffImm){
1407 unsigned Opcode = Op->getOpcode();
1408 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1409 ? cast<LoadSDNode>(Op)->getAddressingMode()
1410 : cast<StoreSDNode>(Op)->getAddressingMode();
1411 int RHSC;
1412 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
1413 OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
1414 ? CurDAG->getSignedConstant(RHSC, SDLoc(N), MVT::i32,
1415 /*isTarget=*/true)
1416 : CurDAG->getSignedConstant(-RHSC, SDLoc(N), MVT::i32,
1417 /*isTarget=*/true);
1418 return true;
1419 }
1420
1421 return false;
1422}
1423
1424template <unsigned Shift>
1425bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N, SDValue &Base,
1426 SDValue &OffImm) {
1427 if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
1428 int RHSC;
1429 if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
1430 RHSC)) {
1431 Base = N.getOperand(0);
1432 if (Base.getOpcode() == ISD::FrameIndex) {
1433 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1434 Base = CurDAG->getTargetFrameIndex(
1435 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1436 }
1437
1438 if (N.getOpcode() == ISD::SUB)
1439 RHSC = -RHSC;
1440 OffImm = CurDAG->getSignedConstant(RHSC * (1 << Shift), SDLoc(N),
1441 MVT::i32, /*isTarget=*/true);
1442 return true;
1443 }
1444 }
1445
1446 // Base only.
1447 Base = N;
1448 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1449 return true;
1450}
1451
// Template shim: forwards the compile-time Shift to the runtime overload
// below.
template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm) {
  return SelectT2AddrModeImm7Offset(Op, N, OffImm, Shift);
}
1457
// Select a (imm7 << Shift) pre/post-indexed offset for a plain or masked
// load/store; the value is negated for decrementing modes.
bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm,
                                                 unsigned Shift) {
  unsigned Opcode = Op->getOpcode();
  // NOTE(review): the declaration of AM is on a line not visible in this
  // excerpt.
  switch (Opcode) {
  case ISD::LOAD:
    AM = cast<LoadSDNode>(Op)->getAddressingMode();
    break;
  case ISD::STORE:
    AM = cast<StoreSDNode>(Op)->getAddressingMode();
    break;
  case ISD::MLOAD:
    AM = cast<MaskedLoadSDNode>(Op)->getAddressingMode();
    break;
  case ISD::MSTORE:
    AM = cast<MaskedStoreSDNode>(Op)->getAddressingMode();
    break;
  default:
    llvm_unreachable("Unexpected Opcode for Imm7Offset");
  }

  int RHSC;
  // 7 bit constant, shifted by Shift.
  if (isScaledConstantInRange(N, 1 << Shift, 0, 0x80, RHSC)) {
    OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
                 ? CurDAG->getSignedConstant(RHSC * (1 << Shift), SDLoc(N),
                                             MVT::i32, /*isTarget=*/true)
                 : CurDAG->getSignedConstant(-RHSC * (1 << Shift), SDLoc(N),
                                             MVT::i32, /*isTarget=*/true);
    return true;
  }
  return false;
}
1492
1493template <int Min, int Max>
1494bool ARMDAGToDAGISel::SelectImmediateInRange(SDValue N, SDValue &OffImm) {
1495 int Val;
1496 if (isScaledConstantInRange(N, 1, Min, Max, Val)) {
1497 OffImm =
1498 CurDAG->getSignedConstant(Val, SDLoc(N), MVT::i32, /*isTarget=*/true);
1499 return true;
1500 }
1501 return false;
1502}
1503
// Thumb2 register-offset addressing: (R + R) or (R + (R << [1,2,3])).
// Constant addends are deliberately refused so the imm12/imm8 selectors
// take them.
bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
                                            SDValue &Base,
                                            SDValue &OffReg, SDValue &ShImm) {
  // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getZExtValue();
    if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
      return false;
    else if (RHSC < 0 && RHSC >= -255) // 8 bits
      return false;
  }

  // Look for (R + R) or (R + (R << [1,2,3])).
  unsigned ShAmt = 0;
  Base = N.getOperand(0);
  OffReg = N.getOperand(1);

  // Swap if it is ((R << c) + R).
  // NOTE(review): the declaration of ShOpcVal (the shift opcode derived
  // from OffReg) is on a line not visible in this excerpt.
  if (ShOpcVal != ARM_AM::lsl) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
    if (ShOpcVal == ARM_AM::lsl)
      std::swap(Base, OffReg);
  }

  if (ShOpcVal == ARM_AM::lsl) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
        OffReg = OffReg.getOperand(0);
      else {
        // Shift amount too large or unprofitable: keep the full expression.
        ShAmt = 0;
      }
    }
  }

  // If OffReg is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(OffReg);
      replaceDAGValue(OffReg.getOperand(1), NewMulConst);
      OffReg = Handle.getValue();
      ShAmt = PowerOfTwo;
    }
  }

  ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);

  return true;
}
1563
1564bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
1565 SDValue &OffImm) {
1566 // This *must* succeed since it's used for the irreplaceable ldrex and strex
1567 // instructions.
1568 Base = N;
1569 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1570
1571 if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
1572 return true;
1573
1574 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
1575 if (!RHS)
1576 return true;
1577
1578 uint32_t RHSC = (int)RHS->getZExtValue();
1579 if (RHSC > 1020 || RHSC % 4 != 0)
1580 return true;
1581
1582 Base = N.getOperand(0);
1583 if (Base.getOpcode() == ISD::FrameIndex) {
1584 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1585 Base = CurDAG->getTargetFrameIndex(
1586 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1587 }
1588
1589 OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);
1590 return true;
1591}
1592
1593//===--------------------------------------------------------------------===//
1594
1595/// getAL - Returns a ARMCC::AL immediate node.
1596static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
1597 return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
1598}
1599
// Copy the memory operand from the original DAG node onto the newly
// created machine node so memory information survives selection.
void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
}
1604
// Try to select an ARM-mode pre/post-indexed load.  Picks the opcode from
// the loaded type and extension kind, then replaces N with the machine
// node.  Returns false if no indexed form matches.
bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  EVT LoadedVT = LD->getMemoryVT();
  SDValue Offset, AMOpc;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  // i32: prefer immediate forms, then the register form.
  if (LoadedVT == MVT::i32 && isPre &&
      SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_PRE_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 && !isPre &&
             SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_POST_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 &&
             SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
    Match = true;

  } else if (LoadedVT == MVT::i16 &&
             SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
    // Halfword loads use addrmode3; extension kind selects the opcode.
    Match = true;
    Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
                 ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
                 : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
  } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
    if (LD->getExtensionType() == ISD::SEXTLOAD) {
      // Sign-extending byte loads only have an addrmode3 form.
      if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
      }
    } else {
      if (isPre &&
          SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_PRE_IMM;
      } else if (!isPre &&
                 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_POST_IMM;
      } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
      }
    }
  }

  if (Match) {
    if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
      // Pre-indexed immediate forms take no separate Offset operand.
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
                       CurDAG->getRegister(0, MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                           MVT::Other, Ops);
      transferMemOperands(N, New);
      ReplaceNode(N, New);
      return true;
    } else {
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
                       CurDAG->getRegister(0, MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                           MVT::Other, Ops);
      transferMemOperands(N, New);
      ReplaceNode(N, New);
      return true;
    }
  }

  return false;
}
1683
1684bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
1685 LoadSDNode *LD = cast<LoadSDNode>(N);
1686 EVT LoadedVT = LD->getMemoryVT();
1687 ISD::MemIndexedMode AM = LD->getAddressingMode();
1688 if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD ||
1689 LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
1690 return false;
1691
1692 auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset());
1693 if (!COffs || COffs->getZExtValue() != 4)
1694 return false;
1695
1696 // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
1697 // The encoding of LDM is not how the rest of ISel expects a post-inc load to
1698 // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after
1699 // ISel.
1700 SDValue Chain = LD->getChain();
1701 SDValue Base = LD->getBasePtr();
1702 SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)),
1703 CurDAG->getRegister(0, MVT::i32), Chain };
1704 SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32,
1705 MVT::i32, MVT::Other, Ops);
1706 transferMemOperands(N, New);
1707 ReplaceNode(N, New);
1708 return true;
1709}
1710
// Try to select a Thumb2 pre/post-indexed load using an 8-bit offset.
bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  EVT LoadedVT = LD->getMemoryVT();
  bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
  // NOTE(review): the declaration of Offset is on a line not visible in
  // this excerpt.
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
    // Opcode is chosen by loaded width and extension kind.
    switch (LoadedVT.getSimpleVT().SimpleTy) {
    case MVT::i32:
      Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
      break;
    case MVT::i16:
      if (isSExtLd)
        Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
      else
        Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
      break;
    case MVT::i8:
    case MVT::i1:
      if (isSExtLd)
        Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
      else
        Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
      break;
    default:
      return false;
    }
    Match = true;
  }

  if (Match) {
    SDValue Chain = LD->getChain();
    SDValue Base = LD->getBasePtr();
    SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
                     CurDAG->getRegister(0, MVT::i32), Chain };
    SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                         MVT::Other, Ops);
    transferMemOperands(N, New);
    ReplaceNode(N, New);
    return true;
  }

  return false;
}
1761
// Try to select an MVE pre/post-indexed vector load (plain or masked).
// Chooses a VLDR variant from the memory type, alignment and extension
// kind, then rewires the result numbers (the machine node returns the
// writeback value first, then the loaded vector, then the chain).
bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {
  EVT LoadedVT;
  unsigned Opcode = 0;
  bool isSExtLd, isPre;
  Align Alignment;
  ARMVCC::VPTCodes Pred;
  SDValue PredReg;
  SDValue Chain, Base, Offset;

  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    ISD::MemIndexedMode AM = LD->getAddressingMode();
    if (AM == ISD::UNINDEXED)
      return false;
    LoadedVT = LD->getMemoryVT();
    if (!LoadedVT.isVector())
      return false;

    Chain = LD->getChain();
    Base = LD->getBasePtr();
    Offset = LD->getOffset();
    Alignment = LD->getAlign();
    isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
    isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
    // Unmasked loads are unconditional.
    Pred = ARMVCC::None;
    PredReg = CurDAG->getRegister(0, MVT::i32);
  } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
    ISD::MemIndexedMode AM = LD->getAddressingMode();
    if (AM == ISD::UNINDEXED)
      return false;
    LoadedVT = LD->getMemoryVT();
    if (!LoadedVT.isVector())
      return false;

    Chain = LD->getChain();
    Base = LD->getBasePtr();
    Offset = LD->getOffset();
    Alignment = LD->getAlign();
    isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
    isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
    // Masked loads become VPT "then"-predicated with the mask as predicate.
    Pred = ARMVCC::Then;
    PredReg = LD->getMask();
  } else
    llvm_unreachable("Expected a Load or a Masked Load!");

  // We allow LE non-masked loads to change the type (for example use a vldrb.8
  // as opposed to a vldrw.32). This can allow extra addressing modes or
  // alignments for what is otherwise an equivalent instruction.
  bool CanChangeType = Subtarget->isLittle() && !isa<MaskedLoadSDNode>(N);

  SDValue NewOffset;
  if (Alignment >= Align(2) && LoadedVT == MVT::v4i16 &&
      SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRHS32_pre : ARM::MVE_VLDRHS32_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRHU32_pre : ARM::MVE_VLDRHU32_post;
  } else if (LoadedVT == MVT::v8i8 &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRBS16_pre : ARM::MVE_VLDRBS16_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRBU16_pre : ARM::MVE_VLDRBU16_post;
  } else if (LoadedVT == MVT::v4i8 &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRBS32_pre : ARM::MVE_VLDRBS32_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRBU32_pre : ARM::MVE_VLDRBU32_post;
  } else if (Alignment >= Align(4) &&
             (CanChangeType || LoadedVT == MVT::v4i32 ||
              LoadedVT == MVT::v4f32) &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 2))
    Opcode = isPre ? ARM::MVE_VLDRWU32_pre : ARM::MVE_VLDRWU32_post;
  else if (Alignment >= Align(2) &&
           (CanChangeType || LoadedVT == MVT::v8i16 ||
            LoadedVT == MVT::v8f16) &&
           SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1))
    Opcode = isPre ? ARM::MVE_VLDRHU16_pre : ARM::MVE_VLDRHU16_post;
  else if ((CanChangeType || LoadedVT == MVT::v16i8) &&
           SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0))
    Opcode = isPre ? ARM::MVE_VLDRBU8_pre : ARM::MVE_VLDRBU8_post;
  else
    return false;

  SDValue Ops[] = {Base,
                   NewOffset,
                   CurDAG->getTargetConstant(Pred, SDLoc(N), MVT::i32),
                   PredReg,
                   CurDAG->getRegister(0, MVT::i32), // tp_reg
                   Chain};
  SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
                                       N->getValueType(0), MVT::Other, Ops);
  transferMemOperands(N, New);
  // Remap results: loaded value <- result 1, writeback <- result 0.
  ReplaceUses(SDValue(N, 0), SDValue(New, 1));
  ReplaceUses(SDValue(N, 1), SDValue(New, 0));
  ReplaceUses(SDValue(N, 2), SDValue(New, 2));
  CurDAG->RemoveDeadNode(N);
  return true;
}
1861
1862/// Form a GPRPair pseudo register from a pair of GPR regs.
1863SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
1864 SDLoc dl(V0.getNode());
1865 SDValue RegClass =
1866 CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
1867 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
1868 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
1869 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1870 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1871}
1872
1873/// Form a D register from a pair of S registers.
1874SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1875 SDLoc dl(V0.getNode());
1876 SDValue RegClass =
1877 CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
1878 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1879 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1880 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1881 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1882}
1883
1884/// Form a quad register from a pair of D registers.
1885SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1886 SDLoc dl(V0.getNode());
1887 SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
1888 MVT::i32);
1889 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1890 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1891 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1892 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1893}
1894
1895/// Form 4 consecutive D registers from a pair of Q registers.
1896SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1897 SDLoc dl(V0.getNode());
1898 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1899 MVT::i32);
1900 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1901 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1902 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1903 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1904}
1905
1906/// Form 4 consecutive S registers.
1907SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
1908 SDValue V2, SDValue V3) {
1909 SDLoc dl(V0.getNode());
1910 SDValue RegClass =
1911 CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
1912 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1913 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1914 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
1915 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
1916 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1917 V2, SubReg2, V3, SubReg3 };
1918 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1919}
1920
1921/// Form 4 consecutive D registers.
1922SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
1923 SDValue V2, SDValue V3) {
1924 SDLoc dl(V0.getNode());
1925 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1926 MVT::i32);
1927 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1928 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1929 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);
1930 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);
1931 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1932 V2, SubReg2, V3, SubReg3 };
1933 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1934}
1935
1936/// Form 4 consecutive Q registers.
1937SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
1938 SDValue V2, SDValue V3) {
1939 SDLoc dl(V0.getNode());
1940 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl,
1941 MVT::i32);
1942 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1943 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1944 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);
1945 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);
1946 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1947 V2, SubReg2, V3, SubReg3 };
1948 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1949}
1950
1951/// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
1952/// of a NEON VLD or VST instruction. The supported values depend on the
1953/// number of registers being loaded.
1954SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl,
1955 unsigned NumVecs, bool is64BitVector) {
1956 unsigned NumRegs = NumVecs;
1957 if (!is64BitVector && NumVecs < 3)
1958 NumRegs *= 2;
1959
1960 unsigned Alignment = Align->getAsZExtVal();
1961 if (Alignment >= 32 && NumRegs == 4)
1962 Alignment = 32;
1963 else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
1964 Alignment = 16;
1965 else if (Alignment >= 8)
1966 Alignment = 8;
1967 else
1968 Alignment = 0;
1969
1970 return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
1971}
1972
1973static bool isVLDfixed(unsigned Opc)
1974{
1975 switch (Opc) {
1976 default: return false;
1977 case ARM::VLD1d8wb_fixed : return true;
1978 case ARM::VLD1d16wb_fixed : return true;
1979 case ARM::VLD1d64Qwb_fixed : return true;
1980 case ARM::VLD1d32wb_fixed : return true;
1981 case ARM::VLD1d64wb_fixed : return true;
1982 case ARM::VLD1d8TPseudoWB_fixed : return true;
1983 case ARM::VLD1d16TPseudoWB_fixed : return true;
1984 case ARM::VLD1d32TPseudoWB_fixed : return true;
1985 case ARM::VLD1d64TPseudoWB_fixed : return true;
1986 case ARM::VLD1d8QPseudoWB_fixed : return true;
1987 case ARM::VLD1d16QPseudoWB_fixed : return true;
1988 case ARM::VLD1d32QPseudoWB_fixed : return true;
1989 case ARM::VLD1d64QPseudoWB_fixed : return true;
1990 case ARM::VLD1q8wb_fixed : return true;
1991 case ARM::VLD1q16wb_fixed : return true;
1992 case ARM::VLD1q32wb_fixed : return true;
1993 case ARM::VLD1q64wb_fixed : return true;
1994 case ARM::VLD1DUPd8wb_fixed : return true;
1995 case ARM::VLD1DUPd16wb_fixed : return true;
1996 case ARM::VLD1DUPd32wb_fixed : return true;
1997 case ARM::VLD1DUPq8wb_fixed : return true;
1998 case ARM::VLD1DUPq16wb_fixed : return true;
1999 case ARM::VLD1DUPq32wb_fixed : return true;
2000 case ARM::VLD2d8wb_fixed : return true;
2001 case ARM::VLD2d16wb_fixed : return true;
2002 case ARM::VLD2d32wb_fixed : return true;
2003 case ARM::VLD2q8PseudoWB_fixed : return true;
2004 case ARM::VLD2q16PseudoWB_fixed : return true;
2005 case ARM::VLD2q32PseudoWB_fixed : return true;
2006 case ARM::VLD2DUPd8wb_fixed : return true;
2007 case ARM::VLD2DUPd16wb_fixed : return true;
2008 case ARM::VLD2DUPd32wb_fixed : return true;
2009 case ARM::VLD2DUPq8OddPseudoWB_fixed: return true;
2010 case ARM::VLD2DUPq16OddPseudoWB_fixed: return true;
2011 case ARM::VLD2DUPq32OddPseudoWB_fixed: return true;
2012 }
2013}
2014
2015static bool isVSTfixed(unsigned Opc)
2016{
2017 switch (Opc) {
2018 default: return false;
2019 case ARM::VST1d8wb_fixed : return true;
2020 case ARM::VST1d16wb_fixed : return true;
2021 case ARM::VST1d32wb_fixed : return true;
2022 case ARM::VST1d64wb_fixed : return true;
2023 case ARM::VST1q8wb_fixed : return true;
2024 case ARM::VST1q16wb_fixed : return true;
2025 case ARM::VST1q32wb_fixed : return true;
2026 case ARM::VST1q64wb_fixed : return true;
2027 case ARM::VST1d8TPseudoWB_fixed : return true;
2028 case ARM::VST1d16TPseudoWB_fixed : return true;
2029 case ARM::VST1d32TPseudoWB_fixed : return true;
2030 case ARM::VST1d64TPseudoWB_fixed : return true;
2031 case ARM::VST1d8QPseudoWB_fixed : return true;
2032 case ARM::VST1d16QPseudoWB_fixed : return true;
2033 case ARM::VST1d32QPseudoWB_fixed : return true;
2034 case ARM::VST1d64QPseudoWB_fixed : return true;
2035 case ARM::VST2d8wb_fixed : return true;
2036 case ARM::VST2d16wb_fixed : return true;
2037 case ARM::VST2d32wb_fixed : return true;
2038 case ARM::VST2q8PseudoWB_fixed : return true;
2039 case ARM::VST2q16PseudoWB_fixed : return true;
2040 case ARM::VST2q32PseudoWB_fixed : return true;
2041 }
2042}
2043
2044// Get the register stride update opcode of a VLD/VST instruction that
2045// is otherwise equivalent to the given fixed stride updating instruction.
// Maps each fixed-stride (*_fixed) writeback opcode handled below to its
// register-stride (*_register) twin. Opcodes without an entry fall out of
// the switch and are returned unchanged.
//
// NOTE(review): VLD1d64Twb_fixed is mapped here but is not listed in
// isVLDfixed() above, so the entry assert would reject it — confirm
// whether the two tables are intentionally asymmetric.
static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
  assert((isVLDfixed(Opc) || isVSTfixed(Opc))
         && "Incorrect fixed stride updating instruction.");
  switch (Opc) {
  default: break;
  // VLD1 writeback variants (d- and q-register, plus multi-register pseudos).
  case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
  case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
  case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
  case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
  case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
  case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
  case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
  case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
  case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
  case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
  case ARM::VLD1d8TPseudoWB_fixed: return ARM::VLD1d8TPseudoWB_register;
  case ARM::VLD1d16TPseudoWB_fixed: return ARM::VLD1d16TPseudoWB_register;
  case ARM::VLD1d32TPseudoWB_fixed: return ARM::VLD1d32TPseudoWB_register;
  case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
  case ARM::VLD1d8QPseudoWB_fixed: return ARM::VLD1d8QPseudoWB_register;
  case ARM::VLD1d16QPseudoWB_fixed: return ARM::VLD1d16QPseudoWB_register;
  case ARM::VLD1d32QPseudoWB_fixed: return ARM::VLD1d32QPseudoWB_register;
  case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
  // VLD1/VLD2 duplicating (all-lanes) writeback variants.
  case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register;
  case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register;
  case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register;
  case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register;
  case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register;
  case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register;
  case ARM::VLD2DUPq8OddPseudoWB_fixed: return ARM::VLD2DUPq8OddPseudoWB_register;
  case ARM::VLD2DUPq16OddPseudoWB_fixed: return ARM::VLD2DUPq16OddPseudoWB_register;
  case ARM::VLD2DUPq32OddPseudoWB_fixed: return ARM::VLD2DUPq32OddPseudoWB_register;

  // VST1 writeback variants.
  case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
  case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
  case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
  case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
  case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
  case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
  case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
  case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
  case ARM::VST1d8TPseudoWB_fixed: return ARM::VST1d8TPseudoWB_register;
  case ARM::VST1d16TPseudoWB_fixed: return ARM::VST1d16TPseudoWB_register;
  case ARM::VST1d32TPseudoWB_fixed: return ARM::VST1d32TPseudoWB_register;
  case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
  case ARM::VST1d8QPseudoWB_fixed: return ARM::VST1d8QPseudoWB_register;
  case ARM::VST1d16QPseudoWB_fixed: return ARM::VST1d16QPseudoWB_register;
  case ARM::VST1d32QPseudoWB_fixed: return ARM::VST1d32QPseudoWB_register;
  case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;

  // VLD2 writeback variants.
  case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
  case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
  case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
  case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
  case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
  case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;

  // VST2 writeback variants.
  case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
  case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
  case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
  case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
  case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
  case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;

  // VLD2DUP (all-lanes) writeback variants.
  case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
  case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
  case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
  }
  return Opc; // If not one we handle, return it unchanged.
}
2116
2117/// Returns true if the given increment is a Constant known to be equal to the
2118/// access size performed by a NEON load/store. This means the "[rN]!" form can
2119/// be used.
2120static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) {
2121 auto C = dyn_cast<ConstantSDNode>(Inc);
2122 return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs;
2123}
2124
/// Select a NEON structured load (VLD1-VLD4) for node \p N, either the plain
/// intrinsic form (chain, intrinsic-id, addr, ...) or the post-indexed
/// updating form (chain, addr, increment, ...).
///
/// \param isUpdating true for writeback (post-indexed) forms.
/// \param NumVecs    number of vectors loaded (1-4).
/// \param DOpcodes   opcodes for 64-bit (D-register) element types, indexed
///                   by element size (8/16/32/64).
/// \param QOpcodes0  opcodes for 128-bit (Q-register) types; for VLD3/VLD4
///                   this handles the even D subregisters.
/// \param QOpcodes1  opcodes for the odd D subregisters of a Q-register
///                   VLD3/VLD4.
///
/// NOTE(review): the `Ops` vector used below is declared as a SmallVector in
/// the upstream source; the declaration appears elided in this copy.
void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                                const uint16_t *DOpcodes,
                                const uint16_t *QOpcodes0,
                                const uint16_t *QOpcodes1) {
  assert(Subtarget->hasNEON());
  assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
                                  // nodes are not intrinsics.
  // Intrinsic nodes carry (chain, intrinsic-id) before the address.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool is64BitVector = VT.is64BitVector();
  // Clamp the alignment to what the instruction encoding can express.
  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);

  // Index into the opcode tables by element size.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4bf16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1i64: OpcodeIndex = 3; break;
  // Quad-register operations:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v8f16:
  case MVT::v8bf16:
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 3; break;
  }

  // The machine node produces one wide value covering all loaded vectors
  // (VLD3 is rounded up to 4 elements); subregisters are extracted below.
  EVT ResTy;
  if (NumVecs == 1)
    ResTy = VT;
  else {
    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
    if (!is64BitVector)
      ResTyElts *= 2;
    ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
  }
  std::vector<EVT> ResTys;
  ResTys.push_back(ResTy);
  if (isUpdating)
    ResTys.push_back(MVT::i32); // writeback result: updated base address
  ResTys.push_back(MVT::Other); // chain

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  SDNode *VLd;

  // Double registers and VLD1/VLD2 quad registers are directly supported.
  if (is64BitVector || NumVecs <= 2) {
    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                    QOpcodes0[OpcodeIndex]);
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
      if (!IsImmUpdate) {
        // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
        // check for the opcode rather than the number of vector elements.
        // NOTE(review): upstream swaps Opc to its _register twin here via
        // getVLDSTRegisterUpdateOpcode(Opc); that statement appears elided
        // in this copy — confirm against upstream.
        if (isVLDfixed(Opc))
          Ops.push_back(Inc);
      // VLD1/VLD2 fixed increment does not need Reg0 so only include it in
      // the operands if not such an opcode.
      } else if (!isVLDfixed(Opc))
        Ops.push_back(Reg0);
    }
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  } else {
    // Otherwise, quad registers are loaded with two separate instructions,
    // where one loads the even registers and the other loads the odd registers.
    EVT AddrTy = MemAddr.getValueType();

    // Load the even subregs. This is always an updating load, so that it
    // provides the address to the second load for the odd subregs.
    SDValue ImplDef =
        SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
    const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
                                          ResTy, AddrTy, MVT::Other, OpsA);
    Chain = SDValue(VLdA, 2);

    // Load the odd subregs.
    Ops.push_back(SDValue(VLdA, 1)); // updated address from the first load
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      assert(isa<ConstantSDNode>(Inc.getNode()) &&
             "only constant post-increment update allowed for VLD3/4");
      (void)Inc;
      Ops.push_back(Reg0);
    }
    Ops.push_back(SDValue(VLdA, 0)); // partially-filled super-register
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
  }

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLd), {MemOp});

  if (NumVecs == 1) {
    ReplaceNode(N, VLd);
    return;
  }

  // Extract out the subregisters.
  SDValue SuperReg = SDValue(VLd, 0);
  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
                    ARM::qsub_3 == ARM::qsub_0 + 3,
                "Unexpected subreg numbering");
  unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
  CurDAG->RemoveDeadNode(N);
}
2266
/// Select a NEON structured store (VST1-VST4) for node \p N, either the
/// intrinsic form (chain, intrinsic-id, addr, vectors...) or the
/// post-indexed updating form (chain, addr, increment, vectors...).
///
/// \param isUpdating true for writeback (post-indexed) forms.
/// \param NumVecs    number of vectors stored (1-4).
/// \param DOpcodes   opcodes for 64-bit (D-register) element types, indexed
///                   by element size (8/16/32/64).
/// \param QOpcodes0  opcodes for 128-bit (Q-register) types; for VST3/VST4
///                   this handles the even D subregisters.
/// \param QOpcodes1  opcodes for the odd D subregisters of a Q-register
///                   VST3/VST4.
///
/// NOTE(review): the `Ops` vector used below is declared as a SmallVector in
/// the upstream source; the declaration appears elided in this copy.
void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                                const uint16_t *DOpcodes,
                                const uint16_t *QOpcodes0,
                                const uint16_t *QOpcodes1) {
  assert(Subtarget->hasNEON());
  assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
                                  // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  SDValue Chain = N->getOperand(0);
  // The stored vector type comes from the first vector operand.
  EVT VT = N->getOperand(Vec0Idx).getValueType();
  bool is64BitVector = VT.is64BitVector();
  // Clamp the alignment to what the instruction encoding can express.
  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);

  // Index into the opcode tables by element size.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vst type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4bf16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1i64: OpcodeIndex = 3; break;
  // Quad-register operations:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v8f16:
  case MVT::v8bf16:
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 3; break;
  }

  std::vector<EVT> ResTys;
  if (isUpdating)
    ResTys.push_back(MVT::i32); // writeback result: updated base address
  ResTys.push_back(MVT::Other); // chain

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

  // Double registers and VST1/VST2 quad registers are directly supported.
  if (is64BitVector || NumVecs <= 2) {
    SDValue SrcReg;
    if (NumVecs == 1) {
      SrcReg = N->getOperand(Vec0Idx);
    } else if (is64BitVector) {
      // Form a REG_SEQUENCE to force register allocation.
      SDValue V0 = N->getOperand(Vec0Idx + 0);
      SDValue V1 = N->getOperand(Vec0Idx + 1);
      if (NumVecs == 2)
        SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
      else {
        SDValue V2 = N->getOperand(Vec0Idx + 2);
        // If it's a vst3, form a quad D-register and leave the last part as
        // an undef.
        SDValue V3 = (NumVecs == 3)
          ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
          : N->getOperand(Vec0Idx + 3);
        SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
      }
    } else {
      // Form a QQ register.
      SDValue Q0 = N->getOperand(Vec0Idx);
      SDValue Q1 = N->getOperand(Vec0Idx + 1);
      SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
    }

    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                    QOpcodes0[OpcodeIndex]);
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
      if (!IsImmUpdate) {
        // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so
        // check for the opcode rather than the number of vector elements.
        // NOTE(review): upstream swaps Opc to its _register twin here via
        // getVLDSTRegisterUpdateOpcode(Opc); that statement appears elided
        // in this copy — confirm against upstream.
        if (isVSTfixed(Opc))
          Ops.push_back(Inc);
      }
      // VST1/VST2 fixed increment does not need Reg0 so only include it in
      // the operands if not such an opcode.
      else if (!isVSTfixed(Opc))
        Ops.push_back(Reg0);
    }
    Ops.push_back(SrcReg);
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

    // Transfer memoperands.
    CurDAG->setNodeMemRefs(cast<MachineSDNode>(VSt), {MemOp});

    ReplaceNode(N, VSt);
    return;
  }

  // Otherwise, quad registers are stored with two separate instructions,
  // where one stores the even registers and the other stores the odd registers.

  // Form the QQQQ REG_SEQUENCE.
  SDValue V0 = N->getOperand(Vec0Idx + 0);
  SDValue V1 = N->getOperand(Vec0Idx + 1);
  SDValue V2 = N->getOperand(Vec0Idx + 2);
  SDValue V3 = (NumVecs == 3)
    ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
    : N->getOperand(Vec0Idx + 3);
  SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);

  // Store the even D registers. This is always an updating store, so that it
  // provides the address to the second store for the odd subregs.
  const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
  SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
                                        MemAddr.getValueType(),
                                        MVT::Other, OpsA);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStA), {MemOp});
  Chain = SDValue(VStA, 1);

  // Store the odd D registers.
  Ops.push_back(SDValue(VStA, 0)); // updated address from the first store
  Ops.push_back(Align);
  if (isUpdating) {
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    assert(isa<ConstantSDNode>(Inc.getNode()) &&
           "only constant post-increment update allowed for VST3/4");
    (void)Inc;
    Ops.push_back(Reg0);
  }
  Ops.push_back(RegSeq);
  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);
  SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
                                        Ops);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStB), {MemOp});
  ReplaceNode(N, VStB);
}
2420
/// Select a NEON single-lane load or store (VLD2/3/4LN, VST2/3/4LN) for
/// node \p N.
///
/// \param IsLoad     true for lane loads, false for lane stores.
/// \param isUpdating true for writeback (post-indexed) forms.
/// \param NumVecs    number of vectors involved (2-4).
/// \param DOpcodes   opcodes for 64-bit (D-register) element types.
/// \param QOpcodes   opcodes for 128-bit (Q-register) element types.
///
/// NOTE(review): the `Ops` vector used below is declared as a SmallVector in
/// the upstream source; the declaration appears elided in this copy.
void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
                                      unsigned NumVecs,
                                      const uint16_t *DOpcodes,
                                      const uint16_t *QOpcodes) {
  assert(Subtarget->hasNEON());
  assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
                                  // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  SDValue Chain = N->getOperand(0);
  // The lane number is the constant operand following the vectors.
  unsigned Lane = N->getConstantOperandVal(Vec0Idx + NumVecs);
  EVT VT = N->getOperand(Vec0Idx).getValueType();
  bool is64BitVector = VT.is64BitVector();

  // Compute the encodable alignment: capped at the total access size,
  // forced to a power of two, with 1 (and other unencodable values)
  // mapped to 0 ("no alignment specified"). NumVecs == 3 never takes an
  // alignment operand.
  unsigned Alignment = 0;
  if (NumVecs != 3) {
    Alignment = Align->getAsZExtVal();
    unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
    if (Alignment > NumBytes)
      Alignment = NumBytes;
    if (Alignment < 8 && Alignment < NumBytes)
      Alignment = 0;
    // Alignment must be a power of two; make sure of that.
    Alignment = (Alignment & -Alignment);
    if (Alignment == 1)
      Alignment = 0;
  }
  Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);

  // Index into the opcode tables by element size.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld/vst lane type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4bf16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  // Quad-register operations:
  case MVT::v8f16:
  case MVT::v8bf16:
  case MVT::v8i16: OpcodeIndex = 0; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 1; break;
  }

  std::vector<EVT> ResTys;
  if (IsLoad) {
    // Loads produce one wide super-register value (VLD3 rounds up to 4).
    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
    if (!is64BitVector)
      ResTyElts *= 2;
    ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
                                      MVT::i64, ResTyElts));
  }
  if (isUpdating)
    ResTys.push_back(MVT::i32); // writeback result: updated base address
  ResTys.push_back(MVT::Other); // chain

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

  Ops.push_back(MemAddr);
  Ops.push_back(Align);
  if (isUpdating) {
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    bool IsImmUpdate =
        isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
    Ops.push_back(IsImmUpdate ? Reg0 : Inc);
  }

  // Bundle the input vectors into one super-register operand.
  SDValue SuperReg;
  SDValue V0 = N->getOperand(Vec0Idx + 0);
  SDValue V1 = N->getOperand(Vec0Idx + 1);
  if (NumVecs == 2) {
    if (is64BitVector)
      SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
    else
      SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
  } else {
    SDValue V2 = N->getOperand(Vec0Idx + 2);
    SDValue V3 = (NumVecs == 3)
      ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
      : N->getOperand(Vec0Idx + 3);
    if (is64BitVector)
      SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
    else
      SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
  }
  Ops.push_back(SuperReg);
  Ops.push_back(getI32Imm(Lane, dl));
  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);

  unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                  QOpcodes[OpcodeIndex]);
  SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdLn), {MemOp});
  if (!IsLoad) {
    ReplaceNode(N, VLdLn);
    return;
  }

  // Extract the subregisters.
  SuperReg = SDValue(VLdLn, 0);
  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
                    ARM::qsub_3 == ARM::qsub_0 + 3,
                "Unexpected subreg numbering");
  unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
  CurDAG->RemoveDeadNode(N);
}
2549
2550template <typename SDValueVector>
2551void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2552 SDValue PredicateMask) {
2553 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32));
2554 Ops.push_back(PredicateMask);
2555 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
2556}
2557
2558template <typename SDValueVector>
2559void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2560 SDValue PredicateMask,
2561 SDValue Inactive) {
2562 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32));
2563 Ops.push_back(PredicateMask);
2564 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
2565 Ops.push_back(Inactive);
2566}
2567
2568template <typename SDValueVector>
2569void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc) {
2570 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32));
2571 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
2572 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
2573}
2574
2575template <typename SDValueVector>
2576void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2577 EVT InactiveTy) {
2578 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32));
2579 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
2580 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
2581 Ops.push_back(SDValue(
2582 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, InactiveTy), 0));
2583}
2584
/// Select an MVE load/store with writeback on the vector of base addresses.
/// \p Opcodes holds two opcodes, indexed by element size: [0] for 32-bit
/// elements, [1] for 64-bit. \p Predicated selects the VPT-predicated form
/// (predicate mask is operand 4).
///
/// NOTE(review): the `Ops` and `VTs` vectors used below are declared as
/// SmallVectors in the upstream source; the declarations appear elided in
/// this copy.
void ARMDAGToDAGISel::SelectMVE_WB(SDNode *N, const uint16_t *Opcodes,
                                   bool Predicated) {
  SDLoc Loc(N);

  // Pick the opcode by the element size of the loaded/stored vector type.
  uint16_t Opcode;
  switch (N->getValueType(1).getVectorElementType().getSizeInBits()) {
  case 32:
    Opcode = Opcodes[0];
    break;
  case 64:
    Opcode = Opcodes[1];
    break;
  default:
    llvm_unreachable("bad vector element size in SelectMVE_WB");
  }

  Ops.push_back(N->getOperand(2)); // vector of base addresses

  int32_t ImmValue = N->getConstantOperandVal(3);
  Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate offset

  if (Predicated)
    AddMVEPredicateToOps(Ops, Loc, N->getOperand(4));
  else
    AddEmptyMVEPredicateToOps(Ops, Loc);

  Ops.push_back(N->getOperand(0)); // chain

  // Machine-node result order differs from N's: the updated base-address
  // vector comes first, then the data result, then the chain.
  VTs.push_back(N->getValueType(1));
  VTs.push_back(N->getValueType(0));
  VTs.push_back(N->getValueType(2));

  SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), VTs, Ops);
  // Re-map N's results onto the reordered results of the new node.
  ReplaceUses(SDValue(N, 0), SDValue(New, 1));
  ReplaceUses(SDValue(N, 1), SDValue(New, 0));
  ReplaceUses(SDValue(N, 2), SDValue(New, 2));
  transferMemOperands(N, New);
  CurDAG->RemoveDeadNode(N);
}
2626
/// Select an MVE scalar long shift, which operates on a 64-bit value split
/// across two 32-bit operands (operands 1 and 2 of \p N).
/// \p Immediate selects between an immediate shift count (constant operand 3)
/// and a register count; \p HasSaturationOperand adds the saturation-width
/// operand (constant operand 4, 64 encoded as bit 0, anything else as bit 1).
///
/// NOTE(review): the `Ops` vector used below is declared as a SmallVector in
/// the upstream source; the declaration appears elided in this copy.
void ARMDAGToDAGISel::SelectMVE_LongShift(SDNode *N, uint16_t Opcode,
                                          bool Immediate,
                                          bool HasSaturationOperand) {
  SDLoc Loc(N);

  // Two 32-bit halves of the value to be shifted
  Ops.push_back(N->getOperand(1));
  Ops.push_back(N->getOperand(2));

  // The shift count
  if (Immediate) {
    int32_t ImmValue = N->getConstantOperandVal(3);
    Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count
  } else {
    Ops.push_back(N->getOperand(3));
  }

  // The immediate saturation operand, if any
  if (HasSaturationOperand) {
    int32_t SatOp = N->getConstantOperandVal(4);
    int SatBit = (SatOp == 64 ? 0 : 1);
    Ops.push_back(getI32Imm(SatBit, Loc));
  }

  // MVE scalar shifts are IT-predicable, so include the standard
  // predicate arguments.
  Ops.push_back(getAL(CurDAG, Loc));
  Ops.push_back(CurDAG->getRegister(0, MVT::i32));

  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops));
}
2659
2660void ARMDAGToDAGISel::SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
2661 uint16_t OpcodeWithNoCarry,
2662 bool Add, bool Predicated) {
2663 SDLoc Loc(N);
2665 uint16_t Opcode;
2666
2667 unsigned FirstInputOp = Predicated ? 2 : 1;
2668
2669 // Two input vectors and the input carry flag
2670 Ops.push_back(N->getOperand(FirstInputOp));
2671 Ops.push_back(N->getOperand(FirstInputOp + 1));
2672 SDValue CarryIn = N->getOperand(FirstInputOp + 2);
2673 ConstantSDNode *CarryInConstant = dyn_cast<ConstantSDNode>(CarryIn);
2674 uint32_t CarryMask = 1 << 29;
2675 uint32_t CarryExpected = Add ? 0 : CarryMask;
2676 if (CarryInConstant &&
2677 (CarryInConstant->getZExtValue() & CarryMask) == CarryExpected) {
2678 Opcode = OpcodeWithNoCarry;
2679 } else {
2680 Ops.push_back(CarryIn);
2681 Opcode = OpcodeWithCarry;
2682 }
2683
2684 if (Predicated)
2685 AddMVEPredicateToOps(Ops, Loc,
2686 N->getOperand(FirstInputOp + 3), // predicate
2687 N->getOperand(FirstInputOp - 1)); // inactive
2688 else
2689 AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0));
2690
2691 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops));
2692}
2693
2694void ARMDAGToDAGISel::SelectMVE_VSHLC(SDNode *N, bool Predicated) {
2695 SDLoc Loc(N);
2697
2698 // One vector input, followed by a 32-bit word of bits to shift in
2699 // and then an immediate shift count
2700 Ops.push_back(N->getOperand(1));
2701 Ops.push_back(N->getOperand(2));
2702 int32_t ImmValue = N->getConstantOperandVal(3);
2703 Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count
2704
2705 if (Predicated)
2706 AddMVEPredicateToOps(Ops, Loc, N->getOperand(4));
2707 else
2708 AddEmptyMVEPredicateToOps(Ops, Loc);
2709
2710 CurDAG->SelectNodeTo(N, ARM::MVE_VSHLC, N->getVTList(), ArrayRef(Ops));
2711}
2712
2713static bool SDValueToConstBool(SDValue SDVal) {
2714 assert(isa<ConstantSDNode>(SDVal) && "expected a compile-time constant");
2715 ConstantSDNode *SDValConstant = dyn_cast<ConstantSDNode>(SDVal);
2716 uint64_t Value = SDValConstant->getZExtValue();
2717 assert((Value == 0 || Value == 1) && "expected value 0 or 1");
2718 return Value;
2719}
2720
2721void ARMDAGToDAGISel::SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
2722 const uint16_t *OpcodesS,
2723 const uint16_t *OpcodesU,
2724 size_t Stride, size_t TySize) {
2725 assert(TySize < Stride && "Invalid TySize");
2726 bool IsUnsigned = SDValueToConstBool(N->getOperand(1));
2727 bool IsSub = SDValueToConstBool(N->getOperand(2));
2728 bool IsExchange = SDValueToConstBool(N->getOperand(3));
2729 if (IsUnsigned) {
2730 assert(!IsSub &&
2731 "Unsigned versions of vmlsldav[a]/vrmlsldavh[a] do not exist");
2732 assert(!IsExchange &&
2733 "Unsigned versions of vmlaldav[a]x/vrmlaldavh[a]x do not exist");
2734 }
2735
2736 auto OpIsZero = [N](size_t OpNo) {
2737 return isNullConstant(N->getOperand(OpNo));
2738 };
2739
2740 // If the input accumulator value is not zero, select an instruction with
2741 // accumulator, otherwise select an instruction without accumulator
2742 bool IsAccum = !(OpIsZero(4) && OpIsZero(5));
2743
2744 const uint16_t *Opcodes = IsUnsigned ? OpcodesU : OpcodesS;
2745 if (IsSub)
2746 Opcodes += 4 * Stride;
2747 if (IsExchange)
2748 Opcodes += 2 * Stride;
2749 if (IsAccum)
2750 Opcodes += Stride;
2751 uint16_t Opcode = Opcodes[TySize];
2752
2753 SDLoc Loc(N);
2755 // Push the accumulator operands, if they are used
2756 if (IsAccum) {
2757 Ops.push_back(N->getOperand(4));
2758 Ops.push_back(N->getOperand(5));
2759 }
2760 // Push the two vector operands
2761 Ops.push_back(N->getOperand(6));
2762 Ops.push_back(N->getOperand(7));
2763
2764 if (Predicated)
2765 AddMVEPredicateToOps(Ops, Loc, N->getOperand(8));
2766 else
2767 AddEmptyMVEPredicateToOps(Ops, Loc);
2768
2769 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops));
2770}
2771
2772void ARMDAGToDAGISel::SelectMVE_VMLLDAV(SDNode *N, bool Predicated,
2773 const uint16_t *OpcodesS,
2774 const uint16_t *OpcodesU) {
2775 EVT VecTy = N->getOperand(6).getValueType();
2776 size_t SizeIndex;
2777 switch (VecTy.getVectorElementType().getSizeInBits()) {
2778 case 16:
2779 SizeIndex = 0;
2780 break;
2781 case 32:
2782 SizeIndex = 1;
2783 break;
2784 default:
2785 llvm_unreachable("bad vector element size");
2786 }
2787
2788 SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 2, SizeIndex);
2789}
2790
2791void ARMDAGToDAGISel::SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated,
2792 const uint16_t *OpcodesS,
2793 const uint16_t *OpcodesU) {
2794 assert(
2795 N->getOperand(6).getValueType().getVectorElementType().getSizeInBits() ==
2796 32 &&
2797 "bad vector element size");
2798 SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 1, 0);
2799}
2800
// Select an MVE VLD2/VLD4 structured load. Opcodes is a per-element-size
// table of per-stage opcode arrays; NumVecs consecutive MVE_VLDn machine
// instructions are chained together, each accumulating into one wide
// register that is finally split into the intrinsic's NumVecs results.
void ARMDAGToDAGISel::SelectMVE_VLD(SDNode *N, unsigned NumVecs,
                                    const uint16_t *const *Opcodes,
                                    bool HasWriteback) {
  EVT VT = N->getValueType(0);
  SDLoc Loc(N);

  // Pick the per-stage opcode array for this element size.
  const uint16_t *OurOpcodes;
  switch (VT.getVectorElementType().getSizeInBits()) {
  case 8:
    OurOpcodes = Opcodes[0];
    break;
  case 16:
    OurOpcodes = Opcodes[1];
    break;
  case 32:
    OurOpcodes = Opcodes[2];
    break;
  default:
    llvm_unreachable("bad vector element size in SelectMVE_VLD");
  }

  // The stages operate on one wide register covering all NumVecs vectors
  // (modelled as a vector of NumVecs*2 i64 elements).
  EVT DataTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, NumVecs * 2);
  SmallVector<EVT, 4> ResultTys = {DataTy, MVT::Other};
  unsigned PtrOperand = HasWriteback ? 1 : 2;

  // Start from an undefined wide register and thread data + chain through
  // the per-stage loads.
  auto Data = SDValue(
      CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, DataTy), 0);
  SDValue Chain = N->getOperand(0);
  // Add a MVE_VLDn instruction for each Vec, except the last
  for (unsigned Stage = 0; Stage < NumVecs - 1; ++Stage) {
    SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain};
    auto LoadInst =
        CurDAG->getMachineNode(OurOpcodes[Stage], Loc, ResultTys, Ops);
    Data = SDValue(LoadInst, 0);
    Chain = SDValue(LoadInst, 1);
    transferMemOperands(N, LoadInst);
  }
  // The last may need a writeback on it
  if (HasWriteback)
    ResultTys = {DataTy, MVT::i32, MVT::Other};
  SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain};
  auto LoadInst =
      CurDAG->getMachineNode(OurOpcodes[NumVecs - 1], Loc, ResultTys, Ops);
  transferMemOperands(N, LoadInst);

  // Rewire the intrinsic's results: one qsub extract per vector, then the
  // optional writeback pointer, then the chain.
  unsigned i;
  for (i = 0; i < NumVecs; i++)
    ReplaceUses(SDValue(N, i),
                CurDAG->getTargetExtractSubreg(ARM::qsub_0 + i, Loc, VT,
                                               SDValue(LoadInst, 0)));
  if (HasWriteback)
    ReplaceUses(SDValue(N, i++), SDValue(LoadInst, 1));
  ReplaceUses(SDValue(N, i), SDValue(LoadInst, HasWriteback ? 2 : 1));
  CurDAG->RemoveDeadNode(N);
}
2856
2857void ARMDAGToDAGISel::SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes,
2858 bool Wrapping, bool Predicated) {
2859 EVT VT = N->getValueType(0);
2860 SDLoc Loc(N);
2861
2862 uint16_t Opcode;
2863 switch (VT.getScalarSizeInBits()) {
2864 case 8:
2865 Opcode = Opcodes[0];
2866 break;
2867 case 16:
2868 Opcode = Opcodes[1];
2869 break;
2870 case 32:
2871 Opcode = Opcodes[2];
2872 break;
2873 default:
2874 llvm_unreachable("bad vector element size in SelectMVE_VxDUP");
2875 }
2876
2878 unsigned OpIdx = 1;
2879
2880 SDValue Inactive;
2881 if (Predicated)
2882 Inactive = N->getOperand(OpIdx++);
2883
2884 Ops.push_back(N->getOperand(OpIdx++)); // base
2885 if (Wrapping)
2886 Ops.push_back(N->getOperand(OpIdx++)); // limit
2887
2888 SDValue ImmOp = N->getOperand(OpIdx++); // step
2889 int ImmValue = ImmOp->getAsZExtVal();
2890 Ops.push_back(getI32Imm(ImmValue, Loc));
2891
2892 if (Predicated)
2893 AddMVEPredicateToOps(Ops, Loc, N->getOperand(OpIdx), Inactive);
2894 else
2895 AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0));
2896
2897 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops));
2898}
2899
2900void ARMDAGToDAGISel::SelectCDE_CXxD(SDNode *N, uint16_t Opcode,
2901 size_t NumExtraOps, bool HasAccum) {
2902 bool IsBigEndian = CurDAG->getDataLayout().isBigEndian();
2903 SDLoc Loc(N);
2905
2906 unsigned OpIdx = 1;
2907
2908 // Convert and append the immediate operand designating the coprocessor.
2909 SDValue ImmCorpoc = N->getOperand(OpIdx++);
2910 uint32_t ImmCoprocVal = ImmCorpoc->getAsZExtVal();
2911 Ops.push_back(getI32Imm(ImmCoprocVal, Loc));
2912
2913 // For accumulating variants copy the low and high order parts of the
2914 // accumulator into a register pair and add it to the operand vector.
2915 if (HasAccum) {
2916 SDValue AccLo = N->getOperand(OpIdx++);
2917 SDValue AccHi = N->getOperand(OpIdx++);
2918 if (IsBigEndian)
2919 std::swap(AccLo, AccHi);
2920 Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, AccLo, AccHi), 0));
2921 }
2922
2923 // Copy extra operands as-is.
2924 for (size_t I = 0; I < NumExtraOps; I++)
2925 Ops.push_back(N->getOperand(OpIdx++));
2926
2927 // Convert and append the immediate operand
2928 SDValue Imm = N->getOperand(OpIdx);
2929 uint32_t ImmVal = Imm->getAsZExtVal();
2930 Ops.push_back(getI32Imm(ImmVal, Loc));
2931
2932 // Accumulating variants are IT-predicable, add predicate operands.
2933 if (HasAccum) {
2934 SDValue Pred = getAL(CurDAG, Loc);
2935 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
2936 Ops.push_back(Pred);
2937 Ops.push_back(PredReg);
2938 }
2939
2940 // Create the CDE intruction
2941 SDNode *InstrNode = CurDAG->getMachineNode(Opcode, Loc, MVT::Untyped, Ops);
2942 SDValue ResultPair = SDValue(InstrNode, 0);
2943
2944 // The original intrinsic had two outputs, and the output of the dual-register
2945 // CDE instruction is a register pair. We need to extract the two subregisters
2946 // and replace all uses of the original outputs with the extracted
2947 // subregisters.
2948 uint16_t SubRegs[2] = {ARM::gsub_0, ARM::gsub_1};
2949 if (IsBigEndian)
2950 std::swap(SubRegs[0], SubRegs[1]);
2951
2952 for (size_t ResIdx = 0; ResIdx < 2; ResIdx++) {
2953 if (SDValue(N, ResIdx).use_empty())
2954 continue;
2955 SDValue SubReg = CurDAG->getTargetExtractSubreg(SubRegs[ResIdx], Loc,
2956 MVT::i32, ResultPair);
2957 ReplaceUses(SDValue(N, ResIdx), SubReg);
2958 }
2959
2960 CurDAG->RemoveDeadNode(N);
2961}
2962
2963void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
2964 bool isUpdating, unsigned NumVecs,
2965 const uint16_t *DOpcodes,
2966 const uint16_t *QOpcodes0,
2967 const uint16_t *QOpcodes1) {
2968 assert(Subtarget->hasNEON());
2969 assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
2970 SDLoc dl(N);
2971
2972 SDValue MemAddr, Align;
2973 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2974 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2975 return;
2976
2977 SDValue Chain = N->getOperand(0);
2978 EVT VT = N->getValueType(0);
2979 bool is64BitVector = VT.is64BitVector();
2980
2981 unsigned Alignment = 0;
2982 if (NumVecs != 3) {
2983 Alignment = Align->getAsZExtVal();
2984 unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
2985 if (Alignment > NumBytes)
2986 Alignment = NumBytes;
2987 if (Alignment < 8 && Alignment < NumBytes)
2988 Alignment = 0;
2989 // Alignment must be a power of two; make sure of that.
2990 Alignment = (Alignment & -Alignment);
2991 if (Alignment == 1)
2992 Alignment = 0;
2993 }
2994 Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2995
2996 unsigned OpcodeIndex;
2997 switch (VT.getSimpleVT().SimpleTy) {
2998 default: llvm_unreachable("unhandled vld-dup type");
2999 case MVT::v8i8:
3000 case MVT::v16i8: OpcodeIndex = 0; break;
3001 case MVT::v4i16:
3002 case MVT::v8i16:
3003 case MVT::v4f16:
3004 case MVT::v8f16:
3005 case MVT::v4bf16:
3006 case MVT::v8bf16:
3007 OpcodeIndex = 1; break;
3008 case MVT::v2f32:
3009 case MVT::v2i32:
3010 case MVT::v4f32:
3011 case MVT::v4i32: OpcodeIndex = 2; break;
3012 case MVT::v1f64:
3013 case MVT::v1i64: OpcodeIndex = 3; break;
3014 }
3015
3016 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
3017 if (!is64BitVector)
3018 ResTyElts *= 2;
3019 EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
3020
3021 std::vector<EVT> ResTys;
3022 ResTys.push_back(ResTy);
3023 if (isUpdating)
3024 ResTys.push_back(MVT::i32);
3025 ResTys.push_back(MVT::Other);
3026
3027 SDValue Pred = getAL(CurDAG, dl);
3028 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3029
3031 Ops.push_back(MemAddr);
3032 Ops.push_back(Align);
3033 unsigned Opc = is64BitVector ? DOpcodes[OpcodeIndex]
3034 : (NumVecs == 1) ? QOpcodes0[OpcodeIndex]
3035 : QOpcodes1[OpcodeIndex];
3036 if (isUpdating) {
3037 SDValue Inc = N->getOperand(2);
3038 bool IsImmUpdate =
3039 isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
3040 if (IsImmUpdate) {
3041 if (!isVLDfixed(Opc))
3042 Ops.push_back(Reg0);
3043 } else {
3044 if (isVLDfixed(Opc))
3046 Ops.push_back(Inc);
3047 }
3048 }
3049 if (is64BitVector || NumVecs == 1) {
3050 // Double registers and VLD1 quad registers are directly supported.
3051 } else {
3052 SDValue ImplDef = SDValue(
3053 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
3054 const SDValue OpsA[] = {MemAddr, Align, ImplDef, Pred, Reg0, Chain};
3055 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, ResTy,
3056 MVT::Other, OpsA);
3057 Ops.push_back(SDValue(VLdA, 0));
3058 Chain = SDValue(VLdA, 1);
3059 }
3060
3061 Ops.push_back(Pred);
3062 Ops.push_back(Reg0);
3063 Ops.push_back(Chain);
3064
3065 SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
3066
3067 // Transfer memoperands.
3068 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3069 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdDup), {MemOp});
3070
3071 // Extract the subregisters.
3072 if (NumVecs == 1) {
3073 ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0));
3074 } else {
3075 SDValue SuperReg = SDValue(VLdDup, 0);
3076 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
3077 unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
3078 for (unsigned Vec = 0; Vec != NumVecs; ++Vec) {
3079 ReplaceUses(SDValue(N, Vec),
3080 CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
3081 }
3082 }
3083 ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
3084 if (isUpdating)
3085 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
3086 CurDAG->RemoveDeadNode(N);
3087}
3088
3089bool ARMDAGToDAGISel::tryInsertVectorElt(SDNode *N) {
3090 if (!Subtarget->hasMVEIntegerOps())
3091 return false;
3092
3093 SDLoc dl(N);
3094
3095 // We are trying to use VMOV/VMOVX/VINS to more efficiently lower insert and
3096 // extracts of v8f16 and v8i16 vectors. Check that we have two adjacent
3097 // inserts of the correct type:
3098 SDValue Ins1 = SDValue(N, 0);
3099 SDValue Ins2 = N->getOperand(0);
3100 EVT VT = Ins1.getValueType();
3101 if (Ins2.getOpcode() != ISD::INSERT_VECTOR_ELT || !Ins2.hasOneUse() ||
3102 !isa<ConstantSDNode>(Ins1.getOperand(2)) ||
3103 !isa<ConstantSDNode>(Ins2.getOperand(2)) ||
3104 (VT != MVT::v8f16 && VT != MVT::v8i16) || (Ins2.getValueType() != VT))
3105 return false;
3106
3107 unsigned Lane1 = Ins1.getConstantOperandVal(2);
3108 unsigned Lane2 = Ins2.getConstantOperandVal(2);
3109 if (Lane2 % 2 != 0 || Lane1 != Lane2 + 1)
3110 return false;
3111
3112 // If the inserted values will be able to use T/B already, leave it to the
3113 // existing tablegen patterns. For example VCVTT/VCVTB.
3114 SDValue Val1 = Ins1.getOperand(1);
3115 SDValue Val2 = Ins2.getOperand(1);
3116 if (Val1.getOpcode() == ISD::FP_ROUND || Val2.getOpcode() == ISD::FP_ROUND)
3117 return false;
3118
3119 // Check if the inserted values are both extracts.
3120 if ((Val1.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
3121 Val1.getOpcode() == ARMISD::VGETLANEu) &&
3123 Val2.getOpcode() == ARMISD::VGETLANEu) &&
3124 isa<ConstantSDNode>(Val1.getOperand(1)) &&
3125 isa<ConstantSDNode>(Val2.getOperand(1)) &&
3126 (Val1.getOperand(0).getValueType() == MVT::v8f16 ||
3127 Val1.getOperand(0).getValueType() == MVT::v8i16) &&
3128 (Val2.getOperand(0).getValueType() == MVT::v8f16 ||
3129 Val2.getOperand(0).getValueType() == MVT::v8i16)) {
3130 unsigned ExtractLane1 = Val1.getConstantOperandVal(1);
3131 unsigned ExtractLane2 = Val2.getConstantOperandVal(1);
3132
3133 // If the two extracted lanes are from the same place and adjacent, this
3134 // simplifies into a f32 lane move.
3135 if (Val1.getOperand(0) == Val2.getOperand(0) && ExtractLane2 % 2 == 0 &&
3136 ExtractLane1 == ExtractLane2 + 1) {
3137 SDValue NewExt = CurDAG->getTargetExtractSubreg(
3138 ARM::ssub_0 + ExtractLane2 / 2, dl, MVT::f32, Val1.getOperand(0));
3139 SDValue NewIns = CurDAG->getTargetInsertSubreg(
3140 ARM::ssub_0 + Lane2 / 2, dl, VT, Ins2.getOperand(0),
3141 NewExt);
3142 ReplaceUses(Ins1, NewIns);
3143 return true;
3144 }
3145
3146 // Else v8i16 pattern of an extract and an insert, with a optional vmovx for
3147 // extracting odd lanes.
3148 if (VT == MVT::v8i16 && Subtarget->hasFullFP16()) {
3149 SDValue Inp1 = CurDAG->getTargetExtractSubreg(
3150 ARM::ssub_0 + ExtractLane1 / 2, dl, MVT::f32, Val1.getOperand(0));
3151 SDValue Inp2 = CurDAG->getTargetExtractSubreg(
3152 ARM::ssub_0 + ExtractLane2 / 2, dl, MVT::f32, Val2.getOperand(0));
3153 if (ExtractLane1 % 2 != 0)
3154 Inp1 = SDValue(CurDAG->getMachineNode(ARM::VMOVH, dl, MVT::f32, Inp1), 0);
3155 if (ExtractLane2 % 2 != 0)
3156 Inp2 = SDValue(CurDAG->getMachineNode(ARM::VMOVH, dl, MVT::f32, Inp2), 0);
3157 SDNode *VINS = CurDAG->getMachineNode(ARM::VINSH, dl, MVT::f32, Inp2, Inp1);
3158 SDValue NewIns =
3159 CurDAG->getTargetInsertSubreg(ARM::ssub_0 + Lane2 / 2, dl, MVT::v4f32,
3160 Ins2.getOperand(0), SDValue(VINS, 0));
3161 ReplaceUses(Ins1, NewIns);
3162 return true;
3163 }
3164 }
3165
3166 // The inserted values are not extracted - if they are f16 then insert them
3167 // directly using a VINS.
3168 if (VT == MVT::v8f16 && Subtarget->hasFullFP16()) {
3169 SDNode *VINS = CurDAG->getMachineNode(ARM::VINSH, dl, MVT::f32, Val2, Val1);
3170 SDValue NewIns =
3171 CurDAG->getTargetInsertSubreg(ARM::ssub_0 + Lane2 / 2, dl, MVT::v4f32,
3172 Ins2.getOperand(0), SDValue(VINS, 0));
3173 ReplaceUses(Ins1, NewIns);
3174 return true;
3175 }
3176
3177 return false;
3178}
3179
3180bool ARMDAGToDAGISel::transformFixedFloatingPointConversion(SDNode *N,
3181 SDNode *FMul,
3182 bool IsUnsigned,
3183 bool FixedToFloat) {
3184 auto Type = N->getValueType(0);
3185 unsigned ScalarBits = Type.getScalarSizeInBits();
3186 if (ScalarBits > 32)
3187 return false;
3188
3189 SDNodeFlags FMulFlags = FMul->getFlags();
3190 // The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is
3191 // allowed in 16 bit unsigned floats
3192 if (ScalarBits == 16 && !FMulFlags.hasNoInfs() && IsUnsigned)
3193 return false;
3194
3195 SDValue ImmNode = FMul->getOperand(1);
3196 SDValue VecVal = FMul->getOperand(0);
3197 if (VecVal->getOpcode() == ISD::UINT_TO_FP ||
3198 VecVal->getOpcode() == ISD::SINT_TO_FP)
3199 VecVal = VecVal->getOperand(0);
3200
3201 if (VecVal.getValueType().getScalarSizeInBits() != ScalarBits)
3202 return false;
3203
3204 if (ImmNode.getOpcode() == ISD::BITCAST) {
3205 if (ImmNode.getValueType().getScalarSizeInBits() != ScalarBits)
3206 return false;
3207 ImmNode = ImmNode.getOperand(0);
3208 }
3209
3210 if (ImmNode.getValueType().getScalarSizeInBits() != ScalarBits)
3211 return false;
3212
3213 APFloat ImmAPF(0.0f);
3214 switch (ImmNode.getOpcode()) {
3215 case ARMISD::VMOVIMM:
3216 case ARMISD::VDUP: {
3217 if (!isa<ConstantSDNode>(ImmNode.getOperand(0)))
3218 return false;
3219 unsigned Imm = ImmNode.getConstantOperandVal(0);
3220 if (ImmNode.getOpcode() == ARMISD::VMOVIMM)
3221 Imm = ARM_AM::decodeVMOVModImm(Imm, ScalarBits);
3222 ImmAPF =
3223 APFloat(ScalarBits == 32 ? APFloat::IEEEsingle() : APFloat::IEEEhalf(),
3224 APInt(ScalarBits, Imm));
3225 break;
3226 }
3227 case ARMISD::VMOVFPIMM: {
3229 break;
3230 }
3231 default:
3232 return false;
3233 }
3234
3235 // Where n is the number of fractional bits, multiplying by 2^n will convert
3236 // from float to fixed and multiplying by 2^-n will convert from fixed to
3237 // float. Taking log2 of the factor (after taking the inverse in the case of
3238 // float to fixed) will give n.
3239 APFloat ToConvert = ImmAPF;
3240 if (FixedToFloat) {
3241 if (!ImmAPF.getExactInverse(&ToConvert))
3242 return false;
3243 }
3244 APSInt Converted(64, false);
3245 bool IsExact;
3247 &IsExact);
3248 if (!IsExact || !Converted.isPowerOf2())
3249 return false;
3250
3251 unsigned FracBits = Converted.logBase2();
3252 if (FracBits > ScalarBits)
3253 return false;
3254
3256 VecVal, CurDAG->getConstant(FracBits, SDLoc(N), MVT::i32)};
3257 AddEmptyMVEPredicateToOps(Ops, SDLoc(N), Type);
3258
3259 unsigned int Opcode;
3260 switch (ScalarBits) {
3261 case 16:
3262 if (FixedToFloat)
3263 Opcode = IsUnsigned ? ARM::MVE_VCVTf16u16_fix : ARM::MVE_VCVTf16s16_fix;
3264 else
3265 Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix;
3266 break;
3267 case 32:
3268 if (FixedToFloat)
3269 Opcode = IsUnsigned ? ARM::MVE_VCVTf32u32_fix : ARM::MVE_VCVTf32s32_fix;
3270 else
3271 Opcode = IsUnsigned ? ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix;
3272 break;
3273 default:
3274 llvm_unreachable("unexpected number of scalar bits");
3275 break;
3276 }
3277
3278 ReplaceNode(N, CurDAG->getMachineNode(Opcode, SDLoc(N), Type, Ops));
3279 return true;
3280}
3281
3282bool ARMDAGToDAGISel::tryFP_TO_INT(SDNode *N, SDLoc dl) {
3283 // Transform a floating-point to fixed-point conversion to a VCVT
3284 if (!Subtarget->hasMVEFloatOps())
3285 return false;
3286 EVT Type = N->getValueType(0);
3287 if (!Type.isVector())
3288 return false;
3289 unsigned int ScalarBits = Type.getScalarSizeInBits();
3290
3291 bool IsUnsigned = N->getOpcode() == ISD::FP_TO_UINT ||
3292 N->getOpcode() == ISD::FP_TO_UINT_SAT;
3293 SDNode *Node = N->getOperand(0).getNode();
3294
3295 // floating-point to fixed-point with one fractional bit gets turned into an
3296 // FP_TO_[U|S]INT(FADD (x, x)) rather than an FP_TO_[U|S]INT(FMUL (x, y))
3297 if (Node->getOpcode() == ISD::FADD) {
3298 if (Node->getOperand(0) != Node->getOperand(1))
3299 return false;
3300 SDNodeFlags Flags = Node->getFlags();
3301 // The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is
3302 // allowed in 16 bit unsigned floats
3303 if (ScalarBits == 16 && !Flags.hasNoInfs() && IsUnsigned)
3304 return false;
3305
3306 unsigned Opcode;
3307 switch (ScalarBits) {
3308 case 16:
3309 Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix;
3310 break;
3311 case 32:
3312 Opcode = IsUnsigned ? ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix;
3313 break;
3314 }
3315 SmallVector<SDValue, 3> Ops{Node->getOperand(0),
3316 CurDAG->getConstant(1, dl, MVT::i32)};
3317 AddEmptyMVEPredicateToOps(Ops, dl, Type);
3318
3319 ReplaceNode(N, CurDAG->getMachineNode(Opcode, dl, Type, Ops));
3320 return true;
3321 }
3322
3323 if (Node->getOpcode() != ISD::FMUL)
3324 return false;
3325
3326 return transformFixedFloatingPointConversion(N, Node, IsUnsigned, false);
3327}
3328
3329bool ARMDAGToDAGISel::tryFMULFixed(SDNode *N, SDLoc dl) {
3330 // Transform a fixed-point to floating-point conversion to a VCVT
3331 if (!Subtarget->hasMVEFloatOps())
3332 return false;
3333 auto Type = N->getValueType(0);
3334 if (!Type.isVector())
3335 return false;
3336
3337 auto LHS = N->getOperand(0);
3338 if (LHS.getOpcode() != ISD::SINT_TO_FP && LHS.getOpcode() != ISD::UINT_TO_FP)
3339 return false;
3340
3341 return transformFixedFloatingPointConversion(
3342 N, N, LHS.getOpcode() == ISD::UINT_TO_FP, true);
3343}
3344
3345bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
3346 if (!Subtarget->hasV6T2Ops())
3347 return false;
3348
3349 unsigned Opc = isSigned
3350 ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
3351 : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
3352 SDLoc dl(N);
3353
3354 // For unsigned extracts, check for a shift right and mask
3355 unsigned And_imm = 0;
3356 if (N->getOpcode() == ISD::AND) {
3357 if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {
3358
3359 // The immediate is a mask of the low bits iff imm & (imm+1) == 0
3360 if (And_imm & (And_imm + 1))
3361 return false;
3362
3363 unsigned Srl_imm = 0;
3364 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
3365 Srl_imm)) {
3366 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
3367
3368 // Mask off the unnecessary bits of the AND immediate; normally
3369 // DAGCombine will do this, but that might not happen if
3370 // targetShrinkDemandedConstant chooses a different immediate.
3371 And_imm &= -1U >> Srl_imm;
3372
3373 // Note: The width operand is encoded as width-1.
3374 unsigned Width = llvm::countr_one(And_imm) - 1;
3375 unsigned LSB = Srl_imm;
3376
3377 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3378
3379 if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
3380 // It's cheaper to use a right shift to extract the top bits.
3381 if (Subtarget->isThumb()) {
3382 Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
3383 SDValue Ops[] = { N->getOperand(0).getOperand(0),
3384 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
3385 getAL(CurDAG, dl), Reg0, Reg0 };
3386 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3387 return true;
3388 }
3389
3390 // ARM models shift instructions as MOVsi with shifter operand.
3392 SDValue ShOpc =
3393 CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
3394 MVT::i32);
3395 SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
3396 getAL(CurDAG, dl), Reg0, Reg0 };
3397 CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
3398 return true;
3399 }
3400
3401 assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
3402 SDValue Ops[] = { N->getOperand(0).getOperand(0),
3403 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
3404 CurDAG->getTargetConstant(Width, dl, MVT::i32),
3405 getAL(CurDAG, dl), Reg0 };
3406 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3407 return true;
3408 }
3409 }
3410 return false;
3411 }
3412
3413 // Otherwise, we're looking for a shift of a shift
3414 unsigned Shl_imm = 0;
3415 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
3416 assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
3417 unsigned Srl_imm = 0;
3418 if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
3419 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
3420 // Note: The width operand is encoded as width-1.
3421 unsigned Width = 32 - Srl_imm - 1;
3422 int LSB = Srl_imm - Shl_imm;
3423 if (LSB < 0)
3424 return false;
3425 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3426 assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
3427 SDValue Ops[] = { N->getOperand(0).getOperand(0),
3428 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
3429 CurDAG->getTargetConstant(Width, dl, MVT::i32),
3430 getAL(CurDAG, dl), Reg0 };
3431 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3432 return true;
3433 }
3434 }
3435
3436 // Or we are looking for a shift of an and, with a mask operand
3437 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) &&
3438 isShiftedMask_32(And_imm)) {
3439 unsigned Srl_imm = 0;
3440 unsigned LSB = llvm::countr_zero(And_imm);
3441 // Shift must be the same as the ands lsb
3442 if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) {
3443 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
3444 unsigned MSB = llvm::Log2_32(And_imm);
3445 // Note: The width operand is encoded as width-1.
3446 unsigned Width = MSB - LSB;
3447 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3448 assert(Srl_imm + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
3449 SDValue Ops[] = { N->getOperand(0).getOperand(0),
3450 CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32),
3451 CurDAG->getTargetConstant(Width, dl, MVT::i32),
3452 getAL(CurDAG, dl), Reg0 };
3453 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3454 return true;
3455 }
3456 }
3457
3458 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
3459 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
3460 unsigned LSB = 0;
3461 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
3462 !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
3463 return false;
3464
3465 if (LSB + Width > 32)
3466 return false;
3467
3468 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3469 assert(LSB + Width <= 32 && "Shouldn't create an invalid ubfx");
3470 SDValue Ops[] = { N->getOperand(0).getOperand(0),
3471 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
3472 CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
3473 getAL(CurDAG, dl), Reg0 };
3474 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3475 return true;
3476 }
3477
3478 return false;
3479}
3480
/// Target-specific DAG combining for ISD::SUB.
/// Target-independent combining lowers SELECT_CC nodes of the form
///   select_cc setg[ge] X,  0, X, -X
///   select_cc setgt    X, -1, X, -X
///   select_cc setl[te] X,  0, -X, X
///   select_cc setlt    X,  1, -X, X
/// which represent integer ABS into:
///   Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
/// ARM instruction selection detects the latter and matches it to
/// ARM::ABS or ARM::t2ABS machine node.
3491bool ARMDAGToDAGISel::tryABSOp(SDNode *N){
3492 SDValue SUBSrc0 = N->getOperand(0);
3493 SDValue SUBSrc1 = N->getOperand(1);
3494 EVT VT = N->getValueType(0);
3495
3496 if (Subtarget->isThumb1Only())
3497 return false;
3498
3499 if (SUBSrc0.getOpcode() != ISD::XOR || SUBSrc1.getOpcode() != ISD::SRA)
3500 return false;
3501
3502 SDValue XORSrc0 = SUBSrc0.getOperand(0);
3503 SDValue XORSrc1 = SUBSrc0.getOperand(1);
3504 SDValue SRASrc0 = SUBSrc1.getOperand(0);
3505 SDValue SRASrc1 = SUBSrc1.getOperand(1);
3506 ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1);
3507 EVT XType = SRASrc0.getValueType();
3508 unsigned Size = XType.getSizeInBits() - 1;
3509
3510 if (XORSrc1 == SUBSrc1 && XORSrc0 == SRASrc0 && XType.isInteger() &&
3511 SRAConstant != nullptr && Size == SRAConstant->getZExtValue()) {
3512 unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
3513 CurDAG->SelectNodeTo(N, Opcode, VT, XORSrc0);
3514 return true;
3515 }
3516
3517 return false;
3518}
3519
/// We've got special pseudo-instructions for these
3521void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
3522 unsigned Opcode;
3523 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
3524 if (MemTy == MVT::i8)
3525 Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_8 : ARM::CMP_SWAP_8;
3526 else if (MemTy == MVT::i16)
3527 Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_16 : ARM::CMP_SWAP_16;
3528 else if (MemTy == MVT::i32)
3529 Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_32 : ARM::CMP_SWAP_32;
3530 else
3531 llvm_unreachable("Unknown AtomicCmpSwap type");
3532
3533 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
3534 N->getOperand(0)};
3535 SDNode *CmpSwap = CurDAG->getMachineNode(
3536 Opcode, SDLoc(N),
3537 CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);
3538
3539 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
3540 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
3541
3542 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
3543 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
3544 CurDAG->RemoveDeadNode(N);
3545}
3546
3547static std::optional<std::pair<unsigned, unsigned>>
3549 unsigned FirstOne = A.getBitWidth() - A.countl_zero() - 1;
3550 unsigned LastOne = A.countr_zero();
3551 if (A.popcount() != (FirstOne - LastOne + 1))
3552 return std::nullopt;
3553 return std::make_pair(FirstOne, LastOne);
3554}
3555
// Try to fold (CMPZ (and X, C), #0), where C is a contiguous run of set bits,
// into one or two flag-setting shifts so the AND disappears (Thumb only).
// Sets SwitchEQNEToPLMI when the caller must rewrite EQ/NE condition codes to
// PL/MI, because the tested bit has been moved into the sign bit.
void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
  assert(N->getOpcode() == ARMISD::CMPZ);
  SwitchEQNEToPLMI = false;

  if (!Subtarget->isThumb())
    // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and
    // LSR don't exist as standalone instructions - they need the barrel shifter.
    return;

  // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X))
  SDValue And = N->getOperand(0);
  // Only fold when the AND has no other users, since it is replaced below.
  if (!And->hasOneUse())
    return;

  SDValue Zero = N->getOperand(1);
  if (!isNullConstant(Zero) || And->getOpcode() != ISD::AND)
    return;
  SDValue X = And.getOperand(0);
  auto C = dyn_cast<ConstantSDNode>(And.getOperand(1));

  if (!C)
    return;
  // Range is (index of highest set bit, index of lowest set bit); nullopt if
  // the mask's set bits are not contiguous.
  auto Range = getContiguousRangeOfSetBits(C->getAPIntValue());
  if (!Range)
    return;

  // There are several ways to lower this:
  SDNode *NewN;
  SDLoc dl(N);

  // Emit a flag-setting immediate shift. Opc is a Thumb1 opcode (tLSLri or
  // tLSRri) and is translated to the Thumb2 form when applicable; note the
  // Thumb1 form takes CPSR as an explicit first operand while the Thumb2 form
  // does not.
  auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* {
    if (Subtarget->isThumb2()) {
      Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri;
      SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32),
                        getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
                        CurDAG->getRegister(0, MVT::i32) };
      return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
    } else {
      SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src,
                       CurDAG->getTargetConstant(Imm, dl, MVT::i32),
                       getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
      return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
    }
  };

  if (Range->second == 0) {
    // 1. Mask includes the LSB -> Simply shift the top N bits off
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    ReplaceNode(And.getNode(), NewN);
  } else if (Range->first == 31) {
    // 2. Mask includes the MSB -> Simply shift the bottom N bits off
    NewN = EmitShift(ARM::tLSRri, X, Range->second);
    ReplaceNode(And.getNode(), NewN);
  } else if (Range->first == Range->second) {
    // 3. Only one bit is set. We can shift this into the sign bit and use a
    // PL/MI comparison.
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    ReplaceNode(And.getNode(), NewN);

    // Tell the caller that EQ/NE must become PL/MI.
    SwitchEQNEToPLMI = true;
  } else if (!Subtarget->hasV6T2Ops()) {
    // 4. Do a double shift to clear bottom and top bits, but only in
    // thumb-1 mode as in thumb-2 we can use UBFX.
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0),
                     Range->second + (31 - Range->first));
    ReplaceNode(And.getNode(), NewN);
  }
}
3625
3626static unsigned getVectorShuffleOpcode(EVT VT, unsigned Opc64[3],
3627 unsigned Opc128[3]) {
3628 assert((VT.is64BitVector() || VT.is128BitVector()) &&
3629 "Unexpected vector shuffle length");
3630 switch (VT.getScalarSizeInBits()) {
3631 default:
3632 llvm_unreachable("Unexpected vector shuffle element size");
3633 case 8:
3634 return VT.is64BitVector() ? Opc64[0] : Opc128[0];
3635 case 16:
3636 return VT.is64BitVector() ? Opc64[1] : Opc128[1];
3637 case 32:
3638 return VT.is64BitVector() ? Opc64[2] : Opc128[2];
3639 }
3640}
3641
3642void ARMDAGToDAGISel::Select(SDNode *N) {
3643 SDLoc dl(N);
3644
3645 if (N->isMachineOpcode()) {
3646 N->setNodeId(-1);
3647 return; // Already selected.
3648 }
3649
3650 switch (N->getOpcode()) {
3651 default: break;
3652 case ISD::STORE: {
3653 // For Thumb1, match an sp-relative store in C++. This is a little
3654 // unfortunate, but I don't think I can make the chain check work
3655 // otherwise. (The chain of the store has to be the same as the chain
3656 // of the CopyFromReg, or else we can't replace the CopyFromReg with
3657 // a direct reference to "SP".)
3658 //
3659 // This is only necessary on Thumb1 because Thumb1 sp-relative stores use
3660 // a different addressing mode from other four-byte stores.
3661 //
3662 // This pattern usually comes up with call arguments.
3663 StoreSDNode *ST = cast<StoreSDNode>(N);
3664 SDValue Ptr = ST->getBasePtr();
3665 if (Subtarget->isThumb1Only() && ST->isUnindexed()) {
3666 int RHSC = 0;
3667 if (Ptr.getOpcode() == ISD::ADD &&
3668 isScaledConstantInRange(Ptr.getOperand(1), /*Scale=*/4, 0, 256, RHSC))
3669 Ptr = Ptr.getOperand(0);
3670
3671 if (Ptr.getOpcode() == ISD::CopyFromReg &&
3672 cast<RegisterSDNode>(Ptr.getOperand(1))->getReg() == ARM::SP &&
3673 Ptr.getOperand(0) == ST->getChain()) {
3674 SDValue Ops[] = {ST->getValue(),
3675 CurDAG->getRegister(ARM::SP, MVT::i32),
3676 CurDAG->getTargetConstant(RHSC, dl, MVT::i32),
3677 getAL(CurDAG, dl),
3678 CurDAG->getRegister(0, MVT::i32),
3679 ST->getChain()};
3680 MachineSDNode *ResNode =
3681 CurDAG->getMachineNode(ARM::tSTRspi, dl, MVT::Other, Ops);
3682 MachineMemOperand *MemOp = ST->getMemOperand();
3683 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
3684 ReplaceNode(N, ResNode);
3685 return;
3686 }
3687 }
3688 break;
3689 }
3691 if (tryWriteRegister(N))
3692 return;
3693 break;
3694 case ISD::READ_REGISTER:
3695 if (tryReadRegister(N))
3696 return;
3697 break;
3698 case ISD::INLINEASM:
3699 case ISD::INLINEASM_BR:
3700 if (tryInlineAsm(N))
3701 return;
3702 break;
3703 case ISD::SUB:
3704 // Select special operations if SUB node forms integer ABS pattern
3705 if (tryABSOp(N))
3706 return;
3707 // Other cases are autogenerated.
3708 break;
3709 case ISD::Constant: {
3710 unsigned Val = N->getAsZExtVal();
3711 // If we can't materialize the constant we need to use a literal pool
3712 if (ConstantMaterializationCost(Val, Subtarget) > 2 &&
3713 !Subtarget->genExecuteOnly()) {
3714 SDValue CPIdx = CurDAG->getTargetConstantPool(
3715 ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
3716 TLI->getPointerTy(CurDAG->getDataLayout()));
3717
3718 SDNode *ResNode;
3719 if (Subtarget->isThumb()) {
3720 SDValue Ops[] = {
3721 CPIdx,
3722 getAL(CurDAG, dl),
3723 CurDAG->getRegister(0, MVT::i32),
3724 CurDAG->getEntryNode()
3725 };
3726 ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
3727 Ops);
3728 } else {
3729 SDValue Ops[] = {
3730 CPIdx,
3731 CurDAG->getTargetConstant(0, dl, MVT::i32),
3732 getAL(CurDAG, dl),
3733 CurDAG->getRegister(0, MVT::i32),
3734 CurDAG->getEntryNode()
3735 };
3736 ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
3737 Ops);
3738 }
3739 // Annotate the Node with memory operand information so that MachineInstr
3740 // queries work properly. This e.g. gives the register allocation the
3741 // required information for rematerialization.
3742 MachineFunction& MF = CurDAG->getMachineFunction();
3746
3747 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
3748
3749 ReplaceNode(N, ResNode);
3750 return;
3751 }
3752
3753 // Other cases are autogenerated.
3754 break;
3755 }
3756 case ISD::FrameIndex: {
3757 // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
3758 int FI = cast<FrameIndexSDNode>(N)->getIndex();
3759 SDValue TFI = CurDAG->getTargetFrameIndex(
3760 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
3761 if (Subtarget->isThumb1Only()) {
3762 // Set the alignment of the frame object to 4, to avoid having to generate
3763 // more than one ADD
3764 MachineFrameInfo &MFI = MF->getFrameInfo();
3765 if (MFI.getObjectAlign(FI) < Align(4))
3766 MFI.setObjectAlignment(FI, Align(4));
3767 CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
3768 CurDAG->getTargetConstant(0, dl, MVT::i32));
3769 return;
3770 } else {
3771 unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
3772 ARM::t2ADDri : ARM::ADDri);
3773 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),
3774 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
3775 CurDAG->getRegister(0, MVT::i32) };
3776 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3777 return;
3778 }
3779 }
3781 if (tryInsertVectorElt(N))
3782 return;
3783 break;
3784 }
3785 case ISD::SRL:
3786 if (tryV6T2BitfieldExtractOp(N, false))
3787 return;
3788 break;
3790 case ISD::SRA:
3791 if (tryV6T2BitfieldExtractOp(N, true))
3792 return;
3793 break;
3794 case ISD::FP_TO_UINT:
3795 case ISD::FP_TO_SINT:
3798 if (tryFP_TO_INT(N, dl))
3799 return;
3800 break;
3801 case ISD::FMUL:
3802 if (tryFMULFixed(N, dl))
3803 return;
3804 break;
3805 case ISD::MUL:
3806 if (Subtarget->isThumb1Only())
3807 break;
3808 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
3809 unsigned RHSV = C->getZExtValue();
3810 if (!RHSV) break;
3811 if (isPowerOf2_32(RHSV-1)) { // 2^n+1?
3812 unsigned ShImm = Log2_32(RHSV-1);
3813 if (ShImm >= 32)
3814 break;
3815 SDValue V = N->getOperand(0);
3816 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
3817 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
3818 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3819 if (Subtarget->isThumb()) {
3820 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
3821 CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
3822 return;
3823 } else {
3824 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
3825 Reg0 };
3826 CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
3827 return;
3828 }
3829 }
3830 if (isPowerOf2_32(RHSV+1)) { // 2^n-1?
3831 unsigned ShImm = Log2_32(RHSV+1);
3832 if (ShImm >= 32)
3833 break;
3834 SDValue V = N->getOperand(0);
3835 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
3836 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
3837 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3838 if (Subtarget->isThumb()) {
3839 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
3840 CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
3841 return;
3842 } else {
3843 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
3844 Reg0 };
3845 CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
3846 return;
3847 }
3848 }
3849 }
3850 break;
3851 case ISD::AND: {
3852 // Check for unsigned bitfield extract
3853 if (tryV6T2BitfieldExtractOp(N, false))
3854 return;
3855
3856 // If an immediate is used in an AND node, it is possible that the immediate
3857 // can be more optimally materialized when negated. If this is the case we
3858 // can negate the immediate and use a BIC instead.
3859 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
3860 if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {
3861 uint32_t Imm = (uint32_t) N1C->getZExtValue();
3862
3863 // In Thumb2 mode, an AND can take a 12-bit immediate. If this
3864 // immediate can be negated and fit in the immediate operand of
3865 // a t2BIC, don't do any manual transform here as this can be
3866 // handled by the generic ISel machinery.
3867 bool PreferImmediateEncoding =
3868 Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));
3869 if (!PreferImmediateEncoding &&
3870 ConstantMaterializationCost(Imm, Subtarget) >
3871 ConstantMaterializationCost(~Imm, Subtarget)) {
3872 // The current immediate costs more to materialize than a negated
3873 // immediate, so negate the immediate and use a BIC.
3874 SDValue NewImm = CurDAG->getConstant(~Imm, dl, MVT::i32);
3875 // If the new constant didn't exist before, reposition it in the topological
3876 // ordering so it is just before N. Otherwise, don't touch its location.
3877 if (NewImm->getNodeId() == -1)
3878 CurDAG->RepositionNode(N->getIterator(), NewImm.getNode());
3879
3880 if (!Subtarget->hasThumb2()) {
3881 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32),
3882 N->getOperand(0), NewImm, getAL(CurDAG, dl),
3883 CurDAG->getRegister(0, MVT::i32)};
3884 ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops));
3885 return;
3886 } else {
3887 SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl),
3888 CurDAG->getRegister(0, MVT::i32),
3889 CurDAG->getRegister(0, MVT::i32)};
3890 ReplaceNode(N,
3891 CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops));
3892 return;
3893 }
3894 }
3895 }
3896
3897 // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
3898 // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
3899 // are entirely contributed by c2 and lower 16-bits are entirely contributed
3900 // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
3901 // Select it to: "movt x, ((c1 & 0xffff) >> 16)
3902 EVT VT = N->getValueType(0);
3903 if (VT != MVT::i32)
3904 break;
3905 unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
3906 ? ARM::t2MOVTi16
3907 : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
3908 if (!Opc)
3909 break;
3910 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
3911 N1C = dyn_cast<ConstantSDNode>(N1);
3912 if (!N1C)
3913 break;
3914 if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
3915 SDValue N2 = N0.getOperand(1);
3916 ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
3917 if (!N2C)
3918 break;
3919 unsigned N1CVal = N1C->getZExtValue();
3920 unsigned N2CVal = N2C->getZExtValue();
3921 if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
3922 (N1CVal & 0xffffU) == 0xffffU &&
3923 (N2CVal & 0xffffU) == 0x0U) {
3924 SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
3925 dl, MVT::i32);
3926 SDValue Ops[] = { N0.getOperand(0), Imm16,
3927 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
3928 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
3929 return;
3930 }
3931 }
3932
3933 break;
3934 }
3935 case ARMISD::UMAAL: {
3936 unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
3937 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
3938 N->getOperand(2), N->getOperand(3),
3939 getAL(CurDAG, dl),
3940 CurDAG->getRegister(0, MVT::i32) };
3941 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops));
3942 return;
3943 }
3944 case ARMISD::UMLAL:{
3945 if (Subtarget->isThumb()) {
3946 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3947 N->getOperand(3), getAL(CurDAG, dl),
3948 CurDAG->getRegister(0, MVT::i32)};
3949 ReplaceNode(
3950 N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops));
3951 return;
3952 }else{
3953 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3954 N->getOperand(3), getAL(CurDAG, dl),
3955 CurDAG->getRegister(0, MVT::i32),
3956 CurDAG->getRegister(0, MVT::i32) };
3957 ReplaceNode(N, CurDAG->getMachineNode(
3958 Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,
3959 MVT::i32, MVT::i32, Ops));
3960 return;
3961 }
3962 }
3963 case ARMISD::SMLAL:{
3964 if (Subtarget->isThumb()) {
3965 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3966 N->getOperand(3), getAL(CurDAG, dl),
3967 CurDAG->getRegister(0, MVT::i32)};
3968 ReplaceNode(
3969 N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops));
3970 return;
3971 }else{
3972 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3973 N->getOperand(3), getAL(CurDAG, dl),
3974 CurDAG->getRegister(0, MVT::i32),
3975 CurDAG->getRegister(0, MVT::i32) };
3976 ReplaceNode(N, CurDAG->getMachineNode(
3977 Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl,
3978 MVT::i32, MVT::i32, Ops));
3979 return;
3980 }
3981 }
3982 case ARMISD::SUBE: {
3983 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
3984 break;
3985 // Look for a pattern to match SMMLS
3986 // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b))))
3987 if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI ||
3988 N->getOperand(2).getOpcode() != ARMISD::SUBC ||
3989 !SDValue(N, 1).use_empty())
3990 break;
3991
3992 if (Subtarget->isThumb())
3993 assert(Subtarget->hasThumb2() &&
3994 "This pattern should not be generated for Thumb");
3995
3996 SDValue SmulLoHi = N->getOperand(1);
3997 SDValue Subc = N->getOperand(2);
3998 SDValue Zero = Subc.getOperand(0);
3999
4000 if (!isNullConstant(Zero) || Subc.getOperand(1) != SmulLoHi.getValue(0) ||
4001 N->getOperand(1) != SmulLoHi.getValue(1) ||
4002 N->getOperand(2) != Subc.getValue(1))
4003 break;
4004
4005 unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS;
4006 SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1),
4007 N->getOperand(0), getAL(CurDAG, dl),
4008 CurDAG->getRegister(0, MVT::i32) };
4009 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops));
4010 return;
4011 }
4012 case ISD::LOAD: {
4013 if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
4014 return;
4015 if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
4016 if (tryT2IndexedLoad(N))
4017 return;
4018 } else if (Subtarget->isThumb()) {
4019 if (tryT1IndexedLoad(N))
4020 return;
4021 } else if (tryARMIndexedLoad(N))
4022 return;
4023 // Other cases are autogenerated.
4024 break;
4025 }
4026 case ISD::MLOAD:
4027 if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
4028 return;
4029 // Other cases are autogenerated.
4030 break;
4031 case ARMISD::WLSSETUP: {
4032 SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopSetup, dl, MVT::i32,
4033 N->getOperand(0));
4034 ReplaceUses(N, New);
4035 CurDAG->RemoveDeadNode(N);
4036 return;
4037 }
4038 case ARMISD::WLS: {
4039 SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopStart, dl, MVT::Other,
4040 N->getOperand(1), N->getOperand(2),
4041 N->getOperand(0));
4042 ReplaceUses(N, New);
4043 CurDAG->RemoveDeadNode(N);
4044 return;
4045 }
4046 case ARMISD::LE: {
4047 SDValue Ops[] = { N->getOperand(1),
4048 N->getOperand(2),
4049 N->getOperand(0) };
4050 unsigned Opc = ARM::t2LoopEnd;
4051 SDNode *New = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
4052 ReplaceUses(N, New);
4053 CurDAG->RemoveDeadNode(N);
4054 return;
4055 }
4056 case ARMISD::LDRD: {
4057 if (Subtarget->isThumb2())
4058 break; // TableGen handles isel in this case.
4059 SDValue Base, RegOffset, ImmOffset;
4060 const SDValue &Chain = N->getOperand(0);
4061 const SDValue &Addr = N->getOperand(1);
4062 SelectAddrMode3(Addr, Base, RegOffset, ImmOffset);
4063 if (RegOffset != CurDAG->getRegister(0, MVT::i32)) {
4064 // The register-offset variant of LDRD mandates that the register
4065 // allocated to RegOffset is not reused in any of the remaining operands.
4066 // This restriction is currently not enforced. Therefore emitting this
4067 // variant is explicitly avoided.
4068 Base = Addr;
4069 RegOffset = CurDAG->getRegister(0, MVT::i32);
4070 }
4071 SDValue Ops[] = {Base, RegOffset, ImmOffset, Chain};
4072 SDNode *New = CurDAG->getMachineNode(ARM::LOADDUAL, dl,
4073 {MVT::Untyped, MVT::Other}, Ops);
4074 SDValue Lo = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
4075 SDValue(New, 0));
4076 SDValue Hi = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
4077 SDValue(New, 0));
4078 transferMemOperands(N, New);
4079 ReplaceUses(SDValue(N, 0), Lo);
4080 ReplaceUses(SDValue(N, 1), Hi);
4081 ReplaceUses(SDValue(N, 2), SDValue(New, 1));
4082 CurDAG->RemoveDeadNode(N);
4083 return;
4084 }
4085 case ARMISD::STRD: {
4086 if (Subtarget->isThumb2())
4087 break; // TableGen handles isel in this case.
4088 SDValue Base, RegOffset, ImmOffset;
4089 const SDValue &Chain = N->getOperand(0);
4090 const SDValue &Addr = N->getOperand(3);
4091 SelectAddrMode3(Addr, Base, RegOffset, ImmOffset);
4092 if (RegOffset != CurDAG->getRegister(0, MVT::i32)) {
4093 // The register-offset variant of STRD mandates that the register
4094 // allocated to RegOffset is not reused in any of the remaining operands.
4095 // This restriction is currently not enforced. Therefore emitting this
4096 // variant is explicitly avoided.
4097 Base = Addr;
4098 RegOffset = CurDAG->getRegister(0, MVT::i32);
4099 }
4100 SDNode *RegPair =
4101 createGPRPairNode(MVT::Untyped, N->getOperand(1), N->getOperand(2));
4102 SDValue Ops[] = {SDValue(RegPair, 0), Base, RegOffset, ImmOffset, Chain};
4103 SDNode *New = CurDAG->getMachineNode(ARM::STOREDUAL, dl, MVT::Other, Ops);
4104 transferMemOperands(N, New);
4105 ReplaceUses(SDValue(N, 0), SDValue(New, 0));
4106 CurDAG->RemoveDeadNode(N);
4107 return;
4108 }
4109 case ARMISD::LOOP_DEC: {
4110 SDValue Ops[] = { N->getOperand(1),
4111 N->getOperand(2),
4112 N->getOperand(0) };
4113 SDNode *Dec =
4114 CurDAG->getMachineNode(ARM::t2LoopDec, dl,
4115 CurDAG->getVTList(MVT::i32, MVT::Other), Ops);
4116 ReplaceUses(N, Dec);
4117 CurDAG->RemoveDeadNode(N);
4118 return;
4119 }
4120 case ARMISD::BRCOND: {
4121 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
4122 // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
4123 // Pattern complexity = 6 cost = 1 size = 0
4124
4125 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
4126 // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
4127 // Pattern complexity = 6 cost = 1 size = 0
4128
4129 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
4130 // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
4131 // Pattern complexity = 6 cost = 1 size = 0
4132
4133 unsigned Opc = Subtarget->isThumb() ?
4134 ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
4135 SDValue Chain = N->getOperand(0);
4136 SDValue N1 = N->getOperand(1);
4137 SDValue N2 = N->getOperand(2);
4138 SDValue N3 = N->getOperand(3);
4139 SDValue InGlue = N->getOperand(4);
4143
4144 unsigned CC = (unsigned)N2->getAsZExtVal();
4145
4146 if (InGlue.getOpcode() == ARMISD::CMPZ) {
4147 if (InGlue.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) {
4148 SDValue Int = InGlue.getOperand(0);
4149 uint64_t ID = Int->getConstantOperandVal(1);
4150
4151 // Handle low-overhead loops.
4152 if (ID == Intrinsic::loop_decrement_reg) {
4153 SDValue Elements = Int.getOperand(2);
4154 SDValue Size = CurDAG->getTargetConstant(Int.getConstantOperandVal(3),
4155 dl, MVT::i32);
4156
4157 SDValue Args[] = { Elements, Size, Int.getOperand(0) };
4158 SDNode *LoopDec =
4159 CurDAG->getMachineNode(ARM::t2LoopDec, dl,
4160 CurDAG->getVTList(MVT::i32, MVT::Other),
4161 Args);
4162 ReplaceUses(Int.getNode(), LoopDec);
4163
4164 SDValue EndArgs[] = { SDValue(LoopDec, 0), N1, Chain };
4165 SDNode *LoopEnd =
4166 CurDAG->getMachineNode(ARM::t2LoopEnd, dl, MVT::Other, EndArgs);
4167
4168 ReplaceUses(N, LoopEnd);
4169 CurDAG->RemoveDeadNode(N);
4170 CurDAG->RemoveDeadNode(InGlue.getNode());
4171 CurDAG->RemoveDeadNode(Int.getNode());
4172 return;
4173 }
4174 }
4175
4176 bool SwitchEQNEToPLMI;
4177 SelectCMPZ(InGlue.getNode(), SwitchEQNEToPLMI);
4178 InGlue = N->getOperand(4);
4179
4180 if (SwitchEQNEToPLMI) {
4181 switch ((ARMCC::CondCodes)CC) {
4182 default: llvm_unreachable("CMPZ must be either NE or EQ!");
4183 case ARMCC::NE:
4185 break;
4186 case ARMCC::EQ:
4188 break;
4189 }
4190 }
4191 }
4192
4193 SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32);
4194 SDValue Ops[] = { N1, Tmp2, N3, Chain, InGlue };
4195 SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
4196 MVT::Glue, Ops);
4197 Chain = SDValue(ResNode, 0);
4198 if (N->getNumValues() == 2) {
4199 InGlue = SDValue(ResNode, 1);
4200 ReplaceUses(SDValue(N, 1), InGlue);
4201 }
4202 ReplaceUses(SDValue(N, 0),
4203 SDValue(Chain.getNode(), Chain.getResNo()));
4204 CurDAG->RemoveDeadNode(N);
4205 return;
4206 }
4207
4208 case ARMISD::CMPZ: {
4209 // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
4210 // This allows us to avoid materializing the expensive negative constant.
4211 // The CMPZ #0 is useless and will be peepholed away but we need to keep it
4212 // for its glue output.
4213 SDValue X = N->getOperand(0);
4214 auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode());
4215 if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) {
4216 int64_t Addend = -C->getSExtValue();
4217
4218 SDNode *Add = nullptr;
4219 // ADDS can be better than CMN if the immediate fits in a
4220 // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3.
4221 // Outside that range we can just use a CMN which is 32-bit but has a
4222 // 12-bit immediate range.
4223 if (Addend < 1<<8) {
4224 if (Subtarget->isThumb2()) {
4225 SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32),
4226 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
4227 CurDAG->getRegister(0, MVT::i32) };
4228 Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops);
4229 } else {
4230 unsigned Opc = (Addend < 1<<3) ? ARM::tADDi3 : ARM::tADDi8;
4231 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
4232 CurDAG->getTargetConstant(Addend, dl, MVT::i32),
4233 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
4234 Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
4235 }
4236 }
4237 if (Add) {
4238 SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)};
4239 CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2);
4240 }
4241 }
4242 // Other cases are autogenerated.
4243 break;
4244 }
4245
4246 case ARMISD::CMOV: {
4247 SDValue InGlue = N->getOperand(4);
4248
4249 if (InGlue.getOpcode() == ARMISD::CMPZ) {
4250 bool SwitchEQNEToPLMI;
4251 SelectCMPZ(InGlue.getNode(), SwitchEQNEToPLMI);
4252
4253 if (SwitchEQNEToPLMI) {
4254 SDValue ARMcc = N->getOperand(2);
4256
4257 switch (CC) {
4258 default: llvm_unreachable("CMPZ must be either NE or EQ!");
4259 case ARMCC::NE:
4260 CC = ARMCC::MI;
4261 break;
4262 case ARMCC::EQ:
4263 CC = ARMCC::PL;
4264 break;
4265 }
4266 SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32);
4267 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc,
4268 N->getOperand(3), N->getOperand(4)};
4269 CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops);
4270 }
4271
4272 }
4273 // Other cases are autogenerated.
4274 break;
4275 }
4276 case ARMISD::VZIP: {
4277 EVT VT = N->getValueType(0);
4278 // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
4279 unsigned Opc64[] = {ARM::VZIPd8, ARM::VZIPd16, ARM::VTRNd32};
4280 unsigned Opc128[] = {ARM::VZIPq8, ARM::VZIPq16, ARM::VZIPq32};
4281 unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128);
4282 SDValue Pred = getAL(CurDAG, dl);
4283 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
4284 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Pred, PredReg};
4285 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
4286 return;
4287 }
4288 case ARMISD::VUZP: {
4289 EVT VT = N->getValueType(0);
4290 // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
4291 unsigned Opc64[] = {ARM::VUZPd8, ARM::VUZPd16, ARM::VTRNd32};
4292 unsigned Opc128[] = {ARM::VUZPq8, ARM::VUZPq16, ARM::VUZPq32};
4293 unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128);
4294 SDValue Pred = getAL(CurDAG, dl);
4295 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
4296 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Pred, PredReg};
4297 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
4298 return;
4299 }
4300 case ARMISD::VTRN: {
4301 EVT VT = N->getValueType(0);
4302 unsigned Opc64[] = {ARM::VTRNd8, ARM::VTRNd16, ARM::VTRNd32};
4303 unsigned Opc128[] = {ARM::VTRNq8, ARM::VTRNq16, ARM::VTRNq32};
4304 unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128);
4305 SDValue Pred = getAL(CurDAG, dl);
4306 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
4307 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Pred, PredReg};
4308 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
4309 return;
4310 }
4311 case ARMISD::BUILD_VECTOR: {
4312 EVT VecVT = N->getValueType(0);
4313 EVT EltVT = VecVT.getVectorElementType();
4314 unsigned NumElts = VecVT.getVectorNumElements();
4315 if (EltVT == MVT::f64) {
4316 assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
4317 ReplaceNode(
4318 N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
4319 return;
4320 }
4321 assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
4322 if (NumElts == 2) {
4323 ReplaceNode(
4324 N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
4325 return;
4326 }
4327 assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
4328 ReplaceNode(N,
4329 createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
4330 N->getOperand(2), N->getOperand(3)));
4331 return;
4332 }
4333
4334 case ARMISD::VLD1DUP: {
4335 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16,
4336 ARM::VLD1DUPd32 };
4337 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16,
4338 ARM::VLD1DUPq32 };
4339 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 1, DOpcodes, QOpcodes);
4340 return;
4341 }
4342
4343 case ARMISD::VLD2DUP: {
4344 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
4345 ARM::VLD2DUPd32 };
4346 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 2, Opcodes);
4347 return;
4348 }
4349
4350 case ARMISD::VLD3DUP: {
4351 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
4352 ARM::VLD3DUPd16Pseudo,
4353 ARM::VLD3DUPd32Pseudo };
4354 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 3, Opcodes);
4355 return;
4356 }
4357
4358 case ARMISD::VLD4DUP: {
4359 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
4360 ARM::VLD4DUPd16Pseudo,
4361 ARM::VLD4DUPd32Pseudo };
4362 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 4, Opcodes);
4363 return;
4364 }
4365
4366 case ARMISD::VLD1DUP_UPD: {
4367 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed,
4368 ARM::VLD1DUPd16wb_fixed,
4369 ARM::VLD1DUPd32wb_fixed };
4370 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed,
4371 ARM::VLD1DUPq16wb_fixed,
4372 ARM::VLD1DUPq32wb_fixed };
4373 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 1, DOpcodes, QOpcodes);
4374 return;
4375 }
4376
4377 case ARMISD::VLD2DUP_UPD: {
4378 static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8wb_fixed,
4379 ARM::VLD2DUPd16wb_fixed,
4380 ARM::VLD2DUPd32wb_fixed,
4381 ARM::VLD1q64wb_fixed };
4382 static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
4383 ARM::VLD2DUPq16EvenPseudo,
4384 ARM::VLD2DUPq32EvenPseudo };
4385 static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudoWB_fixed,
4386 ARM::VLD2DUPq16OddPseudoWB_fixed,
4387 ARM::VLD2DUPq32OddPseudoWB_fixed };
4388 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 2, DOpcodes, QOpcodes0, QOpcodes1);
4389 return;
4390 }
4391
4392 case ARMISD::VLD3DUP_UPD: {
4393 static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
4394 ARM::VLD3DUPd16Pseudo_UPD,
4395 ARM::VLD3DUPd32Pseudo_UPD,
4396 ARM::VLD1d64TPseudoWB_fixed };
4397 static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
4398 ARM::VLD3DUPq16EvenPseudo,
4399 ARM::VLD3DUPq32EvenPseudo };
4400 static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo_UPD,
4401 ARM::VLD3DUPq16OddPseudo_UPD,
4402 ARM::VLD3DUPq32OddPseudo_UPD };
4403 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
4404 return;
4405 }
4406
4407 case ARMISD::VLD4DUP_UPD: {
4408 static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
4409 ARM::VLD4DUPd16Pseudo_UPD,
4410 ARM::VLD4DUPd32Pseudo_UPD,
4411 ARM::VLD1d64QPseudoWB_fixed };
4412 static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
4413 ARM::VLD4DUPq16EvenPseudo,
4414 ARM::VLD4DUPq32EvenPseudo };
4415 static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo_UPD,
4416 ARM::VLD4DUPq16OddPseudo_UPD,
4417 ARM::VLD4DUPq32OddPseudo_UPD };
4418 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
4419 return;
4420 }
4421
4422 case ARMISD::VLD1_UPD: {
4423 static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
4424 ARM::VLD1d16wb_fixed,
4425 ARM::VLD1d32wb_fixed,
4426 ARM::VLD1d64wb_fixed };
4427 static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
4428 ARM::VLD1q16wb_fixed,
4429 ARM::VLD1q32wb_fixed,
4430 ARM::VLD1q64wb_fixed };
4431 SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
4432 return;
4433 }
4434
4435 case ARMISD::VLD2_UPD: {
4436 if (Subtarget->hasNEON()) {
4437 static const uint16_t DOpcodes[] = {
4438 ARM::VLD2d8wb_fixed, ARM::VLD2d16wb_fixed, ARM::VLD2d32wb_fixed,
4439 ARM::VLD1q64wb_fixed};
4440 static const uint16_t QOpcodes[] = {ARM::VLD2q8PseudoWB_fixed,
4441 ARM::VLD2q16PseudoWB_fixed,
4442 ARM::VLD2q32PseudoWB_fixed};
4443 SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
4444 } else {
4445 static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8,
4446 ARM::MVE_VLD21_8_wb};
4447 static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16,
4448 ARM::MVE_VLD21_16_wb};
4449 static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32,
4450 ARM::MVE_VLD21_32_wb};
4451 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
4452 SelectMVE_VLD(N, 2, Opcodes, true);
4453 }
4454 return;
4455 }
4456
4457 case ARMISD::VLD3_UPD: {
4458 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
4459 ARM::VLD3d16Pseudo_UPD,
4460 ARM::VLD3d32Pseudo_UPD,
4461 ARM::VLD1d64TPseudoWB_fixed};
4462 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
4463 ARM::VLD3q16Pseudo_UPD,
4464 ARM::VLD3q32Pseudo_UPD };
4465 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
4466 ARM::VLD3q16oddPseudo_UPD,
4467 ARM::VLD3q32oddPseudo_UPD };
4468 SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
4469 return;
4470 }
4471
4472 case ARMISD::VLD4_UPD: {
4473 if (Subtarget->hasNEON()) {
4474 static const uint16_t DOpcodes[] = {
4475 ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD, ARM::VLD4d32Pseudo_UPD,
4476 ARM::VLD1d64QPseudoWB_fixed};
4477 static const uint16_t QOpcodes0[] = {ARM::VLD4q8Pseudo_UPD,
4478 ARM::VLD4q16Pseudo_UPD,
4479 ARM::VLD4q32Pseudo_UPD};
4480 static const uint16_t QOpcodes1[] = {ARM::VLD4q8oddPseudo_UPD,
4481 ARM::VLD4q16oddPseudo_UPD,
4482 ARM::VLD4q32oddPseudo_UPD};
4483 SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
4484 } else {
4485 static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8,
4486 ARM::MVE_VLD42_8,
4487 ARM::MVE_VLD43_8_wb};
4488 static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16,
4489 ARM::MVE_VLD42_16,
4490 ARM::MVE_VLD43_16_wb};
4491 static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32,
4492 ARM::MVE_VLD42_32,
4493 ARM::MVE_VLD43_32_wb};
4494 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
4495 SelectMVE_VLD(N, 4, Opcodes, true);
4496 }
4497 return;
4498 }
4499
4500 case ARMISD::VLD1x2_UPD: {
4501 if (Subtarget->hasNEON()) {
4502 static const uint16_t DOpcodes[] = {
4503 ARM::VLD1q8wb_fixed, ARM::VLD1q16wb_fixed, ARM::VLD1q32wb_fixed,
4504 ARM::VLD1q64wb_fixed};
4505 static const uint16_t QOpcodes[] = {
4506 ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed,
4507 ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed};
4508 SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
4509 return;
4510 }
4511 break;
4512 }
4513
4514 case ARMISD::VLD1x3_UPD: {
4515 if (Subtarget->hasNEON()) {
4516 static const uint16_t DOpcodes[] = {
4517 ARM::VLD1d8TPseudoWB_fixed, ARM::VLD1d16TPseudoWB_fixed,
4518 ARM::VLD1d32TPseudoWB_fixed, ARM::VLD1d64TPseudoWB_fixed};
4519 static const uint16_t QOpcodes0[] = {
4520 ARM::VLD1q8LowTPseudo_UPD, ARM::VLD1q16LowTPseudo_UPD,
4521 ARM::VLD1q32LowTPseudo_UPD, ARM::VLD1q64LowTPseudo_UPD};
4522 static const uint16_t QOpcodes1[] = {
4523 ARM::VLD1q8HighTPseudo_UPD, ARM::VLD1q16HighTPseudo_UPD,
4524 ARM::VLD1q32HighTPseudo_UPD, ARM::VLD1q64HighTPseudo_UPD};
4525 SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
4526 return;
4527 }
4528 break;
4529 }
4530
4531 case ARMISD::VLD1x4_UPD: {
4532 if (Subtarget->hasNEON()) {
4533 static const uint16_t DOpcodes[] = {
4534 ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed,
4535 ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed};
4536 static const uint16_t QOpcodes0[] = {
4537 ARM::VLD1q8LowQPseudo_UPD, ARM::VLD1q16LowQPseudo_UPD,
4538 ARM::VLD1q32LowQPseudo_UPD, ARM::VLD1q64LowQPseudo_UPD};
4539 static const uint16_t QOpcodes1[] = {
4540 ARM::VLD1q8HighQPseudo_UPD, ARM::VLD1q16HighQPseudo_UPD,
4541 ARM::VLD1q32HighQPseudo_UPD, ARM::VLD1q64HighQPseudo_UPD};
4542 SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
4543 return;
4544 }
4545 break;
4546 }
4547
4548 case ARMISD::VLD2LN_UPD: {
4549 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
4550 ARM::VLD2LNd16Pseudo_UPD,
4551 ARM::VLD2LNd32Pseudo_UPD };
4552 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
4553 ARM::VLD2LNq32Pseudo_UPD };
4554 SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
4555 return;
4556 }
4557
4558 case ARMISD::VLD3LN_UPD: {
4559 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
4560 ARM::VLD3LNd16Pseudo_UPD,
4561 ARM::VLD3LNd32Pseudo_UPD };
4562 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
4563 ARM::VLD3LNq32Pseudo_UPD };
4564 SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
4565 return;
4566 }
4567
4568 case ARMISD::VLD4LN_UPD: {
4569 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
4570 ARM::VLD4LNd16Pseudo_UPD,
4571 ARM::VLD4LNd32Pseudo_UPD };
4572 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
4573 ARM::VLD4LNq32Pseudo_UPD };
4574 SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
4575 return;
4576 }
4577
4578 case ARMISD::VST1_UPD: {
4579 static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
4580 ARM::VST1d16wb_fixed,
4581 ARM::VST1d32wb_fixed,
4582 ARM::VST1d64wb_fixed };
4583 static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
4584 ARM::VST1q16wb_fixed,
4585 ARM::VST1q32wb_fixed,
4586 ARM::VST1q64wb_fixed };
4587 SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
4588 return;
4589 }
4590
4591 case ARMISD::VST2_UPD: {
4592 if (Subtarget->hasNEON()) {
4593 static const uint16_t DOpcodes[] = {
4594 ARM::VST2d8wb_fixed, ARM::VST2d16wb_fixed, ARM::VST2d32wb_fixed,
4595 ARM::VST1q64wb_fixed};
4596 static const uint16_t QOpcodes[] = {ARM::VST2q8PseudoWB_fixed,
4597 ARM::VST2q16PseudoWB_fixed,
4598 ARM::VST2q32PseudoWB_fixed};
4599 SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
4600 return;
4601 }
4602 break;
4603 }
4604
4605 case ARMISD::VST3_UPD: {
4606 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
4607 ARM::VST3d16Pseudo_UPD,
4608 ARM::VST3d32Pseudo_UPD,
4609 ARM::VST1d64TPseudoWB_fixed};
4610 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
4611 ARM::VST3q16Pseudo_UPD,
4612 ARM::VST3q32Pseudo_UPD };
4613 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
4614 ARM::VST3q16oddPseudo_UPD,
4615 ARM::VST3q32oddPseudo_UPD };
4616 SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
4617 return;
4618 }
4619
4620 case ARMISD::VST4_UPD: {
4621 if (Subtarget->hasNEON()) {
4622 static const uint16_t DOpcodes[] = {
4623 ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD, ARM::VST4d32Pseudo_UPD,
4624 ARM::VST1d64QPseudoWB_fixed};
4625 static const uint16_t QOpcodes0[] = {ARM::VST4q8Pseudo_UPD,
4626 ARM::VST4q16Pseudo_UPD,
4627 ARM::VST4q32Pseudo_UPD};
4628 static const uint16_t QOpcodes1[] = {ARM::VST4q8oddPseudo_UPD,
4629 ARM::VST4q16oddPseudo_UPD,
4630 ARM::VST4q32oddPseudo_UPD};
4631 SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
4632 return;
4633 }
4634 break;
4635 }
4636
4637 case ARMISD::VST1x2_UPD: {
4638 if (Subtarget->hasNEON()) {
4639 static const uint16_t DOpcodes[] = { ARM::VST1q8wb_fixed,
4640 ARM::VST1q16wb_fixed,
4641 ARM::VST1q32wb_fixed,
4642 ARM::VST1q64wb_fixed};
4643 static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudoWB_fixed,
4644 ARM::VST1d16QPseudoWB_fixed,
4645 ARM::VST1d32QPseudoWB_fixed,
4646 ARM::VST1d64QPseudoWB_fixed };
4647 SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
4648 return;
4649 }
4650 break;
4651 }
4652
4653 case ARMISD::VST1x3_UPD: {
4654 if (Subtarget->hasNEON()) {
4655 static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudoWB_fixed,
4656 ARM::VST1d16TPseudoWB_fixed,
4657 ARM::VST1d32TPseudoWB_fixed,
4658 ARM::VST1d64TPseudoWB_fixed };
4659 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
4660 ARM::VST1q16LowTPseudo_UPD,
4661 ARM::VST1q32LowTPseudo_UPD,
4662 ARM::VST1q64LowTPseudo_UPD };
4663 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo_UPD,
4664 ARM::VST1q16HighTPseudo_UPD,
4665 ARM::VST1q32HighTPseudo_UPD,
4666 ARM::VST1q64HighTPseudo_UPD };
4667 SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
4668 return;
4669 }
4670 break;
4671 }
4672
4673 case ARMISD::VST1x4_UPD: {
4674 if (Subtarget->hasNEON()) {
4675 static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudoWB_fixed,
4676 ARM::VST1d16QPseudoWB_fixed,
4677 ARM::VST1d32QPseudoWB_fixed,
4678 ARM::VST1d64QPseudoWB_fixed };
4679 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
4680 ARM::VST1q16LowQPseudo_UPD,
4681 ARM::VST1q32LowQPseudo_UPD,
4682 ARM::VST1q64LowQPseudo_UPD };
4683 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo_UPD,
4684 ARM::VST1q16HighQPseudo_UPD,
4685 ARM::VST1q32HighQPseudo_UPD,
4686 ARM::VST1q64HighQPseudo_UPD };
4687 SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
4688 return;
4689 }
4690 break;
4691 }
4692 case ARMISD::VST2LN_UPD: {
4693 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
4694 ARM::VST2LNd16Pseudo_UPD,
4695 ARM::VST2LNd32Pseudo_UPD };
4696 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
4697 ARM::VST2LNq32Pseudo_UPD };
4698 SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
4699 return;
4700 }
4701
4702 case ARMISD::VST3LN_UPD: {
4703 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
4704 ARM::VST3LNd16Pseudo_UPD,
4705 ARM::VST3LNd32Pseudo_UPD };
4706 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
4707 ARM::VST3LNq32Pseudo_UPD };
4708 SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
4709 return;
4710 }
4711
4712 case ARMISD::VST4LN_UPD: {
4713 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
4714 ARM::VST4LNd16Pseudo_UPD,
4715 ARM::VST4LNd32Pseudo_UPD };
4716 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
4717 ARM::VST4LNq32Pseudo_UPD };
4718 SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
4719 return;
4720 }
4721
4724 unsigned IntNo = N->getConstantOperandVal(1);
4725 switch (IntNo) {
4726 default:
4727 break;
4728
4729 case Intrinsic::arm_mrrc:
4730 case Intrinsic::arm_mrrc2: {
4731 SDLoc dl(N);
4732 SDValue Chain = N->getOperand(0);
4733 unsigned Opc;
4734
4735 if (Subtarget->isThumb())
4736 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2);
4737 else
4738 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);
4739
4741 Ops.push_back(getI32Imm(N->getConstantOperandVal(2), dl)); /* coproc */
4742 Ops.push_back(getI32Imm(N->getConstantOperandVal(3), dl)); /* opc */
4743 Ops.push_back(getI32Imm(N->getConstantOperandVal(4), dl)); /* CRm */
4744
4745 // The mrrc2 instruction in ARM doesn't allow predicates; the top 4 bits of the encoded
4746 // instruction will always be '1111'. It is possible in assembly language to specify
4747 // AL as a predicate to mrrc2, but it doesn't make any difference to the encoded instruction.
4748 if (Opc != ARM::MRRC2) {
4749 Ops.push_back(getAL(CurDAG, dl));
4750 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
4751 }
4752
4753 Ops.push_back(Chain);
4754
4755 // Writes to two registers.
4756 const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other};
4757
4758 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops));
4759 return;
4760 }
4761 case Intrinsic::arm_ldaexd:
4762 case Intrinsic::arm_ldrexd: {
4763 SDLoc dl(N);
4764 SDValue Chain = N->getOperand(0);
4765 SDValue MemAddr = N->getOperand(2);
4766 bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps();
4767
4768 bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
4769 unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
4770 : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);
4771
4772 // arm_ldrexd returns a i64 value in {i32, i32}
4773 std::vector<EVT> ResTys;
4774 if (isThumb) {
4775 ResTys.push_back(MVT::i32);
4776 ResTys.push_back(MVT::i32);
4777 } else
4778 ResTys.push_back(MVT::Untyped);
4779 ResTys.push_back(MVT::Other);
4780
4781 // Place arguments in the right order.
4782 SDValue Ops[] = {MemAddr, getAL(CurDAG, dl),
4783 CurDAG->getRegister(0, MVT::i32), Chain};
4784 SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
4785 // Transfer memoperands.
4786 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
4787 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
4788
4789 // Remap uses.
4790 SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
4791 if (!SDValue(N, 0).use_empty()) {
4793 if (isThumb)
4794 Result = SDValue(Ld, 0);
4795 else {
4796 SDValue SubRegIdx =
4797 CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
4798 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
4799 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
4800 Result = SDValue(ResNode,0);
4801 }
4802 ReplaceUses(SDValue(N, 0), Result);
4803 }
4804 if (!SDValue(N, 1).use_empty()) {
4806 if (isThumb)
4807 Result = SDValue(Ld, 1);
4808 else {
4809 SDValue SubRegIdx =
4810 CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
4811 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
4812 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
4813 Result = SDValue(ResNode,0);
4814 }
4815 ReplaceUses(SDValue(N, 1), Result);
4816 }
4817 ReplaceUses(SDValue(N, 2), OutChain);
4818 CurDAG->RemoveDeadNode(N);
4819 return;
4820 }
4821 case Intrinsic::arm_stlexd:
4822 case Intrinsic::arm_strexd: {
4823 SDLoc dl(N);
4824 SDValue Chain = N->getOperand(0);
4825 SDValue Val0 = N->getOperand(2);
4826 SDValue Val1 = N->getOperand(3);
4827 SDValue MemAddr = N->getOperand(4);
4828
4829 // Store exclusive double return a i32 value which is the return status
4830 // of the issued store.
4831 const EVT ResTys[] = {MVT::i32, MVT::Other};
4832
4833 bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
4834 // Place arguments in the right order.
4836 if (isThumb) {
4837 Ops.push_back(Val0);
4838 Ops.push_back(Val1);
4839 } else
4840 // arm_strexd uses GPRPair.
4841 Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
4842 Ops.push_back(MemAddr);
4843 Ops.push_back(getAL(CurDAG, dl));
4844 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
4845 Ops.push_back(Chain);
4846
4847 bool IsRelease = IntNo == Intrinsic::arm_stlexd;
4848 unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
4849 : (IsRelease ? ARM::STLEXD : ARM::STREXD);
4850
4851 SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
4852 // Transfer memoperands.
4853 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
4854 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
4855
4856 ReplaceNode(N, St);
4857 return;
4858 }
4859
4860 case Intrinsic::arm_neon_vld1: {
4861 static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
4862 ARM::VLD1d32, ARM::VLD1d64 };
4863 static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
4864 ARM::VLD1q32, ARM::VLD1q64};
4865 SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
4866 return;
4867 }
4868
4869 case Intrinsic::arm_neon_vld1x2: {
4870 static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
4871 ARM::VLD1q32, ARM::VLD1q64 };
4872 static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo,
4873 ARM::VLD1d16QPseudo,
4874 ARM::VLD1d32QPseudo,
4875 ARM::VLD1d64QPseudo };
4876 SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
4877 return;
4878 }
4879
4880 case Intrinsic::arm_neon_vld1x3: {
4881 static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo,
4882 ARM::VLD1d16TPseudo,
4883 ARM::VLD1d32TPseudo,
4884 ARM::VLD1d64TPseudo };
4885 static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowTPseudo_UPD,
4886 ARM::VLD1q16LowTPseudo_UPD,
4887 ARM::VLD1q32LowTPseudo_UPD,
4888 ARM::VLD1q64LowTPseudo_UPD };
4889 static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo,
4890 ARM::VLD1q16HighTPseudo,
4891 ARM::VLD1q32HighTPseudo,
4892 ARM::VLD1q64HighTPseudo };
4893 SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
4894 return;
4895 }
4896
4897 case Intrinsic::arm_neon_vld1x4: {
4898 static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo,
4899 ARM::VLD1d16QPseudo,
4900 ARM::VLD1d32QPseudo,
4901 ARM::VLD1d64QPseudo };
4902 static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD,
4903 ARM::VLD1q16LowQPseudo_UPD,
4904 ARM::VLD1q32LowQPseudo_UPD,
4905 ARM::VLD1q64LowQPseudo_UPD };
4906 static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo,
4907 ARM::VLD1q16HighQPseudo,
4908 ARM::VLD1q32HighQPseudo,
4909 ARM::VLD1q64HighQPseudo };
4910 SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
4911 return;
4912 }
4913
4914 case Intrinsic::arm_neon_vld2: {
4915 static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
4916 ARM::VLD2d32, ARM::VLD1q64 };
4917 static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
4918 ARM::VLD2q32Pseudo };
4919 SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
4920 return;
4921 }
4922
4923 case Intrinsic::arm_neon_vld3: {
4924 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
4925 ARM::VLD3d16Pseudo,
4926 ARM::VLD3d32Pseudo,
4927 ARM::VLD1d64TPseudo };
4928 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
4929 ARM::VLD3q16Pseudo_UPD,
4930 ARM::VLD3q32Pseudo_UPD };
4931 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
4932 ARM::VLD3q16oddPseudo,
4933 ARM::VLD3q32oddPseudo };
4934 SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
4935 return;
4936 }
4937
4938 case Intrinsic::arm_neon_vld4: {
4939 static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
4940 ARM::VLD4d16Pseudo,
4941 ARM::VLD4d32Pseudo,
4942 ARM::VLD1d64QPseudo };
4943 static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
4944 ARM::VLD4q16Pseudo_UPD,
4945 ARM::VLD4q32Pseudo_UPD };
4946 static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
4947 ARM::VLD4q16oddPseudo,
4948 ARM::VLD4q32oddPseudo };
4949 SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
4950 return;
4951 }
4952
4953 case Intrinsic::arm_neon_vld2dup: {
4954 static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
4955 ARM::VLD2DUPd32, ARM::VLD1q64 };
4956 static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
4957 ARM::VLD2DUPq16EvenPseudo,
4958 ARM::VLD2DUPq32EvenPseudo };
4959 static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudo,
4960 ARM::VLD2DUPq16OddPseudo,
4961 ARM::VLD2DUPq32OddPseudo };
4962 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 2,
4963 DOpcodes, QOpcodes0, QOpcodes1);
4964 return;
4965 }
4966
4967 case Intrinsic::arm_neon_vld3dup: {
4968 static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo,
4969 ARM::VLD3DUPd16Pseudo,
4970 ARM::VLD3DUPd32Pseudo,
4971 ARM::VLD1d64TPseudo };
4972 static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
4973 ARM::VLD3DUPq16EvenPseudo,
4974 ARM::VLD3DUPq32EvenPseudo };
4975 static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo,
4976 ARM::VLD3DUPq16OddPseudo,
4977 ARM::VLD3DUPq32OddPseudo };
4978 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 3,
4979 DOpcodes, QOpcodes0, QOpcodes1);
4980 return;
4981 }
4982
4983 case Intrinsic::arm_neon_vld4dup: {
4984 static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo,
4985 ARM::VLD4DUPd16Pseudo,
4986 ARM::VLD4DUPd32Pseudo,
4987 ARM::VLD1d64QPseudo };
4988 static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
4989 ARM::VLD4DUPq16EvenPseudo,
4990 ARM::VLD4DUPq32EvenPseudo };
4991 static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo,
4992 ARM::VLD4DUPq16OddPseudo,
4993 ARM::VLD4DUPq32OddPseudo };
4994 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 4,
4995 DOpcodes, QOpcodes0, QOpcodes1);
4996 return;
4997 }
4998
4999 case Intrinsic::arm_neon_vld2lane: {
5000 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
5001 ARM::VLD2LNd16Pseudo,
5002 ARM::VLD2LNd32Pseudo };
5003 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
5004 ARM::VLD2LNq32Pseudo };
5005 SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
5006 return;
5007 }
5008
5009 case Intrinsic::arm_neon_vld3lane: {
5010 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
5011 ARM::VLD3LNd16Pseudo,
5012 ARM::VLD3LNd32Pseudo };
5013 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
5014 ARM::VLD3LNq32Pseudo };
5015 SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
5016 return;
5017 }
5018
5019 case Intrinsic::arm_neon_vld4lane: {
5020 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
5021 ARM::VLD4LNd16Pseudo,
5022 ARM::VLD4LNd32Pseudo };
5023 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
5024 ARM::VLD4LNq32Pseudo };
5025 SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
5026 return;
5027 }
5028
5029 case Intrinsic::arm_neon_vst1: {
5030 static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
5031 ARM::VST1d32, ARM::VST1d64 };
5032 static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
5033 ARM::VST1q32, ARM::VST1q64 };
5034 SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
5035 return;
5036 }
5037
5038 case Intrinsic::arm_neon_vst1x2: {
5039 static const uint16_t DOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
5040 ARM::VST1q32, ARM::VST1q64 };
5041 static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudo,
5042 ARM::VST1d16QPseudo,
5043 ARM::VST1d32QPseudo,
5044 ARM::VST1d64QPseudo };
5045 SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
5046 return;
5047 }
5048
5049 case Intrinsic::arm_neon_vst1x3: {
5050 static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudo,
5051 ARM::VST1d16TPseudo,
5052 ARM::VST1d32TPseudo,
5053 ARM::VST1d64TPseudo };
5054 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
5055 ARM::VST1q16LowTPseudo_UPD,
5056 ARM::VST1q32LowTPseudo_UPD,
5057 ARM::VST1q64LowTPseudo_UPD };
5058 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo,
5059 ARM::VST1q16HighTPseudo,
5060 ARM::VST1q32HighTPseudo,
5061 ARM::VST1q64HighTPseudo };
5062 SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
5063 return;
5064 }
5065
5066 case Intrinsic::arm_neon_vst1x4: {
5067 static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudo,
5068 ARM::VST1d16QPseudo,
5069 ARM::VST1d32QPseudo,
5070 ARM::VST1d64QPseudo };
5071 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
5072 ARM::VST1q16LowQPseudo_UPD,
5073 ARM::VST1q32LowQPseudo_UPD,
5074 ARM::VST1q64LowQPseudo_UPD };
5075 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo,
5076 ARM::VST1q16HighQPseudo,
5077 ARM::VST1q32HighQPseudo,
5078 ARM::VST1q64HighQPseudo };
5079 SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
5080 return;
5081 }
5082
5083 case Intrinsic::arm_neon_vst2: {
5084 static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
5085 ARM::VST2d32, ARM::VST1q64 };
5086 static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
5087 ARM::VST2q32Pseudo };
5088 SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
5089 return;
5090 }
5091
5092 case Intrinsic::arm_neon_vst3: {
5093 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
5094 ARM::VST3d16Pseudo,
5095 ARM::VST3d32Pseudo,
5096 ARM::VST1d64TPseudo };
5097 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
5098 ARM::VST3q16Pseudo_UPD,
5099 ARM::VST3q32Pseudo_UPD };
5100 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
5101 ARM::VST3q16oddPseudo,
5102 ARM::VST3q32oddPseudo };
5103 SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
5104 return;
5105 }
5106
5107 case Intrinsic::arm_neon_vst4: {
5108 static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
5109 ARM::VST4d16Pseudo,
5110 ARM::VST4d32Pseudo,
5111 ARM::VST1d64QPseudo };
5112 static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
5113 ARM::VST4q16Pseudo_UPD,
5114 ARM::VST4q32Pseudo_UPD };
5115 static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
5116 ARM::VST4q16oddPseudo,
5117 ARM::VST4q32oddPseudo };
5118 SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
5119 return;
5120 }
5121
5122 case Intrinsic::arm_neon_vst2lane: {
5123 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
5124 ARM::VST2LNd16Pseudo,
5125 ARM::VST2LNd32Pseudo };
5126 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
5127 ARM::VST2LNq32Pseudo };
5128 SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
5129 return;
5130 }
5131
5132 case Intrinsic::arm_neon_vst3lane: {
5133 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
5134 ARM::VST3LNd16Pseudo,
5135 ARM::VST3LNd32Pseudo };
5136 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
5137 ARM::VST3LNq32Pseudo };
5138 SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
5139 return;
5140 }
5141
5142 case Intrinsic::arm_neon_vst4lane: {
5143 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
5144 ARM::VST4LNd16Pseudo,
5145 ARM::VST4LNd32Pseudo };
5146 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
5147 ARM::VST4LNq32Pseudo };
5148 SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
5149 return;
5150 }
5151
5152 case Intrinsic::arm_mve_vldr_gather_base_wb:
5153 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated: {
5154 static const uint16_t Opcodes[] = {ARM::MVE_VLDRWU32_qi_pre,
5155 ARM::MVE_VLDRDU64_qi_pre};
5156 SelectMVE_WB(N, Opcodes,
5157 IntNo == Intrinsic::arm_mve_vldr_gather_base_wb_predicated);
5158 return;
5159 }
5160
5161 case Intrinsic::arm_mve_vld2q: {
5162 static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8, ARM::MVE_VLD21_8};
5163 static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16,
5164 ARM::MVE_VLD21_16};
5165 static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32,
5166 ARM::MVE_VLD21_32};
5167 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
5168 SelectMVE_VLD(N, 2, Opcodes, false);
5169 return;
5170 }
5171
5172 case Intrinsic::arm_mve_vld4q: {
5173 static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8,
5174 ARM::MVE_VLD42_8, ARM::MVE_VLD43_8};
5175 static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16,
5176 ARM::MVE_VLD42_16,
5177 ARM::MVE_VLD43_16};
5178 static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32,
5179 ARM::MVE_VLD42_32,
5180 ARM::MVE_VLD43_32};
5181 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
5182 SelectMVE_VLD(N, 4, Opcodes, false);
5183 return;
5184 }
5185 }
5186 break;
5187 }
5188
5190 unsigned IntNo = N->getConstantOperandVal(0);
5191 switch (IntNo) {
5192 default:
5193 break;
5194
5195 // Scalar f32 -> bf16
5196 case Intrinsic::arm_neon_vcvtbfp2bf: {
5197 SDLoc dl(N);
5198 const SDValue &Src = N->getOperand(1);
5199 llvm::EVT DestTy = N->getValueType(0);
5200 SDValue Pred = getAL(CurDAG, dl);
5201 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
5202 SDValue Ops[] = { Src, Src, Pred, Reg0 };
5203 CurDAG->SelectNodeTo(N, ARM::BF16_VCVTB, DestTy, Ops);
5204 return;
5205 }
5206
5207 // Vector v4f32 -> v4bf16
5208 case Intrinsic::arm_neon_vcvtfp2bf: {
5209 SDLoc dl(N);
5210 const SDValue &Src = N->getOperand(1);
5211 SDValue Pred = getAL(CurDAG, dl);
5212 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
5213 SDValue Ops[] = { Src, Pred, Reg0 };
5214 CurDAG->SelectNodeTo(N, ARM::BF16_VCVT, MVT::v4bf16, Ops);
5215 return;
5216 }
5217
5218 case Intrinsic::arm_mve_urshrl:
5219 SelectMVE_LongShift(N, ARM::MVE_URSHRL, true, false);
5220 return;
5221 case Intrinsic::arm_mve_uqshll:
5222 SelectMVE_LongShift(N, ARM::MVE_UQSHLL, true, false);
5223 return;
5224 case Intrinsic::arm_mve_srshrl:
5225 SelectMVE_LongShift(N, ARM::MVE_SRSHRL, true, false);
5226 return;
5227 case Intrinsic::arm_mve_sqshll:
5228 SelectMVE_LongShift(N, ARM::MVE_SQSHLL, true, false);
5229 return;
5230 case Intrinsic::arm_mve_uqrshll:
5231 SelectMVE_LongShift(N, ARM::MVE_UQRSHLL, false, true);
5232 return;
5233 case Intrinsic::arm_mve_sqrshrl:
5234 SelectMVE_LongShift(N, ARM::MVE_SQRSHRL, false, true);
5235 return;
5236
5237 case Intrinsic::arm_mve_vadc:
5238 case Intrinsic::arm_mve_vadc_predicated:
5239 SelectMVE_VADCSBC(N, ARM::MVE_VADC, ARM::MVE_VADCI, true,
5240 IntNo == Intrinsic::arm_mve_vadc_predicated);
5241 return;
5242 case Intrinsic::arm_mve_vsbc:
5243 case Intrinsic::arm_mve_vsbc_predicated:
5244 SelectMVE_VADCSBC(N, ARM::MVE_VSBC, ARM::MVE_VSBCI, true,
5245 IntNo == Intrinsic::arm_mve_vsbc_predicated);
5246 return;
5247 case Intrinsic::arm_mve_vshlc:
5248 case Intrinsic::arm_mve_vshlc_predicated:
5249 SelectMVE_VSHLC(N, IntNo == Intrinsic::arm_mve_vshlc_predicated);
5250 return;
5251
5252 case Intrinsic::arm_mve_vmlldava:
5253 case Intrinsic::arm_mve_vmlldava_predicated: {
5254 static const uint16_t OpcodesU[] = {
5255 ARM::MVE_VMLALDAVu16, ARM::MVE_VMLALDAVu32,
5256 ARM::MVE_VMLALDAVau16, ARM::MVE_VMLALDAVau32,
5257 };
5258 static const uint16_t OpcodesS[] = {
5259 ARM::MVE_VMLALDAVs16, ARM::MVE_VMLALDAVs32,
5260 ARM::MVE_VMLALDAVas16, ARM::MVE_VMLALDAVas32,
5261 ARM::MVE_VMLALDAVxs16, ARM::MVE_VMLALDAVxs32,
5262 ARM::MVE_VMLALDAVaxs16, ARM::MVE_VMLALDAVaxs32,
5263 ARM::MVE_VMLSLDAVs16, ARM::MVE_VMLSLDAVs32,
5264 ARM::MVE_VMLSLDAVas16, ARM::MVE_VMLSLDAVas32,
5265 ARM::MVE_VMLSLDAVxs16, ARM::MVE_VMLSLDAVxs32,
5266 ARM::MVE_VMLSLDAVaxs16, ARM::MVE_VMLSLDAVaxs32,
5267 };
5268 SelectMVE_VMLLDAV(N, IntNo == Intrinsic::arm_mve_vmlldava_predicated,
5269 OpcodesS, OpcodesU);
5270 return;
5271 }
5272
5273 case Intrinsic::arm_mve_vrmlldavha:
5274 case Intrinsic::arm_mve_vrmlldavha_predicated: {
5275 static const uint16_t OpcodesU[] = {
5276 ARM::MVE_VRMLALDAVHu32, ARM::MVE_VRMLALDAVHau32,
5277 };
5278 static const uint16_t OpcodesS[] = {
5279 ARM::MVE_VRMLALDAVHs32, ARM::MVE_VRMLALDAVHas32,
5280 ARM::MVE_VRMLALDAVHxs32, ARM::MVE_VRMLALDAVHaxs32,
5281 ARM::MVE_VRMLSLDAVHs32, ARM::MVE_VRMLSLDAVHas32,
5282 ARM::MVE_VRMLSLDAVHxs32, ARM::MVE_VRMLSLDAVHaxs32,
5283 };
5284 SelectMVE_VRMLLDAVH(N, IntNo == Intrinsic::arm_mve_vrmlldavha_predicated,
5285 OpcodesS, OpcodesU);
5286 return;
5287 }
5288
5289 case Intrinsic::arm_mve_vidup:
5290 case Intrinsic::arm_mve_vidup_predicated: {
5291 static const uint16_t Opcodes[] = {
5292 ARM::MVE_VIDUPu8, ARM::MVE_VIDUPu16, ARM::MVE_VIDUPu32,
5293 };
5294 SelectMVE_VxDUP(N, Opcodes, false,
5295 IntNo == Intrinsic::arm_mve_vidup_predicated);
5296 return;
5297 }
5298
5299 case Intrinsic::arm_mve_vddup:
5300 case Intrinsic::arm_mve_vddup_predicated: {
5301 static const uint16_t Opcodes[] = {
5302 ARM::MVE_VDDUPu8, ARM::MVE_VDDUPu16, ARM::MVE_VDDUPu32,
5303 };
5304 SelectMVE_VxDUP(N, Opcodes, false,
5305 IntNo == Intrinsic::arm_mve_vddup_predicated);
5306 return;
5307 }
5308
5309 case Intrinsic::arm_mve_viwdup:
5310 case Intrinsic::arm_mve_viwdup_predicated: {
5311 static const uint16_t Opcodes[] = {
5312 ARM::MVE_VIWDUPu8, ARM::MVE_VIWDUPu16, ARM::MVE_VIWDUPu32,
5313 };
5314 SelectMVE_VxDUP(N, Opcodes, true,
5315 IntNo == Intrinsic::arm_mve_viwdup_predicated);
5316 return;
5317 }
5318
5319 case Intrinsic::arm_mve_vdwdup:
5320 case Intrinsic::arm_mve_vdwdup_predicated: {
5321 static const uint16_t Opcodes[] = {
5322 ARM::MVE_VDWDUPu8, ARM::MVE_VDWDUPu16, ARM::MVE_VDWDUPu32,
5323 };
5324 SelectMVE_VxDUP(N, Opcodes, true,
5325 IntNo == Intrinsic::arm_mve_vdwdup_predicated);
5326 return;
5327 }
5328
5329 case Intrinsic::arm_cde_cx1d:
5330 case Intrinsic::arm_cde_cx1da:
5331 case Intrinsic::arm_cde_cx2d:
5332 case Intrinsic::arm_cde_cx2da:
5333 case Intrinsic::arm_cde_cx3d:
5334 case Intrinsic::arm_cde_cx3da: {
5335 bool HasAccum = IntNo == Intrinsic::arm_cde_cx1da ||
5336 IntNo == Intrinsic::arm_cde_cx2da ||
5337 IntNo == Intrinsic::arm_cde_cx3da;
5338 size_t NumExtraOps;
5339 uint16_t Opcode;
5340 switch (IntNo) {
5341 case Intrinsic::arm_cde_cx1d:
5342 case Intrinsic::arm_cde_cx1da:
5343 NumExtraOps = 0;
5344 Opcode = HasAccum ? ARM::CDE_CX1DA : ARM::CDE_CX1D;
5345 break;
5346 case Intrinsic::arm_cde_cx2d:
5347 case Intrinsic::arm_cde_cx2da:
5348 NumExtraOps = 1;
5349 Opcode = HasAccum ? ARM::CDE_CX2DA : ARM::CDE_CX2D;
5350 break;
5351 case Intrinsic::arm_cde_cx3d:
5352 case Intrinsic::arm_cde_cx3da:
5353 NumExtraOps = 2;
5354 Opcode = HasAccum ? ARM::CDE_CX3DA : ARM::CDE_CX3D;
5355 break;
5356 default:
5357 llvm_unreachable("Unexpected opcode");
5358 }
5359 SelectCDE_CXxD(N, Opcode, NumExtraOps, HasAccum);
5360 return;
5361 }
5362 }
5363 break;
5364 }
5365
5367 SelectCMP_SWAP(N);
5368 return;
5369 }
5370
5371 SelectCode(N);
5372}
5373
5374// Inspect a register string of the form
5375// cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or
5376// cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string
5377// and obtain the integer operands from them, adding these operands to the
5378// provided vector.
5380 SelectionDAG *CurDAG,
5381 const SDLoc &DL,
5382 std::vector<SDValue> &Ops) {
5384 RegString.split(Fields, ':');
5385
5386 if (Fields.size() > 1) {
5387 bool AllIntFields = true;
5388
5389 for (StringRef Field : Fields) {
5390 // Need to trim out leading 'cp' characters and get the integer field.
5391 unsigned IntField;
5392 AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
5393 Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
5394 }
5395
5396 assert(AllIntFields &&
5397 "Unexpected non-integer value in special register string.");
5398 (void)AllIntFields;
5399 }
5400}
5401
5402// Maps a Banked Register string to its mask value. The mask value returned is
5403// for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
5404// mask operand, which expresses which register is to be used, e.g. r8, and in
5405// which mode it is to be used, e.g. usr. Returns -1 to signify that the string
5406// was invalid.
5407static inline int getBankedRegisterMask(StringRef RegString) {
5408 auto TheReg = ARMBankedReg::lookupBankedRegByName(RegString.lower());
5409 if (!TheReg)
5410 return -1;
5411 return TheReg->Encoding;
5412}
5413
5414// The flags here are common to those allowed for apsr in the A class cores and
5415// those allowed for the special registers in the M class cores. Returns a
5416// value representing which flags were present, -1 if invalid.
5417static inline int getMClassFlagsMask(StringRef Flags) {
5418 return StringSwitch<int>(Flags)
5419 .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is
5420 // correct when flags are not permitted
5421 .Case("g", 0x1)
5422 .Case("nzcvq", 0x2)
5423 .Case("nzcvqg", 0x3)
5424 .Default(-1);
5425}
5426
5427// Maps MClass special registers string to its value for use in the
5428// t2MRS_M/t2MSR_M instruction nodes as the SYSm value operand.
5429// Returns -1 to signify that the string was invalid.
5430static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget) {
5431 auto TheReg = ARMSysReg::lookupMClassSysRegByName(Reg);
5432 const FeatureBitset &FeatureBits = Subtarget->getFeatureBits();
5433 if (!TheReg || !TheReg->hasRequiredFeatures(FeatureBits))
5434 return -1;
5435 return (int)(TheReg->Encoding & 0xFFF); // SYSm value
5436}
5437
5439 // The mask operand contains the special register (R Bit) in bit 4, whether
5440 // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
5441 // bits 3-0 contains the fields to be accessed in the special register, set by
5442 // the flags provided with the register.
5443 int Mask = 0;
5444 if (Reg == "apsr") {
5445 // The flags permitted for apsr are the same flags that are allowed in
5446 // M class registers. We get the flag value and then shift the flags into
5447 // the correct place to combine with the mask.
5448 Mask = getMClassFlagsMask(Flags);
5449 if (Mask == -1)
5450 return -1;
5451 return Mask << 2;
5452 }
5453
5454 if (Reg != "cpsr" && Reg != "spsr") {
5455 return -1;
5456 }
5457
5458 // This is the same as if the flags were "fc"
5459 if (Flags.empty() || Flags == "all")
5460 return Mask | 0x9;
5461
5462 // Inspect the supplied flags string and set the bits in the mask for
5463 // the relevant and valid flags allowed for cpsr and spsr.
5464 for (char Flag : Flags) {
5465 int FlagVal;
5466 switch (Flag) {
5467 case 'c':
5468 FlagVal = 0x1;
5469 break;
5470 case 'x':
5471 FlagVal = 0x2;
5472 break;
5473 case 's':
5474 FlagVal = 0x4;
5475 break;
5476 case 'f':
5477 FlagVal = 0x8;
5478 break;
5479 default:
5480 FlagVal = 0;
5481 }
5482
5483 // This avoids allowing strings where the same flag bit appears twice.
5484 if (!FlagVal || (Mask & FlagVal))
5485 return -1;
5486 Mask |= FlagVal;
5487 }
5488
5489 // If the register is spsr then we need to set the R bit.
5490 if (Reg == "spsr")
5491 Mask |= 0x10;
5492
5493 return Mask;
5494}
5495
// Lower the read_register intrinsic to ARM specific DAG nodes
// using the supplied metadata string to select the instruction node to use
// and the registers/masks to construct as operands for the node.
bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){
  // Operand 1 carries the metadata node that holds the register name string;
  // operand 0 is the incoming chain.
  const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
  const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  // If the string is of the ACLE coprocessor form (cp<n>:<opc1>:c<CRn>...),
  // Ops receives the decoded integer fields; otherwise it stays empty.
  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE) then need to lower to MRC node (32 bit) or
    // MRRC node(64 bit), we can make the distinction based on the number of
    // operands we have.
    unsigned Opcode;
    SmallVector<EVT, 3> ResTypes;
    if (Ops.size() == 5){
      Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
      ResTypes.append({ MVT::i32, MVT::Other });
    } else {
      assert(Ops.size() == 3 &&
             "Invalid number of fields in special register string.");
      Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
      ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
    }

    // Append the standard predicate operands (AL condition, no CC register)
    // and the incoming chain.
    Ops.push_back(getAL(CurDAG, DL));
    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
    Ops.push_back(N->getOperand(0));
    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops));
    return true;
  }

  // Named-register lookups below are case-insensitive.
  std::string SpecialReg = RegString->getString().lower();

  int BankedReg = getBankedRegisterMask(SpecialReg);
  if (BankedReg != -1) {
    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
                                  DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // The VFP registers are read by creating SelectionDAG nodes with opcodes
  // corresponding to the register that is being read from. So we switch on the
  // string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
                        .Case("fpscr", ARM::VMRS)
                        .Case("fpexc", ARM::VMRS_FPEXC)
                        .Case("fpsid", ARM::VMRS_FPSID)
                        .Case("mvfr0", ARM::VMRS_MVFR0)
                        .Case("mvfr1", ARM::VMRS_MVFR1)
                        .Case("mvfr2", ARM::VMRS_MVFR2)
                        .Case("fpinst", ARM::VMRS_FPINST)
                        .Case("fpinst2", ARM::VMRS_FPINST2)
                        .Default(0);

  // If an opcode was found then we can lower the read to a VFP instruction.
  if (Opcode) {
    // Reject VFP registers when the subtarget has no VFP, and mvfr2 when it
    // lacks FP-ARMv8 (where that register was introduced).
    if (!Subtarget->hasVFP2Base())
      return false;
    if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8Base())
      return false;

    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N,
                CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // If the target is M Class then need to validate that the register string
  // is an acceptable value, so check that a mask can be constructed from the
  // string.
  if (Subtarget->isMClass()) {
    int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
    if (SYSmValue == -1)
      return false;

    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
                      getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
                      N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // Here we know the target is not M Class so we need to check if it is one
  // of the remaining possible values which are apsr, cpsr or spsr.
  if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS,
                                          DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  if (SpecialReg == "spsr") {
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, DL,
                                  MVT::i32, MVT::Other, Ops));
    return true;
  }

  // Unrecognised register string: let generic selection report the failure.
  return false;
}
5610
// Lower the write_register intrinsic to ARM specific DAG nodes
// using the supplied metadata string to select the instruction node to use
// and the registers/masks to use in the nodes
bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){
  // Operand 1 carries the metadata node that holds the register name string;
  // operand 2 (and 3 for 64-bit writes) hold the value(s) to be written.
  const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
  const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  // If the string is of the ACLE coprocessor form (cp<n>:<opc1>:c<CRn>...),
  // Ops receives the decoded integer fields; otherwise it stays empty.
  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE) then need to lower to MCR node (32 bit) or
    // MCRR node(64 bit), we can make the distinction based on the number of
    // operands we have.
    unsigned Opcode;
    if (Ops.size() == 5) {
      Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
      // Insert the value to write after coprocessor number and opc1.
      Ops.insert(Ops.begin()+2, N->getOperand(2));
    } else {
      assert(Ops.size() == 3 &&
             "Invalid number of fields in special register string.");
      Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
      SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
      Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);
    }

    // Append the standard predicate operands (AL condition, no CC register)
    // and the incoming chain.
    Ops.push_back(getAL(CurDAG, DL));
    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
    Ops.push_back(N->getOperand(0));

    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
    return true;
  }

  // Named-register lookups below are case-insensitive.
  std::string SpecialReg = RegString->getString().lower();
  int BankedReg = getBankedRegisterMask(SpecialReg);
  if (BankedReg != -1) {
    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
                                  DL, MVT::Other, Ops));
    return true;
  }

  // The VFP registers are written to by creating SelectionDAG nodes with
  // opcodes corresponding to the register that is being written. So we switch
  // on the string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
                        .Case("fpscr", ARM::VMSR)
                        .Case("fpexc", ARM::VMSR_FPEXC)
                        .Case("fpsid", ARM::VMSR_FPSID)
                        .Case("fpinst", ARM::VMSR_FPINST)
                        .Case("fpinst2", ARM::VMSR_FPINST2)
                        .Default(0);

  if (Opcode) {
    if (!Subtarget->hasVFP2Base())
      return false;
    Ops = { N->getOperand(2), getAL(CurDAG, DL),
            CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
    return true;
  }

  // Split e.g. "cpsr_fc" into the register name and its flags suffix; if there
  // is no '_', the whole string is the register name and Flags is empty.
  std::pair<StringRef, StringRef> Fields;
  Fields = StringRef(SpecialReg).rsplit('_');
  std::string Reg = Fields.first.str();
  StringRef Flags = Fields.second;

  // If the target was M Class then need to validate the special register value
  // and retrieve the mask for use in the instruction node.
  if (Subtarget->isMClass()) {
    int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
    if (SYSmValue == -1)
      return false;

    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
                      N->getOperand(2), getAL(CurDAG, DL),
                      CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops));
    return true;
  }

  // We then check to see if a valid mask can be constructed for one of the
  // register string values permitted for the A and R class cores. These values
  // are apsr, spsr and cpsr; these are also valid on older cores.
  int Mask = getARClassRegisterMask(Reg, Flags);
  if (Mask != -1) {
    Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
                                          DL, MVT::Other, Ops));
    return true;
  }

  // Unrecognised register string: let generic selection report the failure.
  return false;
}
5714
5715bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){
5716 std::vector<SDValue> AsmNodeOperands;
5718 bool Changed = false;
5719 unsigned NumOps = N->getNumOperands();
5720
5721 // Normally, i64 data is bounded to two arbitrary GRPs for "%r" constraint.
5722 // However, some instrstions (e.g. ldrexd/strexd in ARM mode) require
5723 // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
5724 // respectively. Since there is no constraint to explicitly specify a
5725 // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb,
5726 // the 64-bit data may be referred by H, Q, R modifiers, so we still pack
5727 // them into a GPRPair.
5728
5729 SDLoc dl(N);
5730 SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps - 1) : SDValue();
5731
5732 SmallVector<bool, 8> OpChanged;
5733 // Glue node will be appended late.
5734 for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) {
5735 SDValue op = N->getOperand(i);
5736 AsmNodeOperands.push_back(op);
5737
5739 continue;
5740
5741 if (const auto *C = dyn_cast<ConstantSDNode>(N->getOperand(i)))
5742 Flag = InlineAsm::Flag(C->getZExtValue());
5743 else
5744 continue;
5745
5746 // Immediate operands to inline asm in the SelectionDAG are modeled with
5747 // two operands. The first is a constant of value InlineAsm::Kind::Imm, and
5748 // the second is a constant with the value of the immediate. If we get here
5749 // and we have a Kind::Imm, skip the next operand, and continue.
5750 if (Flag.isImmKind()) {
5751 SDValue op = N->getOperand(++i);
5752 AsmNodeOperands.push_back(op);
5753 continue;
5754 }
5755
5756 const unsigned NumRegs = Flag.getNumOperandRegisters();
5757 if (NumRegs)
5758 OpChanged.push_back(false);
5759
5760 unsigned DefIdx = 0;
5761 bool IsTiedToChangedOp = false;
5762 // If it's a use that is tied with a previous def, it has no
5763 // reg class constraint.
5764 if (Changed && Flag.isUseOperandTiedToDef(DefIdx))
5765 IsTiedToChangedOp = OpChanged[DefIdx];
5766
5767 // Memory operands to inline asm in the SelectionDAG are modeled with two
5768 // operands: a constant of value InlineAsm::Kind::Mem followed by the input
5769 // operand. If we get here and we have a Kind::Mem, skip the next operand
5770 // (so it doesn't get misinterpreted), and continue. We do this here because
5771 // it's important to update the OpChanged array correctly before moving on.
5772 if (Flag.isMemKind()) {
5773 SDValue op = N->getOperand(++i);
5774 AsmNodeOperands.push_back(op);
5775 continue;
5776 }
5777
5778 if (!Flag.isRegUseKind() && !Flag.isRegDefKind() &&
5779 !Flag.isRegDefEarlyClobberKind())
5780 continue;
5781
5782 unsigned RC;
5783 const bool HasRC = Flag.hasRegClassConstraint(RC);
5784 if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
5785 || NumRegs != 2)
5786 continue;
5787
5788 assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
5789 SDValue V0 = N->getOperand(i+1);
5790 SDValue V1 = N->getOperand(i+2);
5791 Register Reg0 = cast<RegisterSDNode>(V0)->getReg();
5792 Register Reg1 = cast<RegisterSDNode>(V1)->getReg();
5793 SDValue PairedReg;
5795
5796 if (Flag.isRegDefKind() || Flag.isRegDefEarlyClobberKind()) {
5797 // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
5798 // the original GPRs.
5799
5800 Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
5801 PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
5802 SDValue Chain = SDValue(N,0);
5803
5804 SDNode *GU = N->getGluedUser();
5805 SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
5806 Chain.getValue(1));
5807
5808 // Extract values from a GPRPair reg and copy to the original GPR reg.
5809 SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
5810 RegCopy);
5811 SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
5812 RegCopy);
5813 SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
5814 RegCopy.getValue(1));
5815 SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));
5816
5817 // Update the original glue user.
5818 std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
5819 Ops.push_back(T1.getValue(1));
5820 CurDAG->UpdateNodeOperands(GU, Ops);
5821 } else {
5822 // For Kind == InlineAsm::Kind::RegUse, we first copy two GPRs into a
5823 // GPRPair and then pass the GPRPair to the inline asm.
5824 SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];
5825
5826 // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
5827 SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
5828 Chain.getValue(1));
5829 SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
5830 T0.getValue(1));
5831 SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);
5832
5833 // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
5834 // i32 VRs of inline asm with it.
5835 Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
5836 PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
5837 Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));
5838
5839 AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
5840 Glue = Chain.getValue(1);
5841 }
5842
5843 Changed = true;
5844
5845 if(PairedReg.getNode()) {
5846 OpChanged[OpChanged.size() -1 ] = true;
5847 Flag = InlineAsm::Flag(Flag.getKind(), 1 /* RegNum*/);
5848 if (IsTiedToChangedOp)
5849 Flag.setMatchingOp(DefIdx);
5850 else
5851 Flag.setRegClass(ARM::GPRPairRegClassID);
5852 // Replace the current flag.
5853 AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant(
5854 Flag, dl, MVT::i32);
5855 // Add the new register node and skip the original two GPRs.
5856 AsmNodeOperands.push_back(PairedReg);
5857 // Skip the next two GPRs.
5858 i += 2;
5859 }
5860 }
5861
5862 if (Glue.getNode())
5863 AsmNodeOperands.push_back(Glue);
5864 if (!Changed)
5865 return false;
5866
5867 SDValue New = CurDAG->getNode(N->getOpcode(), SDLoc(N),
5868 CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
5869 New->setNodeId(-1);
5870 ReplaceNode(N, New.getNode());
5871 return true;
5872}
5873
5874bool ARMDAGToDAGISel::SelectInlineAsmMemoryOperand(
5875 const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
5876 std::vector<SDValue> &OutOps) {
5877 switch(ConstraintID) {
5878 default:
5879 llvm_unreachable("Unexpected asm memory constraint");
5880 case InlineAsm::ConstraintCode::m:
5881 case InlineAsm::ConstraintCode::o:
5882 case InlineAsm::ConstraintCode::Q:
5883 case InlineAsm::ConstraintCode::Um:
5884 case InlineAsm::ConstraintCode::Un:
5885 case InlineAsm::ConstraintCode::Uq:
5886 case InlineAsm::ConstraintCode::Us:
5887 case InlineAsm::ConstraintCode::Ut:
5888 case InlineAsm::ConstraintCode::Uv:
5889 case InlineAsm::ConstraintCode::Uy:
5890 // Require the address to be in a register. That is safe for all ARM
5891 // variants and it is hard to do anything much smarter without knowing
5892 // how the operand is used.
5893 OutOps.push_back(Op);
5894 return false;
5895 }
5896 return true;
5897}
5898
5899/// createARMISelDag - This pass converts a legalized DAG into a
5900/// ARM-specific DAG, ready for instruction scheduling.
5901///
5903 CodeGenOptLevel OptLevel) {
5904 return new ARMDAGToDAGISelLegacy(TM, OptLevel);
5905}
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V)
amdgpu AMDGPU Register Bank Select
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
static bool isThumb(const MCSubtargetInfo &STI)
static unsigned getVectorShuffleOpcode(EVT VT, unsigned Opc64[3], unsigned Opc128[3])
static int getBankedRegisterMask(StringRef RegString)
static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs)
Returns true if the given increment is a Constant known to be equal to the access size performed by a...
static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc)
static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned &Imm)
static bool isVSTfixed(unsigned Opc)
static bool isVLDfixed(unsigned Opc)
static bool isInt32Immediate(SDNode *N, unsigned &Imm)
isInt32Immediate - This method tests to see if the node is a 32-bit constant operand.
static std::optional< std::pair< unsigned, unsigned > > getContiguousRangeOfSetBits(const APInt &A)
static void getIntOperandsFromRegisterString(StringRef RegString, SelectionDAG *CurDAG, const SDLoc &DL, std::vector< SDValue > &Ops)
static int getARClassRegisterMask(StringRef Reg, StringRef Flags)
static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget)
static cl::opt< bool > DisableShifterOp("disable-shifter-op", cl::Hidden, cl::desc("Disable isel of shifter-op"), cl::init(false))
#define PASS_NAME
#define DEBUG_TYPE
static SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl)
getAL - Returns a ARMCC::AL immediate node.
static bool shouldUseZeroOffsetLdSt(SDValue N)
static int getMClassFlagsMask(StringRef Flags)
static bool SDValueToConstBool(SDValue SDVal)
static bool isScaledConstantInRange(SDValue Node, int Scale, int RangeMin, int RangeMax, int &ScaledConstant)
Check whether a particular node is a constant value representable as (N * Scale) where (N in [RangeMi...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
uint64_t Addr
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static bool isSigned(unsigned int Opcode)
#define op(i)
const HexagonInstrInfo * TII
#define I(x, y, z)
Definition: MD5.cpp:58
#define T1
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
support::ulittle16_t & Lo
Definition: aarch32.cpp:206
support::ulittle16_t & Hi
Definition: aarch32.cpp:205
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition: APFloat.h:1241
Class for arbitrary precision integers.
Definition: APInt.h:78
An arbitrary precision integer that knows its signedness.
Definition: APSInt.h:23
bool isSwift() const
Definition: ARMSubtarget.h:257
bool isThumb1Only() const
Definition: ARMSubtarget.h:364
bool hasFPARMv8Base() const
Definition: ARMSubtarget.h:274
bool isThumb2() const
Definition: ARMSubtarget.h:365
bool isLikeA9() const
Definition: ARMSubtarget.h:260
bool hasVFP2Base() const
Definition: ARMSubtarget.h:271
bool isLittle() const
Definition: ARMSubtarget.h:407
bool isMClass() const
Definition: ARMSubtarget.h:366
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
uint64_t getZExtValue() const
This class represents an Operation in the Expression.
Container class for subtarget features.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
This class is used to form a handle around another node that is persistent and is updated across invo...
Base class for LoadSDNode and StoreSDNode.
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc,...
An instruction for reading from memory.
Definition: Instructions.h:174
This class is used to represent ISD::LOAD nodes.
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
bool mayStore() const
Return true if this instruction could possibly modify memory.
Definition: MCInstrDesc.h:444
unsigned getOpcode() const
Return the opcode number for this descriptor.
Definition: MCInstrDesc.h:230
SimpleValueType SimpleTy
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
An SDNode that represents everything that will be needed to construct a MachineInstr.
This class is used to represent an MLOAD node.
This is an abstract virtual class for memory operations.
Align getAlign() const
EVT getMemoryVT() const
Return the type of the in-memory value.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
int getNodeId() const
Return the unique node id.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
op_iterator op_end() const
op_iterator op_begin() const
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
unsigned getOpcode() const
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, InlineAsm::ConstraintCode ConstraintID, std::vector< SDValue > &OutOps)
SelectInlineAsmMemoryOperand - Select the specified address as a target addressing mode,...
virtual bool ComplexPatternFuncMutatesDAG() const
Return true if complex patterns for this target can mutate the DAG.
virtual void PreprocessISelDAG()
PreprocessISelDAG - This hook allows targets to hack on the graph before instruction selection starts...
virtual bool runOnMachineFunction(MachineFunction &mf)
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:226
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:690
size_t size() const
Definition: SmallVector.h:91
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
This class is used to represent ISD::STORE nodes.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition: StringRef.h:685
std::string lower() const
Definition: StringRef.cpp:111
std::pair< StringRef, StringRef > rsplit(StringRef Separator) const
Split into two substrings around the last occurrence of a separator string.
Definition: StringRef.h:718
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static IntegerType * getInt32Ty(LLVMContext &C)
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
LLVM Value Representation.
Definition: Value.h:74
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
static ShiftOpc getShiftOpcForNode(unsigned Opcode)
int getSOImmVal(unsigned Arg)
getSOImmVal - Given a 32-bit immediate, if it is something that can fit into an shifter_operand immed...
uint64_t decodeVMOVModImm(unsigned ModImm, unsigned &EltBits)
decodeVMOVModImm - Decode a NEON/MVE modified immediate value into the element value and the element ...
float getFPImmFloat(unsigned Imm)
int getT2SOImmVal(unsigned Arg)
getT2SOImmVal - Given a 32-bit immediate, if it is something that can fit into a Thumb-2 shifter_oper...
unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO, unsigned IdxMode=0)
unsigned getAM5Opc(AddrOpc Opc, unsigned char Offset)
getAM5Opc - This function encodes the addrmode5 opc field.
unsigned getAM5FP16Opc(AddrOpc Opc, unsigned char Offset)
getAM5FP16Opc - This function encodes the addrmode5fp16 opc field.
unsigned getAM3Opc(AddrOpc Opc, unsigned char Offset, unsigned IdxMode=0)
getAM3Opc - This function encodes the addrmode3 opc field.
unsigned getSORegOpc(ShiftOpc ShOp, unsigned Imm)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ TargetConstantPool
Definition: ISDOpcodes.h:174
@ MLOAD
Masked load and store - consecutive vector load and store operations with additional mask operand tha...
Definition: ISDOpcodes.h:1355
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:257
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1099
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:205
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:840
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:397
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:953
@ FrameIndex
Definition: ISDOpcodes.h:80
@ TargetExternalSymbol
Definition: ISDOpcodes.h:175
@ WRITE_REGISTER
Definition: ISDOpcodes.h:125
@ BasicBlock
Various leaf nodes.
Definition: ISDOpcodes.h:71
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:215
@ TargetGlobalAddress
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition: ISDOpcodes.h:170
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:734
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition: ISDOpcodes.h:124
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:549
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:209
@ FP_TO_UINT_SAT
Definition: ISDOpcodes.h:906
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1316
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:848
@ INLINEASM_BR
INLINEASM_BR - Branching version of inline asm. Used by asm-goto.
Definition: ISDOpcodes.h:1165
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:886
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:708
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:190
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:538
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:919
@ INLINEASM
INLINEASM - Represents an inline asm block.
Definition: ISDOpcodes.h:1162
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition: ISDOpcodes.h:905
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:198
@ TargetGlobalTLSAddress
Definition: ISDOpcodes.h:171
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1552
Flag
These should be considered private to the implementation of the MCInstrDesc class.
Definition: MCInstrDesc.h:148
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition: bit.h:307
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:656
FunctionPass * createARMISelDag(ARMBaseTargetMachine &TM, CodeGenOptLevel OptLevel)
createARMISelDag - This pass converts a legalized DAG into a ARM-specific DAG, ready for instruction ...
constexpr bool isShiftedMask_32(uint32_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (32 bit ver...
Definition: MathExtras.h:279
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: bit.h:215
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:340
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
CodeGenOptLevel
Code generation optimization level.
Definition: CodeGen.h:54
unsigned ConstantMaterializationCost(unsigned Val, const ARMSubtarget *Subtarget, bool ForCodesize=false)
Returns the number of instructions required to materialize the given constant in a register,...
@ FMul
Product of floats.
@ And
Bitwise or logical AND of integers.
@ Add
Sum of integers.
@ NearestTiesToEven
roundTiesToEven.
Implement std::hash so that hash_code can be used in STL containers.
Definition: BitVector.h:858
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
#define NC
Definition: regutils.h:42
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Extended Value Type.
Definition: ValueTypes.h:35
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:74
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:359
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:371
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:307
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition: ValueTypes.h:204
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:168
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:319
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:327
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:152
bool is64BitVector() const
Return true if this is a 64-bit vector type.
Definition: ValueTypes.h:199
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasNoInfs() const