// LLVM 20.0.0git — ARMISelDAGToDAG.cpp (doxygen source listing).
// "Go to the documentation of this file." was a navigation link on the
// original page; converted to a comment so the file remains valid C++.
//===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the ARM target.
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMTargetMachine.h"
#include "Utils/ARMBaseInfo.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include <optional>

38using namespace llvm;
39
40#define DEBUG_TYPE "arm-isel"
41#define PASS_NAME "ARM Instruction Selection"
42
43static cl::opt<bool>
44DisableShifterOp("disable-shifter-op", cl::Hidden,
45 cl::desc("Disable isel of shifter-op"),
46 cl::init(false));
47
//===--------------------------------------------------------------------===//
/// ARMDAGToDAGISel - ARM specific code to select ARM machine
/// instructions for SelectionDAG operations.
///
52namespace {
53
54class ARMDAGToDAGISel : public SelectionDAGISel {
55 /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
56 /// make the right decision when generating code for different targets.
57 const ARMSubtarget *Subtarget;
58
59public:
60 ARMDAGToDAGISel() = delete;
61
62 explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOptLevel OptLevel)
63 : SelectionDAGISel(tm, OptLevel) {}
64
65 bool runOnMachineFunction(MachineFunction &MF) override {
66 // Reset the subtarget each time through.
67 Subtarget = &MF.getSubtarget<ARMSubtarget>();
69 return true;
70 }
71
72 void PreprocessISelDAG() override;
73
74 /// getI32Imm - Return a target constant of type i32 with the specified
75 /// value.
76 inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
77 return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
78 }
79
80 void Select(SDNode *N) override;
81
82 /// Return true as some complex patterns, like those that call
83 /// canExtractShiftFromMul can modify the DAG inplace.
84 bool ComplexPatternFuncMutatesDAG() const override { return true; }
85
86 bool hasNoVMLxHazardUse(SDNode *N) const;
87 bool isShifterOpProfitable(const SDValue &Shift,
88 ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
89 bool SelectRegShifterOperand(SDValue N, SDValue &A,
90 SDValue &B, SDValue &C,
91 bool CheckProfitability = true);
92 bool SelectImmShifterOperand(SDValue N, SDValue &A,
93 SDValue &B, bool CheckProfitability = true);
94 bool SelectShiftRegShifterOperand(SDValue N, SDValue &A, SDValue &B,
95 SDValue &C) {
96 // Don't apply the profitability check
97 return SelectRegShifterOperand(N, A, B, C, false);
98 }
99 bool SelectShiftImmShifterOperand(SDValue N, SDValue &A, SDValue &B) {
100 // Don't apply the profitability check
101 return SelectImmShifterOperand(N, A, B, false);
102 }
103 bool SelectShiftImmShifterOperandOneUse(SDValue N, SDValue &A, SDValue &B) {
104 if (!N.hasOneUse())
105 return false;
106 return SelectImmShifterOperand(N, A, B, false);
107 }
108
109 bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out);
110
111 bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
112 bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);
113
114 bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
115 SDValue &Offset, SDValue &Opc);
116 bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
117 SDValue &Offset, SDValue &Opc);
118 bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
119 SDValue &Offset, SDValue &Opc);
120 bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
121 bool SelectAddrMode3(SDValue N, SDValue &Base,
122 SDValue &Offset, SDValue &Opc);
123 bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
124 SDValue &Offset, SDValue &Opc);
125 bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, bool FP16);
126 bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset);
127 bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset);
128 bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
129 bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);
130
131 bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);
132
133 // Thumb Addressing Modes:
134 bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
135 bool SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, SDValue &Offset);
136 bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
137 SDValue &OffImm);
138 bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
139 SDValue &OffImm);
140 bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
141 SDValue &OffImm);
142 bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
143 SDValue &OffImm);
144 bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
145 template <unsigned Shift>
146 bool SelectTAddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);
147
148 // Thumb 2 Addressing Modes:
149 bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
150 template <unsigned Shift>
151 bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, SDValue &OffImm);
152 bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
153 SDValue &OffImm);
154 bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
155 SDValue &OffImm);
156 template <unsigned Shift>
157 bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm);
158 bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm,
159 unsigned Shift);
160 template <unsigned Shift>
161 bool SelectT2AddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);
162 bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
163 SDValue &OffReg, SDValue &ShImm);
164 bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);
165
166 template<int Min, int Max>
167 bool SelectImmediateInRange(SDValue N, SDValue &OffImm);
168
169 inline bool is_so_imm(unsigned Imm) const {
170 return ARM_AM::getSOImmVal(Imm) != -1;
171 }
172
173 inline bool is_so_imm_not(unsigned Imm) const {
174 return ARM_AM::getSOImmVal(~Imm) != -1;
175 }
176
177 inline bool is_t2_so_imm(unsigned Imm) const {
178 return ARM_AM::getT2SOImmVal(Imm) != -1;
179 }
180
181 inline bool is_t2_so_imm_not(unsigned Imm) const {
182 return ARM_AM::getT2SOImmVal(~Imm) != -1;
183 }
184
185 // Include the pieces autogenerated from the target description.
186#include "ARMGenDAGISel.inc"
187
188private:
189 void transferMemOperands(SDNode *Src, SDNode *Dst);
190
191 /// Indexed (pre/post inc/dec) load matching code for ARM.
192 bool tryARMIndexedLoad(SDNode *N);
193 bool tryT1IndexedLoad(SDNode *N);
194 bool tryT2IndexedLoad(SDNode *N);
195 bool tryMVEIndexedLoad(SDNode *N);
196 bool tryFMULFixed(SDNode *N, SDLoc dl);
197 bool tryFP_TO_INT(SDNode *N, SDLoc dl);
198 bool transformFixedFloatingPointConversion(SDNode *N, SDNode *FMul,
199 bool IsUnsigned,
200 bool FixedToFloat);
201
202 /// SelectVLD - Select NEON load intrinsics. NumVecs should be
203 /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for
204 /// loads of D registers and even subregs and odd subregs of Q registers.
205 /// For NumVecs <= 2, QOpcodes1 is not used.
206 void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
207 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
208 const uint16_t *QOpcodes1);
209
210 /// SelectVST - Select NEON store intrinsics. NumVecs should
211 /// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for
212 /// stores of D registers and even subregs and odd subregs of Q registers.
213 /// For NumVecs <= 2, QOpcodes1 is not used.
214 void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
215 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
216 const uint16_t *QOpcodes1);
217
218 /// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should
219 /// be 2, 3 or 4. The opcode arrays specify the instructions used for
220 /// load/store of D registers and Q registers.
221 void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
222 unsigned NumVecs, const uint16_t *DOpcodes,
223 const uint16_t *QOpcodes);
224
225 /// Helper functions for setting up clusters of MVE predication operands.
226 template <typename SDValueVector>
227 void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
228 SDValue PredicateMask);
229 template <typename SDValueVector>
230 void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
231 SDValue PredicateMask, SDValue Inactive);
232
233 template <typename SDValueVector>
234 void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc);
235 template <typename SDValueVector>
236 void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, EVT InactiveTy);
237
238 /// SelectMVE_WB - Select MVE writeback load/store intrinsics.
239 void SelectMVE_WB(SDNode *N, const uint16_t *Opcodes, bool Predicated);
240
241 /// SelectMVE_LongShift - Select MVE 64-bit scalar shift intrinsics.
242 void SelectMVE_LongShift(SDNode *N, uint16_t Opcode, bool Immediate,
243 bool HasSaturationOperand);
244
245 /// SelectMVE_VADCSBC - Select MVE vector add/sub-with-carry intrinsics.
246 void SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
247 uint16_t OpcodeWithNoCarry, bool Add, bool Predicated);
248
249 /// SelectMVE_VSHLC - Select MVE intrinsics for a shift that carries between
250 /// vector lanes.
251 void SelectMVE_VSHLC(SDNode *N, bool Predicated);
252
253 /// Select long MVE vector reductions with two vector operands
254 /// Stride is the number of vector element widths the instruction can operate
255 /// on:
256 /// 2 for long non-rounding variants, vml{a,s}ldav[a][x]: [i16, i32]
257 /// 1 for long rounding variants: vrml{a,s}ldavh[a][x]: [i32]
258 /// Stride is used when addressing the OpcodesS array which contains multiple
259 /// opcodes for each element width.
260 /// TySize is the index into the list of element types listed above
261 void SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
262 const uint16_t *OpcodesS, const uint16_t *OpcodesU,
263 size_t Stride, size_t TySize);
264
265 /// Select a 64-bit MVE vector reduction with two vector operands
266 /// arm_mve_vmlldava_[predicated]
267 void SelectMVE_VMLLDAV(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
268 const uint16_t *OpcodesU);
269 /// Select a 72-bit MVE vector rounding reduction with two vector operands
270 /// int_arm_mve_vrmlldavha[_predicated]
271 void SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
272 const uint16_t *OpcodesU);
273
274 /// SelectMVE_VLD - Select MVE interleaving load intrinsics. NumVecs
275 /// should be 2 or 4. The opcode array specifies the instructions
276 /// used for 8, 16 and 32-bit lane sizes respectively, and each
277 /// pointer points to a set of NumVecs sub-opcodes used for the
278 /// different stages (e.g. VLD20 versus VLD21) of each load family.
279 void SelectMVE_VLD(SDNode *N, unsigned NumVecs,
280 const uint16_t *const *Opcodes, bool HasWriteback);
281
282 /// SelectMVE_VxDUP - Select MVE incrementing-dup instructions. Opcodes is an
283 /// array of 3 elements for the 8, 16 and 32-bit lane sizes.
284 void SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes,
285 bool Wrapping, bool Predicated);
286
287 /// Select SelectCDE_CXxD - Select CDE dual-GPR instruction (one of CX1D,
288 /// CX1DA, CX2D, CX2DA, CX3, CX3DA).
289 /// \arg \c NumExtraOps number of extra operands besides the coprocossor,
290 /// the accumulator and the immediate operand, i.e. 0
291 /// for CX1*, 1 for CX2*, 2 for CX3*
292 /// \arg \c HasAccum whether the instruction has an accumulator operand
293 void SelectCDE_CXxD(SDNode *N, uint16_t Opcode, size_t NumExtraOps,
294 bool HasAccum);
295
296 /// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs
297 /// should be 1, 2, 3 or 4. The opcode array specifies the instructions used
298 /// for loading D registers.
299 void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating,
300 unsigned NumVecs, const uint16_t *DOpcodes,
301 const uint16_t *QOpcodes0 = nullptr,
302 const uint16_t *QOpcodes1 = nullptr);
303
304 /// Try to select SBFX/UBFX instructions for ARM.
305 bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
306
307 bool tryInsertVectorElt(SDNode *N);
308
309 // Select special operations if node forms integer ABS pattern
310 bool tryABSOp(SDNode *N);
311
312 bool tryReadRegister(SDNode *N);
313 bool tryWriteRegister(SDNode *N);
314
315 bool tryInlineAsm(SDNode *N);
316
317 void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);
318
319 void SelectCMP_SWAP(SDNode *N);
320
321 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
322 /// inline asm expressions.
324 InlineAsm::ConstraintCode ConstraintID,
325 std::vector<SDValue> &OutOps) override;
326
327 // Form pairs of consecutive R, S, D, or Q registers.
329 SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
330 SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
331 SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);
332
333 // Form sequences of 4 consecutive S, D, or Q registers.
334 SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
335 SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
336 SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
337
338 // Get the alignment operand for a NEON VLD or VST instruction.
339 SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
340 bool is64BitVector);
341
342 /// Checks if N is a multiplication by a constant where we can extract out a
343 /// power of two from the constant so that it can be used in a shift, but only
344 /// if it simplifies the materialization of the constant. Returns true if it
345 /// is, and assigns to PowerOfTwo the power of two that should be extracted
346 /// out and to NewMulConst the new constant to be multiplied by.
347 bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
348 unsigned &PowerOfTwo, SDValue &NewMulConst) const;
349
350 /// Replace N with M in CurDAG, in a way that also ensures that M gets
351 /// selected when N would have been selected.
352 void replaceDAGValue(const SDValue &N, SDValue M);
353};
354
355class ARMDAGToDAGISelLegacy : public SelectionDAGISelLegacy {
356public:
357 static char ID;
358 ARMDAGToDAGISelLegacy(ARMBaseTargetMachine &tm, CodeGenOptLevel OptLevel)
360 ID, std::make_unique<ARMDAGToDAGISel>(tm, OptLevel)) {}
361};
362}
363
364char ARMDAGToDAGISelLegacy::ID = 0;
365
366INITIALIZE_PASS(ARMDAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
367
368/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
369/// operand. If so Imm will receive the 32-bit value.
370static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
371 if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
372 Imm = N->getAsZExtVal();
373 return true;
374 }
375 return false;
376}
377
378// isInt32Immediate - This method tests to see if a constant operand.
379// If so Imm will receive the 32 bit value.
380static bool isInt32Immediate(SDValue N, unsigned &Imm) {
381 return isInt32Immediate(N.getNode(), Imm);
382}
383
384// isOpcWithIntImmediate - This method tests to see if the node is a specific
385// opcode and that it has a immediate integer right operand.
386// If so Imm will receive the 32 bit value.
387static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
388 return N->getOpcode() == Opc &&
389 isInt32Immediate(N->getOperand(1).getNode(), Imm);
390}
391
392/// Check whether a particular node is a constant value representable as
393/// (N * Scale) where (N in [\p RangeMin, \p RangeMax).
394///
395/// \param ScaledConstant [out] - On success, the pre-scaled constant value.
396static bool isScaledConstantInRange(SDValue Node, int Scale,
397 int RangeMin, int RangeMax,
398 int &ScaledConstant) {
399 assert(Scale > 0 && "Invalid scale!");
400
401 // Check that this is a constant.
402 const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
403 if (!C)
404 return false;
405
406 ScaledConstant = (int) C->getZExtValue();
407 if ((ScaledConstant % Scale) != 0)
408 return false;
409
410 ScaledConstant /= Scale;
411 return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
412}
413
414void ARMDAGToDAGISel::PreprocessISelDAG() {
415 if (!Subtarget->hasV6T2Ops())
416 return;
417
418 bool isThumb2 = Subtarget->isThumb();
419 // We use make_early_inc_range to avoid invalidation issues.
420 for (SDNode &N : llvm::make_early_inc_range(CurDAG->allnodes())) {
421 if (N.getOpcode() != ISD::ADD)
422 continue;
423
424 // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
425 // leading zeros, followed by consecutive set bits, followed by 1 or 2
426 // trailing zeros, e.g. 1020.
427 // Transform the expression to
428 // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
429 // of trailing zeros of c2. The left shift would be folded as an shifter
430 // operand of 'add' and the 'and' and 'srl' would become a bits extraction
431 // node (UBFX).
432
433 SDValue N0 = N.getOperand(0);
434 SDValue N1 = N.getOperand(1);
435 unsigned And_imm = 0;
436 if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
437 if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
438 std::swap(N0, N1);
439 }
440 if (!And_imm)
441 continue;
442
443 // Check if the AND mask is an immediate of the form: 000.....1111111100
444 unsigned TZ = llvm::countr_zero(And_imm);
445 if (TZ != 1 && TZ != 2)
446 // Be conservative here. Shifter operands aren't always free. e.g. On
447 // Swift, left shifter operand of 1 / 2 for free but others are not.
448 // e.g.
449 // ubfx r3, r1, #16, #8
450 // ldr.w r3, [r0, r3, lsl #2]
451 // vs.
452 // mov.w r9, #1020
453 // and.w r2, r9, r1, lsr #14
454 // ldr r2, [r0, r2]
455 continue;
456 And_imm >>= TZ;
457 if (And_imm & (And_imm + 1))
458 continue;
459
460 // Look for (and (srl X, c1), c2).
461 SDValue Srl = N1.getOperand(0);
462 unsigned Srl_imm = 0;
463 if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
464 (Srl_imm <= 2))
465 continue;
466
467 // Make sure first operand is not a shifter operand which would prevent
468 // folding of the left shift.
469 SDValue CPTmp0;
470 SDValue CPTmp1;
471 SDValue CPTmp2;
472 if (isThumb2) {
473 if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
474 continue;
475 } else {
476 if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
477 SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
478 continue;
479 }
480
481 // Now make the transformation.
482 Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
483 Srl.getOperand(0),
484 CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
485 MVT::i32));
486 N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
487 Srl,
488 CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
489 N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
490 N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
491 CurDAG->UpdateNodeOperands(&N, N0, N1);
492 }
493}
494
495/// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
496/// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
497/// least on current ARM implementations) which should be avoidded.
498bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
499 if (OptLevel == CodeGenOptLevel::None)
500 return true;
501
502 if (!Subtarget->hasVMLxHazards())
503 return true;
504
505 if (!N->hasOneUse())
506 return false;
507
508 SDNode *User = *N->user_begin();
509 if (User->getOpcode() == ISD::CopyToReg)
510 return true;
511 if (User->isMachineOpcode()) {
512 const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
513 CurDAG->getSubtarget().getInstrInfo());
514
515 const MCInstrDesc &MCID = TII->get(User->getMachineOpcode());
516 if (MCID.mayStore())
517 return true;
518 unsigned Opcode = MCID.getOpcode();
519 if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
520 return true;
521 // vmlx feeding into another vmlx. We actually want to unfold
522 // the use later in the MLxExpansion pass. e.g.
523 // vmla
524 // vmla (stall 8 cycles)
525 //
526 // vmul (5 cycles)
527 // vadd (5 cycles)
528 // vmla
529 // This adds up to about 18 - 19 cycles.
530 //
531 // vmla
532 // vmul (stall 4 cycles)
533 // vadd adds up to about 14 cycles.
534 return TII->isFpMLxInstruction(Opcode);
535 }
536
537 return false;
538}
539
540bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
541 ARM_AM::ShiftOpc ShOpcVal,
542 unsigned ShAmt) {
543 if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
544 return true;
545 if (Shift.hasOneUse())
546 return true;
547 // R << 2 is free.
548 return ShOpcVal == ARM_AM::lsl &&
549 (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
550}
551
552bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
553 unsigned MaxShift,
554 unsigned &PowerOfTwo,
555 SDValue &NewMulConst) const {
556 assert(N.getOpcode() == ISD::MUL);
557 assert(MaxShift > 0);
558
559 // If the multiply is used in more than one place then changing the constant
560 // will make other uses incorrect, so don't.
561 if (!N.hasOneUse()) return false;
562 // Check if the multiply is by a constant
563 ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
564 if (!MulConst) return false;
565 // If the constant is used in more than one place then modifying it will mean
566 // we need to materialize two constants instead of one, which is a bad idea.
567 if (!MulConst->hasOneUse()) return false;
568 unsigned MulConstVal = MulConst->getZExtValue();
569 if (MulConstVal == 0) return false;
570
571 // Find the largest power of 2 that MulConstVal is a multiple of
572 PowerOfTwo = MaxShift;
573 while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
574 --PowerOfTwo;
575 if (PowerOfTwo == 0) return false;
576 }
577
578 // Only optimise if the new cost is better
579 unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
580 NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
581 unsigned OldCost = ConstantMaterializationCost(MulConstVal, Subtarget);
582 unsigned NewCost = ConstantMaterializationCost(NewMulConstVal, Subtarget);
583 return NewCost < OldCost;
584}
585
586void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
587 CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
588 ReplaceUses(N, M);
589}
590
591bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
592 SDValue &BaseReg,
593 SDValue &Opc,
594 bool CheckProfitability) {
596 return false;
597
598 // If N is a multiply-by-constant and it's profitable to extract a shift and
599 // use it in a shifted operand do so.
600 if (N.getOpcode() == ISD::MUL) {
601 unsigned PowerOfTwo = 0;
602 SDValue NewMulConst;
603 if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
604 HandleSDNode Handle(N);
605 SDLoc Loc(N);
606 replaceDAGValue(N.getOperand(1), NewMulConst);
607 BaseReg = Handle.getValue();
608 Opc = CurDAG->getTargetConstant(
609 ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32);
610 return true;
611 }
612 }
613
614 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
615
616 // Don't match base register only case. That is matched to a separate
617 // lower complexity pattern with explicit register operand.
618 if (ShOpcVal == ARM_AM::no_shift) return false;
619
620 BaseReg = N.getOperand(0);
621 unsigned ShImmVal = 0;
622 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
623 if (!RHS) return false;
624 ShImmVal = RHS->getZExtValue() & 31;
625 Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
626 SDLoc(N), MVT::i32);
627 return true;
628}
629
630bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
631 SDValue &BaseReg,
632 SDValue &ShReg,
633 SDValue &Opc,
634 bool CheckProfitability) {
636 return false;
637
638 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
639
640 // Don't match base register only case. That is matched to a separate
641 // lower complexity pattern with explicit register operand.
642 if (ShOpcVal == ARM_AM::no_shift) return false;
643
644 BaseReg = N.getOperand(0);
645 unsigned ShImmVal = 0;
646 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
647 if (RHS) return false;
648
649 ShReg = N.getOperand(1);
650 if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
651 return false;
652 Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
653 SDLoc(N), MVT::i32);
654 return true;
655}
656
657// Determine whether an ISD::OR's operands are suitable to turn the operation
658// into an addition, which often has more compact encodings.
659bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) {
660 assert(Parent->getOpcode() == ISD::OR && "unexpected parent");
661 Out = N;
662 return CurDAG->haveNoCommonBitsSet(N, Parent->getOperand(1));
663}
664
665
666bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
667 SDValue &Base,
668 SDValue &OffImm) {
669 // Match simple R + imm12 operands.
670
671 // Base only.
672 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
673 !CurDAG->isBaseWithConstantOffset(N)) {
674 if (N.getOpcode() == ISD::FrameIndex) {
675 // Match frame index.
676 int FI = cast<FrameIndexSDNode>(N)->getIndex();
677 Base = CurDAG->getTargetFrameIndex(
678 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
679 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
680 return true;
681 }
682
683 if (N.getOpcode() == ARMISD::Wrapper &&
684 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
685 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
686 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
687 Base = N.getOperand(0);
688 } else
689 Base = N;
690 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
691 return true;
692 }
693
694 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
695 int RHSC = (int)RHS->getSExtValue();
696 if (N.getOpcode() == ISD::SUB)
697 RHSC = -RHSC;
698
699 if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
700 Base = N.getOperand(0);
701 if (Base.getOpcode() == ISD::FrameIndex) {
702 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
703 Base = CurDAG->getTargetFrameIndex(
704 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
705 }
706 OffImm = CurDAG->getSignedTargetConstant(RHSC, SDLoc(N), MVT::i32);
707 return true;
708 }
709 }
710
711 // Base only.
712 Base = N;
713 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
714 return true;
715}
716
717
718
719bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
720 SDValue &Opc) {
721 if (N.getOpcode() == ISD::MUL &&
722 ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
723 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
724 // X * [3,5,9] -> X + X * [2,4,8] etc.
725 int RHSC = (int)RHS->getZExtValue();
726 if (RHSC & 1) {
727 RHSC = RHSC & ~1;
729 if (RHSC < 0) {
731 RHSC = - RHSC;
732 }
733 if (isPowerOf2_32(RHSC)) {
734 unsigned ShAmt = Log2_32(RHSC);
735 Base = Offset = N.getOperand(0);
736 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
738 SDLoc(N), MVT::i32);
739 return true;
740 }
741 }
742 }
743 }
744
745 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
746 // ISD::OR that is equivalent to an ISD::ADD.
747 !CurDAG->isBaseWithConstantOffset(N))
748 return false;
749
750 // Leave simple R +/- imm12 operands for LDRi12
751 if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
752 int RHSC;
753 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
754 -0x1000+1, 0x1000, RHSC)) // 12 bits.
755 return false;
756 }
757
758 // Otherwise this is R +/- [possibly shifted] R.
760 ARM_AM::ShiftOpc ShOpcVal =
761 ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
762 unsigned ShAmt = 0;
763
764 Base = N.getOperand(0);
765 Offset = N.getOperand(1);
766
767 if (ShOpcVal != ARM_AM::no_shift) {
768 // Check to see if the RHS of the shift is a constant, if not, we can't fold
769 // it.
770 if (ConstantSDNode *Sh =
771 dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
772 ShAmt = Sh->getZExtValue();
773 if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
774 Offset = N.getOperand(1).getOperand(0);
775 else {
776 ShAmt = 0;
777 ShOpcVal = ARM_AM::no_shift;
778 }
779 } else {
780 ShOpcVal = ARM_AM::no_shift;
781 }
782 }
783
784 // Try matching (R shl C) + (R).
785 if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
786 !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
787 N.getOperand(0).hasOneUse())) {
788 ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
789 if (ShOpcVal != ARM_AM::no_shift) {
790 // Check to see if the RHS of the shift is a constant, if not, we can't
791 // fold it.
792 if (ConstantSDNode *Sh =
793 dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
794 ShAmt = Sh->getZExtValue();
795 if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
796 Offset = N.getOperand(0).getOperand(0);
797 Base = N.getOperand(1);
798 } else {
799 ShAmt = 0;
800 ShOpcVal = ARM_AM::no_shift;
801 }
802 } else {
803 ShOpcVal = ARM_AM::no_shift;
804 }
805 }
806 }
807
808 // If Offset is a multiply-by-constant and it's profitable to extract a shift
809 // and use it in a shifted operand do so.
810 if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
811 unsigned PowerOfTwo = 0;
812 SDValue NewMulConst;
813 if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
814 HandleSDNode Handle(Offset);
815 replaceDAGValue(Offset.getOperand(1), NewMulConst);
816 Offset = Handle.getValue();
817 ShAmt = PowerOfTwo;
818 ShOpcVal = ARM_AM::lsl;
819 }
820 }
821
822 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
823 SDLoc(N), MVT::i32);
824 return true;
825}
826
827bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
828 SDValue &Offset, SDValue &Opc) {
829 unsigned Opcode = Op->getOpcode();
830 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
831 ? cast<LoadSDNode>(Op)->getAddressingMode()
832 : cast<StoreSDNode>(Op)->getAddressingMode();
835 int Val;
836 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
837 return false;
838
839 Offset = N;
840 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
841 unsigned ShAmt = 0;
842 if (ShOpcVal != ARM_AM::no_shift) {
843 // Check to see if the RHS of the shift is a constant, if not, we can't fold
844 // it.
845 if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
846 ShAmt = Sh->getZExtValue();
847 if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
848 Offset = N.getOperand(0);
849 else {
850 ShAmt = 0;
851 ShOpcVal = ARM_AM::no_shift;
852 }
853 } else {
854 ShOpcVal = ARM_AM::no_shift;
855 }
856 }
857
858 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
859 SDLoc(N), MVT::i32);
860 return true;
861}
862
863bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
864 SDValue &Offset, SDValue &Opc) {
865 unsigned Opcode = Op->getOpcode();
866 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
867 ? cast<LoadSDNode>(Op)->getAddressingMode()
868 : cast<StoreSDNode>(Op)->getAddressingMode();
871 int Val;
872 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
873 if (AddSub == ARM_AM::sub) Val *= -1;
874 Offset = CurDAG->getRegister(0, MVT::i32);
875 Opc = CurDAG->getSignedTargetConstant(Val, SDLoc(Op), MVT::i32);
876 return true;
877 }
878
879 return false;
880}
881
882
883bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
884 SDValue &Offset, SDValue &Opc) {
885 unsigned Opcode = Op->getOpcode();
886 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
887 ? cast<LoadSDNode>(Op)->getAddressingMode()
888 : cast<StoreSDNode>(Op)->getAddressingMode();
891 int Val;
892 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
893 Offset = CurDAG->getRegister(0, MVT::i32);
894 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
896 SDLoc(Op), MVT::i32);
897 return true;
898 }
899
900 return false;
901}
902
/// Match any address as a bare base register with no offset operand.
bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
  Base = N;
  return true;
}
907
908bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
910 SDValue &Opc) {
911 if (N.getOpcode() == ISD::SUB) {
912 // X - C is canonicalize to X + -C, no need to handle it here.
913 Base = N.getOperand(0);
914 Offset = N.getOperand(1);
915 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
916 MVT::i32);
917 return true;
918 }
919
920 if (!CurDAG->isBaseWithConstantOffset(N)) {
921 Base = N;
922 if (N.getOpcode() == ISD::FrameIndex) {
923 int FI = cast<FrameIndexSDNode>(N)->getIndex();
924 Base = CurDAG->getTargetFrameIndex(
925 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
926 }
927 Offset = CurDAG->getRegister(0, MVT::i32);
928 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
929 MVT::i32);
930 return true;
931 }
932
933 // If the RHS is +/- imm8, fold into addr mode.
934 int RHSC;
935 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
936 -256 + 1, 256, RHSC)) { // 8 bits.
937 Base = N.getOperand(0);
938 if (Base.getOpcode() == ISD::FrameIndex) {
939 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
940 Base = CurDAG->getTargetFrameIndex(
941 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
942 }
943 Offset = CurDAG->getRegister(0, MVT::i32);
944
946 if (RHSC < 0) {
948 RHSC = -RHSC;
949 }
950 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
951 MVT::i32);
952 return true;
953 }
954
955 Base = N.getOperand(0);
956 Offset = N.getOperand(1);
957 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
958 MVT::i32);
959 return true;
960}
961
962bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
963 SDValue &Offset, SDValue &Opc) {
964 unsigned Opcode = Op->getOpcode();
965 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
966 ? cast<LoadSDNode>(Op)->getAddressingMode()
967 : cast<StoreSDNode>(Op)->getAddressingMode();
970 int Val;
971 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 12 bits.
972 Offset = CurDAG->getRegister(0, MVT::i32);
973 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
974 MVT::i32);
975 return true;
976 }
977
978 Offset = N;
979 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
980 MVT::i32);
981 return true;
982}
983
984bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset,
985 bool FP16) {
986 if (!CurDAG->isBaseWithConstantOffset(N)) {
987 Base = N;
988 if (N.getOpcode() == ISD::FrameIndex) {
989 int FI = cast<FrameIndexSDNode>(N)->getIndex();
990 Base = CurDAG->getTargetFrameIndex(
991 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
992 } else if (N.getOpcode() == ARMISD::Wrapper &&
993 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
994 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
995 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
996 Base = N.getOperand(0);
997 }
998 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
999 SDLoc(N), MVT::i32);
1000 return true;
1001 }
1002
1003 // If the RHS is +/- imm8, fold into addr mode.
1004 int RHSC;
1005 const int Scale = FP16 ? 2 : 4;
1006
1007 if (isScaledConstantInRange(N.getOperand(1), Scale, -255, 256, RHSC)) {
1008 Base = N.getOperand(0);
1009 if (Base.getOpcode() == ISD::FrameIndex) {
1010 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1011 Base = CurDAG->getTargetFrameIndex(
1012 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1013 }
1014
1016 if (RHSC < 0) {
1018 RHSC = -RHSC;
1019 }
1020
1021 if (FP16)
1022 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub, RHSC),
1023 SDLoc(N), MVT::i32);
1024 else
1025 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
1026 SDLoc(N), MVT::i32);
1027
1028 return true;
1029 }
1030
1031 Base = N;
1032
1033 if (FP16)
1034 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add, 0),
1035 SDLoc(N), MVT::i32);
1036 else
1037 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
1038 SDLoc(N), MVT::i32);
1039
1040 return true;
1041}
1042
1043bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
1045 return IsAddressingMode5(N, Base, Offset, /*FP16=*/ false);
1046}
1047
1048bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N,
1050 return IsAddressingMode5(N, Base, Offset, /*FP16=*/ true);
1051}
1052
1053bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
1054 SDValue &Align) {
1055 Addr = N;
1056
1057 unsigned Alignment = 0;
1058
1059 MemSDNode *MemN = cast<MemSDNode>(Parent);
1060
1061 if (isa<LSBaseSDNode>(MemN) ||
1062 ((MemN->getOpcode() == ARMISD::VST1_UPD ||
1063 MemN->getOpcode() == ARMISD::VLD1_UPD) &&
1064 MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
1065 // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
1066 // The maximum alignment is equal to the memory size being referenced.
1067 llvm::Align MMOAlign = MemN->getAlign();
1068 unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
1069 if (MMOAlign.value() >= MemSize && MemSize > 1)
1070 Alignment = MemSize;
1071 } else {
1072 // All other uses of addrmode6 are for intrinsics. For now just record
1073 // the raw alignment value; it will be refined later based on the legal
1074 // alignment operands for the intrinsic.
1075 Alignment = MemN->getAlign().value();
1076 }
1077
1078 Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
1079 return true;
1080}
1081
1082bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
1083 SDValue &Offset) {
1084 LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
1086 if (AM != ISD::POST_INC)
1087 return false;
1088 Offset = N;
1089 if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
1090 if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
1091 Offset = CurDAG->getRegister(0, MVT::i32);
1092 }
1093 return true;
1094}
1095
1096bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
1097 SDValue &Offset, SDValue &Label) {
1098 if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
1099 Offset = N.getOperand(0);
1100 SDValue N1 = N.getOperand(1);
1101 Label = CurDAG->getTargetConstant(N1->getAsZExtVal(), SDLoc(N), MVT::i32);
1102 return true;
1103 }
1104
1105 return false;
1106}
1107
1108
1109//===----------------------------------------------------------------------===//
1110// Thumb Addressing Modes
1111//===----------------------------------------------------------------------===//
1112
1114 // Negative numbers are difficult to materialise in thumb1. If we are
1115 // selecting the add of a negative, instead try to select ri with a zero
1116 // offset, so create the add node directly which will become a sub.
1117 if (N.getOpcode() != ISD::ADD)
1118 return false;
1119
1120 // Look for an imm which is not legal for ld/st, but is legal for sub.
1121 if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1)))
1122 return C->getSExtValue() < 0 && C->getSExtValue() >= -255;
1123
1124 return false;
1125}
1126
1127bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base,
1128 SDValue &Offset) {
1129 if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
1130 if (!isNullConstant(N))
1131 return false;
1132
1133 Base = Offset = N;
1134 return true;
1135 }
1136
1137 Base = N.getOperand(0);
1138 Offset = N.getOperand(1);
1139 return true;
1140}
1141
1142bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, SDValue &Base,
1143 SDValue &Offset) {
1145 return false; // Select ri instead
1146 return SelectThumbAddrModeRRSext(N, Base, Offset);
1147}
1148
1149bool
1150ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
1151 SDValue &Base, SDValue &OffImm) {
1153 Base = N;
1154 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1155 return true;
1156 }
1157
1158 if (!CurDAG->isBaseWithConstantOffset(N)) {
1159 if (N.getOpcode() == ISD::ADD) {
1160 return false; // We want to select register offset instead
1161 } else if (N.getOpcode() == ARMISD::Wrapper &&
1162 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1163 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1164 N.getOperand(0).getOpcode() != ISD::TargetConstantPool &&
1165 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1166 Base = N.getOperand(0);
1167 } else {
1168 Base = N;
1169 }
1170
1171 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1172 return true;
1173 }
1174
1175 // If the RHS is + imm5 * scale, fold into addr mode.
1176 int RHSC;
1177 if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
1178 Base = N.getOperand(0);
1179 OffImm = CurDAG->getSignedTargetConstant(RHSC, SDLoc(N), MVT::i32);
1180 return true;
1181 }
1182
1183 // Offset is too large, so use register offset instead.
1184 return false;
1185}
1186
/// Select a Thumb1 base + imm5 address scaled by 4 (word accesses).
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
}
1192
/// Select a Thumb1 base + imm5 address scaled by 2 (halfword accesses).
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
}
1198
/// Select a Thumb1 base + imm5 address scaled by 1 (byte accesses).
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
}
1204
/// Select a Thumb1 SP/frame-index relative address: a frame index, optionally
/// plus an unsigned imm8 scaled by 4. May raise the alignment of the frame
/// object as a side effect so the offset stays a multiple of 4.
bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    // Only multiples of 4 are allowed for the offset, so the frame object
    // alignment must be at least 4.
    MachineFrameInfo &MFI = MF->getFrameInfo();
    if (MFI.getObjectAlign(FI) < Align(4))
      MFI.setObjectAlignment(FI, Align(4));
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N))
    return false;

  if (N.getOperand(0).getOpcode() == ISD::FrameIndex) {
    // If the RHS is + imm8 * scale, fold into addr mode.
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
      Base = N.getOperand(0);
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      // Make sure the offset is inside the object, or we might fail to
      // allocate an emergency spill slot. (An out-of-range access is UB, but
      // it could show up anyway.)
      MachineFrameInfo &MFI = MF->getFrameInfo();
      if (RHSC * 4 < MFI.getObjectSize(FI)) {
        // For LHS+RHS to result in an offset that's a multiple of 4 the object
        // indexed by the LHS must be 4-byte aligned.
        if (!MFI.isFixedObjectIndex(FI) && MFI.getObjectAlign(FI) < Align(4))
          MFI.setObjectAlignment(FI, Align(4));
        if (MFI.getObjectAlign(FI) >= Align(4)) {
          Base = CurDAG->getTargetFrameIndex(
              FI, TLI->getPointerTy(CurDAG->getDataLayout()));
          OffImm = CurDAG->getSignedTargetConstant(RHSC, SDLoc(N), MVT::i32);
          return true;
        }
      }
    }
  }

  return false;
}
1250
1251template <unsigned Shift>
1252bool ARMDAGToDAGISel::SelectTAddrModeImm7(SDValue N, SDValue &Base,
1253 SDValue &OffImm) {
1254 if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
1255 int RHSC;
1256 if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
1257 RHSC)) {
1258 Base = N.getOperand(0);
1259 if (N.getOpcode() == ISD::SUB)
1260 RHSC = -RHSC;
1261 OffImm = CurDAG->getSignedTargetConstant(RHSC * (1 << Shift), SDLoc(N),
1262 MVT::i32);
1263 return true;
1264 }
1265 }
1266
1267 // Base only.
1268 Base = N;
1269 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1270 return true;
1271}
1272
1273
1274//===----------------------------------------------------------------------===//
1275// Thumb 2 Addressing Modes
1276//===----------------------------------------------------------------------===//
1277
1278
/// Select a Thumb2 base + unsigned imm12 address (t2LDRi12 and friends).
/// Deliberately fails for (R - imm8) shapes so t2LDRi8 can claim them.
bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::TargetConstantPool)
        return false;  // We want to select t2LDRpci instead.
    } else
      Base = N;
    OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    // (R - imm8) is claimed by the imm8 form; bail out so it wins.
    if (SelectT2AddrModeImm8(N, Base, OffImm))
      // Let t2LDRi8 handle (R - imm8).
      return false;

    int RHSC = (int)RHS->getZExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
      Base   = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getSignedTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}
1334
1335template <unsigned Shift>
1336bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, SDValue &Base,
1337 SDValue &OffImm) {
1338 if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
1339 int RHSC;
1340 if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -255, 256, RHSC)) {
1341 Base = N.getOperand(0);
1342 if (Base.getOpcode() == ISD::FrameIndex) {
1343 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1344 Base = CurDAG->getTargetFrameIndex(
1345 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1346 }
1347
1348 if (N.getOpcode() == ISD::SUB)
1349 RHSC = -RHSC;
1350 OffImm = CurDAG->getSignedTargetConstant(RHSC * (1 << Shift), SDLoc(N),
1351 MVT::i32);
1352 return true;
1353 }
1354 }
1355
1356 // Base only.
1357 Base = N;
1358 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1359 return true;
1360}
1361
1362bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
1363 SDValue &Base, SDValue &OffImm) {
1364 // Match simple R - imm8 operands.
1365 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1366 !CurDAG->isBaseWithConstantOffset(N))
1367 return false;
1368
1369 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1370 int RHSC = (int)RHS->getSExtValue();
1371 if (N.getOpcode() == ISD::SUB)
1372 RHSC = -RHSC;
1373
1374 if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
1375 Base = N.getOperand(0);
1376 if (Base.getOpcode() == ISD::FrameIndex) {
1377 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1378 Base = CurDAG->getTargetFrameIndex(
1379 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1380 }
1381 OffImm = CurDAG->getSignedTargetConstant(RHSC, SDLoc(N), MVT::i32);
1382 return true;
1383 }
1384 }
1385
1386 return false;
1387}
1388
1389bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
1390 SDValue &OffImm){
1391 unsigned Opcode = Op->getOpcode();
1392 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1393 ? cast<LoadSDNode>(Op)->getAddressingMode()
1394 : cast<StoreSDNode>(Op)->getAddressingMode();
1395 int RHSC;
1396 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
1397 OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
1398 ? CurDAG->getSignedTargetConstant(RHSC, SDLoc(N), MVT::i32)
1399 : CurDAG->getSignedTargetConstant(-RHSC, SDLoc(N), MVT::i32);
1400 return true;
1401 }
1402
1403 return false;
1404}
1405
/// Select a Thumb2/MVE base + signed imm7 << Shift address; always succeeds,
/// falling back to base-register-only with a zero offset.
template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
                                RHSC)) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }

      // A SUB node carries its constant positively; negate for the offset.
      if (N.getOpcode() == ISD::SUB)
        RHSC = -RHSC;
      OffImm = CurDAG->getSignedTargetConstant(RHSC * (1 << Shift), SDLoc(N),
                                               MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}
1433
/// Template wrapper so TableGen patterns can request an imm7 offset with a
/// compile-time shift; delegates to the runtime-shift overload below.
template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm) {
  return SelectT2AddrModeImm7Offset(Op, N, OffImm, Shift);
}
1439
1440bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
1441 SDValue &OffImm,
1442 unsigned Shift) {
1443 unsigned Opcode = Op->getOpcode();
1445 switch (Opcode) {
1446 case ISD::LOAD:
1447 AM = cast<LoadSDNode>(Op)->getAddressingMode();
1448 break;
1449 case ISD::STORE:
1450 AM = cast<StoreSDNode>(Op)->getAddressingMode();
1451 break;
1452 case ISD::MLOAD:
1453 AM = cast<MaskedLoadSDNode>(Op)->getAddressingMode();
1454 break;
1455 case ISD::MSTORE:
1456 AM = cast<MaskedStoreSDNode>(Op)->getAddressingMode();
1457 break;
1458 default:
1459 llvm_unreachable("Unexpected Opcode for Imm7Offset");
1460 }
1461
1462 int RHSC;
1463 // 7 bit constant, shifted by Shift.
1464 if (isScaledConstantInRange(N, 1 << Shift, 0, 0x80, RHSC)) {
1465 OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
1466 ? CurDAG->getSignedTargetConstant(RHSC * (1 << Shift),
1467 SDLoc(N), MVT::i32)
1468 : CurDAG->getSignedTargetConstant(-RHSC * (1 << Shift),
1469 SDLoc(N), MVT::i32);
1470 return true;
1471 }
1472 return false;
1473}
1474
1475template <int Min, int Max>
1476bool ARMDAGToDAGISel::SelectImmediateInRange(SDValue N, SDValue &OffImm) {
1477 int Val;
1478 if (isScaledConstantInRange(N, 1, Min, Max, Val)) {
1479 OffImm = CurDAG->getSignedTargetConstant(Val, SDLoc(N), MVT::i32);
1480 return true;
1481 }
1482 return false;
1483}
1484
1485bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
1486 SDValue &Base,
1487 SDValue &OffReg, SDValue &ShImm) {
1488 // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
1489 if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
1490 return false;
1491
1492 // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
1493 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1494 int RHSC = (int)RHS->getZExtValue();
1495 if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
1496 return false;
1497 else if (RHSC < 0 && RHSC >= -255) // 8 bits
1498 return false;
1499 }
1500
1501 // Look for (R + R) or (R + (R << [1,2,3])).
1502 unsigned ShAmt = 0;
1503 Base = N.getOperand(0);
1504 OffReg = N.getOperand(1);
1505
1506 // Swap if it is ((R << c) + R).
1508 if (ShOpcVal != ARM_AM::lsl) {
1509 ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
1510 if (ShOpcVal == ARM_AM::lsl)
1511 std::swap(Base, OffReg);
1512 }
1513
1514 if (ShOpcVal == ARM_AM::lsl) {
1515 // Check to see if the RHS of the shift is a constant, if not, we can't fold
1516 // it.
1517 if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
1518 ShAmt = Sh->getZExtValue();
1519 if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
1520 OffReg = OffReg.getOperand(0);
1521 else {
1522 ShAmt = 0;
1523 }
1524 }
1525 }
1526
1527 // If OffReg is a multiply-by-constant and it's profitable to extract a shift
1528 // and use it in a shifted operand do so.
1529 if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
1530 unsigned PowerOfTwo = 0;
1531 SDValue NewMulConst;
1532 if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
1533 HandleSDNode Handle(OffReg);
1534 replaceDAGValue(OffReg.getOperand(1), NewMulConst);
1535 OffReg = Handle.getValue();
1536 ShAmt = PowerOfTwo;
1537 }
1538 }
1539
1540 ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);
1541
1542 return true;
1543}
1544
1545bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
1546 SDValue &OffImm) {
1547 // This *must* succeed since it's used for the irreplaceable ldrex and strex
1548 // instructions.
1549 Base = N;
1550 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1551
1552 if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
1553 return true;
1554
1555 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
1556 if (!RHS)
1557 return true;
1558
1559 uint32_t RHSC = (int)RHS->getZExtValue();
1560 if (RHSC > 1020 || RHSC % 4 != 0)
1561 return true;
1562
1563 Base = N.getOperand(0);
1564 if (Base.getOpcode() == ISD::FrameIndex) {
1565 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1566 Base = CurDAG->getTargetFrameIndex(
1567 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1568 }
1569
1570 OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);
1571 return true;
1572}
1573
1574//===--------------------------------------------------------------------===//
1575
/// getAL - Returns a ARMCC::AL immediate node.
/// ARMCC::AL is the "always" condition code: the default predicate operand
/// attached to predicable ARM machine instructions built below.
static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
  return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
}
1580
/// Copy the MachineMemOperand of memory node \p N onto the newly created
/// machine node \p Result, preserving alias/volatility info for later passes.
void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
}
1585
/// Try to select an ARM-mode pre/post-indexed load for \p N. Picks the
/// LDR/LDRB/LDRH/LDRSB/LDRSH variant from the loaded type, extension kind and
/// matched addressing mode; on success replaces N and returns true.
bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  EVT LoadedVT = LD->getMemoryVT();
  SDValue Offset, AMOpc;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  if (LoadedVT == MVT::i32 && isPre &&
      SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_PRE_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 && !isPre &&
      SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_POST_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 &&
      SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
    Match = true;

  } else if (LoadedVT == MVT::i16 &&
             SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
    // i16 loads use addrmode3; sign-extending loads pick the LDRSH forms.
    Match = true;
    Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
      ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
      : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
  } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
    if (LD->getExtensionType() == ISD::SEXTLOAD) {
      if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
      }
    } else {
      if (isPre &&
          SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_PRE_IMM;
      } else if (!isPre &&
                  SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_POST_IMM;
      } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
      }
    }
  }

  if (Match) {
    if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
      // Pre-indexed immediate forms carry the offset inside AMOpc and take
      // no separate offset-register operand.
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
                       CurDAG->getRegister(0, MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                           MVT::Other, Ops);
      transferMemOperands(N, New);
      ReplaceNode(N, New);
      return true;
    } else {
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
                       CurDAG->getRegister(0, MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                           MVT::Other, Ops);
      transferMemOperands(N, New);
      ReplaceNode(N, New);
      return true;
    }
  }

  return false;
}
1664
/// Try to select a Thumb1 post-incremented i32 load. Only a non-extending
/// load with a constant increment of exactly 4 qualifies.
bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  EVT LoadedVT = LD->getMemoryVT();
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD ||
      LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
    return false;

  auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset());
  if (!COffs || COffs->getZExtValue() != 4)
    return false;

  // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
  // The encoding of LDM is not how the rest of ISel expects a post-inc load to
  // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after
  // ISel.
  SDValue Chain = LD->getChain();
  SDValue Base = LD->getBasePtr();
  SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)),
                   CurDAG->getRegister(0, MVT::i32), Chain };
  SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32,
                                       MVT::i32, MVT::Other, Ops);
  transferMemOperands(N, New);
  ReplaceNode(N, New);
  return true;
}
1691
1692bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
1693 LoadSDNode *LD = cast<LoadSDNode>(N);
1694 ISD::MemIndexedMode AM = LD->getAddressingMode();
1695 if (AM == ISD::UNINDEXED)
1696 return false;
1697
1698 EVT LoadedVT = LD->getMemoryVT();
1699 bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1701 bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1702 unsigned Opcode = 0;
1703 bool Match = false;
1704 if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
1705 switch (LoadedVT.getSimpleVT().SimpleTy) {
1706 case MVT::i32:
1707 Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
1708 break;
1709 case MVT::i16:
1710 if (isSExtLd)
1711 Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
1712 else
1713 Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
1714 break;
1715 case MVT::i8:
1716 case MVT::i1:
1717 if (isSExtLd)
1718 Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
1719 else
1720 Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
1721 break;
1722 default:
1723 return false;
1724 }
1725 Match = true;
1726 }
1727
1728 if (Match) {
1729 SDValue Chain = LD->getChain();
1730 SDValue Base = LD->getBasePtr();
1731 SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
1732 CurDAG->getRegister(0, MVT::i32), Chain };
1733 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1734 MVT::Other, Ops);
1735 transferMemOperands(N, New);
1736 ReplaceNode(N, New);
1737 return true;
1738 }
1739
1740 return false;
1741}
1742
/// Try to select an MVE pre/post-indexed vector load (plain or masked) for
/// \p N. Picks a VLDRB/VLDRH/VLDRW variant from the memory type, extension
/// kind and alignment; on success replaces N's three results and returns
/// true.
bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {
  EVT LoadedVT;
  unsigned Opcode = 0;
  bool isSExtLd, isPre;
  Align Alignment;
  ARMVCC::VPTCodes Pred;
  SDValue PredReg;
  SDValue Chain, Base, Offset;

  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    ISD::MemIndexedMode AM = LD->getAddressingMode();
    if (AM == ISD::UNINDEXED)
      return false;
    LoadedVT = LD->getMemoryVT();
    if (!LoadedVT.isVector())
      return false;

    Chain = LD->getChain();
    Base = LD->getBasePtr();
    Offset = LD->getOffset();
    Alignment = LD->getAlign();
    isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
    isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
    // Unmasked loads execute unconditionally.
    Pred = ARMVCC::None;
    PredReg = CurDAG->getRegister(0, MVT::i32);
  } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
    ISD::MemIndexedMode AM = LD->getAddressingMode();
    if (AM == ISD::UNINDEXED)
      return false;
    LoadedVT = LD->getMemoryVT();
    if (!LoadedVT.isVector())
      return false;

    Chain = LD->getChain();
    Base = LD->getBasePtr();
    Offset = LD->getOffset();
    Alignment = LD->getAlign();
    isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
    isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
    // Masked loads are predicated on the mask operand (VPT "then" block).
    Pred = ARMVCC::Then;
    PredReg = LD->getMask();
  } else
    llvm_unreachable("Expected a Load or a Masked Load!");

  // We allow LE non-masked loads to change the type (for example use a vldrb.8
  // as opposed to a vldrw.32). This can allow extra addressing modes or
  // alignments for what is otherwise an equivalent instruction.
  bool CanChangeType = Subtarget->isLittle() && !isa<MaskedLoadSDNode>(N);

  // Try the widening/extending forms first, then same-size forms; each arm
  // also requires the imm7 offset (scaled to the element size) to match.
  SDValue NewOffset;
  if (Alignment >= Align(2) && LoadedVT == MVT::v4i16 &&
      SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRHS32_pre : ARM::MVE_VLDRHS32_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRHU32_pre : ARM::MVE_VLDRHU32_post;
  } else if (LoadedVT == MVT::v8i8 &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRBS16_pre : ARM::MVE_VLDRBS16_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRBU16_pre : ARM::MVE_VLDRBU16_post;
  } else if (LoadedVT == MVT::v4i8 &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRBS32_pre : ARM::MVE_VLDRBS32_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRBU32_pre : ARM::MVE_VLDRBU32_post;
  } else if (Alignment >= Align(4) &&
             (CanChangeType || LoadedVT == MVT::v4i32 ||
              LoadedVT == MVT::v4f32) &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 2))
    Opcode = isPre ? ARM::MVE_VLDRWU32_pre : ARM::MVE_VLDRWU32_post;
  else if (Alignment >= Align(2) &&
           (CanChangeType || LoadedVT == MVT::v8i16 ||
            LoadedVT == MVT::v8f16) &&
           SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1))
    Opcode = isPre ? ARM::MVE_VLDRHU16_pre : ARM::MVE_VLDRHU16_post;
  else if ((CanChangeType || LoadedVT == MVT::v16i8) &&
           SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0))
    Opcode = isPre ? ARM::MVE_VLDRBU8_pre : ARM::MVE_VLDRBU8_post;
  else
    return false;

  SDValue Ops[] = {Base,
                   NewOffset,
                   CurDAG->getTargetConstant(Pred, SDLoc(N), MVT::i32),
                   PredReg,
                   CurDAG->getRegister(0, MVT::i32), // tp_reg
                   Chain};
  SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
                                       N->getValueType(0), MVT::Other, Ops);
  transferMemOperands(N, New);
  // Machine node results are (writeback, value, chain); the ISD load's
  // results are (value, writeback, chain) — remap accordingly.
  ReplaceUses(SDValue(N, 0), SDValue(New, 1));
  ReplaceUses(SDValue(N, 1), SDValue(New, 0));
  ReplaceUses(SDValue(N, 2), SDValue(New, 2));
  CurDAG->RemoveDeadNode(N);
  return true;
}
1842
1843/// Form a GPRPair pseudo register from a pair of GPR regs.
1844SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
1845 SDLoc dl(V0.getNode());
1846 SDValue RegClass =
1847 CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
1848 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
1849 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
1850 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1851 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1852}
1853
1854/// Form a D register from a pair of S registers.
1855SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1856 SDLoc dl(V0.getNode());
1857 SDValue RegClass =
1858 CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
1859 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1860 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1861 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1862 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1863}
1864
1865/// Form a quad register from a pair of D registers.
1866SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1867 SDLoc dl(V0.getNode());
1868 SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
1869 MVT::i32);
1870 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1871 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1872 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1873 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1874}
1875
1876/// Form 4 consecutive D registers from a pair of Q registers.
1877SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1878 SDLoc dl(V0.getNode());
1879 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1880 MVT::i32);
1881 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1882 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1883 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1884 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1885}
1886
1887/// Form 4 consecutive S registers.
1888SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
1889 SDValue V2, SDValue V3) {
1890 SDLoc dl(V0.getNode());
1891 SDValue RegClass =
1892 CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
1893 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1894 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1895 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
1896 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
1897 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1898 V2, SubReg2, V3, SubReg3 };
1899 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1900}
1901
1902/// Form 4 consecutive D registers.
1903SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
1904 SDValue V2, SDValue V3) {
1905 SDLoc dl(V0.getNode());
1906 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1907 MVT::i32);
1908 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1909 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1910 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);
1911 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);
1912 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1913 V2, SubReg2, V3, SubReg3 };
1914 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1915}
1916
1917/// Form 4 consecutive Q registers.
1918SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
1919 SDValue V2, SDValue V3) {
1920 SDLoc dl(V0.getNode());
1921 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl,
1922 MVT::i32);
1923 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1924 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1925 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);
1926 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);
1927 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1928 V2, SubReg2, V3, SubReg3 };
1929 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1930}
1931
1932/// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
1933/// of a NEON VLD or VST instruction. The supported values depend on the
1934/// number of registers being loaded.
1935SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl,
1936 unsigned NumVecs, bool is64BitVector) {
1937 unsigned NumRegs = NumVecs;
1938 if (!is64BitVector && NumVecs < 3)
1939 NumRegs *= 2;
1940
1941 unsigned Alignment = Align->getAsZExtVal();
1942 if (Alignment >= 32 && NumRegs == 4)
1943 Alignment = 32;
1944 else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
1945 Alignment = 16;
1946 else if (Alignment >= 8)
1947 Alignment = 8;
1948 else
1949 Alignment = 0;
1950
1951 return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
1952}
1953
1954static bool isVLDfixed(unsigned Opc)
1955{
1956 switch (Opc) {
1957 default: return false;
1958 case ARM::VLD1d8wb_fixed : return true;
1959 case ARM::VLD1d16wb_fixed : return true;
1960 case ARM::VLD1d64Qwb_fixed : return true;
1961 case ARM::VLD1d32wb_fixed : return true;
1962 case ARM::VLD1d64wb_fixed : return true;
1963 case ARM::VLD1d8TPseudoWB_fixed : return true;
1964 case ARM::VLD1d16TPseudoWB_fixed : return true;
1965 case ARM::VLD1d32TPseudoWB_fixed : return true;
1966 case ARM::VLD1d64TPseudoWB_fixed : return true;
1967 case ARM::VLD1d8QPseudoWB_fixed : return true;
1968 case ARM::VLD1d16QPseudoWB_fixed : return true;
1969 case ARM::VLD1d32QPseudoWB_fixed : return true;
1970 case ARM::VLD1d64QPseudoWB_fixed : return true;
1971 case ARM::VLD1q8wb_fixed : return true;
1972 case ARM::VLD1q16wb_fixed : return true;
1973 case ARM::VLD1q32wb_fixed : return true;
1974 case ARM::VLD1q64wb_fixed : return true;
1975 case ARM::VLD1DUPd8wb_fixed : return true;
1976 case ARM::VLD1DUPd16wb_fixed : return true;
1977 case ARM::VLD1DUPd32wb_fixed : return true;
1978 case ARM::VLD1DUPq8wb_fixed : return true;
1979 case ARM::VLD1DUPq16wb_fixed : return true;
1980 case ARM::VLD1DUPq32wb_fixed : return true;
1981 case ARM::VLD2d8wb_fixed : return true;
1982 case ARM::VLD2d16wb_fixed : return true;
1983 case ARM::VLD2d32wb_fixed : return true;
1984 case ARM::VLD2q8PseudoWB_fixed : return true;
1985 case ARM::VLD2q16PseudoWB_fixed : return true;
1986 case ARM::VLD2q32PseudoWB_fixed : return true;
1987 case ARM::VLD2DUPd8wb_fixed : return true;
1988 case ARM::VLD2DUPd16wb_fixed : return true;
1989 case ARM::VLD2DUPd32wb_fixed : return true;
1990 case ARM::VLD2DUPq8OddPseudoWB_fixed: return true;
1991 case ARM::VLD2DUPq16OddPseudoWB_fixed: return true;
1992 case ARM::VLD2DUPq32OddPseudoWB_fixed: return true;
1993 }
1994}
1995
1996static bool isVSTfixed(unsigned Opc)
1997{
1998 switch (Opc) {
1999 default: return false;
2000 case ARM::VST1d8wb_fixed : return true;
2001 case ARM::VST1d16wb_fixed : return true;
2002 case ARM::VST1d32wb_fixed : return true;
2003 case ARM::VST1d64wb_fixed : return true;
2004 case ARM::VST1q8wb_fixed : return true;
2005 case ARM::VST1q16wb_fixed : return true;
2006 case ARM::VST1q32wb_fixed : return true;
2007 case ARM::VST1q64wb_fixed : return true;
2008 case ARM::VST1d8TPseudoWB_fixed : return true;
2009 case ARM::VST1d16TPseudoWB_fixed : return true;
2010 case ARM::VST1d32TPseudoWB_fixed : return true;
2011 case ARM::VST1d64TPseudoWB_fixed : return true;
2012 case ARM::VST1d8QPseudoWB_fixed : return true;
2013 case ARM::VST1d16QPseudoWB_fixed : return true;
2014 case ARM::VST1d32QPseudoWB_fixed : return true;
2015 case ARM::VST1d64QPseudoWB_fixed : return true;
2016 case ARM::VST2d8wb_fixed : return true;
2017 case ARM::VST2d16wb_fixed : return true;
2018 case ARM::VST2d32wb_fixed : return true;
2019 case ARM::VST2q8PseudoWB_fixed : return true;
2020 case ARM::VST2q16PseudoWB_fixed : return true;
2021 case ARM::VST2q32PseudoWB_fixed : return true;
2022 }
2023}
2024
// Get the register stride update opcode of a VLD/VST instruction that
// is otherwise equivalent to the given fixed stride updating instruction.
// Used when the post-increment amount is held in a register rather than
// matching the access size: the "_fixed" writeback opcode is swapped for
// its "_register" counterpart. Opcodes with no register form (asserted to
// still be fixed-stride writeback) fall through and are returned unchanged.
static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
  assert((isVLDfixed(Opc) || isVSTfixed(Opc))
         && "Incorrect fixed stride updating instruction.");
  switch (Opc) {
  default: break;
  // VLD1 (plain, triple/quad pseudos, and DUP forms):
  case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
  case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
  case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
  case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
  case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
  case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
  case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
  case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
  case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
  case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
  case ARM::VLD1d8TPseudoWB_fixed: return ARM::VLD1d8TPseudoWB_register;
  case ARM::VLD1d16TPseudoWB_fixed: return ARM::VLD1d16TPseudoWB_register;
  case ARM::VLD1d32TPseudoWB_fixed: return ARM::VLD1d32TPseudoWB_register;
  case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
  case ARM::VLD1d8QPseudoWB_fixed: return ARM::VLD1d8QPseudoWB_register;
  case ARM::VLD1d16QPseudoWB_fixed: return ARM::VLD1d16QPseudoWB_register;
  case ARM::VLD1d32QPseudoWB_fixed: return ARM::VLD1d32QPseudoWB_register;
  case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
  case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register;
  case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register;
  case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register;
  case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register;
  case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register;
  case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register;
  case ARM::VLD2DUPq8OddPseudoWB_fixed: return ARM::VLD2DUPq8OddPseudoWB_register;
  case ARM::VLD2DUPq16OddPseudoWB_fixed: return ARM::VLD2DUPq16OddPseudoWB_register;
  case ARM::VLD2DUPq32OddPseudoWB_fixed: return ARM::VLD2DUPq32OddPseudoWB_register;

  // VST1 (plain and triple/quad pseudos):
  case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
  case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
  case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
  case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
  case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
  case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
  case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
  case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
  case ARM::VST1d8TPseudoWB_fixed: return ARM::VST1d8TPseudoWB_register;
  case ARM::VST1d16TPseudoWB_fixed: return ARM::VST1d16TPseudoWB_register;
  case ARM::VST1d32TPseudoWB_fixed: return ARM::VST1d32TPseudoWB_register;
  case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
  case ARM::VST1d8QPseudoWB_fixed: return ARM::VST1d8QPseudoWB_register;
  case ARM::VST1d16QPseudoWB_fixed: return ARM::VST1d16QPseudoWB_register;
  case ARM::VST1d32QPseudoWB_fixed: return ARM::VST1d32QPseudoWB_register;
  case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;

  // VLD2:
  case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
  case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
  case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
  case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
  case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
  case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;

  // VST2:
  case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
  case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
  case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
  case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
  case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
  case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;

  // VLD2DUP:
  case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
  case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
  case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
  }
  return Opc; // If not one we handle, return it unchanged.
}
2097
2098/// Returns true if the given increment is a Constant known to be equal to the
2099/// access size performed by a NEON load/store. This means the "[rN]!" form can
2100/// be used.
2101static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) {
2102 auto C = dyn_cast<ConstantSDNode>(Inc);
2103 return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs;
2104}
2105
/// Select a NEON VLD1-4 node (intrinsic or post-indexed updating load) into
/// the corresponding machine node(s).
///
/// \param N          node to select; operand 0 is the chain, the address is
///                   at operand 2 for intrinsics or 1 for updating nodes.
/// \param isUpdating true if the node also produces an updated base address.
/// \param NumVecs    number of vectors loaded (1-4).
/// \param DOpcodes   opcodes for 64-bit (double-register) vectors, indexed
///                   by element size.
/// \param QOpcodes0  opcodes for quad-register vectors (for VLD3/4, the
///                   even d-register halves).
/// \param QOpcodes1  opcodes for the odd d-register halves of a
///                   quad-register VLD3/4.
void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                                const uint16_t *DOpcodes,
                                const uint16_t *QOpcodes0,
                                const uint16_t *QOpcodes1) {
  assert(Subtarget->hasNEON());
  assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
                                  // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool is64BitVector = VT.is64BitVector();
  // Clamp the alignment operand to what the instruction encoding supports.
  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);

  // Index into the opcode tables by element size.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld type");
    // Double-register operations:
  case MVT::v8i8: OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4bf16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1i64: OpcodeIndex = 3; break;
    // Quad-register operations:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v8f16:
  case MVT::v8bf16:
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 3; break;
  }

  // Multi-vector loads produce one wide i64-element super-register value
  // that is split into subregisters afterwards; a VLD3 result is modelled
  // with 4 elements (the 4th is unused).
  EVT ResTy;
  if (NumVecs == 1)
    ResTy = VT;
  else {
    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
    if (!is64BitVector)
      ResTyElts *= 2;
    ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
  }
  std::vector<EVT> ResTys;
  ResTys.push_back(ResTy);
  if (isUpdating)
    ResTys.push_back(MVT::i32); // updated base address
  ResTys.push_back(MVT::Other); // chain

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  SDNode *VLd;
  // NOTE(review): the declaration of the 'Ops' operand vector used below
  // (a SmallVector<SDValue, ...> in upstream LLVM) appears to be missing
  // from this excerpt — confirm against upstream.

  // Double registers and VLD1/VLD2 quad registers are directly supported.
  if (is64BitVector || NumVecs <= 2) {
    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                    QOpcodes0[OpcodeIndex]);
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
      if (!IsImmUpdate) {
        // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
        // check for the opcode rather than the number of vector elements.
        // NOTE(review): upstream LLVM has
        //   "Opc = getVLDSTRegisterUpdateOpcode(Opc);"
        // as the body of this 'if'; that statement appears to have been
        // lost here, leaving the 'if' guarding the push_back of Inc.
        // Confirm against upstream before relying on this code path.
        if (isVLDfixed(Opc))
          Ops.push_back(Inc);
        // VLD1/VLD2 fixed increment does not need Reg0 so only include it in
        // the operands if not such an opcode.
      } else if (!isVLDfixed(Opc))
        Ops.push_back(Reg0);
    }
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  } else {
    // Otherwise, quad registers are loaded with two separate instructions,
    // where one loads the even registers and the other loads the odd registers.
    EVT AddrTy = MemAddr.getValueType();

    // Load the even subregs. This is always an updating load, so that it
    // provides the address to the second load for the odd subregs.
    SDValue ImplDef =
      SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
    const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
                                          ResTy, AddrTy, MVT::Other, OpsA);
    Chain = SDValue(VLdA, 2);

    // Load the odd subregs.
    Ops.push_back(SDValue(VLdA, 1)); // address produced by the even load
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      assert(isa<ConstantSDNode>(Inc.getNode()) &&
             "only constant post-increment update allowed for VLD3/4");
      (void)Inc;
      Ops.push_back(Reg0);
    }
    Ops.push_back(SDValue(VLdA, 0)); // partially-loaded super-register
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
  }

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLd), {MemOp});

  if (NumVecs == 1) {
    ReplaceNode(N, VLd);
    return;
  }

  // Extract out the subregisters.
  SDValue SuperReg = SDValue(VLd, 0);
  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
                    ARM::qsub_3 == ARM::qsub_0 + 3,
                "Unexpected subreg numbering");
  unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
  CurDAG->RemoveDeadNode(N);
}
2247
/// Select a NEON VST1-4 node (intrinsic or post-indexed updating store) into
/// the corresponding machine node(s).
///
/// \param N          node to select; operand 0 is the chain, the address is
///                   at operand 2 for intrinsics or 1 for updating nodes,
///                   and the vectors to store start at operand 3.
/// \param isUpdating true if the node also produces an updated base address.
/// \param NumVecs    number of vectors stored (1-4).
/// \param DOpcodes   opcodes for 64-bit (double-register) vectors.
/// \param QOpcodes0  opcodes for quad-register vectors (for VST3/4, the
///                   even d-register halves).
/// \param QOpcodes1  opcodes for the odd d-register halves of a
///                   quad-register VST3/4.
void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                                const uint16_t *DOpcodes,
                                const uint16_t *QOpcodes0,
                                const uint16_t *QOpcodes1) {
  assert(Subtarget->hasNEON());
  assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
                                  // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getOperand(Vec0Idx).getValueType();
  bool is64BitVector = VT.is64BitVector();
  // Clamp the alignment operand to what the instruction encoding supports.
  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);

  // Index into the opcode tables by element size.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vst type");
    // Double-register operations:
  case MVT::v8i8: OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4bf16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1i64: OpcodeIndex = 3; break;
    // Quad-register operations:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v8f16:
  case MVT::v8bf16:
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 3; break;
  }

  std::vector<EVT> ResTys;
  if (isUpdating)
    ResTys.push_back(MVT::i32); // updated base address
  ResTys.push_back(MVT::Other); // chain

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  // NOTE(review): the declaration of the 'Ops' operand vector used below
  // (a SmallVector<SDValue, ...> in upstream LLVM) appears to be missing
  // from this excerpt — confirm against upstream.

  // Double registers and VST1/VST2 quad registers are directly supported.
  if (is64BitVector || NumVecs <= 2) {
    SDValue SrcReg;
    if (NumVecs == 1) {
      SrcReg = N->getOperand(Vec0Idx);
    } else if (is64BitVector) {
      // Form a REG_SEQUENCE to force register allocation.
      SDValue V0 = N->getOperand(Vec0Idx + 0);
      SDValue V1 = N->getOperand(Vec0Idx + 1);
      if (NumVecs == 2)
        SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
      else {
        SDValue V2 = N->getOperand(Vec0Idx + 2);
        // If it's a vst3, form a quad D-register and leave the last part as
        // an undef.
        SDValue V3 = (NumVecs == 3)
          ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
          : N->getOperand(Vec0Idx + 3);
        SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
      }
    } else {
      // Form a QQ register.
      SDValue Q0 = N->getOperand(Vec0Idx);
      SDValue Q1 = N->getOperand(Vec0Idx + 1);
      SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
    }

    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                    QOpcodes0[OpcodeIndex]);
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
      if (!IsImmUpdate) {
        // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so
        // check for the opcode rather than the number of vector elements.
        // NOTE(review): upstream LLVM has
        //   "Opc = getVLDSTRegisterUpdateOpcode(Opc);"
        // as the body of this 'if'; that statement appears to have been
        // lost here, leaving the 'if' guarding the push_back of Inc.
        // Confirm against upstream before relying on this code path.
        if (isVSTfixed(Opc))
          Ops.push_back(Inc);
      }
      // VST1/VST2 fixed increment does not need Reg0 so only include it in
      // the operands if not such an opcode.
      else if (!isVSTfixed(Opc))
        Ops.push_back(Reg0);
    }
    Ops.push_back(SrcReg);
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

    // Transfer memoperands.
    CurDAG->setNodeMemRefs(cast<MachineSDNode>(VSt), {MemOp});

    ReplaceNode(N, VSt);
    return;
  }

  // Otherwise, quad registers are stored with two separate instructions,
  // where one stores the even registers and the other stores the odd registers.

  // Form the QQQQ REG_SEQUENCE.
  SDValue V0 = N->getOperand(Vec0Idx + 0);
  SDValue V1 = N->getOperand(Vec0Idx + 1);
  SDValue V2 = N->getOperand(Vec0Idx + 2);
  SDValue V3 = (NumVecs == 3)
    ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
    : N->getOperand(Vec0Idx + 3);
  SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);

  // Store the even D registers. This is always an updating store, so that it
  // provides the address to the second store for the odd subregs.
  const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
  SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
                                        MemAddr.getValueType(),
                                        MVT::Other, OpsA);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStA), {MemOp});
  Chain = SDValue(VStA, 1);

  // Store the odd D registers.
  Ops.push_back(SDValue(VStA, 0)); // address produced by the even store
  Ops.push_back(Align);
  if (isUpdating) {
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    assert(isa<ConstantSDNode>(Inc.getNode()) &&
           "only constant post-increment update allowed for VST3/4");
    (void)Inc;
    Ops.push_back(Reg0);
  }
  Ops.push_back(RegSeq);
  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);
  SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
                                        Ops);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStB), {MemOp});
  ReplaceNode(N, VStB);
}
2401
/// Select a NEON per-lane load/store (VLDn/VSTn lane form) node.
///
/// \param N          node to select; the lane number is a constant operand
///                   following the vector operands.
/// \param IsLoad     true for vld-lane, false for vst-lane.
/// \param isUpdating true if the node also produces an updated base address.
/// \param NumVecs    number of vectors involved (2-4).
/// \param DOpcodes   opcodes for 64-bit (double-register) vectors.
/// \param QOpcodes   opcodes for 128-bit (quad-register) vectors.
void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
                                      unsigned NumVecs,
                                      const uint16_t *DOpcodes,
                                      const uint16_t *QOpcodes) {
  assert(Subtarget->hasNEON());
  assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
                                  // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  SDValue Chain = N->getOperand(0);
  unsigned Lane = N->getConstantOperandVal(Vec0Idx + NumVecs);
  EVT VT = N->getOperand(Vec0Idx).getValueType();
  bool is64BitVector = VT.is64BitVector();

  // Compute the encodable alignment operand. A 3-vector lane access has no
  // alignment variants, so it stays 0.
  unsigned Alignment = 0;
  if (NumVecs != 3) {
    Alignment = Align->getAsZExtVal();
    unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
    if (Alignment > NumBytes)
      Alignment = NumBytes;
    if (Alignment < 8 && Alignment < NumBytes)
      Alignment = 0;
    // Alignment must be a power of two; make sure of that.
    Alignment = (Alignment & -Alignment);
    if (Alignment == 1)
      Alignment = 0;
  }
  Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);

  // Index into the opcode tables by element size.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld/vst lane type");
    // Double-register operations:
  case MVT::v8i8: OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4bf16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
    // Quad-register operations:
  case MVT::v8f16:
  case MVT::v8bf16:
  case MVT::v8i16: OpcodeIndex = 0; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 1; break;
  }

  // Loads produce one wide i64-element super-register value; a 3-vector
  // result is modelled with 4 elements (the 4th is unused).
  std::vector<EVT> ResTys;
  if (IsLoad) {
    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
    if (!is64BitVector)
      ResTyElts *= 2;
    ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
                                      MVT::i64, ResTyElts));
  }
  if (isUpdating)
    ResTys.push_back(MVT::i32); // updated base address
  ResTys.push_back(MVT::Other); // chain

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

  // NOTE(review): the declaration of the 'Ops' operand vector used below
  // (a SmallVector<SDValue, ...> in upstream LLVM) appears to be missing
  // from this excerpt — confirm against upstream.
  Ops.push_back(MemAddr);
  Ops.push_back(Align);
  if (isUpdating) {
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    bool IsImmUpdate =
        isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
    Ops.push_back(IsImmUpdate ? Reg0 : Inc);
  }

  // Gather the input vectors into a single super-register; a 3-vector group
  // is padded with an IMPLICIT_DEF in the last slot.
  SDValue SuperReg;
  SDValue V0 = N->getOperand(Vec0Idx + 0);
  SDValue V1 = N->getOperand(Vec0Idx + 1);
  if (NumVecs == 2) {
    if (is64BitVector)
      SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
    else
      SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
  } else {
    SDValue V2 = N->getOperand(Vec0Idx + 2);
    SDValue V3 = (NumVecs == 3)
      ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
      : N->getOperand(Vec0Idx + 3);
    if (is64BitVector)
      SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
    else
      SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
  }
  Ops.push_back(SuperReg);
  Ops.push_back(getI32Imm(Lane, dl));
  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);

  unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                  QOpcodes[OpcodeIndex]);
  SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdLn), {MemOp});
  if (!IsLoad) {
    ReplaceNode(N, VLdLn);
    return;
  }

  // Extract the subregisters.
  SuperReg = SDValue(VLdLn, 0);
  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
                    ARM::qsub_3 == ARM::qsub_0 + 3,
                "Unexpected subreg numbering");
  unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
  CurDAG->RemoveDeadNode(N);
}
2530
2531template <typename SDValueVector>
2532void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2533 SDValue PredicateMask) {
2534 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32));
2535 Ops.push_back(PredicateMask);
2536 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
2537}
2538
2539template <typename SDValueVector>
2540void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2541 SDValue PredicateMask,
2542 SDValue Inactive) {
2543 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32));
2544 Ops.push_back(PredicateMask);
2545 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
2546 Ops.push_back(Inactive);
2547}
2548
2549template <typename SDValueVector>
2550void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc) {
2551 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32));
2552 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
2553 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
2554}
2555
2556template <typename SDValueVector>
2557void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2558 EVT InactiveTy) {
2559 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32));
2560 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
2561 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
2562 Ops.push_back(SDValue(
2563 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, InactiveTy), 0));
2564}
2565
/// Select an MVE gather-load / scatter-store with writeback of the vector
/// of base addresses.
///
/// \param N          node: value 0 is the loaded data (or updated bases),
///                   operand 2 is the vector of base addresses, operand 3
///                   the immediate offset, operand 4 the predicate (if any).
/// \param Opcodes    two opcodes selected by element size: [0] for 32-bit,
///                   [1] for 64-bit elements.
/// \param Predicated whether to attach a real predicate mask or the empty
///                   (unpredicated) operand triple.
void ARMDAGToDAGISel::SelectMVE_WB(SDNode *N, const uint16_t *Opcodes,
                                   bool Predicated) {
  SDLoc Loc(N);
  // NOTE(review): the declarations of the 'Ops' and 'VTs' vectors used
  // below (SmallVectors in upstream LLVM) appear to be missing from this
  // excerpt — confirm against upstream.

  uint16_t Opcode;
  switch (N->getValueType(1).getVectorElementType().getSizeInBits()) {
  case 32:
    Opcode = Opcodes[0];
    break;
  case 64:
    Opcode = Opcodes[1];
    break;
  default:
    llvm_unreachable("bad vector element size in SelectMVE_WB");
  }

  Ops.push_back(N->getOperand(2)); // vector of base addresses

  int32_t ImmValue = N->getConstantOperandVal(3);
  Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate offset

  if (Predicated)
    AddMVEPredicateToOps(Ops, Loc, N->getOperand(4));
  else
    AddEmptyMVEPredicateToOps(Ops, Loc);

  Ops.push_back(N->getOperand(0)); // chain

  // The machine node's result order differs from N's: swap values 0 and 1
  // when wiring up the replacements below.
  VTs.push_back(N->getValueType(1));
  VTs.push_back(N->getValueType(0));
  VTs.push_back(N->getValueType(2));

  SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), VTs, Ops);
  ReplaceUses(SDValue(N, 0), SDValue(New, 1));
  ReplaceUses(SDValue(N, 1), SDValue(New, 0));
  ReplaceUses(SDValue(N, 2), SDValue(New, 2));
  transferMemOperands(N, New);
  CurDAG->RemoveDeadNode(N);
}
2607
/// Select an MVE long (64-bit, two-GPR) scalar shift.
///
/// \param N          node: operands 1-2 are the low/high halves of the
///                   value, operand 3 the shift count, operand 4 the
///                   saturation selector (if present).
/// \param Opcode     machine opcode to emit.
/// \param Immediate  true if the shift count is an immediate rather than a
///                   register.
/// \param HasSaturationOperand whether the node carries a saturation
///                   selector operand (64 selects bit value 0, else 1).
void ARMDAGToDAGISel::SelectMVE_LongShift(SDNode *N, uint16_t Opcode,
                                          bool Immediate,
                                          bool HasSaturationOperand) {
  SDLoc Loc(N);
  // NOTE(review): the declaration of the 'Ops' operand vector used below
  // (a SmallVector<SDValue, ...> in upstream LLVM) appears to be missing
  // from this excerpt — confirm against upstream.

  // Two 32-bit halves of the value to be shifted
  Ops.push_back(N->getOperand(1));
  Ops.push_back(N->getOperand(2));

  // The shift count
  if (Immediate) {
    int32_t ImmValue = N->getConstantOperandVal(3);
    Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count
  } else {
    Ops.push_back(N->getOperand(3));
  }

  // The immediate saturation operand, if any
  if (HasSaturationOperand) {
    int32_t SatOp = N->getConstantOperandVal(4);
    int SatBit = (SatOp == 64 ? 0 : 1);
    Ops.push_back(getI32Imm(SatBit, Loc));
  }

  // MVE scalar shifts are IT-predicable, so include the standard
  // predicate arguments.
  Ops.push_back(getAL(CurDAG, Loc));
  Ops.push_back(CurDAG->getRegister(0, MVT::i32));

  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops));
}
2640
2641void ARMDAGToDAGISel::SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
2642 uint16_t OpcodeWithNoCarry,
2643 bool Add, bool Predicated) {
2644 SDLoc Loc(N);
2646 uint16_t Opcode;
2647
2648 unsigned FirstInputOp = Predicated ? 2 : 1;
2649
2650 // Two input vectors and the input carry flag
2651 Ops.push_back(N->getOperand(FirstInputOp));
2652 Ops.push_back(N->getOperand(FirstInputOp + 1));
2653 SDValue CarryIn = N->getOperand(FirstInputOp + 2);
2654 ConstantSDNode *CarryInConstant = dyn_cast<ConstantSDNode>(CarryIn);
2655 uint32_t CarryMask = 1 << 29;
2656 uint32_t CarryExpected = Add ? 0 : CarryMask;
2657 if (CarryInConstant &&
2658 (CarryInConstant->getZExtValue() & CarryMask) == CarryExpected) {
2659 Opcode = OpcodeWithNoCarry;
2660 } else {
2661 Ops.push_back(CarryIn);
2662 Opcode = OpcodeWithCarry;
2663 }
2664
2665 if (Predicated)
2666 AddMVEPredicateToOps(Ops, Loc,
2667 N->getOperand(FirstInputOp + 3), // predicate
2668 N->getOperand(FirstInputOp - 1)); // inactive
2669 else
2670 AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0));
2671
2672 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops));
2673}
2674
2675void ARMDAGToDAGISel::SelectMVE_VSHLC(SDNode *N, bool Predicated) {
2676 SDLoc Loc(N);
2678
2679 // One vector input, followed by a 32-bit word of bits to shift in
2680 // and then an immediate shift count
2681 Ops.push_back(N->getOperand(1));
2682 Ops.push_back(N->getOperand(2));
2683 int32_t ImmValue = N->getConstantOperandVal(3);
2684 Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count
2685
2686 if (Predicated)
2687 AddMVEPredicateToOps(Ops, Loc, N->getOperand(4));
2688 else
2689 AddEmptyMVEPredicateToOps(Ops, Loc);
2690
2691 CurDAG->SelectNodeTo(N, ARM::MVE_VSHLC, N->getVTList(), ArrayRef(Ops));
2692}
2693
2694static bool SDValueToConstBool(SDValue SDVal) {
2695 assert(isa<ConstantSDNode>(SDVal) && "expected a compile-time constant");
2696 ConstantSDNode *SDValConstant = dyn_cast<ConstantSDNode>(SDVal);
2697 uint64_t Value = SDValConstant->getZExtValue();
2698 assert((Value == 0 || Value == 1) && "expected value 0 or 1");
2699 return Value;
2700}
2701
2702void ARMDAGToDAGISel::SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
2703 const uint16_t *OpcodesS,
2704 const uint16_t *OpcodesU,
2705 size_t Stride, size_t TySize) {
2706 assert(TySize < Stride && "Invalid TySize");
2707 bool IsUnsigned = SDValueToConstBool(N->getOperand(1));
2708 bool IsSub = SDValueToConstBool(N->getOperand(2));
2709 bool IsExchange = SDValueToConstBool(N->getOperand(3));
2710 if (IsUnsigned) {
2711 assert(!IsSub &&
2712 "Unsigned versions of vmlsldav[a]/vrmlsldavh[a] do not exist");
2713 assert(!IsExchange &&
2714 "Unsigned versions of vmlaldav[a]x/vrmlaldavh[a]x do not exist");
2715 }
2716
2717 auto OpIsZero = [N](size_t OpNo) {
2718 return isNullConstant(N->getOperand(OpNo));
2719 };
2720
2721 // If the input accumulator value is not zero, select an instruction with
2722 // accumulator, otherwise select an instruction without accumulator
2723 bool IsAccum = !(OpIsZero(4) && OpIsZero(5));
2724
2725 const uint16_t *Opcodes = IsUnsigned ? OpcodesU : OpcodesS;
2726 if (IsSub)
2727 Opcodes += 4 * Stride;
2728 if (IsExchange)
2729 Opcodes += 2 * Stride;
2730 if (IsAccum)
2731 Opcodes += Stride;
2732 uint16_t Opcode = Opcodes[TySize];
2733
2734 SDLoc Loc(N);
2736 // Push the accumulator operands, if they are used
2737 if (IsAccum) {
2738 Ops.push_back(N->getOperand(4));
2739 Ops.push_back(N->getOperand(5));
2740 }
2741 // Push the two vector operands
2742 Ops.push_back(N->getOperand(6));
2743 Ops.push_back(N->getOperand(7));
2744
2745 if (Predicated)
2746 AddMVEPredicateToOps(Ops, Loc, N->getOperand(8));
2747 else
2748 AddEmptyMVEPredicateToOps(Ops, Loc);
2749
2750 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops));
2751}
2752
2753void ARMDAGToDAGISel::SelectMVE_VMLLDAV(SDNode *N, bool Predicated,
2754 const uint16_t *OpcodesS,
2755 const uint16_t *OpcodesU) {
2756 EVT VecTy = N->getOperand(6).getValueType();
2757 size_t SizeIndex;
2758 switch (VecTy.getVectorElementType().getSizeInBits()) {
2759 case 16:
2760 SizeIndex = 0;
2761 break;
2762 case 32:
2763 SizeIndex = 1;
2764 break;
2765 default:
2766 llvm_unreachable("bad vector element size");
2767 }
2768
2769 SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 2, SizeIndex);
2770}
2771
2772void ARMDAGToDAGISel::SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated,
2773 const uint16_t *OpcodesS,
2774 const uint16_t *OpcodesU) {
2775 assert(
2776 N->getOperand(6).getValueType().getVectorElementType().getSizeInBits() ==
2777 32 &&
2778 "bad vector element size");
2779 SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 1, 0);
2780}
2781
// Select an MVE deinterleaving load (VLD2/VLD4): emits one MVE_VLDn machine
// instruction per stage, threading the partially-filled multi-register tuple
// and the chain through the stages, then extracts each Q-register result.
void ARMDAGToDAGISel::SelectMVE_VLD(SDNode *N, unsigned NumVecs,
                                    const uint16_t *const *Opcodes,
                                    bool HasWriteback) {
  EVT VT = N->getValueType(0);
  SDLoc Loc(N);

  // Outer table is indexed by element size, inner table by load stage.
  const uint16_t *OurOpcodes;
  switch (VT.getVectorElementType().getSizeInBits()) {
  case 8:
    OurOpcodes = Opcodes[0];
    break;
  case 16:
    OurOpcodes = Opcodes[1];
    break;
  case 32:
    OurOpcodes = Opcodes[2];
    break;
  default:
    llvm_unreachable("bad vector element size in SelectMVE_VLD");
  }

  // The tuple of all NumVecs results, modelled as one wide vector of i64.
  EVT DataTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, NumVecs * 2);
  SmallVector<EVT, 4> ResultTys = {DataTy, MVT::Other};
  unsigned PtrOperand = HasWriteback ? 1 : 2;

  // Start from an undefined tuple; each stage fills in part of it.
  auto Data = SDValue(
      CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, DataTy), 0);
  SDValue Chain = N->getOperand(0);
  // Add a MVE_VLDn instruction for each Vec, except the last
  for (unsigned Stage = 0; Stage < NumVecs - 1; ++Stage) {
    SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain};
    auto LoadInst =
        CurDAG->getMachineNode(OurOpcodes[Stage], Loc, ResultTys, Ops);
    Data = SDValue(LoadInst, 0);
    Chain = SDValue(LoadInst, 1);
    transferMemOperands(N, LoadInst);
  }
  // The last may need a writeback on it
  if (HasWriteback)
    ResultTys = {DataTy, MVT::i32, MVT::Other};
  SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain};
  auto LoadInst =
      CurDAG->getMachineNode(OurOpcodes[NumVecs - 1], Loc, ResultTys, Ops);
  transferMemOperands(N, LoadInst);

  // Replace each original result with the matching Q-subregister of the
  // final tuple, then the optional writeback pointer, then the chain.
  unsigned i;
  for (i = 0; i < NumVecs; i++)
    ReplaceUses(SDValue(N, i),
                CurDAG->getTargetExtractSubreg(ARM::qsub_0 + i, Loc, VT,
                                               SDValue(LoadInst, 0)));
  if (HasWriteback)
    ReplaceUses(SDValue(N, i++), SDValue(LoadInst, 1));
  ReplaceUses(SDValue(N, i), SDValue(LoadInst, HasWriteback ? 2 : 1));
  CurDAG->RemoveDeadNode(N);
}
2837
2838void ARMDAGToDAGISel::SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes,
2839 bool Wrapping, bool Predicated) {
2840 EVT VT = N->getValueType(0);
2841 SDLoc Loc(N);
2842
2843 uint16_t Opcode;
2844 switch (VT.getScalarSizeInBits()) {
2845 case 8:
2846 Opcode = Opcodes[0];
2847 break;
2848 case 16:
2849 Opcode = Opcodes[1];
2850 break;
2851 case 32:
2852 Opcode = Opcodes[2];
2853 break;
2854 default:
2855 llvm_unreachable("bad vector element size in SelectMVE_VxDUP");
2856 }
2857
2859 unsigned OpIdx = 1;
2860
2861 SDValue Inactive;
2862 if (Predicated)
2863 Inactive = N->getOperand(OpIdx++);
2864
2865 Ops.push_back(N->getOperand(OpIdx++)); // base
2866 if (Wrapping)
2867 Ops.push_back(N->getOperand(OpIdx++)); // limit
2868
2869 SDValue ImmOp = N->getOperand(OpIdx++); // step
2870 int ImmValue = ImmOp->getAsZExtVal();
2871 Ops.push_back(getI32Imm(ImmValue, Loc));
2872
2873 if (Predicated)
2874 AddMVEPredicateToOps(Ops, Loc, N->getOperand(OpIdx), Inactive);
2875 else
2876 AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0));
2877
2878 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops));
2879}
2880
2881void ARMDAGToDAGISel::SelectCDE_CXxD(SDNode *N, uint16_t Opcode,
2882 size_t NumExtraOps, bool HasAccum) {
2883 bool IsBigEndian = CurDAG->getDataLayout().isBigEndian();
2884 SDLoc Loc(N);
2886
2887 unsigned OpIdx = 1;
2888
2889 // Convert and append the immediate operand designating the coprocessor.
2890 SDValue ImmCorpoc = N->getOperand(OpIdx++);
2891 uint32_t ImmCoprocVal = ImmCorpoc->getAsZExtVal();
2892 Ops.push_back(getI32Imm(ImmCoprocVal, Loc));
2893
2894 // For accumulating variants copy the low and high order parts of the
2895 // accumulator into a register pair and add it to the operand vector.
2896 if (HasAccum) {
2897 SDValue AccLo = N->getOperand(OpIdx++);
2898 SDValue AccHi = N->getOperand(OpIdx++);
2899 if (IsBigEndian)
2900 std::swap(AccLo, AccHi);
2901 Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, AccLo, AccHi), 0));
2902 }
2903
2904 // Copy extra operands as-is.
2905 for (size_t I = 0; I < NumExtraOps; I++)
2906 Ops.push_back(N->getOperand(OpIdx++));
2907
2908 // Convert and append the immediate operand
2909 SDValue Imm = N->getOperand(OpIdx);
2910 uint32_t ImmVal = Imm->getAsZExtVal();
2911 Ops.push_back(getI32Imm(ImmVal, Loc));
2912
2913 // Accumulating variants are IT-predicable, add predicate operands.
2914 if (HasAccum) {
2915 SDValue Pred = getAL(CurDAG, Loc);
2916 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
2917 Ops.push_back(Pred);
2918 Ops.push_back(PredReg);
2919 }
2920
2921 // Create the CDE intruction
2922 SDNode *InstrNode = CurDAG->getMachineNode(Opcode, Loc, MVT::Untyped, Ops);
2923 SDValue ResultPair = SDValue(InstrNode, 0);
2924
2925 // The original intrinsic had two outputs, and the output of the dual-register
2926 // CDE instruction is a register pair. We need to extract the two subregisters
2927 // and replace all uses of the original outputs with the extracted
2928 // subregisters.
2929 uint16_t SubRegs[2] = {ARM::gsub_0, ARM::gsub_1};
2930 if (IsBigEndian)
2931 std::swap(SubRegs[0], SubRegs[1]);
2932
2933 for (size_t ResIdx = 0; ResIdx < 2; ResIdx++) {
2934 if (SDValue(N, ResIdx).use_empty())
2935 continue;
2936 SDValue SubReg = CurDAG->getTargetExtractSubreg(SubRegs[ResIdx], Loc,
2937 MVT::i32, ResultPair);
2938 ReplaceUses(SDValue(N, ResIdx), SubReg);
2939 }
2940
2941 CurDAG->RemoveDeadNode(N);
2942}
2943
2944void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
2945 bool isUpdating, unsigned NumVecs,
2946 const uint16_t *DOpcodes,
2947 const uint16_t *QOpcodes0,
2948 const uint16_t *QOpcodes1) {
2949 assert(Subtarget->hasNEON());
2950 assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
2951 SDLoc dl(N);
2952
2953 SDValue MemAddr, Align;
2954 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2955 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2956 return;
2957
2958 SDValue Chain = N->getOperand(0);
2959 EVT VT = N->getValueType(0);
2960 bool is64BitVector = VT.is64BitVector();
2961
2962 unsigned Alignment = 0;
2963 if (NumVecs != 3) {
2964 Alignment = Align->getAsZExtVal();
2965 unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
2966 if (Alignment > NumBytes)
2967 Alignment = NumBytes;
2968 if (Alignment < 8 && Alignment < NumBytes)
2969 Alignment = 0;
2970 // Alignment must be a power of two; make sure of that.
2971 Alignment = (Alignment & -Alignment);
2972 if (Alignment == 1)
2973 Alignment = 0;
2974 }
2975 Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2976
2977 unsigned OpcodeIndex;
2978 switch (VT.getSimpleVT().SimpleTy) {
2979 default: llvm_unreachable("unhandled vld-dup type");
2980 case MVT::v8i8:
2981 case MVT::v16i8: OpcodeIndex = 0; break;
2982 case MVT::v4i16:
2983 case MVT::v8i16:
2984 case MVT::v4f16:
2985 case MVT::v8f16:
2986 case MVT::v4bf16:
2987 case MVT::v8bf16:
2988 OpcodeIndex = 1; break;
2989 case MVT::v2f32:
2990 case MVT::v2i32:
2991 case MVT::v4f32:
2992 case MVT::v4i32: OpcodeIndex = 2; break;
2993 case MVT::v1f64:
2994 case MVT::v1i64: OpcodeIndex = 3; break;
2995 }
2996
2997 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2998 if (!is64BitVector)
2999 ResTyElts *= 2;
3000 EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
3001
3002 std::vector<EVT> ResTys;
3003 ResTys.push_back(ResTy);
3004 if (isUpdating)
3005 ResTys.push_back(MVT::i32);
3006 ResTys.push_back(MVT::Other);
3007
3008 SDValue Pred = getAL(CurDAG, dl);
3009 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3010
3012 Ops.push_back(MemAddr);
3013 Ops.push_back(Align);
3014 unsigned Opc = is64BitVector ? DOpcodes[OpcodeIndex]
3015 : (NumVecs == 1) ? QOpcodes0[OpcodeIndex]
3016 : QOpcodes1[OpcodeIndex];
3017 if (isUpdating) {
3018 SDValue Inc = N->getOperand(2);
3019 bool IsImmUpdate =
3020 isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
3021 if (IsImmUpdate) {
3022 if (!isVLDfixed(Opc))
3023 Ops.push_back(Reg0);
3024 } else {
3025 if (isVLDfixed(Opc))
3027 Ops.push_back(Inc);
3028 }
3029 }
3030 if (is64BitVector || NumVecs == 1) {
3031 // Double registers and VLD1 quad registers are directly supported.
3032 } else {
3033 SDValue ImplDef = SDValue(
3034 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
3035 const SDValue OpsA[] = {MemAddr, Align, ImplDef, Pred, Reg0, Chain};
3036 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, ResTy,
3037 MVT::Other, OpsA);
3038 Ops.push_back(SDValue(VLdA, 0));
3039 Chain = SDValue(VLdA, 1);
3040 }
3041
3042 Ops.push_back(Pred);
3043 Ops.push_back(Reg0);
3044 Ops.push_back(Chain);
3045
3046 SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
3047
3048 // Transfer memoperands.
3049 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3050 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdDup), {MemOp});
3051
3052 // Extract the subregisters.
3053 if (NumVecs == 1) {
3054 ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0));
3055 } else {
3056 SDValue SuperReg = SDValue(VLdDup, 0);
3057 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
3058 unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
3059 for (unsigned Vec = 0; Vec != NumVecs; ++Vec) {
3060 ReplaceUses(SDValue(N, Vec),
3061 CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
3062 }
3063 }
3064 ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
3065 if (isUpdating)
3066 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
3067 CurDAG->RemoveDeadNode(N);
3068}
3069
3070bool ARMDAGToDAGISel::tryInsertVectorElt(SDNode *N) {
3071 if (!Subtarget->hasMVEIntegerOps())
3072 return false;
3073
3074 SDLoc dl(N);
3075
3076 // We are trying to use VMOV/VMOVX/VINS to more efficiently lower insert and
3077 // extracts of v8f16 and v8i16 vectors. Check that we have two adjacent
3078 // inserts of the correct type:
3079 SDValue Ins1 = SDValue(N, 0);
3080 SDValue Ins2 = N->getOperand(0);
3081 EVT VT = Ins1.getValueType();
3082 if (Ins2.getOpcode() != ISD::INSERT_VECTOR_ELT || !Ins2.hasOneUse() ||
3083 !isa<ConstantSDNode>(Ins1.getOperand(2)) ||
3084 !isa<ConstantSDNode>(Ins2.getOperand(2)) ||
3085 (VT != MVT::v8f16 && VT != MVT::v8i16) || (Ins2.getValueType() != VT))
3086 return false;
3087
3088 unsigned Lane1 = Ins1.getConstantOperandVal(2);
3089 unsigned Lane2 = Ins2.getConstantOperandVal(2);
3090 if (Lane2 % 2 != 0 || Lane1 != Lane2 + 1)
3091 return false;
3092
3093 // If the inserted values will be able to use T/B already, leave it to the
3094 // existing tablegen patterns. For example VCVTT/VCVTB.
3095 SDValue Val1 = Ins1.getOperand(1);
3096 SDValue Val2 = Ins2.getOperand(1);
3097 if (Val1.getOpcode() == ISD::FP_ROUND || Val2.getOpcode() == ISD::FP_ROUND)
3098 return false;
3099
3100 // Check if the inserted values are both extracts.
3101 if ((Val1.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
3102 Val1.getOpcode() == ARMISD::VGETLANEu) &&
3104 Val2.getOpcode() == ARMISD::VGETLANEu) &&
3105 isa<ConstantSDNode>(Val1.getOperand(1)) &&
3106 isa<ConstantSDNode>(Val2.getOperand(1)) &&
3107 (Val1.getOperand(0).getValueType() == MVT::v8f16 ||
3108 Val1.getOperand(0).getValueType() == MVT::v8i16) &&
3109 (Val2.getOperand(0).getValueType() == MVT::v8f16 ||
3110 Val2.getOperand(0).getValueType() == MVT::v8i16)) {
3111 unsigned ExtractLane1 = Val1.getConstantOperandVal(1);
3112 unsigned ExtractLane2 = Val2.getConstantOperandVal(1);
3113
3114 // If the two extracted lanes are from the same place and adjacent, this
3115 // simplifies into a f32 lane move.
3116 if (Val1.getOperand(0) == Val2.getOperand(0) && ExtractLane2 % 2 == 0 &&
3117 ExtractLane1 == ExtractLane2 + 1) {
3118 SDValue NewExt = CurDAG->getTargetExtractSubreg(
3119 ARM::ssub_0 + ExtractLane2 / 2, dl, MVT::f32, Val1.getOperand(0));
3120 SDValue NewIns = CurDAG->getTargetInsertSubreg(
3121 ARM::ssub_0 + Lane2 / 2, dl, VT, Ins2.getOperand(0),
3122 NewExt);
3123 ReplaceUses(Ins1, NewIns);
3124 return true;
3125 }
3126
3127 // Else v8i16 pattern of an extract and an insert, with a optional vmovx for
3128 // extracting odd lanes.
3129 if (VT == MVT::v8i16 && Subtarget->hasFullFP16()) {
3130 SDValue Inp1 = CurDAG->getTargetExtractSubreg(
3131 ARM::ssub_0 + ExtractLane1 / 2, dl, MVT::f32, Val1.getOperand(0));
3132 SDValue Inp2 = CurDAG->getTargetExtractSubreg(
3133 ARM::ssub_0 + ExtractLane2 / 2, dl, MVT::f32, Val2.getOperand(0));
3134 if (ExtractLane1 % 2 != 0)
3135 Inp1 = SDValue(CurDAG->getMachineNode(ARM::VMOVH, dl, MVT::f32, Inp1), 0);
3136 if (ExtractLane2 % 2 != 0)
3137 Inp2 = SDValue(CurDAG->getMachineNode(ARM::VMOVH, dl, MVT::f32, Inp2), 0);
3138 SDNode *VINS = CurDAG->getMachineNode(ARM::VINSH, dl, MVT::f32, Inp2, Inp1);
3139 SDValue NewIns =
3140 CurDAG->getTargetInsertSubreg(ARM::ssub_0 + Lane2 / 2, dl, MVT::v4f32,
3141 Ins2.getOperand(0), SDValue(VINS, 0));
3142 ReplaceUses(Ins1, NewIns);
3143 return true;
3144 }
3145 }
3146
3147 // The inserted values are not extracted - if they are f16 then insert them
3148 // directly using a VINS.
3149 if (VT == MVT::v8f16 && Subtarget->hasFullFP16()) {
3150 SDNode *VINS = CurDAG->getMachineNode(ARM::VINSH, dl, MVT::f32, Val2, Val1);
3151 SDValue NewIns =
3152 CurDAG->getTargetInsertSubreg(ARM::ssub_0 + Lane2 / 2, dl, MVT::v4f32,
3153 Ins2.getOperand(0), SDValue(VINS, 0));
3154 ReplaceUses(Ins1, NewIns);
3155 return true;
3156 }
3157
3158 return false;
3159}
3160
3161bool ARMDAGToDAGISel::transformFixedFloatingPointConversion(SDNode *N,
3162 SDNode *FMul,
3163 bool IsUnsigned,
3164 bool FixedToFloat) {
3165 auto Type = N->getValueType(0);
3166 unsigned ScalarBits = Type.getScalarSizeInBits();
3167 if (ScalarBits > 32)
3168 return false;
3169
3170 SDNodeFlags FMulFlags = FMul->getFlags();
3171 // The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is
3172 // allowed in 16 bit unsigned floats
3173 if (ScalarBits == 16 && !FMulFlags.hasNoInfs() && IsUnsigned)
3174 return false;
3175
3176 SDValue ImmNode = FMul->getOperand(1);
3177 SDValue VecVal = FMul->getOperand(0);
3178 if (VecVal->getOpcode() == ISD::UINT_TO_FP ||
3179 VecVal->getOpcode() == ISD::SINT_TO_FP)
3180 VecVal = VecVal->getOperand(0);
3181
3182 if (VecVal.getValueType().getScalarSizeInBits() != ScalarBits)
3183 return false;
3184
3185 if (ImmNode.getOpcode() == ISD::BITCAST) {
3186 if (ImmNode.getValueType().getScalarSizeInBits() != ScalarBits)
3187 return false;
3188 ImmNode = ImmNode.getOperand(0);
3189 }
3190
3191 if (ImmNode.getValueType().getScalarSizeInBits() != ScalarBits)
3192 return false;
3193
3194 APFloat ImmAPF(0.0f);
3195 switch (ImmNode.getOpcode()) {
3196 case ARMISD::VMOVIMM:
3197 case ARMISD::VDUP: {
3198 if (!isa<ConstantSDNode>(ImmNode.getOperand(0)))
3199 return false;
3200 unsigned Imm = ImmNode.getConstantOperandVal(0);
3201 if (ImmNode.getOpcode() == ARMISD::VMOVIMM)
3202 Imm = ARM_AM::decodeVMOVModImm(Imm, ScalarBits);
3203 ImmAPF =
3204 APFloat(ScalarBits == 32 ? APFloat::IEEEsingle() : APFloat::IEEEhalf(),
3205 APInt(ScalarBits, Imm));
3206 break;
3207 }
3208 case ARMISD::VMOVFPIMM: {
3210 break;
3211 }
3212 default:
3213 return false;
3214 }
3215
3216 // Where n is the number of fractional bits, multiplying by 2^n will convert
3217 // from float to fixed and multiplying by 2^-n will convert from fixed to
3218 // float. Taking log2 of the factor (after taking the inverse in the case of
3219 // float to fixed) will give n.
3220 APFloat ToConvert = ImmAPF;
3221 if (FixedToFloat) {
3222 if (!ImmAPF.getExactInverse(&ToConvert))
3223 return false;
3224 }
3225 APSInt Converted(64, false);
3226 bool IsExact;
3228 &IsExact);
3229 if (!IsExact || !Converted.isPowerOf2())
3230 return false;
3231
3232 unsigned FracBits = Converted.logBase2();
3233 if (FracBits > ScalarBits)
3234 return false;
3235
3237 VecVal, CurDAG->getConstant(FracBits, SDLoc(N), MVT::i32)};
3238 AddEmptyMVEPredicateToOps(Ops, SDLoc(N), Type);
3239
3240 unsigned int Opcode;
3241 switch (ScalarBits) {
3242 case 16:
3243 if (FixedToFloat)
3244 Opcode = IsUnsigned ? ARM::MVE_VCVTf16u16_fix : ARM::MVE_VCVTf16s16_fix;
3245 else
3246 Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix;
3247 break;
3248 case 32:
3249 if (FixedToFloat)
3250 Opcode = IsUnsigned ? ARM::MVE_VCVTf32u32_fix : ARM::MVE_VCVTf32s32_fix;
3251 else
3252 Opcode = IsUnsigned ? ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix;
3253 break;
3254 default:
3255 llvm_unreachable("unexpected number of scalar bits");
3256 break;
3257 }
3258
3259 ReplaceNode(N, CurDAG->getMachineNode(Opcode, SDLoc(N), Type, Ops));
3260 return true;
3261}
3262
3263bool ARMDAGToDAGISel::tryFP_TO_INT(SDNode *N, SDLoc dl) {
3264 // Transform a floating-point to fixed-point conversion to a VCVT
3265 if (!Subtarget->hasMVEFloatOps())
3266 return false;
3267 EVT Type = N->getValueType(0);
3268 if (!Type.isVector())
3269 return false;
3270 unsigned int ScalarBits = Type.getScalarSizeInBits();
3271
3272 bool IsUnsigned = N->getOpcode() == ISD::FP_TO_UINT ||
3273 N->getOpcode() == ISD::FP_TO_UINT_SAT;
3274 SDNode *Node = N->getOperand(0).getNode();
3275
3276 // floating-point to fixed-point with one fractional bit gets turned into an
3277 // FP_TO_[U|S]INT(FADD (x, x)) rather than an FP_TO_[U|S]INT(FMUL (x, y))
3278 if (Node->getOpcode() == ISD::FADD) {
3279 if (Node->getOperand(0) != Node->getOperand(1))
3280 return false;
3281 SDNodeFlags Flags = Node->getFlags();
3282 // The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is
3283 // allowed in 16 bit unsigned floats
3284 if (ScalarBits == 16 && !Flags.hasNoInfs() && IsUnsigned)
3285 return false;
3286
3287 unsigned Opcode;
3288 switch (ScalarBits) {
3289 case 16:
3290 Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix;
3291 break;
3292 case 32:
3293 Opcode = IsUnsigned ? ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix;
3294 break;
3295 }
3296 SmallVector<SDValue, 3> Ops{Node->getOperand(0),
3297 CurDAG->getConstant(1, dl, MVT::i32)};
3298 AddEmptyMVEPredicateToOps(Ops, dl, Type);
3299
3300 ReplaceNode(N, CurDAG->getMachineNode(Opcode, dl, Type, Ops));
3301 return true;
3302 }
3303
3304 if (Node->getOpcode() != ISD::FMUL)
3305 return false;
3306
3307 return transformFixedFloatingPointConversion(N, Node, IsUnsigned, false);
3308}
3309
3310bool ARMDAGToDAGISel::tryFMULFixed(SDNode *N, SDLoc dl) {
3311 // Transform a fixed-point to floating-point conversion to a VCVT
3312 if (!Subtarget->hasMVEFloatOps())
3313 return false;
3314 auto Type = N->getValueType(0);
3315 if (!Type.isVector())
3316 return false;
3317
3318 auto LHS = N->getOperand(0);
3319 if (LHS.getOpcode() != ISD::SINT_TO_FP && LHS.getOpcode() != ISD::UINT_TO_FP)
3320 return false;
3321
3322 return transformFixedFloatingPointConversion(
3323 N, N, LHS.getOpcode() == ISD::UINT_TO_FP, true);
3324}
3325
3326bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
3327 if (!Subtarget->hasV6T2Ops())
3328 return false;
3329
3330 unsigned Opc = isSigned
3331 ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
3332 : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
3333 SDLoc dl(N);
3334
3335 // For unsigned extracts, check for a shift right and mask
3336 unsigned And_imm = 0;
3337 if (N->getOpcode() == ISD::AND) {
3338 if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {
3339
3340 // The immediate is a mask of the low bits iff imm & (imm+1) == 0
3341 if (And_imm & (And_imm + 1))
3342 return false;
3343
3344 unsigned Srl_imm = 0;
3345 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
3346 Srl_imm)) {
3347 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
3348
3349 // Mask off the unnecessary bits of the AND immediate; normally
3350 // DAGCombine will do this, but that might not happen if
3351 // targetShrinkDemandedConstant chooses a different immediate.
3352 And_imm &= -1U >> Srl_imm;
3353
3354 // Note: The width operand is encoded as width-1.
3355 unsigned Width = llvm::countr_one(And_imm) - 1;
3356 unsigned LSB = Srl_imm;
3357
3358 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3359
3360 if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
3361 // It's cheaper to use a right shift to extract the top bits.
3362 if (Subtarget->isThumb()) {
3363 Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
3364 SDValue Ops[] = { N->getOperand(0).getOperand(0),
3365 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
3366 getAL(CurDAG, dl), Reg0, Reg0 };
3367 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3368 return true;
3369 }
3370
3371 // ARM models shift instructions as MOVsi with shifter operand.
3373 SDValue ShOpc =
3374 CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
3375 MVT::i32);
3376 SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
3377 getAL(CurDAG, dl), Reg0, Reg0 };
3378 CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
3379 return true;
3380 }
3381
3382 assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
3383 SDValue Ops[] = { N->getOperand(0).getOperand(0),
3384 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
3385 CurDAG->getTargetConstant(Width, dl, MVT::i32),
3386 getAL(CurDAG, dl), Reg0 };
3387 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3388 return true;
3389 }
3390 }
3391 return false;
3392 }
3393
3394 // Otherwise, we're looking for a shift of a shift
3395 unsigned Shl_imm = 0;
3396 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
3397 assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
3398 unsigned Srl_imm = 0;
3399 if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
3400 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
3401 // Note: The width operand is encoded as width-1.
3402 unsigned Width = 32 - Srl_imm - 1;
3403 int LSB = Srl_imm - Shl_imm;
3404 if (LSB < 0)
3405 return false;
3406 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3407 assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
3408 SDValue Ops[] = { N->getOperand(0).getOperand(0),
3409 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
3410 CurDAG->getTargetConstant(Width, dl, MVT::i32),
3411 getAL(CurDAG, dl), Reg0 };
3412 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3413 return true;
3414 }
3415 }
3416
3417 // Or we are looking for a shift of an and, with a mask operand
3418 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) &&
3419 isShiftedMask_32(And_imm)) {
3420 unsigned Srl_imm = 0;
3421 unsigned LSB = llvm::countr_zero(And_imm);
3422 // Shift must be the same as the ands lsb
3423 if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) {
3424 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
3425 unsigned MSB = llvm::Log2_32(And_imm);
3426 // Note: The width operand is encoded as width-1.
3427 unsigned Width = MSB - LSB;
3428 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3429 assert(Srl_imm + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
3430 SDValue Ops[] = { N->getOperand(0).getOperand(0),
3431 CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32),
3432 CurDAG->getTargetConstant(Width, dl, MVT::i32),
3433 getAL(CurDAG, dl), Reg0 };
3434 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3435 return true;
3436 }
3437 }
3438
3439 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
3440 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
3441 unsigned LSB = 0;
3442 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
3443 !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
3444 return false;
3445
3446 if (LSB + Width > 32)
3447 return false;
3448
3449 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3450 assert(LSB + Width <= 32 && "Shouldn't create an invalid ubfx");
3451 SDValue Ops[] = { N->getOperand(0).getOperand(0),
3452 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
3453 CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
3454 getAL(CurDAG, dl), Reg0 };
3455 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3456 return true;
3457 }
3458
3459 return false;
3460}
3461
3462/// Target-specific DAG combining for ISD::SUB.
3463/// Target-independent combining lowers SELECT_CC nodes of the form
3464/// select_cc setg[ge] X, 0, X, -X
3465/// select_cc setgt X, -1, X, -X
3466/// select_cc setl[te] X, 0, -X, X
3467/// select_cc setlt X, 1, -X, X
3468/// which represent Integer ABS into:
3469/// Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
3470/// ARM instruction selection detects the latter and matches it to
3471/// ARM::ABS or ARM::t2ABS machine node.
3472bool ARMDAGToDAGISel::tryABSOp(SDNode *N){
3473 SDValue SUBSrc0 = N->getOperand(0);
3474 SDValue SUBSrc1 = N->getOperand(1);
3475 EVT VT = N->getValueType(0);
3476
3477 if (Subtarget->isThumb1Only())
3478 return false;
3479
3480 if (SUBSrc0.getOpcode() != ISD::XOR || SUBSrc1.getOpcode() != ISD::SRA)
3481 return false;
3482
3483 SDValue XORSrc0 = SUBSrc0.getOperand(0);
3484 SDValue XORSrc1 = SUBSrc0.getOperand(1);
3485 SDValue SRASrc0 = SUBSrc1.getOperand(0);
3486 SDValue SRASrc1 = SUBSrc1.getOperand(1);
3487 ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1);
3488 EVT XType = SRASrc0.getValueType();
3489 unsigned Size = XType.getSizeInBits() - 1;
3490
3491 if (XORSrc1 == SUBSrc1 && XORSrc0 == SRASrc0 && XType.isInteger() &&
3492 SRAConstant != nullptr && Size == SRAConstant->getZExtValue()) {
3493 unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
3494 CurDAG->SelectNodeTo(N, Opcode, VT, XORSrc0);
3495 return true;
3496 }
3497
3498 return false;
3499}
3500
3501/// We've got special pseudo-instructions for these
3502void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
3503 unsigned Opcode;
3504 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
3505 if (MemTy == MVT::i8)
3506 Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_8 : ARM::CMP_SWAP_8;
3507 else if (MemTy == MVT::i16)
3508 Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_16 : ARM::CMP_SWAP_16;
3509 else if (MemTy == MVT::i32)
3510 Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_32 : ARM::CMP_SWAP_32;
3511 else
3512 llvm_unreachable("Unknown AtomicCmpSwap type");
3513
3514 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
3515 N->getOperand(0)};
3516 SDNode *CmpSwap = CurDAG->getMachineNode(
3517 Opcode, SDLoc(N),
3518 CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);
3519
3520 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
3521 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
3522
3523 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
3524 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
3525 CurDAG->RemoveDeadNode(N);
3526}
3527
3528static std::optional<std::pair<unsigned, unsigned>>
3530 unsigned FirstOne = A.getBitWidth() - A.countl_zero() - 1;
3531 unsigned LastOne = A.countr_zero();
3532 if (A.popcount() != (FirstOne - LastOne + 1))
3533 return std::nullopt;
3534 return std::make_pair(FirstOne, LastOne);
3535}
3536
/// Try to replace the (and X, C) feeding a CMPZ-against-zero with a cheaper
/// Thumb flag-setting shift (LSLS / LSRS, or both), when C's set bits form a
/// contiguous run. The shift then produces the flags the comparison needs.
/// In the single-bit case the tested bit is moved into the sign bit, so the
/// caller must rewrite an EQ/NE condition into PL/MI; that is signalled via
/// \p SwitchEQNEToPLMI.
void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
  assert(N->getOpcode() == ARMISD::CMPZ);
  SwitchEQNEToPLMI = false;

  if (!Subtarget->isThumb())
    // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and
    // LSR don't exist as standalone instructions - they need the barrel shifter.
    return;

  // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X))
  SDValue And = N->getOperand(0);
  // We replace the AND wholesale below, so bail out if anything else uses it.
  if (!And->hasOneUse())
    return;

  SDValue Zero = N->getOperand(1);
  if (!isNullConstant(Zero) || And->getOpcode() != ISD::AND)
    return;
  SDValue X = And.getOperand(0);
  auto C = dyn_cast<ConstantSDNode>(And.getOperand(1));

  if (!C)
    return;
  // Only masks whose set bits are one contiguous run can be expressed as a
  // pair of shifts; Range is (index of highest set bit, index of lowest).
  auto Range = getContiguousRangeOfSetBits(C->getAPIntValue());
  if (!Range)
    return;

  // There are several ways to lower this:
  SDNode *NewN;
  SDLoc dl(N);

  // Build a flag-setting immediate shift. For Thumb2 the tLSLri/tLSRri
  // opcode is first translated to its t2 counterpart; the operand lists
  // differ between the two encodings (Thumb1 takes CPSR as an explicit
  // leading def, Thumb2 carries a trailing optional-def register instead).
  auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* {
    if (Subtarget->isThumb2()) {
      Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri;
      SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32),
                        getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
                        CurDAG->getRegister(0, MVT::i32) };
      return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
    } else {
      SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src,
                       CurDAG->getTargetConstant(Imm, dl, MVT::i32),
                       getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
      return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
    }
  };

  if (Range->second == 0) {
    //  1. Mask includes the LSB -> Simply shift the top N bits off
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    ReplaceNode(And.getNode(), NewN);
  } else if (Range->first == 31) {
    //  2. Mask includes the MSB -> Simply shift the bottom N bits off
    NewN = EmitShift(ARM::tLSRri, X, Range->second);
    ReplaceNode(And.getNode(), NewN);
  } else if (Range->first == Range->second) {
    //  3. Only one bit is set. We can shift this into the sign bit and use a
    //     PL/MI comparison. This is not safe if CMPZ has multiple uses because
    //     only one of them (the one currently being selected) will be switched
    //     to use the new condition code.
    if (!N->hasOneUse())
      return;
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    ReplaceNode(And.getNode(), NewN);

    SwitchEQNEToPLMI = true;
  } else if (!Subtarget->hasV6T2Ops()) {
    //  4. Do a double shift to clear bottom and top bits, but only in
    //     thumb-1 mode as in thumb-2 we can use UBFX.
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0),
                     Range->second + (31 - Range->first));
    ReplaceNode(And.getNode(), NewN);
  }
}
3610
3611static unsigned getVectorShuffleOpcode(EVT VT, unsigned Opc64[3],
3612 unsigned Opc128[3]) {
3613 assert((VT.is64BitVector() || VT.is128BitVector()) &&
3614 "Unexpected vector shuffle length");
3615 switch (VT.getScalarSizeInBits()) {
3616 default:
3617 llvm_unreachable("Unexpected vector shuffle element size");
3618 case 8:
3619 return VT.is64BitVector() ? Opc64[0] : Opc128[0];
3620 case 16:
3621 return VT.is64BitVector() ? Opc64[1] : Opc128[1];
3622 case 32:
3623 return VT.is64BitVector() ? Opc64[2] : Opc128[2];
3624 }
3625}
3626
3627void ARMDAGToDAGISel::Select(SDNode *N) {
3628 SDLoc dl(N);
3629
3630 if (N->isMachineOpcode()) {
3631 N->setNodeId(-1);
3632 return; // Already selected.
3633 }
3634
3635 switch (N->getOpcode()) {
3636 default: break;
3637 case ISD::STORE: {
3638 // For Thumb1, match an sp-relative store in C++. This is a little
3639 // unfortunate, but I don't think I can make the chain check work
3640 // otherwise. (The chain of the store has to be the same as the chain
3641 // of the CopyFromReg, or else we can't replace the CopyFromReg with
3642 // a direct reference to "SP".)
3643 //
3644 // This is only necessary on Thumb1 because Thumb1 sp-relative stores use
3645 // a different addressing mode from other four-byte stores.
3646 //
3647 // This pattern usually comes up with call arguments.
3648 StoreSDNode *ST = cast<StoreSDNode>(N);
3649 SDValue Ptr = ST->getBasePtr();
3650 if (Subtarget->isThumb1Only() && ST->isUnindexed()) {
3651 int RHSC = 0;
3652 if (Ptr.getOpcode() == ISD::ADD &&
3653 isScaledConstantInRange(Ptr.getOperand(1), /*Scale=*/4, 0, 256, RHSC))
3654 Ptr = Ptr.getOperand(0);
3655
3656 if (Ptr.getOpcode() == ISD::CopyFromReg &&
3657 cast<RegisterSDNode>(Ptr.getOperand(1))->getReg() == ARM::SP &&
3658 Ptr.getOperand(0) == ST->getChain()) {
3659 SDValue Ops[] = {ST->getValue(),
3660 CurDAG->getRegister(ARM::SP, MVT::i32),
3661 CurDAG->getTargetConstant(RHSC, dl, MVT::i32),
3662 getAL(CurDAG, dl),
3663 CurDAG->getRegister(0, MVT::i32),
3664 ST->getChain()};
3665 MachineSDNode *ResNode =
3666 CurDAG->getMachineNode(ARM::tSTRspi, dl, MVT::Other, Ops);
3667 MachineMemOperand *MemOp = ST->getMemOperand();
3668 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
3669 ReplaceNode(N, ResNode);
3670 return;
3671 }
3672 }
3673 break;
3674 }
3676 if (tryWriteRegister(N))
3677 return;
3678 break;
3679 case ISD::READ_REGISTER:
3680 if (tryReadRegister(N))
3681 return;
3682 break;
3683 case ISD::INLINEASM:
3684 case ISD::INLINEASM_BR:
3685 if (tryInlineAsm(N))
3686 return;
3687 break;
3688 case ISD::SUB:
3689 // Select special operations if SUB node forms integer ABS pattern
3690 if (tryABSOp(N))
3691 return;
3692 // Other cases are autogenerated.
3693 break;
3694 case ISD::Constant: {
3695 unsigned Val = N->getAsZExtVal();
3696 // If we can't materialize the constant we need to use a literal pool
3697 if (ConstantMaterializationCost(Val, Subtarget) > 2 &&
3698 !Subtarget->genExecuteOnly()) {
3699 SDValue CPIdx = CurDAG->getTargetConstantPool(
3700 ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
3701 TLI->getPointerTy(CurDAG->getDataLayout()));
3702
3703 SDNode *ResNode;
3704 if (Subtarget->isThumb()) {
3705 SDValue Ops[] = {
3706 CPIdx,
3707 getAL(CurDAG, dl),
3708 CurDAG->getRegister(0, MVT::i32),
3709 CurDAG->getEntryNode()
3710 };
3711 ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
3712 Ops);
3713 } else {
3714 SDValue Ops[] = {
3715 CPIdx,
3716 CurDAG->getTargetConstant(0, dl, MVT::i32),
3717 getAL(CurDAG, dl),
3718 CurDAG->getRegister(0, MVT::i32),
3719 CurDAG->getEntryNode()
3720 };
3721 ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
3722 Ops);
3723 }
3724 // Annotate the Node with memory operand information so that MachineInstr
3725 // queries work properly. This e.g. gives the register allocation the
3726 // required information for rematerialization.
3727 MachineFunction& MF = CurDAG->getMachineFunction();
3731
3732 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
3733
3734 ReplaceNode(N, ResNode);
3735 return;
3736 }
3737
3738 // Other cases are autogenerated.
3739 break;
3740 }
3741 case ISD::FrameIndex: {
3742 // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
3743 int FI = cast<FrameIndexSDNode>(N)->getIndex();
3744 SDValue TFI = CurDAG->getTargetFrameIndex(
3745 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
3746 if (Subtarget->isThumb1Only()) {
3747 // Set the alignment of the frame object to 4, to avoid having to generate
3748 // more than one ADD
3749 MachineFrameInfo &MFI = MF->getFrameInfo();
3750 if (MFI.getObjectAlign(FI) < Align(4))
3751 MFI.setObjectAlignment(FI, Align(4));
3752 CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
3753 CurDAG->getTargetConstant(0, dl, MVT::i32));
3754 return;
3755 } else {
3756 unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
3757 ARM::t2ADDri : ARM::ADDri);
3758 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),
3759 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
3760 CurDAG->getRegister(0, MVT::i32) };
3761 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3762 return;
3763 }
3764 }
3766 if (tryInsertVectorElt(N))
3767 return;
3768 break;
3769 }
3770 case ISD::SRL:
3771 if (tryV6T2BitfieldExtractOp(N, false))
3772 return;
3773 break;
3775 case ISD::SRA:
3776 if (tryV6T2BitfieldExtractOp(N, true))
3777 return;
3778 break;
3779 case ISD::FP_TO_UINT:
3780 case ISD::FP_TO_SINT:
3783 if (tryFP_TO_INT(N, dl))
3784 return;
3785 break;
3786 case ISD::FMUL:
3787 if (tryFMULFixed(N, dl))
3788 return;
3789 break;
3790 case ISD::MUL:
3791 if (Subtarget->isThumb1Only())
3792 break;
3793 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
3794 unsigned RHSV = C->getZExtValue();
3795 if (!RHSV) break;
3796 if (isPowerOf2_32(RHSV-1)) { // 2^n+1?
3797 unsigned ShImm = Log2_32(RHSV-1);
3798 if (ShImm >= 32)
3799 break;
3800 SDValue V = N->getOperand(0);
3801 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
3802 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
3803 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3804 if (Subtarget->isThumb()) {
3805 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
3806 CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
3807 return;
3808 } else {
3809 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
3810 Reg0 };
3811 CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
3812 return;
3813 }
3814 }
3815 if (isPowerOf2_32(RHSV+1)) { // 2^n-1?
3816 unsigned ShImm = Log2_32(RHSV+1);
3817 if (ShImm >= 32)
3818 break;
3819 SDValue V = N->getOperand(0);
3820 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
3821 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
3822 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3823 if (Subtarget->isThumb()) {
3824 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
3825 CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
3826 return;
3827 } else {
3828 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
3829 Reg0 };
3830 CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
3831 return;
3832 }
3833 }
3834 }
3835 break;
3836 case ISD::AND: {
3837 // Check for unsigned bitfield extract
3838 if (tryV6T2BitfieldExtractOp(N, false))
3839 return;
3840
3841 // If an immediate is used in an AND node, it is possible that the immediate
3842 // can be more optimally materialized when negated. If this is the case we
3843 // can negate the immediate and use a BIC instead.
3844 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
3845 if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {
3846 uint32_t Imm = (uint32_t) N1C->getZExtValue();
3847
3848 // In Thumb2 mode, an AND can take a 12-bit immediate. If this
3849 // immediate can be negated and fit in the immediate operand of
3850 // a t2BIC, don't do any manual transform here as this can be
3851 // handled by the generic ISel machinery.
3852 bool PreferImmediateEncoding =
3853 Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));
3854 if (!PreferImmediateEncoding &&
3855 ConstantMaterializationCost(Imm, Subtarget) >
3856 ConstantMaterializationCost(~Imm, Subtarget)) {
3857 // The current immediate costs more to materialize than a negated
3858 // immediate, so negate the immediate and use a BIC.
3859 SDValue NewImm = CurDAG->getConstant(~Imm, dl, MVT::i32);
3860 // If the new constant didn't exist before, reposition it in the topological
3861 // ordering so it is just before N. Otherwise, don't touch its location.
3862 if (NewImm->getNodeId() == -1)
3863 CurDAG->RepositionNode(N->getIterator(), NewImm.getNode());
3864
3865 if (!Subtarget->hasThumb2()) {
3866 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32),
3867 N->getOperand(0), NewImm, getAL(CurDAG, dl),
3868 CurDAG->getRegister(0, MVT::i32)};
3869 ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops));
3870 return;
3871 } else {
3872 SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl),
3873 CurDAG->getRegister(0, MVT::i32),
3874 CurDAG->getRegister(0, MVT::i32)};
3875 ReplaceNode(N,
3876 CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops));
3877 return;
3878 }
3879 }
3880 }
3881
3882 // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
3883 // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
3884 // are entirely contributed by c2 and lower 16-bits are entirely contributed
3885 // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
3886 // Select it to: "movt x, ((c1 & 0xffff) >> 16)
3887 EVT VT = N->getValueType(0);
3888 if (VT != MVT::i32)
3889 break;
3890 unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
3891 ? ARM::t2MOVTi16
3892 : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
3893 if (!Opc)
3894 break;
3895 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
3896 N1C = dyn_cast<ConstantSDNode>(N1);
3897 if (!N1C)
3898 break;
3899 if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
3900 SDValue N2 = N0.getOperand(1);
3901 ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
3902 if (!N2C)
3903 break;
3904 unsigned N1CVal = N1C->getZExtValue();
3905 unsigned N2CVal = N2C->getZExtValue();
3906 if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
3907 (N1CVal & 0xffffU) == 0xffffU &&
3908 (N2CVal & 0xffffU) == 0x0U) {
3909 SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
3910 dl, MVT::i32);
3911 SDValue Ops[] = { N0.getOperand(0), Imm16,
3912 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
3913 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
3914 return;
3915 }
3916 }
3917
3918 break;
3919 }
3920 case ARMISD::UMAAL: {
3921 unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
3922 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
3923 N->getOperand(2), N->getOperand(3),
3924 getAL(CurDAG, dl),
3925 CurDAG->getRegister(0, MVT::i32) };
3926 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops));
3927 return;
3928 }
3929 case ARMISD::UMLAL:{
3930 if (Subtarget->isThumb()) {
3931 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3932 N->getOperand(3), getAL(CurDAG, dl),
3933 CurDAG->getRegister(0, MVT::i32)};
3934 ReplaceNode(
3935 N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops));
3936 return;
3937 }else{
3938 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3939 N->getOperand(3), getAL(CurDAG, dl),
3940 CurDAG->getRegister(0, MVT::i32),
3941 CurDAG->getRegister(0, MVT::i32) };
3942 ReplaceNode(N, CurDAG->getMachineNode(
3943 Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,
3944 MVT::i32, MVT::i32, Ops));
3945 return;
3946 }
3947 }
3948 case ARMISD::SMLAL:{
3949 if (Subtarget->isThumb()) {
3950 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3951 N->getOperand(3), getAL(CurDAG, dl),
3952 CurDAG->getRegister(0, MVT::i32)};
3953 ReplaceNode(
3954 N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops));
3955 return;
3956 }else{
3957 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3958 N->getOperand(3), getAL(CurDAG, dl),
3959 CurDAG->getRegister(0, MVT::i32),
3960 CurDAG->getRegister(0, MVT::i32) };
3961 ReplaceNode(N, CurDAG->getMachineNode(
3962 Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl,
3963 MVT::i32, MVT::i32, Ops));
3964 return;
3965 }
3966 }
3967 case ARMISD::SUBE: {
3968 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
3969 break;
3970 // Look for a pattern to match SMMLS
3971 // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b))))
3972 if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI ||
3973 N->getOperand(2).getOpcode() != ARMISD::SUBC ||
3974 !SDValue(N, 1).use_empty())
3975 break;
3976
3977 if (Subtarget->isThumb())
3978 assert(Subtarget->hasThumb2() &&
3979 "This pattern should not be generated for Thumb");
3980
3981 SDValue SmulLoHi = N->getOperand(1);
3982 SDValue Subc = N->getOperand(2);
3983 SDValue Zero = Subc.getOperand(0);
3984
3985 if (!isNullConstant(Zero) || Subc.getOperand(1) != SmulLoHi.getValue(0) ||
3986 N->getOperand(1) != SmulLoHi.getValue(1) ||
3987 N->getOperand(2) != Subc.getValue(1))
3988 break;
3989
3990 unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS;
3991 SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1),
3992 N->getOperand(0), getAL(CurDAG, dl),
3993 CurDAG->getRegister(0, MVT::i32) };
3994 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops));
3995 return;
3996 }
3997 case ISD::LOAD: {
3998 if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
3999 return;
4000 if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
4001 if (tryT2IndexedLoad(N))
4002 return;
4003 } else if (Subtarget->isThumb()) {
4004 if (tryT1IndexedLoad(N))
4005 return;
4006 } else if (tryARMIndexedLoad(N))
4007 return;
4008 // Other cases are autogenerated.
4009 break;
4010 }
4011 case ISD::MLOAD:
4012 if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
4013 return;
4014 // Other cases are autogenerated.
4015 break;
4016 case ARMISD::WLSSETUP: {
4017 SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopSetup, dl, MVT::i32,
4018 N->getOperand(0));
4019 ReplaceUses(N, New);
4020 CurDAG->RemoveDeadNode(N);
4021 return;
4022 }
4023 case ARMISD::WLS: {
4024 SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopStart, dl, MVT::Other,
4025 N->getOperand(1), N->getOperand(2),
4026 N->getOperand(0));
4027 ReplaceUses(N, New);
4028 CurDAG->RemoveDeadNode(N);
4029 return;
4030 }
4031 case ARMISD::LE: {
4032 SDValue Ops[] = { N->getOperand(1),
4033 N->getOperand(2),
4034 N->getOperand(0) };
4035 unsigned Opc = ARM::t2LoopEnd;
4036 SDNode *New = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
4037 ReplaceUses(N, New);
4038 CurDAG->RemoveDeadNode(N);
4039 return;
4040 }
4041 case ARMISD::LDRD: {
4042 if (Subtarget->isThumb2())
4043 break; // TableGen handles isel in this case.
4044 SDValue Base, RegOffset, ImmOffset;
4045 const SDValue &Chain = N->getOperand(0);
4046 const SDValue &Addr = N->getOperand(1);
4047 SelectAddrMode3(Addr, Base, RegOffset, ImmOffset);
4048 if (RegOffset != CurDAG->getRegister(0, MVT::i32)) {
4049 // The register-offset variant of LDRD mandates that the register
4050 // allocated to RegOffset is not reused in any of the remaining operands.
4051 // This restriction is currently not enforced. Therefore emitting this
4052 // variant is explicitly avoided.
4053 Base = Addr;
4054 RegOffset = CurDAG->getRegister(0, MVT::i32);
4055 }
4056 SDValue Ops[] = {Base, RegOffset, ImmOffset, Chain};
4057 SDNode *New = CurDAG->getMachineNode(ARM::LOADDUAL, dl,
4058 {MVT::Untyped, MVT::Other}, Ops);
4059 SDValue Lo = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
4060 SDValue(New, 0));
4061 SDValue Hi = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
4062 SDValue(New, 0));
4063 transferMemOperands(N, New);
4064 ReplaceUses(SDValue(N, 0), Lo);
4065 ReplaceUses(SDValue(N, 1), Hi);
4066 ReplaceUses(SDValue(N, 2), SDValue(New, 1));
4067 CurDAG->RemoveDeadNode(N);
4068 return;
4069 }
4070 case ARMISD::STRD: {
4071 if (Subtarget->isThumb2())
4072 break; // TableGen handles isel in this case.
4073 SDValue Base, RegOffset, ImmOffset;
4074 const SDValue &Chain = N->getOperand(0);
4075 const SDValue &Addr = N->getOperand(3);
4076 SelectAddrMode3(Addr, Base, RegOffset, ImmOffset);
4077 if (RegOffset != CurDAG->getRegister(0, MVT::i32)) {
4078 // The register-offset variant of STRD mandates that the register
4079 // allocated to RegOffset is not reused in any of the remaining operands.
4080 // This restriction is currently not enforced. Therefore emitting this
4081 // variant is explicitly avoided.
4082 Base = Addr;
4083 RegOffset = CurDAG->getRegister(0, MVT::i32);
4084 }
4085 SDNode *RegPair =
4086 createGPRPairNode(MVT::Untyped, N->getOperand(1), N->getOperand(2));
4087 SDValue Ops[] = {SDValue(RegPair, 0), Base, RegOffset, ImmOffset, Chain};
4088 SDNode *New = CurDAG->getMachineNode(ARM::STOREDUAL, dl, MVT::Other, Ops);
4089 transferMemOperands(N, New);
4090 ReplaceUses(SDValue(N, 0), SDValue(New, 0));
4091 CurDAG->RemoveDeadNode(N);
4092 return;
4093 }
4094 case ARMISD::LOOP_DEC: {
4095 SDValue Ops[] = { N->getOperand(1),
4096 N->getOperand(2),
4097 N->getOperand(0) };
4098 SDNode *Dec =
4099 CurDAG->getMachineNode(ARM::t2LoopDec, dl,
4100 CurDAG->getVTList(MVT::i32, MVT::Other), Ops);
4101 ReplaceUses(N, Dec);
4102 CurDAG->RemoveDeadNode(N);
4103 return;
4104 }
4105 case ARMISD::BRCOND: {
4106 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
4107 // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
4108 // Pattern complexity = 6 cost = 1 size = 0
4109
4110 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
4111 // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
4112 // Pattern complexity = 6 cost = 1 size = 0
4113
4114 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
4115 // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
4116 // Pattern complexity = 6 cost = 1 size = 0
4117
4118 unsigned Opc = Subtarget->isThumb() ?
4119 ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
4120 SDValue Chain = N->getOperand(0);
4121 SDValue N1 = N->getOperand(1);
4122 SDValue N2 = N->getOperand(2);
4123 SDValue Flags = N->getOperand(3);
4126
4127 unsigned CC = (unsigned)N2->getAsZExtVal();
4128
4129 if (Flags.getOpcode() == ARMISD::CMPZ) {
4130 if (Flags.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) {
4131 SDValue Int = Flags.getOperand(0);
4132 uint64_t ID = Int->getConstantOperandVal(1);
4133
4134 // Handle low-overhead loops.
4135 if (ID == Intrinsic::loop_decrement_reg) {
4136 SDValue Elements = Int.getOperand(2);
4137 SDValue Size = CurDAG->getTargetConstant(Int.getConstantOperandVal(3),
4138 dl, MVT::i32);
4139
4140 SDValue Args[] = { Elements, Size, Int.getOperand(0) };
4141 SDNode *LoopDec =
4142 CurDAG->getMachineNode(ARM::t2LoopDec, dl,
4143 CurDAG->getVTList(MVT::i32, MVT::Other),
4144 Args);
4145 ReplaceUses(Int.getNode(), LoopDec);
4146
4147 SDValue EndArgs[] = { SDValue(LoopDec, 0), N1, Chain };
4148 SDNode *LoopEnd =
4149 CurDAG->getMachineNode(ARM::t2LoopEnd, dl, MVT::Other, EndArgs);
4150
4151 ReplaceUses(N, LoopEnd);
4152 CurDAG->RemoveDeadNode(N);
4153 CurDAG->RemoveDeadNode(Flags.getNode());
4154 CurDAG->RemoveDeadNode(Int.getNode());
4155 return;
4156 }
4157 }
4158
4159 bool SwitchEQNEToPLMI;
4160 SelectCMPZ(Flags.getNode(), SwitchEQNEToPLMI);
4161 Flags = N->getOperand(3);
4162
4163 if (SwitchEQNEToPLMI) {
4164 switch ((ARMCC::CondCodes)CC) {
4165 default: llvm_unreachable("CMPZ must be either NE or EQ!");
4166 case ARMCC::NE:
4168 break;
4169 case ARMCC::EQ:
4171 break;
4172 }
4173 }
4174 }
4175
4176 SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32);
4177 Chain = CurDAG->getCopyToReg(Chain, dl, ARM::CPSR, Flags, SDValue());
4178 SDValue Ops[] = {N1, Tmp2, CurDAG->getRegister(ARM::CPSR, MVT::i32), Chain,
4179 Chain.getValue(1)};
4180 CurDAG->SelectNodeTo(N, Opc, MVT::Other, Ops);
4181 return;
4182 }
4183
4184 case ARMISD::CMPZ: {
4185 // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
4186 // This allows us to avoid materializing the expensive negative constant.
4187 // The CMPZ #0 is useless and will be peepholed away but we need to keep
4188 // it for its flags output.
4189 SDValue X = N->getOperand(0);
4190 auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode());
4191 if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) {
4192 int64_t Addend = -C->getSExtValue();
4193
4194 SDNode *Add = nullptr;
4195 // ADDS can be better than CMN if the immediate fits in a
4196 // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3.
4197 // Outside that range we can just use a CMN which is 32-bit but has a
4198 // 12-bit immediate range.
4199 if (Addend < 1<<8) {
4200 if (Subtarget->isThumb2()) {
4201 SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32),
4202 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
4203 CurDAG->getRegister(0, MVT::i32) };
4204 Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops);
4205 } else {
4206 unsigned Opc = (Addend < 1<<3) ? ARM::tADDi3 : ARM::tADDi8;
4207 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
4208 CurDAG->getTargetConstant(Addend, dl, MVT::i32),
4209 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
4210 Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
4211 }
4212 }
4213 if (Add) {
4214 SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)};
4215 CurDAG->MorphNodeTo(N, ARMISD::CMPZ, N->getVTList(), Ops2);
4216 }
4217 }
4218 // Other cases are autogenerated.
4219 break;
4220 }
4221
4222 case ARMISD::CMOV: {
4223 SDValue Flags = N->getOperand(3);
4224
4225 if (Flags.getOpcode() == ARMISD::CMPZ) {
4226 bool SwitchEQNEToPLMI;
4227 SelectCMPZ(Flags.getNode(), SwitchEQNEToPLMI);
4228
4229 if (SwitchEQNEToPLMI) {
4230 SDValue ARMcc = N->getOperand(2);
4232
4233 switch (CC) {
4234 default: llvm_unreachable("CMPZ must be either NE or EQ!");
4235 case ARMCC::NE:
4236 CC = ARMCC::MI;
4237 break;
4238 case ARMCC::EQ:
4239 CC = ARMCC::PL;
4240 break;
4241 }
4242 SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32);
4243 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc,
4244 N->getOperand(3)};
4245 CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops);
4246 }
4247 }
4248 // Other cases are autogenerated.
4249 break;
4250 }
4251 case ARMISD::VZIP: {
4252 EVT VT = N->getValueType(0);
4253 // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
4254 unsigned Opc64[] = {ARM::VZIPd8, ARM::VZIPd16, ARM::VTRNd32};
4255 unsigned Opc128[] = {ARM::VZIPq8, ARM::VZIPq16, ARM::VZIPq32};
4256 unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128);
4257 SDValue Pred = getAL(CurDAG, dl);
4258 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
4259 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Pred, PredReg};
4260 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
4261 return;
4262 }
4263 case ARMISD::VUZP: {
4264 EVT VT = N->getValueType(0);
4265 // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
4266 unsigned Opc64[] = {ARM::VUZPd8, ARM::VUZPd16, ARM::VTRNd32};
4267 unsigned Opc128[] = {ARM::VUZPq8, ARM::VUZPq16, ARM::VUZPq32};
4268 unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128);
4269 SDValue Pred = getAL(CurDAG, dl);
4270 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
4271 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Pred, PredReg};
4272 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
4273 return;
4274 }
4275 case ARMISD::VTRN: {
4276 EVT VT = N->getValueType(0);
4277 unsigned Opc64[] = {ARM::VTRNd8, ARM::VTRNd16, ARM::VTRNd32};
4278 unsigned Opc128[] = {ARM::VTRNq8, ARM::VTRNq16, ARM::VTRNq32};
4279 unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128);
4280 SDValue Pred = getAL(CurDAG, dl);
4281 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
4282 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Pred, PredReg};
4283 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
4284 return;
4285 }
4286 case ARMISD::BUILD_VECTOR: {
4287 EVT VecVT = N->getValueType(0);
4288 EVT EltVT = VecVT.getVectorElementType();
4289 unsigned NumElts = VecVT.getVectorNumElements();
4290 if (EltVT == MVT::f64) {
4291 assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
4292 ReplaceNode(
4293 N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
4294 return;
4295 }
4296 assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
4297 if (NumElts == 2) {
4298 ReplaceNode(
4299 N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
4300 return;
4301 }
4302 assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
4303 ReplaceNode(N,
4304 createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
4305 N->getOperand(2), N->getOperand(3)));
4306 return;
4307 }
4308
4309 case ARMISD::VLD1DUP: {
4310 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16,
4311 ARM::VLD1DUPd32 };
4312 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16,
4313 ARM::VLD1DUPq32 };
4314 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 1, DOpcodes, QOpcodes);
4315 return;
4316 }
4317
4318 case ARMISD::VLD2DUP: {
4319 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
4320 ARM::VLD2DUPd32 };
4321 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 2, Opcodes);
4322 return;
4323 }
4324
4325 case ARMISD::VLD3DUP: {
4326 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
4327 ARM::VLD3DUPd16Pseudo,
4328 ARM::VLD3DUPd32Pseudo };
4329 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 3, Opcodes);
4330 return;
4331 }
4332
4333 case ARMISD::VLD4DUP: {
4334 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
4335 ARM::VLD4DUPd16Pseudo,
4336 ARM::VLD4DUPd32Pseudo };
4337 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 4, Opcodes);
4338 return;
4339 }
4340
4341 case ARMISD::VLD1DUP_UPD: {
4342 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed,
4343 ARM::VLD1DUPd16wb_fixed,
4344 ARM::VLD1DUPd32wb_fixed };
4345 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed,
4346 ARM::VLD1DUPq16wb_fixed,
4347 ARM::VLD1DUPq32wb_fixed };
4348 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 1, DOpcodes, QOpcodes);
4349 return;
4350 }
4351
4352 case ARMISD::VLD2DUP_UPD: {
4353 static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8wb_fixed,
4354 ARM::VLD2DUPd16wb_fixed,
4355 ARM::VLD2DUPd32wb_fixed,
4356 ARM::VLD1q64wb_fixed };
4357 static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
4358 ARM::VLD2DUPq16EvenPseudo,
4359 ARM::VLD2DUPq32EvenPseudo };
4360 static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudoWB_fixed,
4361 ARM::VLD2DUPq16OddPseudoWB_fixed,
4362 ARM::VLD2DUPq32OddPseudoWB_fixed };
4363 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 2, DOpcodes, QOpcodes0, QOpcodes1);
4364 return;
4365 }
4366
4367 case ARMISD::VLD3DUP_UPD: {
4368 static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
4369 ARM::VLD3DUPd16Pseudo_UPD,
4370 ARM::VLD3DUPd32Pseudo_UPD,
4371 ARM::VLD1d64TPseudoWB_fixed };
4372 static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
4373 ARM::VLD3DUPq16EvenPseudo,
4374 ARM::VLD3DUPq32EvenPseudo };
4375 static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo_UPD,
4376 ARM::VLD3DUPq16OddPseudo_UPD,
4377 ARM::VLD3DUPq32OddPseudo_UPD };
4378 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
4379 return;
4380 }
4381
4382 case ARMISD::VLD4DUP_UPD: {
4383 static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
4384 ARM::VLD4DUPd16Pseudo_UPD,
4385 ARM::VLD4DUPd32Pseudo_UPD,
4386 ARM::VLD1d64QPseudoWB_fixed };
4387 static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
4388 ARM::VLD4DUPq16EvenPseudo,
4389 ARM::VLD4DUPq32EvenPseudo };
4390 static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo_UPD,
4391 ARM::VLD4DUPq16OddPseudo_UPD,
4392 ARM::VLD4DUPq32OddPseudo_UPD };
4393 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
4394 return;
4395 }
4396
4397 case ARMISD::VLD1_UPD: {
4398 static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
4399 ARM::VLD1d16wb_fixed,
4400 ARM::VLD1d32wb_fixed,
4401 ARM::VLD1d64wb_fixed };
4402 static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
4403 ARM::VLD1q16wb_fixed,
4404 ARM::VLD1q32wb_fixed,
4405 ARM::VLD1q64wb_fixed };
4406 SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
4407 return;
4408 }
4409
4410 case ARMISD::VLD2_UPD: {
4411 if (Subtarget->hasNEON()) {
4412 static const uint16_t DOpcodes[] = {
4413 ARM::VLD2d8wb_fixed, ARM::VLD2d16wb_fixed, ARM::VLD2d32wb_fixed,
4414 ARM::VLD1q64wb_fixed};
4415 static const uint16_t QOpcodes[] = {ARM::VLD2q8PseudoWB_fixed,
4416 ARM::VLD2q16PseudoWB_fixed,
4417 ARM::VLD2q32PseudoWB_fixed};
4418 SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
4419 } else {
4420 static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8,
4421 ARM::MVE_VLD21_8_wb};
4422 static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16,
4423 ARM::MVE_VLD21_16_wb};
4424 static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32,
4425 ARM::MVE_VLD21_32_wb};
4426 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
4427 SelectMVE_VLD(N, 2, Opcodes, true);
4428 }
4429 return;
4430 }
4431
4432 case ARMISD::VLD3_UPD: {
4433 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
4434 ARM::VLD3d16Pseudo_UPD,
4435 ARM::VLD3d32Pseudo_UPD,
4436 ARM::VLD1d64TPseudoWB_fixed};
4437 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
4438 ARM::VLD3q16Pseudo_UPD,
4439 ARM::VLD3q32Pseudo_UPD };
4440 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
4441 ARM::VLD3q16oddPseudo_UPD,
4442 ARM::VLD3q32oddPseudo_UPD };
4443 SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
4444 return;
4445 }
4446
4447 case ARMISD::VLD4_UPD: {
4448 if (Subtarget->hasNEON()) {
4449 static const uint16_t DOpcodes[] = {
4450 ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD, ARM::VLD4d32Pseudo_UPD,
4451 ARM::VLD1d64QPseudoWB_fixed};
4452 static const uint16_t QOpcodes0[] = {ARM::VLD4q8Pseudo_UPD,
4453 ARM::VLD4q16Pseudo_UPD,
4454 ARM::VLD4q32Pseudo_UPD};
4455 static const uint16_t QOpcodes1[] = {ARM::VLD4q8oddPseudo_UPD,
4456 ARM::VLD4q16oddPseudo_UPD,
4457 ARM::VLD4q32oddPseudo_UPD};
4458 SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
4459 } else {
4460 static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8,
4461 ARM::MVE_VLD42_8,
4462 ARM::MVE_VLD43_8_wb};
4463 static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16,
4464 ARM::MVE_VLD42_16,
4465 ARM::MVE_VLD43_16_wb};
4466 static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32,
4467 ARM::MVE_VLD42_32,
4468 ARM::MVE_VLD43_32_wb};
4469 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
4470 SelectMVE_VLD(N, 4, Opcodes, true);
4471 }
4472 return;
4473 }
4474
4475 case ARMISD::VLD1x2_UPD: {
4476 if (Subtarget->hasNEON()) {
4477 static const uint16_t DOpcodes[] = {
4478 ARM::VLD1q8wb_fixed, ARM::VLD1q16wb_fixed, ARM::VLD1q32wb_fixed,
4479 ARM::VLD1q64wb_fixed};
4480 static const uint16_t QOpcodes[] = {
4481 ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed,
4482 ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed};
4483 SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
4484 return;
4485 }
4486 break;
4487 }
4488
4489 case ARMISD::VLD1x3_UPD: {
4490 if (Subtarget->hasNEON()) {
4491 static const uint16_t DOpcodes[] = {
4492 ARM::VLD1d8TPseudoWB_fixed, ARM::VLD1d16TPseudoWB_fixed,
4493 ARM::VLD1d32TPseudoWB_fixed, ARM::VLD1d64TPseudoWB_fixed};
4494 static const uint16_t QOpcodes0[] = {
4495 ARM::VLD1q8LowTPseudo_UPD, ARM::VLD1q16LowTPseudo_UPD,
4496 ARM::VLD1q32LowTPseudo_UPD, ARM::VLD1q64LowTPseudo_UPD};
4497 static const uint16_t QOpcodes1[] = {
4498 ARM::VLD1q8HighTPseudo_UPD, ARM::VLD1q16HighTPseudo_UPD,
4499 ARM::VLD1q32HighTPseudo_UPD, ARM::VLD1q64HighTPseudo_UPD};
4500 SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
4501 return;
4502 }
4503 break;
4504 }
4505
4506 case ARMISD::VLD1x4_UPD: {
4507 if (Subtarget->hasNEON()) {
4508 static const uint16_t DOpcodes[] = {
4509 ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed,
4510 ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed};
4511 static const uint16_t QOpcodes0[] = {
4512 ARM::VLD1q8LowQPseudo_UPD, ARM::VLD1q16LowQPseudo_UPD,
4513 ARM::VLD1q32LowQPseudo_UPD, ARM::VLD1q64LowQPseudo_UPD};
4514 static const uint16_t QOpcodes1[] = {
4515 ARM::VLD1q8HighQPseudo_UPD, ARM::VLD1q16HighQPseudo_UPD,
4516 ARM::VLD1q32HighQPseudo_UPD, ARM::VLD1q64HighQPseudo_UPD};
4517 SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
4518 return;
4519 }
4520 break;
4521 }
4522
4523 case ARMISD::VLD2LN_UPD: {
4524 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
4525 ARM::VLD2LNd16Pseudo_UPD,
4526 ARM::VLD2LNd32Pseudo_UPD };
4527 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
4528 ARM::VLD2LNq32Pseudo_UPD };
4529 SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
4530 return;
4531 }
4532
4533 case ARMISD::VLD3LN_UPD: {
4534 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
4535 ARM::VLD3LNd16Pseudo_UPD,
4536 ARM::VLD3LNd32Pseudo_UPD };
4537 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
4538 ARM::VLD3LNq32Pseudo_UPD };
4539 SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
4540 return;
4541 }
4542
4543 case ARMISD::VLD4LN_UPD: {
4544 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
4545 ARM::VLD4LNd16Pseudo_UPD,
4546 ARM::VLD4LNd32Pseudo_UPD };
4547 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
4548 ARM::VLD4LNq32Pseudo_UPD };
4549 SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
4550 return;
4551 }
4552
4553 case ARMISD::VST1_UPD: {
4554 static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
4555 ARM::VST1d16wb_fixed,
4556 ARM::VST1d32wb_fixed,
4557 ARM::VST1d64wb_fixed };
4558 static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
4559 ARM::VST1q16wb_fixed,
4560 ARM::VST1q32wb_fixed,
4561 ARM::VST1q64wb_fixed };
4562 SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
4563 return;
4564 }
4565
4566 case ARMISD::VST2_UPD: {
4567 if (Subtarget->hasNEON()) {
4568 static const uint16_t DOpcodes[] = {
4569 ARM::VST2d8wb_fixed, ARM::VST2d16wb_fixed, ARM::VST2d32wb_fixed,
4570 ARM::VST1q64wb_fixed};
4571 static const uint16_t QOpcodes[] = {ARM::VST2q8PseudoWB_fixed,
4572 ARM::VST2q16PseudoWB_fixed,
4573 ARM::VST2q32PseudoWB_fixed};
4574 SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
4575 return;
4576 }
4577 break;
4578 }
4579
4580 case ARMISD::VST3_UPD: {
4581 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
4582 ARM::VST3d16Pseudo_UPD,
4583 ARM::VST3d32Pseudo_UPD,
4584 ARM::VST1d64TPseudoWB_fixed};
4585 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
4586 ARM::VST3q16Pseudo_UPD,
4587 ARM::VST3q32Pseudo_UPD };
4588 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
4589 ARM::VST3q16oddPseudo_UPD,
4590 ARM::VST3q32oddPseudo_UPD };
4591 SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
4592 return;
4593 }
4594
4595 case ARMISD::VST4_UPD: {
4596 if (Subtarget->hasNEON()) {
4597 static const uint16_t DOpcodes[] = {
4598 ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD, ARM::VST4d32Pseudo_UPD,
4599 ARM::VST1d64QPseudoWB_fixed};
4600 static const uint16_t QOpcodes0[] = {ARM::VST4q8Pseudo_UPD,
4601 ARM::VST4q16Pseudo_UPD,
4602 ARM::VST4q32Pseudo_UPD};
4603 static const uint16_t QOpcodes1[] = {ARM::VST4q8oddPseudo_UPD,
4604 ARM::VST4q16oddPseudo_UPD,
4605 ARM::VST4q32oddPseudo_UPD};
4606 SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
4607 return;
4608 }
4609 break;
4610 }
4611
4612 case ARMISD::VST1x2_UPD: {
4613 if (Subtarget->hasNEON()) {
4614 static const uint16_t DOpcodes[] = { ARM::VST1q8wb_fixed,
4615 ARM::VST1q16wb_fixed,
4616 ARM::VST1q32wb_fixed,
4617 ARM::VST1q64wb_fixed};
4618 static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudoWB_fixed,
4619 ARM::VST1d16QPseudoWB_fixed,
4620 ARM::VST1d32QPseudoWB_fixed,
4621 ARM::VST1d64QPseudoWB_fixed };
4622 SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
4623 return;
4624 }
4625 break;
4626 }
4627
4628 case ARMISD::VST1x3_UPD: {
4629 if (Subtarget->hasNEON()) {
4630 static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudoWB_fixed,
4631 ARM::VST1d16TPseudoWB_fixed,
4632 ARM::VST1d32TPseudoWB_fixed,
4633 ARM::VST1d64TPseudoWB_fixed };
4634 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
4635 ARM::VST1q16LowTPseudo_UPD,
4636 ARM::VST1q32LowTPseudo_UPD,
4637 ARM::VST1q64LowTPseudo_UPD };
4638 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo_UPD,
4639 ARM::VST1q16HighTPseudo_UPD,
4640 ARM::VST1q32HighTPseudo_UPD,
4641 ARM::VST1q64HighTPseudo_UPD };
4642 SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
4643 return;
4644 }
4645 break;
4646 }
4647
4648 case ARMISD::VST1x4_UPD: {
4649 if (Subtarget->hasNEON()) {
4650 static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudoWB_fixed,
4651 ARM::VST1d16QPseudoWB_fixed,
4652 ARM::VST1d32QPseudoWB_fixed,
4653 ARM::VST1d64QPseudoWB_fixed };
4654 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
4655 ARM::VST1q16LowQPseudo_UPD,
4656 ARM::VST1q32LowQPseudo_UPD,
4657 ARM::VST1q64LowQPseudo_UPD };
4658 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo_UPD,
4659 ARM::VST1q16HighQPseudo_UPD,
4660 ARM::VST1q32HighQPseudo_UPD,
4661 ARM::VST1q64HighQPseudo_UPD };
4662 SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
4663 return;
4664 }
4665 break;
4666 }
4667 case ARMISD::VST2LN_UPD: {
4668 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
4669 ARM::VST2LNd16Pseudo_UPD,
4670 ARM::VST2LNd32Pseudo_UPD };
4671 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
4672 ARM::VST2LNq32Pseudo_UPD };
4673 SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
4674 return;
4675 }
4676
4677 case ARMISD::VST3LN_UPD: {
4678 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
4679 ARM::VST3LNd16Pseudo_UPD,
4680 ARM::VST3LNd32Pseudo_UPD };
4681 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
4682 ARM::VST3LNq32Pseudo_UPD };
4683 SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
4684 return;
4685 }
4686
4687 case ARMISD::VST4LN_UPD: {
4688 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
4689 ARM::VST4LNd16Pseudo_UPD,
4690 ARM::VST4LNd32Pseudo_UPD };
4691 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
4692 ARM::VST4LNq32Pseudo_UPD };
4693 SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
4694 return;
4695 }
4696
4699 unsigned IntNo = N->getConstantOperandVal(1);
4700 switch (IntNo) {
4701 default:
4702 break;
4703
4704 case Intrinsic::arm_mrrc:
4705 case Intrinsic::arm_mrrc2: {
4706 SDLoc dl(N);
4707 SDValue Chain = N->getOperand(0);
4708 unsigned Opc;
4709
4710 if (Subtarget->isThumb())
4711 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2);
4712 else
4713 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);
4714
4716 Ops.push_back(getI32Imm(N->getConstantOperandVal(2), dl)); /* coproc */
4717 Ops.push_back(getI32Imm(N->getConstantOperandVal(3), dl)); /* opc */
4718 Ops.push_back(getI32Imm(N->getConstantOperandVal(4), dl)); /* CRm */
4719
4720 // The mrrc2 instruction in ARM doesn't allow predicates, the top 4 bits of the encoded
4721 // instruction will always be '1111' but it is possible in assembly language to specify
4722 // AL as a predicate to mrrc2 but it doesn't make any difference to the encoded instruction.
4723 if (Opc != ARM::MRRC2) {
4724 Ops.push_back(getAL(CurDAG, dl));
4725 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
4726 }
4727
4728 Ops.push_back(Chain);
4729
4730 // Writes to two registers.
4731 const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other};
4732
4733 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops));
4734 return;
4735 }
4736 case Intrinsic::arm_ldaexd:
4737 case Intrinsic::arm_ldrexd: {
4738 SDLoc dl(N);
4739 SDValue Chain = N->getOperand(0);
4740 SDValue MemAddr = N->getOperand(2);
4741 bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps();
4742
4743 bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
4744 unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
4745 : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);
4746
4747 // arm_ldrexd returns a i64 value in {i32, i32}
4748 std::vector<EVT> ResTys;
4749 if (isThumb) {
4750 ResTys.push_back(MVT::i32);
4751 ResTys.push_back(MVT::i32);
4752 } else
4753 ResTys.push_back(MVT::Untyped);
4754 ResTys.push_back(MVT::Other);
4755
4756 // Place arguments in the right order.
4757 SDValue Ops[] = {MemAddr, getAL(CurDAG, dl),
4758 CurDAG->getRegister(0, MVT::i32), Chain};
4759 SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
4760 // Transfer memoperands.
4761 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
4762 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
4763
4764 // Remap uses.
4765 SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
4766 if (!SDValue(N, 0).use_empty()) {
4768 if (isThumb)
4769 Result = SDValue(Ld, 0);
4770 else {
4771 SDValue SubRegIdx =
4772 CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
4773 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
4774 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
4775 Result = SDValue(ResNode,0);
4776 }
4777 ReplaceUses(SDValue(N, 0), Result);
4778 }
4779 if (!SDValue(N, 1).use_empty()) {
4781 if (isThumb)
4782 Result = SDValue(Ld, 1);
4783 else {
4784 SDValue SubRegIdx =
4785 CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
4786 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
4787 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
4788 Result = SDValue(ResNode,0);
4789 }
4790 ReplaceUses(SDValue(N, 1), Result);
4791 }
4792 ReplaceUses(SDValue(N, 2), OutChain);
4793 CurDAG->RemoveDeadNode(N);
4794 return;
4795 }
4796 case Intrinsic::arm_stlexd:
4797 case Intrinsic::arm_strexd: {
4798 SDLoc dl(N);
4799 SDValue Chain = N->getOperand(0);
4800 SDValue Val0 = N->getOperand(2);
4801 SDValue Val1 = N->getOperand(3);
4802 SDValue MemAddr = N->getOperand(4);
4803
4804 // Store exclusive double return a i32 value which is the return status
4805 // of the issued store.
4806 const EVT ResTys[] = {MVT::i32, MVT::Other};
4807
4808 bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
4809 // Place arguments in the right order.
4811 if (isThumb) {
4812 Ops.push_back(Val0);
4813 Ops.push_back(Val1);
4814 } else
4815 // arm_strexd uses GPRPair.
4816 Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
4817 Ops.push_back(MemAddr);
4818 Ops.push_back(getAL(CurDAG, dl));
4819 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
4820 Ops.push_back(Chain);
4821
4822 bool IsRelease = IntNo == Intrinsic::arm_stlexd;
4823 unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
4824 : (IsRelease ? ARM::STLEXD : ARM::STREXD);
4825
4826 SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
4827 // Transfer memoperands.
4828 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
4829 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
4830
4831 ReplaceNode(N, St);
4832 return;
4833 }
4834
4835 case Intrinsic::arm_neon_vld1: {
4836 static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
4837 ARM::VLD1d32, ARM::VLD1d64 };
4838 static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
4839 ARM::VLD1q32, ARM::VLD1q64};
4840 SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
4841 return;
4842 }
4843
4844 case Intrinsic::arm_neon_vld1x2: {
4845 static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
4846 ARM::VLD1q32, ARM::VLD1q64 };
4847 static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo,
4848 ARM::VLD1d16QPseudo,
4849 ARM::VLD1d32QPseudo,
4850 ARM::VLD1d64QPseudo };
4851 SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
4852 return;
4853 }
4854
4855 case Intrinsic::arm_neon_vld1x3: {
4856 static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo,
4857 ARM::VLD1d16TPseudo,
4858 ARM::VLD1d32TPseudo,
4859 ARM::VLD1d64TPseudo };
4860 static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowTPseudo_UPD,
4861 ARM::VLD1q16LowTPseudo_UPD,
4862 ARM::VLD1q32LowTPseudo_UPD,
4863 ARM::VLD1q64LowTPseudo_UPD };
4864 static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo,
4865 ARM::VLD1q16HighTPseudo,
4866 ARM::VLD1q32HighTPseudo,
4867 ARM::VLD1q64HighTPseudo };
4868 SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
4869 return;
4870 }
4871
4872 case Intrinsic::arm_neon_vld1x4: {
4873 static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo,
4874 ARM::VLD1d16QPseudo,
4875 ARM::VLD1d32QPseudo,
4876 ARM::VLD1d64QPseudo };
4877 static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD,
4878 ARM::VLD1q16LowQPseudo_UPD,
4879 ARM::VLD1q32LowQPseudo_UPD,
4880 ARM::VLD1q64LowQPseudo_UPD };
4881 static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo,
4882 ARM::VLD1q16HighQPseudo,
4883 ARM::VLD1q32HighQPseudo,
4884 ARM::VLD1q64HighQPseudo };
4885 SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
4886 return;
4887 }
4888
4889 case Intrinsic::arm_neon_vld2: {
4890 static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
4891 ARM::VLD2d32, ARM::VLD1q64 };
4892 static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
4893 ARM::VLD2q32Pseudo };
4894 SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
4895 return;
4896 }
4897
4898 case Intrinsic::arm_neon_vld3: {
4899 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
4900 ARM::VLD3d16Pseudo,
4901 ARM::VLD3d32Pseudo,
4902 ARM::VLD1d64TPseudo };
4903 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
4904 ARM::VLD3q16Pseudo_UPD,
4905 ARM::VLD3q32Pseudo_UPD };
4906 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
4907 ARM::VLD3q16oddPseudo,
4908 ARM::VLD3q32oddPseudo };
4909 SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
4910 return;
4911 }
4912
4913 case Intrinsic::arm_neon_vld4: {
4914 static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
4915 ARM::VLD4d16Pseudo,
4916 ARM::VLD4d32Pseudo,
4917 ARM::VLD1d64QPseudo };
4918 static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
4919 ARM::VLD4q16Pseudo_UPD,
4920 ARM::VLD4q32Pseudo_UPD };
4921 static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
4922 ARM::VLD4q16oddPseudo,
4923 ARM::VLD4q32oddPseudo };
4924 SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
4925 return;
4926 }
4927
4928 case Intrinsic::arm_neon_vld2dup: {
4929 static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
4930 ARM::VLD2DUPd32, ARM::VLD1q64 };
4931 static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
4932 ARM::VLD2DUPq16EvenPseudo,
4933 ARM::VLD2DUPq32EvenPseudo };
4934 static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudo,
4935 ARM::VLD2DUPq16OddPseudo,
4936 ARM::VLD2DUPq32OddPseudo };
4937 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 2,
4938 DOpcodes, QOpcodes0, QOpcodes1);
4939 return;
4940 }
4941
4942 case Intrinsic::arm_neon_vld3dup: {
4943 static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo,
4944 ARM::VLD3DUPd16Pseudo,
4945 ARM::VLD3DUPd32Pseudo,
4946 ARM::VLD1d64TPseudo };
4947 static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
4948 ARM::VLD3DUPq16EvenPseudo,
4949 ARM::VLD3DUPq32EvenPseudo };
4950 static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo,
4951 ARM::VLD3DUPq16OddPseudo,
4952 ARM::VLD3DUPq32OddPseudo };
4953 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 3,
4954 DOpcodes, QOpcodes0, QOpcodes1);
4955 return;
4956 }
4957
4958 case Intrinsic::arm_neon_vld4dup: {
4959 static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo,
4960 ARM::VLD4DUPd16Pseudo,
4961 ARM::VLD4DUPd32Pseudo,
4962 ARM::VLD1d64QPseudo };
4963 static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
4964 ARM::VLD4DUPq16EvenPseudo,
4965 ARM::VLD4DUPq32EvenPseudo };
4966 static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo,
4967 ARM::VLD4DUPq16OddPseudo,
4968 ARM::VLD4DUPq32OddPseudo };
4969 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 4,
4970 DOpcodes, QOpcodes0, QOpcodes1);
4971 return;
4972 }
4973
4974 case Intrinsic::arm_neon_vld2lane: {
4975 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
4976 ARM::VLD2LNd16Pseudo,
4977 ARM::VLD2LNd32Pseudo };
4978 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
4979 ARM::VLD2LNq32Pseudo };
4980 SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
4981 return;
4982 }
4983
4984 case Intrinsic::arm_neon_vld3lane: {
4985 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
4986 ARM::VLD3LNd16Pseudo,
4987 ARM::VLD3LNd32Pseudo };
4988 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
4989 ARM::VLD3LNq32Pseudo };
4990 SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
4991 return;
4992 }
4993
4994 case Intrinsic::arm_neon_vld4lane: {
4995 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
4996 ARM::VLD4LNd16Pseudo,
4997 ARM::VLD4LNd32Pseudo };
4998 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
4999 ARM::VLD4LNq32Pseudo };
5000 SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
5001 return;
5002 }
5003
5004 case Intrinsic::arm_neon_vst1: {
5005 static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
5006 ARM::VST1d32, ARM::VST1d64 };
5007 static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
5008 ARM::VST1q32, ARM::VST1q64 };
5009 SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
5010 return;
5011 }
5012
5013 case Intrinsic::arm_neon_vst1x2: {
5014 static const uint16_t DOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
5015 ARM::VST1q32, ARM::VST1q64 };
5016 static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudo,
5017 ARM::VST1d16QPseudo,
5018 ARM::VST1d32QPseudo,
5019 ARM::VST1d64QPseudo };
5020 SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
5021 return;
5022 }
5023
5024 case Intrinsic::arm_neon_vst1x3: {
5025 static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudo,
5026 ARM::VST1d16TPseudo,
5027 ARM::VST1d32TPseudo,
5028 ARM::VST1d64TPseudo };
5029 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
5030 ARM::VST1q16LowTPseudo_UPD,
5031 ARM::VST1q32LowTPseudo_UPD,
5032 ARM::VST1q64LowTPseudo_UPD };
5033 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo,
5034 ARM::VST1q16HighTPseudo,
5035 ARM::VST1q32HighTPseudo,
5036 ARM::VST1q64HighTPseudo };
5037 SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
5038 return;
5039 }
5040
5041 case Intrinsic::arm_neon_vst1x4: {
5042 static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudo,
5043 ARM::VST1d16QPseudo,
5044 ARM::VST1d32QPseudo,
5045 ARM::VST1d64QPseudo };
5046 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
5047 ARM::VST1q16LowQPseudo_UPD,
5048 ARM::VST1q32LowQPseudo_UPD,
5049 ARM::VST1q64LowQPseudo_UPD };
5050 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo,
5051 ARM::VST1q16HighQPseudo,
5052 ARM::VST1q32HighQPseudo,
5053 ARM::VST1q64HighQPseudo };
5054 SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
5055 return;
5056 }
5057
5058 case Intrinsic::arm_neon_vst2: {
5059 static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
5060 ARM::VST2d32, ARM::VST1q64 };
5061 static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
5062 ARM::VST2q32Pseudo };
5063 SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
5064 return;
5065 }
5066
5067 case Intrinsic::arm_neon_vst3: {
5068 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
5069 ARM::VST3d16Pseudo,
5070 ARM::VST3d32Pseudo,
5071 ARM::VST1d64TPseudo };
5072 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
5073 ARM::VST3q16Pseudo_UPD,
5074 ARM::VST3q32Pseudo_UPD };
5075 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
5076 ARM::VST3q16oddPseudo,
5077 ARM::VST3q32oddPseudo };
5078 SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
5079 return;
5080 }
5081
5082 case Intrinsic::arm_neon_vst4: {
5083 static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
5084 ARM::VST4d16Pseudo,
5085 ARM::VST4d32Pseudo,
5086 ARM::VST1d64QPseudo };
5087 static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
5088 ARM::VST4q16Pseudo_UPD,
5089 ARM::VST4q32Pseudo_UPD };
5090 static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
5091 ARM::VST4q16oddPseudo,
5092 ARM::VST4q32oddPseudo };
5093 SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
5094 return;
5095 }
5096
5097 case Intrinsic::arm_neon_vst2lane: {
5098 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
5099 ARM::VST2LNd16Pseudo,
5100 ARM::VST2LNd32Pseudo };
5101 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
5102 ARM::VST2LNq32Pseudo };
5103 SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
5104 return;
5105 }
5106
5107 case Intrinsic::arm_neon_vst3lane: {
5108 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
5109 ARM::VST3LNd16Pseudo,
5110 ARM::VST3LNd32Pseudo };
5111 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
5112 ARM::VST3LNq32Pseudo };
5113 SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
5114 return;
5115 }
5116
5117 case Intrinsic::arm_neon_vst4lane: {
5118 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
5119 ARM::VST4LNd16Pseudo,
5120 ARM::VST4LNd32Pseudo };
5121 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
5122 ARM::VST4LNq32Pseudo };
5123 SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
5124 return;
5125 }
5126
5127 case Intrinsic::arm_mve_vldr_gather_base_wb:
5128 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated: {
5129 static const uint16_t Opcodes[] = {ARM::MVE_VLDRWU32_qi_pre,
5130 ARM::MVE_VLDRDU64_qi_pre};
5131 SelectMVE_WB(N, Opcodes,
5132 IntNo == Intrinsic::arm_mve_vldr_gather_base_wb_predicated);
5133 return;
5134 }
5135
5136 case Intrinsic::arm_mve_vld2q: {
5137 static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8, ARM::MVE_VLD21_8};
5138 static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16,
5139 ARM::MVE_VLD21_16};
5140 static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32,
5141 ARM::MVE_VLD21_32};
5142 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
5143 SelectMVE_VLD(N, 2, Opcodes, false);
5144 return;
5145 }
5146
5147 case Intrinsic::arm_mve_vld4q: {
5148 static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8,
5149 ARM::MVE_VLD42_8, ARM::MVE_VLD43_8};
5150 static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16,
5151 ARM::MVE_VLD42_16,
5152 ARM::MVE_VLD43_16};
5153 static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32,
5154 ARM::MVE_VLD42_32,
5155 ARM::MVE_VLD43_32};
5156 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
5157 SelectMVE_VLD(N, 4, Opcodes, false);
5158 return;
5159 }
5160 }
5161 break;
5162 }
5163
5165 unsigned IntNo = N->getConstantOperandVal(0);
5166 switch (IntNo) {
5167 default:
5168 break;
5169
5170 // Scalar f32 -> bf16
5171 case Intrinsic::arm_neon_vcvtbfp2bf: {
5172 SDLoc dl(N);
5173 const SDValue &Src = N->getOperand(1);
5174 llvm::EVT DestTy = N->getValueType(0);
5175 SDValue Pred = getAL(CurDAG, dl);
5176 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
5177 SDValue Ops[] = { Src, Src, Pred, Reg0 };
5178 CurDAG->SelectNodeTo(N, ARM::BF16_VCVTB, DestTy, Ops);
5179 return;
5180 }
5181
5182 // Vector v4f32 -> v4bf16
5183 case Intrinsic::arm_neon_vcvtfp2bf: {
5184 SDLoc dl(N);
5185 const SDValue &Src = N->getOperand(1);
5186 SDValue Pred = getAL(CurDAG, dl);
5187 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
5188 SDValue Ops[] = { Src, Pred, Reg0 };
5189 CurDAG->SelectNodeTo(N, ARM::BF16_VCVT, MVT::v4bf16, Ops);
5190 return;
5191 }
5192
5193 case Intrinsic::arm_mve_urshrl:
5194 SelectMVE_LongShift(N, ARM::MVE_URSHRL, true, false);
5195 return;
5196 case Intrinsic::arm_mve_uqshll:
5197 SelectMVE_LongShift(N, ARM::MVE_UQSHLL, true, false);
5198 return;
5199 case Intrinsic::arm_mve_srshrl:
5200 SelectMVE_LongShift(N, ARM::MVE_SRSHRL, true, false);
5201 return;
5202 case Intrinsic::arm_mve_sqshll:
5203 SelectMVE_LongShift(N, ARM::MVE_SQSHLL, true, false);
5204 return;
5205 case Intrinsic::arm_mve_uqrshll:
5206 SelectMVE_LongShift(N, ARM::MVE_UQRSHLL, false, true);
5207 return;
5208 case Intrinsic::arm_mve_sqrshrl:
5209 SelectMVE_LongShift(N, ARM::MVE_SQRSHRL, false, true);
5210 return;
5211
5212 case Intrinsic::arm_mve_vadc:
5213 case Intrinsic::arm_mve_vadc_predicated:
5214 SelectMVE_VADCSBC(N, ARM::MVE_VADC, ARM::MVE_VADCI, true,
5215 IntNo == Intrinsic::arm_mve_vadc_predicated);
5216 return;
5217 case Intrinsic::arm_mve_vsbc:
5218 case Intrinsic::arm_mve_vsbc_predicated:
5219 SelectMVE_VADCSBC(N, ARM::MVE_VSBC, ARM::MVE_VSBCI, false,
5220 IntNo == Intrinsic::arm_mve_vsbc_predicated);
5221 return;
5222 case Intrinsic::arm_mve_vshlc:
5223 case Intrinsic::arm_mve_vshlc_predicated:
5224 SelectMVE_VSHLC(N, IntNo == Intrinsic::arm_mve_vshlc_predicated);
5225 return;
5226
5227 case Intrinsic::arm_mve_vmlldava:
5228 case Intrinsic::arm_mve_vmlldava_predicated: {
5229 static const uint16_t OpcodesU[] = {
5230 ARM::MVE_VMLALDAVu16, ARM::MVE_VMLALDAVu32,
5231 ARM::MVE_VMLALDAVau16, ARM::MVE_VMLALDAVau32,
5232 };
5233 static const uint16_t OpcodesS[] = {
5234 ARM::MVE_VMLALDAVs16, ARM::MVE_VMLALDAVs32,
5235 ARM::MVE_VMLALDAVas16, ARM::MVE_VMLALDAVas32,
5236 ARM::MVE_VMLALDAVxs16, ARM::MVE_VMLALDAVxs32,
5237 ARM::MVE_VMLALDAVaxs16, ARM::MVE_VMLALDAVaxs32,
5238 ARM::MVE_VMLSLDAVs16, ARM::MVE_VMLSLDAVs32,
5239 ARM::MVE_VMLSLDAVas16, ARM::MVE_VMLSLDAVas32,
5240 ARM::MVE_VMLSLDAVxs16, ARM::MVE_VMLSLDAVxs32,
5241 ARM::MVE_VMLSLDAVaxs16, ARM::MVE_VMLSLDAVaxs32,
5242 };
5243 SelectMVE_VMLLDAV(N, IntNo == Intrinsic::arm_mve_vmlldava_predicated,
5244 OpcodesS, OpcodesU);
5245 return;
5246 }
5247
5248 case Intrinsic::arm_mve_vrmlldavha:
5249 case Intrinsic::arm_mve_vrmlldavha_predicated: {
5250 static const uint16_t OpcodesU[] = {
5251 ARM::MVE_VRMLALDAVHu32, ARM::MVE_VRMLALDAVHau32,
5252 };
5253 static const uint16_t OpcodesS[] = {
5254 ARM::MVE_VRMLALDAVHs32, ARM::MVE_VRMLALDAVHas32,
5255 ARM::MVE_VRMLALDAVHxs32, ARM::MVE_VRMLALDAVHaxs32,
5256 ARM::MVE_VRMLSLDAVHs32, ARM::MVE_VRMLSLDAVHas32,
5257 ARM::MVE_VRMLSLDAVHxs32, ARM::MVE_VRMLSLDAVHaxs32,
5258 };
5259 SelectMVE_VRMLLDAVH(N, IntNo == Intrinsic::arm_mve_vrmlldavha_predicated,
5260 OpcodesS, OpcodesU);
5261 return;
5262 }
5263
5264 case Intrinsic::arm_mve_vidup:
5265 case Intrinsic::arm_mve_vidup_predicated: {
5266 static const uint16_t Opcodes[] = {
5267 ARM::MVE_VIDUPu8, ARM::MVE_VIDUPu16, ARM::MVE_VIDUPu32,
5268 };
5269 SelectMVE_VxDUP(N, Opcodes, false,
5270 IntNo == Intrinsic::arm_mve_vidup_predicated);
5271 return;
5272 }
5273
5274 case Intrinsic::arm_mve_vddup:
5275 case Intrinsic::arm_mve_vddup_predicated: {
5276 static const uint16_t Opcodes[] = {
5277 ARM::MVE_VDDUPu8, ARM::MVE_VDDUPu16, ARM::MVE_VDDUPu32,
5278 };
5279 SelectMVE_VxDUP(N, Opcodes, false,
5280 IntNo == Intrinsic::arm_mve_vddup_predicated);
5281 return;
5282 }
5283
5284 case Intrinsic::arm_mve_viwdup:
5285 case Intrinsic::arm_mve_viwdup_predicated: {
5286 static const uint16_t Opcodes[] = {
5287 ARM::MVE_VIWDUPu8, ARM::MVE_VIWDUPu16, ARM::MVE_VIWDUPu32,
5288 };
5289 SelectMVE_VxDUP(N, Opcodes, true,
5290 IntNo == Intrinsic::arm_mve_viwdup_predicated);
5291 return;
5292 }
5293
5294 case Intrinsic::arm_mve_vdwdup:
5295 case Intrinsic::arm_mve_vdwdup_predicated: {
5296 static const uint16_t Opcodes[] = {
5297 ARM::MVE_VDWDUPu8, ARM::MVE_VDWDUPu16, ARM::MVE_VDWDUPu32,
5298 };
5299 SelectMVE_VxDUP(N, Opcodes, true,
5300 IntNo == Intrinsic::arm_mve_vdwdup_predicated);
5301 return;
5302 }
5303
5304 case Intrinsic::arm_cde_cx1d:
5305 case Intrinsic::arm_cde_cx1da:
5306 case Intrinsic::arm_cde_cx2d:
5307 case Intrinsic::arm_cde_cx2da:
5308 case Intrinsic::arm_cde_cx3d:
5309 case Intrinsic::arm_cde_cx3da: {
5310 bool HasAccum = IntNo == Intrinsic::arm_cde_cx1da ||
5311 IntNo == Intrinsic::arm_cde_cx2da ||
5312 IntNo == Intrinsic::arm_cde_cx3da;
5313 size_t NumExtraOps;
5314 uint16_t Opcode;
5315 switch (IntNo) {
5316 case Intrinsic::arm_cde_cx1d:
5317 case Intrinsic::arm_cde_cx1da:
5318 NumExtraOps = 0;
5319 Opcode = HasAccum ? ARM::CDE_CX1DA : ARM::CDE_CX1D;
5320 break;
5321 case Intrinsic::arm_cde_cx2d:
5322 case Intrinsic::arm_cde_cx2da:
5323 NumExtraOps = 1;
5324 Opcode = HasAccum ? ARM::CDE_CX2DA : ARM::CDE_CX2D;
5325 break;
5326 case Intrinsic::arm_cde_cx3d:
5327 case Intrinsic::arm_cde_cx3da:
5328 NumExtraOps = 2;
5329 Opcode = HasAccum ? ARM::CDE_CX3DA : ARM::CDE_CX3D;
5330 break;
5331 default:
5332 llvm_unreachable("Unexpected opcode");
5333 }
5334 SelectCDE_CXxD(N, Opcode, NumExtraOps, HasAccum);
5335 return;
5336 }
5337 }
5338 break;
5339 }
5340
5342 SelectCMP_SWAP(N);
5343 return;
5344 }
5345
5346 SelectCode(N);
5347}
5348
5349// Inspect a register string of the form
5350// cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or
5351// cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string
5352// and obtain the integer operands from them, adding these operands to the
5353// provided vector.
5355 SelectionDAG *CurDAG,
5356 const SDLoc &DL,
5357 std::vector<SDValue> &Ops) {
5359 RegString.split(Fields, ':');
5360
5361 if (Fields.size() > 1) {
5362 bool AllIntFields = true;
5363
5364 for (StringRef Field : Fields) {
5365 // Need to trim out leading 'cp' characters and get the integer field.
5366 unsigned IntField;
5367 AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
5368 Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
5369 }
5370
5371 assert(AllIntFields &&
5372 "Unexpected non-integer value in special register string.");
5373 (void)AllIntFields;
5374 }
5375}
5376
5377// Maps a Banked Register string to its mask value. The mask value returned is
5378// for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
5379// mask operand, which expresses which register is to be used, e.g. r8, and in
5380// which mode it is to be used, e.g. usr. Returns -1 to signify that the string
5381// was invalid.
5382static inline int getBankedRegisterMask(StringRef RegString) {
5383 auto TheReg = ARMBankedReg::lookupBankedRegByName(RegString.lower());
5384 if (!TheReg)
5385 return -1;
5386 return TheReg->Encoding;
5387}
5388
5389// The flags here are common to those allowed for apsr in the A class cores and
5390// those allowed for the special registers in the M class cores. Returns a
5391// value representing which flags were present, -1 if invalid.
5392static inline int getMClassFlagsMask(StringRef Flags) {
5393 return StringSwitch<int>(Flags)
5394 .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is
5395 // correct when flags are not permitted
5396 .Case("g", 0x1)
5397 .Case("nzcvq", 0x2)
5398 .Case("nzcvqg", 0x3)
5399 .Default(-1);
5400}
5401
5402// Maps MClass special registers string to its value for use in the
5403// t2MRS_M/t2MSR_M instruction nodes as the SYSm value operand.
5404// Returns -1 to signify that the string was invalid.
5405static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget) {
5406 auto TheReg = ARMSysReg::lookupMClassSysRegByName(Reg);
5407 const FeatureBitset &FeatureBits = Subtarget->getFeatureBits();
5408 if (!TheReg || !TheReg->hasRequiredFeatures(FeatureBits))
5409 return -1;
5410 return (int)(TheReg->Encoding & 0xFFF); // SYSm value
5411}
5412
5414 // The mask operand contains the special register (R Bit) in bit 4, whether
5415 // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
5416 // bits 3-0 contains the fields to be accessed in the special register, set by
5417 // the flags provided with the register.
5418 int Mask = 0;
5419 if (Reg == "apsr") {
5420 // The flags permitted for apsr are the same flags that are allowed in
5421 // M class registers. We get the flag value and then shift the flags into
5422 // the correct place to combine with the mask.
5423 Mask = getMClassFlagsMask(Flags);
5424 if (Mask == -1)
5425 return -1;
5426 return Mask << 2;
5427 }
5428
5429 if (Reg != "cpsr" && Reg != "spsr") {
5430 return -1;
5431 }
5432
5433 // This is the same as if the flags were "fc"
5434 if (Flags.empty() || Flags == "all")
5435 return Mask | 0x9;
5436
5437 // Inspect the supplied flags string and set the bits in the mask for
5438 // the relevant and valid flags allowed for cpsr and spsr.
5439 for (char Flag : Flags) {
5440 int FlagVal;
5441 switch (Flag) {
5442 case 'c':
5443 FlagVal = 0x1;
5444 break;
5445 case 'x':
5446 FlagVal = 0x2;
5447 break;
5448 case 's':
5449 FlagVal = 0x4;
5450 break;
5451 case 'f':
5452 FlagVal = 0x8;
5453 break;
5454 default:
5455 FlagVal = 0;
5456 }
5457
5458 // This avoids allowing strings where the same flag bit appears twice.
5459 if (!FlagVal || (Mask & FlagVal))
5460 return -1;
5461 Mask |= FlagVal;
5462 }
5463
5464 // If the register is spsr then we need to set the R bit.
5465 if (Reg == "spsr")
5466 Mask |= 0x10;
5467
5468 return Mask;
5469}
5470
5471// Lower the read_register intrinsic to ARM specific DAG nodes
5472// using the supplied metadata string to select the instruction node to use
5473// and the registers/masks to construct as operands for the node.
5474bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){
5475 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
5476 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
5477 bool IsThumb2 = Subtarget->isThumb2();
5478 SDLoc DL(N);
5479
5480 std::vector<SDValue> Ops;
5481 getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
5482
5483 if (!Ops.empty()) {
5484 // If the special register string was constructed of fields (as defined
5485 // in the ACLE) then need to lower to MRC node (32 bit) or
5486 // MRRC node(64 bit), we can make the distinction based on the number of
5487 // operands we have.
5488 unsigned Opcode;
5489 SmallVector<EVT, 3> ResTypes;
5490 if (Ops.size() == 5){
5491 Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
5492 ResTypes.append({ MVT::i32, MVT::Other });
5493 } else {
5494 assert(Ops.size() == 3 &&
5495 "Invalid number of fields in special register string.");
5496 Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
5497 ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
5498 }
5499
5500 Ops.push_back(getAL(CurDAG, DL));
5501 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
5502 Ops.push_back(N->getOperand(0));
5503 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops));
5504 return true;
5505 }
5506
5507 std::string SpecialReg = RegString->getString().lower();
5508
5509 int BankedReg = getBankedRegisterMask(SpecialReg);
5510 if (BankedReg != -1) {
5511 Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
5512 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
5513 N->getOperand(0) };
5514 ReplaceNode(
5515 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
5516 DL, MVT::i32, MVT::Other, Ops));
5517 return true;
5518 }
5519
5520 // The VFP registers are read by creating SelectionDAG nodes with opcodes
5521 // corresponding to the register that is being read from. So we switch on the
5522 // string to find which opcode we need to use.
5523 unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
5524 .Case("fpscr", ARM::VMRS)
5525 .Case("fpexc", ARM::VMRS_FPEXC)
5526 .Case("fpsid", ARM::VMRS_FPSID)
5527 .Case("mvfr0", ARM::VMRS_MVFR0)
5528 .Case("mvfr1", ARM::VMRS_MVFR1)
5529 .Case("mvfr2", ARM::VMRS_MVFR2)
5530 .Case("fpinst", ARM::VMRS_FPINST)
5531 .Case("fpinst2", ARM::VMRS_FPINST2)
5532 .Default(0);
5533
5534 // If an opcode was found then we can lower the read to a VFP instruction.
5535 if (Opcode) {
5536 if (!Subtarget->hasVFP2Base())
5537 return false;
5538 if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8Base())
5539 return false;
5540
5541 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
5542 N->getOperand(0) };
5543 ReplaceNode(N,
5544 CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops));
5545 return true;
5546 }
5547
5548 // If the target is M Class then need to validate that the register string
5549 // is an acceptable value, so check that a mask can be constructed from the
5550 // string.
5551 if (Subtarget->isMClass()) {
5552 int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
5553 if (SYSmValue == -1)
5554 return false;
5555
5556 SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
5557 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
5558 N->getOperand(0) };
5559 ReplaceNode(
5560 N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops));
5561 return true;
5562 }
5563
5564 // Here we know the target is not M Class so we need to check if it is one
5565 // of the remaining possible values which are apsr, cpsr or spsr.
5566 if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
5567 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
5568 N->getOperand(0) };
5569 ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS,
5570 DL, MVT::i32, MVT::Other, Ops));
5571 return true;
5572 }
5573
5574 if (SpecialReg == "spsr") {
5575 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
5576 N->getOperand(0) };
5577 ReplaceNode(
5578 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, DL,
5579 MVT::i32, MVT::Other, Ops));
5580 return true;
5581 }
5582
5583 return false;
5584}
5585
5586// Lower the write_register intrinsic to ARM specific DAG nodes
5587// using the supplied metadata string to select the instruction node to use
5588// and the registers/masks to use in the nodes
5589bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){
5590 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
5591 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
5592 bool IsThumb2 = Subtarget->isThumb2();
5593 SDLoc DL(N);
5594
5595 std::vector<SDValue> Ops;
5596 getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
5597
5598 if (!Ops.empty()) {
5599 // If the special register string was constructed of fields (as defined
5600 // in the ACLE) then need to lower to MCR node (32 bit) or
5601 // MCRR node(64 bit), we can make the distinction based on the number of
5602 // operands we have.
5603 unsigned Opcode;
5604 if (Ops.size() == 5) {
5605 Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
5606 Ops.insert(Ops.begin()+2, N->getOperand(2));
5607 } else {
5608 assert(Ops.size() == 3 &&
5609 "Invalid number of fields in special register string.");
5610 Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
5611 SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
5612 Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);
5613 }
5614
5615 Ops.push_back(getAL(CurDAG, DL));
5616 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
5617 Ops.push_back(N->getOperand(0));
5618
5619 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
5620 return true;
5621 }
5622
5623 std::string SpecialReg = RegString->getString().lower();
5624 int BankedReg = getBankedRegisterMask(SpecialReg);
5625 if (BankedReg != -1) {
5626 Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2),
5627 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
5628 N->getOperand(0) };
5629 ReplaceNode(
5630 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
5631 DL, MVT::Other, Ops));
5632 return true;
5633 }
5634
5635 // The VFP registers are written to by creating SelectionDAG nodes with
5636 // opcodes corresponding to the register that is being written. So we switch
5637 // on the string to find which opcode we need to use.
5638 unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
5639 .Case("fpscr", ARM::VMSR)
5640 .Case("fpexc", ARM::VMSR_FPEXC)
5641 .Case("fpsid", ARM::VMSR_FPSID)
5642 .Case("fpinst", ARM::VMSR_FPINST)
5643 .Case("fpinst2", ARM::VMSR_FPINST2)
5644 .Default(0);
5645
5646 if (Opcode) {
5647 if (!Subtarget->hasVFP2Base())
5648 return false;
5649 Ops = { N->getOperand(2), getAL(CurDAG, DL),
5650 CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
5651 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
5652 return true;
5653 }
5654
5655 std::pair<StringRef, StringRef> Fields;
5656 Fields = StringRef(SpecialReg).rsplit('_');
5657 std::string Reg = Fields.first.str();
5658 StringRef Flags = Fields.second;
5659
5660 // If the target was M Class then need to validate the special register value
5661 // and retrieve the mask for use in the instruction node.
5662 if (Subtarget->isMClass()) {
5663 int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
5664 if (SYSmValue == -1)
5665 return false;
5666
5667 SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
5668 N->getOperand(2), getAL(CurDAG, DL),
5669 CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
5670 ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops));
5671 return true;
5672 }
5673
5674 // We then check to see if a valid mask can be constructed for one of the
5675 // register string values permitted for the A and R class cores. These values
5676 // are apsr, spsr and cpsr; these are also valid on older cores.
5677 int Mask = getARClassRegisterMask(Reg, Flags);
5678 if (Mask != -1) {
5679 Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
5680 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
5681 N->getOperand(0) };
5682 ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
5683 DL, MVT::Other, Ops));
5684 return true;
5685 }
5686
5687 return false;
5688}
5689
5690bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){
5691 std::vector<SDValue> AsmNodeOperands;
5693 bool Changed = false;
5694 unsigned NumOps = N->getNumOperands();
5695
5696 // Normally, i64 data is bounded to two arbitrary GRPs for "%r" constraint.
5697 // However, some instrstions (e.g. ldrexd/strexd in ARM mode) require
5698 // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
5699 // respectively. Since there is no constraint to explicitly specify a
5700 // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb,
5701 // the 64-bit data may be referred by H, Q, R modifiers, so we still pack
5702 // them into a GPRPair.
5703
5704 SDLoc dl(N);
5705 SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps - 1) : SDValue();
5706
5707 SmallVector<bool, 8> OpChanged;
5708 // Glue node will be appended late.
5709 for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) {
5710 SDValue op = N->getOperand(i);
5711 AsmNodeOperands.push_back(op);
5712
5714 continue;
5715
5716 if (const auto *C = dyn_cast<ConstantSDNode>(N->getOperand(i)))
5717 Flag = InlineAsm::Flag(C->getZExtValue());
5718 else
5719 continue;
5720
5721 // Immediate operands to inline asm in the SelectionDAG are modeled with
5722 // two operands. The first is a constant of value InlineAsm::Kind::Imm, and
5723 // the second is a constant with the value of the immediate. If we get here
5724 // and we have a Kind::Imm, skip the next operand, and continue.
5725 if (Flag.isImmKind()) {
5726 SDValue op = N->getOperand(++i);
5727 AsmNodeOperands.push_back(op);
5728 continue;
5729 }
5730
5731 const unsigned NumRegs = Flag.getNumOperandRegisters();
5732 if (NumRegs)
5733 OpChanged.push_back(false);
5734
5735 unsigned DefIdx = 0;
5736 bool IsTiedToChangedOp = false;
5737 // If it's a use that is tied with a previous def, it has no
5738 // reg class constraint.
5739 if (Changed && Flag.isUseOperandTiedToDef(DefIdx))
5740 IsTiedToChangedOp = OpChanged[DefIdx];
5741
5742 // Memory operands to inline asm in the SelectionDAG are modeled with two
5743 // operands: a constant of value InlineAsm::Kind::Mem followed by the input
5744 // operand. If we get here and we have a Kind::Mem, skip the next operand
5745 // (so it doesn't get misinterpreted), and continue. We do this here because
5746 // it's important to update the OpChanged array correctly before moving on.
5747 if (Flag.isMemKind()) {
5748 SDValue op = N->getOperand(++i);
5749 AsmNodeOperands.push_back(op);
5750 continue;
5751 }
5752
5753 if (!Flag.isRegUseKind() && !Flag.isRegDefKind() &&
5754 !Flag.isRegDefEarlyClobberKind())
5755 continue;
5756
5757 unsigned RC;
5758 const bool HasRC = Flag.hasRegClassConstraint(RC);
5759 if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
5760 || NumRegs != 2)
5761 continue;
5762
5763 assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
5764 SDValue V0 = N->getOperand(i+1);
5765 SDValue V1 = N->getOperand(i+2);
5766 Register Reg0 = cast<RegisterSDNode>(V0)->getReg();
5767 Register Reg1 = cast<RegisterSDNode>(V1)->getReg();
5768 SDValue PairedReg;
5770
5771 if (Flag.isRegDefKind() || Flag.isRegDefEarlyClobberKind()) {
5772 // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
5773 // the original GPRs.
5774
5775 Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
5776 PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
5777 SDValue Chain = SDValue(N,0);
5778
5779 SDNode *GU = N->getGluedUser();
5780 SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
5781 Chain.getValue(1));
5782
5783 // Extract values from a GPRPair reg and copy to the original GPR reg.
5784 SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
5785 RegCopy);
5786 SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
5787 RegCopy);
5788 SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
5789 RegCopy.getValue(1));
5790 SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));
5791
5792 // Update the original glue user.
5793 std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
5794 Ops.push_back(T1.getValue(1));
5795 CurDAG->UpdateNodeOperands(GU, Ops);
5796 } else {
5797 // For Kind == InlineAsm::Kind::RegUse, we first copy two GPRs into a
5798 // GPRPair and then pass the GPRPair to the inline asm.
5799 SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];
5800
5801 // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
5802 SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
5803 Chain.getValue(1));
5804 SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
5805 T0.getValue(1));
5806 SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);
5807
5808 // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
5809 // i32 VRs of inline asm with it.
5810 Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
5811 PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
5812 Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));
5813
5814 AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
5815 Glue = Chain.getValue(1);
5816 }
5817
5818 Changed = true;
5819
5820 if(PairedReg.getNode()) {
5821 OpChanged[OpChanged.size() -1 ] = true;
5822 Flag = InlineAsm::Flag(Flag.getKind(), 1 /* RegNum*/);
5823 if (IsTiedToChangedOp)
5824 Flag.setMatchingOp(DefIdx);
5825 else
5826 Flag.setRegClass(ARM::GPRPairRegClassID);
5827 // Replace the current flag.
5828 AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant(
5829 Flag, dl, MVT::i32);
5830 // Add the new register node and skip the original two GPRs.
5831 AsmNodeOperands.push_back(PairedReg);
5832 // Skip the next two GPRs.
5833 i += 2;
5834 }
5835 }
5836
5837 if (Glue.getNode())
5838 AsmNodeOperands.push_back(Glue);
5839 if (!Changed)
5840 return false;
5841
5842 SDValue New = CurDAG->getNode(N->getOpcode(), SDLoc(N),
5843 CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
5844 New->setNodeId(-1);
5845 ReplaceNode(N, New.getNode());
5846 return true;
5847}
5848
5849bool ARMDAGToDAGISel::SelectInlineAsmMemoryOperand(
5850 const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
5851 std::vector<SDValue> &OutOps) {
5852 switch(ConstraintID) {
5853 default:
5854 llvm_unreachable("Unexpected asm memory constraint");
5855 case InlineAsm::ConstraintCode::m:
5856 case InlineAsm::ConstraintCode::o:
5857 case InlineAsm::ConstraintCode::Q:
5858 case InlineAsm::ConstraintCode::Um:
5859 case InlineAsm::ConstraintCode::Un:
5860 case InlineAsm::ConstraintCode::Uq:
5861 case InlineAsm::ConstraintCode::Us:
5862 case InlineAsm::ConstraintCode::Ut:
5863 case InlineAsm::ConstraintCode::Uv:
5864 case InlineAsm::ConstraintCode::Uy:
5865 // Require the address to be in a register. That is safe for all ARM
5866 // variants and it is hard to do anything much smarter without knowing
5867 // how the operand is used.
5868 OutOps.push_back(Op);
5869 return false;
5870 }
5871 return true;
5872}
5873
5874/// createARMISelDag - This pass converts a legalized DAG into a
5875/// ARM-specific DAG, ready for instruction scheduling.
5876///
5878 CodeGenOptLevel OptLevel) {
5879 return new ARMDAGToDAGISelLegacy(TM, OptLevel);
5880}
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V)
AMDGPU Register Bank Select
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
static bool isThumb(const MCSubtargetInfo &STI)
static unsigned getVectorShuffleOpcode(EVT VT, unsigned Opc64[3], unsigned Opc128[3])
static int getBankedRegisterMask(StringRef RegString)
static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs)
Returns true if the given increment is a Constant known to be equal to the access size performed by a...
static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc)
static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned &Imm)
static bool isVSTfixed(unsigned Opc)
static bool isVLDfixed(unsigned Opc)
static bool isInt32Immediate(SDNode *N, unsigned &Imm)
isInt32Immediate - This method tests to see if the node is a 32-bit constant operand.
static std::optional< std::pair< unsigned, unsigned > > getContiguousRangeOfSetBits(const APInt &A)
static void getIntOperandsFromRegisterString(StringRef RegString, SelectionDAG *CurDAG, const SDLoc &DL, std::vector< SDValue > &Ops)
static int getARClassRegisterMask(StringRef Reg, StringRef Flags)
static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget)
static cl::opt< bool > DisableShifterOp("disable-shifter-op", cl::Hidden, cl::desc("Disable isel of shifter-op"), cl::init(false))
#define PASS_NAME
#define DEBUG_TYPE
static SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl)
getAL - Returns a ARMCC::AL immediate node.
static bool shouldUseZeroOffsetLdSt(SDValue N)
static int getMClassFlagsMask(StringRef Flags)
static bool SDValueToConstBool(SDValue SDVal)
static bool isScaledConstantInRange(SDValue Node, int Scale, int RangeMin, int RangeMax, int &ScaledConstant)
Check whether a particular node is a constant value representable as (N * Scale) where (N in [RangeMi...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
uint64_t Addr
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static bool isSigned(unsigned int Opcode)
#define op(i)
const HexagonInstrInfo * TII
#define I(x, y, z)
Definition: MD5.cpp:58
#define T1
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
support::ulittle16_t & Lo
Definition: aarch32.cpp:204
support::ulittle16_t & Hi
Definition: aarch32.cpp:203
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition: APFloat.h:1321
Class for arbitrary precision integers.
Definition: APInt.h:78
An arbitrary precision integer that knows its signedness.
Definition: APSInt.h:23
bool isSwift() const
Definition: ARMSubtarget.h:292
bool isThumb1Only() const
Definition: ARMSubtarget.h:403
bool hasFPARMv8Base() const
Definition: ARMSubtarget.h:311
bool isThumb2() const
Definition: ARMSubtarget.h:404
bool isLikeA9() const
Definition: ARMSubtarget.h:297
bool hasVFP2Base() const
Definition: ARMSubtarget.h:308
bool isLittle() const
Definition: ARMSubtarget.h:435
bool isMClass() const
Definition: ARMSubtarget.h:405
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
uint64_t getZExtValue() const
This class represents an Operation in the Expression.
Container class for subtarget features.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
This class is used to form a handle around another node that is persistent and is updated across invo...
Base class for LoadSDNode and StoreSDNode.
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc,...
An instruction for reading from memory.
Definition: Instructions.h:176
This class is used to represent ISD::LOAD nodes.
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
bool mayStore() const
Return true if this instruction could possibly modify memory.
Definition: MCInstrDesc.h:444
unsigned getOpcode() const
Return the opcode number for this descriptor.
Definition: MCInstrDesc.h:230
SimpleValueType SimpleTy
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
An SDNode that represents everything that will be needed to construct a MachineInstr.
This class is used to represent an MLOAD node.
This is an abstract virtual class for memory operations.
Align getAlign() const
EVT getMemoryVT() const
Return the type of the in-memory value.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
int getNodeId() const
Return the unique node id.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
op_iterator op_end() const
op_iterator op_begin() const
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
unsigned getOpcode() const
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, InlineAsm::ConstraintCode ConstraintID, std::vector< SDValue > &OutOps)
SelectInlineAsmMemoryOperand - Select the specified address as a target addressing mode,...
virtual bool ComplexPatternFuncMutatesDAG() const
Return true if complex patterns for this target can mutate the DAG.
virtual void PreprocessISelDAG()
PreprocessISelDAG - This hook allows targets to hack on the graph before instruction selection starts...
virtual bool runOnMachineFunction(MachineFunction &mf)
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:228
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:698
size_t size() const
Definition: SmallVector.h:78
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:683
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
This class is used to represent ISD::STORE nodes.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition: StringRef.h:700
std::string lower() const
Definition: StringRef.cpp:113
std::pair< StringRef, StringRef > rsplit(StringRef Separator) const
Split into two substrings around the last occurrence of a separator string.
Definition: StringRef.h:733
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static IntegerType * getInt32Ty(LLVMContext &C)
LLVM Value Representation.
Definition: Value.h:74
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
static ShiftOpc getShiftOpcForNode(unsigned Opcode)
int getSOImmVal(unsigned Arg)
getSOImmVal - Given a 32-bit immediate, if it is something that can fit into an shifter_operand immed...
uint64_t decodeVMOVModImm(unsigned ModImm, unsigned &EltBits)
decodeVMOVModImm - Decode a NEON/MVE modified immediate value into the element value and the element ...
float getFPImmFloat(unsigned Imm)
int getT2SOImmVal(unsigned Arg)
getT2SOImmVal - Given a 32-bit immediate, if it is something that can fit into a Thumb-2 shifter_oper...
unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO, unsigned IdxMode=0)
unsigned getAM5Opc(AddrOpc Opc, unsigned char Offset)
getAM5Opc - This function encodes the addrmode5 opc field.
unsigned getAM5FP16Opc(AddrOpc Opc, unsigned char Offset)
getAM5FP16Opc - This function encodes the addrmode5fp16 opc field.
unsigned getAM3Opc(AddrOpc Opc, unsigned char Offset, unsigned IdxMode=0)
getAM3Opc - This function encodes the addrmode3 opc field.
unsigned getSORegOpc(ShiftOpc ShOp, unsigned Imm)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:125
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ TargetConstantPool
Definition: ISDOpcodes.h:174
@ MLOAD
Masked load and store - consecutive vector load and store operations with additional mask operand tha...
Definition: ISDOpcodes.h:1360
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:257
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store instruction, then an offset node that is added / subtracted from the base pointer to form the address (for indexed memory ops).
Definition: ISDOpcodes.h:1102
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic function with side effects that does not return a result.
Definition: ISDOpcodes.h:205
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter) to floating point.
Definition: ISDOpcodes.h:841
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:397
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to memory with one type and loaded from the same address with the other type (or equivalently for vector format conversions, etc).
Definition: ISDOpcodes.h:954
@ FrameIndex
Definition: ISDOpcodes.h:80
@ TargetExternalSymbol
Definition: ISDOpcodes.h:175
@ WRITE_REGISTER
Definition: ISDOpcodes.h:125
@ BasicBlock
Various leaf nodes.
Definition: ISDOpcodes.h:71
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defined outside of the scope of this SelectionDAG.
Definition: ISDOpcodes.h:215
@ TargetGlobalAddress
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition: ISDOpcodes.h:170
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:735
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the named register global variables extension.
Definition: ISDOpcodes.h:124
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially variable) element number IDX.
Definition: ISDOpcodes.h:550
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value, and a value.
Definition: ISDOpcodes.h:209
@ FP_TO_UINT_SAT
Definition: ISDOpcodes.h:907
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo, ValHi, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmpLo, cmpHi, swapLo, swapHi) This corresponds to the cmpxchg instruction.
Definition: ISDOpcodes.h:1319
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in a large integer register (e.g. sign extending the low 8 bits of a 32-bit register to fill the top 24 bits with the sign bit).
Definition: ISDOpcodes.h:849
@ INLINEASM_BR
INLINEASM_BR - Branching version of inline asm. Used by asm-goto.
Definition: ISDOpcodes.h:1168
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:887
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:709
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic function with no side effects.
Definition: ISDOpcodes.h:190
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:539
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the destination VT.
Definition: ISDOpcodes.h:920
@ INLINEASM
INLINEASM - Represents an inline asm block.
Definition: ISDOpcodes.h:1165
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer type given in operand 1.
Definition: ISDOpcodes.h:906
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target intrinsic function with side effects that returns a result.
Definition: ISDOpcodes.h:198
@ TargetGlobalTLSAddress
Definition: ISDOpcodes.h:171
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1551
Flag
These should be considered private to the implementation of the MCInstrDesc class.
Definition: MCInstrDesc.h:148
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition: bit.h:307
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting iteration.
Definition: STLExtras.h:657
FunctionPass * createARMISelDag(ARMBaseTargetMachine &TM, CodeGenOptLevel OptLevel)
createARMISelDag - This pass converts a legalized DAG into an ARM-specific DAG, ready for instruction scheduling.
constexpr bool isShiftedMask_32(uint32_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (32 bit version).
Definition: MathExtras.h:279
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: bit.h:215
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:340
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
CodeGenOptLevel
Code generation optimization level.
Definition: CodeGen.h:54
unsigned ConstantMaterializationCost(unsigned Val, const ARMSubtarget *Subtarget, bool ForCodesize=false)
Returns the number of instructions required to materialize the given constant in a register, or 3 if a literal pool load is needed. If ForCodesize is specified, an approximate cost in bytes is returned.
@ FMul
Product of floats.
@ And
Bitwise or logical AND of integers.
@ Add
Sum of integers.
@ NearestTiesToEven
roundTiesToEven.
Implement std::hash so that hash_code can be used in STL containers.
Definition: BitVector.h:858
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
#define NC
Definition: regutils.h:42
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Extended Value Type.
Definition: ValueTypes.h:35
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:74
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:368
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:380
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:311
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition: ValueTypes.h:207
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:168
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:323
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:331
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:152
bool is64BitVector() const
Return true if this is a 64-bit vector type.
Definition: ValueTypes.h:202
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasNoInfs() const