LLVM 19.0.0git
ARMISelDAGToDAG.cpp
Go to the documentation of this file.
1//===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the ARM target.
10//
11//===----------------------------------------------------------------------===//
12
13#include "ARM.h"
14#include "ARMBaseInstrInfo.h"
15#include "ARMTargetMachine.h"
17#include "Utils/ARMBaseInfo.h"
18#include "llvm/ADT/APSInt.h"
27#include "llvm/IR/CallingConv.h"
28#include "llvm/IR/Constants.h"
30#include "llvm/IR/Function.h"
31#include "llvm/IR/Intrinsics.h"
32#include "llvm/IR/IntrinsicsARM.h"
33#include "llvm/IR/LLVMContext.h"
35#include "llvm/Support/Debug.h"
38#include <optional>
39
40using namespace llvm;
41
42#define DEBUG_TYPE "arm-isel"
43#define PASS_NAME "ARM Instruction Selection"
44
45static cl::opt<bool>
46DisableShifterOp("disable-shifter-op", cl::Hidden,
47 cl::desc("Disable isel of shifter-op"),
48 cl::init(false));
49
50//===--------------------------------------------------------------------===//
51/// ARMDAGToDAGISel - ARM specific code to select ARM machine
52/// instructions for SelectionDAG operations.
53///
54namespace {
55
56class ARMDAGToDAGISel : public SelectionDAGISel {
57 /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
58 /// make the right decision when generating code for different targets.
59 const ARMSubtarget *Subtarget;
60
61public:
62 ARMDAGToDAGISel() = delete;
63
64 explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOptLevel OptLevel)
65 : SelectionDAGISel(tm, OptLevel) {}
66
67 bool runOnMachineFunction(MachineFunction &MF) override {
68 // Reset the subtarget each time through.
69 Subtarget = &MF.getSubtarget<ARMSubtarget>();
71 return true;
72 }
73
74 void PreprocessISelDAG() override;
75
76 /// getI32Imm - Return a target constant of type i32 with the specified
77 /// value.
78 inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
79 return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
80 }
81
82 void Select(SDNode *N) override;
83
84 /// Return true as some complex patterns, like those that call
85 /// canExtractShiftFromMul can modify the DAG inplace.
86 bool ComplexPatternFuncMutatesDAG() const override { return true; }
87
88 bool hasNoVMLxHazardUse(SDNode *N) const;
89 bool isShifterOpProfitable(const SDValue &Shift,
90 ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
91 bool SelectRegShifterOperand(SDValue N, SDValue &A,
92 SDValue &B, SDValue &C,
93 bool CheckProfitability = true);
94 bool SelectImmShifterOperand(SDValue N, SDValue &A,
95 SDValue &B, bool CheckProfitability = true);
96 bool SelectShiftRegShifterOperand(SDValue N, SDValue &A, SDValue &B,
97 SDValue &C) {
98 // Don't apply the profitability check
99 return SelectRegShifterOperand(N, A, B, C, false);
100 }
101 bool SelectShiftImmShifterOperand(SDValue N, SDValue &A, SDValue &B) {
102 // Don't apply the profitability check
103 return SelectImmShifterOperand(N, A, B, false);
104 }
105 bool SelectShiftImmShifterOperandOneUse(SDValue N, SDValue &A, SDValue &B) {
106 if (!N.hasOneUse())
107 return false;
108 return SelectImmShifterOperand(N, A, B, false);
109 }
110
111 bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out);
112
113 bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
114 bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);
115
116 bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
117 const ConstantSDNode *CN = cast<ConstantSDNode>(N);
118 Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
119 Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
120 return true;
121 }
122
123 bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
124 SDValue &Offset, SDValue &Opc);
125 bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
126 SDValue &Offset, SDValue &Opc);
127 bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
128 SDValue &Offset, SDValue &Opc);
129 bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
130 bool SelectAddrMode3(SDValue N, SDValue &Base,
131 SDValue &Offset, SDValue &Opc);
132 bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
133 SDValue &Offset, SDValue &Opc);
134 bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, bool FP16);
135 bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset);
136 bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset);
137 bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
138 bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);
139
140 bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);
141
142 // Thumb Addressing Modes:
143 bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
144 bool SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, SDValue &Offset);
145 bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
146 SDValue &OffImm);
147 bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
148 SDValue &OffImm);
149 bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
150 SDValue &OffImm);
151 bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
152 SDValue &OffImm);
153 bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
154 template <unsigned Shift>
155 bool SelectTAddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);
156
157 // Thumb 2 Addressing Modes:
158 bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
159 template <unsigned Shift>
160 bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, SDValue &OffImm);
161 bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
162 SDValue &OffImm);
163 bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
164 SDValue &OffImm);
165 template <unsigned Shift>
166 bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm);
167 bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm,
168 unsigned Shift);
169 template <unsigned Shift>
170 bool SelectT2AddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);
171 bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
172 SDValue &OffReg, SDValue &ShImm);
173 bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);
174
175 template<int Min, int Max>
176 bool SelectImmediateInRange(SDValue N, SDValue &OffImm);
177
178 inline bool is_so_imm(unsigned Imm) const {
179 return ARM_AM::getSOImmVal(Imm) != -1;
180 }
181
182 inline bool is_so_imm_not(unsigned Imm) const {
183 return ARM_AM::getSOImmVal(~Imm) != -1;
184 }
185
186 inline bool is_t2_so_imm(unsigned Imm) const {
187 return ARM_AM::getT2SOImmVal(Imm) != -1;
188 }
189
190 inline bool is_t2_so_imm_not(unsigned Imm) const {
191 return ARM_AM::getT2SOImmVal(~Imm) != -1;
192 }
193
194 // Include the pieces autogenerated from the target description.
195#include "ARMGenDAGISel.inc"
196
197private:
198 void transferMemOperands(SDNode *Src, SDNode *Dst);
199
200 /// Indexed (pre/post inc/dec) load matching code for ARM.
201 bool tryARMIndexedLoad(SDNode *N);
202 bool tryT1IndexedLoad(SDNode *N);
203 bool tryT2IndexedLoad(SDNode *N);
204 bool tryMVEIndexedLoad(SDNode *N);
205 bool tryFMULFixed(SDNode *N, SDLoc dl);
206 bool tryFP_TO_INT(SDNode *N, SDLoc dl);
207 bool transformFixedFloatingPointConversion(SDNode *N, SDNode *FMul,
208 bool IsUnsigned,
209 bool FixedToFloat);
210
211 /// SelectVLD - Select NEON load intrinsics. NumVecs should be
212 /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for
213 /// loads of D registers and even subregs and odd subregs of Q registers.
214 /// For NumVecs <= 2, QOpcodes1 is not used.
215 void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
216 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
217 const uint16_t *QOpcodes1);
218
219 /// SelectVST - Select NEON store intrinsics. NumVecs should
220 /// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for
221 /// stores of D registers and even subregs and odd subregs of Q registers.
222 /// For NumVecs <= 2, QOpcodes1 is not used.
223 void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
224 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
225 const uint16_t *QOpcodes1);
226
227 /// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should
228 /// be 2, 3 or 4. The opcode arrays specify the instructions used for
229 /// load/store of D registers and Q registers.
230 void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
231 unsigned NumVecs, const uint16_t *DOpcodes,
232 const uint16_t *QOpcodes);
233
234 /// Helper functions for setting up clusters of MVE predication operands.
235 template <typename SDValueVector>
236 void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
237 SDValue PredicateMask);
238 template <typename SDValueVector>
239 void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
240 SDValue PredicateMask, SDValue Inactive);
241
242 template <typename SDValueVector>
243 void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc);
244 template <typename SDValueVector>
245 void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, EVT InactiveTy);
246
247 /// SelectMVE_WB - Select MVE writeback load/store intrinsics.
248 void SelectMVE_WB(SDNode *N, const uint16_t *Opcodes, bool Predicated);
249
250 /// SelectMVE_LongShift - Select MVE 64-bit scalar shift intrinsics.
251 void SelectMVE_LongShift(SDNode *N, uint16_t Opcode, bool Immediate,
252 bool HasSaturationOperand);
253
254 /// SelectMVE_VADCSBC - Select MVE vector add/sub-with-carry intrinsics.
255 void SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
256 uint16_t OpcodeWithNoCarry, bool Add, bool Predicated);
257
258 /// SelectMVE_VSHLC - Select MVE intrinsics for a shift that carries between
259 /// vector lanes.
260 void SelectMVE_VSHLC(SDNode *N, bool Predicated);
261
262 /// Select long MVE vector reductions with two vector operands
263 /// Stride is the number of vector element widths the instruction can operate
264 /// on:
265 /// 2 for long non-rounding variants, vml{a,s}ldav[a][x]: [i16, i32]
266 /// 1 for long rounding variants: vrml{a,s}ldavh[a][x]: [i32]
267 /// Stride is used when addressing the OpcodesS array which contains multiple
268 /// opcodes for each element width.
269 /// TySize is the index into the list of element types listed above
270 void SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
271 const uint16_t *OpcodesS, const uint16_t *OpcodesU,
272 size_t Stride, size_t TySize);
273
274 /// Select a 64-bit MVE vector reduction with two vector operands
275 /// arm_mve_vmlldava_[predicated]
276 void SelectMVE_VMLLDAV(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
277 const uint16_t *OpcodesU);
278 /// Select a 72-bit MVE vector rounding reduction with two vector operands
279 /// int_arm_mve_vrmlldavha[_predicated]
280 void SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
281 const uint16_t *OpcodesU);
282
283 /// SelectMVE_VLD - Select MVE interleaving load intrinsics. NumVecs
284 /// should be 2 or 4. The opcode array specifies the instructions
285 /// used for 8, 16 and 32-bit lane sizes respectively, and each
286 /// pointer points to a set of NumVecs sub-opcodes used for the
287 /// different stages (e.g. VLD20 versus VLD21) of each load family.
288 void SelectMVE_VLD(SDNode *N, unsigned NumVecs,
289 const uint16_t *const *Opcodes, bool HasWriteback);
290
291 /// SelectMVE_VxDUP - Select MVE incrementing-dup instructions. Opcodes is an
292 /// array of 3 elements for the 8, 16 and 32-bit lane sizes.
293 void SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes,
294 bool Wrapping, bool Predicated);
295
296 /// Select SelectCDE_CXxD - Select CDE dual-GPR instruction (one of CX1D,
297 /// CX1DA, CX2D, CX2DA, CX3, CX3DA).
298 /// \arg \c NumExtraOps number of extra operands besides the coprocossor,
299 /// the accumulator and the immediate operand, i.e. 0
300 /// for CX1*, 1 for CX2*, 2 for CX3*
301 /// \arg \c HasAccum whether the instruction has an accumulator operand
302 void SelectCDE_CXxD(SDNode *N, uint16_t Opcode, size_t NumExtraOps,
303 bool HasAccum);
304
305 /// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs
306 /// should be 1, 2, 3 or 4. The opcode array specifies the instructions used
307 /// for loading D registers.
308 void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating,
309 unsigned NumVecs, const uint16_t *DOpcodes,
310 const uint16_t *QOpcodes0 = nullptr,
311 const uint16_t *QOpcodes1 = nullptr);
312
313 /// Try to select SBFX/UBFX instructions for ARM.
314 bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
315
316 bool tryInsertVectorElt(SDNode *N);
317
318 // Select special operations if node forms integer ABS pattern
319 bool tryABSOp(SDNode *N);
320
321 bool tryReadRegister(SDNode *N);
322 bool tryWriteRegister(SDNode *N);
323
324 bool tryInlineAsm(SDNode *N);
325
326 void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);
327
328 void SelectCMP_SWAP(SDNode *N);
329
330 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
331 /// inline asm expressions.
333 InlineAsm::ConstraintCode ConstraintID,
334 std::vector<SDValue> &OutOps) override;
335
336 // Form pairs of consecutive R, S, D, or Q registers.
338 SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
339 SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
340 SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);
341
342 // Form sequences of 4 consecutive S, D, or Q registers.
343 SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
344 SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
345 SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
346
347 // Get the alignment operand for a NEON VLD or VST instruction.
348 SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
349 bool is64BitVector);
350
351 /// Checks if N is a multiplication by a constant where we can extract out a
352 /// power of two from the constant so that it can be used in a shift, but only
353 /// if it simplifies the materialization of the constant. Returns true if it
354 /// is, and assigns to PowerOfTwo the power of two that should be extracted
355 /// out and to NewMulConst the new constant to be multiplied by.
356 bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
357 unsigned &PowerOfTwo, SDValue &NewMulConst) const;
358
359 /// Replace N with M in CurDAG, in a way that also ensures that M gets
360 /// selected when N would have been selected.
361 void replaceDAGValue(const SDValue &N, SDValue M);
362};
363
364class ARMDAGToDAGISelLegacy : public SelectionDAGISelLegacy {
365public:
366 static char ID;
367 ARMDAGToDAGISelLegacy(ARMBaseTargetMachine &tm, CodeGenOptLevel OptLevel)
369 ID, std::make_unique<ARMDAGToDAGISel>(tm, OptLevel)) {}
370};
371}
372
// Out-of-class definition of the static ID member declared in
// ARMDAGToDAGISelLegacy.
373char ARMDAGToDAGISelLegacy::ID = 0;
374
// Registers the legacy pass using DEBUG_TYPE ("arm-isel") and PASS_NAME
// ("ARM Instruction Selection") as defined near the top of this file.
// NOTE(review): the two trailing `false` arguments are presumably the
// cfg-only / is-analysis flags of INITIALIZE_PASS - confirm against the macro.
375INITIALIZE_PASS(ARMDAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
376
377/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
378/// operand. If so Imm will receive the 32-bit value.
379static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
380 if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
381 Imm = N->getAsZExtVal();
382 return true;
383 }
384 return false;
385}
386
387// isInt32Immediate - This method tests to see if a constant operand.
388// If so Imm will receive the 32 bit value.
389static bool isInt32Immediate(SDValue N, unsigned &Imm) {
390 return isInt32Immediate(N.getNode(), Imm);
391}
392
393// isOpcWithIntImmediate - This method tests to see if the node is a specific
394// opcode and that it has a immediate integer right operand.
395// If so Imm will receive the 32 bit value.
396static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
397 return N->getOpcode() == Opc &&
398 isInt32Immediate(N->getOperand(1).getNode(), Imm);
399}
400
401/// Check whether a particular node is a constant value representable as
402/// (N * Scale) where (N in [\p RangeMin, \p RangeMax).
403///
404/// \param ScaledConstant [out] - On success, the pre-scaled constant value.
405static bool isScaledConstantInRange(SDValue Node, int Scale,
406 int RangeMin, int RangeMax,
407 int &ScaledConstant) {
408 assert(Scale > 0 && "Invalid scale!");
409
410 // Check that this is a constant.
411 const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
412 if (!C)
413 return false;
414
415 ScaledConstant = (int) C->getZExtValue();
416 if ((ScaledConstant % Scale) != 0)
417 return false;
418
419 ScaledConstant /= Scale;
420 return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
421}
422
423void ARMDAGToDAGISel::PreprocessISelDAG() {
424 if (!Subtarget->hasV6T2Ops())
425 return;
426
427 bool isThumb2 = Subtarget->isThumb();
428 // We use make_early_inc_range to avoid invalidation issues.
429 for (SDNode &N : llvm::make_early_inc_range(CurDAG->allnodes())) {
430 if (N.getOpcode() != ISD::ADD)
431 continue;
432
433 // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
434 // leading zeros, followed by consecutive set bits, followed by 1 or 2
435 // trailing zeros, e.g. 1020.
436 // Transform the expression to
437 // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
438 // of trailing zeros of c2. The left shift would be folded as an shifter
439 // operand of 'add' and the 'and' and 'srl' would become a bits extraction
440 // node (UBFX).
441
442 SDValue N0 = N.getOperand(0);
443 SDValue N1 = N.getOperand(1);
444 unsigned And_imm = 0;
445 if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
446 if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
447 std::swap(N0, N1);
448 }
449 if (!And_imm)
450 continue;
451
452 // Check if the AND mask is an immediate of the form: 000.....1111111100
453 unsigned TZ = llvm::countr_zero(And_imm);
454 if (TZ != 1 && TZ != 2)
455 // Be conservative here. Shifter operands aren't always free. e.g. On
456 // Swift, left shifter operand of 1 / 2 for free but others are not.
457 // e.g.
458 // ubfx r3, r1, #16, #8
459 // ldr.w r3, [r0, r3, lsl #2]
460 // vs.
461 // mov.w r9, #1020
462 // and.w r2, r9, r1, lsr #14
463 // ldr r2, [r0, r2]
464 continue;
465 And_imm >>= TZ;
466 if (And_imm & (And_imm + 1))
467 continue;
468
469 // Look for (and (srl X, c1), c2).
470 SDValue Srl = N1.getOperand(0);
471 unsigned Srl_imm = 0;
472 if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
473 (Srl_imm <= 2))
474 continue;
475
476 // Make sure first operand is not a shifter operand which would prevent
477 // folding of the left shift.
478 SDValue CPTmp0;
479 SDValue CPTmp1;
480 SDValue CPTmp2;
481 if (isThumb2) {
482 if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
483 continue;
484 } else {
485 if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
486 SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
487 continue;
488 }
489
490 // Now make the transformation.
491 Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
492 Srl.getOperand(0),
493 CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
494 MVT::i32));
495 N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
496 Srl,
497 CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
498 N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
499 N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
500 CurDAG->UpdateNodeOperands(&N, N0, N1);
501 }
502}
503
504/// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
505/// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
506/// least on current ARM implementations) which should be avoidded.
507bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
508 if (OptLevel == CodeGenOptLevel::None)
509 return true;
510
511 if (!Subtarget->hasVMLxHazards())
512 return true;
513
514 if (!N->hasOneUse())
515 return false;
516
517 SDNode *Use = *N->use_begin();
518 if (Use->getOpcode() == ISD::CopyToReg)
519 return true;
520 if (Use->isMachineOpcode()) {
521 const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
522 CurDAG->getSubtarget().getInstrInfo());
523
524 const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
525 if (MCID.mayStore())
526 return true;
527 unsigned Opcode = MCID.getOpcode();
528 if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
529 return true;
530 // vmlx feeding into another vmlx. We actually want to unfold
531 // the use later in the MLxExpansion pass. e.g.
532 // vmla
533 // vmla (stall 8 cycles)
534 //
535 // vmul (5 cycles)
536 // vadd (5 cycles)
537 // vmla
538 // This adds up to about 18 - 19 cycles.
539 //
540 // vmla
541 // vmul (stall 4 cycles)
542 // vadd adds up to about 14 cycles.
543 return TII->isFpMLxInstruction(Opcode);
544 }
545
546 return false;
547}
548
549bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
550 ARM_AM::ShiftOpc ShOpcVal,
551 unsigned ShAmt) {
552 if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
553 return true;
554 if (Shift.hasOneUse())
555 return true;
556 // R << 2 is free.
557 return ShOpcVal == ARM_AM::lsl &&
558 (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
559}
560
561bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
562 unsigned MaxShift,
563 unsigned &PowerOfTwo,
564 SDValue &NewMulConst) const {
565 assert(N.getOpcode() == ISD::MUL);
566 assert(MaxShift > 0);
567
568 // If the multiply is used in more than one place then changing the constant
569 // will make other uses incorrect, so don't.
570 if (!N.hasOneUse()) return false;
571 // Check if the multiply is by a constant
572 ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
573 if (!MulConst) return false;
574 // If the constant is used in more than one place then modifying it will mean
575 // we need to materialize two constants instead of one, which is a bad idea.
576 if (!MulConst->hasOneUse()) return false;
577 unsigned MulConstVal = MulConst->getZExtValue();
578 if (MulConstVal == 0) return false;
579
580 // Find the largest power of 2 that MulConstVal is a multiple of
581 PowerOfTwo = MaxShift;
582 while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
583 --PowerOfTwo;
584 if (PowerOfTwo == 0) return false;
585 }
586
587 // Only optimise if the new cost is better
588 unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
589 NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
590 unsigned OldCost = ConstantMaterializationCost(MulConstVal, Subtarget);
591 unsigned NewCost = ConstantMaterializationCost(NewMulConstVal, Subtarget);
592 return NewCost < OldCost;
593}
594
595void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
596 CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
597 ReplaceUses(N, M);
598}
599
600bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
601 SDValue &BaseReg,
602 SDValue &Opc,
603 bool CheckProfitability) {
605 return false;
606
607 // If N is a multiply-by-constant and it's profitable to extract a shift and
608 // use it in a shifted operand do so.
609 if (N.getOpcode() == ISD::MUL) {
610 unsigned PowerOfTwo = 0;
611 SDValue NewMulConst;
612 if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
613 HandleSDNode Handle(N);
614 SDLoc Loc(N);
615 replaceDAGValue(N.getOperand(1), NewMulConst);
616 BaseReg = Handle.getValue();
617 Opc = CurDAG->getTargetConstant(
618 ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32);
619 return true;
620 }
621 }
622
623 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
624
625 // Don't match base register only case. That is matched to a separate
626 // lower complexity pattern with explicit register operand.
627 if (ShOpcVal == ARM_AM::no_shift) return false;
628
629 BaseReg = N.getOperand(0);
630 unsigned ShImmVal = 0;
631 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
632 if (!RHS) return false;
633 ShImmVal = RHS->getZExtValue() & 31;
634 Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
635 SDLoc(N), MVT::i32);
636 return true;
637}
638
639bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
640 SDValue &BaseReg,
641 SDValue &ShReg,
642 SDValue &Opc,
643 bool CheckProfitability) {
645 return false;
646
647 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
648
649 // Don't match base register only case. That is matched to a separate
650 // lower complexity pattern with explicit register operand.
651 if (ShOpcVal == ARM_AM::no_shift) return false;
652
653 BaseReg = N.getOperand(0);
654 unsigned ShImmVal = 0;
655 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
656 if (RHS) return false;
657
658 ShReg = N.getOperand(1);
659 if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
660 return false;
661 Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
662 SDLoc(N), MVT::i32);
663 return true;
664}
665
666// Determine whether an ISD::OR's operands are suitable to turn the operation
667// into an addition, which often has more compact encodings.
668bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) {
669 assert(Parent->getOpcode() == ISD::OR && "unexpected parent");
670 Out = N;
671 return CurDAG->haveNoCommonBitsSet(N, Parent->getOperand(1));
672}
673
674
675bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
676 SDValue &Base,
677 SDValue &OffImm) {
678 // Match simple R + imm12 operands.
679
680 // Base only.
681 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
682 !CurDAG->isBaseWithConstantOffset(N)) {
683 if (N.getOpcode() == ISD::FrameIndex) {
684 // Match frame index.
685 int FI = cast<FrameIndexSDNode>(N)->getIndex();
686 Base = CurDAG->getTargetFrameIndex(
687 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
688 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
689 return true;
690 }
691
692 if (N.getOpcode() == ARMISD::Wrapper &&
693 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
694 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
695 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
696 Base = N.getOperand(0);
697 } else
698 Base = N;
699 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
700 return true;
701 }
702
703 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
704 int RHSC = (int)RHS->getSExtValue();
705 if (N.getOpcode() == ISD::SUB)
706 RHSC = -RHSC;
707
708 if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
709 Base = N.getOperand(0);
710 if (Base.getOpcode() == ISD::FrameIndex) {
711 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
712 Base = CurDAG->getTargetFrameIndex(
713 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
714 }
715 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
716 return true;
717 }
718 }
719
720 // Base only.
721 Base = N;
722 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
723 return true;
724}
725
726
727
728bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
729 SDValue &Opc) {
730 if (N.getOpcode() == ISD::MUL &&
731 ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
732 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
733 // X * [3,5,9] -> X + X * [2,4,8] etc.
734 int RHSC = (int)RHS->getZExtValue();
735 if (RHSC & 1) {
736 RHSC = RHSC & ~1;
738 if (RHSC < 0) {
740 RHSC = - RHSC;
741 }
742 if (isPowerOf2_32(RHSC)) {
743 unsigned ShAmt = Log2_32(RHSC);
744 Base = Offset = N.getOperand(0);
745 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
747 SDLoc(N), MVT::i32);
748 return true;
749 }
750 }
751 }
752 }
753
754 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
755 // ISD::OR that is equivalent to an ISD::ADD.
756 !CurDAG->isBaseWithConstantOffset(N))
757 return false;
758
759 // Leave simple R +/- imm12 operands for LDRi12
760 if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
761 int RHSC;
762 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
763 -0x1000+1, 0x1000, RHSC)) // 12 bits.
764 return false;
765 }
766
767 // Otherwise this is R +/- [possibly shifted] R.
769 ARM_AM::ShiftOpc ShOpcVal =
770 ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
771 unsigned ShAmt = 0;
772
773 Base = N.getOperand(0);
774 Offset = N.getOperand(1);
775
776 if (ShOpcVal != ARM_AM::no_shift) {
777 // Check to see if the RHS of the shift is a constant, if not, we can't fold
778 // it.
779 if (ConstantSDNode *Sh =
780 dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
781 ShAmt = Sh->getZExtValue();
782 if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
783 Offset = N.getOperand(1).getOperand(0);
784 else {
785 ShAmt = 0;
786 ShOpcVal = ARM_AM::no_shift;
787 }
788 } else {
789 ShOpcVal = ARM_AM::no_shift;
790 }
791 }
792
793 // Try matching (R shl C) + (R).
794 if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
795 !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
796 N.getOperand(0).hasOneUse())) {
797 ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
798 if (ShOpcVal != ARM_AM::no_shift) {
799 // Check to see if the RHS of the shift is a constant, if not, we can't
800 // fold it.
801 if (ConstantSDNode *Sh =
802 dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
803 ShAmt = Sh->getZExtValue();
804 if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
805 Offset = N.getOperand(0).getOperand(0);
806 Base = N.getOperand(1);
807 } else {
808 ShAmt = 0;
809 ShOpcVal = ARM_AM::no_shift;
810 }
811 } else {
812 ShOpcVal = ARM_AM::no_shift;
813 }
814 }
815 }
816
817 // If Offset is a multiply-by-constant and it's profitable to extract a shift
818 // and use it in a shifted operand do so.
819 if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
820 unsigned PowerOfTwo = 0;
821 SDValue NewMulConst;
822 if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
823 HandleSDNode Handle(Offset);
824 replaceDAGValue(Offset.getOperand(1), NewMulConst);
825 Offset = Handle.getValue();
826 ShAmt = PowerOfTwo;
827 ShOpcVal = ARM_AM::lsl;
828 }
829 }
830
831 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
832 SDLoc(N), MVT::i32);
833 return true;
834}
835
836bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
837 SDValue &Offset, SDValue &Opc) {
838 unsigned Opcode = Op->getOpcode();
839 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
840 ? cast<LoadSDNode>(Op)->getAddressingMode()
841 : cast<StoreSDNode>(Op)->getAddressingMode();
844 int Val;
845 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
846 return false;
847
848 Offset = N;
849 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
850 unsigned ShAmt = 0;
851 if (ShOpcVal != ARM_AM::no_shift) {
852 // Check to see if the RHS of the shift is a constant, if not, we can't fold
853 // it.
854 if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
855 ShAmt = Sh->getZExtValue();
856 if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
857 Offset = N.getOperand(0);
858 else {
859 ShAmt = 0;
860 ShOpcVal = ARM_AM::no_shift;
861 }
862 } else {
863 ShOpcVal = ARM_AM::no_shift;
864 }
865 }
866
867 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
868 SDLoc(N), MVT::i32);
869 return true;
870}
871
872bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
873 SDValue &Offset, SDValue &Opc) {
874 unsigned Opcode = Op->getOpcode();
875 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
876 ? cast<LoadSDNode>(Op)->getAddressingMode()
877 : cast<StoreSDNode>(Op)->getAddressingMode();
880 int Val;
881 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
882 if (AddSub == ARM_AM::sub) Val *= -1;
883 Offset = CurDAG->getRegister(0, MVT::i32);
884 Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
885 return true;
886 }
887
888 return false;
889}
890
891
892bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
893 SDValue &Offset, SDValue &Opc) {
894 unsigned Opcode = Op->getOpcode();
895 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
896 ? cast<LoadSDNode>(Op)->getAddressingMode()
897 : cast<StoreSDNode>(Op)->getAddressingMode();
900 int Val;
901 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
902 Offset = CurDAG->getRegister(0, MVT::i32);
903 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
905 SDLoc(Op), MVT::i32);
906 return true;
907 }
908
909 return false;
910}
911
912bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
913 Base = N;
914 return true;
915}
916
917bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
919 SDValue &Opc) {
920 if (N.getOpcode() == ISD::SUB) {
921 // X - C is canonicalize to X + -C, no need to handle it here.
922 Base = N.getOperand(0);
923 Offset = N.getOperand(1);
924 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
925 MVT::i32);
926 return true;
927 }
928
929 if (!CurDAG->isBaseWithConstantOffset(N)) {
930 Base = N;
931 if (N.getOpcode() == ISD::FrameIndex) {
932 int FI = cast<FrameIndexSDNode>(N)->getIndex();
933 Base = CurDAG->getTargetFrameIndex(
934 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
935 }
936 Offset = CurDAG->getRegister(0, MVT::i32);
937 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
938 MVT::i32);
939 return true;
940 }
941
942 // If the RHS is +/- imm8, fold into addr mode.
943 int RHSC;
944 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
945 -256 + 1, 256, RHSC)) { // 8 bits.
946 Base = N.getOperand(0);
947 if (Base.getOpcode() == ISD::FrameIndex) {
948 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
949 Base = CurDAG->getTargetFrameIndex(
950 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
951 }
952 Offset = CurDAG->getRegister(0, MVT::i32);
953
955 if (RHSC < 0) {
957 RHSC = -RHSC;
958 }
959 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
960 MVT::i32);
961 return true;
962 }
963
964 Base = N.getOperand(0);
965 Offset = N.getOperand(1);
966 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
967 MVT::i32);
968 return true;
969}
970
971bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
972 SDValue &Offset, SDValue &Opc) {
973 unsigned Opcode = Op->getOpcode();
974 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
975 ? cast<LoadSDNode>(Op)->getAddressingMode()
976 : cast<StoreSDNode>(Op)->getAddressingMode();
979 int Val;
980 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 12 bits.
981 Offset = CurDAG->getRegister(0, MVT::i32);
982 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
983 MVT::i32);
984 return true;
985 }
986
987 Offset = N;
988 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
989 MVT::i32);
990 return true;
991}
992
993bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset,
994 bool FP16) {
995 if (!CurDAG->isBaseWithConstantOffset(N)) {
996 Base = N;
997 if (N.getOpcode() == ISD::FrameIndex) {
998 int FI = cast<FrameIndexSDNode>(N)->getIndex();
999 Base = CurDAG->getTargetFrameIndex(
1000 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1001 } else if (N.getOpcode() == ARMISD::Wrapper &&
1002 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1003 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1004 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1005 Base = N.getOperand(0);
1006 }
1007 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
1008 SDLoc(N), MVT::i32);
1009 return true;
1010 }
1011
1012 // If the RHS is +/- imm8, fold into addr mode.
1013 int RHSC;
1014 const int Scale = FP16 ? 2 : 4;
1015
1016 if (isScaledConstantInRange(N.getOperand(1), Scale, -255, 256, RHSC)) {
1017 Base = N.getOperand(0);
1018 if (Base.getOpcode() == ISD::FrameIndex) {
1019 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1020 Base = CurDAG->getTargetFrameIndex(
1021 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1022 }
1023
1025 if (RHSC < 0) {
1027 RHSC = -RHSC;
1028 }
1029
1030 if (FP16)
1031 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub, RHSC),
1032 SDLoc(N), MVT::i32);
1033 else
1034 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
1035 SDLoc(N), MVT::i32);
1036
1037 return true;
1038 }
1039
1040 Base = N;
1041
1042 if (FP16)
1043 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add, 0),
1044 SDLoc(N), MVT::i32);
1045 else
1046 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
1047 SDLoc(N), MVT::i32);
1048
1049 return true;
1050}
1051
1052bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
1054 return IsAddressingMode5(N, Base, Offset, /*FP16=*/ false);
1055}
1056
1057bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N,
1059 return IsAddressingMode5(N, Base, Offset, /*FP16=*/ true);
1060}
1061
1062bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
1063 SDValue &Align) {
1064 Addr = N;
1065
1066 unsigned Alignment = 0;
1067
1068 MemSDNode *MemN = cast<MemSDNode>(Parent);
1069
1070 if (isa<LSBaseSDNode>(MemN) ||
1071 ((MemN->getOpcode() == ARMISD::VST1_UPD ||
1072 MemN->getOpcode() == ARMISD::VLD1_UPD) &&
1073 MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
1074 // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
1075 // The maximum alignment is equal to the memory size being referenced.
1076 llvm::Align MMOAlign = MemN->getAlign();
1077 unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
1078 if (MMOAlign.value() >= MemSize && MemSize > 1)
1079 Alignment = MemSize;
1080 } else {
1081 // All other uses of addrmode6 are for intrinsics. For now just record
1082 // the raw alignment value; it will be refined later based on the legal
1083 // alignment operands for the intrinsic.
1084 Alignment = MemN->getAlign().value();
1085 }
1086
1087 Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
1088 return true;
1089}
1090
1091bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
1092 SDValue &Offset) {
1093 LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
1095 if (AM != ISD::POST_INC)
1096 return false;
1097 Offset = N;
1098 if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
1099 if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
1100 Offset = CurDAG->getRegister(0, MVT::i32);
1101 }
1102 return true;
1103}
1104
1105bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
1106 SDValue &Offset, SDValue &Label) {
1107 if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
1108 Offset = N.getOperand(0);
1109 SDValue N1 = N.getOperand(1);
1110 Label = CurDAG->getTargetConstant(N1->getAsZExtVal(), SDLoc(N), MVT::i32);
1111 return true;
1112 }
1113
1114 return false;
1115}
1116
1117
1118//===----------------------------------------------------------------------===//
1119// Thumb Addressing Modes
1120//===----------------------------------------------------------------------===//
1121
1123 // Negative numbers are difficult to materialise in thumb1. If we are
1124 // selecting the add of a negative, instead try to select ri with a zero
1125 // offset, so create the add node directly which will become a sub.
1126 if (N.getOpcode() != ISD::ADD)
1127 return false;
1128
1129 // Look for an imm which is not legal for ld/st, but is legal for sub.
1130 if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1)))
1131 return C->getSExtValue() < 0 && C->getSExtValue() >= -255;
1132
1133 return false;
1134}
1135
1136bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base,
1137 SDValue &Offset) {
1138 if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
1139 if (!isNullConstant(N))
1140 return false;
1141
1142 Base = Offset = N;
1143 return true;
1144 }
1145
1146 Base = N.getOperand(0);
1147 Offset = N.getOperand(1);
1148 return true;
1149}
1150
1151bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, SDValue &Base,
1152 SDValue &Offset) {
1154 return false; // Select ri instead
1155 return SelectThumbAddrModeRRSext(N, Base, Offset);
1156}
1157
1158bool
1159ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
1160 SDValue &Base, SDValue &OffImm) {
1162 Base = N;
1163 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1164 return true;
1165 }
1166
1167 if (!CurDAG->isBaseWithConstantOffset(N)) {
1168 if (N.getOpcode() == ISD::ADD) {
1169 return false; // We want to select register offset instead
1170 } else if (N.getOpcode() == ARMISD::Wrapper &&
1171 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1172 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1173 N.getOperand(0).getOpcode() != ISD::TargetConstantPool &&
1174 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1175 Base = N.getOperand(0);
1176 } else {
1177 Base = N;
1178 }
1179
1180 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1181 return true;
1182 }
1183
1184 // If the RHS is + imm5 * scale, fold into addr mode.
1185 int RHSC;
1186 if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
1187 Base = N.getOperand(0);
1188 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1189 return true;
1190 }
1191
1192 // Offset is too large, so use register offset instead.
1193 return false;
1194}
1195
1196bool
1197ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
1198 SDValue &OffImm) {
1199 return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
1200}
1201
1202bool
1203ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
1204 SDValue &OffImm) {
1205 return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
1206}
1207
1208bool
1209ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
1210 SDValue &OffImm) {
1211 return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
1212}
1213
1214bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
1215 SDValue &Base, SDValue &OffImm) {
1216 if (N.getOpcode() == ISD::FrameIndex) {
1217 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1218 // Only multiples of 4 are allowed for the offset, so the frame object
1219 // alignment must be at least 4.
1220 MachineFrameInfo &MFI = MF->getFrameInfo();
1221 if (MFI.getObjectAlign(FI) < Align(4))
1222 MFI.setObjectAlignment(FI, Align(4));
1223 Base = CurDAG->getTargetFrameIndex(
1224 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1225 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1226 return true;
1227 }
1228
1229 if (!CurDAG->isBaseWithConstantOffset(N))
1230 return false;
1231
1232 if (N.getOperand(0).getOpcode() == ISD::FrameIndex) {
1233 // If the RHS is + imm8 * scale, fold into addr mode.
1234 int RHSC;
1235 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
1236 Base = N.getOperand(0);
1237 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1238 // Make sure the offset is inside the object, or we might fail to
1239 // allocate an emergency spill slot. (An out-of-range access is UB, but
1240 // it could show up anyway.)
1241 MachineFrameInfo &MFI = MF->getFrameInfo();
1242 if (RHSC * 4 < MFI.getObjectSize(FI)) {
1243 // For LHS+RHS to result in an offset that's a multiple of 4 the object
1244 // indexed by the LHS must be 4-byte aligned.
1245 if (!MFI.isFixedObjectIndex(FI) && MFI.getObjectAlign(FI) < Align(4))
1246 MFI.setObjectAlignment(FI, Align(4));
1247 if (MFI.getObjectAlign(FI) >= Align(4)) {
1248 Base = CurDAG->getTargetFrameIndex(
1249 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1250 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1251 return true;
1252 }
1253 }
1254 }
1255 }
1256
1257 return false;
1258}
1259
1260template <unsigned Shift>
1261bool ARMDAGToDAGISel::SelectTAddrModeImm7(SDValue N, SDValue &Base,
1262 SDValue &OffImm) {
1263 if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
1264 int RHSC;
1265 if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
1266 RHSC)) {
1267 Base = N.getOperand(0);
1268 if (N.getOpcode() == ISD::SUB)
1269 RHSC = -RHSC;
1270 OffImm =
1271 CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
1272 return true;
1273 }
1274 }
1275
1276 // Base only.
1277 Base = N;
1278 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1279 return true;
1280}
1281
1282
1283//===----------------------------------------------------------------------===//
1284// Thumb 2 Addressing Modes
1285//===----------------------------------------------------------------------===//
1286
1287
1288bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
1289 SDValue &Base, SDValue &OffImm) {
1290 // Match simple R + imm12 operands.
1291
1292 // Base only.
1293 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1294 !CurDAG->isBaseWithConstantOffset(N)) {
1295 if (N.getOpcode() == ISD::FrameIndex) {
1296 // Match frame index.
1297 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1298 Base = CurDAG->getTargetFrameIndex(
1299 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1300 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1301 return true;
1302 }
1303
1304 if (N.getOpcode() == ARMISD::Wrapper &&
1305 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1306 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1307 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1308 Base = N.getOperand(0);
1309 if (Base.getOpcode() == ISD::TargetConstantPool)
1310 return false; // We want to select t2LDRpci instead.
1311 } else
1312 Base = N;
1313 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1314 return true;
1315 }
1316
1317 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1318 if (SelectT2AddrModeImm8(N, Base, OffImm))
1319 // Let t2LDRi8 handle (R - imm8).
1320 return false;
1321
1322 int RHSC = (int)RHS->getZExtValue();
1323 if (N.getOpcode() == ISD::SUB)
1324 RHSC = -RHSC;
1325
1326 if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
1327 Base = N.getOperand(0);
1328 if (Base.getOpcode() == ISD::FrameIndex) {
1329 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1330 Base = CurDAG->getTargetFrameIndex(
1331 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1332 }
1333 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1334 return true;
1335 }
1336 }
1337
1338 // Base only.
1339 Base = N;
1340 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1341 return true;
1342}
1343
1344template <unsigned Shift>
1345bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, SDValue &Base,
1346 SDValue &OffImm) {
1347 if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
1348 int RHSC;
1349 if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -255, 256, RHSC)) {
1350 Base = N.getOperand(0);
1351 if (Base.getOpcode() == ISD::FrameIndex) {
1352 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1353 Base = CurDAG->getTargetFrameIndex(
1354 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1355 }
1356
1357 if (N.getOpcode() == ISD::SUB)
1358 RHSC = -RHSC;
1359 OffImm =
1360 CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
1361 return true;
1362 }
1363 }
1364
1365 // Base only.
1366 Base = N;
1367 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1368 return true;
1369}
1370
1371bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
1372 SDValue &Base, SDValue &OffImm) {
1373 // Match simple R - imm8 operands.
1374 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1375 !CurDAG->isBaseWithConstantOffset(N))
1376 return false;
1377
1378 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1379 int RHSC = (int)RHS->getSExtValue();
1380 if (N.getOpcode() == ISD::SUB)
1381 RHSC = -RHSC;
1382
1383 if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
1384 Base = N.getOperand(0);
1385 if (Base.getOpcode() == ISD::FrameIndex) {
1386 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1387 Base = CurDAG->getTargetFrameIndex(
1388 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1389 }
1390 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1391 return true;
1392 }
1393 }
1394
1395 return false;
1396}
1397
1398bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
1399 SDValue &OffImm){
1400 unsigned Opcode = Op->getOpcode();
1401 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1402 ? cast<LoadSDNode>(Op)->getAddressingMode()
1403 : cast<StoreSDNode>(Op)->getAddressingMode();
1404 int RHSC;
1405 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
1406 OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
1407 ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32)
1408 : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32);
1409 return true;
1410 }
1411
1412 return false;
1413}
1414
1415template <unsigned Shift>
1416bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N, SDValue &Base,
1417 SDValue &OffImm) {
1418 if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
1419 int RHSC;
1420 if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
1421 RHSC)) {
1422 Base = N.getOperand(0);
1423 if (Base.getOpcode() == ISD::FrameIndex) {
1424 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1425 Base = CurDAG->getTargetFrameIndex(
1426 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1427 }
1428
1429 if (N.getOpcode() == ISD::SUB)
1430 RHSC = -RHSC;
1431 OffImm =
1432 CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
1433 return true;
1434 }
1435 }
1436
1437 // Base only.
1438 Base = N;
1439 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1440 return true;
1441}
1442
1443template <unsigned Shift>
1444bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
1445 SDValue &OffImm) {
1446 return SelectT2AddrModeImm7Offset(Op, N, OffImm, Shift);
1447}
1448
1449bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
1450 SDValue &OffImm,
1451 unsigned Shift) {
1452 unsigned Opcode = Op->getOpcode();
1454 switch (Opcode) {
1455 case ISD::LOAD:
1456 AM = cast<LoadSDNode>(Op)->getAddressingMode();
1457 break;
1458 case ISD::STORE:
1459 AM = cast<StoreSDNode>(Op)->getAddressingMode();
1460 break;
1461 case ISD::MLOAD:
1462 AM = cast<MaskedLoadSDNode>(Op)->getAddressingMode();
1463 break;
1464 case ISD::MSTORE:
1465 AM = cast<MaskedStoreSDNode>(Op)->getAddressingMode();
1466 break;
1467 default:
1468 llvm_unreachable("Unexpected Opcode for Imm7Offset");
1469 }
1470
1471 int RHSC;
1472 // 7 bit constant, shifted by Shift.
1473 if (isScaledConstantInRange(N, 1 << Shift, 0, 0x80, RHSC)) {
1474 OffImm =
1475 ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
1476 ? CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32)
1477 : CurDAG->getTargetConstant(-RHSC * (1 << Shift), SDLoc(N),
1478 MVT::i32);
1479 return true;
1480 }
1481 return false;
1482}
1483
1484template <int Min, int Max>
1485bool ARMDAGToDAGISel::SelectImmediateInRange(SDValue N, SDValue &OffImm) {
1486 int Val;
1487 if (isScaledConstantInRange(N, 1, Min, Max, Val)) {
1488 OffImm = CurDAG->getTargetConstant(Val, SDLoc(N), MVT::i32);
1489 return true;
1490 }
1491 return false;
1492}
1493
1494bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
1495 SDValue &Base,
1496 SDValue &OffReg, SDValue &ShImm) {
1497 // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
1498 if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
1499 return false;
1500
1501 // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
1502 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1503 int RHSC = (int)RHS->getZExtValue();
1504 if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
1505 return false;
1506 else if (RHSC < 0 && RHSC >= -255) // 8 bits
1507 return false;
1508 }
1509
1510 // Look for (R + R) or (R + (R << [1,2,3])).
1511 unsigned ShAmt = 0;
1512 Base = N.getOperand(0);
1513 OffReg = N.getOperand(1);
1514
1515 // Swap if it is ((R << c) + R).
1517 if (ShOpcVal != ARM_AM::lsl) {
1518 ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
1519 if (ShOpcVal == ARM_AM::lsl)
1520 std::swap(Base, OffReg);
1521 }
1522
1523 if (ShOpcVal == ARM_AM::lsl) {
1524 // Check to see if the RHS of the shift is a constant, if not, we can't fold
1525 // it.
1526 if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
1527 ShAmt = Sh->getZExtValue();
1528 if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
1529 OffReg = OffReg.getOperand(0);
1530 else {
1531 ShAmt = 0;
1532 }
1533 }
1534 }
1535
1536 // If OffReg is a multiply-by-constant and it's profitable to extract a shift
1537 // and use it in a shifted operand do so.
1538 if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
1539 unsigned PowerOfTwo = 0;
1540 SDValue NewMulConst;
1541 if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
1542 HandleSDNode Handle(OffReg);
1543 replaceDAGValue(OffReg.getOperand(1), NewMulConst);
1544 OffReg = Handle.getValue();
1545 ShAmt = PowerOfTwo;
1546 }
1547 }
1548
1549 ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);
1550
1551 return true;
1552}
1553
1554bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
1555 SDValue &OffImm) {
1556 // This *must* succeed since it's used for the irreplaceable ldrex and strex
1557 // instructions.
1558 Base = N;
1559 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1560
1561 if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
1562 return true;
1563
1564 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
1565 if (!RHS)
1566 return true;
1567
1568 uint32_t RHSC = (int)RHS->getZExtValue();
1569 if (RHSC > 1020 || RHSC % 4 != 0)
1570 return true;
1571
1572 Base = N.getOperand(0);
1573 if (Base.getOpcode() == ISD::FrameIndex) {
1574 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1575 Base = CurDAG->getTargetFrameIndex(
1576 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1577 }
1578
1579 OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);
1580 return true;
1581}
1582
1583//===--------------------------------------------------------------------===//
1584
1585/// getAL - Returns a ARMCC::AL immediate node.
1586static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
1587 return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
1588}
1589
1590void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
1591 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
1592 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
1593}
1594
1595bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
1596 LoadSDNode *LD = cast<LoadSDNode>(N);
1597 ISD::MemIndexedMode AM = LD->getAddressingMode();
1598 if (AM == ISD::UNINDEXED)
1599 return false;
1600
1601 EVT LoadedVT = LD->getMemoryVT();
1602 SDValue Offset, AMOpc;
1603 bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1604 unsigned Opcode = 0;
1605 bool Match = false;
1606 if (LoadedVT == MVT::i32 && isPre &&
1607 SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1608 Opcode = ARM::LDR_PRE_IMM;
1609 Match = true;
1610 } else if (LoadedVT == MVT::i32 && !isPre &&
1611 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1612 Opcode = ARM::LDR_POST_IMM;
1613 Match = true;
1614 } else if (LoadedVT == MVT::i32 &&
1615 SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1616 Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
1617 Match = true;
1618
1619 } else if (LoadedVT == MVT::i16 &&
1620 SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1621 Match = true;
1622 Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
1623 ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
1624 : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
1625 } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
1626 if (LD->getExtensionType() == ISD::SEXTLOAD) {
1627 if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1628 Match = true;
1629 Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
1630 }
1631 } else {
1632 if (isPre &&
1633 SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1634 Match = true;
1635 Opcode = ARM::LDRB_PRE_IMM;
1636 } else if (!isPre &&
1637 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1638 Match = true;
1639 Opcode = ARM::LDRB_POST_IMM;
1640 } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1641 Match = true;
1642 Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
1643 }
1644 }
1645 }
1646
1647 if (Match) {
1648 if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
1649 SDValue Chain = LD->getChain();
1650 SDValue Base = LD->getBasePtr();
1651 SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
1652 CurDAG->getRegister(0, MVT::i32), Chain };
1653 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1654 MVT::Other, Ops);
1655 transferMemOperands(N, New);
1656 ReplaceNode(N, New);
1657 return true;
1658 } else {
1659 SDValue Chain = LD->getChain();
1660 SDValue Base = LD->getBasePtr();
1661 SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
1662 CurDAG->getRegister(0, MVT::i32), Chain };
1663 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1664 MVT::Other, Ops);
1665 transferMemOperands(N, New);
1666 ReplaceNode(N, New);
1667 return true;
1668 }
1669 }
1670
1671 return false;
1672}
1673
1674bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
1675 LoadSDNode *LD = cast<LoadSDNode>(N);
1676 EVT LoadedVT = LD->getMemoryVT();
1677 ISD::MemIndexedMode AM = LD->getAddressingMode();
1678 if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD ||
1679 LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
1680 return false;
1681
1682 auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset());
1683 if (!COffs || COffs->getZExtValue() != 4)
1684 return false;
1685
1686 // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
1687 // The encoding of LDM is not how the rest of ISel expects a post-inc load to
1688 // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after
1689 // ISel.
1690 SDValue Chain = LD->getChain();
1691 SDValue Base = LD->getBasePtr();
1692 SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)),
1693 CurDAG->getRegister(0, MVT::i32), Chain };
1694 SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32,
1695 MVT::i32, MVT::Other, Ops);
1696 transferMemOperands(N, New);
1697 ReplaceNode(N, New);
1698 return true;
1699}
1700
1701bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
1702 LoadSDNode *LD = cast<LoadSDNode>(N);
1703 ISD::MemIndexedMode AM = LD->getAddressingMode();
1704 if (AM == ISD::UNINDEXED)
1705 return false;
1706
1707 EVT LoadedVT = LD->getMemoryVT();
1708 bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1710 bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1711 unsigned Opcode = 0;
1712 bool Match = false;
1713 if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
1714 switch (LoadedVT.getSimpleVT().SimpleTy) {
1715 case MVT::i32:
1716 Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
1717 break;
1718 case MVT::i16:
1719 if (isSExtLd)
1720 Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
1721 else
1722 Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
1723 break;
1724 case MVT::i8:
1725 case MVT::i1:
1726 if (isSExtLd)
1727 Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
1728 else
1729 Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
1730 break;
1731 default:
1732 return false;
1733 }
1734 Match = true;
1735 }
1736
1737 if (Match) {
1738 SDValue Chain = LD->getChain();
1739 SDValue Base = LD->getBasePtr();
1740 SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
1741 CurDAG->getRegister(0, MVT::i32), Chain };
1742 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1743 MVT::Other, Ops);
1744 transferMemOperands(N, New);
1745 ReplaceNode(N, New);
1746 return true;
1747 }
1748
1749 return false;
1750}
1751
1752bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {
1753 EVT LoadedVT;
1754 unsigned Opcode = 0;
1755 bool isSExtLd, isPre;
1756 Align Alignment;
1757 ARMVCC::VPTCodes Pred;
1758 SDValue PredReg;
1759 SDValue Chain, Base, Offset;
1760
1761 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
1762 ISD::MemIndexedMode AM = LD->getAddressingMode();
1763 if (AM == ISD::UNINDEXED)
1764 return false;
1765 LoadedVT = LD->getMemoryVT();
1766 if (!LoadedVT.isVector())
1767 return false;
1768
1769 Chain = LD->getChain();
1770 Base = LD->getBasePtr();
1771 Offset = LD->getOffset();
1772 Alignment = LD->getAlign();
1773 isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1774 isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1775 Pred = ARMVCC::None;
1776 PredReg = CurDAG->getRegister(0, MVT::i32);
1777 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
1778 ISD::MemIndexedMode AM = LD->getAddressingMode();
1779 if (AM == ISD::UNINDEXED)
1780 return false;
1781 LoadedVT = LD->getMemoryVT();
1782 if (!LoadedVT.isVector())
1783 return false;
1784
1785 Chain = LD->getChain();
1786 Base = LD->getBasePtr();
1787 Offset = LD->getOffset();
1788 Alignment = LD->getAlign();
1789 isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1790 isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1791 Pred = ARMVCC::Then;
1792 PredReg = LD->getMask();
1793 } else
1794 llvm_unreachable("Expected a Load or a Masked Load!");
1795
1796 // We allow LE non-masked loads to change the type (for example use a vldrb.8
1797 // as opposed to a vldrw.32). This can allow extra addressing modes or
1798 // alignments for what is otherwise an equivalent instruction.
1799 bool CanChangeType = Subtarget->isLittle() && !isa<MaskedLoadSDNode>(N);
1800
1801 SDValue NewOffset;
1802 if (Alignment >= Align(2) && LoadedVT == MVT::v4i16 &&
1803 SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1)) {
1804 if (isSExtLd)
1805 Opcode = isPre ? ARM::MVE_VLDRHS32_pre : ARM::MVE_VLDRHS32_post;
1806 else
1807 Opcode = isPre ? ARM::MVE_VLDRHU32_pre : ARM::MVE_VLDRHU32_post;
1808 } else if (LoadedVT == MVT::v8i8 &&
1809 SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
1810 if (isSExtLd)
1811 Opcode = isPre ? ARM::MVE_VLDRBS16_pre : ARM::MVE_VLDRBS16_post;
1812 else
1813 Opcode = isPre ? ARM::MVE_VLDRBU16_pre : ARM::MVE_VLDRBU16_post;
1814 } else if (LoadedVT == MVT::v4i8 &&
1815 SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
1816 if (isSExtLd)
1817 Opcode = isPre ? ARM::MVE_VLDRBS32_pre : ARM::MVE_VLDRBS32_post;
1818 else
1819 Opcode = isPre ? ARM::MVE_VLDRBU32_pre : ARM::MVE_VLDRBU32_post;
1820 } else if (Alignment >= Align(4) &&
1821 (CanChangeType || LoadedVT == MVT::v4i32 ||
1822 LoadedVT == MVT::v4f32) &&
1823 SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 2))
1824 Opcode = isPre ? ARM::MVE_VLDRWU32_pre : ARM::MVE_VLDRWU32_post;
1825 else if (Alignment >= Align(2) &&
1826 (CanChangeType || LoadedVT == MVT::v8i16 ||
1827 LoadedVT == MVT::v8f16) &&
1828 SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1))
1829 Opcode = isPre ? ARM::MVE_VLDRHU16_pre : ARM::MVE_VLDRHU16_post;
1830 else if ((CanChangeType || LoadedVT == MVT::v16i8) &&
1831 SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0))
1832 Opcode = isPre ? ARM::MVE_VLDRBU8_pre : ARM::MVE_VLDRBU8_post;
1833 else
1834 return false;
1835
1836 SDValue Ops[] = {Base,
1837 NewOffset,
1838 CurDAG->getTargetConstant(Pred, SDLoc(N), MVT::i32),
1839 PredReg,
1840 CurDAG->getRegister(0, MVT::i32), // tp_reg
1841 Chain};
1842 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
1843 N->getValueType(0), MVT::Other, Ops);
1844 transferMemOperands(N, New);
1845 ReplaceUses(SDValue(N, 0), SDValue(New, 1));
1846 ReplaceUses(SDValue(N, 1), SDValue(New, 0));
1847 ReplaceUses(SDValue(N, 2), SDValue(New, 2));
1848 CurDAG->RemoveDeadNode(N);
1849 return true;
1850}
1851
1852/// Form a GPRPair pseudo register from a pair of GPR regs.
1853SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
1854 SDLoc dl(V0.getNode());
1855 SDValue RegClass =
1856 CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
1857 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
1858 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
1859 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1860 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1861}
1862
1863/// Form a D register from a pair of S registers.
1864SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1865 SDLoc dl(V0.getNode());
1866 SDValue RegClass =
1867 CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
1868 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1869 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1870 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1871 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1872}
1873
1874/// Form a quad register from a pair of D registers.
1875SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1876 SDLoc dl(V0.getNode());
1877 SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
1878 MVT::i32);
1879 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1880 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1881 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1882 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1883}
1884
1885/// Form 4 consecutive D registers from a pair of Q registers.
1886SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1887 SDLoc dl(V0.getNode());
1888 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1889 MVT::i32);
1890 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1891 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1892 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1893 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1894}
1895
1896/// Form 4 consecutive S registers.
1897SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
1898 SDValue V2, SDValue V3) {
1899 SDLoc dl(V0.getNode());
1900 SDValue RegClass =
1901 CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
1902 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1903 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1904 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
1905 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
1906 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1907 V2, SubReg2, V3, SubReg3 };
1908 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1909}
1910
1911/// Form 4 consecutive D registers.
1912SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
1913 SDValue V2, SDValue V3) {
1914 SDLoc dl(V0.getNode());
1915 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1916 MVT::i32);
1917 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1918 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1919 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);
1920 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);
1921 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1922 V2, SubReg2, V3, SubReg3 };
1923 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1924}
1925
1926/// Form 4 consecutive Q registers.
1927SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
1928 SDValue V2, SDValue V3) {
1929 SDLoc dl(V0.getNode());
1930 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl,
1931 MVT::i32);
1932 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1933 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1934 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);
1935 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);
1936 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1937 V2, SubReg2, V3, SubReg3 };
1938 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1939}
1940
1941/// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
1942/// of a NEON VLD or VST instruction. The supported values depend on the
1943/// number of registers being loaded.
1944SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl,
1945 unsigned NumVecs, bool is64BitVector) {
1946 unsigned NumRegs = NumVecs;
1947 if (!is64BitVector && NumVecs < 3)
1948 NumRegs *= 2;
1949
1950 unsigned Alignment = Align->getAsZExtVal();
1951 if (Alignment >= 32 && NumRegs == 4)
1952 Alignment = 32;
1953 else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
1954 Alignment = 16;
1955 else if (Alignment >= 8)
1956 Alignment = 8;
1957 else
1958 Alignment = 0;
1959
1960 return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
1961}
1962
1963static bool isVLDfixed(unsigned Opc)
1964{
1965 switch (Opc) {
1966 default: return false;
1967 case ARM::VLD1d8wb_fixed : return true;
1968 case ARM::VLD1d16wb_fixed : return true;
1969 case ARM::VLD1d64Qwb_fixed : return true;
1970 case ARM::VLD1d32wb_fixed : return true;
1971 case ARM::VLD1d64wb_fixed : return true;
1972 case ARM::VLD1d8TPseudoWB_fixed : return true;
1973 case ARM::VLD1d16TPseudoWB_fixed : return true;
1974 case ARM::VLD1d32TPseudoWB_fixed : return true;
1975 case ARM::VLD1d64TPseudoWB_fixed : return true;
1976 case ARM::VLD1d8QPseudoWB_fixed : return true;
1977 case ARM::VLD1d16QPseudoWB_fixed : return true;
1978 case ARM::VLD1d32QPseudoWB_fixed : return true;
1979 case ARM::VLD1d64QPseudoWB_fixed : return true;
1980 case ARM::VLD1q8wb_fixed : return true;
1981 case ARM::VLD1q16wb_fixed : return true;
1982 case ARM::VLD1q32wb_fixed : return true;
1983 case ARM::VLD1q64wb_fixed : return true;
1984 case ARM::VLD1DUPd8wb_fixed : return true;
1985 case ARM::VLD1DUPd16wb_fixed : return true;
1986 case ARM::VLD1DUPd32wb_fixed : return true;
1987 case ARM::VLD1DUPq8wb_fixed : return true;
1988 case ARM::VLD1DUPq16wb_fixed : return true;
1989 case ARM::VLD1DUPq32wb_fixed : return true;
1990 case ARM::VLD2d8wb_fixed : return true;
1991 case ARM::VLD2d16wb_fixed : return true;
1992 case ARM::VLD2d32wb_fixed : return true;
1993 case ARM::VLD2q8PseudoWB_fixed : return true;
1994 case ARM::VLD2q16PseudoWB_fixed : return true;
1995 case ARM::VLD2q32PseudoWB_fixed : return true;
1996 case ARM::VLD2DUPd8wb_fixed : return true;
1997 case ARM::VLD2DUPd16wb_fixed : return true;
1998 case ARM::VLD2DUPd32wb_fixed : return true;
1999 case ARM::VLD2DUPq8OddPseudoWB_fixed: return true;
2000 case ARM::VLD2DUPq16OddPseudoWB_fixed: return true;
2001 case ARM::VLD2DUPq32OddPseudoWB_fixed: return true;
2002 }
2003}
2004
2005static bool isVSTfixed(unsigned Opc)
2006{
2007 switch (Opc) {
2008 default: return false;
2009 case ARM::VST1d8wb_fixed : return true;
2010 case ARM::VST1d16wb_fixed : return true;
2011 case ARM::VST1d32wb_fixed : return true;
2012 case ARM::VST1d64wb_fixed : return true;
2013 case ARM::VST1q8wb_fixed : return true;
2014 case ARM::VST1q16wb_fixed : return true;
2015 case ARM::VST1q32wb_fixed : return true;
2016 case ARM::VST1q64wb_fixed : return true;
2017 case ARM::VST1d8TPseudoWB_fixed : return true;
2018 case ARM::VST1d16TPseudoWB_fixed : return true;
2019 case ARM::VST1d32TPseudoWB_fixed : return true;
2020 case ARM::VST1d64TPseudoWB_fixed : return true;
2021 case ARM::VST1d8QPseudoWB_fixed : return true;
2022 case ARM::VST1d16QPseudoWB_fixed : return true;
2023 case ARM::VST1d32QPseudoWB_fixed : return true;
2024 case ARM::VST1d64QPseudoWB_fixed : return true;
2025 case ARM::VST2d8wb_fixed : return true;
2026 case ARM::VST2d16wb_fixed : return true;
2027 case ARM::VST2d32wb_fixed : return true;
2028 case ARM::VST2q8PseudoWB_fixed : return true;
2029 case ARM::VST2q16PseudoWB_fixed : return true;
2030 case ARM::VST2q32PseudoWB_fixed : return true;
2031 }
2032}
2033
2034// Get the register stride update opcode of a VLD/VST instruction that
2035// is otherwise equivalent to the given fixed stride updating instruction.
2036static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
2037 assert((isVLDfixed(Opc) || isVSTfixed(Opc))
2038 && "Incorrect fixed stride updating instruction.");
2039 switch (Opc) {
2040 default: break;
2041 case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
2042 case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
2043 case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
2044 case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
2045 case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
2046 case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
2047 case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
2048 case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
2049 case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
2050 case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
2051 case ARM::VLD1d8TPseudoWB_fixed: return ARM::VLD1d8TPseudoWB_register;
2052 case ARM::VLD1d16TPseudoWB_fixed: return ARM::VLD1d16TPseudoWB_register;
2053 case ARM::VLD1d32TPseudoWB_fixed: return ARM::VLD1d32TPseudoWB_register;
2054 case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
2055 case ARM::VLD1d8QPseudoWB_fixed: return ARM::VLD1d8QPseudoWB_register;
2056 case ARM::VLD1d16QPseudoWB_fixed: return ARM::VLD1d16QPseudoWB_register;
2057 case ARM::VLD1d32QPseudoWB_fixed: return ARM::VLD1d32QPseudoWB_register;
2058 case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
2059 case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register;
2060 case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register;
2061 case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register;
2062 case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register;
2063 case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register;
2064 case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register;
2065 case ARM::VLD2DUPq8OddPseudoWB_fixed: return ARM::VLD2DUPq8OddPseudoWB_register;
2066 case ARM::VLD2DUPq16OddPseudoWB_fixed: return ARM::VLD2DUPq16OddPseudoWB_register;
2067 case ARM::VLD2DUPq32OddPseudoWB_fixed: return ARM::VLD2DUPq32OddPseudoWB_register;
2068
2069 case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
2070 case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
2071 case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
2072 case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
2073 case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
2074 case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
2075 case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
2076 case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
2077 case ARM::VST1d8TPseudoWB_fixed: return ARM::VST1d8TPseudoWB_register;
2078 case ARM::VST1d16TPseudoWB_fixed: return ARM::VST1d16TPseudoWB_register;
2079 case ARM::VST1d32TPseudoWB_fixed: return ARM::VST1d32TPseudoWB_register;
2080 case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
2081 case ARM::VST1d8QPseudoWB_fixed: return ARM::VST1d8QPseudoWB_register;
2082 case ARM::VST1d16QPseudoWB_fixed: return ARM::VST1d16QPseudoWB_register;
2083 case ARM::VST1d32QPseudoWB_fixed: return ARM::VST1d32QPseudoWB_register;
2084 case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;
2085
2086 case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
2087 case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
2088 case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
2089 case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
2090 case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
2091 case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;
2092
2093 case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
2094 case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
2095 case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
2096 case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
2097 case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
2098 case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
2099
2100 case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
2101 case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
2102 case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
2103 }
2104 return Opc; // If not one we handle, return it unchanged.
2105}
2106
2107/// Returns true if the given increment is a Constant known to be equal to the
2108/// access size performed by a NEON load/store. This means the "[rN]!" form can
2109/// be used.
2110static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) {
2111 auto C = dyn_cast<ConstantSDNode>(Inc);
2112 return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs;
2113}
2114
2115void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
2116 const uint16_t *DOpcodes,
2117 const uint16_t *QOpcodes0,
2118 const uint16_t *QOpcodes1) {
2119 assert(Subtarget->hasNEON());
2120 assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
2121 SDLoc dl(N);
2122
2123 SDValue MemAddr, Align;
2124 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
2125 // nodes are not intrinsics.
2126 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2127 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2128 return;
2129
2130 SDValue Chain = N->getOperand(0);
2131 EVT VT = N->getValueType(0);
2132 bool is64BitVector = VT.is64BitVector();
2133 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
2134
2135 unsigned OpcodeIndex;
2136 switch (VT.getSimpleVT().SimpleTy) {
2137 default: llvm_unreachable("unhandled vld type");
2138 // Double-register operations:
2139 case MVT::v8i8: OpcodeIndex = 0; break;
2140 case MVT::v4f16:
2141 case MVT::v4bf16:
2142 case MVT::v4i16: OpcodeIndex = 1; break;
2143 case MVT::v2f32:
2144 case MVT::v2i32: OpcodeIndex = 2; break;
2145 case MVT::v1i64: OpcodeIndex = 3; break;
2146 // Quad-register operations:
2147 case MVT::v16i8: OpcodeIndex = 0; break;
2148 case MVT::v8f16:
2149 case MVT::v8bf16:
2150 case MVT::v8i16: OpcodeIndex = 1; break;
2151 case MVT::v4f32:
2152 case MVT::v4i32: OpcodeIndex = 2; break;
2153 case MVT::v2f64:
2154 case MVT::v2i64: OpcodeIndex = 3; break;
2155 }
2156
2157 EVT ResTy;
2158 if (NumVecs == 1)
2159 ResTy = VT;
2160 else {
2161 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2162 if (!is64BitVector)
2163 ResTyElts *= 2;
2164 ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
2165 }
2166 std::vector<EVT> ResTys;
2167 ResTys.push_back(ResTy);
2168 if (isUpdating)
2169 ResTys.push_back(MVT::i32);
2170 ResTys.push_back(MVT::Other);
2171
2172 SDValue Pred = getAL(CurDAG, dl);
2173 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2174 SDNode *VLd;
2176
2177 // Double registers and VLD1/VLD2 quad registers are directly supported.
2178 if (is64BitVector || NumVecs <= 2) {
2179 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2180 QOpcodes0[OpcodeIndex]);
2181 Ops.push_back(MemAddr);
2182 Ops.push_back(Align);
2183 if (isUpdating) {
2184 SDValue Inc = N->getOperand(AddrOpIdx + 1);
2185 bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
2186 if (!IsImmUpdate) {
2187 // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
2188 // check for the opcode rather than the number of vector elements.
2189 if (isVLDfixed(Opc))
2191 Ops.push_back(Inc);
2192 // VLD1/VLD2 fixed increment does not need Reg0 so only include it in
2193 // the operands if not such an opcode.
2194 } else if (!isVLDfixed(Opc))
2195 Ops.push_back(Reg0);
2196 }
2197 Ops.push_back(Pred);
2198 Ops.push_back(Reg0);
2199 Ops.push_back(Chain);
2200 VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2201
2202 } else {
2203 // Otherwise, quad registers are loaded with two separate instructions,
2204 // where one loads the even registers and the other loads the odd registers.
2205 EVT AddrTy = MemAddr.getValueType();
2206
2207 // Load the even subregs. This is always an updating load, so that it
2208 // provides the address to the second load for the odd subregs.
2209 SDValue ImplDef =
2210 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
2211 const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
2212 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
2213 ResTy, AddrTy, MVT::Other, OpsA);
2214 Chain = SDValue(VLdA, 2);
2215
2216 // Load the odd subregs.
2217 Ops.push_back(SDValue(VLdA, 1));
2218 Ops.push_back(Align);
2219 if (isUpdating) {
2220 SDValue Inc = N->getOperand(AddrOpIdx + 1);
2221 assert(isa<ConstantSDNode>(Inc.getNode()) &&
2222 "only constant post-increment update allowed for VLD3/4");
2223 (void)Inc;
2224 Ops.push_back(Reg0);
2225 }
2226 Ops.push_back(SDValue(VLdA, 0));
2227 Ops.push_back(Pred);
2228 Ops.push_back(Reg0);
2229 Ops.push_back(Chain);
2230 VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
2231 }
2232
2233 // Transfer memoperands.
2234 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2235 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLd), {MemOp});
2236
2237 if (NumVecs == 1) {
2238 ReplaceNode(N, VLd);
2239 return;
2240 }
2241
2242 // Extract out the subregisters.
2243 SDValue SuperReg = SDValue(VLd, 0);
2244 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
2245 ARM::qsub_3 == ARM::qsub_0 + 3,
2246 "Unexpected subreg numbering");
2247 unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
2248 for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2249 ReplaceUses(SDValue(N, Vec),
2250 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
2251 ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
2252 if (isUpdating)
2253 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
2254 CurDAG->RemoveDeadNode(N);
2255}
2256
2257void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
2258 const uint16_t *DOpcodes,
2259 const uint16_t *QOpcodes0,
2260 const uint16_t *QOpcodes1) {
2261 assert(Subtarget->hasNEON());
2262 assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
2263 SDLoc dl(N);
2264
2265 SDValue MemAddr, Align;
2266 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
2267 // nodes are not intrinsics.
2268 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2269 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2270 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2271 return;
2272
2273 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2274
2275 SDValue Chain = N->getOperand(0);
2276 EVT VT = N->getOperand(Vec0Idx).getValueType();
2277 bool is64BitVector = VT.is64BitVector();
2278 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
2279
2280 unsigned OpcodeIndex;
2281 switch (VT.getSimpleVT().SimpleTy) {
2282 default: llvm_unreachable("unhandled vst type");
2283 // Double-register operations:
2284 case MVT::v8i8: OpcodeIndex = 0; break;
2285 case MVT::v4f16:
2286 case MVT::v4bf16:
2287 case MVT::v4i16: OpcodeIndex = 1; break;
2288 case MVT::v2f32:
2289 case MVT::v2i32: OpcodeIndex = 2; break;
2290 case MVT::v1i64: OpcodeIndex = 3; break;
2291 // Quad-register operations:
2292 case MVT::v16i8: OpcodeIndex = 0; break;
2293 case MVT::v8f16:
2294 case MVT::v8bf16:
2295 case MVT::v8i16: OpcodeIndex = 1; break;
2296 case MVT::v4f32:
2297 case MVT::v4i32: OpcodeIndex = 2; break;
2298 case MVT::v2f64:
2299 case MVT::v2i64: OpcodeIndex = 3; break;
2300 }
2301
2302 std::vector<EVT> ResTys;
2303 if (isUpdating)
2304 ResTys.push_back(MVT::i32);
2305 ResTys.push_back(MVT::Other);
2306
2307 SDValue Pred = getAL(CurDAG, dl);
2308 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2310
2311 // Double registers and VST1/VST2 quad registers are directly supported.
2312 if (is64BitVector || NumVecs <= 2) {
2313 SDValue SrcReg;
2314 if (NumVecs == 1) {
2315 SrcReg = N->getOperand(Vec0Idx);
2316 } else if (is64BitVector) {
2317 // Form a REG_SEQUENCE to force register allocation.
2318 SDValue V0 = N->getOperand(Vec0Idx + 0);
2319 SDValue V1 = N->getOperand(Vec0Idx + 1);
2320 if (NumVecs == 2)
2321 SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2322 else {
2323 SDValue V2 = N->getOperand(Vec0Idx + 2);
2324 // If it's a vst3, form a quad D-register and leave the last part as
2325 // an undef.
2326 SDValue V3 = (NumVecs == 3)
2327 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
2328 : N->getOperand(Vec0Idx + 3);
2329 SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2330 }
2331 } else {
2332 // Form a QQ register.
2333 SDValue Q0 = N->getOperand(Vec0Idx);
2334 SDValue Q1 = N->getOperand(Vec0Idx + 1);
2335 SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
2336 }
2337
2338 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2339 QOpcodes0[OpcodeIndex]);
2340 Ops.push_back(MemAddr);
2341 Ops.push_back(Align);
2342 if (isUpdating) {
2343 SDValue Inc = N->getOperand(AddrOpIdx + 1);
2344 bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
2345 if (!IsImmUpdate) {
2346 // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so
2347 // check for the opcode rather than the number of vector elements.
2348 if (isVSTfixed(Opc))
2350 Ops.push_back(Inc);
2351 }
2352 // VST1/VST2 fixed increment does not need Reg0 so only include it in
2353 // the operands if not such an opcode.
2354 else if (!isVSTfixed(Opc))
2355 Ops.push_back(Reg0);
2356 }
2357 Ops.push_back(SrcReg);
2358 Ops.push_back(Pred);
2359 Ops.push_back(Reg0);
2360 Ops.push_back(Chain);
2361 SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2362
2363 // Transfer memoperands.
2364 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VSt), {MemOp});
2365
2366 ReplaceNode(N, VSt);
2367 return;
2368 }
2369
2370 // Otherwise, quad registers are stored with two separate instructions,
2371 // where one stores the even registers and the other stores the odd registers.
2372
2373 // Form the QQQQ REG_SEQUENCE.
2374 SDValue V0 = N->getOperand(Vec0Idx + 0);
2375 SDValue V1 = N->getOperand(Vec0Idx + 1);
2376 SDValue V2 = N->getOperand(Vec0Idx + 2);
2377 SDValue V3 = (NumVecs == 3)
2378 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2379 : N->getOperand(Vec0Idx + 3);
2380 SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2381
2382 // Store the even D registers. This is always an updating store, so that it
2383 // provides the address to the second store for the odd subregs.
2384 const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
2385 SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
2386 MemAddr.getValueType(),
2387 MVT::Other, OpsA);
2388 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStA), {MemOp});
2389 Chain = SDValue(VStA, 1);
2390
2391 // Store the odd D registers.
2392 Ops.push_back(SDValue(VStA, 0));
2393 Ops.push_back(Align);
2394 if (isUpdating) {
2395 SDValue Inc = N->getOperand(AddrOpIdx + 1);
2396 assert(isa<ConstantSDNode>(Inc.getNode()) &&
2397 "only constant post-increment update allowed for VST3/4");
2398 (void)Inc;
2399 Ops.push_back(Reg0);
2400 }
2401 Ops.push_back(RegSeq);
2402 Ops.push_back(Pred);
2403 Ops.push_back(Reg0);
2404 Ops.push_back(Chain);
2405 SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
2406 Ops);
2407 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStB), {MemOp});
2408 ReplaceNode(N, VStB);
2409}
2410
2411void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
2412 unsigned NumVecs,
2413 const uint16_t *DOpcodes,
2414 const uint16_t *QOpcodes) {
2415 assert(Subtarget->hasNEON());
2416 assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
2417 SDLoc dl(N);
2418
2419 SDValue MemAddr, Align;
2420 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
2421 // nodes are not intrinsics.
2422 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2423 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2424 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2425 return;
2426
2427 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2428
2429 SDValue Chain = N->getOperand(0);
2430 unsigned Lane = N->getConstantOperandVal(Vec0Idx + NumVecs);
2431 EVT VT = N->getOperand(Vec0Idx).getValueType();
2432 bool is64BitVector = VT.is64BitVector();
2433
2434 unsigned Alignment = 0;
2435 if (NumVecs != 3) {
2436 Alignment = Align->getAsZExtVal();
2437 unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
2438 if (Alignment > NumBytes)
2439 Alignment = NumBytes;
2440 if (Alignment < 8 && Alignment < NumBytes)
2441 Alignment = 0;
2442 // Alignment must be a power of two; make sure of that.
2443 Alignment = (Alignment & -Alignment);
2444 if (Alignment == 1)
2445 Alignment = 0;
2446 }
2447 Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2448
2449 unsigned OpcodeIndex;
2450 switch (VT.getSimpleVT().SimpleTy) {
2451 default: llvm_unreachable("unhandled vld/vst lane type");
2452 // Double-register operations:
2453 case MVT::v8i8: OpcodeIndex = 0; break;
2454 case MVT::v4f16:
2455 case MVT::v4bf16:
2456 case MVT::v4i16: OpcodeIndex = 1; break;
2457 case MVT::v2f32:
2458 case MVT::v2i32: OpcodeIndex = 2; break;
2459 // Quad-register operations:
2460 case MVT::v8f16:
2461 case MVT::v8bf16:
2462 case MVT::v8i16: OpcodeIndex = 0; break;
2463 case MVT::v4f32:
2464 case MVT::v4i32: OpcodeIndex = 1; break;
2465 }
2466
2467 std::vector<EVT> ResTys;
2468 if (IsLoad) {
2469 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2470 if (!is64BitVector)
2471 ResTyElts *= 2;
2472 ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
2473 MVT::i64, ResTyElts));
2474 }
2475 if (isUpdating)
2476 ResTys.push_back(MVT::i32);
2477 ResTys.push_back(MVT::Other);
2478
2479 SDValue Pred = getAL(CurDAG, dl);
2480 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2481
2483 Ops.push_back(MemAddr);
2484 Ops.push_back(Align);
2485 if (isUpdating) {
2486 SDValue Inc = N->getOperand(AddrOpIdx + 1);
2487 bool IsImmUpdate =
2488 isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
2489 Ops.push_back(IsImmUpdate ? Reg0 : Inc);
2490 }
2491
2492 SDValue SuperReg;
2493 SDValue V0 = N->getOperand(Vec0Idx + 0);
2494 SDValue V1 = N->getOperand(Vec0Idx + 1);
2495 if (NumVecs == 2) {
2496 if (is64BitVector)
2497 SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2498 else
2499 SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
2500 } else {
2501 SDValue V2 = N->getOperand(Vec0Idx + 2);
2502 SDValue V3 = (NumVecs == 3)
2503 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2504 : N->getOperand(Vec0Idx + 3);
2505 if (is64BitVector)
2506 SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2507 else
2508 SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2509 }
2510 Ops.push_back(SuperReg);
2511 Ops.push_back(getI32Imm(Lane, dl));
2512 Ops.push_back(Pred);
2513 Ops.push_back(Reg0);
2514 Ops.push_back(Chain);
2515
2516 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2517 QOpcodes[OpcodeIndex]);
2518 SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2519 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdLn), {MemOp});
2520 if (!IsLoad) {
2521 ReplaceNode(N, VLdLn);
2522 return;
2523 }
2524
2525 // Extract the subregisters.
2526 SuperReg = SDValue(VLdLn, 0);
2527 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
2528 ARM::qsub_3 == ARM::qsub_0 + 3,
2529 "Unexpected subreg numbering");
2530 unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
2531 for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2532 ReplaceUses(SDValue(N, Vec),
2533 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
2534 ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
2535 if (isUpdating)
2536 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
2537 CurDAG->RemoveDeadNode(N);
2538}
2539
2540template <typename SDValueVector>
2541void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2542 SDValue PredicateMask) {
2543 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32));
2544 Ops.push_back(PredicateMask);
2545 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
2546}
2547
2548template <typename SDValueVector>
2549void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2550 SDValue PredicateMask,
2551 SDValue Inactive) {
2552 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32));
2553 Ops.push_back(PredicateMask);
2554 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
2555 Ops.push_back(Inactive);
2556}
2557
2558template <typename SDValueVector>
2559void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc) {
2560 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32));
2561 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
2562 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
2563}
2564
2565template <typename SDValueVector>
2566void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2567 EVT InactiveTy) {
2568 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32));
2569 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
2570 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
2571 Ops.push_back(SDValue(
2572 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, InactiveTy), 0));
2573}
2574
2575void ARMDAGToDAGISel::SelectMVE_WB(SDNode *N, const uint16_t *Opcodes,
2576 bool Predicated) {
2577 SDLoc Loc(N);
2579
2580 uint16_t Opcode;
2581 switch (N->getValueType(1).getVectorElementType().getSizeInBits()) {
2582 case 32:
2583 Opcode = Opcodes[0];
2584 break;
2585 case 64:
2586 Opcode = Opcodes[1];
2587 break;
2588 default:
2589 llvm_unreachable("bad vector element size in SelectMVE_WB");
2590 }
2591
2592 Ops.push_back(N->getOperand(2)); // vector of base addresses
2593
2594 int32_t ImmValue = N->getConstantOperandVal(3);
2595 Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate offset
2596
2597 if (Predicated)
2598 AddMVEPredicateToOps(Ops, Loc, N->getOperand(4));
2599 else
2600 AddEmptyMVEPredicateToOps(Ops, Loc);
2601
2602 Ops.push_back(N->getOperand(0)); // chain
2603
2605 VTs.push_back(N->getValueType(1));
2606 VTs.push_back(N->getValueType(0));
2607 VTs.push_back(N->getValueType(2));
2608
2609 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), VTs, Ops);
2610 ReplaceUses(SDValue(N, 0), SDValue(New, 1));
2611 ReplaceUses(SDValue(N, 1), SDValue(New, 0));
2612 ReplaceUses(SDValue(N, 2), SDValue(New, 2));
2613 transferMemOperands(N, New);
2614 CurDAG->RemoveDeadNode(N);
2615}
2616
2617void ARMDAGToDAGISel::SelectMVE_LongShift(SDNode *N, uint16_t Opcode,
2618 bool Immediate,
2619 bool HasSaturationOperand) {
2620 SDLoc Loc(N);
2622
2623 // Two 32-bit halves of the value to be shifted
2624 Ops.push_back(N->getOperand(1));
2625 Ops.push_back(N->getOperand(2));
2626
2627 // The shift count
2628 if (Immediate) {
2629 int32_t ImmValue = N->getConstantOperandVal(3);
2630 Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count
2631 } else {
2632 Ops.push_back(N->getOperand(3));
2633 }
2634
2635 // The immediate saturation operand, if any
2636 if (HasSaturationOperand) {
2637 int32_t SatOp = N->getConstantOperandVal(4);
2638 int SatBit = (SatOp == 64 ? 0 : 1);
2639 Ops.push_back(getI32Imm(SatBit, Loc));
2640 }
2641
2642 // MVE scalar shifts are IT-predicable, so include the standard
2643 // predicate arguments.
2644 Ops.push_back(getAL(CurDAG, Loc));
2645 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
2646
2647 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops));
2648}
2649
2650void ARMDAGToDAGISel::SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
2651 uint16_t OpcodeWithNoCarry,
2652 bool Add, bool Predicated) {
2653 SDLoc Loc(N);
2655 uint16_t Opcode;
2656
2657 unsigned FirstInputOp = Predicated ? 2 : 1;
2658
2659 // Two input vectors and the input carry flag
2660 Ops.push_back(N->getOperand(FirstInputOp));
2661 Ops.push_back(N->getOperand(FirstInputOp + 1));
2662 SDValue CarryIn = N->getOperand(FirstInputOp + 2);
2663 ConstantSDNode *CarryInConstant = dyn_cast<ConstantSDNode>(CarryIn);
2664 uint32_t CarryMask = 1 << 29;
2665 uint32_t CarryExpected = Add ? 0 : CarryMask;
2666 if (CarryInConstant &&
2667 (CarryInConstant->getZExtValue() & CarryMask) == CarryExpected) {
2668 Opcode = OpcodeWithNoCarry;
2669 } else {
2670 Ops.push_back(CarryIn);
2671 Opcode = OpcodeWithCarry;
2672 }
2673
2674 if (Predicated)
2675 AddMVEPredicateToOps(Ops, Loc,
2676 N->getOperand(FirstInputOp + 3), // predicate
2677 N->getOperand(FirstInputOp - 1)); // inactive
2678 else
2679 AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0));
2680
2681 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops));
2682}
2683
2684void ARMDAGToDAGISel::SelectMVE_VSHLC(SDNode *N, bool Predicated) {
2685 SDLoc Loc(N);
2687
2688 // One vector input, followed by a 32-bit word of bits to shift in
2689 // and then an immediate shift count
2690 Ops.push_back(N->getOperand(1));
2691 Ops.push_back(N->getOperand(2));
2692 int32_t ImmValue = N->getConstantOperandVal(3);
2693 Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count
2694
2695 if (Predicated)
2696 AddMVEPredicateToOps(Ops, Loc, N->getOperand(4));
2697 else
2698 AddEmptyMVEPredicateToOps(Ops, Loc);
2699
2700 CurDAG->SelectNodeTo(N, ARM::MVE_VSHLC, N->getVTList(), ArrayRef(Ops));
2701}
2702
2703static bool SDValueToConstBool(SDValue SDVal) {
2704 assert(isa<ConstantSDNode>(SDVal) && "expected a compile-time constant");
2705 ConstantSDNode *SDValConstant = dyn_cast<ConstantSDNode>(SDVal);
2706 uint64_t Value = SDValConstant->getZExtValue();
2707 assert((Value == 0 || Value == 1) && "expected value 0 or 1");
2708 return Value;
2709}
2710
2711void ARMDAGToDAGISel::SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
2712 const uint16_t *OpcodesS,
2713 const uint16_t *OpcodesU,
2714 size_t Stride, size_t TySize) {
2715 assert(TySize < Stride && "Invalid TySize");
2716 bool IsUnsigned = SDValueToConstBool(N->getOperand(1));
2717 bool IsSub = SDValueToConstBool(N->getOperand(2));
2718 bool IsExchange = SDValueToConstBool(N->getOperand(3));
2719 if (IsUnsigned) {
2720 assert(!IsSub &&
2721 "Unsigned versions of vmlsldav[a]/vrmlsldavh[a] do not exist");
2722 assert(!IsExchange &&
2723 "Unsigned versions of vmlaldav[a]x/vrmlaldavh[a]x do not exist");
2724 }
2725
2726 auto OpIsZero = [N](size_t OpNo) {
2727 return isNullConstant(N->getOperand(OpNo));
2728 };
2729
2730 // If the input accumulator value is not zero, select an instruction with
2731 // accumulator, otherwise select an instruction without accumulator
2732 bool IsAccum = !(OpIsZero(4) && OpIsZero(5));
2733
2734 const uint16_t *Opcodes = IsUnsigned ? OpcodesU : OpcodesS;
2735 if (IsSub)
2736 Opcodes += 4 * Stride;
2737 if (IsExchange)
2738 Opcodes += 2 * Stride;
2739 if (IsAccum)
2740 Opcodes += Stride;
2741 uint16_t Opcode = Opcodes[TySize];
2742
2743 SDLoc Loc(N);
2745 // Push the accumulator operands, if they are used
2746 if (IsAccum) {
2747 Ops.push_back(N->getOperand(4));
2748 Ops.push_back(N->getOperand(5));
2749 }
2750 // Push the two vector operands
2751 Ops.push_back(N->getOperand(6));
2752 Ops.push_back(N->getOperand(7));
2753
2754 if (Predicated)
2755 AddMVEPredicateToOps(Ops, Loc, N->getOperand(8));
2756 else
2757 AddEmptyMVEPredicateToOps(Ops, Loc);
2758
2759 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops));
2760}
2761
2762void ARMDAGToDAGISel::SelectMVE_VMLLDAV(SDNode *N, bool Predicated,
2763 const uint16_t *OpcodesS,
2764 const uint16_t *OpcodesU) {
2765 EVT VecTy = N->getOperand(6).getValueType();
2766 size_t SizeIndex;
2767 switch (VecTy.getVectorElementType().getSizeInBits()) {
2768 case 16:
2769 SizeIndex = 0;
2770 break;
2771 case 32:
2772 SizeIndex = 1;
2773 break;
2774 default:
2775 llvm_unreachable("bad vector element size");
2776 }
2777
2778 SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 2, SizeIndex);
2779}
2780
2781void ARMDAGToDAGISel::SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated,
2782 const uint16_t *OpcodesS,
2783 const uint16_t *OpcodesU) {
2784 assert(
2785 N->getOperand(6).getValueType().getVectorElementType().getSizeInBits() ==
2786 32 &&
2787 "bad vector element size");
2788 SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 1, 0);
2789}
2790
2791void ARMDAGToDAGISel::SelectMVE_VLD(SDNode *N, unsigned NumVecs,
2792 const uint16_t *const *Opcodes,
2793 bool HasWriteback) {
2794 EVT VT = N->getValueType(0);
2795 SDLoc Loc(N);
2796
2797 const uint16_t *OurOpcodes;
2798 switch (VT.getVectorElementType().getSizeInBits()) {
2799 case 8:
2800 OurOpcodes = Opcodes[0];
2801 break;
2802 case 16:
2803 OurOpcodes = Opcodes[1];
2804 break;
2805 case 32:
2806 OurOpcodes = Opcodes[2];
2807 break;
2808 default:
2809 llvm_unreachable("bad vector element size in SelectMVE_VLD");
2810 }
2811
2812 EVT DataTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, NumVecs * 2);
2813 SmallVector<EVT, 4> ResultTys = {DataTy, MVT::Other};
2814 unsigned PtrOperand = HasWriteback ? 1 : 2;
2815
2816 auto Data = SDValue(
2817 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, DataTy), 0);
2818 SDValue Chain = N->getOperand(0);
2819 // Add a MVE_VLDn instruction for each Vec, except the last
2820 for (unsigned Stage = 0; Stage < NumVecs - 1; ++Stage) {
2821 SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain};
2822 auto LoadInst =
2823 CurDAG->getMachineNode(OurOpcodes[Stage], Loc, ResultTys, Ops);
2824 Data = SDValue(LoadInst, 0);
2825 Chain = SDValue(LoadInst, 1);
2826 transferMemOperands(N, LoadInst);
2827 }
2828 // The last may need a writeback on it
2829 if (HasWriteback)
2830 ResultTys = {DataTy, MVT::i32, MVT::Other};
2831 SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain};
2832 auto LoadInst =
2833 CurDAG->getMachineNode(OurOpcodes[NumVecs - 1], Loc, ResultTys, Ops);
2834 transferMemOperands(N, LoadInst);
2835
2836 unsigned i;
2837 for (i = 0; i < NumVecs; i++)
2838 ReplaceUses(SDValue(N, i),
2839 CurDAG->getTargetExtractSubreg(ARM::qsub_0 + i, Loc, VT,
2840 SDValue(LoadInst, 0)));
2841 if (HasWriteback)
2842 ReplaceUses(SDValue(N, i++), SDValue(LoadInst, 1));
2843 ReplaceUses(SDValue(N, i), SDValue(LoadInst, HasWriteback ? 2 : 1));
2844 CurDAG->RemoveDeadNode(N);
2845}
2846
2847void ARMDAGToDAGISel::SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes,
2848 bool Wrapping, bool Predicated) {
2849 EVT VT = N->getValueType(0);
2850 SDLoc Loc(N);
2851
2852 uint16_t Opcode;
2853 switch (VT.getScalarSizeInBits()) {
2854 case 8:
2855 Opcode = Opcodes[0];
2856 break;
2857 case 16:
2858 Opcode = Opcodes[1];
2859 break;
2860 case 32:
2861 Opcode = Opcodes[2];
2862 break;
2863 default:
2864 llvm_unreachable("bad vector element size in SelectMVE_VxDUP");
2865 }
2866
2868 unsigned OpIdx = 1;
2869
2870 SDValue Inactive;
2871 if (Predicated)
2872 Inactive = N->getOperand(OpIdx++);
2873
2874 Ops.push_back(N->getOperand(OpIdx++)); // base
2875 if (Wrapping)
2876 Ops.push_back(N->getOperand(OpIdx++)); // limit
2877
2878 SDValue ImmOp = N->getOperand(OpIdx++); // step
2879 int ImmValue = ImmOp->getAsZExtVal();
2880 Ops.push_back(getI32Imm(ImmValue, Loc));
2881
2882 if (Predicated)
2883 AddMVEPredicateToOps(Ops, Loc, N->getOperand(OpIdx), Inactive);
2884 else
2885 AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0));
2886
2887 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops));
2888}
2889
2890void ARMDAGToDAGISel::SelectCDE_CXxD(SDNode *N, uint16_t Opcode,
2891 size_t NumExtraOps, bool HasAccum) {
2892 bool IsBigEndian = CurDAG->getDataLayout().isBigEndian();
2893 SDLoc Loc(N);
2895
2896 unsigned OpIdx = 1;
2897
2898 // Convert and append the immediate operand designating the coprocessor.
2899 SDValue ImmCorpoc = N->getOperand(OpIdx++);
2900 uint32_t ImmCoprocVal = ImmCorpoc->getAsZExtVal();
2901 Ops.push_back(getI32Imm(ImmCoprocVal, Loc));
2902
2903 // For accumulating variants copy the low and high order parts of the
2904 // accumulator into a register pair and add it to the operand vector.
2905 if (HasAccum) {
2906 SDValue AccLo = N->getOperand(OpIdx++);
2907 SDValue AccHi = N->getOperand(OpIdx++);
2908 if (IsBigEndian)
2909 std::swap(AccLo, AccHi);
2910 Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, AccLo, AccHi), 0));
2911 }
2912
2913 // Copy extra operands as-is.
2914 for (size_t I = 0; I < NumExtraOps; I++)
2915 Ops.push_back(N->getOperand(OpIdx++));
2916
2917 // Convert and append the immediate operand
2918 SDValue Imm = N->getOperand(OpIdx);
2919 uint32_t ImmVal = Imm->getAsZExtVal();
2920 Ops.push_back(getI32Imm(ImmVal, Loc));
2921
2922 // Accumulating variants are IT-predicable, add predicate operands.
2923 if (HasAccum) {
2924 SDValue Pred = getAL(CurDAG, Loc);
2925 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
2926 Ops.push_back(Pred);
2927 Ops.push_back(PredReg);
2928 }
2929
2930 // Create the CDE intruction
2931 SDNode *InstrNode = CurDAG->getMachineNode(Opcode, Loc, MVT::Untyped, Ops);
2932 SDValue ResultPair = SDValue(InstrNode, 0);
2933
2934 // The original intrinsic had two outputs, and the output of the dual-register
2935 // CDE instruction is a register pair. We need to extract the two subregisters
2936 // and replace all uses of the original outputs with the extracted
2937 // subregisters.
2938 uint16_t SubRegs[2] = {ARM::gsub_0, ARM::gsub_1};
2939 if (IsBigEndian)
2940 std::swap(SubRegs[0], SubRegs[1]);
2941
2942 for (size_t ResIdx = 0; ResIdx < 2; ResIdx++) {
2943 if (SDValue(N, ResIdx).use_empty())
2944 continue;
2945 SDValue SubReg = CurDAG->getTargetExtractSubreg(SubRegs[ResIdx], Loc,
2946 MVT::i32, ResultPair);
2947 ReplaceUses(SDValue(N, ResIdx), SubReg);
2948 }
2949
2950 CurDAG->RemoveDeadNode(N);
2951}
2952
2953void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
2954 bool isUpdating, unsigned NumVecs,
2955 const uint16_t *DOpcodes,
2956 const uint16_t *QOpcodes0,
2957 const uint16_t *QOpcodes1) {
2958 assert(Subtarget->hasNEON());
2959 assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
2960 SDLoc dl(N);
2961
2962 SDValue MemAddr, Align;
2963 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2964 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2965 return;
2966
2967 SDValue Chain = N->getOperand(0);
2968 EVT VT = N->getValueType(0);
2969 bool is64BitVector = VT.is64BitVector();
2970
2971 unsigned Alignment = 0;
2972 if (NumVecs != 3) {
2973 Alignment = Align->getAsZExtVal();
2974 unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
2975 if (Alignment > NumBytes)
2976 Alignment = NumBytes;
2977 if (Alignment < 8 && Alignment < NumBytes)
2978 Alignment = 0;
2979 // Alignment must be a power of two; make sure of that.
2980 Alignment = (Alignment & -Alignment);
2981 if (Alignment == 1)
2982 Alignment = 0;
2983 }
2984 Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2985
2986 unsigned OpcodeIndex;
2987 switch (VT.getSimpleVT().SimpleTy) {
2988 default: llvm_unreachable("unhandled vld-dup type");
2989 case MVT::v8i8:
2990 case MVT::v16i8: OpcodeIndex = 0; break;
2991 case MVT::v4i16:
2992 case MVT::v8i16:
2993 case MVT::v4f16:
2994 case MVT::v8f16:
2995 case MVT::v4bf16:
2996 case MVT::v8bf16:
2997 OpcodeIndex = 1; break;
2998 case MVT::v2f32:
2999 case MVT::v2i32:
3000 case MVT::v4f32:
3001 case MVT::v4i32: OpcodeIndex = 2; break;
3002 case MVT::v1f64:
3003 case MVT::v1i64: OpcodeIndex = 3; break;
3004 }
3005
3006 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
3007 if (!is64BitVector)
3008 ResTyElts *= 2;
3009 EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
3010
3011 std::vector<EVT> ResTys;
3012 ResTys.push_back(ResTy);
3013 if (isUpdating)
3014 ResTys.push_back(MVT::i32);
3015 ResTys.push_back(MVT::Other);
3016
3017 SDValue Pred = getAL(CurDAG, dl);
3018 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3019
3021 Ops.push_back(MemAddr);
3022 Ops.push_back(Align);
3023 unsigned Opc = is64BitVector ? DOpcodes[OpcodeIndex]
3024 : (NumVecs == 1) ? QOpcodes0[OpcodeIndex]
3025 : QOpcodes1[OpcodeIndex];
3026 if (isUpdating) {
3027 SDValue Inc = N->getOperand(2);
3028 bool IsImmUpdate =
3029 isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
3030 if (IsImmUpdate) {
3031 if (!isVLDfixed(Opc))
3032 Ops.push_back(Reg0);
3033 } else {
3034 if (isVLDfixed(Opc))
3036 Ops.push_back(Inc);
3037 }
3038 }
3039 if (is64BitVector || NumVecs == 1) {
3040 // Double registers and VLD1 quad registers are directly supported.
3041 } else {
3042 SDValue ImplDef = SDValue(
3043 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
3044 const SDValue OpsA[] = {MemAddr, Align, ImplDef, Pred, Reg0, Chain};
3045 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, ResTy,
3046 MVT::Other, OpsA);
3047 Ops.push_back(SDValue(VLdA, 0));
3048 Chain = SDValue(VLdA, 1);
3049 }
3050
3051 Ops.push_back(Pred);
3052 Ops.push_back(Reg0);
3053 Ops.push_back(Chain);
3054
3055 SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
3056
3057 // Transfer memoperands.
3058 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3059 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdDup), {MemOp});
3060
3061 // Extract the subregisters.
3062 if (NumVecs == 1) {
3063 ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0));
3064 } else {
3065 SDValue SuperReg = SDValue(VLdDup, 0);
3066 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
3067 unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
3068 for (unsigned Vec = 0; Vec != NumVecs; ++Vec) {
3069 ReplaceUses(SDValue(N, Vec),
3070 CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
3071 }
3072 }
3073 ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
3074 if (isUpdating)
3075 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
3076 CurDAG->RemoveDeadNode(N);
3077}
3078
3079bool ARMDAGToDAGISel::tryInsertVectorElt(SDNode *N) {
3080 if (!Subtarget->hasMVEIntegerOps())
3081 return false;
3082
3083 SDLoc dl(N);
3084
3085 // We are trying to use VMOV/VMOVX/VINS to more efficiently lower insert and
3086 // extracts of v8f16 and v8i16 vectors. Check that we have two adjacent
3087 // inserts of the correct type:
3088 SDValue Ins1 = SDValue(N, 0);
3089 SDValue Ins2 = N->getOperand(0);
3090 EVT VT = Ins1.getValueType();
3091 if (Ins2.getOpcode() != ISD::INSERT_VECTOR_ELT || !Ins2.hasOneUse() ||
3092 !isa<ConstantSDNode>(Ins1.getOperand(2)) ||
3093 !isa<ConstantSDNode>(Ins2.getOperand(2)) ||
3094 (VT != MVT::v8f16 && VT != MVT::v8i16) || (Ins2.getValueType() != VT))
3095 return false;
3096
3097 unsigned Lane1 = Ins1.getConstantOperandVal(2);
3098 unsigned Lane2 = Ins2.getConstantOperandVal(2);
3099 if (Lane2 % 2 != 0 || Lane1 != Lane2 + 1)
3100 return false;
3101
3102 // If the inserted values will be able to use T/B already, leave it to the
3103 // existing tablegen patterns. For example VCVTT/VCVTB.
3104 SDValue Val1 = Ins1.getOperand(1);
3105 SDValue Val2 = Ins2.getOperand(1);
3106 if (Val1.getOpcode() == ISD::FP_ROUND || Val2.getOpcode() == ISD::FP_ROUND)
3107 return false;
3108
3109 // Check if the inserted values are both extracts.
3110 if ((Val1.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
3111 Val1.getOpcode() == ARMISD::VGETLANEu) &&
3113 Val2.getOpcode() == ARMISD::VGETLANEu) &&
3114 isa<ConstantSDNode>(Val1.getOperand(1)) &&
3115 isa<ConstantSDNode>(Val2.getOperand(1)) &&
3116 (Val1.getOperand(0).getValueType() == MVT::v8f16 ||
3117 Val1.getOperand(0).getValueType() == MVT::v8i16) &&
3118 (Val2.getOperand(0).getValueType() == MVT::v8f16 ||
3119 Val2.getOperand(0).getValueType() == MVT::v8i16)) {
3120 unsigned ExtractLane1 = Val1.getConstantOperandVal(1);
3121 unsigned ExtractLane2 = Val2.getConstantOperandVal(1);
3122
3123 // If the two extracted lanes are from the same place and adjacent, this
3124 // simplifies into a f32 lane move.
3125 if (Val1.getOperand(0) == Val2.getOperand(0) && ExtractLane2 % 2 == 0 &&
3126 ExtractLane1 == ExtractLane2 + 1) {
3127 SDValue NewExt = CurDAG->getTargetExtractSubreg(
3128 ARM::ssub_0 + ExtractLane2 / 2, dl, MVT::f32, Val1.getOperand(0));
3129 SDValue NewIns = CurDAG->getTargetInsertSubreg(
3130 ARM::ssub_0 + Lane2 / 2, dl, VT, Ins2.getOperand(0),
3131 NewExt);
3132 ReplaceUses(Ins1, NewIns);
3133 return true;
3134 }
3135
3136 // Else v8i16 pattern of an extract and an insert, with a optional vmovx for
3137 // extracting odd lanes.
3138 if (VT == MVT::v8i16 && Subtarget->hasFullFP16()) {
3139 SDValue Inp1 = CurDAG->getTargetExtractSubreg(
3140 ARM::ssub_0 + ExtractLane1 / 2, dl, MVT::f32, Val1.getOperand(0));
3141 SDValue Inp2 = CurDAG->getTargetExtractSubreg(
3142 ARM::ssub_0 + ExtractLane2 / 2, dl, MVT::f32, Val2.getOperand(0));
3143 if (ExtractLane1 % 2 != 0)
3144 Inp1 = SDValue(CurDAG->getMachineNode(ARM::VMOVH, dl, MVT::f32, Inp1), 0);
3145 if (ExtractLane2 % 2 != 0)
3146 Inp2 = SDValue(CurDAG->getMachineNode(ARM::VMOVH, dl, MVT::f32, Inp2), 0);
3147 SDNode *VINS = CurDAG->getMachineNode(ARM::VINSH, dl, MVT::f32, Inp2, Inp1);
3148 SDValue NewIns =
3149 CurDAG->getTargetInsertSubreg(ARM::ssub_0 + Lane2 / 2, dl, MVT::v4f32,
3150 Ins2.getOperand(0), SDValue(VINS, 0));
3151 ReplaceUses(Ins1, NewIns);
3152 return true;
3153 }
3154 }
3155
3156 // The inserted values are not extracted - if they are f16 then insert them
3157 // directly using a VINS.
3158 if (VT == MVT::v8f16 && Subtarget->hasFullFP16()) {
3159 SDNode *VINS = CurDAG->getMachineNode(ARM::VINSH, dl, MVT::f32, Val2, Val1);
3160 SDValue NewIns =
3161 CurDAG->getTargetInsertSubreg(ARM::ssub_0 + Lane2 / 2, dl, MVT::v4f32,
3162 Ins2.getOperand(0), SDValue(VINS, 0));
3163 ReplaceUses(Ins1, NewIns);
3164 return true;
3165 }
3166
3167 return false;
3168}
3169
3170bool ARMDAGToDAGISel::transformFixedFloatingPointConversion(SDNode *N,
3171 SDNode *FMul,
3172 bool IsUnsigned,
3173 bool FixedToFloat) {
3174 auto Type = N->getValueType(0);
3175 unsigned ScalarBits = Type.getScalarSizeInBits();
3176 if (ScalarBits > 32)
3177 return false;
3178
3179 SDNodeFlags FMulFlags = FMul->getFlags();
3180 // The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is
3181 // allowed in 16 bit unsigned floats
3182 if (ScalarBits == 16 && !FMulFlags.hasNoInfs() && IsUnsigned)
3183 return false;
3184
3185 SDValue ImmNode = FMul->getOperand(1);
3186 SDValue VecVal = FMul->getOperand(0);
3187 if (VecVal->getOpcode() == ISD::UINT_TO_FP ||
3188 VecVal->getOpcode() == ISD::SINT_TO_FP)
3189 VecVal = VecVal->getOperand(0);
3190
3191 if (VecVal.getValueType().getScalarSizeInBits() != ScalarBits)
3192 return false;
3193
3194 if (ImmNode.getOpcode() == ISD::BITCAST) {
3195 if (ImmNode.getValueType().getScalarSizeInBits() != ScalarBits)
3196 return false;
3197 ImmNode = ImmNode.getOperand(0);
3198 }
3199
3200 if (ImmNode.getValueType().getScalarSizeInBits() != ScalarBits)
3201 return false;
3202
3203 APFloat ImmAPF(0.0f);
3204 switch (ImmNode.getOpcode()) {
3205 case ARMISD::VMOVIMM:
3206 case ARMISD::VDUP: {
3207 if (!isa<ConstantSDNode>(ImmNode.getOperand(0)))
3208 return false;
3209 unsigned Imm = ImmNode.getConstantOperandVal(0);
3210 if (ImmNode.getOpcode() == ARMISD::VMOVIMM)
3211 Imm = ARM_AM::decodeVMOVModImm(Imm, ScalarBits);
3212 ImmAPF =
3213 APFloat(ScalarBits == 32 ? APFloat::IEEEsingle() : APFloat::IEEEhalf(),
3214 APInt(ScalarBits, Imm));
3215 break;
3216 }
3217 case ARMISD::VMOVFPIMM: {
3219 break;
3220 }
3221 default:
3222 return false;
3223 }
3224
3225 // Where n is the number of fractional bits, multiplying by 2^n will convert
3226 // from float to fixed and multiplying by 2^-n will convert from fixed to
3227 // float. Taking log2 of the factor (after taking the inverse in the case of
3228 // float to fixed) will give n.
3229 APFloat ToConvert = ImmAPF;
3230 if (FixedToFloat) {
3231 if (!ImmAPF.getExactInverse(&ToConvert))
3232 return false;
3233 }
3234 APSInt Converted(64, false);
3235 bool IsExact;
3237 &IsExact);
3238 if (!IsExact || !Converted.isPowerOf2())
3239 return false;
3240
3241 unsigned FracBits = Converted.logBase2();
3242 if (FracBits > ScalarBits)
3243 return false;
3244
3246 VecVal, CurDAG->getConstant(FracBits, SDLoc(N), MVT::i32)};
3247 AddEmptyMVEPredicateToOps(Ops, SDLoc(N), Type);
3248
3249 unsigned int Opcode;
3250 switch (ScalarBits) {
3251 case 16:
3252 if (FixedToFloat)
3253 Opcode = IsUnsigned ? ARM::MVE_VCVTf16u16_fix : ARM::MVE_VCVTf16s16_fix;
3254 else
3255 Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix;
3256 break;
3257 case 32:
3258 if (FixedToFloat)
3259 Opcode = IsUnsigned ? ARM::MVE_VCVTf32u32_fix : ARM::MVE_VCVTf32s32_fix;
3260 else
3261 Opcode = IsUnsigned ? ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix;
3262 break;
3263 default:
3264 llvm_unreachable("unexpected number of scalar bits");
3265 break;
3266 }
3267
3268 ReplaceNode(N, CurDAG->getMachineNode(Opcode, SDLoc(N), Type, Ops));
3269 return true;
3270}
3271
3272bool ARMDAGToDAGISel::tryFP_TO_INT(SDNode *N, SDLoc dl) {
3273 // Transform a floating-point to fixed-point conversion to a VCVT
3274 if (!Subtarget->hasMVEFloatOps())
3275 return false;
3276 EVT Type = N->getValueType(0);
3277 if (!Type.isVector())
3278 return false;
3279 unsigned int ScalarBits = Type.getScalarSizeInBits();
3280
3281 bool IsUnsigned = N->getOpcode() == ISD::FP_TO_UINT ||
3282 N->getOpcode() == ISD::FP_TO_UINT_SAT;
3283 SDNode *Node = N->getOperand(0).getNode();
3284
3285 // floating-point to fixed-point with one fractional bit gets turned into an
3286 // FP_TO_[U|S]INT(FADD (x, x)) rather than an FP_TO_[U|S]INT(FMUL (x, y))
3287 if (Node->getOpcode() == ISD::FADD) {
3288 if (Node->getOperand(0) != Node->getOperand(1))
3289 return false;
3290 SDNodeFlags Flags = Node->getFlags();
3291 // The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is
3292 // allowed in 16 bit unsigned floats
3293 if (ScalarBits == 16 && !Flags.hasNoInfs() && IsUnsigned)
3294 return false;
3295
3296 unsigned Opcode;
3297 switch (ScalarBits) {
3298 case 16:
3299 Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix;
3300 break;
3301 case 32:
3302 Opcode = IsUnsigned ? ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix;
3303 break;
3304 }
3305 SmallVector<SDValue, 3> Ops{Node->getOperand(0),
3306 CurDAG->getConstant(1, dl, MVT::i32)};
3307 AddEmptyMVEPredicateToOps(Ops, dl, Type);
3308
3309 ReplaceNode(N, CurDAG->getMachineNode(Opcode, dl, Type, Ops));
3310 return true;
3311 }
3312
3313 if (Node->getOpcode() != ISD::FMUL)
3314 return false;
3315
3316 return transformFixedFloatingPointConversion(N, Node, IsUnsigned, false);
3317}
3318
3319bool ARMDAGToDAGISel::tryFMULFixed(SDNode *N, SDLoc dl) {
3320 // Transform a fixed-point to floating-point conversion to a VCVT
3321 if (!Subtarget->hasMVEFloatOps())
3322 return false;
3323 auto Type = N->getValueType(0);
3324 if (!Type.isVector())
3325 return false;
3326
3327 auto LHS = N->getOperand(0);
3328 if (LHS.getOpcode() != ISD::SINT_TO_FP && LHS.getOpcode() != ISD::UINT_TO_FP)
3329 return false;
3330
3331 return transformFixedFloatingPointConversion(
3332 N, N, LHS.getOpcode() == ISD::UINT_TO_FP, true);
3333}
3334
3335bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
3336 if (!Subtarget->hasV6T2Ops())
3337 return false;
3338
3339 unsigned Opc = isSigned
3340 ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
3341 : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
3342 SDLoc dl(N);
3343
3344 // For unsigned extracts, check for a shift right and mask
3345 unsigned And_imm = 0;
3346 if (N->getOpcode() == ISD::AND) {
3347 if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {
3348
3349 // The immediate is a mask of the low bits iff imm & (imm+1) == 0
3350 if (And_imm & (And_imm + 1))
3351 return false;
3352
3353 unsigned Srl_imm = 0;
3354 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
3355 Srl_imm)) {
3356 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
3357
3358 // Mask off the unnecessary bits of the AND immediate; normally
3359 // DAGCombine will do this, but that might not happen if
3360 // targetShrinkDemandedConstant chooses a different immediate.
3361 And_imm &= -1U >> Srl_imm;
3362
3363 // Note: The width operand is encoded as width-1.
3364 unsigned Width = llvm::countr_one(And_imm) - 1;
3365 unsigned LSB = Srl_imm;
3366
3367 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3368
3369 if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
3370 // It's cheaper to use a right shift to extract the top bits.
3371 if (Subtarget->isThumb()) {
3372 Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
3373 SDValue Ops[] = { N->getOperand(0).getOperand(0),
3374 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
3375 getAL(CurDAG, dl), Reg0, Reg0 };
3376 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3377 return true;
3378 }
3379
3380 // ARM models shift instructions as MOVsi with shifter operand.
3382 SDValue ShOpc =
3383 CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
3384 MVT::i32);
3385 SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
3386 getAL(CurDAG, dl), Reg0, Reg0 };
3387 CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
3388 return true;
3389 }
3390
3391 assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
3392 SDValue Ops[] = { N->getOperand(0).getOperand(0),
3393 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
3394 CurDAG->getTargetConstant(Width, dl, MVT::i32),
3395 getAL(CurDAG, dl), Reg0 };
3396 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3397 return true;
3398 }
3399 }
3400 return false;
3401 }
3402
3403 // Otherwise, we're looking for a shift of a shift
3404 unsigned Shl_imm = 0;
3405 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
3406 assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
3407 unsigned Srl_imm = 0;
3408 if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
3409 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
3410 // Note: The width operand is encoded as width-1.
3411 unsigned Width = 32 - Srl_imm - 1;
3412 int LSB = Srl_imm - Shl_imm;
3413 if (LSB < 0)
3414 return false;
3415 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3416 assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
3417 SDValue Ops[] = { N->getOperand(0).getOperand(0),
3418 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
3419 CurDAG->getTargetConstant(Width, dl, MVT::i32),
3420 getAL(CurDAG, dl), Reg0 };
3421 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3422 return true;
3423 }
3424 }
3425
3426 // Or we are looking for a shift of an and, with a mask operand
3427 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) &&
3428 isShiftedMask_32(And_imm)) {
3429 unsigned Srl_imm = 0;
3430 unsigned LSB = llvm::countr_zero(And_imm);
3431 // Shift must be the same as the ands lsb
3432 if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) {
3433 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
3434 unsigned MSB = llvm::Log2_32(And_imm);
3435 // Note: The width operand is encoded as width-1.
3436 unsigned Width = MSB - LSB;
3437 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3438 assert(Srl_imm + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
3439 SDValue Ops[] = { N->getOperand(0).getOperand(0),
3440 CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32),
3441 CurDAG->getTargetConstant(Width, dl, MVT::i32),
3442 getAL(CurDAG, dl), Reg0 };
3443 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3444 return true;
3445 }
3446 }
3447
3448 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
3449 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
3450 unsigned LSB = 0;
3451 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
3452 !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
3453 return false;
3454
3455 if (LSB + Width > 32)
3456 return false;
3457
3458 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3459 assert(LSB + Width <= 32 && "Shouldn't create an invalid ubfx");
3460 SDValue Ops[] = { N->getOperand(0).getOperand(0),
3461 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
3462 CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
3463 getAL(CurDAG, dl), Reg0 };
3464 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3465 return true;
3466 }
3467
3468 return false;
3469}
3470
3471/// Target-specific DAG combining for ISD::SUB.
3472/// Target-independent combining lowers SELECT_CC nodes of the form
3473/// select_cc setg[ge] X, 0, X, -X
3474/// select_cc setgt X, -1, X, -X
3475/// select_cc setl[te] X, 0, -X, X
3476/// select_cc setlt X, 1, -X, X
3477/// which represent Integer ABS into:
3478/// Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
3479/// ARM instruction selection detects the latter and matches it to
3480/// ARM::ABS or ARM::t2ABS machine node.
3481bool ARMDAGToDAGISel::tryABSOp(SDNode *N){
3482 SDValue SUBSrc0 = N->getOperand(0);
3483 SDValue SUBSrc1 = N->getOperand(1);
3484 EVT VT = N->getValueType(0);
3485
3486 if (Subtarget->isThumb1Only())
3487 return false;
3488
3489 if (SUBSrc0.getOpcode() != ISD::XOR || SUBSrc1.getOpcode() != ISD::SRA)
3490 return false;
3491
3492 SDValue XORSrc0 = SUBSrc0.getOperand(0);
3493 SDValue XORSrc1 = SUBSrc0.getOperand(1);
3494 SDValue SRASrc0 = SUBSrc1.getOperand(0);
3495 SDValue SRASrc1 = SUBSrc1.getOperand(1);
3496 ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1);
3497 EVT XType = SRASrc0.getValueType();
3498 unsigned Size = XType.getSizeInBits() - 1;
3499
3500 if (XORSrc1 == SUBSrc1 && XORSrc0 == SRASrc0 && XType.isInteger() &&
3501 SRAConstant != nullptr && Size == SRAConstant->getZExtValue()) {
3502 unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
3503 CurDAG->SelectNodeTo(N, Opcode, VT, XORSrc0);
3504 return true;
3505 }
3506
3507 return false;
3508}
3509
3510/// We've got special pseudo-instructions for these
3511void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
3512 unsigned Opcode;
3513 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
3514 if (MemTy == MVT::i8)
3515 Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_8 : ARM::CMP_SWAP_8;
3516 else if (MemTy == MVT::i16)
3517 Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_16 : ARM::CMP_SWAP_16;
3518 else if (MemTy == MVT::i32)
3519 Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_32 : ARM::CMP_SWAP_32;
3520 else
3521 llvm_unreachable("Unknown AtomicCmpSwap type");
3522
3523 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
3524 N->getOperand(0)};
3525 SDNode *CmpSwap = CurDAG->getMachineNode(
3526 Opcode, SDLoc(N),
3527 CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);
3528
3529 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
3530 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
3531
3532 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
3533 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
3534 CurDAG->RemoveDeadNode(N);
3535}
3536
3537static std::optional<std::pair<unsigned, unsigned>>
3539 unsigned FirstOne = A.getBitWidth() - A.countl_zero() - 1;
3540 unsigned LastOne = A.countr_zero();
3541 if (A.popcount() != (FirstOne - LastOne + 1))
3542 return std::nullopt;
3543 return std::make_pair(FirstOne, LastOne);
3544}
3545
3546void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
3547 assert(N->getOpcode() == ARMISD::CMPZ);
3548 SwitchEQNEToPLMI = false;
3549
3550 if (!Subtarget->isThumb())
3551 // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and
3552 // LSR don't exist as standalone instructions - they need the barrel shifter.
3553 return;
3554
3555 // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X))
3556 SDValue And = N->getOperand(0);
3557 if (!And->hasOneUse())
3558 return;
3559
3560 SDValue Zero = N->getOperand(1);
3561 if (!isNullConstant(Zero) || And->getOpcode() != ISD::AND)
3562 return;
3563 SDValue X = And.getOperand(0);
3564 auto C = dyn_cast<ConstantSDNode>(And.getOperand(1));
3565
3566 if (!C)
3567 return;
3568 auto Range = getContiguousRangeOfSetBits(C->getAPIntValue());
3569 if (!Range)
3570 return;
3571
3572 // There are several ways to lower this:
3573 SDNode *NewN;
3574 SDLoc dl(N);
3575
3576 auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* {
3577 if (Subtarget->isThumb2()) {
3578 Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri;
3579 SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32),
3580 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
3581 CurDAG->getRegister(0, MVT::i32) };
3582 return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
3583 } else {
3584 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src,
3585 CurDAG->getTargetConstant(Imm, dl, MVT::i32),
3586 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
3587 return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
3588 }
3589 };
3590
3591 if (Range->second == 0) {
3592 // 1. Mask includes the LSB -> Simply shift the top N bits off
3593 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
3594 ReplaceNode(And.getNode(), NewN);
3595 } else if (Range->first == 31) {
3596 // 2. Mask includes the MSB -> Simply shift the bottom N bits off
3597 NewN = EmitShift(ARM::tLSRri, X, Range->second);
3598 ReplaceNode(And.getNode(), NewN);
3599 } else if (Range->first == Range->second) {
3600 // 3. Only one bit is set. We can shift this into the sign bit and use a
3601 // PL/MI comparison.
3602 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
3603 ReplaceNode(And.getNode(), NewN);
3604
3605 SwitchEQNEToPLMI = true;
3606 } else if (!Subtarget->hasV6T2Ops()) {
3607 // 4. Do a double shift to clear bottom and top bits, but only in
3608 // thumb-1 mode as in thumb-2 we can use UBFX.
3609 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
3610 NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0),
3611 Range->second + (31 - Range->first));
3612 ReplaceNode(And.getNode(), NewN);
3613 }
3614}
3615
3616static unsigned getVectorShuffleOpcode(EVT VT, unsigned Opc64[3],
3617 unsigned Opc128[3]) {
3618 assert((VT.is64BitVector() || VT.is128BitVector()) &&
3619 "Unexpected vector shuffle length");
3620 switch (VT.getScalarSizeInBits()) {
3621 default:
3622 llvm_unreachable("Unexpected vector shuffle element size");
3623 case 8:
3624 return VT.is64BitVector() ? Opc64[0] : Opc128[0];
3625 case 16:
3626 return VT.is64BitVector() ? Opc64[1] : Opc128[1];
3627 case 32:
3628 return VT.is64BitVector() ? Opc64[2] : Opc128[2];
3629 }
3630}
3631
3632void ARMDAGToDAGISel::Select(SDNode *N) {
3633 SDLoc dl(N);
3634
3635 if (N->isMachineOpcode()) {
3636 N->setNodeId(-1);
3637 return; // Already selected.
3638 }
3639
3640 switch (N->getOpcode()) {
3641 default: break;
3642 case ISD::STORE: {
3643 // For Thumb1, match an sp-relative store in C++. This is a little
3644 // unfortunate, but I don't think I can make the chain check work
3645 // otherwise. (The chain of the store has to be the same as the chain
3646 // of the CopyFromReg, or else we can't replace the CopyFromReg with
3647 // a direct reference to "SP".)
3648 //
3649 // This is only necessary on Thumb1 because Thumb1 sp-relative stores use
3650 // a different addressing mode from other four-byte stores.
3651 //
3652 // This pattern usually comes up with call arguments.
3653 StoreSDNode *ST = cast<StoreSDNode>(N);
3654 SDValue Ptr = ST->getBasePtr();
3655 if (Subtarget->isThumb1Only() && ST->isUnindexed()) {
3656 int RHSC = 0;
3657 if (Ptr.getOpcode() == ISD::ADD &&
3658 isScaledConstantInRange(Ptr.getOperand(1), /*Scale=*/4, 0, 256, RHSC))
3659 Ptr = Ptr.getOperand(0);
3660
3661 if (Ptr.getOpcode() == ISD::CopyFromReg &&
3662 cast<RegisterSDNode>(Ptr.getOperand(1))->getReg() == ARM::SP &&
3663 Ptr.getOperand(0) == ST->getChain()) {
3664 SDValue Ops[] = {ST->getValue(),
3665 CurDAG->getRegister(ARM::SP, MVT::i32),
3666 CurDAG->getTargetConstant(RHSC, dl, MVT::i32),
3667 getAL(CurDAG, dl),
3668 CurDAG->getRegister(0, MVT::i32),
3669 ST->getChain()};
3670 MachineSDNode *ResNode =
3671 CurDAG->getMachineNode(ARM::tSTRspi, dl, MVT::Other, Ops);
3672 MachineMemOperand *MemOp = ST->getMemOperand();
3673 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
3674 ReplaceNode(N, ResNode);
3675 return;
3676 }
3677 }
3678 break;
3679 }
3681 if (tryWriteRegister(N))
3682 return;
3683 break;
3684 case ISD::READ_REGISTER:
3685 if (tryReadRegister(N))
3686 return;
3687 break;
3688 case ISD::INLINEASM:
3689 case ISD::INLINEASM_BR:
3690 if (tryInlineAsm(N))
3691 return;
3692 break;
3693 case ISD::SUB:
3694 // Select special operations if SUB node forms integer ABS pattern
3695 if (tryABSOp(N))
3696 return;
3697 // Other cases are autogenerated.
3698 break;
3699 case ISD::Constant: {
3700 unsigned Val = N->getAsZExtVal();
3701 // If we can't materialize the constant we need to use a literal pool
3702 if (ConstantMaterializationCost(Val, Subtarget) > 2 &&
3703 !Subtarget->genExecuteOnly()) {
3704 SDValue CPIdx = CurDAG->getTargetConstantPool(
3705 ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
3706 TLI->getPointerTy(CurDAG->getDataLayout()));
3707
3708 SDNode *ResNode;
3709 if (Subtarget->isThumb()) {
3710 SDValue Ops[] = {
3711 CPIdx,
3712 getAL(CurDAG, dl),
3713 CurDAG->getRegister(0, MVT::i32),
3714 CurDAG->getEntryNode()
3715 };
3716 ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
3717 Ops);
3718 } else {
3719 SDValue Ops[] = {
3720 CPIdx,
3721 CurDAG->getTargetConstant(0, dl, MVT::i32),
3722 getAL(CurDAG, dl),
3723 CurDAG->getRegister(0, MVT::i32),
3724 CurDAG->getEntryNode()
3725 };
3726 ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
3727 Ops);
3728 }
3729 // Annotate the Node with memory operand information so that MachineInstr
3730 // queries work properly. This e.g. gives the register allocation the
3731 // required information for rematerialization.
3732 MachineFunction& MF = CurDAG->getMachineFunction();
3736
3737 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
3738
3739 ReplaceNode(N, ResNode);
3740 return;
3741 }
3742
3743 // Other cases are autogenerated.
3744 break;
3745 }
3746 case ISD::FrameIndex: {
3747 // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
3748 int FI = cast<FrameIndexSDNode>(N)->getIndex();
3749 SDValue TFI = CurDAG->getTargetFrameIndex(
3750 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
3751 if (Subtarget->isThumb1Only()) {
3752 // Set the alignment of the frame object to 4, to avoid having to generate
3753 // more than one ADD
3754 MachineFrameInfo &MFI = MF->getFrameInfo();
3755 if (MFI.getObjectAlign(FI) < Align(4))
3756 MFI.setObjectAlignment(FI, Align(4));
3757 CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
3758 CurDAG->getTargetConstant(0, dl, MVT::i32));
3759 return;
3760 } else {
3761 unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
3762 ARM::t2ADDri : ARM::ADDri);
3763 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),
3764 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
3765 CurDAG->getRegister(0, MVT::i32) };
3766 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3767 return;
3768 }
3769 }
3771 if (tryInsertVectorElt(N))
3772 return;
3773 break;
3774 }
3775 case ISD::SRL:
3776 if (tryV6T2BitfieldExtractOp(N, false))
3777 return;
3778 break;
3780 case ISD::SRA:
3781 if (tryV6T2BitfieldExtractOp(N, true))
3782 return;
3783 break;
3784 case ISD::FP_TO_UINT:
3785 case ISD::FP_TO_SINT:
3788 if (tryFP_TO_INT(N, dl))
3789 return;
3790 break;
3791 case ISD::FMUL:
3792 if (tryFMULFixed(N, dl))
3793 return;
3794 break;
3795 case ISD::MUL:
3796 if (Subtarget->isThumb1Only())
3797 break;
3798 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
3799 unsigned RHSV = C->getZExtValue();
3800 if (!RHSV) break;
3801 if (isPowerOf2_32(RHSV-1)) { // 2^n+1?
3802 unsigned ShImm = Log2_32(RHSV-1);
3803 if (ShImm >= 32)
3804 break;
3805 SDValue V = N->getOperand(0);
3806 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
3807 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
3808 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3809 if (Subtarget->isThumb()) {
3810 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
3811 CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
3812 return;
3813 } else {
3814 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
3815 Reg0 };
3816 CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
3817 return;
3818 }
3819 }
3820 if (isPowerOf2_32(RHSV+1)) { // 2^n-1?
3821 unsigned ShImm = Log2_32(RHSV+1);
3822 if (ShImm >= 32)
3823 break;
3824 SDValue V = N->getOperand(0);
3825 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
3826 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
3827 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3828 if (Subtarget->isThumb()) {
3829 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
3830 CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
3831 return;
3832 } else {
3833 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
3834 Reg0 };
3835 CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
3836 return;
3837 }
3838 }
3839 }
3840 break;
3841 case ISD::AND: {
3842 // Check for unsigned bitfield extract
3843 if (tryV6T2BitfieldExtractOp(N, false))
3844 return;
3845
3846 // If an immediate is used in an AND node, it is possible that the immediate
3847 // can be more optimally materialized when negated. If this is the case we
3848 // can negate the immediate and use a BIC instead.
3849 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
3850 if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {
3851 uint32_t Imm = (uint32_t) N1C->getZExtValue();
3852
3853 // In Thumb2 mode, an AND can take a 12-bit immediate. If this
3854 // immediate can be negated and fit in the immediate operand of
3855 // a t2BIC, don't do any manual transform here as this can be
3856 // handled by the generic ISel machinery.
3857 bool PreferImmediateEncoding =
3858 Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));
3859 if (!PreferImmediateEncoding &&
3860 ConstantMaterializationCost(Imm, Subtarget) >
3861 ConstantMaterializationCost(~Imm, Subtarget)) {
3862 // The current immediate costs more to materialize than a negated
3863 // immediate, so negate the immediate and use a BIC.
3864 SDValue NewImm =
3865 CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32);
3866 // If the new constant didn't exist before, reposition it in the topological
3867 // ordering so it is just before N. Otherwise, don't touch its location.
3868 if (NewImm->getNodeId() == -1)
3869 CurDAG->RepositionNode(N->getIterator(), NewImm.getNode());
3870
3871 if (!Subtarget->hasThumb2()) {
3872 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32),
3873 N->getOperand(0), NewImm, getAL(CurDAG, dl),
3874 CurDAG->getRegister(0, MVT::i32)};
3875 ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops));
3876 return;
3877 } else {
3878 SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl),
3879 CurDAG->getRegister(0, MVT::i32),
3880 CurDAG->getRegister(0, MVT::i32)};
3881 ReplaceNode(N,
3882 CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops));
3883 return;
3884 }
3885 }
3886 }
3887
3888 // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
3889 // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
3890 // are entirely contributed by c2 and lower 16-bits are entirely contributed
3891 // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
3892 // Select it to: "movt x, ((c1 & 0xffff0000) >> 16)"
3893 EVT VT = N->getValueType(0);
3894 if (VT != MVT::i32)
3895 break;
3896 unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
3897 ? ARM::t2MOVTi16
3898 : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
3899 if (!Opc)
3900 break;
3901 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
3902 N1C = dyn_cast<ConstantSDNode>(N1);
3903 if (!N1C)
3904 break;
3905 if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
3906 SDValue N2 = N0.getOperand(1);
3907 ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
3908 if (!N2C)
3909 break;
3910 unsigned N1CVal = N1C->getZExtValue();
3911 unsigned N2CVal = N2C->getZExtValue();
3912 if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
3913 (N1CVal & 0xffffU) == 0xffffU &&
3914 (N2CVal & 0xffffU) == 0x0U) {
3915 SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
3916 dl, MVT::i32);
3917 SDValue Ops[] = { N0.getOperand(0), Imm16,
3918 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
3919 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
3920 return;
3921 }
3922 }
3923
3924 break;
3925 }
3926 case ARMISD::UMAAL: {
3927 unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
3928 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
3929 N->getOperand(2), N->getOperand(3),
3930 getAL(CurDAG, dl),
3931 CurDAG->getRegister(0, MVT::i32) };
3932 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops));
3933 return;
3934 }
3935 case ARMISD::UMLAL:{
3936 if (Subtarget->isThumb()) {
3937 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3938 N->getOperand(3), getAL(CurDAG, dl),
3939 CurDAG->getRegister(0, MVT::i32)};
3940 ReplaceNode(
3941 N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops));
3942 return;
3943 }else{
3944 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3945 N->getOperand(3), getAL(CurDAG, dl),
3946 CurDAG->getRegister(0, MVT::i32),
3947 CurDAG->getRegister(0, MVT::i32) };
3948 ReplaceNode(N, CurDAG->getMachineNode(
3949 Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,
3950 MVT::i32, MVT::i32, Ops));
3951 return;
3952 }
3953 }
3954 case ARMISD::SMLAL:{
3955 if (Subtarget->isThumb()) {
3956 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3957 N->getOperand(3), getAL(CurDAG, dl),
3958 CurDAG->getRegister(0, MVT::i32)};
3959 ReplaceNode(
3960 N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops));
3961 return;
3962 }else{
3963 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3964 N->getOperand(3), getAL(CurDAG, dl),
3965 CurDAG->getRegister(0, MVT::i32),
3966 CurDAG->getRegister(0, MVT::i32) };
3967 ReplaceNode(N, CurDAG->getMachineNode(
3968 Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl,
3969 MVT::i32, MVT::i32, Ops));
3970 return;
3971 }
3972 }
3973 case ARMISD::SUBE: {
3974 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
3975 break;
3976 // Look for a pattern to match SMMLS
3977 // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b))))
3978 if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI ||
3979 N->getOperand(2).getOpcode() != ARMISD::SUBC ||
3980 !SDValue(N, 1).use_empty())
3981 break;
3982
3983 if (Subtarget->isThumb())
3984 assert(Subtarget->hasThumb2() &&
3985 "This pattern should not be generated for Thumb");
3986
3987 SDValue SmulLoHi = N->getOperand(1);
3988 SDValue Subc = N->getOperand(2);
3989 SDValue Zero = Subc.getOperand(0);
3990
3991 if (!isNullConstant(Zero) || Subc.getOperand(1) != SmulLoHi.getValue(0) ||
3992 N->getOperand(1) != SmulLoHi.getValue(1) ||
3993 N->getOperand(2) != Subc.getValue(1))
3994 break;
3995
3996 unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS;
3997 SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1),
3998 N->getOperand(0), getAL(CurDAG, dl),
3999 CurDAG->getRegister(0, MVT::i32) };
4000 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops));
4001 return;
4002 }
4003 case ISD::LOAD: {
4004 if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
4005 return;
4006 if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
4007 if (tryT2IndexedLoad(N))
4008 return;
4009 } else if (Subtarget->isThumb()) {
4010 if (tryT1IndexedLoad(N))
4011 return;
4012 } else if (tryARMIndexedLoad(N))
4013 return;
4014 // Other cases are autogenerated.
4015 break;
4016 }
4017 case ISD::MLOAD:
4018 if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
4019 return;
4020 // Other cases are autogenerated.
4021 break;
4022 case ARMISD::WLSSETUP: {
4023 SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopSetup, dl, MVT::i32,
4024 N->getOperand(0));
4025 ReplaceUses(N, New);
4026 CurDAG->RemoveDeadNode(N);
4027 return;
4028 }
4029 case ARMISD::WLS: {
4030 SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopStart, dl, MVT::Other,
4031 N->getOperand(1), N->getOperand(2),
4032 N->getOperand(0));
4033 ReplaceUses(N, New);
4034 CurDAG->RemoveDeadNode(N);
4035 return;
4036 }
4037 case ARMISD::LE: {
4038 SDValue Ops[] = { N->getOperand(1),
4039 N->getOperand(2),
4040 N->getOperand(0) };
4041 unsigned Opc = ARM::t2LoopEnd;
4042 SDNode *New = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
4043 ReplaceUses(N, New);
4044 CurDAG->RemoveDeadNode(N);
4045 return;
4046 }
4047 case ARMISD::LDRD: {
4048 if (Subtarget->isThumb2())
4049 break; // TableGen handles isel in this case.
4050 SDValue Base, RegOffset, ImmOffset;
4051 const SDValue &Chain = N->getOperand(0);
4052 const SDValue &Addr = N->getOperand(1);
4053 SelectAddrMode3(Addr, Base, RegOffset, ImmOffset);
4054 if (RegOffset != CurDAG->getRegister(0, MVT::i32)) {
4055 // The register-offset variant of LDRD mandates that the register
4056 // allocated to RegOffset is not reused in any of the remaining operands.
4057 // This restriction is currently not enforced. Therefore emitting this
4058 // variant is explicitly avoided.
4059 Base = Addr;
4060 RegOffset = CurDAG->getRegister(0, MVT::i32);
4061 }
4062 SDValue Ops[] = {Base, RegOffset, ImmOffset, Chain};
4063 SDNode *New = CurDAG->getMachineNode(ARM::LOADDUAL, dl,
4064 {MVT::Untyped, MVT::Other}, Ops);
4065 SDValue Lo = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
4066 SDValue(New, 0));
4067 SDValue Hi = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
4068 SDValue(New, 0));
4069 transferMemOperands(N, New);
4070 ReplaceUses(SDValue(N, 0), Lo);
4071 ReplaceUses(SDValue(N, 1), Hi);
4072 ReplaceUses(SDValue(N, 2), SDValue(New, 1));
4073 CurDAG->RemoveDeadNode(N);
4074 return;
4075 }
4076 case ARMISD::STRD: {
4077 if (Subtarget->isThumb2())
4078 break; // TableGen handles isel in this case.
4079 SDValue Base, RegOffset, ImmOffset;
4080 const SDValue &Chain = N->getOperand(0);
4081 const SDValue &Addr = N->getOperand(3);
4082 SelectAddrMode3(Addr, Base, RegOffset, ImmOffset);
4083 if (RegOffset != CurDAG->getRegister(0, MVT::i32)) {
4084 // The register-offset variant of STRD mandates that the register
4085 // allocated to RegOffset is not reused in any of the remaining operands.
4086 // This restriction is currently not enforced. Therefore emitting this
4087 // variant is explicitly avoided.
4088 Base = Addr;
4089 RegOffset = CurDAG->getRegister(0, MVT::i32);
4090 }
4091 SDNode *RegPair =
4092 createGPRPairNode(MVT::Untyped, N->getOperand(1), N->getOperand(2));
4093 SDValue Ops[] = {SDValue(RegPair, 0), Base, RegOffset, ImmOffset, Chain};
4094 SDNode *New = CurDAG->getMachineNode(ARM::STOREDUAL, dl, MVT::Other, Ops);
4095 transferMemOperands(N, New);
4096 ReplaceUses(SDValue(N, 0), SDValue(New, 0));
4097 CurDAG->RemoveDeadNode(N);
4098 return;
4099 }
4100 case ARMISD::LOOP_DEC: {
4101 SDValue Ops[] = { N->getOperand(1),
4102 N->getOperand(2),
4103 N->getOperand(0) };
4104 SDNode *Dec =
4105 CurDAG->getMachineNode(ARM::t2LoopDec, dl,
4106 CurDAG->getVTList(MVT::i32, MVT::Other), Ops);
4107 ReplaceUses(N, Dec);
4108 CurDAG->RemoveDeadNode(N);
4109 return;
4110 }
4111 case ARMISD::BRCOND: {
4112 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
4113 // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
4114 // Pattern complexity = 6 cost = 1 size = 0
4115
4116 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
4117 // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
4118 // Pattern complexity = 6 cost = 1 size = 0
4119
4120 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
4121 // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
4122 // Pattern complexity = 6 cost = 1 size = 0
4123
4124 unsigned Opc = Subtarget->isThumb() ?
4125 ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
4126 SDValue Chain = N->getOperand(0);
4127 SDValue N1 = N->getOperand(1);
4128 SDValue N2 = N->getOperand(2);
4129 SDValue N3 = N->getOperand(3);
4130 SDValue InGlue = N->getOperand(4);
4134
4135 unsigned CC = (unsigned)N2->getAsZExtVal();
4136
4137 if (InGlue.getOpcode() == ARMISD::CMPZ) {
4138 if (InGlue.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) {
4139 SDValue Int = InGlue.getOperand(0);
4140 uint64_t ID = Int->getConstantOperandVal(1);
4141
4142 // Handle low-overhead loops.
4143 if (ID == Intrinsic::loop_decrement_reg) {
4144 SDValue Elements = Int.getOperand(2);
4145 SDValue Size = CurDAG->getTargetConstant(Int.getConstantOperandVal(3),
4146 dl, MVT::i32);
4147
4148 SDValue Args[] = { Elements, Size, Int.getOperand(0) };
4149 SDNode *LoopDec =
4150 CurDAG->getMachineNode(ARM::t2LoopDec, dl,
4151 CurDAG->getVTList(MVT::i32, MVT::Other),
4152 Args);
4153 ReplaceUses(Int.getNode(), LoopDec);
4154
4155 SDValue EndArgs[] = { SDValue(LoopDec, 0), N1, Chain };
4156 SDNode *LoopEnd =
4157 CurDAG->getMachineNode(ARM::t2LoopEnd, dl, MVT::Other, EndArgs);
4158
4159 ReplaceUses(N, LoopEnd);
4160 CurDAG->RemoveDeadNode(N);
4161 CurDAG->RemoveDeadNode(InGlue.getNode());
4162 CurDAG->RemoveDeadNode(Int.getNode());
4163 return;
4164 }
4165 }
4166
4167 bool SwitchEQNEToPLMI;
4168 SelectCMPZ(InGlue.getNode(), SwitchEQNEToPLMI);
4169 InGlue = N->getOperand(4);
4170
4171 if (SwitchEQNEToPLMI) {
4172 switch ((ARMCC::CondCodes)CC) {
4173 default: llvm_unreachable("CMPZ must be either NE or EQ!");
4174 case ARMCC::NE:
4176 break;
4177 case ARMCC::EQ:
4179 break;
4180 }
4181 }
4182 }
4183
4184 SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32);
4185 SDValue Ops[] = { N1, Tmp2, N3, Chain, InGlue };
4186 SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
4187 MVT::Glue, Ops);
4188 Chain = SDValue(ResNode, 0);
4189 if (N->getNumValues() == 2) {
4190 InGlue = SDValue(ResNode, 1);
4191 ReplaceUses(SDValue(N, 1), InGlue);
4192 }
4193 ReplaceUses(SDValue(N, 0),
4194 SDValue(Chain.getNode(), Chain.getResNo()));
4195 CurDAG->RemoveDeadNode(N);
4196 return;
4197 }
4198
4199 case ARMISD::CMPZ: {
4200 // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
4201 // This allows us to avoid materializing the expensive negative constant.
4202 // The CMPZ #0 is useless and will be peepholed away but we need to keep it
4203 // for its glue output.
4204 SDValue X = N->getOperand(0);
4205 auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode());
4206 if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) {
4207 int64_t Addend = -C->getSExtValue();
4208
4209 SDNode *Add = nullptr;
4210 // ADDS can be better than CMN if the immediate fits in a
4211 // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3.
4212 // Outside that range we can just use a CMN which is 32-bit but has a
4213 // 12-bit immediate range.
4214 if (Addend < 1<<8) {
4215 if (Subtarget->isThumb2()) {
4216 SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32),
4217 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
4218 CurDAG->getRegister(0, MVT::i32) };
4219 Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops);
4220 } else {
4221 unsigned Opc = (Addend < 1<<3) ? ARM::tADDi3 : ARM::tADDi8;
4222 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
4223 CurDAG->getTargetConstant(Addend, dl, MVT::i32),
4224 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
4225 Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
4226 }
4227 }
4228 if (Add) {
4229 SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)};
4230 CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2);
4231 }
4232 }
4233 // Other cases are autogenerated.
4234 break;
4235 }
4236
4237 case ARMISD::CMOV: {
4238 SDValue InGlue = N->getOperand(4);
4239
4240 if (InGlue.getOpcode() == ARMISD::CMPZ) {
4241 bool SwitchEQNEToPLMI;
4242 SelectCMPZ(InGlue.getNode(), SwitchEQNEToPLMI);
4243
4244 if (SwitchEQNEToPLMI) {
4245 SDValue ARMcc = N->getOperand(2);
4247
4248 switch (CC) {
4249 default: llvm_unreachable("CMPZ must be either NE or EQ!");
4250 case ARMCC::NE:
4251 CC = ARMCC::MI;
4252 break;
4253 case ARMCC::EQ:
4254 CC = ARMCC::PL;
4255 break;
4256 }
4257 SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32);
4258 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc,
4259 N->getOperand(3), N->getOperand(4)};
4260 CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops);
4261 }
4262
4263 }
4264 // Other cases are autogenerated.
4265 break;
4266 }
4267 case ARMISD::VZIP: {
4268 EVT VT = N->getValueType(0);
4269 // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
4270 unsigned Opc64[] = {ARM::VZIPd8, ARM::VZIPd16, ARM::VTRNd32};
4271 unsigned Opc128[] = {ARM::VZIPq8, ARM::VZIPq16, ARM::VZIPq32};
4272 unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128);
4273 SDValue Pred = getAL(CurDAG, dl);
4274 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
4275 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Pred, PredReg};
4276 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
4277 return;
4278 }
4279 case ARMISD::VUZP: {
4280 EVT VT = N->getValueType(0);
4281 // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
4282 unsigned Opc64[] = {ARM::VUZPd8, ARM::VUZPd16, ARM::VTRNd32};
4283 unsigned Opc128[] = {ARM::VUZPq8, ARM::VUZPq16, ARM::VUZPq32};
4284 unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128);
4285 SDValue Pred = getAL(CurDAG, dl);
4286 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
4287 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Pred, PredReg};
4288 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
4289 return;
4290 }
4291 case ARMISD::VTRN: {
4292 EVT VT = N->getValueType(0);
4293 unsigned Opc64[] = {ARM::VTRNd8, ARM::VTRNd16, ARM::VTRNd32};
4294 unsigned Opc128[] = {ARM::VTRNq8, ARM::VTRNq16, ARM::VTRNq32};
4295 unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128);
4296 SDValue Pred = getAL(CurDAG, dl);
4297 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
4298 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Pred, PredReg};
4299 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
4300 return;
4301 }
4302 case ARMISD::BUILD_VECTOR: {
4303 EVT VecVT = N->getValueType(0);
4304 EVT EltVT = VecVT.getVectorElementType();
4305 unsigned NumElts = VecVT.getVectorNumElements();
4306 if (EltVT == MVT::f64) {
4307 assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
4308 ReplaceNode(
4309 N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
4310 return;
4311 }
4312 assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
4313 if (NumElts == 2) {
4314 ReplaceNode(
4315 N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
4316 return;
4317 }
4318 assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
4319 ReplaceNode(N,
4320 createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
4321 N->getOperand(2), N->getOperand(3)));
4322 return;
4323 }
4324
4325 case ARMISD::VLD1DUP: {
4326 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16,
4327 ARM::VLD1DUPd32 };
4328 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16,
4329 ARM::VLD1DUPq32 };
4330 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 1, DOpcodes, QOpcodes);
4331 return;
4332 }
4333
4334 case ARMISD::VLD2DUP: {
4335 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
4336 ARM::VLD2DUPd32 };
4337 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 2, Opcodes);
4338 return;
4339 }
4340
4341 case ARMISD::VLD3DUP: {
4342 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
4343 ARM::VLD3DUPd16Pseudo,
4344 ARM::VLD3DUPd32Pseudo };
4345 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 3, Opcodes);
4346 return;
4347 }
4348
4349 case ARMISD::VLD4DUP: {
4350 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
4351 ARM::VLD4DUPd16Pseudo,
4352 ARM::VLD4DUPd32Pseudo };
4353 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 4, Opcodes);
4354 return;
4355 }
4356
4357 case ARMISD::VLD1DUP_UPD: {
4358 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed,
4359 ARM::VLD1DUPd16wb_fixed,
4360 ARM::VLD1DUPd32wb_fixed };
4361 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed,
4362 ARM::VLD1DUPq16wb_fixed,
4363 ARM::VLD1DUPq32wb_fixed };
4364 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 1, DOpcodes, QOpcodes);
4365 return;
4366 }
4367
4368 case ARMISD::VLD2DUP_UPD: {
4369 static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8wb_fixed,
4370 ARM::VLD2DUPd16wb_fixed,
4371 ARM::VLD2DUPd32wb_fixed,
4372 ARM::VLD1q64wb_fixed };
4373 static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
4374 ARM::VLD2DUPq16EvenPseudo,
4375 ARM::VLD2DUPq32EvenPseudo };
4376 static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudoWB_fixed,
4377 ARM::VLD2DUPq16OddPseudoWB_fixed,
4378 ARM::VLD2DUPq32OddPseudoWB_fixed };
4379 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 2, DOpcodes, QOpcodes0, QOpcodes1);
4380 return;
4381 }
4382
4383 case ARMISD::VLD3DUP_UPD: {
4384 static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
4385 ARM::VLD3DUPd16Pseudo_UPD,
4386 ARM::VLD3DUPd32Pseudo_UPD,
4387 ARM::VLD1d64TPseudoWB_fixed };
4388 static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
4389 ARM::VLD3DUPq16EvenPseudo,
4390 ARM::VLD3DUPq32EvenPseudo };
4391 static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo_UPD,
4392 ARM::VLD3DUPq16OddPseudo_UPD,
4393 ARM::VLD3DUPq32OddPseudo_UPD };
4394 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
4395 return;
4396 }
4397
4398 case ARMISD::VLD4DUP_UPD: {
4399 static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
4400 ARM::VLD4DUPd16Pseudo_UPD,
4401 ARM::VLD4DUPd32Pseudo_UPD,
4402 ARM::VLD1d64QPseudoWB_fixed };
4403 static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
4404 ARM::VLD4DUPq16EvenPseudo,
4405 ARM::VLD4DUPq32EvenPseudo };
4406 static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo_UPD,
4407 ARM::VLD4DUPq16OddPseudo_UPD,
4408 ARM::VLD4DUPq32OddPseudo_UPD };
4409 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
4410 return;
4411 }
4412
4413 case ARMISD::VLD1_UPD: {
4414 static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
4415 ARM::VLD1d16wb_fixed,
4416 ARM::VLD1d32wb_fixed,
4417 ARM::VLD1d64wb_fixed };
4418 static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
4419 ARM::VLD1q16wb_fixed,
4420 ARM::VLD1q32wb_fixed,
4421 ARM::VLD1q64wb_fixed };
4422 SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
4423 return;
4424 }
4425
4426 case ARMISD::VLD2_UPD: {
4427 if (Subtarget->hasNEON()) {
4428 static const uint16_t DOpcodes[] = {
4429 ARM::VLD2d8wb_fixed, ARM::VLD2d16wb_fixed, ARM::VLD2d32wb_fixed,
4430 ARM::VLD1q64wb_fixed};
4431 static const uint16_t QOpcodes[] = {ARM::VLD2q8PseudoWB_fixed,
4432 ARM::VLD2q16PseudoWB_fixed,
4433 ARM::VLD2q32PseudoWB_fixed};
4434 SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
4435 } else {
4436 static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8,
4437 ARM::MVE_VLD21_8_wb};
4438 static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16,
4439 ARM::MVE_VLD21_16_wb};
4440 static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32,
4441 ARM::MVE_VLD21_32_wb};
4442 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
4443 SelectMVE_VLD(N, 2, Opcodes, true);
4444 }
4445 return;
4446 }
4447
4448 case ARMISD::VLD3_UPD: {
4449 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
4450 ARM::VLD3d16Pseudo_UPD,
4451 ARM::VLD3d32Pseudo_UPD,
4452 ARM::VLD1d64TPseudoWB_fixed};
4453 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
4454 ARM::VLD3q16Pseudo_UPD,
4455 ARM::VLD3q32Pseudo_UPD };
4456 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
4457 ARM::VLD3q16oddPseudo_UPD,
4458 ARM::VLD3q32oddPseudo_UPD };
4459 SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
4460 return;
4461 }
4462
4463 case ARMISD::VLD4_UPD: {
4464 if (Subtarget->hasNEON()) {
4465 static const uint16_t DOpcodes[] = {
4466 ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD, ARM::VLD4d32Pseudo_UPD,
4467 ARM::VLD1d64QPseudoWB_fixed};
4468 static const uint16_t QOpcodes0[] = {ARM::VLD4q8Pseudo_UPD,
4469 ARM::VLD4q16Pseudo_UPD,
4470 ARM::VLD4q32Pseudo_UPD};
4471 static const uint16_t QOpcodes1[] = {ARM::VLD4q8oddPseudo_UPD,
4472 ARM::VLD4q16oddPseudo_UPD,
4473 ARM::VLD4q32oddPseudo_UPD};
4474 SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
4475 } else {
4476 static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8,
4477 ARM::MVE_VLD42_8,
4478 ARM::MVE_VLD43_8_wb};
4479 static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16,
4480 ARM::MVE_VLD42_16,
4481 ARM::MVE_VLD43_16_wb};
4482 static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32,
4483 ARM::MVE_VLD42_32,
4484 ARM::MVE_VLD43_32_wb};
4485 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
4486 SelectMVE_VLD(N, 4, Opcodes, true);
4487 }
4488 return;
4489 }
4490
4491 case ARMISD::VLD1x2_UPD: {
4492 if (Subtarget->hasNEON()) {
4493 static const uint16_t DOpcodes[] = {
4494 ARM::VLD1q8wb_fixed, ARM::VLD1q16wb_fixed, ARM::VLD1q32wb_fixed,
4495 ARM::VLD1q64wb_fixed};
4496 static const uint16_t QOpcodes[] = {
4497 ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed,
4498 ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed};
4499 SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
4500 return;
4501 }
4502 break;
4503 }
4504
4505 case ARMISD::VLD1x3_UPD: {
4506 if (Subtarget->hasNEON()) {
4507 static const uint16_t DOpcodes[] = {
4508 ARM::VLD1d8TPseudoWB_fixed, ARM::VLD1d16TPseudoWB_fixed,
4509 ARM::VLD1d32TPseudoWB_fixed, ARM::VLD1d64TPseudoWB_fixed};
4510 static const uint16_t QOpcodes0[] = {
4511 ARM::VLD1q8LowTPseudo_UPD, ARM::VLD1q16LowTPseudo_UPD,
4512 ARM::VLD1q32LowTPseudo_UPD, ARM::VLD1q64LowTPseudo_UPD};
4513 static const uint16_t QOpcodes1[] = {
4514 ARM::VLD1q8HighTPseudo_UPD, ARM::VLD1q16HighTPseudo_UPD,
4515 ARM::VLD1q32HighTPseudo_UPD, ARM::VLD1q64HighTPseudo_UPD};
4516 SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
4517 return;
4518 }
4519 break;
4520 }
4521
4522 case ARMISD::VLD1x4_UPD: {
4523 if (Subtarget->hasNEON()) {
4524 static const uint16_t DOpcodes[] = {
4525 ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed,
4526 ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed};
4527 static const uint16_t QOpcodes0[] = {
4528 ARM::VLD1q8LowQPseudo_UPD, ARM::VLD1q16LowQPseudo_UPD,
4529 ARM::VLD1q32LowQPseudo_UPD, ARM::VLD1q64LowQPseudo_UPD};
4530 static const uint16_t QOpcodes1[] = {
4531 ARM::VLD1q8HighQPseudo_UPD, ARM::VLD1q16HighQPseudo_UPD,
4532 ARM::VLD1q32HighQPseudo_UPD, ARM::VLD1q64HighQPseudo_UPD};
4533 SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
4534 return;
4535 }
4536 break;
4537 }
4538
4539 case ARMISD::VLD2LN_UPD: {
4540 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
4541 ARM::VLD2LNd16Pseudo_UPD,
4542 ARM::VLD2LNd32Pseudo_UPD };
4543 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
4544 ARM::VLD2LNq32Pseudo_UPD };
4545 SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
4546 return;
4547 }
4548
4549 case ARMISD::VLD3LN_UPD: {
4550 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
4551 ARM::VLD3LNd16Pseudo_UPD,
4552 ARM::VLD3LNd32Pseudo_UPD };
4553 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
4554 ARM::VLD3LNq32Pseudo_UPD };
4555 SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
4556 return;
4557 }
4558
4559 case ARMISD::VLD4LN_UPD: {
4560 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
4561 ARM::VLD4LNd16Pseudo_UPD,
4562 ARM::VLD4LNd32Pseudo_UPD };
4563 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
4564 ARM::VLD4LNq32Pseudo_UPD };
4565 SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
4566 return;
4567 }
4568
4569 case ARMISD::VST1_UPD: {
4570 static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
4571 ARM::VST1d16wb_fixed,
4572 ARM::VST1d32wb_fixed,
4573 ARM::VST1d64wb_fixed };
4574 static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
4575 ARM::VST1q16wb_fixed,
4576 ARM::VST1q32wb_fixed,
4577 ARM::VST1q64wb_fixed };
4578 SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
4579 return;
4580 }
4581
4582 case ARMISD::VST2_UPD: {
4583 if (Subtarget->hasNEON()) {
4584 static const uint16_t DOpcodes[] = {
4585 ARM::VST2d8wb_fixed, ARM::VST2d16wb_fixed, ARM::VST2d32wb_fixed,
4586 ARM::VST1q64wb_fixed};
4587 static const uint16_t QOpcodes[] = {ARM::VST2q8PseudoWB_fixed,
4588 ARM::VST2q16PseudoWB_fixed,
4589 ARM::VST2q32PseudoWB_fixed};
4590 SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
4591 return;
4592 }
4593 break;
4594 }
4595
4596 case ARMISD::VST3_UPD: {
4597 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
4598 ARM::VST3d16Pseudo_UPD,
4599 ARM::VST3d32Pseudo_UPD,
4600 ARM::VST1d64TPseudoWB_fixed};
4601 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
4602 ARM::VST3q16Pseudo_UPD,
4603 ARM::VST3q32Pseudo_UPD };
4604 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
4605 ARM::VST3q16oddPseudo_UPD,
4606 ARM::VST3q32oddPseudo_UPD };
4607 SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
4608 return;
4609 }
4610
4611 case ARMISD::VST4_UPD: {
4612 if (Subtarget->hasNEON()) {
4613 static const uint16_t DOpcodes[] = {
4614 ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD, ARM::VST4d32Pseudo_UPD,
4615 ARM::VST1d64QPseudoWB_fixed};
4616 static const uint16_t QOpcodes0[] = {ARM::VST4q8Pseudo_UPD,
4617 ARM::VST4q16Pseudo_UPD,
4618 ARM::VST4q32Pseudo_UPD};
4619 static const uint16_t QOpcodes1[] = {ARM::VST4q8oddPseudo_UPD,
4620 ARM::VST4q16oddPseudo_UPD,
4621 ARM::VST4q32oddPseudo_UPD};
4622 SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
4623 return;
4624 }
4625 break;
4626 }
4627
4628 case ARMISD::VST1x2_UPD: {
4629 if (Subtarget->hasNEON()) {
4630 static const uint16_t DOpcodes[] = { ARM::VST1q8wb_fixed,
4631 ARM::VST1q16wb_fixed,
4632 ARM::VST1q32wb_fixed,
4633 ARM::VST1q64wb_fixed};
4634 static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudoWB_fixed,
4635 ARM::VST1d16QPseudoWB_fixed,
4636 ARM::VST1d32QPseudoWB_fixed,
4637 ARM::VST1d64QPseudoWB_fixed };
4638 SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
4639 return;
4640 }
4641 break;
4642 }
4643
4644 case ARMISD::VST1x3_UPD: {
4645 if (Subtarget->hasNEON()) {
4646 static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudoWB_fixed,
4647 ARM::VST1d16TPseudoWB_fixed,
4648 ARM::VST1d32TPseudoWB_fixed,
4649 ARM::VST1d64TPseudoWB_fixed };
4650 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
4651 ARM::VST1q16LowTPseudo_UPD,
4652 ARM::VST1q32LowTPseudo_UPD,
4653 ARM::VST1q64LowTPseudo_UPD };
4654 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo_UPD,
4655 ARM::VST1q16HighTPseudo_UPD,
4656 ARM::VST1q32HighTPseudo_UPD,
4657 ARM::VST1q64HighTPseudo_UPD };
4658 SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
4659 return;
4660 }
4661 break;
4662 }
4663
4664 case ARMISD::VST1x4_UPD: {
4665 if (Subtarget->hasNEON()) {
4666 static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudoWB_fixed,
4667 ARM::VST1d16QPseudoWB_fixed,
4668 ARM::VST1d32QPseudoWB_fixed,
4669 ARM::VST1d64QPseudoWB_fixed };
4670 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
4671 ARM::VST1q16LowQPseudo_UPD,
4672 ARM::VST1q32LowQPseudo_UPD,
4673 ARM::VST1q64LowQPseudo_UPD };
4674 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo_UPD,
4675 ARM::VST1q16HighQPseudo_UPD,
4676 ARM::VST1q32HighQPseudo_UPD,
4677 ARM::VST1q64HighQPseudo_UPD };
4678 SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
4679 return;
4680 }
4681 break;
4682 }
4683 case ARMISD::VST2LN_UPD: {
4684 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
4685 ARM::VST2LNd16Pseudo_UPD,
4686 ARM::VST2LNd32Pseudo_UPD };
4687 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
4688 ARM::VST2LNq32Pseudo_UPD };
4689 SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
4690 return;
4691 }
4692
4693 case ARMISD::VST3LN_UPD: {
4694 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
4695 ARM::VST3LNd16Pseudo_UPD,
4696 ARM::VST3LNd32Pseudo_UPD };
4697 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
4698 ARM::VST3LNq32Pseudo_UPD };
4699 SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
4700 return;
4701 }
4702
4703 case ARMISD::VST4LN_UPD: {
4704 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
4705 ARM::VST4LNd16Pseudo_UPD,
4706 ARM::VST4LNd32Pseudo_UPD };
4707 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
4708 ARM::VST4LNq32Pseudo_UPD };
4709 SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
4710 return;
4711 }
4712
4715 unsigned IntNo = N->getConstantOperandVal(1);
4716 switch (IntNo) {
4717 default:
4718 break;
4719
4720 case Intrinsic::arm_mrrc:
4721 case Intrinsic::arm_mrrc2: {
4722 SDLoc dl(N);
4723 SDValue Chain = N->getOperand(0);
4724 unsigned Opc;
4725
4726 if (Subtarget->isThumb())
4727 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2);
4728 else
4729 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);
4730
4732 Ops.push_back(getI32Imm(N->getConstantOperandVal(2), dl)); /* coproc */
4733 Ops.push_back(getI32Imm(N->getConstantOperandVal(3), dl)); /* opc */
4734 Ops.push_back(getI32Imm(N->getConstantOperandVal(4), dl)); /* CRm */
4735
4736 // The ARM mrrc2 instruction does not allow predicates: the top 4 bits of the
4737 // encoded instruction are always '1111'. Assembly language permits specifying AL
4738 // as a predicate to mrrc2, but that makes no difference to the encoded instruction.
4739 if (Opc != ARM::MRRC2) {
4740 Ops.push_back(getAL(CurDAG, dl));
4741 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
4742 }
4743
4744 Ops.push_back(Chain);
4745
4746 // Writes to two registers.
4747 const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other};
4748
4749 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops));
4750 return;
4751 }
4752 case Intrinsic::arm_ldaexd:
4753 case Intrinsic::arm_ldrexd: {
4754 SDLoc dl(N);
4755 SDValue Chain = N->getOperand(0);
4756 SDValue MemAddr = N->getOperand(2);
4757 bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps();
4758
4759 bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
4760 unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
4761 : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);
4762
4763 // arm_ldaexd/arm_ldrexd return an i64 value as {i32, i32}.
4764 std::vector<EVT> ResTys;
4765 if (isThumb) {
4766 ResTys.push_back(MVT::i32);
4767 ResTys.push_back(MVT::i32);
4768 } else
4769 ResTys.push_back(MVT::Untyped);
4770 ResTys.push_back(MVT::Other);
4771
4772 // Place arguments in the right order.
4773 SDValue Ops[] = {MemAddr, getAL(CurDAG, dl),
4774 CurDAG->getRegister(0, MVT::i32), Chain};
4775 SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
4776 // Transfer memoperands.
4777 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
4778 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
4779
4780 // Remap uses.
4781 SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
4782 if (!SDValue(N, 0).use_empty()) {
4784 if (isThumb)
4785 Result = SDValue(Ld, 0);
4786 else {
4787 SDValue SubRegIdx =
4788 CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
4789 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
4790 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
4791 Result = SDValue(ResNode,0);
4792 }
4793 ReplaceUses(SDValue(N, 0), Result);
4794 }
4795 if (!SDValue(N, 1).use_empty()) {
4797 if (isThumb)
4798 Result = SDValue(Ld, 1);
4799 else {
4800 SDValue SubRegIdx =
4801 CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
4802 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
4803 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
4804 Result = SDValue(ResNode,0);
4805 }
4806 ReplaceUses(SDValue(N, 1), Result);
4807 }
4808 ReplaceUses(SDValue(N, 2), OutChain);
4809 CurDAG->RemoveDeadNode(N);
4810 return;
4811 }
4812 case Intrinsic::arm_stlexd:
4813 case Intrinsic::arm_strexd: {
4814 SDLoc dl(N);
4815 SDValue Chain = N->getOperand(0);
4816 SDValue Val0 = N->getOperand(2);
4817 SDValue Val1 = N->getOperand(3);
4818 SDValue MemAddr = N->getOperand(4);
4819
4820 // Store-exclusive-double returns an i32 value, which is the return status
4821 // of the issued store.
4822 const EVT ResTys[] = {MVT::i32, MVT::Other};
4823
4824 bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
4825 // Place arguments in the right order.
4827 if (isThumb) {
4828 Ops.push_back(Val0);
4829 Ops.push_back(Val1);
4830 } else
4831 // arm_strexd uses GPRPair.
4832 Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
4833 Ops.push_back(MemAddr);
4834 Ops.push_back(getAL(CurDAG, dl));
4835 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
4836 Ops.push_back(Chain);
4837
4838 bool IsRelease = IntNo == Intrinsic::arm_stlexd;
4839 unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
4840 : (IsRelease ? ARM::STLEXD : ARM::STREXD);
4841
4842 SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
4843 // Transfer memoperands.
4844 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
4845 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
4846
4847 ReplaceNode(N, St);
4848 return;
4849 }
4850
4851 case Intrinsic::arm_neon_vld1: {
4852 static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
4853 ARM::VLD1d32, ARM::VLD1d64 };
4854 static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
4855 ARM::VLD1q32, ARM::VLD1q64};
4856 SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
4857 return;
4858 }
4859
4860 case Intrinsic::arm_neon_vld1x2: {
4861 static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
4862 ARM::VLD1q32, ARM::VLD1q64 };
4863 static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo,
4864 ARM::VLD1d16QPseudo,
4865 ARM::VLD1d32QPseudo,
4866 ARM::VLD1d64QPseudo };
4867 SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
4868 return;
4869 }
4870
4871 case Intrinsic::arm_neon_vld1x3: {
4872 static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo,
4873 ARM::VLD1d16TPseudo,
4874 ARM::VLD1d32TPseudo,
4875 ARM::VLD1d64TPseudo };
4876 static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowTPseudo_UPD,
4877 ARM::VLD1q16LowTPseudo_UPD,
4878 ARM::VLD1q32LowTPseudo_UPD,
4879 ARM::VLD1q64LowTPseudo_UPD };
4880 static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo,
4881 ARM::VLD1q16HighTPseudo,
4882 ARM::VLD1q32HighTPseudo,
4883 ARM::VLD1q64HighTPseudo };
4884 SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
4885 return;
4886 }
4887
4888 case Intrinsic::arm_neon_vld1x4: {
4889 static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo,
4890 ARM::VLD1d16QPseudo,
4891 ARM::VLD1d32QPseudo,
4892 ARM::VLD1d64QPseudo };
4893 static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD,
4894 ARM::VLD1q16LowQPseudo_UPD,
4895 ARM::VLD1q32LowQPseudo_UPD,
4896 ARM::VLD1q64LowQPseudo_UPD };
4897 static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo,
4898 ARM::VLD1q16HighQPseudo,
4899 ARM::VLD1q32HighQPseudo,
4900 ARM::VLD1q64HighQPseudo };
4901 SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
4902 return;
4903 }
4904
4905 case Intrinsic::arm_neon_vld2: {
4906 static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
4907 ARM::VLD2d32, ARM::VLD1q64 };
4908 static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
4909 ARM::VLD2q32Pseudo };
4910 SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
4911 return;
4912 }
4913
4914 case Intrinsic::arm_neon_vld3: {
4915 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
4916 ARM::VLD3d16Pseudo,
4917 ARM::VLD3d32Pseudo,
4918 ARM::VLD1d64TPseudo };
4919 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
4920 ARM::VLD3q16Pseudo_UPD,
4921 ARM::VLD3q32Pseudo_UPD };
4922 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
4923 ARM::VLD3q16oddPseudo,
4924 ARM::VLD3q32oddPseudo };
4925 SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
4926 return;
4927 }
4928
4929 case Intrinsic::arm_neon_vld4: {
4930 static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
4931 ARM::VLD4d16Pseudo,
4932 ARM::VLD4d32Pseudo,
4933 ARM::VLD1d64QPseudo };
4934 static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
4935 ARM::VLD4q16Pseudo_UPD,
4936 ARM::VLD4q32Pseudo_UPD };
4937 static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
4938 ARM::VLD4q16oddPseudo,
4939 ARM::VLD4q32oddPseudo };
4940 SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
4941 return;
4942 }
4943
4944 case Intrinsic::arm_neon_vld2dup: {
4945 static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
4946 ARM::VLD2DUPd32, ARM::VLD1q64 };
4947 static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
4948 ARM::VLD2DUPq16EvenPseudo,
4949 ARM::VLD2DUPq32EvenPseudo };
4950 static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudo,
4951 ARM::VLD2DUPq16OddPseudo,
4952 ARM::VLD2DUPq32OddPseudo };
4953 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 2,
4954 DOpcodes, QOpcodes0, QOpcodes1);
4955 return;
4956 }
4957
4958 case Intrinsic::arm_neon_vld3dup: {
4959 static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo,
4960 ARM::VLD3DUPd16Pseudo,
4961 ARM::VLD3DUPd32Pseudo,
4962 ARM::VLD1d64TPseudo };
4963 static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
4964 ARM::VLD3DUPq16EvenPseudo,
4965 ARM::VLD3DUPq32EvenPseudo };
4966 static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo,
4967 ARM::VLD3DUPq16OddPseudo,
4968 ARM::VLD3DUPq32OddPseudo };
4969 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 3,
4970 DOpcodes, QOpcodes0, QOpcodes1);
4971 return;
4972 }
4973
4974 case Intrinsic::arm_neon_vld4dup: {
4975 static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo,
4976 ARM::VLD4DUPd16Pseudo,
4977 ARM::VLD4DUPd32Pseudo,
4978 ARM::VLD1d64QPseudo };
4979 static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
4980 ARM::VLD4DUPq16EvenPseudo,
4981 ARM::VLD4DUPq32EvenPseudo };
4982 static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo,
4983 ARM::VLD4DUPq16OddPseudo,
4984 ARM::VLD4DUPq32OddPseudo };
4985 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 4,
4986 DOpcodes, QOpcodes0, QOpcodes1);
4987 return;
4988 }
4989
4990 case Intrinsic::arm_neon_vld2lane: {
4991 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
4992 ARM::VLD2LNd16Pseudo,
4993 ARM::VLD2LNd32Pseudo };
4994 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
4995 ARM::VLD2LNq32Pseudo };
4996 SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
4997 return;
4998 }
4999
5000 case Intrinsic::arm_neon_vld3lane: {
5001 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
5002 ARM::VLD3LNd16Pseudo,
5003 ARM::VLD3LNd32Pseudo };
5004 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
5005 ARM::VLD3LNq32Pseudo };
5006 SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
5007 return;
5008 }
5009
5010 case Intrinsic::arm_neon_vld4lane: {
5011 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
5012 ARM::VLD4LNd16Pseudo,
5013 ARM::VLD4LNd32Pseudo };
5014 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
5015 ARM::VLD4LNq32Pseudo };
5016 SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
5017 return;
5018 }
5019
5020 case Intrinsic::arm_neon_vst1: {
5021 static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
5022 ARM::VST1d32, ARM::VST1d64 };
5023 static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
5024 ARM::VST1q32, ARM::VST1q64 };
5025 SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
5026 return;
5027 }
5028
5029 case Intrinsic::arm_neon_vst1x2: {
5030 static const uint16_t DOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
5031 ARM::VST1q32, ARM::VST1q64 };
5032 static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudo,
5033 ARM::VST1d16QPseudo,
5034 ARM::VST1d32QPseudo,
5035 ARM::VST1d64QPseudo };
5036 SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
5037 return;
5038 }
5039
5040 case Intrinsic::arm_neon_vst1x3: {
5041 static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudo,
5042 ARM::VST1d16TPseudo,
5043 ARM::VST1d32TPseudo,
5044 ARM::VST1d64TPseudo };
5045 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
5046 ARM::VST1q16LowTPseudo_UPD,
5047 ARM::VST1q32LowTPseudo_UPD,
5048 ARM::VST1q64LowTPseudo_UPD };
5049 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo,
5050 ARM::VST1q16HighTPseudo,
5051 ARM::VST1q32HighTPseudo,
5052 ARM::VST1q64HighTPseudo };
5053 SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
5054 return;
5055 }
5056
5057 case Intrinsic::arm_neon_vst1x4: {
5058 static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudo,
5059 ARM::VST1d16QPseudo,
5060 ARM::VST1d32QPseudo,
5061 ARM::VST1d64QPseudo };
5062 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
5063 ARM::VST1q16LowQPseudo_UPD,
5064 ARM::VST1q32LowQPseudo_UPD,
5065 ARM::VST1q64LowQPseudo_UPD };
5066 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo,
5067 ARM::VST1q16HighQPseudo,
5068 ARM::VST1q32HighQPseudo,
5069 ARM::VST1q64HighQPseudo };
5070 SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
5071 return;
5072 }
5073
5074 case Intrinsic::arm_neon_vst2: {
5075 static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
5076 ARM::VST2d32, ARM::VST1q64 };
5077 static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
5078 ARM::VST2q32Pseudo };
5079 SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
5080 return;
5081 }
5082
5083 case Intrinsic::arm_neon_vst3: {
5084 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
5085 ARM::VST3d16Pseudo,
5086 ARM::VST3d32Pseudo,
5087 ARM::VST1d64TPseudo };
5088 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
5089 ARM::VST3q16Pseudo_UPD,
5090 ARM::VST3q32Pseudo_UPD };
5091 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
5092 ARM::VST3q16oddPseudo,
5093 ARM::VST3q32oddPseudo };
5094 SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
5095 return;
5096 }
5097
5098 case Intrinsic::arm_neon_vst4: {
5099 static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
5100 ARM::VST4d16Pseudo,
5101 ARM::VST4d32Pseudo,
5102 ARM::VST1d64QPseudo };
5103 static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
5104 ARM::VST4q16Pseudo_UPD,
5105 ARM::VST4q32Pseudo_UPD };
5106 static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
5107 ARM::VST4q16oddPseudo,
5108 ARM::VST4q32oddPseudo };
5109 SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
5110 return;
5111 }
5112
5113 case Intrinsic::arm_neon_vst2lane: {
5114 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
5115 ARM::VST2LNd16Pseudo,
5116 ARM::VST2LNd32Pseudo };
5117 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
5118 ARM::VST2LNq32Pseudo };
5119 SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
5120 return;
5121 }
5122
5123 case Intrinsic::arm_neon_vst3lane: {
5124 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
5125 ARM::VST3LNd16Pseudo,
5126 ARM::VST3LNd32Pseudo };
5127 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
5128 ARM::VST3LNq32Pseudo };
5129 SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
5130 return;
5131 }
5132
5133 case Intrinsic::arm_neon_vst4lane: {
5134 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
5135 ARM::VST4LNd16Pseudo,
5136 ARM::VST4LNd32Pseudo };
5137 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
5138 ARM::VST4LNq32Pseudo };
5139 SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
5140 return;
5141 }
5142
5143 case Intrinsic::arm_mve_vldr_gather_base_wb:
5144 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated: {
5145 static const uint16_t Opcodes[] = {ARM::MVE_VLDRWU32_qi_pre,
5146 ARM::MVE_VLDRDU64_qi_pre};
5147 SelectMVE_WB(N, Opcodes,
5148 IntNo == Intrinsic::arm_mve_vldr_gather_base_wb_predicated);
5149 return;
5150 }
5151
5152 case Intrinsic::arm_mve_vld2q: {
5153 static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8, ARM::MVE_VLD21_8};
5154 static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16,
5155 ARM::MVE_VLD21_16};
5156 static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32,
5157 ARM::MVE_VLD21_32};
5158 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
5159 SelectMVE_VLD(N, 2, Opcodes, false);
5160 return;
5161 }
5162
5163 case Intrinsic::arm_mve_vld4q: {
5164 static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8,
5165 ARM::MVE_VLD42_8, ARM::MVE_VLD43_8};
5166 static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16,
5167 ARM::MVE_VLD42_16,
5168 ARM::MVE_VLD43_16};
5169 static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32,
5170 ARM::MVE_VLD42_32,
5171 ARM::MVE_VLD43_32};
5172 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
5173 SelectMVE_VLD(N, 4, Opcodes, false);
5174 return;
5175 }
5176 }
5177 break;
5178 }
5179
5181 unsigned IntNo = N->getConstantOperandVal(0);
5182 switch (IntNo) {
5183 default:
5184 break;
5185
5186 // Scalar f32 -> bf16
5187 case Intrinsic::arm_neon_vcvtbfp2bf: {
5188 SDLoc dl(N);
5189 const SDValue &Src = N->getOperand(1);
5190 llvm::EVT DestTy = N->getValueType(0);
5191 SDValue Pred = getAL(CurDAG, dl);
5192 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
5193 SDValue Ops[] = { Src, Src, Pred, Reg0 };
5194 CurDAG->SelectNodeTo(N, ARM::BF16_VCVTB, DestTy, Ops);
5195 return;
5196 }
5197
5198 // Vector v4f32 -> v4bf16
5199 case Intrinsic::arm_neon_vcvtfp2bf: {
5200 SDLoc dl(N);
5201 const SDValue &Src = N->getOperand(1);
5202 SDValue Pred = getAL(CurDAG, dl);
5203 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
5204 SDValue Ops[] = { Src, Pred, Reg0 };
5205 CurDAG->SelectNodeTo(N, ARM::BF16_VCVT, MVT::v4bf16, Ops);
5206 return;
5207 }
5208
5209 case Intrinsic::arm_mve_urshrl:
5210 SelectMVE_LongShift(N, ARM::MVE_URSHRL, true, false);
5211 return;
5212 case Intrinsic::arm_mve_uqshll:
5213 SelectMVE_LongShift(N, ARM::MVE_UQSHLL, true, false);
5214 return;
5215 case Intrinsic::arm_mve_srshrl:
5216 SelectMVE_LongShift(N, ARM::MVE_SRSHRL, true, false);
5217 return;
5218 case Intrinsic::arm_mve_sqshll:
5219 SelectMVE_LongShift(N, ARM::MVE_SQSHLL, true, false);
5220 return;
5221 case Intrinsic::arm_mve_uqrshll:
5222 SelectMVE_LongShift(N, ARM::MVE_UQRSHLL, false, true);
5223 return;
5224 case Intrinsic::arm_mve_sqrshrl:
5225 SelectMVE_LongShift(N, ARM::MVE_SQRSHRL, false, true);
5226 return;
5227
5228 case Intrinsic::arm_mve_vadc:
5229 case Intrinsic::arm_mve_vadc_predicated:
5230 SelectMVE_VADCSBC(N, ARM::MVE_VADC, ARM::MVE_VADCI, true,
5231 IntNo == Intrinsic::arm_mve_vadc_predicated);
5232 return;
5233 case Intrinsic::arm_mve_vsbc:
5234 case Intrinsic::arm_mve_vsbc_predicated:
5235 SelectMVE_VADCSBC(N, ARM::MVE_VSBC, ARM::MVE_VSBCI, true,
5236 IntNo == Intrinsic::arm_mve_vsbc_predicated);
5237 return;
5238 case Intrinsic::arm_mve_vshlc:
5239 case Intrinsic::arm_mve_vshlc_predicated:
5240 SelectMVE_VSHLC(N, IntNo == Intrinsic::arm_mve_vshlc_predicated);
5241 return;
5242
5243 case Intrinsic::arm_mve_vmlldava:
5244 case Intrinsic::arm_mve_vmlldava_predicated: {
5245 static const uint16_t OpcodesU[] = {
5246 ARM::MVE_VMLALDAVu16, ARM::MVE_VMLALDAVu32,
5247 ARM::MVE_VMLALDAVau16, ARM::MVE_VMLALDAVau32,
5248 };
5249 static const uint16_t OpcodesS[] = {
5250 ARM::MVE_VMLALDAVs16, ARM::MVE_VMLALDAVs32,
5251 ARM::MVE_VMLALDAVas16, ARM::MVE_VMLALDAVas32,
5252 ARM::MVE_VMLALDAVxs16, ARM::MVE_VMLALDAVxs32,
5253 ARM::MVE_VMLALDAVaxs16, ARM::MVE_VMLALDAVaxs32,
5254 ARM::MVE_VMLSLDAVs16, ARM::MVE_VMLSLDAVs32,
5255 ARM::MVE_VMLSLDAVas16, ARM::MVE_VMLSLDAVas32,
5256 ARM::MVE_VMLSLDAVxs16, ARM::MVE_VMLSLDAVxs32,
5257 ARM::MVE_VMLSLDAVaxs16, ARM::MVE_VMLSLDAVaxs32,
5258 };
5259 SelectMVE_VMLLDAV(N, IntNo == Intrinsic::arm_mve_vmlldava_predicated,
5260 OpcodesS, OpcodesU);
5261 return;
5262 }
5263
5264 case Intrinsic::arm_mve_vrmlldavha:
5265 case Intrinsic::arm_mve_vrmlldavha_predicated: {
5266 static const uint16_t OpcodesU[] = {
5267 ARM::MVE_VRMLALDAVHu32, ARM::MVE_VRMLALDAVHau32,
5268 };
5269 static const uint16_t OpcodesS[] = {
5270 ARM::MVE_VRMLALDAVHs32, ARM::MVE_VRMLALDAVHas32,
5271 ARM::MVE_VRMLALDAVHxs32, ARM::MVE_VRMLALDAVHaxs32,
5272 ARM::MVE_VRMLSLDAVHs32, ARM::MVE_VRMLSLDAVHas32,
5273 ARM::MVE_VRMLSLDAVHxs32, ARM::MVE_VRMLSLDAVHaxs32,
5274 };
5275 SelectMVE_VRMLLDAVH(N, IntNo == Intrinsic::arm_mve_vrmlldavha_predicated,
5276 OpcodesS, OpcodesU);
5277 return;
5278 }
5279
5280 case Intrinsic::arm_mve_vidup:
5281 case Intrinsic::arm_mve_vidup_predicated: {
5282 static const uint16_t Opcodes[] = {
5283 ARM::MVE_VIDUPu8, ARM::MVE_VIDUPu16, ARM::MVE_VIDUPu32,
5284 };
5285 SelectMVE_VxDUP(N, Opcodes, false,
5286 IntNo == Intrinsic::arm_mve_vidup_predicated);
5287 return;
5288 }
5289
5290 case Intrinsic::arm_mve_vddup:
5291 case Intrinsic::arm_mve_vddup_predicated: {
5292 static const uint16_t Opcodes[] = {
5293 ARM::MVE_VDDUPu8, ARM::MVE_VDDUPu16, ARM::MVE_VDDUPu32,
5294 };
5295 SelectMVE_VxDUP(N, Opcodes, false,
5296 IntNo == Intrinsic::arm_mve_vddup_predicated);
5297 return;
5298 }
5299
5300 case Intrinsic::arm_mve_viwdup:
5301 case Intrinsic::arm_mve_viwdup_predicated: {
5302 static const uint16_t Opcodes[] = {
5303 ARM::MVE_VIWDUPu8, ARM::MVE_VIWDUPu16, ARM::MVE_VIWDUPu32,
5304 };
5305 SelectMVE_VxDUP(N, Opcodes, true,
5306 IntNo == Intrinsic::arm_mve_viwdup_predicated);
5307 return;
5308 }
5309
5310 case Intrinsic::arm_mve_vdwdup:
5311 case Intrinsic::arm_mve_vdwdup_predicated: {
5312 static const uint16_t Opcodes[] = {
5313 ARM::MVE_VDWDUPu8, ARM::MVE_VDWDUPu16, ARM::MVE_VDWDUPu32,
5314 };
5315 SelectMVE_VxDUP(N, Opcodes, true,
5316 IntNo == Intrinsic::arm_mve_vdwdup_predicated);
5317 return;
5318 }
5319
5320 case Intrinsic::arm_cde_cx1d:
5321 case Intrinsic::arm_cde_cx1da:
5322 case Intrinsic::arm_cde_cx2d:
5323 case Intrinsic::arm_cde_cx2da:
5324 case Intrinsic::arm_cde_cx3d:
5325 case Intrinsic::arm_cde_cx3da: {
5326 bool HasAccum = IntNo == Intrinsic::arm_cde_cx1da ||
5327 IntNo == Intrinsic::arm_cde_cx2da ||
5328 IntNo == Intrinsic::arm_cde_cx3da;
5329 size_t NumExtraOps;
5330 uint16_t Opcode;
5331 switch (IntNo) {
5332 case Intrinsic::arm_cde_cx1d:
5333 case Intrinsic::arm_cde_cx1da:
5334 NumExtraOps = 0;
5335 Opcode = HasAccum ? ARM::CDE_CX1DA : ARM::CDE_CX1D;
5336 break;
5337 case Intrinsic::arm_cde_cx2d:
5338 case Intrinsic::arm_cde_cx2da:
5339 NumExtraOps = 1;
5340 Opcode = HasAccum ? ARM::CDE_CX2DA : ARM::CDE_CX2D;
5341 break;
5342 case Intrinsic::arm_cde_cx3d:
5343 case Intrinsic::arm_cde_cx3da:
5344 NumExtraOps = 2;
5345 Opcode = HasAccum ? ARM::CDE_CX3DA : ARM::CDE_CX3D;
5346 break;
5347 default:
5348 llvm_unreachable("Unexpected opcode");
5349 }
5350 SelectCDE_CXxD(N, Opcode, NumExtraOps, HasAccum);
5351 return;
5352 }
5353 }
5354 break;
5355 }
5356
5358 SelectCMP_SWAP(N);
5359 return;
5360 }
5361
5362 SelectCode(N);
5363}
5364
5365// Inspect a register string of the form
5366// cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or
5367// cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string
5368// and obtain the integer operands from them, adding these operands to the
5369// provided vector.
5371 SelectionDAG *CurDAG,
5372 const SDLoc &DL,
5373 std::vector<SDValue> &Ops) {
5375 RegString.split(Fields, ':');
5376
5377 if (Fields.size() > 1) {
5378 bool AllIntFields = true;
5379
5380 for (StringRef Field : Fields) {
5381 // Need to trim out leading 'cp' characters and get the integer field.
5382 unsigned IntField;
5383 AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
5384 Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
5385 }
5386
5387 assert(AllIntFields &&
5388 "Unexpected non-integer value in special register string.");
5389 (void)AllIntFields;
5390 }
5391}
5392
5393// Maps a Banked Register string to its mask value. The mask value returned is
5394// for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
5395// mask operand, which expresses which register is to be used, e.g. r8, and in
5396// which mode it is to be used, e.g. usr. Returns -1 to signify that the string
5397// was invalid.
5398static inline int getBankedRegisterMask(StringRef RegString) {
5399 auto TheReg = ARMBankedReg::lookupBankedRegByName(RegString.lower());
5400 if (!TheReg)
5401 return -1;
5402 return TheReg->Encoding;
5403}
5404
5405// The flags here are common to those allowed for apsr in the A class cores and
5406// those allowed for the special registers in the M class cores. Returns a
5407// value representing which flags were present, -1 if invalid.
5408static inline int getMClassFlagsMask(StringRef Flags) {
5409 return StringSwitch<int>(Flags)
5410 .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is
5411 // correct when flags are not permitted
5412 .Case("g", 0x1)
5413 .Case("nzcvq", 0x2)
5414 .Case("nzcvqg", 0x3)
5415 .Default(-1);
5416}
5417
5418// Maps MClass special registers string to its value for use in the
5419// t2MRS_M/t2MSR_M instruction nodes as the SYSm value operand.
5420// Returns -1 to signify that the string was invalid.
5421static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget) {
5422 auto TheReg = ARMSysReg::lookupMClassSysRegByName(Reg);
5423 const FeatureBitset &FeatureBits = Subtarget->getFeatureBits();
5424 if (!TheReg || !TheReg->hasRequiredFeatures(FeatureBits))
5425 return -1;
5426 return (int)(TheReg->Encoding & 0xFFF); // SYSm value
5427}
5428
5430 // The mask operand contains the special register (R Bit) in bit 4, whether
5431 // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
5432 // bits 3-0 contains the fields to be accessed in the special register, set by
5433 // the flags provided with the register.
5434 int Mask = 0;
5435 if (Reg == "apsr") {
5436 // The flags permitted for apsr are the same flags that are allowed in
5437 // M class registers. We get the flag value and then shift the flags into
5438 // the correct place to combine with the mask.
5439 Mask = getMClassFlagsMask(Flags);
5440 if (Mask == -1)
5441 return -1;
5442 return Mask << 2;
5443 }
5444
5445 if (Reg != "cpsr" && Reg != "spsr") {
5446 return -1;
5447 }
5448
5449 // This is the same as if the flags were "fc"
5450 if (Flags.empty() || Flags == "all")
5451 return Mask | 0x9;
5452
5453 // Inspect the supplied flags string and set the bits in the mask for
5454 // the relevant and valid flags allowed for cpsr and spsr.
5455 for (char Flag : Flags) {
5456 int FlagVal;
5457 switch (Flag) {
5458 case 'c':
5459 FlagVal = 0x1;
5460 break;
5461 case 'x':
5462 FlagVal = 0x2;
5463 break;
5464 case 's':
5465 FlagVal = 0x4;
5466 break;
5467 case 'f':
5468 FlagVal = 0x8;
5469 break;
5470 default:
5471 FlagVal = 0;
5472 }
5473
5474 // This avoids allowing strings where the same flag bit appears twice.
5475 if (!FlagVal || (Mask & FlagVal))
5476 return -1;
5477 Mask |= FlagVal;
5478 }
5479
5480 // If the register is spsr then we need to set the R bit.
5481 if (Reg == "spsr")
5482 Mask |= 0x10;
5483
5484 return Mask;
5485}
5486
5487// Lower the read_register intrinsic to ARM specific DAG nodes
5488// using the supplied metadata string to select the instruction node to use
5489// and the registers/masks to construct as operands for the node.
5490bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){
5491 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
5492 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
5493 bool IsThumb2 = Subtarget->isThumb2();
5494 SDLoc DL(N);
5495
5496 std::vector<SDValue> Ops;
5497 getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
5498
5499 if (!Ops.empty()) {
5500 // If the special register string was constructed of fields (as defined
5501 // in the ACLE) then need to lower to MRC node (32 bit) or
5502 // MRRC node(64 bit), we can make the distinction based on the number of
5503 // operands we have.
5504 unsigned Opcode;
5505 SmallVector<EVT, 3> ResTypes;
5506 if (Ops.size() == 5){
5507 Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
5508 ResTypes.append({ MVT::i32, MVT::Other });
5509 } else {
5510 assert(Ops.size() == 3 &&
5511 "Invalid number of fields in special register string.");
5512 Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
5513 ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
5514 }
5515
5516 Ops.push_back(getAL(CurDAG, DL));
5517 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
5518 Ops.push_back(N->getOperand(0));
5519 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops));
5520 return true;
5521 }
5522
5523 std::string SpecialReg = RegString->getString().lower();
5524
5525 int BankedReg = getBankedRegisterMask(SpecialReg);
5526 if (BankedReg != -1) {
5527 Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
5528 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
5529 N->getOperand(0) };
5530 ReplaceNode(
5531 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
5532 DL, MVT::i32, MVT::Other, Ops));
5533 return true;
5534 }
5535
5536 // The VFP registers are read by creating SelectionDAG nodes with opcodes
5537 // corresponding to the register that is being read from. So we switch on the
5538 // string to find which opcode we need to use.
5539 unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
5540 .Case("fpscr", ARM::VMRS)
5541 .Case("fpexc", ARM::VMRS_FPEXC)
5542 .Case("fpsid", ARM::VMRS_FPSID)
5543 .Case("mvfr0", ARM::VMRS_MVFR0)
5544 .Case("mvfr1", ARM::VMRS_MVFR1)
5545 .Case("mvfr2", ARM::VMRS_MVFR2)
5546 .Case("fpinst", ARM::VMRS_FPINST)
5547 .Case("fpinst2", ARM::VMRS_FPINST2)
5548 .Default(0);
5549
5550 // If an opcode was found then we can lower the read to a VFP instruction.
5551 if (Opcode) {
5552 if (!Subtarget->hasVFP2Base())
5553 return false;
5554 if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8Base())
5555 return false;
5556
5557 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
5558 N->getOperand(0) };
5559 ReplaceNode(N,
5560 CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops));
5561 return true;
5562 }
5563
5564 // If the target is M Class then need to validate that the register string
5565 // is an acceptable value, so check that a mask can be constructed from the
5566 // string.
5567 if (Subtarget->isMClass()) {
5568 int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
5569 if (SYSmValue == -1)
5570 return false;
5571
5572 SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
5573 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
5574 N->getOperand(0) };
5575 ReplaceNode(
5576 N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops));
5577 return true;
5578 }
5579
5580 // Here we know the target is not M Class so we need to check if it is one
5581 // of the remaining possible values which are apsr, cpsr or spsr.
5582 if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
5583 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
5584 N->getOperand(0) };
5585 ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS,
5586 DL, MVT::i32, MVT::Other, Ops));
5587 return true;
5588 }
5589
5590 if (SpecialReg == "spsr") {
5591 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
5592 N->getOperand(0) };
5593 ReplaceNode(
5594 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, DL,
5595 MVT::i32, MVT::Other, Ops));
5596 return true;
5597 }
5598
5599 return false;
5600}
5601
5602// Lower the write_register intrinsic to ARM specific DAG nodes
5603// using the supplied metadata string to select the instruction node to use
5604// and the registers/masks to use in the nodes
5605bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){
5606 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
5607 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
5608 bool IsThumb2 = Subtarget->isThumb2();
5609 SDLoc DL(N);
5610
5611 std::vector<SDValue> Ops;
5612 getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
5613
5614 if (!Ops.empty()) {
5615 // If the special register string was constructed of fields (as defined
5616 // in the ACLE) then need to lower to MCR node (32 bit) or
5617 // MCRR node(64 bit), we can make the distinction based on the number of
5618 // operands we have.
5619 unsigned Opcode;
5620 if (Ops.size() == 5) {
5621 Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
5622 Ops.insert(Ops.begin()+2, N->getOperand(2));
5623 } else {
5624 assert(Ops.size() == 3 &&
5625 "Invalid number of fields in special register string.");
5626 Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
5627 SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
5628 Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);
5629 }
5630
5631 Ops.push_back(getAL(CurDAG, DL));
5632 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
5633 Ops.push_back(N->getOperand(0));
5634
5635 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
5636 return true;
5637 }
5638
5639 std::string SpecialReg = RegString->getString().lower();
5640 int BankedReg = getBankedRegisterMask(SpecialReg);
5641 if (BankedReg != -1) {
5642 Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2),
5643 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
5644 N->getOperand(0) };
5645 ReplaceNode(
5646 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
5647 DL, MVT::Other, Ops));
5648 return true;
5649 }
5650
5651 // The VFP registers are written to by creating SelectionDAG nodes with
5652 // opcodes corresponding to the register that is being written. So we switch
5653 // on the string to find which opcode we need to use.
5654 unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
5655 .Case("fpscr", ARM::VMSR)
5656 .Case("fpexc", ARM::VMSR_FPEXC)
5657 .Case("fpsid", ARM::VMSR_FPSID)
5658 .Case("fpinst", ARM::VMSR_FPINST)
5659 .Case("fpinst2", ARM::VMSR_FPINST2)
5660 .Default(0);
5661
5662 if (Opcode) {
5663 if (!Subtarget->hasVFP2Base())
5664 return false;
5665 Ops = { N->getOperand(2), getAL(CurDAG, DL),
5666 CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
5667 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
5668 return true;
5669 }
5670
5671 std::pair<StringRef, StringRef> Fields;
5672 Fields = StringRef(SpecialReg).rsplit('_');
5673 std::string Reg = Fields.first.str();
5674 StringRef Flags = Fields.second;
5675
5676 // If the target was M Class then need to validate the special register value
5677 // and retrieve the mask for use in the instruction node.
5678 if (Subtarget->isMClass()) {
5679 int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
5680 if (SYSmValue == -1)
5681 return false;
5682
5683 SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
5684 N->getOperand(2), getAL(CurDAG, DL),
5685 CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
5686 ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops));
5687 return true;
5688 }
5689
5690 // We then check to see if a valid mask can be constructed for one of the
5691 // register string values permitted for the A and R class cores. These values
5692 // are apsr, spsr and cpsr; these are also valid on older cores.
5693 int Mask = getARClassRegisterMask(Reg, Flags);
5694 if (Mask != -1) {
5695 Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
5696 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
5697 N->getOperand(0) };
5698 ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
5699 DL, MVT::Other, Ops));
5700 return true;
5701 }
5702
5703 return false;
5704}
5705
5706bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){
5707 std::vector<SDValue> AsmNodeOperands;
5709 bool Changed = false;
5710 unsigned NumOps = N->getNumOperands();
5711
5712 // Normally, i64 data is bounded to two arbitrary GRPs for "%r" constraint.
5713 // However, some instrstions (e.g. ldrexd/strexd in ARM mode) require
5714 // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
5715 // respectively. Since there is no constraint to explicitly specify a
5716 // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb,
5717 // the 64-bit data may be referred by H, Q, R modifiers, so we still pack
5718 // them into a GPRPair.
5719
5720 SDLoc dl(N);
5721 SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps - 1) : SDValue();
5722
5723 SmallVector<bool, 8> OpChanged;
5724 // Glue node will be appended late.
5725 for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) {
5726 SDValue op = N->getOperand(i);
5727 AsmNodeOperands.push_back(op);
5728
5730 continue;
5731
5732 if (const auto *C = dyn_cast<ConstantSDNode>(N->getOperand(i)))
5733 Flag = InlineAsm::Flag(C->getZExtValue());
5734 else
5735 continue;
5736
5737 // Immediate operands to inline asm in the SelectionDAG are modeled with
5738 // two operands. The first is a constant of value InlineAsm::Kind::Imm, and
5739 // the second is a constant with the value of the immediate. If we get here
5740 // and we have a Kind::Imm, skip the next operand, and continue.
5741 if (Flag.isImmKind()) {
5742 SDValue op = N->getOperand(++i);
5743 AsmNodeOperands.push_back(op);
5744 continue;
5745 }
5746
5747 const unsigned NumRegs = Flag.getNumOperandRegisters();
5748 if (NumRegs)
5749 OpChanged.push_back(false);
5750
5751 unsigned DefIdx = 0;
5752 bool IsTiedToChangedOp = false;
5753 // If it's a use that is tied with a previous def, it has no
5754 // reg class constraint.
5755 if (Changed && Flag.isUseOperandTiedToDef(DefIdx))
5756 IsTiedToChangedOp = OpChanged[DefIdx];
5757
5758 // Memory operands to inline asm in the SelectionDAG are modeled with two
5759 // operands: a constant of value InlineAsm::Kind::Mem followed by the input
5760 // operand. If we get here and we have a Kind::Mem, skip the next operand
5761 // (so it doesn't get misinterpreted), and continue. We do this here because
5762 // it's important to update the OpChanged array correctly before moving on.
5763 if (Flag.isMemKind()) {
5764 SDValue op = N->getOperand(++i);
5765 AsmNodeOperands.push_back(op);
5766 continue;
5767 }
5768
5769 if (!Flag.isRegUseKind() && !Flag.isRegDefKind() &&
5770 !Flag.isRegDefEarlyClobberKind())
5771 continue;
5772
5773 unsigned RC;
5774 const bool HasRC = Flag.hasRegClassConstraint(RC);
5775 if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
5776 || NumRegs != 2)
5777 continue;
5778
5779 assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
5780 SDValue V0 = N->getOperand(i+1);
5781 SDValue V1 = N->getOperand(i+2);
5782 Register Reg0 = cast<RegisterSDNode>(V0)->getReg();
5783 Register Reg1 = cast<RegisterSDNode>(V1)->getReg();
5784 SDValue PairedReg;
5786
5787 if (Flag.isRegDefKind() || Flag.isRegDefEarlyClobberKind()) {
5788 // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
5789 // the original GPRs.
5790
5791 Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
5792 PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
5793 SDValue Chain = SDValue(N,0);
5794
5795 SDNode *GU = N->getGluedUser();
5796 SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
5797 Chain.getValue(1));
5798
5799 // Extract values from a GPRPair reg and copy to the original GPR reg.
5800 SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
5801 RegCopy);
5802 SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
5803 RegCopy);
5804 SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
5805 RegCopy.getValue(1));
5806 SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));
5807
5808 // Update the original glue user.
5809 std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
5810 Ops.push_back(T1.getValue(1));
5811 CurDAG->UpdateNodeOperands(GU, Ops);
5812 } else {
5813 // For Kind == InlineAsm::Kind::RegUse, we first copy two GPRs into a
5814 // GPRPair and then pass the GPRPair to the inline asm.
5815 SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];
5816
5817 // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
5818 SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
5819 Chain.getValue(1));
5820 SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
5821 T0.getValue(1));
5822 SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);
5823
5824 // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
5825 // i32 VRs of inline asm with it.
5826 Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
5827 PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
5828 Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));
5829
5830 AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
5831 Glue = Chain.getValue(1);
5832 }
5833
5834 Changed = true;
5835
5836 if(PairedReg.getNode()) {
5837 OpChanged[OpChanged.size() -1 ] = true;
5838 Flag = InlineAsm::Flag(Flag.getKind(), 1 /* RegNum*/);
5839 if (IsTiedToChangedOp)
5840 Flag.setMatchingOp(DefIdx);
5841 else
5842 Flag.setRegClass(ARM::GPRPairRegClassID);
5843 // Replace the current flag.
5844 AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant(
5845 Flag, dl, MVT::i32);
5846 // Add the new register node and skip the original two GPRs.
5847 AsmNodeOperands.push_back(PairedReg);
5848 // Skip the next two GPRs.
5849 i += 2;
5850 }
5851 }
5852
5853 if (Glue.getNode())
5854 AsmNodeOperands.push_back(Glue);
5855 if (!Changed)
5856 return false;
5857
5858 SDValue New = CurDAG->getNode(N->getOpcode(), SDLoc(N),
5859 CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
5860 New->setNodeId(-1);
5861 ReplaceNode(N, New.getNode());
5862 return true;
5863}
5864
5865bool ARMDAGToDAGISel::SelectInlineAsmMemoryOperand(
5866 const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
5867 std::vector<SDValue> &OutOps) {
5868 switch(ConstraintID) {
5869 default:
5870 llvm_unreachable("Unexpected asm memory constraint");
5871 case InlineAsm::ConstraintCode::m:
5872 case InlineAsm::ConstraintCode::o:
5873 case InlineAsm::ConstraintCode::Q:
5874 case InlineAsm::ConstraintCode::Um:
5875 case InlineAsm::ConstraintCode::Un:
5876 case InlineAsm::ConstraintCode::Uq:
5877 case InlineAsm::ConstraintCode::Us:
5878 case InlineAsm::ConstraintCode::Ut:
5879 case InlineAsm::ConstraintCode::Uv:
5880 case InlineAsm::ConstraintCode::Uy:
5881 // Require the address to be in a register. That is safe for all ARM
5882 // variants and it is hard to do anything much smarter without knowing
5883 // how the operand is used.
5884 OutOps.push_back(Op);
5885 return false;
5886 }
5887 return true;
5888}
5889
5890/// createARMISelDag - This pass converts a legalized DAG into a
5891/// ARM-specific DAG, ready for instruction scheduling.
5892///
5894 CodeGenOptLevel OptLevel) {
5895 return new ARMDAGToDAGISelLegacy(TM, OptLevel);
5896}
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
amdgpu AMDGPU Register Bank Select
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
static bool isThumb(const MCSubtargetInfo &STI)
static unsigned getVectorShuffleOpcode(EVT VT, unsigned Opc64[3], unsigned Opc128[3])
static int getBankedRegisterMask(StringRef RegString)
static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs)
Returns true if the given increment is a Constant known to be equal to the access size performed by a...
static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc)
static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned &Imm)
static bool isVSTfixed(unsigned Opc)
static bool isVLDfixed(unsigned Opc)
static bool isInt32Immediate(SDNode *N, unsigned &Imm)
isInt32Immediate - This method tests to see if the node is a 32-bit constant operand.
static std::optional< std::pair< unsigned, unsigned > > getContiguousRangeOfSetBits(const APInt &A)
static void getIntOperandsFromRegisterString(StringRef RegString, SelectionDAG *CurDAG, const SDLoc &DL, std::vector< SDValue > &Ops)
static int getARClassRegisterMask(StringRef Reg, StringRef Flags)
static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget)
static cl::opt< bool > DisableShifterOp("disable-shifter-op", cl::Hidden, cl::desc("Disable isel of shifter-op"), cl::init(false))
#define PASS_NAME
#define DEBUG_TYPE
static SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl)
getAL - Returns a ARMCC::AL immediate node.
static bool shouldUseZeroOffsetLdSt(SDValue N)
static int getMClassFlagsMask(StringRef Flags)
static bool SDValueToConstBool(SDValue SDVal)
static bool isScaledConstantInRange(SDValue Node, int Scale, int RangeMin, int RangeMax, int &ScaledConstant)
Check whether a particular node is a constant value representable as (N * Scale) where (N in [RangeMi...
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
uint64_t Addr
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static bool isSigned(unsigned int Opcode)
#define op(i)
const HexagonInstrInfo * TII
#define I(x, y, z)
Definition: MD5.cpp:58
#define T1
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
const char LLVMTargetMachineRef TM
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
support::ulittle16_t & Lo
Definition: aarch32.cpp:206
support::ulittle16_t & Hi
Definition: aarch32.cpp:205
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition: APFloat.h:1229
Class for arbitrary precision integers.
Definition: APInt.h:77
An arbitrary precision integer that knows its signedness.
Definition: APSInt.h:23
bool isSwift() const
Definition: ARMSubtarget.h:257
bool isThumb1Only() const
Definition: ARMSubtarget.h:364
bool hasFPARMv8Base() const
Definition: ARMSubtarget.h:274
bool isThumb2() const
Definition: ARMSubtarget.h:365
bool isLikeA9() const
Definition: ARMSubtarget.h:260
bool hasVFP2Base() const
Definition: ARMSubtarget.h:271
bool isLittle() const
Definition: ARMSubtarget.h:407
bool isMClass() const
Definition: ARMSubtarget.h:366
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
uint64_t getZExtValue() const
This class represents an Operation in the Expression.
Container class for subtarget features.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:311
This class is used to form a handle around another node that is persistent and is updated across invo...
Base class for LoadSDNode and StoreSDNode.
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc,...
An instruction for reading from memory.
Definition: Instructions.h:173
This class is used to represent ISD::LOAD nodes.
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
bool mayStore() const
Return true if this instruction could possibly modify memory.
Definition: MCInstrDesc.h:444
unsigned getOpcode() const
Return the opcode number for this descriptor.
Definition: MCInstrDesc.h:230
SimpleValueType SimpleTy
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
An SDNode that represents everything that will be needed to construct a MachineInstr.
This class is used to represent an MLOAD node.
This is an abstract virtual class for memory operations.
Align getAlign() const
EVT getMemoryVT() const
Return the type of the in-memory value.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
int getNodeId() const
Return the unique node id.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
op_iterator op_end() const
op_iterator op_begin() const
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
unsigned getOpcode() const
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, InlineAsm::ConstraintCode ConstraintID, std::vector< SDValue > &OutOps)
SelectInlineAsmMemoryOperand - Select the specified address as a target addressing mode,...
virtual bool ComplexPatternFuncMutatesDAG() const
Return true if complex patterns for this target can mutate the DAG.
virtual void PreprocessISelDAG()
PreprocessISelDAG - This hook allows targets to hack on the graph before instruction selection starts...
virtual bool runOnMachineFunction(MachineFunction &mf)
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:227
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:690
size_t size() const
Definition: SmallVector.h:91
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
This class is used to represent ISD::STORE nodes.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition: StringRef.h:693
std::string lower() const
Definition: StringRef.cpp:111
std::pair< StringRef, StringRef > rsplit(StringRef Separator) const
Split into two substrings around the last occurrence of a separator string.
Definition: StringRef.h:726
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static IntegerType * getInt32Ty(LLVMContext &C)
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
LLVM Value Representation.
Definition: Value.h:74
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
static ShiftOpc getShiftOpcForNode(unsigned Opcode)
int getSOImmVal(unsigned Arg)
getSOImmVal - Given a 32-bit immediate, if it is something that can fit into an shifter_operand immed...
uint64_t decodeVMOVModImm(unsigned ModImm, unsigned &EltBits)
decodeVMOVModImm - Decode a NEON/MVE modified immediate value into the element value and the element ...
float getFPImmFloat(unsigned Imm)
int getT2SOImmVal(unsigned Arg)
getT2SOImmVal - Given a 32-bit immediate, if it is something that can fit into a Thumb-2 shifter_oper...
unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO, unsigned IdxMode=0)
unsigned getAM5Opc(AddrOpc Opc, unsigned char Offset)
getAM5Opc - This function encodes the addrmode5 opc field.
unsigned getAM5FP16Opc(AddrOpc Opc, unsigned char Offset)
getAM5FP16Opc - This function encodes the addrmode5fp16 opc field.
unsigned getAM3Opc(AddrOpc Opc, unsigned char Offset, unsigned IdxMode=0)
getAM3Opc - This function encodes the addrmode3 opc field.
unsigned getSORegOpc(ShiftOpc ShOp, unsigned Imm)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ TargetConstantPool
Definition: ISDOpcodes.h:174
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:257
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1052
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:205
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:804
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:397
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:917
@ FrameIndex
Definition: ISDOpcodes.h:80
@ TargetExternalSymbol
Definition: ISDOpcodes.h:175
@ WRITE_REGISTER
Definition: ISDOpcodes.h:125
@ BasicBlock
Various leaf nodes.
Definition: ISDOpcodes.h:71
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:215
@ TargetGlobalAddress
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition: ISDOpcodes.h:170
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:719
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition: ISDOpcodes.h:124
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:543
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:209
@ FP_TO_UINT_SAT
Definition: ISDOpcodes.h:870
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1269
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:812
@ INLINEASM_BR
INLINEASM_BR - Branching version of inline asm. Used by asm-goto.
Definition: ISDOpcodes.h:1118
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:850
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:694
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:190
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:532
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:883
@ INLINEASM
INLINEASM - Represents an inline asm block.
Definition: ISDOpcodes.h:1115
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition: ISDOpcodes.h:869
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:198
@ TargetGlobalTLSAddress
Definition: ISDOpcodes.h:171
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1503
Flag
These should be considered private to the implementation of the MCInstrDesc class.
Definition: MCInstrDesc.h:148
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition: bit.h:307
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:656
FunctionPass * createARMISelDag(ARMBaseTargetMachine &TM, CodeGenOptLevel OptLevel)
createARMISelDag - This pass converts a legalized DAG into an ARM-specific DAG, ready for instruction ...
constexpr bool isShiftedMask_32(uint32_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (32 bit ver...
Definition: MathExtras.h:263
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant, stopping at the first 1.
Definition: bit.h:215
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:324
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:275
CodeGenOptLevel
Code generation optimization level.
Definition: CodeGen.h:54
unsigned ConstantMaterializationCost(unsigned Val, const ARMSubtarget *Subtarget, bool ForCodesize=false)
Returns the number of instructions required to materialize the given constant in a register,...
@ FMul
Product of floats.
@ And
Bitwise or logical AND of integers.
@ Add
Sum of integers.
@ NearestTiesToEven
roundTiesToEven.
Implement std::hash so that hash_code can be used in STL containers.
Definition: BitVector.h:858
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
#define NC
Definition: regutils.h:42
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Extended Value Type.
Definition: ValueTypes.h:34
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:73
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:358
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:370
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:306
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition: ValueTypes.h:203
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:167
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:318
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:326
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:151
bool is64BitVector() const
Return true if this is a 64-bit vector type.
Definition: ValueTypes.h:198
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasNoInfs() const