Clang Static Analyzer report — File: lib/Target/AArch64/AArch64ISelDAGToDAG.cpp, line 634, column 67: "The result of the '<<' expression is undefined."
1 | //===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===// | |||
2 | // | |||
3 | // The LLVM Compiler Infrastructure | |||
4 | // | |||
5 | // This file is distributed under the University of Illinois Open Source | |||
6 | // License. See LICENSE.TXT for details. | |||
7 | // | |||
8 | //===----------------------------------------------------------------------===// | |||
9 | // | |||
10 | // This file defines an instruction selector for the AArch64 target. | |||
11 | // | |||
12 | //===----------------------------------------------------------------------===// | |||
13 | ||||
14 | #include "AArch64TargetMachine.h" | |||
15 | #include "MCTargetDesc/AArch64AddressingModes.h" | |||
16 | #include "llvm/ADT/APSInt.h" | |||
17 | #include "llvm/CodeGen/SelectionDAGISel.h" | |||
18 | #include "llvm/IR/Function.h" // To access function attributes. | |||
19 | #include "llvm/IR/GlobalValue.h" | |||
20 | #include "llvm/IR/Intrinsics.h" | |||
21 | #include "llvm/Support/Debug.h" | |||
22 | #include "llvm/Support/ErrorHandling.h" | |||
23 | #include "llvm/Support/MathExtras.h" | |||
24 | #include "llvm/Support/raw_ostream.h" | |||
25 | ||||
26 | using namespace llvm; | |||
27 | ||||
28 | #define DEBUG_TYPE"aarch64-isel" "aarch64-isel" | |||
29 | ||||
//===--------------------------------------------------------------------===//
/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
/// instructions for SelectionDAG operations.
///
namespace {

class AArch64DAGToDAGISel : public SelectionDAGISel {
  AArch64TargetMachine &TM;

  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;

  // Set per function in runOnMachineFunction from the optsize/minsize
  // attributes; isWorthFolding() folds more aggressively when this is true.
  bool ForCodeSize;

public:
  explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
                               CodeGenOpt::Level OptLevel)
      : SelectionDAGISel(tm, OptLevel), TM(tm), Subtarget(nullptr),
        ForCodeSize(false) {}

  const char *getPassName() const override {
    return "AArch64 Instruction Selection";
  }

  bool runOnMachineFunction(MachineFunction &MF) override {
    // Cache per-function state before delegating to the common ISel driver.
    ForCodeSize =
        MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize) ||
        MF.getFunction()->hasFnAttribute(Attribute::MinSize);
    Subtarget = &MF.getSubtarget<AArch64Subtarget>();
    return SelectionDAGISel::runOnMachineFunction(MF);
  }

  SDNode *Select(SDNode *Node) override;

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                    unsigned ConstraintID,
                                    std::vector<SDValue> &OutOps) override;

  SDNode *SelectMLAV64LaneV128(SDNode *N);
  SDNode *SelectMULLV64LaneV128(unsigned IntNo, SDNode *N);
  bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
  bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
  bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
  // Arithmetic instructions allow only LSL/LSR/ASR shifted registers; the
  // logical instructions additionally allow ROR.
  bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
    return SelectShiftedRegister(N, false, Reg, Shift);
  }
  bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
    return SelectShiftedRegister(N, true, Reg, Shift);
  }
  // "Register plus scaled unsigned 12-bit immediate" addressing; one entry
  // point per access size in bytes.
  bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 1, Base, OffImm);
  }
  bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 2, Base, OffImm);
  }
  bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 4, Base, OffImm);
  }
  bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 8, Base, OffImm);
  }
  bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 16, Base, OffImm);
  }
  // "Register plus unscaled signed 9-bit immediate" addressing; one entry
  // point per access size in bytes.
  bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 1, Base, OffImm);
  }
  bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 2, Base, OffImm);
  }
  bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 4, Base, OffImm);
  }
  bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 8, Base, OffImm);
  }
  bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 16, Base, OffImm);
  }

  // "Register plus (possibly extended/shifted) W-register offset" addressing;
  // Width is the access width in bits.
  template<int Width>
  bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
                         SDValue &SignExtend, SDValue &DoShift) {
    return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
  }

  // "Register plus (possibly shifted) X-register offset" addressing; Width is
  // the access width in bits.
  template<int Width>
  bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
                         SDValue &SignExtend, SDValue &DoShift) {
    return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
  }


  /// Form sequences of consecutive 64/128-bit registers for use in NEON
  /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
  /// between 1 and 4 elements. If it contains a single element that is returned
  /// unchanged; otherwise a REG_SEQUENCE value is returned.
  SDValue createDTuple(ArrayRef<SDValue> Vecs);
  SDValue createQTuple(ArrayRef<SDValue> Vecs);

  /// Generic helper for the createDTuple/createQTuple
  /// functions. Those should almost always be called instead.
  SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
                      const unsigned SubRegs[]);

  SDNode *SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);

  SDNode *SelectIndexedLoad(SDNode *N, bool &Done);

  // Selection for NEON multi-vector loads/stores (plain, post-indexed, and
  // single-lane variants). NumVecs is the register-tuple arity, Opc the
  // machine opcode to emit.
  SDNode *SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
                     unsigned SubRegIdx);
  SDNode *SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
                         unsigned SubRegIdx);
  SDNode *SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  SDNode *SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);

  SDNode *SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
  SDNode *SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
  SDNode *SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  SDNode *SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);

  SDNode *SelectBitfieldExtractOp(SDNode *N);
  SDNode *SelectBitfieldInsertOp(SDNode *N);

  SDNode *SelectLIBM(SDNode *N);

// Include the pieces autogenerated from the target description.
#include "AArch64GenDAGISel.inc"

private:
  bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
                             SDValue &Shift);
  bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
                             SDValue &OffImm);
  bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
                              SDValue &OffImm);
  bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
                         SDValue &Offset, SDValue &SignExtend,
                         SDValue &DoShift);
  bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
                         SDValue &Offset, SDValue &SignExtend,
                         SDValue &DoShift);
  bool isWorthFolding(SDValue V) const;
  bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
                         SDValue &Offset, SDValue &SignExtend);

  // Match a fixed-point conversion scale operand for a register of RegWidth
  // bits.
  template<unsigned RegWidth>
  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
    return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
  }

  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
};
} // end anonymous namespace
187 | ||||
188 | /// isIntImmediate - This method tests to see if the node is a constant | |||
189 | /// operand. If so Imm will receive the 32-bit value. | |||
190 | static bool isIntImmediate(const SDNode *N, uint64_t &Imm) { | |||
191 | if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) { | |||
192 | Imm = C->getZExtValue(); | |||
193 | return true; | |||
194 | } | |||
195 | return false; | |||
196 | } | |||
197 | ||||
198 | // isIntImmediate - This method tests to see if a constant operand. | |||
199 | // If so Imm will receive the value. | |||
200 | static bool isIntImmediate(SDValue N, uint64_t &Imm) { | |||
201 | return isIntImmediate(N.getNode(), Imm); | |||
202 | } | |||
203 | ||||
204 | // isOpcWithIntImmediate - This method tests to see if the node is a specific | |||
205 | // opcode and that it has a immediate integer right operand. | |||
206 | // If so Imm will receive the 32 bit value. | |||
207 | static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc, | |||
208 | uint64_t &Imm) { | |||
209 | return N->getOpcode() == Opc && | |||
210 | isIntImmediate(N->getOperand(1).getNode(), Imm); | |||
211 | } | |||
212 | ||||
213 | bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand( | |||
214 | const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) { | |||
215 | switch(ConstraintID) { | |||
216 | default: | |||
217 | llvm_unreachable("Unexpected asm memory constraint")::llvm::llvm_unreachable_internal("Unexpected asm memory constraint" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn235822/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 217); | |||
218 | case InlineAsm::Constraint_i: | |||
219 | case InlineAsm::Constraint_m: | |||
220 | case InlineAsm::Constraint_Q: | |||
221 | // Require the address to be in a register. That is safe for all AArch64 | |||
222 | // variants and it is hard to do anything much smarter without knowing | |||
223 | // how the operand is used. | |||
224 | OutOps.push_back(Op); | |||
225 | return false; | |||
226 | } | |||
227 | return true; | |||
228 | } | |||
229 | ||||
230 | /// SelectArithImmed - Select an immediate value that can be represented as | |||
231 | /// a 12-bit value shifted left by either 0 or 12. If so, return true with | |||
232 | /// Val set to the 12-bit value and Shift set to the shifter operand. | |||
233 | bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val, | |||
234 | SDValue &Shift) { | |||
235 | // This function is called from the addsub_shifted_imm ComplexPattern, | |||
236 | // which lists [imm] as the list of opcode it's interested in, however | |||
237 | // we still need to check whether the operand is actually an immediate | |||
238 | // here because the ComplexPattern opcode list is only used in | |||
239 | // root-level opcode matching. | |||
240 | if (!isa<ConstantSDNode>(N.getNode())) | |||
241 | return false; | |||
242 | ||||
243 | uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue(); | |||
244 | unsigned ShiftAmt; | |||
245 | ||||
246 | if (Immed >> 12 == 0) { | |||
247 | ShiftAmt = 0; | |||
248 | } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) { | |||
249 | ShiftAmt = 12; | |||
250 | Immed = Immed >> 12; | |||
251 | } else | |||
252 | return false; | |||
253 | ||||
254 | unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt); | |||
255 | Val = CurDAG->getTargetConstant(Immed, MVT::i32); | |||
256 | Shift = CurDAG->getTargetConstant(ShVal, MVT::i32); | |||
257 | return true; | |||
258 | } | |||
259 | ||||
260 | /// SelectNegArithImmed - As above, but negates the value before trying to | |||
261 | /// select it. | |||
262 | bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val, | |||
263 | SDValue &Shift) { | |||
264 | // This function is called from the addsub_shifted_imm ComplexPattern, | |||
265 | // which lists [imm] as the list of opcode it's interested in, however | |||
266 | // we still need to check whether the operand is actually an immediate | |||
267 | // here because the ComplexPattern opcode list is only used in | |||
268 | // root-level opcode matching. | |||
269 | if (!isa<ConstantSDNode>(N.getNode())) | |||
270 | return false; | |||
271 | ||||
272 | // The immediate operand must be a 24-bit zero-extended immediate. | |||
273 | uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue(); | |||
274 | ||||
275 | // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0" | |||
276 | // have the opposite effect on the C flag, so this pattern mustn't match under | |||
277 | // those circumstances. | |||
278 | if (Immed == 0) | |||
279 | return false; | |||
280 | ||||
281 | if (N.getValueType() == MVT::i32) | |||
282 | Immed = ~((uint32_t)Immed) + 1; | |||
283 | else | |||
284 | Immed = ~Immed + 1ULL; | |||
285 | if (Immed & 0xFFFFFFFFFF000000ULL) | |||
286 | return false; | |||
287 | ||||
288 | Immed &= 0xFFFFFFULL; | |||
289 | return SelectArithImmed(CurDAG->getConstant(Immed, MVT::i32), Val, Shift); | |||
290 | } | |||
291 | ||||
292 | /// getShiftTypeForNode - Translate a shift node to the corresponding | |||
293 | /// ShiftType value. | |||
294 | static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) { | |||
295 | switch (N.getOpcode()) { | |||
296 | default: | |||
297 | return AArch64_AM::InvalidShiftExtend; | |||
298 | case ISD::SHL: | |||
299 | return AArch64_AM::LSL; | |||
300 | case ISD::SRL: | |||
301 | return AArch64_AM::LSR; | |||
302 | case ISD::SRA: | |||
303 | return AArch64_AM::ASR; | |||
304 | case ISD::ROTR: | |||
305 | return AArch64_AM::ROR; | |||
306 | } | |||
307 | } | |||
308 | ||||
309 | /// \brief Determine whether it is worth to fold V into an extended register. | |||
310 | bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const { | |||
311 | // it hurts if the value is used at least twice, unless we are optimizing | |||
312 | // for code size. | |||
313 | if (ForCodeSize || V.hasOneUse()) | |||
314 | return true; | |||
315 | return false; | |||
316 | } | |||
317 | ||||
318 | /// SelectShiftedRegister - Select a "shifted register" operand. If the value | |||
319 | /// is not shifted, set the Shift operand to default of "LSL 0". The logical | |||
320 | /// instructions allow the shifted register to be rotated, but the arithmetic | |||
321 | /// instructions do not. The AllowROR parameter specifies whether ROR is | |||
322 | /// supported. | |||
323 | bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR, | |||
324 | SDValue &Reg, SDValue &Shift) { | |||
325 | AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N); | |||
326 | if (ShType == AArch64_AM::InvalidShiftExtend) | |||
327 | return false; | |||
328 | if (!AllowROR && ShType == AArch64_AM::ROR) | |||
329 | return false; | |||
330 | ||||
331 | if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { | |||
332 | unsigned BitSize = N.getValueType().getSizeInBits(); | |||
333 | unsigned Val = RHS->getZExtValue() & (BitSize - 1); | |||
334 | unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val); | |||
335 | ||||
336 | Reg = N.getOperand(0); | |||
337 | Shift = CurDAG->getTargetConstant(ShVal, MVT::i32); | |||
338 | return isWorthFolding(N); | |||
339 | } | |||
340 | ||||
341 | return false; | |||
342 | } | |||
343 | ||||
344 | /// getExtendTypeForNode - Translate an extend node to the corresponding | |||
345 | /// ExtendType value. | |||
346 | static AArch64_AM::ShiftExtendType | |||
347 | getExtendTypeForNode(SDValue N, bool IsLoadStore = false) { | |||
348 | if (N.getOpcode() == ISD::SIGN_EXTEND || | |||
349 | N.getOpcode() == ISD::SIGN_EXTEND_INREG) { | |||
350 | EVT SrcVT; | |||
351 | if (N.getOpcode() == ISD::SIGN_EXTEND_INREG) | |||
352 | SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT(); | |||
353 | else | |||
354 | SrcVT = N.getOperand(0).getValueType(); | |||
355 | ||||
356 | if (!IsLoadStore && SrcVT == MVT::i8) | |||
357 | return AArch64_AM::SXTB; | |||
358 | else if (!IsLoadStore && SrcVT == MVT::i16) | |||
359 | return AArch64_AM::SXTH; | |||
360 | else if (SrcVT == MVT::i32) | |||
361 | return AArch64_AM::SXTW; | |||
362 | assert(SrcVT != MVT::i64 && "extend from 64-bits?")((SrcVT != MVT::i64 && "extend from 64-bits?") ? static_cast <void> (0) : __assert_fail ("SrcVT != MVT::i64 && \"extend from 64-bits?\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn235822/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 362, __PRETTY_FUNCTION__)); | |||
363 | ||||
364 | return AArch64_AM::InvalidShiftExtend; | |||
365 | } else if (N.getOpcode() == ISD::ZERO_EXTEND || | |||
366 | N.getOpcode() == ISD::ANY_EXTEND) { | |||
367 | EVT SrcVT = N.getOperand(0).getValueType(); | |||
368 | if (!IsLoadStore && SrcVT == MVT::i8) | |||
369 | return AArch64_AM::UXTB; | |||
370 | else if (!IsLoadStore && SrcVT == MVT::i16) | |||
371 | return AArch64_AM::UXTH; | |||
372 | else if (SrcVT == MVT::i32) | |||
373 | return AArch64_AM::UXTW; | |||
374 | assert(SrcVT != MVT::i64 && "extend from 64-bits?")((SrcVT != MVT::i64 && "extend from 64-bits?") ? static_cast <void> (0) : __assert_fail ("SrcVT != MVT::i64 && \"extend from 64-bits?\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn235822/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 374, __PRETTY_FUNCTION__)); | |||
375 | ||||
376 | return AArch64_AM::InvalidShiftExtend; | |||
377 | } else if (N.getOpcode() == ISD::AND) { | |||
378 | ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1)); | |||
379 | if (!CSD) | |||
380 | return AArch64_AM::InvalidShiftExtend; | |||
381 | uint64_t AndMask = CSD->getZExtValue(); | |||
382 | ||||
383 | switch (AndMask) { | |||
384 | default: | |||
385 | return AArch64_AM::InvalidShiftExtend; | |||
386 | case 0xFF: | |||
387 | return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend; | |||
388 | case 0xFFFF: | |||
389 | return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend; | |||
390 | case 0xFFFFFFFF: | |||
391 | return AArch64_AM::UXTW; | |||
392 | } | |||
393 | } | |||
394 | ||||
395 | return AArch64_AM::InvalidShiftExtend; | |||
396 | } | |||
397 | ||||
398 | // Helper for SelectMLAV64LaneV128 - Recognize high lane extracts. | |||
399 | static bool checkHighLaneIndex(SDNode *DL, SDValue &LaneOp, int &LaneIdx) { | |||
400 | if (DL->getOpcode() != AArch64ISD::DUPLANE16 && | |||
401 | DL->getOpcode() != AArch64ISD::DUPLANE32) | |||
402 | return false; | |||
403 | ||||
404 | SDValue SV = DL->getOperand(0); | |||
405 | if (SV.getOpcode() != ISD::INSERT_SUBVECTOR) | |||
406 | return false; | |||
407 | ||||
408 | SDValue EV = SV.getOperand(1); | |||
409 | if (EV.getOpcode() != ISD::EXTRACT_SUBVECTOR) | |||
410 | return false; | |||
411 | ||||
412 | ConstantSDNode *DLidx = cast<ConstantSDNode>(DL->getOperand(1).getNode()); | |||
413 | ConstantSDNode *EVidx = cast<ConstantSDNode>(EV.getOperand(1).getNode()); | |||
414 | LaneIdx = DLidx->getSExtValue() + EVidx->getSExtValue(); | |||
415 | LaneOp = EV.getOperand(0); | |||
416 | ||||
417 | return true; | |||
418 | } | |||
419 | ||||
420 | // Helper for SelectOpcV64LaneV128 - Recogzine operatinos where one operand is a | |||
421 | // high lane extract. | |||
422 | static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp, | |||
423 | SDValue &LaneOp, int &LaneIdx) { | |||
424 | ||||
425 | if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) { | |||
426 | std::swap(Op0, Op1); | |||
427 | if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) | |||
428 | return false; | |||
429 | } | |||
430 | StdOp = Op1; | |||
431 | return true; | |||
432 | } | |||
433 | ||||
434 | /// SelectMLAV64LaneV128 - AArch64 supports vector MLAs where one multiplicand | |||
435 | /// is a lane in the upper half of a 128-bit vector. Recognize and select this | |||
436 | /// so that we don't emit unnecessary lane extracts. | |||
437 | SDNode *AArch64DAGToDAGISel::SelectMLAV64LaneV128(SDNode *N) { | |||
438 | SDValue Op0 = N->getOperand(0); | |||
439 | SDValue Op1 = N->getOperand(1); | |||
440 | SDValue MLAOp1; // Will hold ordinary multiplicand for MLA. | |||
441 | SDValue MLAOp2; // Will hold lane-accessed multiplicand for MLA. | |||
442 | int LaneIdx = -1; // Will hold the lane index. | |||
443 | ||||
444 | if (Op1.getOpcode() != ISD::MUL || | |||
445 | !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2, | |||
446 | LaneIdx)) { | |||
447 | std::swap(Op0, Op1); | |||
448 | if (Op1.getOpcode() != ISD::MUL || | |||
449 | !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2, | |||
450 | LaneIdx)) | |||
451 | return nullptr; | |||
452 | } | |||
453 | ||||
454 | SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, MVT::i64); | |||
455 | ||||
456 | SDValue Ops[] = { Op0, MLAOp1, MLAOp2, LaneIdxVal }; | |||
457 | ||||
458 | unsigned MLAOpc = ~0U; | |||
459 | ||||
460 | switch (N->getSimpleValueType(0).SimpleTy) { | |||
461 | default: | |||
462 | llvm_unreachable("Unrecognized MLA.")::llvm::llvm_unreachable_internal("Unrecognized MLA.", "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn235822/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 462); | |||
463 | case MVT::v4i16: | |||
464 | MLAOpc = AArch64::MLAv4i16_indexed; | |||
465 | break; | |||
466 | case MVT::v8i16: | |||
467 | MLAOpc = AArch64::MLAv8i16_indexed; | |||
468 | break; | |||
469 | case MVT::v2i32: | |||
470 | MLAOpc = AArch64::MLAv2i32_indexed; | |||
471 | break; | |||
472 | case MVT::v4i32: | |||
473 | MLAOpc = AArch64::MLAv4i32_indexed; | |||
474 | break; | |||
475 | } | |||
476 | ||||
477 | return CurDAG->getMachineNode(MLAOpc, SDLoc(N), N->getValueType(0), Ops); | |||
478 | } | |||
479 | ||||
480 | SDNode *AArch64DAGToDAGISel::SelectMULLV64LaneV128(unsigned IntNo, SDNode *N) { | |||
481 | SDValue SMULLOp0; | |||
482 | SDValue SMULLOp1; | |||
483 | int LaneIdx; | |||
484 | ||||
485 | if (!checkV64LaneV128(N->getOperand(1), N->getOperand(2), SMULLOp0, SMULLOp1, | |||
486 | LaneIdx)) | |||
487 | return nullptr; | |||
488 | ||||
489 | SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, MVT::i64); | |||
490 | ||||
491 | SDValue Ops[] = { SMULLOp0, SMULLOp1, LaneIdxVal }; | |||
492 | ||||
493 | unsigned SMULLOpc = ~0U; | |||
494 | ||||
495 | if (IntNo == Intrinsic::aarch64_neon_smull) { | |||
496 | switch (N->getSimpleValueType(0).SimpleTy) { | |||
497 | default: | |||
498 | llvm_unreachable("Unrecognized SMULL.")::llvm::llvm_unreachable_internal("Unrecognized SMULL.", "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn235822/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 498); | |||
499 | case MVT::v4i32: | |||
500 | SMULLOpc = AArch64::SMULLv4i16_indexed; | |||
501 | break; | |||
502 | case MVT::v2i64: | |||
503 | SMULLOpc = AArch64::SMULLv2i32_indexed; | |||
504 | break; | |||
505 | } | |||
506 | } else if (IntNo == Intrinsic::aarch64_neon_umull) { | |||
507 | switch (N->getSimpleValueType(0).SimpleTy) { | |||
508 | default: | |||
509 | llvm_unreachable("Unrecognized SMULL.")::llvm::llvm_unreachable_internal("Unrecognized SMULL.", "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn235822/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 509); | |||
510 | case MVT::v4i32: | |||
511 | SMULLOpc = AArch64::UMULLv4i16_indexed; | |||
512 | break; | |||
513 | case MVT::v2i64: | |||
514 | SMULLOpc = AArch64::UMULLv2i32_indexed; | |||
515 | break; | |||
516 | } | |||
517 | } else | |||
518 | llvm_unreachable("Unrecognized intrinsic.")::llvm::llvm_unreachable_internal("Unrecognized intrinsic.", "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn235822/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 518); | |||
519 | ||||
520 | return CurDAG->getMachineNode(SMULLOpc, SDLoc(N), N->getValueType(0), Ops); | |||
521 | } | |||
522 | ||||
523 | /// Instructions that accept extend modifiers like UXTW expect the register | |||
524 | /// being extended to be a GPR32, but the incoming DAG might be acting on a | |||
525 | /// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if | |||
526 | /// this is the case. | |||
527 | static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) { | |||
528 | if (N.getValueType() == MVT::i32) | |||
529 | return N; | |||
530 | ||||
531 | SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32); | |||
532 | MachineSDNode *Node = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, | |||
533 | SDLoc(N), MVT::i32, N, SubReg); | |||
534 | return SDValue(Node, 0); | |||
535 | } | |||
536 | ||||
537 | ||||
538 | /// SelectArithExtendedRegister - Select a "extended register" operand. This | |||
539 | /// operand folds in an extend followed by an optional left shift. | |||
540 | bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg, | |||
541 | SDValue &Shift) { | |||
542 | unsigned ShiftVal = 0; | |||
543 | AArch64_AM::ShiftExtendType Ext; | |||
544 | ||||
545 | if (N.getOpcode() == ISD::SHL) { | |||
546 | ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1)); | |||
547 | if (!CSD) | |||
548 | return false; | |||
549 | ShiftVal = CSD->getZExtValue(); | |||
550 | if (ShiftVal > 4) | |||
551 | return false; | |||
552 | ||||
553 | Ext = getExtendTypeForNode(N.getOperand(0)); | |||
554 | if (Ext == AArch64_AM::InvalidShiftExtend) | |||
555 | return false; | |||
556 | ||||
557 | Reg = N.getOperand(0).getOperand(0); | |||
558 | } else { | |||
559 | Ext = getExtendTypeForNode(N); | |||
560 | if (Ext == AArch64_AM::InvalidShiftExtend) | |||
561 | return false; | |||
562 | ||||
563 | Reg = N.getOperand(0); | |||
564 | } | |||
565 | ||||
566 | // AArch64 mandates that the RHS of the operation must use the smallest | |||
567 | // register classs that could contain the size being extended from. Thus, | |||
568 | // if we're folding a (sext i8), we need the RHS to be a GPR32, even though | |||
569 | // there might not be an actual 32-bit value in the program. We can | |||
570 | // (harmlessly) synthesize one by injected an EXTRACT_SUBREG here. | |||
571 | assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX)((Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX) ? static_cast<void> (0) : __assert_fail ("Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn235822/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 571, __PRETTY_FUNCTION__)); | |||
572 | Reg = narrowIfNeeded(CurDAG, Reg); | |||
573 | Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), MVT::i32); | |||
574 | return isWorthFolding(N); | |||
575 | } | |||
576 | ||||
577 | /// If there's a use of this ADDlow that's not itself a load/store then we'll | |||
578 | /// need to create a real ADD instruction from it anyway and there's no point in | |||
579 | /// folding it into the mem op. Theoretically, it shouldn't matter, but there's | |||
580 | /// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding | |||
581 | /// leads to duplaicated ADRP instructions. | |||
582 | static bool isWorthFoldingADDlow(SDValue N) { | |||
583 | for (auto Use : N->uses()) { | |||
584 | if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE && | |||
585 | Use->getOpcode() != ISD::ATOMIC_LOAD && | |||
586 | Use->getOpcode() != ISD::ATOMIC_STORE) | |||
587 | return false; | |||
588 | ||||
589 | // ldar and stlr have much more restrictive addressing modes (just a | |||
590 | // register). | |||
591 | if (cast<MemSDNode>(Use)->getOrdering() > Monotonic) | |||
592 | return false; | |||
593 | } | |||
594 | ||||
595 | return true; | |||
596 | } | |||
597 | ||||
598 | /// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit | |||
599 | /// immediate" address. The "Size" argument is the size in bytes of the memory | |||
600 | /// reference, which determines the scale. | |||
601 | bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size, | |||
602 | SDValue &Base, SDValue &OffImm) { | |||
603 | const TargetLowering *TLI = getTargetLowering(); | |||
604 | if (N.getOpcode() == ISD::FrameIndex) { | |||
| ||||
605 | int FI = cast<FrameIndexSDNode>(N)->getIndex(); | |||
606 | Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); | |||
607 | OffImm = CurDAG->getTargetConstant(0, MVT::i64); | |||
608 | return true; | |||
609 | } | |||
610 | ||||
611 | if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) { | |||
612 | GlobalAddressSDNode *GAN = | |||
613 | dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode()); | |||
614 | Base = N.getOperand(0); | |||
615 | OffImm = N.getOperand(1); | |||
616 | if (!GAN) | |||
617 | return true; | |||
618 | ||||
619 | const GlobalValue *GV = GAN->getGlobal(); | |||
620 | unsigned Alignment = GV->getAlignment(); | |||
621 | const DataLayout *DL = TLI->getDataLayout(); | |||
622 | Type *Ty = GV->getType()->getElementType(); | |||
623 | if (Alignment == 0 && Ty->isSized()) | |||
624 | Alignment = DL->getABITypeAlignment(Ty); | |||
625 | ||||
626 | if (Alignment >= Size) | |||
627 | return true; | |||
628 | } | |||
629 | ||||
630 | if (CurDAG->isBaseWithConstantOffset(N)) { | |||
631 | if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { | |||
632 | int64_t RHSC = (int64_t)RHS->getZExtValue(); | |||
633 | unsigned Scale = Log2_32(Size); | |||
634 | if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) { | |||
| ||||
635 | Base = N.getOperand(0); | |||
636 | if (Base.getOpcode() == ISD::FrameIndex) { | |||
637 | int FI = cast<FrameIndexSDNode>(Base)->getIndex(); | |||
638 | Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); | |||
639 | } | |||
640 | OffImm = CurDAG->getTargetConstant(RHSC >> Scale, MVT::i64); | |||
641 | return true; | |||
642 | } | |||
643 | } | |||
644 | } | |||
645 | ||||
646 | // Before falling back to our general case, check if the unscaled | |||
647 | // instructions can handle this. If so, that's preferable. | |||
648 | if (SelectAddrModeUnscaled(N, Size, Base, OffImm)) | |||
649 | return false; | |||
650 | ||||
651 | // Base only. The address will be materialized into a register before | |||
652 | // the memory is accessed. | |||
653 | // add x0, Xbase, #offset | |||
654 | // ldr x0, [x0] | |||
655 | Base = N; | |||
656 | OffImm = CurDAG->getTargetConstant(0, MVT::i64); | |||
657 | return true; | |||
658 | } | |||
659 | ||||
660 | /// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit | |||
661 | /// immediate" address. This should only match when there is an offset that | |||
662 | /// is not valid for a scaled immediate addressing mode. The "Size" argument | |||
663 | /// is the size in bytes of the memory reference, which is needed here to know | |||
664 | /// what is valid for a scaled immediate. | |||
665 | bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size, | |||
666 | SDValue &Base, | |||
667 | SDValue &OffImm) { | |||
668 | if (!CurDAG->isBaseWithConstantOffset(N)) | |||
669 | return false; | |||
670 | if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { | |||
671 | int64_t RHSC = RHS->getSExtValue(); | |||
672 | // If the offset is valid as a scaled immediate, don't match here. | |||
673 | if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && | |||
674 | RHSC < (0x1000 << Log2_32(Size))) | |||
675 | return false; | |||
676 | if (RHSC >= -256 && RHSC < 256) { | |||
677 | Base = N.getOperand(0); | |||
678 | if (Base.getOpcode() == ISD::FrameIndex) { | |||
679 | int FI = cast<FrameIndexSDNode>(Base)->getIndex(); | |||
680 | const TargetLowering *TLI = getTargetLowering(); | |||
681 | Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); | |||
682 | } | |||
683 | OffImm = CurDAG->getTargetConstant(RHSC, MVT::i64); | |||
684 | return true; | |||
685 | } | |||
686 | } | |||
687 | return false; | |||
688 | } | |||
689 | ||||
690 | static SDValue Widen(SelectionDAG *CurDAG, SDValue N) { | |||
691 | SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32); | |||
692 | SDValue ImpDef = SDValue( | |||
693 | CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, SDLoc(N), MVT::i64), | |||
694 | 0); | |||
695 | MachineSDNode *Node = CurDAG->getMachineNode( | |||
696 | TargetOpcode::INSERT_SUBREG, SDLoc(N), MVT::i64, ImpDef, N, SubReg); | |||
697 | return SDValue(Node, 0); | |||
698 | } | |||
699 | ||||
700 | /// \brief Check if the given SHL node (\p N), can be used to form an | |||
701 | /// extended register for an addressing mode. | |||
bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
                                            bool WantExtend, SDValue &Offset,
                                            SDValue &SignExtend) {
  assert(N.getOpcode() == ISD::SHL && "Invalid opcode.")((N.getOpcode() == ISD::SHL && "Invalid opcode.") ? static_cast <void> (0) : __assert_fail ("N.getOpcode() == ISD::SHL && \"Invalid opcode.\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn235822/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 705, __PRETTY_FUNCTION__));
  // The shift amount must be a constant that fits in 3 bits (0..7);
  // anything larger can never match an addressing-mode shift.
  ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
    return false;

  if (WantExtend) {
    // Classify the extension feeding the shift; bail out if it is not a
    // recognized extend. SignExtend is 1 exactly when the extend is SXTW.
    AArch64_AM::ShiftExtendType Ext =
        getExtendTypeForNode(N.getOperand(0), true);
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return false;

    Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
    SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, MVT::i32);
  } else {
    // No extend requested: the shifted value itself is the offset.
    Offset = N.getOperand(0);
    SignExtend = CurDAG->getTargetConstant(0, MVT::i32);
  }

  // Only a shift of 0 or exactly log2(access size) can be folded into the
  // addressing mode.
  unsigned LegalShiftVal = Log2_32(Size);
  unsigned ShiftVal = CSD->getZExtValue();

  if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
    return false;

  // Note: Offset/SignExtend may have been written even when we return
  // false below; callers only consume them on a true return.
  if (isWorthFolding(N))
    return true;

  return false;
}
734 | ||||
bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
                                            SDValue &Base, SDValue &Offset,
                                            SDValue &SignExtend,
                                            SDValue &DoShift) {
  // Select a "base register + extended 32-bit register (optionally shifted
  // by the access size)" address. On success, Base/Offset hold the two
  // registers, SignExtend selects SXTW vs UXTW, and DoShift records
  // whether the offset is scaled.
  if (N.getOpcode() != ISD::ADD)
    return false;
  SDValue LHS = N.getOperand(0);
  SDValue RHS = N.getOperand(1);

  // We don't want to match immediate adds here, because they are better lowered
  // to the register-immediate addressing modes.
  if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
    return false;

  // Check if this particular node is reused in any non-memory related
  // operation.  If yes, do not try to fold this node into the address
  // computation, since the computation will be kept.
  const SDNode *Node = N.getNode();
  for (SDNode *UI : Node->uses()) {
    if (!isa<MemSDNode>(*UI))
      return false;
  }

  // Remember if it is worth folding N when it produces extended register.
  bool IsExtendedRegisterWorthFolding = isWorthFolding(N);

  // Try to match a shifted extend on the RHS.
  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
    Base = LHS;
    DoShift = CurDAG->getTargetConstant(true, MVT::i32);
    return true;
  }

  // Try to match a shifted extend on the LHS.
  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
    Base = RHS;
    DoShift = CurDAG->getTargetConstant(true, MVT::i32);
    return true;
  }

  // There was no shift, whatever else we find.
  DoShift = CurDAG->getTargetConstant(false, MVT::i32);

  AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend;
  // Try to match an unshifted extend on the LHS. Note the condition both
  // tests for and captures the extend kind in Ext.
  if (IsExtendedRegisterWorthFolding &&
      (Ext = getExtendTypeForNode(LHS, true)) !=
          AArch64_AM::InvalidShiftExtend) {
    Base = RHS;
    Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
    SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, MVT::i32);
    // Only commit the match when duplicating the extend is profitable;
    // otherwise fall through and try the RHS (outputs are overwritten).
    if (isWorthFolding(LHS))
      return true;
  }

  // Try to match an unshifted extend on the RHS.
  if (IsExtendedRegisterWorthFolding &&
      (Ext = getExtendTypeForNode(RHS, true)) !=
          AArch64_AM::InvalidShiftExtend) {
    Base = LHS;
    Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
    SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, MVT::i32);
    if (isWorthFolding(RHS))
      return true;
  }

  return false;
}
805 | ||||
// Check if the given immediate is preferred by ADD. If an immediate can be
// encoded in an ADD, or it can be encoded in an "ADD LSL #12" and can not be
// encoded by one MOVZ, return true.
static bool isPreferredADD(int64_t ImmOff) {
  // Immediates in [0x0, 0xfff] fit directly in the ADD encoding.
  if ((ImmOff & ~0xfffLL) == 0)
    return true;
  // Otherwise the value must fit "ADD ..., LSL #12", i.e. only bits
  // [12, 23] may be set.
  if ((ImmOff & ~0xfff000LL) != 0)
    return false;
  // If all set bits fall in [12, 15] (MOVZ, shift 0) or all in [16, 23]
  // (MOVZ, shift 16), a single MOVZ is faster than ADD+LSL; reject those.
  bool HasLowBits = (ImmOff & 0xf000LL) != 0;
  bool HasHighBits = (ImmOff & 0xff0000LL) != 0;
  return HasLowBits && HasHighBits;
}
820 | ||||
821 | bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size, | |||
822 | SDValue &Base, SDValue &Offset, | |||
823 | SDValue &SignExtend, | |||
824 | SDValue &DoShift) { | |||
825 | if (N.getOpcode() != ISD::ADD) | |||
826 | return false; | |||
827 | SDValue LHS = N.getOperand(0); | |||
828 | SDValue RHS = N.getOperand(1); | |||
829 | ||||
830 | // Check if this particular node is reused in any non-memory related | |||
831 | // operation. If yes, do not try to fold this node into the address | |||
832 | // computation, since the computation will be kept. | |||
833 | const SDNode *Node = N.getNode(); | |||
834 | for (SDNode *UI : Node->uses()) { | |||
835 | if (!isa<MemSDNode>(*UI)) | |||
836 | return false; | |||
837 | } | |||
838 | ||||
839 | // Watch out if RHS is a wide immediate, it can not be selected into | |||
840 | // [BaseReg+Imm] addressing mode. Also it may not be able to be encoded into | |||
841 | // ADD/SUB. Instead it will use [BaseReg + 0] address mode and generate | |||
842 | // instructions like: | |||
843 | // MOV X0, WideImmediate | |||
844 | // ADD X1, BaseReg, X0 | |||
845 | // LDR X2, [X1, 0] | |||
846 | // For such situation, using [BaseReg, XReg] addressing mode can save one | |||
847 | // ADD/SUB: | |||
848 | // MOV X0, WideImmediate | |||
849 | // LDR X2, [BaseReg, X0] | |||
850 | if (isa<ConstantSDNode>(RHS)) { | |||
851 | int64_t ImmOff = (int64_t)cast<ConstantSDNode>(RHS)->getZExtValue(); | |||
852 | unsigned Scale = Log2_32(Size); | |||
853 | // Skip the immediate can be seleced by load/store addressing mode. | |||
854 | // Also skip the immediate can be encoded by a single ADD (SUB is also | |||
855 | // checked by using -ImmOff). | |||
856 | if ((ImmOff % Size == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale)) || | |||
857 | isPreferredADD(ImmOff) || isPreferredADD(-ImmOff)) | |||
858 | return false; | |||
859 | ||||
860 | SDLoc DL(N.getNode()); | |||
861 | SDValue Ops[] = { RHS }; | |||
862 | SDNode *MOVI = | |||
863 | CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops); | |||
864 | SDValue MOVIV = SDValue(MOVI, 0); | |||
865 | // This ADD of two X register will be selected into [Reg+Reg] mode. | |||
866 | N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV); | |||
867 | } | |||
868 | ||||
869 | // Remember if it is worth folding N when it produces extended register. | |||
870 | bool IsExtendedRegisterWorthFolding = isWorthFolding(N); | |||
871 | ||||
872 | // Try to match a shifted extend on the RHS. | |||
873 | if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL && | |||
874 | SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) { | |||
875 | Base = LHS; | |||
876 | DoShift = CurDAG->getTargetConstant(true, MVT::i32); | |||
877 | return true; | |||
878 | } | |||
879 | ||||
880 | // Try to match a shifted extend on the LHS. | |||
881 | if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL && | |||
882 | SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) { | |||
883 | Base = RHS; | |||
884 | DoShift = CurDAG->getTargetConstant(true, MVT::i32); | |||
885 | return true; | |||
886 | } | |||
887 | ||||
888 | // Match any non-shifted, non-extend, non-immediate add expression. | |||
889 | Base = LHS; | |||
890 | Offset = RHS; | |||
891 | SignExtend = CurDAG->getTargetConstant(false, MVT::i32); | |||
892 | DoShift = CurDAG->getTargetConstant(false, MVT::i32); | |||
893 | // Reg1 + Reg2 is free: no check needed. | |||
894 | return true; | |||
895 | } | |||
896 | ||||
897 | SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) { | |||
898 | static const unsigned RegClassIDs[] = { | |||
899 | AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID}; | |||
900 | static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1, | |||
901 | AArch64::dsub2, AArch64::dsub3}; | |||
902 | ||||
903 | return createTuple(Regs, RegClassIDs, SubRegs); | |||
904 | } | |||
905 | ||||
906 | SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) { | |||
907 | static const unsigned RegClassIDs[] = { | |||
908 | AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID}; | |||
909 | static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1, | |||
910 | AArch64::qsub2, AArch64::qsub3}; | |||
911 | ||||
912 | return createTuple(Regs, RegClassIDs, SubRegs); | |||
913 | } | |||
914 | ||||
915 | SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs, | |||
916 | const unsigned RegClassIDs[], | |||
917 | const unsigned SubRegs[]) { | |||
918 | // There's no special register-class for a vector-list of 1 element: it's just | |||
919 | // a vector. | |||
920 | if (Regs.size() == 1) | |||
921 | return Regs[0]; | |||
922 | ||||
923 | assert(Regs.size() >= 2 && Regs.size() <= 4)((Regs.size() >= 2 && Regs.size() <= 4) ? static_cast <void> (0) : __assert_fail ("Regs.size() >= 2 && Regs.size() <= 4" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn235822/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 923, __PRETTY_FUNCTION__)); | |||
924 | ||||
925 | SDLoc DL(Regs[0].getNode()); | |||
926 | ||||
927 | SmallVector<SDValue, 4> Ops; | |||
928 | ||||
929 | // First operand of REG_SEQUENCE is the desired RegClass. | |||
930 | Ops.push_back( | |||
931 | CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], MVT::i32)); | |||
932 | ||||
933 | // Then we get pairs of source & subregister-position for the components. | |||
934 | for (unsigned i = 0; i < Regs.size(); ++i) { | |||
935 | Ops.push_back(Regs[i]); | |||
936 | Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], MVT::i32)); | |||
937 | } | |||
938 | ||||
939 | SDNode *N = | |||
940 | CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops); | |||
941 | return SDValue(N, 0); | |||
942 | } | |||
943 | ||||
SDNode *AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs,
                                         unsigned Opc, bool isExt) {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);

  // When isExt is set there is one extra leading operand (presumably the
  // fallback values for out-of-range indices -- TODO confirm against the
  // intrinsic definition); it shifts all vector operands by one slot.
  unsigned ExtOff = isExt;

  // Form a REG_SEQUENCE to force register allocation.
  unsigned Vec0Off = ExtOff + 1;
  SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off,
                               N->op_begin() + Vec0Off + NumVecs);
  SDValue RegSeq = createQTuple(Regs);

  SmallVector<SDValue, 6> Ops;
  if (isExt)
    Ops.push_back(N->getOperand(1));
  Ops.push_back(RegSeq);
  // The operand following the vectors is the index vector.
  Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
  return CurDAG->getMachineNode(Opc, dl, VT, Ops);
}
964 | ||||
SDNode *AArch64DAGToDAGISel::SelectIndexedLoad(SDNode *N, bool &Done) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  // Only pre-/post-indexed loads are handled here.
  if (LD->isUnindexed())
    return nullptr;
  EVT VT = LD->getMemoryVT();
  EVT DstVT = N->getValueType(0);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;

  // We're not doing validity checking here. That was done when checking
  // if we should mark the load as indexed or not. We're just selecting
  // the right instruction.
  unsigned Opcode = 0;

  ISD::LoadExtType ExtType = LD->getExtensionType();
  // When set, the (at most 32-bit) load result is placed into the low 32
  // bits of a 64-bit register via SUBREG_TO_REG after the load.
  bool InsertTo64 = false;
  if (VT == MVT::i64)
    Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
  else if (VT == MVT::i32) {
    if (ExtType == ISD::NON_EXTLOAD)
      Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
    else if (ExtType == ISD::SEXTLOAD)
      Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
    else {
      // Zero-/any-extending i32 load: a plain 32-bit load suffices; the
      // SUBREG_TO_REG below produces the 64-bit value.
      Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
      InsertTo64 = true;
      // The result of the load is only i32. It's the subreg_to_reg that makes
      // it into an i64.
      DstVT = MVT::i32;
    }
  } else if (VT == MVT::i16) {
    if (ExtType == ISD::SEXTLOAD) {
      // Sign-extending halfword load: pick the X or W form based on the
      // destination width.
      if (DstVT == MVT::i64)
        Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
      else
        Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
    } else {
      Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
      InsertTo64 = DstVT == MVT::i64;
      // The result of the load is only i32. It's the subreg_to_reg that makes
      // it into an i64.
      DstVT = MVT::i32;
    }
  } else if (VT == MVT::i8) {
    if (ExtType == ISD::SEXTLOAD) {
      if (DstVT == MVT::i64)
        Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
      else
        Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
    } else {
      Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
      InsertTo64 = DstVT == MVT::i64;
      // The result of the load is only i32. It's the subreg_to_reg that makes
      // it into an i64.
      DstVT = MVT::i32;
    }
  } else if (VT == MVT::f32) {
    Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
  } else if (VT == MVT::f64 || VT.is64BitVector()) {
    Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
  } else if (VT.is128BitVector()) {
    Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
  } else
    return nullptr;
  SDValue Chain = LD->getChain();
  SDValue Base = LD->getBasePtr();
  ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
  int OffsetVal = (int)OffsetOp->getZExtValue();
  SDValue Offset = CurDAG->getTargetConstant(OffsetVal, MVT::i64);
  SDValue Ops[] = { Base, Offset, Chain };
  // Machine node results: written-back base (i64), loaded value, chain.
  SDNode *Res = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i64, DstVT,
                                       MVT::Other, Ops);
  // Either way, we're replacing the node, so tell the caller that.
  Done = true;
  SDValue LoadedVal = SDValue(Res, 1);
  if (InsertTo64) {
    SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32);
    LoadedVal =
        SDValue(CurDAG->getMachineNode(
                    AArch64::SUBREG_TO_REG, SDLoc(N), MVT::i64,
                    CurDAG->getTargetConstant(0, MVT::i64), LoadedVal, SubReg),
                0);
  }

  // Rewire the original results: value, write-back base, chain.
  ReplaceUses(SDValue(N, 0), LoadedVal);
  ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
  ReplaceUses(SDValue(N, 2), SDValue(Res, 2));

  return nullptr;
}
1055 | ||||
1056 | SDNode *AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, | |||
1057 | unsigned Opc, unsigned SubRegIdx) { | |||
1058 | SDLoc dl(N); | |||
1059 | EVT VT = N->getValueType(0); | |||
1060 | SDValue Chain = N->getOperand(0); | |||
1061 | ||||
1062 | SDValue Ops[] = {N->getOperand(2), // Mem operand; | |||
1063 | Chain}; | |||
1064 | ||||
1065 | const EVT ResTys[] = {MVT::Untyped, MVT::Other}; | |||
1066 | ||||
1067 | SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); | |||
1068 | SDValue SuperReg = SDValue(Ld, 0); | |||
1069 | for (unsigned i = 0; i < NumVecs; ++i) | |||
1070 | ReplaceUses(SDValue(N, i), | |||
1071 | CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg)); | |||
1072 | ||||
1073 | ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1)); | |||
1074 | return nullptr; | |||
1075 | } | |||
1076 | ||||
1077 | SDNode *AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs, | |||
1078 | unsigned Opc, unsigned SubRegIdx) { | |||
1079 | SDLoc dl(N); | |||
1080 | EVT VT = N->getValueType(0); | |||
1081 | SDValue Chain = N->getOperand(0); | |||
1082 | ||||
1083 | SDValue Ops[] = {N->getOperand(1), // Mem operand | |||
1084 | N->getOperand(2), // Incremental | |||
1085 | Chain}; | |||
1086 | ||||
1087 | const EVT ResTys[] = {MVT::i64, // Type of the write back register | |||
1088 | MVT::Untyped, MVT::Other}; | |||
1089 | ||||
1090 | SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); | |||
1091 | ||||
1092 | // Update uses of write back register | |||
1093 | ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0)); | |||
1094 | ||||
1095 | // Update uses of vector list | |||
1096 | SDValue SuperReg = SDValue(Ld, 1); | |||
1097 | if (NumVecs == 1) | |||
1098 | ReplaceUses(SDValue(N, 0), SuperReg); | |||
1099 | else | |||
1100 | for (unsigned i = 0; i < NumVecs; ++i) | |||
1101 | ReplaceUses(SDValue(N, i), | |||
1102 | CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg)); | |||
1103 | ||||
1104 | // Update the chain | |||
1105 | ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2)); | |||
1106 | return nullptr; | |||
1107 | } | |||
1108 | ||||
1109 | SDNode *AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs, | |||
1110 | unsigned Opc) { | |||
1111 | SDLoc dl(N); | |||
1112 | EVT VT = N->getOperand(2)->getValueType(0); | |||
1113 | ||||
1114 | // Form a REG_SEQUENCE to force register allocation. | |||
1115 | bool Is128Bit = VT.getSizeInBits() == 128; | |||
1116 | SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); | |||
1117 | SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs); | |||
1118 | ||||
1119 | SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)}; | |||
1120 | SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops); | |||
1121 | ||||
1122 | return St; | |||
1123 | } | |||
1124 | ||||
1125 | SDNode *AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs, | |||
1126 | unsigned Opc) { | |||
1127 | SDLoc dl(N); | |||
1128 | EVT VT = N->getOperand(2)->getValueType(0); | |||
1129 | const EVT ResTys[] = {MVT::i64, // Type of the write back register | |||
1130 | MVT::Other}; // Type for the Chain | |||
1131 | ||||
1132 | // Form a REG_SEQUENCE to force register allocation. | |||
1133 | bool Is128Bit = VT.getSizeInBits() == 128; | |||
1134 | SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); | |||
1135 | SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs); | |||
1136 | ||||
1137 | SDValue Ops[] = {RegSeq, | |||
1138 | N->getOperand(NumVecs + 1), // base register | |||
1139 | N->getOperand(NumVecs + 2), // Incremental | |||
1140 | N->getOperand(0)}; // Chain | |||
1141 | SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); | |||
1142 | ||||
1143 | return St; | |||
1144 | } | |||
1145 | ||||
namespace {
/// WidenVector - Given a value in the V64 register class, produce the
/// equivalent value in the V128 register class.
class WidenVector {
  SelectionDAG &DAG;

public:
  WidenVector(SelectionDAG &DAG) : DAG(DAG) {}

  /// Insert \p V64Reg into the dsub subregister of an undefined wide
  /// vector, producing a vector with the same element type and twice the
  /// element count whose low half is the original value.
  SDValue operator()(SDValue V64Reg) {
    EVT VT = V64Reg.getValueType();
    unsigned NarrowSize = VT.getVectorNumElements();
    MVT EltTy = VT.getVectorElementType().getSimpleVT();
    // Same element type, twice as many elements.
    MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
    SDLoc DL(V64Reg);

    SDValue Undef =
        SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
    return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
  }
};
} // namespace
1168 | ||||
1169 | /// NarrowVector - Given a value in the V128 register class, produce the | |||
1170 | /// equivalent value in the V64 register class. | |||
1171 | static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) { | |||
1172 | EVT VT = V128Reg.getValueType(); | |||
1173 | unsigned WideSize = VT.getVectorNumElements(); | |||
1174 | MVT EltTy = VT.getVectorElementType().getSimpleVT(); | |||
1175 | MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2); | |||
1176 | ||||
1177 | return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy, | |||
1178 | V128Reg); | |||
1179 | } | |||
1180 | ||||
SDNode *AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
                                            unsigned Opc) {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  // 64-bit vectors are widened to Q registers because the lane
  // instructions operate on Q register tuples.
  bool Narrow = VT.getSizeInBits() == 64;

  // Form a REG_SEQUENCE to force register allocation.
  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);

  if (Narrow)
    std::transform(Regs.begin(), Regs.end(), Regs.begin(),
                   WidenVector(*CurDAG));

  SDValue RegSeq = createQTuple(Regs);

  // Results: the tuple of updated vectors and the chain.
  const EVT ResTys[] = {MVT::Untyped, MVT::Other};

  unsigned LaneNo =
      cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();

  // Operands: tuple, lane number, address, chain.
  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, MVT::i64),
                   N->getOperand(NumVecs + 3), N->getOperand(0)};
  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  SDValue SuperReg = SDValue(Ld, 0);

  // Extract each result vector from the tuple, narrowing back to 64 bits
  // where the inputs were widened.
  EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
  static unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, AArch64::qsub2,
                              AArch64::qsub3 };
  for (unsigned i = 0; i < NumVecs; ++i) {
    SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
    if (Narrow)
      NV = NarrowVector(NV, *CurDAG);
    ReplaceUses(SDValue(N, i), NV);
  }

  // Rewire the chain.
  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));

  return Ld;
}
1220 | ||||
SDNode *AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
                                                unsigned Opc) {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  // 64-bit vectors are widened to Q registers because the lane
  // instructions operate on Q register tuples.
  bool Narrow = VT.getSizeInBits() == 64;

  // Form a REG_SEQUENCE to force register allocation.
  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);

  if (Narrow)
    std::transform(Regs.begin(), Regs.end(), Regs.begin(),
                   WidenVector(*CurDAG));

  SDValue RegSeq = createQTuple(Regs);

  const EVT ResTys[] = {MVT::i64, // Type of the write back register
                        RegSeq->getValueType(0), MVT::Other};

  unsigned LaneNo =
      cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();

  SDValue Ops[] = {RegSeq,
                   CurDAG->getTargetConstant(LaneNo, MVT::i64), // Lane Number
                   N->getOperand(NumVecs + 2), // Base register
                   N->getOperand(NumVecs + 3), // Incremental
                   N->getOperand(0)};
  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  // Update uses of the write back register
  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));

  // Update uses of the vector list
  SDValue SuperReg = SDValue(Ld, 1);
  if (NumVecs == 1) {
    // A single vector needs no tuple; use the result directly, narrowing
    // back to 64 bits if the input was widened.
    ReplaceUses(SDValue(N, 0),
                Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
  } else {
    // Extract each vector from the tuple result, narrowing where the
    // inputs were widened.
    EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
    static unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, AArch64::qsub2,
                                AArch64::qsub3 };
    for (unsigned i = 0; i < NumVecs; ++i) {
      SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
                                                  SuperReg);
      if (Narrow)
        NV = NarrowVector(NV, *CurDAG);
      ReplaceUses(SDValue(N, i), NV);
    }
  }

  // Update the Chain
  ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));

  return Ld;
}
1275 | ||||
1276 | SDNode *AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs, | |||
1277 | unsigned Opc) { | |||
1278 | SDLoc dl(N); | |||
1279 | EVT VT = N->getOperand(2)->getValueType(0); | |||
1280 | bool Narrow = VT.getSizeInBits() == 64; | |||
1281 | ||||
1282 | // Form a REG_SEQUENCE to force register allocation. | |||
1283 | SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); | |||
1284 | ||||
1285 | if (Narrow) | |||
1286 | std::transform(Regs.begin(), Regs.end(), Regs.begin(), | |||
1287 | WidenVector(*CurDAG)); | |||
1288 | ||||
1289 | SDValue RegSeq = createQTuple(Regs); | |||
1290 | ||||
1291 | unsigned LaneNo = | |||
1292 | cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue(); | |||
1293 | ||||
1294 | SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, MVT::i64), | |||
1295 | N->getOperand(NumVecs + 3), N->getOperand(0)}; | |||
1296 | SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops); | |||
1297 | ||||
1298 | // Transfer memoperands. | |||
1299 | MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); | |||
1300 | MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); | |||
1301 | cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1); | |||
1302 | ||||
1303 | return St; | |||
1304 | } | |||
1305 | ||||
1306 | SDNode *AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs, | |||
1307 | unsigned Opc) { | |||
1308 | SDLoc dl(N); | |||
1309 | EVT VT = N->getOperand(2)->getValueType(0); | |||
1310 | bool Narrow = VT.getSizeInBits() == 64; | |||
1311 | ||||
1312 | // Form a REG_SEQUENCE to force register allocation. | |||
1313 | SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); | |||
1314 | ||||
1315 | if (Narrow) | |||
1316 | std::transform(Regs.begin(), Regs.end(), Regs.begin(), | |||
1317 | WidenVector(*CurDAG)); | |||
1318 | ||||
1319 | SDValue RegSeq = createQTuple(Regs); | |||
1320 | ||||
1321 | const EVT ResTys[] = {MVT::i64, // Type of the write back register | |||
1322 | MVT::Other}; | |||
1323 | ||||
1324 | unsigned LaneNo = | |||
1325 | cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue(); | |||
1326 | ||||
1327 | SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, MVT::i64), | |||
1328 | N->getOperand(NumVecs + 2), // Base Register | |||
1329 | N->getOperand(NumVecs + 3), // Incremental | |||
1330 | N->getOperand(0)}; | |||
1331 | SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); | |||
1332 | ||||
1333 | // Transfer memoperands. | |||
1334 | MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); | |||
1335 | MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); | |||
1336 | cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1); | |||
1337 | ||||
1338 | return St; | |||
1339 | } | |||
1340 | ||||
1341 | static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N, | |||
1342 | unsigned &Opc, SDValue &Opd0, | |||
1343 | unsigned &LSB, unsigned &MSB, | |||
1344 | unsigned NumberOfIgnoredLowBits, | |||
1345 | bool BiggerPattern) { | |||
1346 | assert(N->getOpcode() == ISD::AND &&((N->getOpcode() == ISD::AND && "N must be a AND operation to call this function" ) ? static_cast<void> (0) : __assert_fail ("N->getOpcode() == ISD::AND && \"N must be a AND operation to call this function\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn235822/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 1347, __PRETTY_FUNCTION__)) | |||
1347 | "N must be a AND operation to call this function")((N->getOpcode() == ISD::AND && "N must be a AND operation to call this function" ) ? static_cast<void> (0) : __assert_fail ("N->getOpcode() == ISD::AND && \"N must be a AND operation to call this function\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn235822/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 1347, __PRETTY_FUNCTION__)); | |||
1348 | ||||
1349 | EVT VT = N->getValueType(0); | |||
1350 | ||||
1351 | // Here we can test the type of VT and return false when the type does not | |||
1352 | // match, but since it is done prior to that call in the current context | |||
1353 | // we turned that into an assert to avoid redundant code. | |||
1354 | assert((VT == MVT::i32 || VT == MVT::i64) &&(((VT == MVT::i32 || VT == MVT::i64) && "Type checking must have been done before calling this function" ) ? static_cast<void> (0) : __assert_fail ("(VT == MVT::i32 || VT == MVT::i64) && \"Type checking must have been done before calling this function\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn235822/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 1355, __PRETTY_FUNCTION__)) | |||
1355 | "Type checking must have been done before calling this function")(((VT == MVT::i32 || VT == MVT::i64) && "Type checking must have been done before calling this function" ) ? static_cast<void> (0) : __assert_fail ("(VT == MVT::i32 || VT == MVT::i64) && \"Type checking must have been done before calling this function\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn235822/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 1355, __PRETTY_FUNCTION__)); | |||
1356 | ||||
1357 | // FIXME: simplify-demanded-bits in DAGCombine will probably have | |||
1358 | // changed the AND node to a 32-bit mask operation. We'll have to | |||
1359 | // undo that as part of the transform here if we want to catch all | |||
1360 | // the opportunities. | |||
1361 | // Currently the NumberOfIgnoredLowBits argument helps to recover | |||
1362 | // form these situations when matching bigger pattern (bitfield insert). | |||
1363 | ||||
1364 | // For unsigned extracts, check for a shift right and mask | |||
1365 | uint64_t And_imm = 0; | |||
1366 | if (!isOpcWithIntImmediate(N, ISD::AND, And_imm)) | |||
1367 | return false; | |||
1368 | ||||
1369 | const SDNode *Op0 = N->getOperand(0).getNode(); | |||
1370 | ||||
1371 | // Because of simplify-demanded-bits in DAGCombine, the mask may have been | |||
1372 | // simplified. Try to undo that | |||
1373 | And_imm |= (1 << NumberOfIgnoredLowBits) - 1; | |||
1374 | ||||
1375 | // The immediate is a mask of the low bits iff imm & (imm+1) == 0 | |||
1376 | if (And_imm & (And_imm + 1)) | |||
1377 | return false; | |||
1378 | ||||
1379 | bool ClampMSB = false; | |||
1380 | uint64_t Srl_imm = 0; | |||
1381 | // Handle the SRL + ANY_EXTEND case. | |||
1382 | if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND && | |||
1383 | isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, Srl_imm)) { | |||
1384 | // Extend the incoming operand of the SRL to 64-bit. | |||
1385 | Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0)); | |||
1386 | // Make sure to clamp the MSB so that we preserve the semantics of the | |||
1387 | // original operations. | |||
1388 | ClampMSB = true; | |||
1389 | } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE && | |||
1390 | isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, | |||
1391 | Srl_imm)) { | |||
1392 | // If the shift result was truncated, we can still combine them. | |||
1393 | Opd0 = Op0->getOperand(0).getOperand(0); | |||
1394 | ||||
1395 | // Use the type of SRL node. | |||
1396 | VT = Opd0->getValueType(0); | |||
1397 | } else if (isOpcWithIntImmediate(Op0, ISD::SRL, Srl_imm)) { | |||
1398 | Opd0 = Op0->getOperand(0); | |||
1399 | } else if (BiggerPattern) { | |||
1400 | // Let's pretend a 0 shift right has been performed. | |||
1401 | // The resulting code will be at least as good as the original one | |||
1402 | // plus it may expose more opportunities for bitfield insert pattern. | |||
1403 | // FIXME: Currently we limit this to the bigger pattern, because | |||
1404 | // some optimizations expect AND and not UBFM | |||
1405 | Opd0 = N->getOperand(0); | |||
1406 | } else | |||
1407 | return false; | |||
1408 | ||||
1409 | // Bail out on large immediates. This happens when no proper | |||
1410 | // combining/constant folding was performed. | |||
1411 | if (!BiggerPattern && (Srl_imm <= 0 || Srl_imm >= VT.getSizeInBits())) { | |||
1412 | DEBUG((dbgs() << Ndo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { (dbgs() << N << ": Found large shift immediate, this should not happen\n" ); } } while (0) | |||
1413 | << ": Found large shift immediate, this should not happen\n"))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { (dbgs() << N << ": Found large shift immediate, this should not happen\n" ); } } while (0); | |||
1414 | return false; | |||
1415 | } | |||
1416 | ||||
1417 | LSB = Srl_imm; | |||
1418 | MSB = Srl_imm + (VT == MVT::i32 ? countTrailingOnes<uint32_t>(And_imm) | |||
1419 | : countTrailingOnes<uint64_t>(And_imm)) - | |||
1420 | 1; | |||
1421 | if (ClampMSB) | |||
1422 | // Since we're moving the extend before the right shift operation, we need | |||
1423 | // to clamp the MSB to make sure we don't shift in undefined bits instead of | |||
1424 | // the zeros which would get shifted in with the original right shift | |||
1425 | // operation. | |||
1426 | MSB = MSB > 31 ? 31 : MSB; | |||
1427 | ||||
1428 | Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri; | |||
1429 | return true; | |||
1430 | } | |||
1431 | ||||
1432 | static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc, | |||
1433 | SDValue &Opd0, unsigned &LSB, | |||
1434 | unsigned &MSB) { | |||
1435 | // We are looking for the following pattern which basically extracts several | |||
1436 | // continuous bits from the source value and places it from the LSB of the | |||
1437 | // destination value, all other bits of the destination value or set to zero: | |||
1438 | // | |||
1439 | // Value2 = AND Value, MaskImm | |||
1440 | // SRL Value2, ShiftImm | |||
1441 | // | |||
1442 | // with MaskImm >> ShiftImm to search for the bit width. | |||
1443 | // | |||
1444 | // This gets selected into a single UBFM: | |||
1445 | // | |||
1446 | // UBFM Value, ShiftImm, BitWide + Srl_imm -1 | |||
1447 | // | |||
1448 | ||||
1449 | if (N->getOpcode() != ISD::SRL) | |||
1450 | return false; | |||
1451 | ||||
1452 | uint64_t And_mask = 0; | |||
1453 | if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_mask)) | |||
1454 | return false; | |||
1455 | ||||
1456 | Opd0 = N->getOperand(0).getOperand(0); | |||
1457 | ||||
1458 | uint64_t Srl_imm = 0; | |||
1459 | if (!isIntImmediate(N->getOperand(1), Srl_imm)) | |||
1460 | return false; | |||
1461 | ||||
1462 | // Check whether we really have several bits extract here. | |||
1463 | unsigned BitWide = 64 - countLeadingOnes(~(And_mask >> Srl_imm)); | |||
1464 | if (BitWide && isMask_64(And_mask >> Srl_imm)) { | |||
1465 | if (N->getValueType(0) == MVT::i32) | |||
1466 | Opc = AArch64::UBFMWri; | |||
1467 | else | |||
1468 | Opc = AArch64::UBFMXri; | |||
1469 | ||||
1470 | LSB = Srl_imm; | |||
1471 | MSB = BitWide + Srl_imm - 1; | |||
1472 | return true; | |||
1473 | } | |||
1474 | ||||
1475 | return false; | |||
1476 | } | |||
1477 | ||||
1478 | static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, | |||
1479 | unsigned &LSB, unsigned &MSB, | |||
1480 | bool BiggerPattern) { | |||
1481 | assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&(((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD:: SRL) && "N must be a SHR/SRA operation to call this function" ) ? static_cast<void> (0) : __assert_fail ("(N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) && \"N must be a SHR/SRA operation to call this function\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn235822/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 1482, __PRETTY_FUNCTION__)) | |||
1482 | "N must be a SHR/SRA operation to call this function")(((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD:: SRL) && "N must be a SHR/SRA operation to call this function" ) ? static_cast<void> (0) : __assert_fail ("(N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) && \"N must be a SHR/SRA operation to call this function\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn235822/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 1482, __PRETTY_FUNCTION__)); | |||
1483 | ||||
1484 | EVT VT = N->getValueType(0); | |||
1485 | ||||
1486 | // Here we can test the type of VT and return false when the type does not | |||
1487 | // match, but since it is done prior to that call in the current context | |||
1488 | // we turned that into an assert to avoid redundant code. | |||
1489 | assert((VT == MVT::i32 || VT == MVT::i64) &&(((VT == MVT::i32 || VT == MVT::i64) && "Type checking must have been done before calling this function" ) ? static_cast<void> (0) : __assert_fail ("(VT == MVT::i32 || VT == MVT::i64) && \"Type checking must have been done before calling this function\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn235822/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 1490, __PRETTY_FUNCTION__)) | |||
1490 | "Type checking must have been done before calling this function")(((VT == MVT::i32 || VT == MVT::i64) && "Type checking must have been done before calling this function" ) ? static_cast<void> (0) : __assert_fail ("(VT == MVT::i32 || VT == MVT::i64) && \"Type checking must have been done before calling this function\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn235822/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 1490, __PRETTY_FUNCTION__)); | |||
1491 | ||||
1492 | // Check for AND + SRL doing several bits extract. | |||
1493 | if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, LSB, MSB)) | |||
1494 | return true; | |||
1495 | ||||
1496 | // we're looking for a shift of a shift | |||
1497 | uint64_t Shl_imm = 0; | |||
1498 | uint64_t Trunc_bits = 0; | |||
1499 | if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) { | |||
1500 | Opd0 = N->getOperand(0).getOperand(0); | |||
1501 | } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL && | |||
1502 | N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) { | |||
1503 | // We are looking for a shift of truncate. Truncate from i64 to i32 could | |||
1504 | // be considered as setting high 32 bits as zero. Our strategy here is to | |||
1505 | // always generate 64bit UBFM. This consistency will help the CSE pass | |||
1506 | // later find more redundancy. | |||
1507 | Opd0 = N->getOperand(0).getOperand(0); | |||
1508 | Trunc_bits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits(); | |||
1509 | VT = Opd0->getValueType(0); | |||
1510 | assert(VT == MVT::i64 && "the promoted type should be i64")((VT == MVT::i64 && "the promoted type should be i64" ) ? static_cast<void> (0) : __assert_fail ("VT == MVT::i64 && \"the promoted type should be i64\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn235822/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 1510, __PRETTY_FUNCTION__)); | |||
1511 | } else if (BiggerPattern) { | |||
1512 | // Let's pretend a 0 shift left has been performed. | |||
1513 | // FIXME: Currently we limit this to the bigger pattern case, | |||
1514 | // because some optimizations expect AND and not UBFM | |||
1515 | Opd0 = N->getOperand(0); | |||
1516 | } else | |||
1517 | return false; | |||
1518 | ||||
1519 | // Missing combines/constant folding may have left us with strange | |||
1520 | // constants. | |||
1521 | if (Shl_imm >= VT.getSizeInBits()) { | |||
1522 | DEBUG((dbgs() << Ndo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { (dbgs() << N << ": Found large shift immediate, this should not happen\n" ); } } while (0) | |||
1523 | << ": Found large shift immediate, this should not happen\n"))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { (dbgs() << N << ": Found large shift immediate, this should not happen\n" ); } } while (0); | |||
1524 | return false; | |||
1525 | } | |||
1526 | ||||
1527 | uint64_t Srl_imm = 0; | |||
1528 | if (!isIntImmediate(N->getOperand(1), Srl_imm)) | |||
1529 | return false; | |||
1530 | ||||
1531 | assert(Srl_imm > 0 && Srl_imm < VT.getSizeInBits() &&((Srl_imm > 0 && Srl_imm < VT.getSizeInBits() && "bad amount in shift node!") ? static_cast<void> (0) : __assert_fail ("Srl_imm > 0 && Srl_imm < VT.getSizeInBits() && \"bad amount in shift node!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn235822/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 1532, __PRETTY_FUNCTION__)) | |||
1532 | "bad amount in shift node!")((Srl_imm > 0 && Srl_imm < VT.getSizeInBits() && "bad amount in shift node!") ? static_cast<void> (0) : __assert_fail ("Srl_imm > 0 && Srl_imm < VT.getSizeInBits() && \"bad amount in shift node!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn235822/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 1532, __PRETTY_FUNCTION__)); | |||
1533 | // Note: The width operand is encoded as width-1. | |||
1534 | unsigned Width = VT.getSizeInBits() - Trunc_bits - Srl_imm - 1; | |||
1535 | int sLSB = Srl_imm - Shl_imm; | |||
1536 | if (sLSB < 0) | |||
1537 | return false; | |||
1538 | LSB = sLSB; | |||
1539 | MSB = LSB + Width; | |||
1540 | // SRA requires a signed extraction | |||
1541 | if (VT == MVT::i32) | |||
1542 | Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri; | |||
1543 | else | |||
1544 | Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri; | |||
1545 | return true; | |||
1546 | } | |||
1547 | ||||
1548 | static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, | |||
1549 | SDValue &Opd0, unsigned &LSB, unsigned &MSB, | |||
1550 | unsigned NumberOfIgnoredLowBits = 0, | |||
1551 | bool BiggerPattern = false) { | |||
1552 | if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64) | |||
1553 | return false; | |||
1554 | ||||
1555 | switch (N->getOpcode()) { | |||
1556 | default: | |||
1557 | if (!N->isMachineOpcode()) | |||
1558 | return false; | |||
1559 | break; | |||
1560 | case ISD::AND: | |||
1561 | return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, LSB, MSB, | |||
1562 | NumberOfIgnoredLowBits, BiggerPattern); | |||
1563 | case ISD::SRL: | |||
1564 | case ISD::SRA: | |||
1565 | return isBitfieldExtractOpFromShr(N, Opc, Opd0, LSB, MSB, BiggerPattern); | |||
1566 | } | |||
1567 | ||||
1568 | unsigned NOpc = N->getMachineOpcode(); | |||
1569 | switch (NOpc) { | |||
1570 | default: | |||
1571 | return false; | |||
1572 | case AArch64::SBFMWri: | |||
1573 | case AArch64::UBFMWri: | |||
1574 | case AArch64::SBFMXri: | |||
1575 | case AArch64::UBFMXri: | |||
1576 | Opc = NOpc; | |||
1577 | Opd0 = N->getOperand(0); | |||
1578 | LSB = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue(); | |||
1579 | MSB = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue(); | |||
1580 | return true; | |||
1581 | } | |||
1582 | // Unreachable | |||
1583 | return false; | |||
1584 | } | |||
1585 | ||||
1586 | SDNode *AArch64DAGToDAGISel::SelectBitfieldExtractOp(SDNode *N) { | |||
1587 | unsigned Opc, LSB, MSB; | |||
1588 | SDValue Opd0; | |||
1589 | if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, LSB, MSB)) | |||
1590 | return nullptr; | |||
1591 | ||||
1592 | EVT VT = N->getValueType(0); | |||
1593 | ||||
1594 | // If the bit extract operation is 64bit but the original type is 32bit, we | |||
1595 | // need to add one EXTRACT_SUBREG. | |||
1596 | if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) { | |||
1597 | SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(LSB, MVT::i64), | |||
1598 | CurDAG->getTargetConstant(MSB, MVT::i64)}; | |||
1599 | ||||
1600 | SDNode *BFM = CurDAG->getMachineNode(Opc, SDLoc(N), MVT::i64, Ops64); | |||
1601 | SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32); | |||
1602 | MachineSDNode *Node = | |||
1603 | CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SDLoc(N), MVT::i32, | |||
1604 | SDValue(BFM, 0), SubReg); | |||
1605 | return Node; | |||
1606 | } | |||
1607 | ||||
1608 | SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(LSB, VT), | |||
1609 | CurDAG->getTargetConstant(MSB, VT)}; | |||
1610 | return CurDAG->SelectNodeTo(N, Opc, VT, Ops); | |||
1611 | } | |||
1612 | ||||
1613 | /// Does DstMask form a complementary pair with the mask provided by | |||
1614 | /// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking, | |||
1615 | /// this asks whether DstMask zeroes precisely those bits that will be set by | |||
1616 | /// the other half. | |||
1617 | static bool isBitfieldDstMask(uint64_t DstMask, APInt BitsToBeInserted, | |||
1618 | unsigned NumberOfIgnoredHighBits, EVT VT) { | |||
1619 | assert((VT == MVT::i32 || VT == MVT::i64) &&(((VT == MVT::i32 || VT == MVT::i64) && "i32 or i64 mask type expected!" ) ? static_cast<void> (0) : __assert_fail ("(VT == MVT::i32 || VT == MVT::i64) && \"i32 or i64 mask type expected!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn235822/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 1620, __PRETTY_FUNCTION__)) | |||
1620 | "i32 or i64 mask type expected!")(((VT == MVT::i32 || VT == MVT::i64) && "i32 or i64 mask type expected!" ) ? static_cast<void> (0) : __assert_fail ("(VT == MVT::i32 || VT == MVT::i64) && \"i32 or i64 mask type expected!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn235822/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 1620, __PRETTY_FUNCTION__)); | |||
1621 | unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits; | |||
1622 | ||||
1623 | APInt SignificantDstMask = APInt(BitWidth, DstMask); | |||
1624 | APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth); | |||
1625 | ||||
1626 | return (SignificantDstMask & SignificantBitsToBeInserted) == 0 && | |||
1627 | (SignificantDstMask | SignificantBitsToBeInserted).isAllOnesValue(); | |||
1628 | } | |||
1629 | ||||
1630 | // Look for bits that will be useful for later uses. | |||
1631 | // A bit is consider useless as soon as it is dropped and never used | |||
1632 | // before it as been dropped. | |||
1633 | // E.g., looking for useful bit of x | |||
1634 | // 1. y = x & 0x7 | |||
1635 | // 2. z = y >> 2 | |||
1636 | // After #1, x useful bits are 0x7, then the useful bits of x, live through | |||
1637 | // y. | |||
1638 | // After #2, the useful bits of x are 0x4. | |||
1639 | // However, if x is used on an unpredicatable instruction, then all its bits | |||
1640 | // are useful. | |||
1641 | // E.g. | |||
1642 | // 1. y = x & 0x7 | |||
1643 | // 2. z = y >> 2 | |||
1644 | // 3. str x, [@x] | |||
1645 | static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0); | |||
1646 | ||||
1647 | static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits, | |||
1648 | unsigned Depth) { | |||
1649 | uint64_t Imm = | |||
1650 | cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue(); | |||
1651 | Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth()); | |||
1652 | UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm); | |||
1653 | getUsefulBits(Op, UsefulBits, Depth + 1); | |||
1654 | } | |||
1655 | ||||
1656 | static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits, | |||
1657 | uint64_t Imm, uint64_t MSB, | |||
1658 | unsigned Depth) { | |||
1659 | // inherit the bitwidth value | |||
1660 | APInt OpUsefulBits(UsefulBits); | |||
1661 | OpUsefulBits = 1; | |||
1662 | ||||
1663 | if (MSB >= Imm) { | |||
1664 | OpUsefulBits = OpUsefulBits.shl(MSB - Imm + 1); | |||
1665 | --OpUsefulBits; | |||
1666 | // The interesting part will be in the lower part of the result | |||
1667 | getUsefulBits(Op, OpUsefulBits, Depth + 1); | |||
1668 | // The interesting part was starting at Imm in the argument | |||
1669 | OpUsefulBits = OpUsefulBits.shl(Imm); | |||
1670 | } else { | |||
1671 | OpUsefulBits = OpUsefulBits.shl(MSB + 1); | |||
1672 | --OpUsefulBits; | |||
1673 | // The interesting part will be shifted in the result | |||
1674 | OpUsefulBits = OpUsefulBits.shl(OpUsefulBits.getBitWidth() - Imm); | |||
1675 | getUsefulBits(Op, OpUsefulBits, Depth + 1); | |||
1676 | // The interesting part was at zero in the argument | |||
1677 | OpUsefulBits = OpUsefulBits.lshr(OpUsefulBits.getBitWidth() - Imm); | |||
1678 | } | |||
1679 | ||||
1680 | UsefulBits &= OpUsefulBits; | |||
1681 | } | |||
1682 | ||||
1683 | static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits, | |||
1684 | unsigned Depth) { | |||
1685 | uint64_t Imm = | |||
1686 | cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue(); | |||
1687 | uint64_t MSB = | |||
1688 | cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue(); | |||
1689 | ||||
1690 | getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth); | |||
1691 | } | |||
1692 | ||||
1693 | static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits, | |||
1694 | unsigned Depth) { | |||
1695 | uint64_t ShiftTypeAndValue = | |||
1696 | cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue(); | |||
1697 | APInt Mask(UsefulBits); | |||
1698 | Mask.clearAllBits(); | |||
1699 | Mask.flipAllBits(); | |||
1700 | ||||
1701 | if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) { | |||
1702 | // Shift Left | |||
1703 | uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue); | |||
1704 | Mask = Mask.shl(ShiftAmt); | |||
1705 | getUsefulBits(Op, Mask, Depth + 1); | |||
1706 | Mask = Mask.lshr(ShiftAmt); | |||
1707 | } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) { | |||
1708 | // Shift Right | |||
1709 | // We do not handle AArch64_AM::ASR, because the sign will change the | |||
1710 | // number of useful bits | |||
1711 | uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue); | |||
1712 | Mask = Mask.lshr(ShiftAmt); | |||
1713 | getUsefulBits(Op, Mask, Depth + 1); | |||
1714 | Mask = Mask.shl(ShiftAmt); | |||
1715 | } else | |||
1716 | return; | |||
1717 | ||||
1718 | UsefulBits &= Mask; | |||
1719 | } | |||
1720 | ||||
1721 | static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits, | |||
1722 | unsigned Depth) { | |||
1723 | uint64_t Imm = | |||
1724 | cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue(); | |||
1725 | uint64_t MSB = | |||
1726 | cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue(); | |||
1727 | ||||
1728 | if (Op.getOperand(1) == Orig) | |||
1729 | return getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth); | |||
1730 | ||||
1731 | APInt OpUsefulBits(UsefulBits); | |||
1732 | OpUsefulBits = 1; | |||
1733 | ||||
1734 | if (MSB >= Imm) { | |||
1735 | OpUsefulBits = OpUsefulBits.shl(MSB - Imm + 1); | |||
1736 | --OpUsefulBits; | |||
1737 | UsefulBits &= ~OpUsefulBits; | |||
1738 | getUsefulBits(Op, UsefulBits, Depth + 1); | |||
1739 | } else { | |||
1740 | OpUsefulBits = OpUsefulBits.shl(MSB + 1); | |||
1741 | --OpUsefulBits; | |||
1742 | UsefulBits = ~(OpUsefulBits.shl(OpUsefulBits.getBitWidth() - Imm)); | |||
1743 | getUsefulBits(Op, UsefulBits, Depth + 1); | |||
1744 | } | |||
1745 | } | |||
1746 | ||||
1747 | static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits, | |||
1748 | SDValue Orig, unsigned Depth) { | |||
1749 | ||||
1750 | // Users of this node should have already been instruction selected | |||
1751 | // FIXME: Can we turn that into an assert? | |||
1752 | if (!UserNode->isMachineOpcode()) | |||
1753 | return; | |||
1754 | ||||
1755 | switch (UserNode->getMachineOpcode()) { | |||
1756 | default: | |||
1757 | return; | |||
1758 | case AArch64::ANDSWri: | |||
1759 | case AArch64::ANDSXri: | |||
1760 | case AArch64::ANDWri: | |||
1761 | case AArch64::ANDXri: | |||
1762 | // We increment Depth only when we call the getUsefulBits | |||
1763 | return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits, | |||
1764 | Depth); | |||
1765 | case AArch64::UBFMWri: | |||
1766 | case AArch64::UBFMXri: | |||
1767 | return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth); | |||
1768 | ||||
1769 | case AArch64::ORRWrs: | |||
1770 | case AArch64::ORRXrs: | |||
1771 | if (UserNode->getOperand(1) != Orig) | |||
1772 | return; | |||
1773 | return getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits, | |||
1774 | Depth); | |||
1775 | case AArch64::BFMWri: | |||
1776 | case AArch64::BFMXri: | |||
1777 | return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth); | |||
1778 | } | |||
1779 | } | |||
1780 | ||||
1781 | static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) { | |||
1782 | if (Depth >= 6) | |||
1783 | return; | |||
1784 | // Initialize UsefulBits | |||
1785 | if (!Depth) { | |||
1786 | unsigned Bitwidth = Op.getValueType().getScalarType().getSizeInBits(); | |||
1787 | // At the beginning, assume every produced bits is useful | |||
1788 | UsefulBits = APInt(Bitwidth, 0); | |||
1789 | UsefulBits.flipAllBits(); | |||
1790 | } | |||
1791 | APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0); | |||
1792 | ||||
1793 | for (SDNode *Node : Op.getNode()->uses()) { | |||
1794 | // A use cannot produce useful bits | |||
1795 | APInt UsefulBitsForUse = APInt(UsefulBits); | |||
1796 | getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth); | |||
1797 | UsersUsefulBits |= UsefulBitsForUse; | |||
1798 | } | |||
1799 | // UsefulBits contains the produced bits that are meaningful for the | |||
1800 | // current definition, thus a user cannot make a bit meaningful at | |||
1801 | // this point | |||
1802 | UsefulBits &= UsersUsefulBits; | |||
1803 | } | |||
1804 | ||||
1805 | /// Create a machine node performing a notional SHL of Op by ShlAmount. If | |||
1806 | /// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is | |||
1807 | /// 0, return Op unchanged. | |||
1808 | static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) { | |||
1809 | if (ShlAmount == 0) | |||
1810 | return Op; | |||
1811 | ||||
1812 | EVT VT = Op.getValueType(); | |||
1813 | unsigned BitWidth = VT.getSizeInBits(); | |||
1814 | unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri; | |||
1815 | ||||
1816 | SDNode *ShiftNode; | |||
1817 | if (ShlAmount > 0) { | |||
1818 | // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt | |||
1819 | ShiftNode = CurDAG->getMachineNode( | |||
1820 | UBFMOpc, SDLoc(Op), VT, Op, | |||
1821 | CurDAG->getTargetConstant(BitWidth - ShlAmount, VT), | |||
1822 | CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, VT)); | |||
1823 | } else { | |||
1824 | // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1 | |||
1825 | assert(ShlAmount < 0 && "expected right shift")((ShlAmount < 0 && "expected right shift") ? static_cast <void> (0) : __assert_fail ("ShlAmount < 0 && \"expected right shift\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn235822/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 1825, __PRETTY_FUNCTION__)); | |||
1826 | int ShrAmount = -ShlAmount; | |||
1827 | ShiftNode = CurDAG->getMachineNode( | |||
1828 | UBFMOpc, SDLoc(Op), VT, Op, CurDAG->getTargetConstant(ShrAmount, VT), | |||
1829 | CurDAG->getTargetConstant(BitWidth - 1, VT)); | |||
1830 | } | |||
1831 | ||||
1832 | return SDValue(ShiftNode, 0); | |||
1833 | } | |||
1834 | ||||
1835 | /// Does this tree qualify as an attempt to move a bitfield into position, | |||
1836 | /// essentially "(and (shl VAL, N), Mask)". | |||
1837 | static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op, | |||
1838 | SDValue &Src, int &ShiftAmount, | |||
1839 | int &MaskWidth) { | |||
1840 | EVT VT = Op.getValueType(); | |||
1841 | unsigned BitWidth = VT.getSizeInBits(); | |||
1842 | (void)BitWidth; | |||
1843 | assert(BitWidth == 32 || BitWidth == 64)((BitWidth == 32 || BitWidth == 64) ? static_cast<void> (0) : __assert_fail ("BitWidth == 32 || BitWidth == 64", "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn235822/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 1843, __PRETTY_FUNCTION__)); | |||
1844 | ||||
1845 | APInt KnownZero, KnownOne; | |||
1846 | CurDAG->computeKnownBits(Op, KnownZero, KnownOne); | |||
1847 | ||||
1848 | // Non-zero in the sense that they're not provably zero, which is the key | |||
1849 | // point if we want to use this value | |||
1850 | uint64_t NonZeroBits = (~KnownZero).getZExtValue(); | |||
1851 | ||||
1852 | // Discard a constant AND mask if present. It's safe because the node will | |||
1853 | // already have been factored into the computeKnownBits calculation above. | |||
1854 | uint64_t AndImm; | |||
1855 | if (isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm)) { | |||
1856 | assert((~APInt(BitWidth, AndImm) & ~KnownZero) == 0)(((~APInt(BitWidth, AndImm) & ~KnownZero) == 0) ? static_cast <void> (0) : __assert_fail ("(~APInt(BitWidth, AndImm) & ~KnownZero) == 0" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn235822/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 1856, __PRETTY_FUNCTION__)); | |||
1857 | Op = Op.getOperand(0); | |||
1858 | } | |||
1859 | ||||
1860 | uint64_t ShlImm; | |||
1861 | if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm)) | |||
1862 | return false; | |||
1863 | Op = Op.getOperand(0); | |||
1864 | ||||
1865 | if (!isShiftedMask_64(NonZeroBits)) | |||
1866 | return false; | |||
1867 | ||||
1868 | ShiftAmount = countTrailingZeros(NonZeroBits); | |||
1869 | MaskWidth = countTrailingOnes(NonZeroBits >> ShiftAmount); | |||
1870 | ||||
1871 | // BFI encompasses sufficiently many nodes that it's worth inserting an extra | |||
1872 | // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL | |||
1873 | // amount. | |||
1874 | Src = getLeftShift(CurDAG, Op, ShlImm - ShiftAmount); | |||
1875 | ||||
1876 | return true; | |||
1877 | } | |||
1878 | ||||
1879 | // Given a OR operation, check if we have the following pattern | |||
1880 | // ubfm c, b, imm, imm2 (or something that does the same jobs, see | |||
1881 | // isBitfieldExtractOp) | |||
1882 | // d = e & mask2 ; where mask is a binary sequence of 1..10..0 and | |||
1883 | // countTrailingZeros(mask2) == imm2 - imm + 1 | |||
1884 | // f = d | c | |||
1885 | // if yes, given reference arguments will be update so that one can replace | |||
1886 | // the OR instruction with: | |||
1887 | // f = Opc Opd0, Opd1, LSB, MSB ; where Opc is a BFM, LSB = imm, and MSB = imm2 | |||
1888 | static bool isBitfieldInsertOpFromOr(SDNode *N, unsigned &Opc, SDValue &Dst, | |||
1889 | SDValue &Src, unsigned &ImmR, | |||
1890 | unsigned &ImmS, SelectionDAG *CurDAG) { | |||
1891 | assert(N->getOpcode() == ISD::OR && "Expect a OR operation")((N->getOpcode() == ISD::OR && "Expect a OR operation" ) ? static_cast<void> (0) : __assert_fail ("N->getOpcode() == ISD::OR && \"Expect a OR operation\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn235822/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 1891, __PRETTY_FUNCTION__)); | |||
1892 | ||||
1893 | // Set Opc | |||
1894 | EVT VT = N->getValueType(0); | |||
1895 | if (VT == MVT::i32) | |||
1896 | Opc = AArch64::BFMWri; | |||
1897 | else if (VT == MVT::i64) | |||
1898 | Opc = AArch64::BFMXri; | |||
1899 | else | |||
1900 | return false; | |||
1901 | ||||
1902 | // Because of simplify-demanded-bits in DAGCombine, involved masks may not | |||
1903 | // have the expected shape. Try to undo that. | |||
1904 | APInt UsefulBits; | |||
1905 | getUsefulBits(SDValue(N, 0), UsefulBits); | |||
1906 | ||||
1907 | unsigned NumberOfIgnoredLowBits = UsefulBits.countTrailingZeros(); | |||
1908 | unsigned NumberOfIgnoredHighBits = UsefulBits.countLeadingZeros(); | |||
1909 | ||||
1910 | // OR is commutative, check both possibilities (does llvm provide a | |||
1911 | // way to do that directely, e.g., via code matcher?) | |||
1912 | SDValue OrOpd1Val = N->getOperand(1); | |||
1913 | SDNode *OrOpd0 = N->getOperand(0).getNode(); | |||
1914 | SDNode *OrOpd1 = N->getOperand(1).getNode(); | |||
1915 | for (int i = 0; i < 2; | |||
1916 | ++i, std::swap(OrOpd0, OrOpd1), OrOpd1Val = N->getOperand(0)) { | |||
1917 | unsigned BFXOpc; | |||
1918 | int DstLSB, Width; | |||
1919 | if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS, | |||
1920 | NumberOfIgnoredLowBits, true)) { | |||
1921 | // Check that the returned opcode is compatible with the pattern, | |||
1922 | // i.e., same type and zero extended (U and not S) | |||
1923 | if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) || | |||
1924 | (BFXOpc != AArch64::UBFMWri && VT == MVT::i32)) | |||
1925 | continue; | |||
1926 | ||||
1927 | // Compute the width of the bitfield insertion | |||
1928 | DstLSB = 0; | |||
1929 | Width = ImmS - ImmR + 1; | |||
1930 | // FIXME: This constraint is to catch bitfield insertion we may | |||
1931 | // want to widen the pattern if we want to grab general bitfied | |||
1932 | // move case | |||
1933 | if (Width <= 0) | |||
1934 | continue; | |||
1935 | ||||
1936 | // If the mask on the insertee is correct, we have a BFXIL operation. We | |||
1937 | // can share the ImmR and ImmS values from the already-computed UBFM. | |||
1938 | } else if (isBitfieldPositioningOp(CurDAG, SDValue(OrOpd0, 0), Src, | |||
1939 | DstLSB, Width)) { | |||
1940 | ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits(); | |||
1941 | ImmS = Width - 1; | |||
1942 | } else | |||
1943 | continue; | |||
1944 | ||||
1945 | // Check the second part of the pattern | |||
1946 | EVT VT = OrOpd1->getValueType(0); | |||
1947 | assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand")(((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand" ) ? static_cast<void> (0) : __assert_fail ("(VT == MVT::i32 || VT == MVT::i64) && \"unexpected OR operand\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn235822/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 1947, __PRETTY_FUNCTION__)); | |||
1948 | ||||
1949 | // Compute the Known Zero for the candidate of the first operand. | |||
1950 | // This allows to catch more general case than just looking for | |||
1951 | // AND with imm. Indeed, simplify-demanded-bits may have removed | |||
1952 | // the AND instruction because it proves it was useless. | |||
1953 | APInt KnownZero, KnownOne; | |||
1954 | CurDAG->computeKnownBits(OrOpd1Val, KnownZero, KnownOne); | |||
1955 | ||||
1956 | // Check if there is enough room for the second operand to appear | |||
1957 | // in the first one | |||
1958 | APInt BitsToBeInserted = | |||
1959 | APInt::getBitsSet(KnownZero.getBitWidth(), DstLSB, DstLSB + Width); | |||
1960 | ||||
1961 | if ((BitsToBeInserted & ~KnownZero) != 0) | |||
1962 | continue; | |||
1963 | ||||
1964 | // Set the first operand | |||
1965 | uint64_t Imm; | |||
1966 | if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) && | |||
1967 | isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT)) | |||
1968 | // In that case, we can eliminate the AND | |||
1969 | Dst = OrOpd1->getOperand(0); | |||
1970 | else | |||
1971 | // Maybe the AND has been removed by simplify-demanded-bits | |||
1972 | // or is useful because it discards more bits | |||
1973 | Dst = OrOpd1Val; | |||
1974 | ||||
1975 | // both parts match | |||
1976 | return true; | |||
1977 | } | |||
1978 | ||||
1979 | return false; | |||
1980 | } | |||
1981 | ||||
1982 | SDNode *AArch64DAGToDAGISel::SelectBitfieldInsertOp(SDNode *N) { | |||
1983 | if (N->getOpcode() != ISD::OR) | |||
1984 | return nullptr; | |||
1985 | ||||
1986 | unsigned Opc; | |||
1987 | unsigned LSB, MSB; | |||
1988 | SDValue Opd0, Opd1; | |||
1989 | ||||
1990 | if (!isBitfieldInsertOpFromOr(N, Opc, Opd0, Opd1, LSB, MSB, CurDAG)) | |||
1991 | return nullptr; | |||
1992 | ||||
1993 | EVT VT = N->getValueType(0); | |||
1994 | SDValue Ops[] = { Opd0, | |||
1995 | Opd1, | |||
1996 | CurDAG->getTargetConstant(LSB, VT), | |||
1997 | CurDAG->getTargetConstant(MSB, VT) }; | |||
1998 | return CurDAG->SelectNodeTo(N, Opc, VT, Ops); | |||
1999 | } | |||
2000 | ||||
2001 | SDNode *AArch64DAGToDAGISel::SelectLIBM(SDNode *N) { | |||
2002 | EVT VT = N->getValueType(0); | |||
2003 | unsigned Variant; | |||
2004 | unsigned Opc; | |||
2005 | unsigned FRINTXOpcs[] = { AArch64::FRINTXSr, AArch64::FRINTXDr }; | |||
2006 | ||||
2007 | if (VT == MVT::f32) { | |||
2008 | Variant = 0; | |||
2009 | } else if (VT == MVT::f64) { | |||
2010 | Variant = 1; | |||
2011 | } else | |||
2012 | return nullptr; // Unrecognized argument type. Fall back on default codegen. | |||
2013 | ||||
2014 | // Pick the FRINTX variant needed to set the flags. | |||
2015 | unsigned FRINTXOpc = FRINTXOpcs[Variant]; | |||
2016 | ||||
2017 | switch (N->getOpcode()) { | |||
2018 | default: | |||
2019 | return nullptr; // Unrecognized libm ISD node. Fall back on default codegen. | |||
2020 | case ISD::FCEIL: { | |||
2021 | unsigned FRINTPOpcs[] = { AArch64::FRINTPSr, AArch64::FRINTPDr }; | |||
2022 | Opc = FRINTPOpcs[Variant]; | |||
2023 | break; | |||
2024 | } | |||
2025 | case ISD::FFLOOR: { | |||
2026 | unsigned FRINTMOpcs[] = { AArch64::FRINTMSr, AArch64::FRINTMDr }; | |||
2027 | Opc = FRINTMOpcs[Variant]; | |||
2028 | break; | |||
2029 | } | |||
2030 | case ISD::FTRUNC: { | |||
2031 | unsigned FRINTZOpcs[] = { AArch64::FRINTZSr, AArch64::FRINTZDr }; | |||
2032 | Opc = FRINTZOpcs[Variant]; | |||
2033 | break; | |||
2034 | } | |||
2035 | case ISD::FROUND: { | |||
2036 | unsigned FRINTAOpcs[] = { AArch64::FRINTASr, AArch64::FRINTADr }; | |||
2037 | Opc = FRINTAOpcs[Variant]; | |||
2038 | break; | |||
2039 | } | |||
2040 | } | |||
2041 | ||||
2042 | SDLoc dl(N); | |||
2043 | SDValue In = N->getOperand(0); | |||
2044 | SmallVector<SDValue, 2> Ops; | |||
2045 | Ops.push_back(In); | |||
2046 | ||||
2047 | if (!TM.Options.UnsafeFPMath) { | |||
2048 | SDNode *FRINTX = CurDAG->getMachineNode(FRINTXOpc, dl, VT, MVT::Glue, In); | |||
2049 | Ops.push_back(SDValue(FRINTX, 1)); | |||
2050 | } | |||
2051 | ||||
2052 | return CurDAG->getMachineNode(Opc, dl, VT, Ops); | |||
2053 | } | |||
2054 | ||||
2055 | bool | |||
2056 | AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, | |||
2057 | unsigned RegWidth) { | |||
2058 | APFloat FVal(0.0); | |||
2059 | if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N)) | |||
2060 | FVal = CN->getValueAPF(); | |||
2061 | else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) { | |||
2062 | // Some otherwise illegal constants are allowed in this case. | |||
2063 | if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow || | |||
2064 | !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1))) | |||
2065 | return false; | |||
2066 | ||||
2067 | ConstantPoolSDNode *CN = | |||
2068 | dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)); | |||
2069 | FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF(); | |||
2070 | } else | |||
2071 | return false; | |||
2072 | ||||
2073 | // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits | |||
2074 | // is between 1 and 32 for a destination w-register, or 1 and 64 for an | |||
2075 | // x-register. | |||
2076 | // | |||
2077 | // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we | |||
2078 | // want THIS_NODE to be 2^fbits. This is much easier to deal with using | |||
2079 | // integers. | |||
2080 | bool IsExact; | |||
2081 | ||||
2082 | // fbits is between 1 and 64 in the worst-case, which means the fmul | |||
2083 | // could have 2^64 as an actual operand. Need 65 bits of precision. | |||
2084 | APSInt IntVal(65, true); | |||
2085 | FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact); | |||
2086 | ||||
2087 | // N.b. isPowerOf2 also checks for > 0. | |||
2088 | if (!IsExact || !IntVal.isPowerOf2()) return false; | |||
2089 | unsigned FBits = IntVal.logBase2(); | |||
2090 | ||||
2091 | // Checks above should have guaranteed that we haven't lost information in | |||
2092 | // finding FBits, but it must still be in range. | |||
2093 | if (FBits == 0 || FBits > RegWidth) return false; | |||
2094 | ||||
2095 | FixedPos = CurDAG->getTargetConstant(FBits, MVT::i32); | |||
2096 | return true; | |||
2097 | } | |||
2098 | ||||
2099 | SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { | |||
2100 | // Dump information about the Node being selected | |||
2101 | DEBUG(errs() << "Selecting: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { errs() << "Selecting: "; } } while ( 0); | |||
2102 | DEBUG(Node->dump(CurDAG))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { Node->dump(CurDAG); } } while (0); | |||
2103 | DEBUG(errs() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { errs() << "\n"; } } while (0); | |||
2104 | ||||
2105 | // If we have a custom node, we already have selected! | |||
2106 | if (Node->isMachineOpcode()) { | |||
2107 | DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { errs() << "== "; Node->dump(CurDAG ); errs() << "\n"; } } while (0); | |||
2108 | Node->setNodeId(-1); | |||
2109 | return nullptr; | |||
2110 | } | |||
2111 | ||||
2112 | // Few custom selection stuff. | |||
2113 | SDNode *ResNode = nullptr; | |||
2114 | EVT VT = Node->getValueType(0); | |||
2115 | ||||
2116 | switch (Node->getOpcode()) { | |||
2117 | default: | |||
2118 | break; | |||
2119 | ||||
2120 | case ISD::ADD: | |||
2121 | if (SDNode *I = SelectMLAV64LaneV128(Node)) | |||
2122 | return I; | |||
2123 | break; | |||
2124 | ||||
2125 | case ISD::LOAD: { | |||
2126 | // Try to select as an indexed load. Fall through to normal processing | |||
2127 | // if we can't. | |||
2128 | bool Done = false; | |||
2129 | SDNode *I = SelectIndexedLoad(Node, Done); | |||
2130 | if (Done) | |||
2131 | return I; | |||
2132 | break; | |||
2133 | } | |||
2134 | ||||
2135 | case ISD::SRL: | |||
2136 | case ISD::AND: | |||
2137 | case ISD::SRA: | |||
2138 | if (SDNode *I = SelectBitfieldExtractOp(Node)) | |||
2139 | return I; | |||
2140 | break; | |||
2141 | ||||
2142 | case ISD::OR: | |||
2143 | if (SDNode *I = SelectBitfieldInsertOp(Node)) | |||
2144 | return I; | |||
2145 | break; | |||
2146 | ||||
2147 | case ISD::EXTRACT_VECTOR_ELT: { | |||
2148 | // Extracting lane zero is a special case where we can just use a plain | |||
2149 | // EXTRACT_SUBREG instruction, which will become FMOV. This is easier for | |||
2150 | // the rest of the compiler, especially the register allocator and copyi | |||
2151 | // propagation, to reason about, so is preferred when it's possible to | |||
2152 | // use it. | |||
2153 | ConstantSDNode *LaneNode = cast<ConstantSDNode>(Node->getOperand(1)); | |||
2154 | // Bail and use the default Select() for non-zero lanes. | |||
2155 | if (LaneNode->getZExtValue() != 0) | |||
2156 | break; | |||
2157 | // If the element type is not the same as the result type, likewise | |||
2158 | // bail and use the default Select(), as there's more to do than just | |||
2159 | // a cross-class COPY. This catches extracts of i8 and i16 elements | |||
2160 | // since they will need an explicit zext. | |||
2161 | if (VT != Node->getOperand(0).getValueType().getVectorElementType()) | |||
2162 | break; | |||
2163 | unsigned SubReg; | |||
2164 | switch (Node->getOperand(0) | |||
2165 | .getValueType() | |||
2166 | .getVectorElementType() | |||
2167 | .getSizeInBits()) { | |||
2168 | default: | |||
2169 | llvm_unreachable("Unexpected vector element type!")::llvm::llvm_unreachable_internal("Unexpected vector element type!" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn235822/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 2169); | |||
2170 | case 64: | |||
2171 | SubReg = AArch64::dsub; | |||
2172 | break; | |||
2173 | case 32: | |||
2174 | SubReg = AArch64::ssub; | |||
2175 | break; | |||
2176 | case 16: | |||
2177 | SubReg = AArch64::hsub; | |||
2178 | break; | |||
2179 | case 8: | |||
2180 | llvm_unreachable("unexpected zext-requiring extract element!")::llvm::llvm_unreachable_internal("unexpected zext-requiring extract element!" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn235822/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp" , 2180); | |||
2181 | } | |||
2182 | SDValue Extract = CurDAG->getTargetExtractSubreg(SubReg, SDLoc(Node), VT, | |||
2183 | Node->getOperand(0)); | |||
2184 | DEBUG(dbgs() << "ISEL: Custom selection!\n=> ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "ISEL: Custom selection!\n=> " ; } } while (0); | |||
2185 | DEBUG(Extract->dumpr(CurDAG))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { Extract->dumpr(CurDAG); } } while (0); | |||
2186 | DEBUG(dbgs() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "\n"; } } while (0); | |||
2187 | return Extract.getNode(); | |||
2188 | } | |||
2189 | case ISD::Constant: { | |||
2190 | // Materialize zero constants as copies from WZR/XZR. This allows | |||
2191 | // the coalescer to propagate these into other instructions. | |||
2192 | ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node); | |||
2193 | if (ConstNode->isNullValue()) { | |||
2194 | if (VT == MVT::i32) | |||
2195 | return CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(Node), | |||
2196 | AArch64::WZR, MVT::i32).getNode(); | |||
2197 | else if (VT == MVT::i64) | |||
2198 | return CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(Node), | |||
2199 | AArch64::XZR, MVT::i64).getNode(); | |||
2200 | } | |||
2201 | break; | |||
2202 | } | |||
2203 | ||||
2204 | case ISD::FrameIndex: { | |||
2205 | // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm. | |||
2206 | int FI = cast<FrameIndexSDNode>(Node)->getIndex(); | |||
2207 | unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0); | |||
2208 | const TargetLowering *TLI = getTargetLowering(); | |||
2209 | SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); | |||
2210 | SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32), | |||
2211 | CurDAG->getTargetConstant(Shifter, MVT::i32) }; | |||
2212 | return CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops); | |||
2213 | } | |||
2214 | case ISD::INTRINSIC_W_CHAIN: { | |||
2215 | unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); | |||
2216 | switch (IntNo) { | |||
2217 | default: | |||
2218 | break; | |||
2219 | case Intrinsic::aarch64_ldaxp: | |||
2220 | case Intrinsic::aarch64_ldxp: { | |||
2221 | unsigned Op = | |||
2222 | IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX; | |||
2223 | SDValue MemAddr = Node->getOperand(2); | |||
2224 | SDLoc DL(Node); | |||
2225 | SDValue Chain = Node->getOperand(0); | |||
2226 | ||||
2227 | SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64, | |||
2228 | MVT::Other, MemAddr, Chain); | |||
2229 | ||||
2230 | // Transfer memoperands. | |||
2231 | MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); | |||
2232 | MemOp[0] = cast<MemIntrinsicSDNode>(Node)->getMemOperand(); | |||
2233 | cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1); | |||
2234 | return Ld; | |||
2235 | } | |||
2236 | case Intrinsic::aarch64_stlxp: | |||
2237 | case Intrinsic::aarch64_stxp: { | |||
2238 | unsigned Op = | |||
2239 | IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX; | |||
2240 | SDLoc DL(Node); | |||
2241 | SDValue Chain = Node->getOperand(0); | |||
2242 | SDValue ValLo = Node->getOperand(2); | |||
2243 | SDValue ValHi = Node->getOperand(3); | |||
2244 | SDValue MemAddr = Node->getOperand(4); | |||
2245 | ||||
2246 | // Place arguments in the right order. | |||
2247 | SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain}; | |||
2248 | ||||
2249 | SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops); | |||
2250 | // Transfer memoperands. | |||
2251 | MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); | |||
2252 | MemOp[0] = cast<MemIntrinsicSDNode>(Node)->getMemOperand(); | |||
2253 | cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1); | |||
2254 | ||||
2255 | return St; | |||
2256 | } | |||
2257 | case Intrinsic::aarch64_neon_ld1x2: | |||
2258 | if (VT == MVT::v8i8) | |||
2259 | return SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0); | |||
2260 | else if (VT == MVT::v16i8) | |||
2261 | return SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0); | |||
2262 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2263 | return SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0); | |||
2264 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2265 | return SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0); | |||
2266 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2267 | return SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0); | |||
2268 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2269 | return SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0); | |||
2270 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2271 | return SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0); | |||
2272 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2273 | return SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0); | |||
2274 | break; | |||
2275 | case Intrinsic::aarch64_neon_ld1x3: | |||
2276 | if (VT == MVT::v8i8) | |||
2277 | return SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0); | |||
2278 | else if (VT == MVT::v16i8) | |||
2279 | return SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0); | |||
2280 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2281 | return SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0); | |||
2282 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2283 | return SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0); | |||
2284 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2285 | return SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0); | |||
2286 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2287 | return SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0); | |||
2288 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2289 | return SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0); | |||
2290 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2291 | return SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0); | |||
2292 | break; | |||
2293 | case Intrinsic::aarch64_neon_ld1x4: | |||
2294 | if (VT == MVT::v8i8) | |||
2295 | return SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0); | |||
2296 | else if (VT == MVT::v16i8) | |||
2297 | return SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0); | |||
2298 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2299 | return SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0); | |||
2300 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2301 | return SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0); | |||
2302 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2303 | return SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0); | |||
2304 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2305 | return SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0); | |||
2306 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2307 | return SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0); | |||
2308 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2309 | return SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0); | |||
2310 | break; | |||
2311 | case Intrinsic::aarch64_neon_ld2: | |||
2312 | if (VT == MVT::v8i8) | |||
2313 | return SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0); | |||
2314 | else if (VT == MVT::v16i8) | |||
2315 | return SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0); | |||
2316 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2317 | return SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0); | |||
2318 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2319 | return SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0); | |||
2320 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2321 | return SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0); | |||
2322 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2323 | return SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0); | |||
2324 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2325 | return SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0); | |||
2326 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2327 | return SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0); | |||
2328 | break; | |||
2329 | case Intrinsic::aarch64_neon_ld3: | |||
2330 | if (VT == MVT::v8i8) | |||
2331 | return SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0); | |||
2332 | else if (VT == MVT::v16i8) | |||
2333 | return SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0); | |||
2334 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2335 | return SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0); | |||
2336 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2337 | return SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0); | |||
2338 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2339 | return SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0); | |||
2340 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2341 | return SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0); | |||
2342 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2343 | return SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0); | |||
2344 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2345 | return SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0); | |||
2346 | break; | |||
2347 | case Intrinsic::aarch64_neon_ld4: | |||
2348 | if (VT == MVT::v8i8) | |||
2349 | return SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0); | |||
2350 | else if (VT == MVT::v16i8) | |||
2351 | return SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0); | |||
2352 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2353 | return SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0); | |||
2354 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2355 | return SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0); | |||
2356 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2357 | return SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0); | |||
2358 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2359 | return SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0); | |||
2360 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2361 | return SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0); | |||
2362 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2363 | return SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0); | |||
2364 | break; | |||
2365 | case Intrinsic::aarch64_neon_ld2r: | |||
2366 | if (VT == MVT::v8i8) | |||
2367 | return SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0); | |||
2368 | else if (VT == MVT::v16i8) | |||
2369 | return SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0); | |||
2370 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2371 | return SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0); | |||
2372 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2373 | return SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0); | |||
2374 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2375 | return SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0); | |||
2376 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2377 | return SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0); | |||
2378 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2379 | return SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0); | |||
2380 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2381 | return SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0); | |||
2382 | break; | |||
2383 | case Intrinsic::aarch64_neon_ld3r: | |||
2384 | if (VT == MVT::v8i8) | |||
2385 | return SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0); | |||
2386 | else if (VT == MVT::v16i8) | |||
2387 | return SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0); | |||
2388 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2389 | return SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0); | |||
2390 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2391 | return SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0); | |||
2392 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2393 | return SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0); | |||
2394 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2395 | return SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0); | |||
2396 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2397 | return SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0); | |||
2398 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2399 | return SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0); | |||
2400 | break; | |||
2401 | case Intrinsic::aarch64_neon_ld4r: | |||
2402 | if (VT == MVT::v8i8) | |||
2403 | return SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0); | |||
2404 | else if (VT == MVT::v16i8) | |||
2405 | return SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0); | |||
2406 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2407 | return SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0); | |||
2408 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2409 | return SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0); | |||
2410 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2411 | return SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0); | |||
2412 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2413 | return SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0); | |||
2414 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2415 | return SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0); | |||
2416 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2417 | return SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0); | |||
2418 | break; | |||
2419 | case Intrinsic::aarch64_neon_ld2lane: | |||
2420 | if (VT == MVT::v16i8 || VT == MVT::v8i8) | |||
2421 | return SelectLoadLane(Node, 2, AArch64::LD2i8); | |||
2422 | else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || | |||
2423 | VT == MVT::v8f16) | |||
2424 | return SelectLoadLane(Node, 2, AArch64::LD2i16); | |||
2425 | else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || | |||
2426 | VT == MVT::v2f32) | |||
2427 | return SelectLoadLane(Node, 2, AArch64::LD2i32); | |||
2428 | else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || | |||
2429 | VT == MVT::v1f64) | |||
2430 | return SelectLoadLane(Node, 2, AArch64::LD2i64); | |||
2431 | break; | |||
2432 | case Intrinsic::aarch64_neon_ld3lane: | |||
2433 | if (VT == MVT::v16i8 || VT == MVT::v8i8) | |||
2434 | return SelectLoadLane(Node, 3, AArch64::LD3i8); | |||
2435 | else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || | |||
2436 | VT == MVT::v8f16) | |||
2437 | return SelectLoadLane(Node, 3, AArch64::LD3i16); | |||
2438 | else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || | |||
2439 | VT == MVT::v2f32) | |||
2440 | return SelectLoadLane(Node, 3, AArch64::LD3i32); | |||
2441 | else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || | |||
2442 | VT == MVT::v1f64) | |||
2443 | return SelectLoadLane(Node, 3, AArch64::LD3i64); | |||
2444 | break; | |||
2445 | case Intrinsic::aarch64_neon_ld4lane: | |||
2446 | if (VT == MVT::v16i8 || VT == MVT::v8i8) | |||
2447 | return SelectLoadLane(Node, 4, AArch64::LD4i8); | |||
2448 | else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || | |||
2449 | VT == MVT::v8f16) | |||
2450 | return SelectLoadLane(Node, 4, AArch64::LD4i16); | |||
2451 | else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || | |||
2452 | VT == MVT::v2f32) | |||
2453 | return SelectLoadLane(Node, 4, AArch64::LD4i32); | |||
2454 | else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || | |||
2455 | VT == MVT::v1f64) | |||
2456 | return SelectLoadLane(Node, 4, AArch64::LD4i64); | |||
2457 | break; | |||
2458 | } | |||
2459 | } break; | |||
2460 | case ISD::INTRINSIC_WO_CHAIN: { | |||
2461 | unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue(); | |||
2462 | switch (IntNo) { | |||
2463 | default: | |||
2464 | break; | |||
2465 | case Intrinsic::aarch64_neon_tbl2: | |||
2466 | return SelectTable(Node, 2, VT == MVT::v8i8 ? AArch64::TBLv8i8Two | |||
2467 | : AArch64::TBLv16i8Two, | |||
2468 | false); | |||
2469 | case Intrinsic::aarch64_neon_tbl3: | |||
2470 | return SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three | |||
2471 | : AArch64::TBLv16i8Three, | |||
2472 | false); | |||
2473 | case Intrinsic::aarch64_neon_tbl4: | |||
2474 | return SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four | |||
2475 | : AArch64::TBLv16i8Four, | |||
2476 | false); | |||
2477 | case Intrinsic::aarch64_neon_tbx2: | |||
2478 | return SelectTable(Node, 2, VT == MVT::v8i8 ? AArch64::TBXv8i8Two | |||
2479 | : AArch64::TBXv16i8Two, | |||
2480 | true); | |||
2481 | case Intrinsic::aarch64_neon_tbx3: | |||
2482 | return SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three | |||
2483 | : AArch64::TBXv16i8Three, | |||
2484 | true); | |||
2485 | case Intrinsic::aarch64_neon_tbx4: | |||
2486 | return SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four | |||
2487 | : AArch64::TBXv16i8Four, | |||
2488 | true); | |||
2489 | case Intrinsic::aarch64_neon_smull: | |||
2490 | case Intrinsic::aarch64_neon_umull: | |||
2491 | if (SDNode *N = SelectMULLV64LaneV128(IntNo, Node)) | |||
2492 | return N; | |||
2493 | break; | |||
2494 | } | |||
2495 | break; | |||
2496 | } | |||
2497 | case ISD::INTRINSIC_VOID: { | |||
2498 | unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); | |||
2499 | if (Node->getNumOperands() >= 3) | |||
2500 | VT = Node->getOperand(2)->getValueType(0); | |||
2501 | switch (IntNo) { | |||
2502 | default: | |||
2503 | break; | |||
2504 | case Intrinsic::aarch64_neon_st1x2: { | |||
2505 | if (VT == MVT::v8i8) | |||
2506 | return SelectStore(Node, 2, AArch64::ST1Twov8b); | |||
2507 | else if (VT == MVT::v16i8) | |||
2508 | return SelectStore(Node, 2, AArch64::ST1Twov16b); | |||
2509 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2510 | return SelectStore(Node, 2, AArch64::ST1Twov4h); | |||
2511 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2512 | return SelectStore(Node, 2, AArch64::ST1Twov8h); | |||
2513 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2514 | return SelectStore(Node, 2, AArch64::ST1Twov2s); | |||
2515 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2516 | return SelectStore(Node, 2, AArch64::ST1Twov4s); | |||
2517 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2518 | return SelectStore(Node, 2, AArch64::ST1Twov2d); | |||
2519 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2520 | return SelectStore(Node, 2, AArch64::ST1Twov1d); | |||
2521 | break; | |||
2522 | } | |||
2523 | case Intrinsic::aarch64_neon_st1x3: { | |||
2524 | if (VT == MVT::v8i8) | |||
2525 | return SelectStore(Node, 3, AArch64::ST1Threev8b); | |||
2526 | else if (VT == MVT::v16i8) | |||
2527 | return SelectStore(Node, 3, AArch64::ST1Threev16b); | |||
2528 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2529 | return SelectStore(Node, 3, AArch64::ST1Threev4h); | |||
2530 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2531 | return SelectStore(Node, 3, AArch64::ST1Threev8h); | |||
2532 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2533 | return SelectStore(Node, 3, AArch64::ST1Threev2s); | |||
2534 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2535 | return SelectStore(Node, 3, AArch64::ST1Threev4s); | |||
2536 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2537 | return SelectStore(Node, 3, AArch64::ST1Threev2d); | |||
2538 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2539 | return SelectStore(Node, 3, AArch64::ST1Threev1d); | |||
2540 | break; | |||
2541 | } | |||
2542 | case Intrinsic::aarch64_neon_st1x4: { | |||
2543 | if (VT == MVT::v8i8) | |||
2544 | return SelectStore(Node, 4, AArch64::ST1Fourv8b); | |||
2545 | else if (VT == MVT::v16i8) | |||
2546 | return SelectStore(Node, 4, AArch64::ST1Fourv16b); | |||
2547 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2548 | return SelectStore(Node, 4, AArch64::ST1Fourv4h); | |||
2549 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2550 | return SelectStore(Node, 4, AArch64::ST1Fourv8h); | |||
2551 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2552 | return SelectStore(Node, 4, AArch64::ST1Fourv2s); | |||
2553 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2554 | return SelectStore(Node, 4, AArch64::ST1Fourv4s); | |||
2555 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2556 | return SelectStore(Node, 4, AArch64::ST1Fourv2d); | |||
2557 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2558 | return SelectStore(Node, 4, AArch64::ST1Fourv1d); | |||
2559 | break; | |||
2560 | } | |||
2561 | case Intrinsic::aarch64_neon_st2: { | |||
2562 | if (VT == MVT::v8i8) | |||
2563 | return SelectStore(Node, 2, AArch64::ST2Twov8b); | |||
2564 | else if (VT == MVT::v16i8) | |||
2565 | return SelectStore(Node, 2, AArch64::ST2Twov16b); | |||
2566 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2567 | return SelectStore(Node, 2, AArch64::ST2Twov4h); | |||
2568 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2569 | return SelectStore(Node, 2, AArch64::ST2Twov8h); | |||
2570 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2571 | return SelectStore(Node, 2, AArch64::ST2Twov2s); | |||
2572 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2573 | return SelectStore(Node, 2, AArch64::ST2Twov4s); | |||
2574 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2575 | return SelectStore(Node, 2, AArch64::ST2Twov2d); | |||
2576 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2577 | return SelectStore(Node, 2, AArch64::ST1Twov1d); | |||
2578 | break; | |||
2579 | } | |||
2580 | case Intrinsic::aarch64_neon_st3: { | |||
2581 | if (VT == MVT::v8i8) | |||
2582 | return SelectStore(Node, 3, AArch64::ST3Threev8b); | |||
2583 | else if (VT == MVT::v16i8) | |||
2584 | return SelectStore(Node, 3, AArch64::ST3Threev16b); | |||
2585 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2586 | return SelectStore(Node, 3, AArch64::ST3Threev4h); | |||
2587 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2588 | return SelectStore(Node, 3, AArch64::ST3Threev8h); | |||
2589 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2590 | return SelectStore(Node, 3, AArch64::ST3Threev2s); | |||
2591 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2592 | return SelectStore(Node, 3, AArch64::ST3Threev4s); | |||
2593 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2594 | return SelectStore(Node, 3, AArch64::ST3Threev2d); | |||
2595 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2596 | return SelectStore(Node, 3, AArch64::ST1Threev1d); | |||
2597 | break; | |||
2598 | } | |||
2599 | case Intrinsic::aarch64_neon_st4: { | |||
2600 | if (VT == MVT::v8i8) | |||
2601 | return SelectStore(Node, 4, AArch64::ST4Fourv8b); | |||
2602 | else if (VT == MVT::v16i8) | |||
2603 | return SelectStore(Node, 4, AArch64::ST4Fourv16b); | |||
2604 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2605 | return SelectStore(Node, 4, AArch64::ST4Fourv4h); | |||
2606 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2607 | return SelectStore(Node, 4, AArch64::ST4Fourv8h); | |||
2608 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2609 | return SelectStore(Node, 4, AArch64::ST4Fourv2s); | |||
2610 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2611 | return SelectStore(Node, 4, AArch64::ST4Fourv4s); | |||
2612 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2613 | return SelectStore(Node, 4, AArch64::ST4Fourv2d); | |||
2614 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2615 | return SelectStore(Node, 4, AArch64::ST1Fourv1d); | |||
2616 | break; | |||
2617 | } | |||
2618 | case Intrinsic::aarch64_neon_st2lane: { | |||
2619 | if (VT == MVT::v16i8 || VT == MVT::v8i8) | |||
2620 | return SelectStoreLane(Node, 2, AArch64::ST2i8); | |||
2621 | else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || | |||
2622 | VT == MVT::v8f16) | |||
2623 | return SelectStoreLane(Node, 2, AArch64::ST2i16); | |||
2624 | else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || | |||
2625 | VT == MVT::v2f32) | |||
2626 | return SelectStoreLane(Node, 2, AArch64::ST2i32); | |||
2627 | else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || | |||
2628 | VT == MVT::v1f64) | |||
2629 | return SelectStoreLane(Node, 2, AArch64::ST2i64); | |||
2630 | break; | |||
2631 | } | |||
2632 | case Intrinsic::aarch64_neon_st3lane: { | |||
2633 | if (VT == MVT::v16i8 || VT == MVT::v8i8) | |||
2634 | return SelectStoreLane(Node, 3, AArch64::ST3i8); | |||
2635 | else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || | |||
2636 | VT == MVT::v8f16) | |||
2637 | return SelectStoreLane(Node, 3, AArch64::ST3i16); | |||
2638 | else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || | |||
2639 | VT == MVT::v2f32) | |||
2640 | return SelectStoreLane(Node, 3, AArch64::ST3i32); | |||
2641 | else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || | |||
2642 | VT == MVT::v1f64) | |||
2643 | return SelectStoreLane(Node, 3, AArch64::ST3i64); | |||
2644 | break; | |||
2645 | } | |||
2646 | case Intrinsic::aarch64_neon_st4lane: { | |||
2647 | if (VT == MVT::v16i8 || VT == MVT::v8i8) | |||
2648 | return SelectStoreLane(Node, 4, AArch64::ST4i8); | |||
2649 | else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || | |||
2650 | VT == MVT::v8f16) | |||
2651 | return SelectStoreLane(Node, 4, AArch64::ST4i16); | |||
2652 | else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || | |||
2653 | VT == MVT::v2f32) | |||
2654 | return SelectStoreLane(Node, 4, AArch64::ST4i32); | |||
2655 | else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || | |||
2656 | VT == MVT::v1f64) | |||
2657 | return SelectStoreLane(Node, 4, AArch64::ST4i64); | |||
2658 | break; | |||
2659 | } | |||
2660 | } | |||
2661 | } | |||
2662 | case AArch64ISD::LD2post: { | |||
2663 | if (VT == MVT::v8i8) | |||
2664 | return SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0); | |||
2665 | else if (VT == MVT::v16i8) | |||
2666 | return SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0); | |||
2667 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2668 | return SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0); | |||
2669 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2670 | return SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0); | |||
2671 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2672 | return SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0); | |||
2673 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2674 | return SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0); | |||
2675 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2676 | return SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0); | |||
2677 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2678 | return SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0); | |||
2679 | break; | |||
2680 | } | |||
2681 | case AArch64ISD::LD3post: { | |||
2682 | if (VT == MVT::v8i8) | |||
2683 | return SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0); | |||
2684 | else if (VT == MVT::v16i8) | |||
2685 | return SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0); | |||
2686 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2687 | return SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0); | |||
2688 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2689 | return SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0); | |||
2690 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2691 | return SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0); | |||
2692 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2693 | return SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0); | |||
2694 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2695 | return SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0); | |||
2696 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2697 | return SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0); | |||
2698 | break; | |||
2699 | } | |||
2700 | case AArch64ISD::LD4post: { | |||
2701 | if (VT == MVT::v8i8) | |||
2702 | return SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0); | |||
2703 | else if (VT == MVT::v16i8) | |||
2704 | return SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0); | |||
2705 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2706 | return SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0); | |||
2707 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2708 | return SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0); | |||
2709 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2710 | return SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0); | |||
2711 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2712 | return SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0); | |||
2713 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2714 | return SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0); | |||
2715 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2716 | return SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0); | |||
2717 | break; | |||
2718 | } | |||
2719 | case AArch64ISD::LD1x2post: { | |||
2720 | if (VT == MVT::v8i8) | |||
2721 | return SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0); | |||
2722 | else if (VT == MVT::v16i8) | |||
2723 | return SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0); | |||
2724 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2725 | return SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0); | |||
2726 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2727 | return SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0); | |||
2728 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2729 | return SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0); | |||
2730 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2731 | return SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0); | |||
2732 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2733 | return SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0); | |||
2734 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2735 | return SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0); | |||
2736 | break; | |||
2737 | } | |||
2738 | case AArch64ISD::LD1x3post: { | |||
2739 | if (VT == MVT::v8i8) | |||
2740 | return SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0); | |||
2741 | else if (VT == MVT::v16i8) | |||
2742 | return SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0); | |||
2743 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2744 | return SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0); | |||
2745 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2746 | return SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0); | |||
2747 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2748 | return SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0); | |||
2749 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2750 | return SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0); | |||
2751 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2752 | return SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0); | |||
2753 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2754 | return SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0); | |||
2755 | break; | |||
2756 | } | |||
2757 | case AArch64ISD::LD1x4post: { | |||
2758 | if (VT == MVT::v8i8) | |||
2759 | return SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0); | |||
2760 | else if (VT == MVT::v16i8) | |||
2761 | return SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0); | |||
2762 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2763 | return SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0); | |||
2764 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2765 | return SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0); | |||
2766 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2767 | return SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0); | |||
2768 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2769 | return SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0); | |||
2770 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2771 | return SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0); | |||
2772 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2773 | return SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0); | |||
2774 | break; | |||
2775 | } | |||
2776 | case AArch64ISD::LD1DUPpost: { | |||
2777 | if (VT == MVT::v8i8) | |||
2778 | return SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0); | |||
2779 | else if (VT == MVT::v16i8) | |||
2780 | return SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0); | |||
2781 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2782 | return SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0); | |||
2783 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2784 | return SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0); | |||
2785 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2786 | return SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0); | |||
2787 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2788 | return SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0); | |||
2789 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2790 | return SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0); | |||
2791 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2792 | return SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0); | |||
2793 | break; | |||
2794 | } | |||
2795 | case AArch64ISD::LD2DUPpost: { | |||
2796 | if (VT == MVT::v8i8) | |||
2797 | return SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0); | |||
2798 | else if (VT == MVT::v16i8) | |||
2799 | return SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0); | |||
2800 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2801 | return SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0); | |||
2802 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2803 | return SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0); | |||
2804 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2805 | return SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0); | |||
2806 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2807 | return SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0); | |||
2808 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2809 | return SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0); | |||
2810 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2811 | return SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0); | |||
2812 | break; | |||
2813 | } | |||
2814 | case AArch64ISD::LD3DUPpost: { | |||
2815 | if (VT == MVT::v8i8) | |||
2816 | return SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0); | |||
2817 | else if (VT == MVT::v16i8) | |||
2818 | return SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0); | |||
2819 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2820 | return SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0); | |||
2821 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2822 | return SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0); | |||
2823 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2824 | return SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0); | |||
2825 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2826 | return SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0); | |||
2827 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2828 | return SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0); | |||
2829 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2830 | return SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0); | |||
2831 | break; | |||
2832 | } | |||
2833 | case AArch64ISD::LD4DUPpost: { | |||
2834 | if (VT == MVT::v8i8) | |||
2835 | return SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0); | |||
2836 | else if (VT == MVT::v16i8) | |||
2837 | return SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0); | |||
2838 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2839 | return SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0); | |||
2840 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2841 | return SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0); | |||
2842 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2843 | return SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0); | |||
2844 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2845 | return SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0); | |||
2846 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2847 | return SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0); | |||
2848 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2849 | return SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0); | |||
2850 | break; | |||
2851 | } | |||
2852 | case AArch64ISD::LD1LANEpost: { | |||
2853 | if (VT == MVT::v16i8 || VT == MVT::v8i8) | |||
2854 | return SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST); | |||
2855 | else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || | |||
2856 | VT == MVT::v8f16) | |||
2857 | return SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST); | |||
2858 | else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || | |||
2859 | VT == MVT::v2f32) | |||
2860 | return SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST); | |||
2861 | else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || | |||
2862 | VT == MVT::v1f64) | |||
2863 | return SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST); | |||
2864 | break; | |||
2865 | } | |||
2866 | case AArch64ISD::LD2LANEpost: { | |||
2867 | if (VT == MVT::v16i8 || VT == MVT::v8i8) | |||
2868 | return SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST); | |||
2869 | else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || | |||
2870 | VT == MVT::v8f16) | |||
2871 | return SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST); | |||
2872 | else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || | |||
2873 | VT == MVT::v2f32) | |||
2874 | return SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST); | |||
2875 | else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || | |||
2876 | VT == MVT::v1f64) | |||
2877 | return SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST); | |||
2878 | break; | |||
2879 | } | |||
2880 | case AArch64ISD::LD3LANEpost: { | |||
2881 | if (VT == MVT::v16i8 || VT == MVT::v8i8) | |||
2882 | return SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST); | |||
2883 | else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || | |||
2884 | VT == MVT::v8f16) | |||
2885 | return SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST); | |||
2886 | else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || | |||
2887 | VT == MVT::v2f32) | |||
2888 | return SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST); | |||
2889 | else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || | |||
2890 | VT == MVT::v1f64) | |||
2891 | return SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST); | |||
2892 | break; | |||
2893 | } | |||
2894 | case AArch64ISD::LD4LANEpost: { | |||
2895 | if (VT == MVT::v16i8 || VT == MVT::v8i8) | |||
2896 | return SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST); | |||
2897 | else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || | |||
2898 | VT == MVT::v8f16) | |||
2899 | return SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST); | |||
2900 | else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || | |||
2901 | VT == MVT::v2f32) | |||
2902 | return SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST); | |||
2903 | else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || | |||
2904 | VT == MVT::v1f64) | |||
2905 | return SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST); | |||
2906 | break; | |||
2907 | } | |||
2908 | case AArch64ISD::ST2post: { | |||
2909 | VT = Node->getOperand(1).getValueType(); | |||
2910 | if (VT == MVT::v8i8) | |||
2911 | return SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST); | |||
2912 | else if (VT == MVT::v16i8) | |||
2913 | return SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST); | |||
2914 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2915 | return SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST); | |||
2916 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2917 | return SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST); | |||
2918 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2919 | return SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST); | |||
2920 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2921 | return SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST); | |||
2922 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2923 | return SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST); | |||
2924 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2925 | return SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST); | |||
2926 | break; | |||
2927 | } | |||
2928 | case AArch64ISD::ST3post: { | |||
2929 | VT = Node->getOperand(1).getValueType(); | |||
2930 | if (VT == MVT::v8i8) | |||
2931 | return SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST); | |||
2932 | else if (VT == MVT::v16i8) | |||
2933 | return SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST); | |||
2934 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2935 | return SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST); | |||
2936 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2937 | return SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST); | |||
2938 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2939 | return SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST); | |||
2940 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2941 | return SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST); | |||
2942 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2943 | return SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST); | |||
2944 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2945 | return SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST); | |||
2946 | break; | |||
2947 | } | |||
2948 | case AArch64ISD::ST4post: { | |||
2949 | VT = Node->getOperand(1).getValueType(); | |||
2950 | if (VT == MVT::v8i8) | |||
2951 | return SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST); | |||
2952 | else if (VT == MVT::v16i8) | |||
2953 | return SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST); | |||
2954 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2955 | return SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST); | |||
2956 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2957 | return SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST); | |||
2958 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2959 | return SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST); | |||
2960 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2961 | return SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST); | |||
2962 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2963 | return SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST); | |||
2964 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2965 | return SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST); | |||
2966 | break; | |||
2967 | } | |||
2968 | case AArch64ISD::ST1x2post: { | |||
2969 | VT = Node->getOperand(1).getValueType(); | |||
2970 | if (VT == MVT::v8i8) | |||
2971 | return SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST); | |||
2972 | else if (VT == MVT::v16i8) | |||
2973 | return SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST); | |||
2974 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2975 | return SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST); | |||
2976 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2977 | return SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST); | |||
2978 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2979 | return SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST); | |||
2980 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
2981 | return SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST); | |||
2982 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
2983 | return SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST); | |||
2984 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
2985 | return SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST); | |||
2986 | break; | |||
2987 | } | |||
2988 | case AArch64ISD::ST1x3post: { | |||
2989 | VT = Node->getOperand(1).getValueType(); | |||
2990 | if (VT == MVT::v8i8) | |||
2991 | return SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST); | |||
2992 | else if (VT == MVT::v16i8) | |||
2993 | return SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST); | |||
2994 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
2995 | return SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST); | |||
2996 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
2997 | return SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST); | |||
2998 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
2999 | return SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST); | |||
3000 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
3001 | return SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST); | |||
3002 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
3003 | return SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST); | |||
3004 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
3005 | return SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST); | |||
3006 | break; | |||
3007 | } | |||
3008 | case AArch64ISD::ST1x4post: { | |||
3009 | VT = Node->getOperand(1).getValueType(); | |||
3010 | if (VT == MVT::v8i8) | |||
3011 | return SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST); | |||
3012 | else if (VT == MVT::v16i8) | |||
3013 | return SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST); | |||
3014 | else if (VT == MVT::v4i16 || VT == MVT::v4f16) | |||
3015 | return SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST); | |||
3016 | else if (VT == MVT::v8i16 || VT == MVT::v8f16) | |||
3017 | return SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST); | |||
3018 | else if (VT == MVT::v2i32 || VT == MVT::v2f32) | |||
3019 | return SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST); | |||
3020 | else if (VT == MVT::v4i32 || VT == MVT::v4f32) | |||
3021 | return SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST); | |||
3022 | else if (VT == MVT::v1i64 || VT == MVT::v1f64) | |||
3023 | return SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST); | |||
3024 | else if (VT == MVT::v2i64 || VT == MVT::v2f64) | |||
3025 | return SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST); | |||
3026 | break; | |||
3027 | } | |||
3028 | case AArch64ISD::ST2LANEpost: { | |||
3029 | VT = Node->getOperand(1).getValueType(); | |||
3030 | if (VT == MVT::v16i8 || VT == MVT::v8i8) | |||
3031 | return SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST); | |||
3032 | else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || | |||
3033 | VT == MVT::v8f16) | |||
3034 | return SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST); | |||
3035 | else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || | |||
3036 | VT == MVT::v2f32) | |||
3037 | return SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST); | |||
3038 | else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || | |||
3039 | VT == MVT::v1f64) | |||
3040 | return SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST); | |||
3041 | break; | |||
3042 | } | |||
3043 | case AArch64ISD::ST3LANEpost: { | |||
3044 | VT = Node->getOperand(1).getValueType(); | |||
3045 | if (VT == MVT::v16i8 || VT == MVT::v8i8) | |||
3046 | return SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST); | |||
3047 | else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || | |||
3048 | VT == MVT::v8f16) | |||
3049 | return SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST); | |||
3050 | else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || | |||
3051 | VT == MVT::v2f32) | |||
3052 | return SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST); | |||
3053 | else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || | |||
3054 | VT == MVT::v1f64) | |||
3055 | return SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST); | |||
3056 | break; | |||
3057 | } | |||
3058 | case AArch64ISD::ST4LANEpost: { | |||
3059 | VT = Node->getOperand(1).getValueType(); | |||
3060 | if (VT == MVT::v16i8 || VT == MVT::v8i8) | |||
3061 | return SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST); | |||
3062 | else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || | |||
3063 | VT == MVT::v8f16) | |||
3064 | return SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST); | |||
3065 | else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || | |||
3066 | VT == MVT::v2f32) | |||
3067 | return SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST); | |||
3068 | else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || | |||
3069 | VT == MVT::v1f64) | |||
3070 | return SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST); | |||
3071 | break; | |||
3072 | } | |||
3073 | ||||
3074 | case ISD::FCEIL: | |||
3075 | case ISD::FFLOOR: | |||
3076 | case ISD::FTRUNC: | |||
3077 | case ISD::FROUND: | |||
3078 | if (SDNode *I = SelectLIBM(Node)) | |||
3079 | return I; | |||
3080 | break; | |||
3081 | } | |||
3082 | ||||
3083 | // Select the default instruction | |||
3084 | ResNode = SelectCode(Node); | |||
3085 | ||||
3086 | DEBUG(errs() << "=> ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { errs() << "=> "; } } while (0); | |||
3087 | if (ResNode == nullptr || ResNode == Node) | |||
3088 | DEBUG(Node->dump(CurDAG))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { Node->dump(CurDAG); } } while (0); | |||
3089 | else | |||
3090 | DEBUG(ResNode->dump(CurDAG))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { ResNode->dump(CurDAG); } } while (0); | |||
3091 | DEBUG(errs() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { errs() << "\n"; } } while (0); | |||
3092 | ||||
3093 | return ResNode; | |||
3094 | } | |||
3095 | ||||
3096 | /// createAArch64ISelDag - This pass converts a legalized DAG into a | |||
3097 | /// AArch64-specific DAG, ready for instruction scheduling. | |||
3098 | FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM, | |||
3099 | CodeGenOpt::Level OptLevel) { | |||
3100 | return new AArch64DAGToDAGISel(TM, OptLevel); | |||
3101 | } |