/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp

Bug Summary

File:	lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
Location:	line 702, column 67
Description:	The result of the '<<' expression is undefined

Annotated Source Code

//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//

// The LLVM Compiler Infrastructure

// This file is distributed under the University of Illinois Open Source

// License. See LICENSE.TXT for details.

//===----------------------------------------------------------------------===//

// This file defines an instruction selector for the AArch64 target.

//===----------------------------------------------------------------------===//

#include "AArch64TargetMachine.h"

#include "MCTargetDesc/AArch64AddressingModes.h"

#include "llvm/ADT/APSInt.h"

#include "llvm/CodeGen/SelectionDAGISel.h"

#include "llvm/IR/Function.h" // To access function attributes.

#include "llvm/IR/GlobalValue.h"

#include "llvm/IR/Intrinsics.h"

#include "llvm/Support/Debug.h"

#include "llvm/Support/ErrorHandling.h"

#include "llvm/Support/MathExtras.h"

#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Debug category for this pass; used by LLVM's DEBUG() output filtering.
#define DEBUG_TYPE "aarch64-isel"

//===--------------------------------------------------------------------===//

/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine

/// instructions for SelectionDAG operations.

///

namespace {

class AArch64DAGToDAGISel : public SelectionDAGISel {
  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;

  /// Cached optForSize() answer for the function currently being selected;
  /// refreshed at the start of runOnMachineFunction.
  bool ForCodeSize;

public:
  explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
                               CodeGenOpt::Level OptLevel)
      : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr),
        ForCodeSize(false) {}

  const char *getPassName() const override {
    return "AArch64 Instruction Selection";
  }

  /// Capture the per-function state (size-optimisation flag and subtarget)
  /// and then defer to the generic SelectionDAGISel driver.
  bool runOnMachineFunction(MachineFunction &MF) override {
    ForCodeSize = MF.getFunction()->optForSize();
    Subtarget = &MF.getSubtarget<AArch64Subtarget>();
    return SelectionDAGISel::runOnMachineFunction(MF);
  }

  SDNode *Select(SDNode *Node) override;

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                    unsigned ConstraintID,
                                    std::vector<SDValue> &OutOps) override;

  SDNode *SelectMLAV64LaneV128(SDNode *N);
  SDNode *SelectMULLV64LaneV128(unsigned IntNo, SDNode *N);
  bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
  bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
  bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);

  // Shifted-register operands: the arithmetic form rejects ROR, the logical
  // form accepts it (second argument of SelectShiftedRegister is AllowROR).
  bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
    return SelectShiftedRegister(N, false, Reg, Shift);
  }
  bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
    return SelectShiftedRegister(N, true, Reg, Shift);
  }

  // Fixed-size entry points for the addressing-mode selectors. Each forwards
  // to the private implementation with the access size in bytes
  // (1, 2, 4, 8 or 16).
  bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
  }
  bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 1, Base, OffImm);
  }
  bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 2, Base, OffImm);
  }
  bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 4, Base, OffImm);
  }
  bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 8, Base, OffImm);
  }
  bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 16, Base, OffImm);
  }
  bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 1, Base, OffImm);
  }
  bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 2, Base, OffImm);
  }
  bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 4, Base, OffImm);
  }
  bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 8, Base, OffImm);
  }
  bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 16, Base, OffImm);
  }

  // Register-offset addressing modes. The template argument is divided by 8
  // before being passed on as Size (presumably Width is the access width in
  // bits -- confirm against the .td patterns that instantiate these).
  template<int Width>
  bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
                         SDValue &SignExtend, SDValue &DoShift) {
    return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
  }

  template<int Width>
  bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
                         SDValue &SignExtend, SDValue &DoShift) {
    return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
  }

  /// Form sequences of consecutive 64/128-bit registers for use in NEON
  /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
  /// between 1 and 4 elements. If it contains a single element that is returned
  /// unchanged; otherwise a REG_SEQUENCE value is returned.
  SDValue createDTuple(ArrayRef<SDValue> Vecs);
  SDValue createQTuple(ArrayRef<SDValue> Vecs);

  /// Generic helper for the createDTuple/createQTuple
  /// functions. Those should almost always be called instead.
  SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
                      const unsigned SubRegs[]);

  SDNode *SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);

  SDNode *SelectIndexedLoad(SDNode *N, bool &Done);

  SDNode *SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
                     unsigned SubRegIdx);
  SDNode *SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
                         unsigned SubRegIdx);
  SDNode *SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  SDNode *SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);

  SDNode *SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
  SDNode *SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
  SDNode *SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  SDNode *SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);

  SDNode *SelectBitfieldExtractOp(SDNode *N);
  SDNode *SelectBitfieldInsertOp(SDNode *N);
  SDNode *SelectBitfieldInsertInZeroOp(SDNode *N);

  SDNode *SelectReadRegister(SDNode *N);
  SDNode *SelectWriteRegister(SDNode *N);

// Include the pieces autogenerated from the target description.
#include "AArch64GenDAGISel.inc"

private:
  bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
                             SDValue &Shift);
  bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
                               SDValue &OffImm);
  bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
                             SDValue &OffImm);
  bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
                              SDValue &OffImm);
  bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
                         SDValue &Offset, SDValue &SignExtend,
                         SDValue &DoShift);
  bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
                         SDValue &Offset, SDValue &SignExtend,
                         SDValue &DoShift);
  bool isWorthFolding(SDValue V) const;
  bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
                         SDValue &Offset, SDValue &SignExtend);

  /// Compile-time-width wrapper around the three-argument overload below.
  template<unsigned RegWidth>
  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
    return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
  }

  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
};

202

} // end anonymous namespace

203

204

/// isIntImmediate - This method tests to see if the node is a constant

205

/// operand. If so Imm will receive the 32-bit value.

206

static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {

207

if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {

208

Imm = C->getZExtValue();

209

return true;

210

}

211

return false;

212

}

213

214

// isIntImmediate - This method tests to see if a constant operand.

215

// If so Imm will receive the value.

216

static bool isIntImmediate(SDValue N, uint64_t &Imm) {

217

return isIntImmediate(N.getNode(), Imm);

218

}

219

220

// isOpcWithIntImmediate - This method tests to see if the node is a specific

221

// opcode and that it has a immediate integer right operand.

222

// If so Imm will receive the 32 bit value.

223

static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,

224

uint64_t &Imm) {

225

return N->getOpcode() == Opc &&

226

isIntImmediate(N->getOperand(1).getNode(), Imm);

227

}

228

229

bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(

230

const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {

231

switch(ConstraintID) {

232

default:

233

llvm_unreachable("Unexpected asm memory constraint")::llvm::llvm_unreachable_internal("Unexpected asm memory constraint"
, "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp"
, 233);

234

case InlineAsm::Constraint_i:

235

case InlineAsm::Constraint_m:

236

case InlineAsm::Constraint_Q:

237

// Require the address to be in a register. That is safe for all AArch64

238

// variants and it is hard to do anything much smarter without knowing

239

// how the operand is used.

240

OutOps.push_back(Op);

241

return false;

242

}

243

return true;

244

}

245

246

/// SelectArithImmed - Select an immediate value that can be represented as

247

/// a 12-bit value shifted left by either 0 or 12. If so, return true with

248

/// Val set to the 12-bit value and Shift set to the shifter operand.

249

bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,

250

SDValue &Shift) {

251

// This function is called from the addsub_shifted_imm ComplexPattern,

252

// which lists [imm] as the list of opcode it's interested in, however

253

// we still need to check whether the operand is actually an immediate

254

// here because the ComplexPattern opcode list is only used in

255

// root-level opcode matching.

256

if (!isa<ConstantSDNode>(N.getNode()))

257

return false;

258

259

uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();

260

unsigned ShiftAmt;

261

262

if (Immed >> 12 == 0) {

263

ShiftAmt = 0;

264

} else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {

265

ShiftAmt = 12;

266

Immed = Immed >> 12;

267

} else

268

return false;

269

270

unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);

271

SDLoc dl(N);

272

Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);

273

Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);

274

return true;

275

}

276

277

/// SelectNegArithImmed - As above, but negates the value before trying to

278

/// select it.

279

bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,

280

SDValue &Shift) {

281

// This function is called from the addsub_shifted_imm ComplexPattern,

282

// which lists [imm] as the list of opcode it's interested in, however

283

// we still need to check whether the operand is actually an immediate

284

// here because the ComplexPattern opcode list is only used in

285

// root-level opcode matching.

286

if (!isa<ConstantSDNode>(N.getNode()))

287

return false;

288

289

// The immediate operand must be a 24-bit zero-extended immediate.

290

uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();

291

292

// This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"

293

// have the opposite effect on the C flag, so this pattern mustn't match under

294

// those circumstances.

295

if (Immed == 0)

296

return false;

297

298

if (N.getValueType() == MVT::i32)

299

Immed = ~((uint32_t)Immed) + 1;

300

else

301

Immed = ~Immed + 1ULL;

302

if (Immed & 0xFFFFFFFFFF000000ULL)

303

return false;

304

305

Immed &= 0xFFFFFFULL;

306

return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,

307

Shift);

308

}

309

310

/// getShiftTypeForNode - Translate a shift node to the corresponding

311

/// ShiftType value.

312

static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {

313

switch (N.getOpcode()) {

314

default:

315

return AArch64_AM::InvalidShiftExtend;

316

case ISD::SHL:

317

return AArch64_AM::LSL;

318

case ISD::SRL:

319

return AArch64_AM::LSR;

320

case ISD::SRA:

321

return AArch64_AM::ASR;

322

case ISD::ROTR:

323

return AArch64_AM::ROR;

324

}

325

}

326

327

/// \brief Determine whether it is worth to fold V into an extended register.

328

bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const {

329

// it hurts if the value is used at least twice, unless we are optimizing

330

// for code size.

331

if (ForCodeSize || V.hasOneUse())

332

return true;

333

return false;

334

}

335

336

/// SelectShiftedRegister - Select a "shifted register" operand. If the value

337

/// is not shifted, set the Shift operand to default of "LSL 0". The logical

338

/// instructions allow the shifted register to be rotated, but the arithmetic

339

/// instructions do not. The AllowROR parameter specifies whether ROR is

340

/// supported.

341

bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,

342

SDValue &Reg, SDValue &Shift) {

343

AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);

344

if (ShType == AArch64_AM::InvalidShiftExtend)

345

return false;

346

if (!AllowROR && ShType == AArch64_AM::ROR)

347

return false;

348

349

if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {

350

unsigned BitSize = N.getValueType().getSizeInBits();

351

unsigned Val = RHS->getZExtValue() & (BitSize - 1);

352

unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);

353

354

Reg = N.getOperand(0);

355

Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);

356

return isWorthFolding(N);

357

}

358

359

return false;

360

}

361

362

/// getExtendTypeForNode - Translate an extend node to the corresponding

363

/// ExtendType value.

364

static AArch64_AM::ShiftExtendType

365

getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {

366

if (N.getOpcode() == ISD::SIGN_EXTEND ||

367

N.getOpcode() == ISD::SIGN_EXTEND_INREG) {

368

EVT SrcVT;

369

if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)

370

SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();

371

else

372

SrcVT = N.getOperand(0).getValueType();

373

374

if (!IsLoadStore && SrcVT == MVT::i8)

375

return AArch64_AM::SXTB;

376

else if (!IsLoadStore && SrcVT == MVT::i16)

377

return AArch64_AM::SXTH;

378

else if (SrcVT == MVT::i32)

379

return AArch64_AM::SXTW;

380

assert(SrcVT != MVT::i64 && "extend from 64-bits?")((SrcVT != MVT::i64 && "extend from 64-bits?") ? static_cast
<void> (0) : __assert_fail ("SrcVT != MVT::i64 && \"extend from 64-bits?\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp"
, 380, __PRETTY_FUNCTION__));

381

382

return AArch64_AM::InvalidShiftExtend;

383

} else if (N.getOpcode() == ISD::ZERO_EXTEND ||

384

N.getOpcode() == ISD::ANY_EXTEND) {

385

EVT SrcVT = N.getOperand(0).getValueType();

386

if (!IsLoadStore && SrcVT == MVT::i8)

387

return AArch64_AM::UXTB;

388

else if (!IsLoadStore && SrcVT == MVT::i16)

389

return AArch64_AM::UXTH;

390

else if (SrcVT == MVT::i32)

391

return AArch64_AM::UXTW;

392

393

394

return AArch64_AM::InvalidShiftExtend;

395

} else if (N.getOpcode() == ISD::AND) {

396

ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));

397

if (!CSD)

398

return AArch64_AM::InvalidShiftExtend;

399

uint64_t AndMask = CSD->getZExtValue();

400

401

switch (AndMask) {

402

default:

403

return AArch64_AM::InvalidShiftExtend;

404

case 0xFF:

405

return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;

406

case 0xFFFF:

407

return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;

408

case 0xFFFFFFFF:

409

return AArch64_AM::UXTW;

410

}

411

}

412

413

return AArch64_AM::InvalidShiftExtend;

414

}

415

416

// Helper for SelectMLAV64LaneV128 - Recognize high lane extracts.

417

static bool checkHighLaneIndex(SDNode *DL, SDValue &LaneOp, int &LaneIdx) {

418

if (DL->getOpcode() != AArch64ISD::DUPLANE16 &&

419

DL->getOpcode() != AArch64ISD::DUPLANE32)

420

return false;

421

422

SDValue SV = DL->getOperand(0);

423

if (SV.getOpcode() != ISD::INSERT_SUBVECTOR)

424

return false;

425

426

SDValue EV = SV.getOperand(1);

427

if (EV.getOpcode() != ISD::EXTRACT_SUBVECTOR)

428

return false;

429

430

ConstantSDNode *DLidx = cast<ConstantSDNode>(DL->getOperand(1).getNode());

431

ConstantSDNode *EVidx = cast<ConstantSDNode>(EV.getOperand(1).getNode());

432

LaneIdx = DLidx->getSExtValue() + EVidx->getSExtValue();

433

LaneOp = EV.getOperand(0);

434

435

return true;

436

}

437

438

// Helper for SelectOpcV64LaneV128 - Recognize operations where one operand is a

439

// high lane extract.

440

static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp,

441

SDValue &LaneOp, int &LaneIdx) {

442

443

if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) {

444

std::swap(Op0, Op1);

445

if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx))

446

return false;

447

}

448

StdOp = Op1;

449

return true;

450

}

451

452

/// SelectMLAV64LaneV128 - AArch64 supports vector MLAs where one multiplicand

453

/// is a lane in the upper half of a 128-bit vector. Recognize and select this

454

/// so that we don't emit unnecessary lane extracts.

455

SDNode *AArch64DAGToDAGISel::SelectMLAV64LaneV128(SDNode *N) {

456

SDLoc dl(N);

457

SDValue Op0 = N->getOperand(0);

458

SDValue Op1 = N->getOperand(1);

459

SDValue MLAOp1; // Will hold ordinary multiplicand for MLA.

460

SDValue MLAOp2; // Will hold lane-accessed multiplicand for MLA.

461

int LaneIdx = -1; // Will hold the lane index.

462

463

if (Op1.getOpcode() != ISD::MUL ||

464

!checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,

465

LaneIdx)) {

466

std::swap(Op0, Op1);

467

if (Op1.getOpcode() != ISD::MUL ||

468

!checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,

469

LaneIdx))

470

return nullptr;

471

}

472

473

SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);

474

475

SDValue Ops[] = { Op0, MLAOp1, MLAOp2, LaneIdxVal };

476

477

unsigned MLAOpc = ~0U;

478

479

switch (N->getSimpleValueType(0).SimpleTy) {

480

default:

481

llvm_unreachable("Unrecognized MLA.")::llvm::llvm_unreachable_internal("Unrecognized MLA.", "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp"
, 481);

482

case MVT::v4i16:

483

MLAOpc = AArch64::MLAv4i16_indexed;

484

break;

485

case MVT::v8i16:

486

MLAOpc = AArch64::MLAv8i16_indexed;

487

break;

488

case MVT::v2i32:

489

MLAOpc = AArch64::MLAv2i32_indexed;

490

break;

491

case MVT::v4i32:

492

MLAOpc = AArch64::MLAv4i32_indexed;

493

break;

494

}

495

496

return CurDAG->getMachineNode(MLAOpc, dl, N->getValueType(0), Ops);

497

}

498

499

/// Select an indexed SMULL/UMULL for the aarch64_neon_smull / _umull
/// intrinsic IntNo when one of its two vector operands (operands 1 and 2 of
/// N) is a high-lane extract. Returns nullptr when no such operand is found;
/// any other intrinsic ID is treated as unreachable.
SDNode *AArch64DAGToDAGISel::SelectMULLV64LaneV128(unsigned IntNo, SDNode *N) {
  SDLoc dl(N);
  SDValue SMULLOp0;
  SDValue SMULLOp1;
  int LaneIdx = -1; // Set by checkV64LaneV128 on success.

  if (!checkV64LaneV128(N->getOperand(1), N->getOperand(2), SMULLOp0, SMULLOp1,
                        LaneIdx))
    return nullptr;

  SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);

  SDValue Ops[] = { SMULLOp0, SMULLOp1, LaneIdxVal };

  unsigned SMULLOpc = ~0U;

  // The opcode is chosen from the (widened) result type.
  if (IntNo == Intrinsic::aarch64_neon_smull) {
    switch (N->getSimpleValueType(0).SimpleTy) {
    default:
      llvm_unreachable("Unrecognized SMULL.");
    case MVT::v4i32:
      SMULLOpc = AArch64::SMULLv4i16_indexed;
      break;
    case MVT::v2i64:
      SMULLOpc = AArch64::SMULLv2i32_indexed;
      break;
    }
  } else if (IntNo == Intrinsic::aarch64_neon_umull) {
    switch (N->getSimpleValueType(0).SimpleTy) {
    default:
      llvm_unreachable("Unrecognized UMULL.");
    case MVT::v4i32:
      SMULLOpc = AArch64::UMULLv4i16_indexed;
      break;
    case MVT::v2i64:
      SMULLOpc = AArch64::UMULLv2i32_indexed;
      break;
    }
  } else
    llvm_unreachable("Unrecognized intrinsic.");

  return CurDAG->getMachineNode(SMULLOpc, dl, N->getValueType(0), Ops);
}

542

543

/// Instructions that accept extend modifiers like UXTW expect the register

544

/// being extended to be a GPR32, but the incoming DAG might be acting on a

545

/// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if

546

/// this is the case.

547

static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {

548

if (N.getValueType() == MVT::i32)

549

return N;

550

551

SDLoc dl(N);

552

SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);

553

MachineSDNode *Node = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,

554

dl, MVT::i32, N, SubReg);

555

return SDValue(Node, 0);

556

}

557

558

559

/// SelectArithExtendedRegister - Select a "extended register" operand. This

560

/// operand folds in an extend followed by an optional left shift.

561

bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,

562

SDValue &Shift) {

563

unsigned ShiftVal = 0;

564

AArch64_AM::ShiftExtendType Ext;

565

566

if (N.getOpcode() == ISD::SHL) {

567

ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));

568

if (!CSD)

569

return false;

570

ShiftVal = CSD->getZExtValue();

571

if (ShiftVal > 4)

572

return false;

573

574

Ext = getExtendTypeForNode(N.getOperand(0));

575

if (Ext == AArch64_AM::InvalidShiftExtend)

576

return false;

577

578

Reg = N.getOperand(0).getOperand(0);

579

} else {

580

Ext = getExtendTypeForNode(N);

581

if (Ext == AArch64_AM::InvalidShiftExtend)

582

return false;

583

584

Reg = N.getOperand(0);

585

}

586

587

// AArch64 mandates that the RHS of the operation must use the smallest

588

// register class that could contain the size being extended from. Thus,

589

// if we're folding a (sext i8), we need the RHS to be a GPR32, even though

590

// there might not be an actual 32-bit value in the program. We can

591

// (harmlessly) synthesize one by injected an EXTRACT_SUBREG here.

592

assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX)((Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX)
? static_cast<void> (0) : __assert_fail ("Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX"
, "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp"
, 592, __PRETTY_FUNCTION__));

593

Reg = narrowIfNeeded(CurDAG, Reg);

594

Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),

595

MVT::i32);

596

return isWorthFolding(N);

597

}

598

599

/// If there's a use of this ADDlow that's not itself a load/store then we'll

600

/// need to create a real ADD instruction from it anyway and there's no point in

601

/// folding it into the mem op. Theoretically, it shouldn't matter, but there's

602

/// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding

603

/// leads to duplicated ADRP instructions.

604

static bool isWorthFoldingADDlow(SDValue N) {

605

for (auto Use : N->uses()) {

606

if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&

607

Use->getOpcode() != ISD::ATOMIC_LOAD &&

608

Use->getOpcode() != ISD::ATOMIC_STORE)

609

return false;

610

611

// ldar and stlr have much more restrictive addressing modes (just a

612

// register).

613

if (cast<MemSDNode>(Use)->getOrdering() > Monotonic)

614

return false;

615

}

616

617

return true;

618

}

619

620

/// SelectAddrModeIndexed7S - Select a "register plus scaled signed 7-bit

621

/// immediate" address. The "Size" argument is the size in bytes of the memory

622

/// reference, which determines the scale.

623

bool AArch64DAGToDAGISel::SelectAddrModeIndexed7S(SDValue N, unsigned Size,

624

SDValue &Base,

625

SDValue &OffImm) {

626

SDLoc dl(N);

627

const DataLayout &DL = CurDAG->getDataLayout();

628

const TargetLowering *TLI = getTargetLowering();

629

if (N.getOpcode() == ISD::FrameIndex) {

630

int FI = cast<FrameIndexSDNode>(N)->getIndex();

631

Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));

632

OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);

633

return true;

634

}

635

636

// As opposed to the (12-bit) Indexed addressing mode below, the 7-bit signed

637

// selected here doesn't support labels/immediates, only base+offset.

638

639

if (CurDAG->isBaseWithConstantOffset(N)) {

640

if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {

641

int64_t RHSC = RHS->getSExtValue();

642

unsigned Scale = Log2_32(Size);

643

if ((RHSC & (Size - 1)) == 0 && RHSC >= -(0x40 << Scale) &&

644

RHSC < (0x40 << Scale)) {

645

Base = N.getOperand(0);

646

if (Base.getOpcode() == ISD::FrameIndex) {

647

int FI = cast<FrameIndexSDNode>(Base)->getIndex();

648

Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));

649

}

650

OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);

651

return true;

652

}

653

}

654

}

655

656

// Base only. The address will be materialized into a register before

657

// the memory is accessed.

658

// add x0, Xbase, #offset

659

// stp x1, x2, [x0]

660

Base = N;

661

OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);

662

return true;

663

}

664

665

/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit

666

/// immediate" address. The "Size" argument is the size in bytes of the memory

667

/// reference, which determines the scale.

668

bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,

669

SDValue &Base, SDValue &OffImm) {

670

SDLoc dl(N);

671

const DataLayout &DL = CurDAG->getDataLayout();

672

const TargetLowering *TLI = getTargetLowering();

673

if (N.getOpcode() == ISD::FrameIndex) {

Taking false branch

→

674

int FI = cast<FrameIndexSDNode>(N)->getIndex();

675

Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));

676

OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);

677

return true;

678

}

679

680

if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {

681

GlobalAddressSDNode *GAN =

682

dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());

683

Base = N.getOperand(0);

684

OffImm = N.getOperand(1);

685

if (!GAN)

686

return true;

687

688

const GlobalValue *GV = GAN->getGlobal();

689

unsigned Alignment = GV->getAlignment();

690

Type *Ty = GV->getType()->getElementType();

691

if (Alignment == 0 && Ty->isSized())

692

Alignment = DL.getABITypeAlignment(Ty);

693

694

if (Alignment >= Size)

695

return true;

696

}

697

698

if (CurDAG->isBaseWithConstantOffset(N)) {

←

Taking true branch

→

699

if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {

←

Assuming 'RHS' is non-null

→

←

Taking true branch

→

700

int64_t RHSC = (int64_t)RHS->getZExtValue();

701

unsigned Scale = Log2_32(Size);

702

if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {

←

Assuming 'RHSC' is >= 0

→

←

The result of the '<<' expression is undefined

703

Base = N.getOperand(0);

704

if (Base.getOpcode() == ISD::FrameIndex) {

705

int FI = cast<FrameIndexSDNode>(Base)->getIndex();

706

Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));

707

}

708

OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);

709

return true;

710

}

711

}

712

}

713

714

// Before falling back to our general case, check if the unscaled

715

// instructions can handle this. If so, that's preferable.

716

if (SelectAddrModeUnscaled(N, Size, Base, OffImm))

717

return false;

718

719

// Base only. The address will be materialized into a register before

720

// the memory is accessed.

721

// add x0, Xbase, #offset

722

// ldr x0, [x0]

723

Base = N;

724

OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);

725

return true;

726

}

727

728

/// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit

729

/// immediate" address. This should only match when there is an offset that

730

/// is not valid for a scaled immediate addressing mode. The "Size" argument

731

/// is the size in bytes of the memory reference, which is needed here to know

732

/// what is valid for a scaled immediate.

733

bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,

734

SDValue &Base,

735

SDValue &OffImm) {

736

if (!CurDAG->isBaseWithConstantOffset(N))

737

return false;

738

if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {

739

int64_t RHSC = RHS->getSExtValue();

740

// If the offset is valid as a scaled immediate, don't match here.

741

if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 &&

742

RHSC < (0x1000 << Log2_32(Size)))

743

return false;

744

if (RHSC >= -256 && RHSC < 256) {

745

Base = N.getOperand(0);

746

if (Base.getOpcode() == ISD::FrameIndex) {

747

int FI = cast<FrameIndexSDNode>(Base)->getIndex();

748

const TargetLowering *TLI = getTargetLowering();

749

Base = CurDAG->getTargetFrameIndex(

750

FI, TLI->getPointerTy(CurDAG->getDataLayout()));

751

}

752

OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);

753

return true;

754

}

755

}

756

return false;

757

}

758

759

/// Wrap N in an i64 value by inserting it as the sub_32 subregister of an
/// IMPLICIT_DEF i64 (INSERT_SUBREG). The remaining bits come from the
/// IMPLICIT_DEF and are therefore not defined by this helper.
static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
  SDLoc dl(N);
  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
  SDValue ImpDef = SDValue(
      CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
  MachineSDNode *Node = CurDAG->getMachineNode(
      TargetOpcode::INSERT_SUBREG, dl, MVT::i64, ImpDef, N, SubReg);
  return SDValue(Node, 0);
}

768

769

/// \brief Check if the given SHL node (\p N), can be used to form an

770

/// extended register for an addressing mode.

771

bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,

772

bool WantExtend, SDValue &Offset,

773

SDValue &SignExtend) {

774

assert(N.getOpcode() == ISD::SHL && "Invalid opcode.")((N.getOpcode() == ISD::SHL && "Invalid opcode.") ? static_cast
<void> (0) : __assert_fail ("N.getOpcode() == ISD::SHL && \"Invalid opcode.\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp"
, 774, __PRETTY_FUNCTION__));

775

ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));

776

if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())

777

return false;

778

779

SDLoc dl(N);

780

if (WantExtend) {

781

AArch64_AM::ShiftExtendType Ext =

782

getExtendTypeForNode(N.getOperand(0), true);

783

if (Ext == AArch64_AM::InvalidShiftExtend)

784

return false;

785

786

Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));

787

SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,

788

MVT::i32);

789

} else {

790

Offset = N.getOperand(0);

791

SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);

792

}

793

794

unsigned LegalShiftVal = Log2_32(Size);

795

unsigned ShiftVal = CSD->getZExtValue();

796

797

if (ShiftVal != 0 && ShiftVal != LegalShiftVal)

798

return false;

799

800

if (isWorthFolding(N))

801

return true;

802

803

return false;

804

}

805

806

bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,

807

SDValue &Base, SDValue &Offset,

808

SDValue &SignExtend,

809

SDValue &DoShift) {

810

if (N.getOpcode() != ISD::ADD)

811

return false;

812

SDValue LHS = N.getOperand(0);

813

SDValue RHS = N.getOperand(1);

814

SDLoc dl(N);

815

816

// We don't want to match immediate adds here, because they are better lowered

817

// to the register-immediate addressing modes.

818

if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))

819

return false;

820

821

// Check if this particular node is reused in any non-memory related

822

// operation. If yes, do not try to fold this node into the address

823

// computation, since the computation will be kept.

824

const SDNode *Node = N.getNode();

825

for (SDNode *UI : Node->uses()) {

826

if (!isa<MemSDNode>(*UI))

827

return false;

828

}

829

830

// Remember if it is worth folding N when it produces extended register.

831

bool IsExtendedRegisterWorthFolding = isWorthFolding(N);

832

833

// Try to match a shifted extend on the RHS.

834

if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&

835

SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {

836

Base = LHS;

837

DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);

838

return true;

839

}

840

841

// Try to match a shifted extend on the LHS.

842

if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&

843

SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {

844

Base = RHS;

845

DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);

846

return true;

847

}

848

849

// There was no shift, whatever else we find.

850

DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);

851

852

AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend;

853

// Try to match an unshifted extend on the LHS.

854

if (IsExtendedRegisterWorthFolding &&

855

(Ext = getExtendTypeForNode(LHS, true)) !=

856

AArch64_AM::InvalidShiftExtend) {

857

Base = RHS;

858

Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));

859

SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,

860

MVT::i32);

861

if (isWorthFolding(LHS))

862

return true;

863

}

864

865

// Try to match an unshifted extend on the RHS.

866

if (IsExtendedRegisterWorthFolding &&

867

(Ext = getExtendTypeForNode(RHS, true)) !=

868

AArch64_AM::InvalidShiftExtend) {

869

Base = LHS;

870

Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));

871

SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,

872

MVT::i32);

873

if (isWorthFolding(RHS))

874

return true;

875

}

876

877

return false;

878

}

879

880

// Check if the given immediate is preferred by ADD. If an immediate can be
// encoded in an ADD, or it can be encoded in an "ADD LSL #12" and can not be
// encoded by one MOVZ, return true.
static bool isPreferredADD(int64_t ImmOff) {
  const uint64_t Bits = static_cast<uint64_t>(ImmOff);
  const uint64_t Low12 = 0xfffULL;       // bits [0, 11]
  const uint64_t Mid12 = 0xfffULL << 12; // bits [12, 23]

  // Constant in [0x0, 0xfff] can be encoded in ADD.
  if ((Bits & ~Low12) == 0)
    return true;

  // Anything with bits outside [12, 23] cannot use "ADD LSL #12" either.
  if ((Bits & ~Mid12) != 0)
    return false;

  // As a single MOVZ is faster than a "ADD of LSL #12", ignore constants that
  // live entirely in bits [12, 15] or entirely in bits [16, 23].
  const bool UsesBits12To15 = (Bits & 0xf000ULL) != 0;
  const bool UsesBits16To23 = (Bits & 0xff0000ULL) != 0;
  return UsesBits12To15 && UsesBits16To23;
}

894

895

bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,

896

SDValue &Base, SDValue &Offset,

897

SDValue &SignExtend,

898

SDValue &DoShift) {

899

if (N.getOpcode() != ISD::ADD)

900

return false;

901

SDValue LHS = N.getOperand(0);

902

SDValue RHS = N.getOperand(1);

903

SDLoc DL(N);

904

905

// Check if this particular node is reused in any non-memory related

906

// operation. If yes, do not try to fold this node into the address

907

// computation, since the computation will be kept.

908

const SDNode *Node = N.getNode();

909

for (SDNode *UI : Node->uses()) {

910

if (!isa<MemSDNode>(*UI))

911

return false;

912

}

913

914

// Watch out if RHS is a wide immediate, it can not be selected into

915

// [BaseReg+Imm] addressing mode. Also it may not be able to be encoded into

916

// ADD/SUB. Instead it will use [BaseReg + 0] address mode and generate

917

// instructions like:

918

// MOV X0, WideImmediate

919

// ADD X1, BaseReg, X0

920

// LDR X2, [X1, 0]

921

// For such situation, using [BaseReg, XReg] addressing mode can save one

922

// ADD/SUB:

923

// MOV X0, WideImmediate

924

// LDR X2, [BaseReg, X0]

925

if (isa<ConstantSDNode>(RHS)) {

926

int64_t ImmOff = (int64_t)cast<ConstantSDNode>(RHS)->getZExtValue();

927

unsigned Scale = Log2_32(Size);

928

// Skip the immediate can be selected by load/store addressing mode.

929

// Also skip the immediate can be encoded by a single ADD (SUB is also

930

// checked by using -ImmOff).

931

if ((ImmOff % Size == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale)) ||

932

isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))

933

return false;

934

935

SDValue Ops[] = { RHS };

936

SDNode *MOVI =

937

CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);

938

SDValue MOVIV = SDValue(MOVI, 0);

939

// This ADD of two X register will be selected into [Reg+Reg] mode.

940

N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);

941

}

942

943

// Remember if it is worth folding N when it produces extended register.

944

bool IsExtendedRegisterWorthFolding = isWorthFolding(N);

945

946

// Try to match a shifted extend on the RHS.

947

if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&

948

SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {

949

Base = LHS;

950

DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);

951

return true;

952

}

953

954

// Try to match a shifted extend on the LHS.

955

if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&

956

SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {

957

Base = RHS;

958

DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);

959

return true;

960

}

961

962

// Match any non-shifted, non-extend, non-immediate add expression.

963

Base = LHS;

964

Offset = RHS;

965

SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);

966

DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);

967

// Reg1 + Reg2 is free: no check needed.

968

return true;

969

}

970

971

/// Combine \p Regs into one value using the D-register tuple classes and the
/// dsub0..dsub3 sub-register indices; the work is done by createTuple.
SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
  // Register classes for tuples of 2, 3 and 4 registers, in that order.
  static const unsigned DTupleClassIDs[] = {AArch64::DDRegClassID,
                                            AArch64::DDDRegClassID,
                                            AArch64::DDDDRegClassID};
  // Sub-register index of each tuple element.
  static const unsigned DSubRegIdxs[] = {AArch64::dsub0, AArch64::dsub1,
                                         AArch64::dsub2, AArch64::dsub3};

  return createTuple(Regs, DTupleClassIDs, DSubRegIdxs);
}

979

980

/// Combine \p Regs into one value using the Q-register tuple classes and the
/// qsub0..qsub3 sub-register indices; the work is done by createTuple.
SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
  // Register classes for tuples of 2, 3 and 4 registers, in that order.
  static const unsigned QTupleClassIDs[] = {AArch64::QQRegClassID,
                                            AArch64::QQQRegClassID,
                                            AArch64::QQQQRegClassID};
  // Sub-register index of each tuple element.
  static const unsigned QSubRegIdxs[] = {AArch64::qsub0, AArch64::qsub1,
                                         AArch64::qsub2, AArch64::qsub3};

  return createTuple(Regs, QTupleClassIDs, QSubRegIdxs);
}

988

989

/// Build a REG_SEQUENCE that places each of \p Regs at the matching entry of
/// \p SubRegs.  \p RegClassIDs is indexed by (number of registers - 2).  A
/// single register is returned unchanged; otherwise between two and four
/// registers are expected.
SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
                                         const unsigned RegClassIDs[],
                                         const unsigned SubRegs[]) {
  // There's no special register-class for a vector-list of 1 element: it's
  // just a vector.
  if (Regs.size() == 1)
    return Regs[0];

  assert(Regs.size() >= 2 && Regs.size() <= 4);

  const size_t NumRegs = Regs.size();
  SDLoc DL(Regs[0]);

  SmallVector<SDValue, 4> SeqOps;

  // First operand of REG_SEQUENCE is the desired RegClass.
  SeqOps.push_back(
      CurDAG->getTargetConstant(RegClassIDs[NumRegs - 2], DL, MVT::i32));

  // Then we get pairs of source & subregister-position for the components.
  for (size_t Idx = 0; Idx != NumRegs; ++Idx) {
    SeqOps.push_back(Regs[Idx]);
    SeqOps.push_back(CurDAG->getTargetConstant(SubRegs[Idx], DL, MVT::i32));
  }

  SDNode *Seq = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                       MVT::Untyped, SeqOps);
  return SDValue(Seq, 0);
}

1017

1018

/// Create the machine node \p Opc for \p N from \p NumVecs table registers.
/// The table registers start at operand 1, or at operand 2 when \p isExt is
/// set (operand 1 is then forwarded as the leading operand).  The operand
/// that follows the table registers is forwarded last.
SDNode *AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs,
                                         unsigned Opc, bool isExt) {
  SDLoc DL(N);
  EVT ResultVT = N->getValueType(0);

  // Index of the first table register among N's operands.
  const unsigned FirstVecIdx = isExt ? 2 : 1;

  // Form a REG_SEQUENCE to force register allocation.
  SmallVector<SDValue, 4> TableRegs(N->op_begin() + FirstVecIdx,
                                    N->op_begin() + FirstVecIdx + NumVecs);
  SDValue RegSeq = createQTuple(TableRegs);

  SmallVector<SDValue, 6> NewOps;
  if (isExt)
    NewOps.push_back(N->getOperand(1));
  NewOps.push_back(RegSeq);
  NewOps.push_back(N->getOperand(FirstVecIdx + NumVecs));

  return CurDAG->getMachineNode(Opc, DL, ResultVT, NewOps);
}

1038

1039

/// Select a pre- or post-indexed load.  Unindexed loads and unsupported
/// memory types return nullptr with \p Done untouched.  Otherwise the uses
/// of \p N are rewired to a new machine node, \p Done is set to true and
/// nullptr is returned.
SDNode *AArch64DAGToDAGISel::SelectIndexedLoad(SDNode *N, bool &Done) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  if (LD->isUnindexed())
    return nullptr;

  EVT MemVT = LD->getMemoryVT();
  EVT DstVT = N->getValueType(0);
  const ISD::MemIndexedMode AM = LD->getAddressingMode();
  const bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
  const ISD::LoadExtType ExtType = LD->getExtensionType();
  const bool IsSExt = ExtType == ISD::SEXTLOAD;

  // Pick the pre- or post-indexed variant of an instruction.
  auto Pick = [IsPre](unsigned PreOpc, unsigned PostOpc) {
    return IsPre ? PreOpc : PostOpc;
  };

  // We're not doing validity checking here.  That was done when checking
  // if we should mark the load as indexed or not.  We're just selecting
  // the right instruction.
  unsigned Opcode = 0;
  bool InsertTo64 = false;

  if (MemVT == MVT::i64) {
    Opcode = Pick(AArch64::LDRXpre, AArch64::LDRXpost);
  } else if (MemVT == MVT::i32) {
    if (IsSExt) {
      Opcode = Pick(AArch64::LDRSWpre, AArch64::LDRSWpost);
    } else {
      Opcode = Pick(AArch64::LDRWpre, AArch64::LDRWpost);
      if (ExtType != ISD::NON_EXTLOAD) {
        InsertTo64 = true;
        // The result of the load is only i32.  It's the subreg_to_reg that
        // makes it into an i64.
        DstVT = MVT::i32;
      }
    }
  } else if (MemVT == MVT::i16) {
    if (IsSExt) {
      Opcode = DstVT == MVT::i64
                   ? Pick(AArch64::LDRSHXpre, AArch64::LDRSHXpost)
                   : Pick(AArch64::LDRSHWpre, AArch64::LDRSHWpost);
    } else {
      Opcode = Pick(AArch64::LDRHHpre, AArch64::LDRHHpost);
      InsertTo64 = DstVT == MVT::i64;
      // The result of the load is only i32.  It's the subreg_to_reg that
      // makes it into an i64.
      DstVT = MVT::i32;
    }
  } else if (MemVT == MVT::i8) {
    if (IsSExt) {
      Opcode = DstVT == MVT::i64
                   ? Pick(AArch64::LDRSBXpre, AArch64::LDRSBXpost)
                   : Pick(AArch64::LDRSBWpre, AArch64::LDRSBWpost);
    } else {
      Opcode = Pick(AArch64::LDRBBpre, AArch64::LDRBBpost);
      InsertTo64 = DstVT == MVT::i64;
      // The result of the load is only i32.  It's the subreg_to_reg that
      // makes it into an i64.
      DstVT = MVT::i32;
    }
  } else if (MemVT == MVT::f16) {
    Opcode = Pick(AArch64::LDRHpre, AArch64::LDRHpost);
  } else if (MemVT == MVT::f32) {
    Opcode = Pick(AArch64::LDRSpre, AArch64::LDRSpost);
  } else if (MemVT == MVT::f64 || MemVT.is64BitVector()) {
    Opcode = Pick(AArch64::LDRDpre, AArch64::LDRDpost);
  } else if (MemVT.is128BitVector()) {
    Opcode = Pick(AArch64::LDRQpre, AArch64::LDRQpost);
  } else {
    return nullptr;
  }

  SDValue Chain = LD->getChain();
  SDValue Base = LD->getBasePtr();
  ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
  int OffsetVal = (int)OffsetOp->getZExtValue();
  SDLoc dl(N);
  SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
  SDValue Ops[] = { Base, Offset, Chain };
  // Results: 0 = written-back base (i64), 1 = loaded value, 2 = chain.
  SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
                                       MVT::Other, Ops);
  // Either way, we're replacing the node, so tell the caller that.
  Done = true;

  SDValue LoadedVal = SDValue(Res, 1);
  if (InsertTo64) {
    SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
    LoadedVal =
        SDValue(CurDAG->getMachineNode(
                    AArch64::SUBREG_TO_REG, dl, MVT::i64,
                    CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal,
                    SubReg),
                0);
  }

  // N's results are (value, written-back base, chain); map them over.
  ReplaceUses(SDValue(N, 0), LoadedVal);
  ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
  ReplaceUses(SDValue(N, 2), SDValue(Res, 2));

  return nullptr;
}

1133

1134

/// Replace \p N with the load machine node \p Opc.  The first \p NumVecs
/// results of \p N become sub-register extracts (indices \p SubRegIdx,
/// \p SubRegIdx + 1, ...) of the new node's untyped result, and result
/// \p NumVecs of \p N becomes the new node's second result.  Always returns
/// nullptr.
SDNode *AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs,
                                        unsigned Opc, unsigned SubRegIdx) {
  SDLoc DL(N);
  EVT VecVT = N->getValueType(0);

  SDValue LoadOps[] = {N->getOperand(2),  // Mem operand
                       N->getOperand(0)}; // Chain
  const EVT ResultTys[] = {MVT::Untyped, MVT::Other};

  SDNode *NewLoad = CurDAG->getMachineNode(Opc, DL, ResultTys, LoadOps);
  SDValue Tuple = SDValue(NewLoad, 0);

  for (unsigned Idx = 0; Idx != NumVecs; ++Idx) {
    SDValue Part =
        CurDAG->getTargetExtractSubreg(SubRegIdx + Idx, DL, VecVT, Tuple);
    ReplaceUses(SDValue(N, Idx), Part);
  }

  ReplaceUses(SDValue(N, NumVecs), SDValue(NewLoad, 1));
  return nullptr;
}

1154

1155

/// Replace \p N with the post-indexed load machine node \p Opc, whose
/// results are (write-back register, vector list, chain).  Results
/// 0..NumVecs-1 of \p N are taken from the vector list, result \p NumVecs
/// from the write-back register and result \p NumVecs + 1 from the chain.
/// Always returns nullptr.
SDNode *AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
                                            unsigned Opc, unsigned SubRegIdx) {
  SDLoc DL(N);
  EVT VecVT = N->getValueType(0);

  SDValue LoadOps[] = {N->getOperand(1),  // Mem operand
                       N->getOperand(2),  // Incremental
                       N->getOperand(0)}; // Chain
  const EVT ResultTys[] = {MVT::i64, // Type of the write back register
                           MVT::Untyped, MVT::Other};

  SDNode *NewLoad = CurDAG->getMachineNode(Opc, DL, ResultTys, LoadOps);

  // Update uses of write back register
  ReplaceUses(SDValue(N, NumVecs), SDValue(NewLoad, 0));

  // Update uses of vector list
  SDValue Tuple = SDValue(NewLoad, 1);
  if (NumVecs != 1) {
    for (unsigned Idx = 0; Idx != NumVecs; ++Idx) {
      SDValue Part =
          CurDAG->getTargetExtractSubreg(SubRegIdx + Idx, DL, VecVT, Tuple);
      ReplaceUses(SDValue(N, Idx), Part);
    }
  } else {
    // A one-element list is used directly, without a sub-register extract.
    ReplaceUses(SDValue(N, 0), Tuple);
  }

  // Update the chain
  ReplaceUses(SDValue(N, NumVecs + 1), SDValue(NewLoad, 2));
  return nullptr;
}

1186

1187

/// Create the store machine node \p Opc for \p N.  The \p NumVecs registers
/// to store are operands 2 .. NumVecs+1; they are grouped into a Q tuple when
/// the operand type is 128 bits wide and into a D tuple otherwise.
SDNode *AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
                                         unsigned Opc) {
  SDLoc DL(N);
  EVT VecVT = N->getOperand(2)->getValueType(0);

  // Form a REG_SEQUENCE to force register allocation.
  SmallVector<SDValue, 4> SrcRegs(N->op_begin() + 2,
                                  N->op_begin() + 2 + NumVecs);
  SDValue RegSeq;
  if (VecVT.getSizeInBits() == 128)
    RegSeq = createQTuple(SrcRegs);
  else
    RegSeq = createDTuple(SrcRegs);

  // Operands: register list, the operand following the list, then operand 0.
  SDValue StoreOps[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
  return CurDAG->getMachineNode(Opc, DL, N->getValueType(0), StoreOps);
}

1202

1203

/// Create the post-indexed store machine node \p Opc for \p N.  The
/// \p NumVecs registers to store are operands 1 .. NumVecs; the tuple kind
/// (Q or D) is chosen from the bit width of operand 2's type.  The new node
/// produces the write-back register and the chain.
SDNode *AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
                                             unsigned Opc) {
  SDLoc DL(N);
  EVT VecVT = N->getOperand(2)->getValueType(0);
  const EVT ResultTys[] = {MVT::i64,    // Type of the write back register
                           MVT::Other}; // Type for the Chain

  // Form a REG_SEQUENCE to force register allocation.
  SmallVector<SDValue, 4> SrcRegs(N->op_begin() + 1,
                                  N->op_begin() + 1 + NumVecs);
  SDValue RegSeq;
  if (VecVT.getSizeInBits() == 128)
    RegSeq = createQTuple(SrcRegs);
  else
    RegSeq = createDTuple(SrcRegs);

  SDValue StoreOps[] = {RegSeq,
                        N->getOperand(NumVecs + 1), // base register
                        N->getOperand(NumVecs + 2), // Incremental
                        N->getOperand(0)};          // Chain
  return CurDAG->getMachineNode(Opc, DL, ResultTys, StoreOps);
}

1223

1224

namespace {

1225

/// WidenVector - Given a value in the V64 register class, produce the

1226

/// equivalent value in the V128 register class.

1227

class WidenVector {

1228

SelectionDAG &DAG;

1229

1230

public:

1231

WidenVector(SelectionDAG &DAG) : DAG(DAG) {}

1232

1233

SDValue operator()(SDValue V64Reg) {

1234

EVT VT = V64Reg.getValueType();

1235

unsigned NarrowSize = VT.getVectorNumElements();

1236

MVT EltTy = VT.getVectorElementType().getSimpleVT();

1237

MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);

1238

SDLoc DL(V64Reg);

1239

1240

SDValue Undef =

1241

SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);

1242

return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);

1243

}

1244

};

1245

} // namespace

1246

1247

/// NarrowVector - Given a value in the V128 register class, produce the

1248

/// equivalent value in the V64 register class.

1249

static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {

1250

EVT VT = V128Reg.getValueType();

1251

unsigned WideSize = VT.getVectorNumElements();

1252

MVT EltTy = VT.getVectorElementType().getSimpleVT();

1253

MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);

1254

1255

return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,

1256

V128Reg);

1257

}

1258

1259

/// Create the lane-load machine node \p Opc for \p N.  The \p NumVecs input
/// registers (operands 2 .. NumVecs+1) are widened first when the result
/// type is 64 bits wide, and the extracted results are narrowed back in that
/// case.  Operand NumVecs+2 must be a constant lane number.  The uses of
/// \p N are redirected here and the new node is returned.
SDNode *AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
                                            unsigned Opc) {
  SDLoc DL(N);
  EVT ResVT = N->getValueType(0);
  const bool Narrow = ResVT.getSizeInBits() == 64;

  // Form a REG_SEQUENCE to force register allocation.
  SmallVector<SDValue, 4> VecRegs(N->op_begin() + 2,
                                  N->op_begin() + 2 + NumVecs);
  if (Narrow) {
    WidenVector Widener(*CurDAG);
    for (SDValue &Reg : VecRegs)
      Reg = Widener(Reg);
  }
  SDValue RegSeq = createQTuple(VecRegs);

  const EVT ResultTys[] = {MVT::Untyped, MVT::Other};

  unsigned LaneNo =
      cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();

  SDValue LaneOps[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, DL, MVT::i64),
                       N->getOperand(NumVecs + 3), N->getOperand(0)};
  SDNode *NewLoad = CurDAG->getMachineNode(Opc, DL, ResultTys, LaneOps);
  SDValue Tuple = SDValue(NewLoad, 0);

  // Type of the (possibly widened) registers that make up the tuple.
  EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
  static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
                                    AArch64::qsub2, AArch64::qsub3 };
  for (unsigned Idx = 0; Idx != NumVecs; ++Idx) {
    SDValue Part =
        CurDAG->getTargetExtractSubreg(QSubs[Idx], DL, WideVT, Tuple);
    if (Narrow)
      Part = NarrowVector(Part, *CurDAG);
    ReplaceUses(SDValue(N, Idx), Part);
  }

  ReplaceUses(SDValue(N, NumVecs), SDValue(NewLoad, 1));

  return NewLoad;
}

1298

1299

/// Create the post-indexed lane-load machine node \p Opc for \p N.  The
/// \p NumVecs input registers are operands 1 .. NumVecs and operand
/// NumVecs+1 must be a constant lane number.  The new node produces the
/// write-back register, the vector list and the chain; the uses of \p N are
/// redirected here and the new node is returned.
SDNode *AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
                                                unsigned Opc) {
  SDLoc DL(N);
  EVT ResVT = N->getValueType(0);
  const bool Narrow = ResVT.getSizeInBits() == 64;

  // Form a REG_SEQUENCE to force register allocation.
  SmallVector<SDValue, 4> VecRegs(N->op_begin() + 1,
                                  N->op_begin() + 1 + NumVecs);
  if (Narrow) {
    WidenVector Widener(*CurDAG);
    for (SDValue &Reg : VecRegs)
      Reg = Widener(Reg);
  }
  SDValue RegSeq = createQTuple(VecRegs);

  const EVT ResultTys[] = {MVT::i64, // Type of the write back register
                           RegSeq->getValueType(0), MVT::Other};

  unsigned LaneNo =
      cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();

  SDValue LaneOps[] = {RegSeq,
                       CurDAG->getTargetConstant(LaneNo, DL,
                                                 MVT::i64), // Lane Number
                       N->getOperand(NumVecs + 2),          // Base register
                       N->getOperand(NumVecs + 3),          // Incremental
                       N->getOperand(0)};
  SDNode *NewLoad = CurDAG->getMachineNode(Opc, DL, ResultTys, LaneOps);

  // Update uses of the write back register
  ReplaceUses(SDValue(N, NumVecs), SDValue(NewLoad, 0));

  // Update uses of the vector list
  SDValue Tuple = SDValue(NewLoad, 1);
  if (NumVecs != 1) {
    EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
    static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
                                      AArch64::qsub2, AArch64::qsub3 };
    for (unsigned Idx = 0; Idx != NumVecs; ++Idx) {
      SDValue Part =
          CurDAG->getTargetExtractSubreg(QSubs[Idx], DL, WideVT, Tuple);
      if (Narrow)
        Part = NarrowVector(Part, *CurDAG);
      ReplaceUses(SDValue(N, Idx), Part);
    }
  } else {
    // A one-element list is used directly, narrowed when required.
    ReplaceUses(SDValue(N, 0),
                Narrow ? NarrowVector(Tuple, *CurDAG) : Tuple);
  }

  // Update the Chain
  ReplaceUses(SDValue(N, NumVecs + 1), SDValue(NewLoad, 2));

  return NewLoad;
}

1354

1355

/// Create the lane-store machine node \p Opc for \p N.  The \p NumVecs
/// registers to store are operands 2 .. NumVecs+1 (widened first when
/// operand 2's type is 64 bits wide) and operand NumVecs+2 must be a constant
/// lane number.  The memory operand of \p N is copied to the new node.
SDNode *AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
                                             unsigned Opc) {
  SDLoc DL(N);
  EVT VecVT = N->getOperand(2)->getValueType(0);
  const bool Narrow = VecVT.getSizeInBits() == 64;

  // Form a REG_SEQUENCE to force register allocation.
  SmallVector<SDValue, 4> VecRegs(N->op_begin() + 2,
                                  N->op_begin() + 2 + NumVecs);
  if (Narrow) {
    WidenVector Widener(*CurDAG);
    for (SDValue &Reg : VecRegs)
      Reg = Widener(Reg);
  }
  SDValue RegSeq = createQTuple(VecRegs);

  unsigned LaneNo =
      cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();

  SDValue LaneOps[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, DL, MVT::i64),
                       N->getOperand(NumVecs + 3), N->getOperand(0)};
  SDNode *NewStore = CurDAG->getMachineNode(Opc, DL, MVT::Other, LaneOps);

  // Transfer memoperands.
  MachineSDNode::mmo_iterator MemRefs = MF->allocateMemRefsArray(1);
  MemRefs[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  cast<MachineSDNode>(NewStore)->setMemRefs(MemRefs, MemRefs + 1);

  return NewStore;
}

1384

1385

/// Create the post-indexed lane-store machine node \p Opc for \p N.  The
/// \p NumVecs registers to store are operands 1 .. NumVecs (widened first
/// when operand 2's type is 64 bits wide) and operand NumVecs+1 must be a
/// constant lane number.  The new node produces the write-back register and
/// the chain; the memory operand of \p N is copied to it.
SDNode *AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
                                                 unsigned Opc) {
  SDLoc DL(N);
  EVT VecVT = N->getOperand(2)->getValueType(0);
  const bool Narrow = VecVT.getSizeInBits() == 64;

  // Form a REG_SEQUENCE to force register allocation.
  SmallVector<SDValue, 4> VecRegs(N->op_begin() + 1,
                                  N->op_begin() + 1 + NumVecs);
  if (Narrow) {
    WidenVector Widener(*CurDAG);
    for (SDValue &Reg : VecRegs)
      Reg = Widener(Reg);
  }
  SDValue RegSeq = createQTuple(VecRegs);

  const EVT ResultTys[] = {MVT::i64, // Type of the write back register
                           MVT::Other};

  unsigned LaneNo =
      cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();

  SDValue LaneOps[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, DL, MVT::i64),
                       N->getOperand(NumVecs + 2), // Base Register
                       N->getOperand(NumVecs + 3), // Incremental
                       N->getOperand(0)};
  SDNode *NewStore = CurDAG->getMachineNode(Opc, DL, ResultTys, LaneOps);

  // Transfer memoperands.
  MachineSDNode::mmo_iterator MemRefs = MF->allocateMemRefsArray(1);
  MemRefs[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  cast<MachineSDNode>(NewStore)->setMemRefs(MemRefs, MemRefs + 1);

  return NewStore;
}

1419

1420

static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,

1421

unsigned &Opc, SDValue &Opd0,

1422

unsigned &LSB, unsigned &MSB,

1423

unsigned NumberOfIgnoredLowBits,

1424

bool BiggerPattern) {

1425

assert(N->getOpcode() == ISD::AND &&((N->getOpcode() == ISD::AND && "N must be a AND operation to call this function"
) ? static_cast<void> (0) : __assert_fail ("N->getOpcode() == ISD::AND && \"N must be a AND operation to call this function\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp"
, 1426, __PRETTY_FUNCTION__))

1426

"N must be a AND operation to call this function")((N->getOpcode() == ISD::AND && "N must be a AND operation to call this function"
) ? static_cast<void> (0) : __assert_fail ("N->getOpcode() == ISD::AND && \"N must be a AND operation to call this function\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp"
, 1426, __PRETTY_FUNCTION__));

1427

1428

EVT VT = N->getValueType(0);

1429

1430

// Here we can test the type of VT and return false when the type does not

1431

// match, but since it is done prior to that call in the current context

1432

// we turned that into an assert to avoid redundant code.

1433

assert((VT == MVT::i32 || VT == MVT::i64) &&(((VT == MVT::i32 || VT == MVT::i64) && "Type checking must have been done before calling this function"
) ? static_cast<void> (0) : __assert_fail ("(VT == MVT::i32 || VT == MVT::i64) && \"Type checking must have been done before calling this function\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp"
, 1434, __PRETTY_FUNCTION__))

1434

"Type checking must have been done before calling this function")(((VT == MVT::i32 || VT == MVT::i64) && "Type checking must have been done before calling this function"
) ? static_cast<void> (0) : __assert_fail ("(VT == MVT::i32 || VT == MVT::i64) && \"Type checking must have been done before calling this function\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp"
, 1434, __PRETTY_FUNCTION__));

1435

1436

// FIXME: simplify-demanded-bits in DAGCombine will probably have

1437

// changed the AND node to a 32-bit mask operation. We'll have to

1438

// undo that as part of the transform here if we want to catch all

1439

// the opportunities.

1440

// Currently the NumberOfIgnoredLowBits argument helps to recover

1441

// form these situations when matching bigger pattern (bitfield insert).

1442

1443

// For unsigned extracts, check for a shift right and mask

1444

uint64_t And_imm = 0;

1445

if (!isOpcWithIntImmediate(N, ISD::AND, And_imm))

1446

return false;

1447

1448

const SDNode *Op0 = N->getOperand(0).getNode();

1449

1450

// Because of simplify-demanded-bits in DAGCombine, the mask may have been

1451

// simplified. Try to undo that

1452

And_imm |= (1 << NumberOfIgnoredLowBits) - 1;

1453

1454

// The immediate is a mask of the low bits iff imm & (imm+1) == 0

1455

if (And_imm & (And_imm + 1))

1456

return false;

1457

1458

bool ClampMSB = false;

1459

uint64_t Srl_imm = 0;

1460

// Handle the SRL + ANY_EXTEND case.

1461

if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&

1462

isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, Srl_imm)) {

1463

// Extend the incoming operand of the SRL to 64-bit.

1464

Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));

1465

// Make sure to clamp the MSB so that we preserve the semantics of the

1466

// original operations.

1467

ClampMSB = true;

1468

} else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&

1469

isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL,

1470

Srl_imm)) {

1471

// If the shift result was truncated, we can still combine them.

1472

Opd0 = Op0->getOperand(0).getOperand(0);

1473

1474

// Use the type of SRL node.

1475

VT = Opd0->getValueType(0);

1476

} else if (isOpcWithIntImmediate(Op0, ISD::SRL, Srl_imm)) {

1477

Opd0 = Op0->getOperand(0);

1478

} else if (BiggerPattern) {

1479

// Let's pretend a 0 shift right has been performed.

1480

// The resulting code will be at least as good as the original one

1481

// plus it may expose more opportunities for bitfield insert pattern.

1482

// FIXME: Currently we limit this to the bigger pattern, because

1483

// some optimizations expect AND and not UBFM.

1484

Opd0 = N->getOperand(0);

1485

} else

1486

return false;

1487

1488

// Bail out on large immediates. This happens when no proper

1489

// combining/constant folding was performed.

1490

if (!BiggerPattern && (Srl_imm <= 0 || Srl_imm >= VT.getSizeInBits())) {

1491

DEBUG((dbgs() << Ndo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { (dbgs() << N << ": Found large shift immediate, this should not happen\n"
); } } while (0)

1492

<< ": Found large shift immediate, this should not happen\n"))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { (dbgs() << N << ": Found large shift immediate, this should not happen\n"
); } } while (0);

1493

return false;

1494

}

1495

1496

LSB = Srl_imm;

1497

MSB = Srl_imm + (VT == MVT::i32 ? countTrailingOnes<uint32_t>(And_imm)

1498

: countTrailingOnes<uint64_t>(And_imm)) -

1499

1500

if (ClampMSB)

1501

// Since we're moving the extend before the right shift operation, we need

1502

// to clamp the MSB to make sure we don't shift in undefined bits instead of

1503

// the zeros which would get shifted in with the original right shift

1504

// operation.

1505

MSB = MSB > 31 ? 31 : MSB;

1506

1507

Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;

1508

return true;

1509

}

1510

1511

static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,

1512

SDValue &Opd0, unsigned &LSB,

1513

unsigned &MSB) {

1514

// We are looking for the following pattern which basically extracts several

1515

// continuous bits from the source value and places it from the LSB of the

1516

// destination value, all other bits of the destination value or set to zero:

1517

1518

// Value2 = AND Value, MaskImm

1519

// SRL Value2, ShiftImm

1520

1521

// with MaskImm >> ShiftImm to search for the bit width.

1522

1523

// This gets selected into a single UBFM:

1524

1525

// UBFM Value, ShiftImm, BitWide + Srl_imm -1

1526

1527

1528

if (N->getOpcode() != ISD::SRL)

1529

return false;

1530

1531

uint64_t And_mask = 0;

1532

if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_mask))

1533

return false;

1534

1535

Opd0 = N->getOperand(0).getOperand(0);

1536

1537

uint64_t Srl_imm = 0;

1538

if (!isIntImmediate(N->getOperand(1), Srl_imm))

1539

return false;

1540

1541

// Check whether we really have several bits extract here.

1542

unsigned BitWide = 64 - countLeadingOnes(~(And_mask >> Srl_imm));

1543

if (BitWide && isMask_64(And_mask >> Srl_imm)) {

1544

if (N->getValueType(0) == MVT::i32)

1545

Opc = AArch64::UBFMWri;

1546

else

1547

Opc = AArch64::UBFMXri;

1548

1549

LSB = Srl_imm;

1550

MSB = BitWide + Srl_imm - 1;

1551

return true;

1552

}

1553

1554

return false;

1555

}

1556

1557

static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,

1558

unsigned &Immr, unsigned &Imms,

1559

bool BiggerPattern) {

1560

assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&(((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::
SRL) && "N must be a SHR/SRA operation to call this function"
) ? static_cast<void> (0) : __assert_fail ("(N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) && \"N must be a SHR/SRA operation to call this function\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp"
, 1561, __PRETTY_FUNCTION__))

1561

"N must be a SHR/SRA operation to call this function")(((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::
SRL) && "N must be a SHR/SRA operation to call this function"
) ? static_cast<void> (0) : __assert_fail ("(N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) && \"N must be a SHR/SRA operation to call this function\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp"
, 1561, __PRETTY_FUNCTION__));

1562

1563

EVT VT = N->getValueType(0);

1564

1565

// Here we can test the type of VT and return false when the type does not

1566

// match, but since it is done prior to that call in the current context

1567

// we turned that into an assert to avoid redundant code.

1568

1569

1570

1571

// Check for AND + SRL doing several bits extract.

1572

if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms))

1573

return true;

1574

1575

// we're looking for a shift of a shift

1576

uint64_t Shl_imm = 0;

1577

uint64_t Trunc_bits = 0;

1578

if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {

1579

Opd0 = N->getOperand(0).getOperand(0);

1580

} else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&

1581

N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {

1582

// We are looking for a shift of truncate. Truncate from i64 to i32 could

1583

// be considered as setting high 32 bits as zero. Our strategy here is to

1584

// always generate 64bit UBFM. This consistency will help the CSE pass

1585

// later find more redundancy.

1586

Opd0 = N->getOperand(0).getOperand(0);

1587

Trunc_bits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();

1588

VT = Opd0->getValueType(0);

1589

assert(VT == MVT::i64 && "the promoted type should be i64")((VT == MVT::i64 && "the promoted type should be i64"
) ? static_cast<void> (0) : __assert_fail ("VT == MVT::i64 && \"the promoted type should be i64\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp"
, 1589, __PRETTY_FUNCTION__));

1590

} else if (BiggerPattern) {

1591

// Let's pretend a 0 shift left has been performed.

1592

// FIXME: Currently we limit this to the bigger pattern case,

1593

// because some optimizations expect AND and not UBFM

1594

Opd0 = N->getOperand(0);

1595

} else

1596

return false;

1597

1598

// Missing combines/constant folding may have left us with strange

1599

// constants.

1600

if (Shl_imm >= VT.getSizeInBits()) {

1601

DEBUG((dbgs() << Ndo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { (dbgs() << N << ": Found large shift immediate, this should not happen\n"
); } } while (0)

1602

1603

return false;

1604

}

1605

1606

uint64_t Srl_imm = 0;

1607

if (!isIntImmediate(N->getOperand(1), Srl_imm))

1608

return false;

1609

1610

assert(Srl_imm > 0 && Srl_imm < VT.getSizeInBits() &&((Srl_imm > 0 && Srl_imm < VT.getSizeInBits() &&
"bad amount in shift node!") ? static_cast<void> (0) :
__assert_fail ("Srl_imm > 0 && Srl_imm < VT.getSizeInBits() && \"bad amount in shift node!\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp"
, 1611, __PRETTY_FUNCTION__))

1611

"bad amount in shift node!")((Srl_imm > 0 && Srl_imm < VT.getSizeInBits() &&
"bad amount in shift node!") ? static_cast<void> (0) :
__assert_fail ("Srl_imm > 0 && Srl_imm < VT.getSizeInBits() && \"bad amount in shift node!\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp"
, 1611, __PRETTY_FUNCTION__));

1612

int immr = Srl_imm - Shl_imm;

1613

Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;

1614

Imms = VT.getSizeInBits() - Shl_imm - Trunc_bits - 1;

1615

// SRA requires a signed extraction

1616

if (VT == MVT::i32)

1617

Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;

1618

else

1619

Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;

1620

return true;

1621

}

1622

1623

static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,

1624

SDValue &Opd0, unsigned &Immr, unsigned &Imms,

1625

unsigned NumberOfIgnoredLowBits = 0,

1626

bool BiggerPattern = false) {

1627

if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)

1628

return false;

1629

1630

switch (N->getOpcode()) {

1631

default:

1632

if (!N->isMachineOpcode())

1633

return false;

1634

break;

1635

case ISD::AND:

1636

return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,

1637

NumberOfIgnoredLowBits, BiggerPattern);

1638

case ISD::SRL:

1639

case ISD::SRA:

1640

return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);

1641

}

1642

1643

unsigned NOpc = N->getMachineOpcode();

1644

switch (NOpc) {

1645

default:

1646

return false;

1647

case AArch64::SBFMWri:

1648

case AArch64::UBFMWri:

1649

case AArch64::SBFMXri:

1650

case AArch64::UBFMXri:

1651

Opc = NOpc;

1652

Opd0 = N->getOperand(0);

1653

Immr = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();

1654

Imms = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();

1655

return true;

1656

}

1657

// Unreachable

1658

return false;

1659

}

1660

1661

/// Select \p N as an SBFM/UBFM node when isBitfieldExtractOp recognises it.
/// Returns nullptr when \p N is not a bitfield extract.
SDNode *AArch64DAGToDAGISel::SelectBitfieldExtractOp(SDNode *N) {
  unsigned Opc, Immr, Imms;
  SDValue Source;
  if (!isBitfieldExtractOp(CurDAG, N, Opc, Source, Immr, Imms))
    return nullptr;

  const EVT VT = N->getValueType(0);
  SDLoc dl(N);

  const bool Is64BitOpc = Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri;
  if (!Is64BitOpc || VT != MVT::i32) {
    // Operation type and result type agree: morph N in place.
    SDValue Ops[] = {Source, CurDAG->getTargetConstant(Immr, dl, VT),
                     CurDAG->getTargetConstant(Imms, dl, VT)};
    return CurDAG->SelectNodeTo(N, Opc, VT, Ops);
  }

  // The bit extract operation is 64bit but the original type is 32bit, so the
  // 64-bit result is narrowed with one EXTRACT_SUBREG.
  SDValue WideOps[] = {Source, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
                       CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
  SDNode *WideBFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, WideOps);
  SDValue SubRegIdx = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
  return CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i32,
                                SDValue(WideBFM, 0), SubRegIdx);
}

1688

1689

/// Does DstMask form a complementary pair with the mask provided by

1690

/// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking,

1691

/// this asks whether DstMask zeroes precisely those bits that will be set by

1692

/// the other half.

1693

static bool isBitfieldDstMask(uint64_t DstMask, APInt BitsToBeInserted,

1694

unsigned NumberOfIgnoredHighBits, EVT VT) {

1695

assert((VT == MVT::i32 || VT == MVT::i64) &&(((VT == MVT::i32 || VT == MVT::i64) && "i32 or i64 mask type expected!"
) ? static_cast<void> (0) : __assert_fail ("(VT == MVT::i32 || VT == MVT::i64) && \"i32 or i64 mask type expected!\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp"
, 1696, __PRETTY_FUNCTION__))

1696

"i32 or i64 mask type expected!")(((VT == MVT::i32 || VT == MVT::i64) && "i32 or i64 mask type expected!"
) ? static_cast<void> (0) : __assert_fail ("(VT == MVT::i32 || VT == MVT::i64) && \"i32 or i64 mask type expected!\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp"
, 1696, __PRETTY_FUNCTION__));

1697

unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;

1698

1699

APInt SignificantDstMask = APInt(BitWidth, DstMask);

1700

APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);

1701

1702

return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&

1703

(SignificantDstMask | SignificantBitsToBeInserted).isAllOnesValue();

1704

}

1705

1706

// Look for bits that will be useful for later uses.

1707

// A bit is considered useless as soon as it is dropped and never used

1708

// before it has been dropped.

1709

// E.g., looking for useful bit of x

1710

// 1. y = x & 0x7

1711

// 2. z = y >> 2

1712

// After #1, x useful bits are 0x7, then the useful bits of x, live through

1713

// y.

1714

// After #2, the useful bits of x are 0x4.

1715

// However, if x is used on an unpredictable instruction, then all its bits

1716

// are useful.

1717

// E.g.

1718

// 1. y = x & 0x7

1719

// 2. z = y >> 2

1720

// 3. str x, [@x]

1721

static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);

1722

1723

static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,

1724

unsigned Depth) {

1725

uint64_t Imm =

1726

cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();

1727

Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());

1728

UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);

1729

getUsefulBits(Op, UsefulBits, Depth + 1);

1730

}

1731

1732

static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,

1733

uint64_t Imm, uint64_t MSB,

1734

unsigned Depth) {

1735

// inherit the bitwidth value

1736

APInt OpUsefulBits(UsefulBits);

1737

OpUsefulBits = 1;

1738

1739

if (MSB >= Imm) {

1740

OpUsefulBits = OpUsefulBits.shl(MSB - Imm + 1);

1741

--OpUsefulBits;

1742

// The interesting part will be in the lower part of the result

1743

getUsefulBits(Op, OpUsefulBits, Depth + 1);

1744

// The interesting part was starting at Imm in the argument

1745

OpUsefulBits = OpUsefulBits.shl(Imm);

1746

} else {

1747

OpUsefulBits = OpUsefulBits.shl(MSB + 1);

1748

--OpUsefulBits;

1749

// The interesting part will be shifted in the result

1750

OpUsefulBits = OpUsefulBits.shl(OpUsefulBits.getBitWidth() - Imm);

1751

getUsefulBits(Op, OpUsefulBits, Depth + 1);

1752

// The interesting part was at zero in the argument

1753

OpUsefulBits = OpUsefulBits.lshr(OpUsefulBits.getBitWidth() - Imm);

1754

}

1755

1756

UsefulBits &= OpUsefulBits;

1757

}

1758

1759

static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,

1760

unsigned Depth) {

1761

uint64_t Imm =

1762

cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();

1763

uint64_t MSB =

1764

cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();

1765

1766

getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);

1767

}

1768

1769

static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,

1770

unsigned Depth) {

1771

uint64_t ShiftTypeAndValue =

1772

cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();

1773

APInt Mask(UsefulBits);

1774

Mask.clearAllBits();

1775

Mask.flipAllBits();

1776

1777

if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {

1778

// Shift Left

1779

uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);

1780

Mask = Mask.shl(ShiftAmt);

1781

getUsefulBits(Op, Mask, Depth + 1);

1782

Mask = Mask.lshr(ShiftAmt);

1783

} else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {

1784

// Shift Right

1785

// We do not handle AArch64_AM::ASR, because the sign will change the

1786

// number of useful bits

1787

uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);

1788

Mask = Mask.lshr(ShiftAmt);

1789

getUsefulBits(Op, Mask, Depth + 1);

1790

Mask = Mask.shl(ShiftAmt);

1791

} else

1792

return;

1793

1794

UsefulBits &= Mask;

1795

}

1796

1797

static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,

1798

unsigned Depth) {

1799

uint64_t Imm =

1800

cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();

1801

uint64_t MSB =

1802

cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();

1803

1804

if (Op.getOperand(1) == Orig)

1805

return getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);

1806

1807

APInt OpUsefulBits(UsefulBits);

1808

OpUsefulBits = 1;

1809

1810

if (MSB >= Imm) {

1811

OpUsefulBits = OpUsefulBits.shl(MSB - Imm + 1);

1812

--OpUsefulBits;

1813

UsefulBits &= ~OpUsefulBits;

1814

getUsefulBits(Op, UsefulBits, Depth + 1);

1815

} else {

1816

OpUsefulBits = OpUsefulBits.shl(MSB + 1);

1817

--OpUsefulBits;

1818

UsefulBits = ~(OpUsefulBits.shl(OpUsefulBits.getBitWidth() - Imm));

1819

getUsefulBits(Op, UsefulBits, Depth + 1);

1820

}

1821

}

1822

1823

static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,

1824

SDValue Orig, unsigned Depth) {

1825

1826

// Users of this node should have already been instruction selected

1827

// FIXME: Can we turn that into an assert?

1828

if (!UserNode->isMachineOpcode())

1829

return;

1830

1831

switch (UserNode->getMachineOpcode()) {

1832

default:

1833

return;

1834

case AArch64::ANDSWri:

1835

case AArch64::ANDSXri:

1836

case AArch64::ANDWri:

1837

case AArch64::ANDXri:

1838

// We increment Depth only when we call the getUsefulBits

1839

return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,

1840

Depth);

1841

case AArch64::UBFMWri:

1842

case AArch64::UBFMXri:

1843

return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);

1844

1845

case AArch64::ORRWrs:

1846

case AArch64::ORRXrs:

1847

if (UserNode->getOperand(1) != Orig)

1848

return;

1849

return getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,

1850

Depth);

1851

case AArch64::BFMWri:

1852

case AArch64::BFMXri:

1853

return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);

1854

}

1855

}

1856

1857

/// Compute which bits of \p Op are useful to its users.
///
/// At depth 0 \p UsefulBits is initialised to all ones at the scalar width of
/// \p Op. The per-user results are OR-ed together and the union is
/// intersected into \p UsefulBits. Nothing is done once \p Depth reaches 6.
static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
  const unsigned MaxDepth = 6;
  if (Depth >= MaxDepth)
    return;

  if (Depth == 0) {
    // At the beginning, assume every produced bits is useful
    const unsigned ScalarWidth =
        Op.getValueType().getScalarType().getSizeInBits();
    UsefulBits = APInt(ScalarWidth, 0);
    UsefulBits.flipAllBits();
  }

  APInt UnionOverUsers(UsefulBits.getBitWidth(), 0);
  for (SDNode *User : Op.getNode()->uses()) {
    // A use cannot produce useful bits, so each one starts from the current
    // mask and can only narrow it.
    APInt BitsForThisUser(UsefulBits);
    getUsefulBitsForUse(User, BitsForThisUser, Op, Depth);
    UnionOverUsers |= BitsForThisUser;
  }

  // UsefulBits contains the produced bits that are meaningful for the
  // current definition, thus a user cannot make a bit meaningful at
  // this point
  UsefulBits &= UnionOverUsers;
}

1880

1881

/// Create a machine node performing a notional SHL of Op by ShlAmount. If

1882

/// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is

1883

/// 0, return Op unchanged.

1884

static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {

1885

if (ShlAmount == 0)

1886

return Op;

1887

1888

EVT VT = Op.getValueType();

1889

SDLoc dl(Op);

1890

unsigned BitWidth = VT.getSizeInBits();

1891

unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;

1892

1893

SDNode *ShiftNode;

1894

if (ShlAmount > 0) {

1895

// LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt

1896

ShiftNode = CurDAG->getMachineNode(

1897

UBFMOpc, dl, VT, Op,

1898

CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT),

1899

CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT));

1900

} else {

1901

// LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1

1902

assert(ShlAmount < 0 && "expected right shift")((ShlAmount < 0 && "expected right shift") ? static_cast
<void> (0) : __assert_fail ("ShlAmount < 0 && \"expected right shift\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp"
, 1902, __PRETTY_FUNCTION__));

1903

int ShrAmount = -ShlAmount;

1904

ShiftNode = CurDAG->getMachineNode(

1905

UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT),

1906

CurDAG->getTargetConstant(BitWidth - 1, dl, VT));

1907

}

1908

1909

return SDValue(ShiftNode, 0);

1910

}

1911

1912

/// Does this tree qualify as an attempt to move a bitfield into position,

1913

/// essentially "(and (shl VAL, N), Mask)".

1914

static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,

1915

bool BiggerPattern,

1916

SDValue &Src, int &ShiftAmount,

1917

int &MaskWidth) {

1918

EVT VT = Op.getValueType();

1919

unsigned BitWidth = VT.getSizeInBits();

1920

(void)BitWidth;

1921

assert(BitWidth == 32 || BitWidth == 64)((BitWidth == 32 || BitWidth == 64) ? static_cast<void>
(0) : __assert_fail ("BitWidth == 32 || BitWidth == 64", "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp"
, 1921, __PRETTY_FUNCTION__));

1922

1923

APInt KnownZero, KnownOne;

1924

CurDAG->computeKnownBits(Op, KnownZero, KnownOne);

1925

1926

// Non-zero in the sense that they're not provably zero, which is the key

1927

// point if we want to use this value

1928

uint64_t NonZeroBits = (~KnownZero).getZExtValue();

1929

1930

// Discard a constant AND mask if present. It's safe because the node will

1931

// already have been factored into the computeKnownBits calculation above.

1932

uint64_t AndImm;

1933

if (isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm)) {

1934

assert((~APInt(BitWidth, AndImm) & ~KnownZero) == 0)(((~APInt(BitWidth, AndImm) & ~KnownZero) == 0) ? static_cast
<void> (0) : __assert_fail ("(~APInt(BitWidth, AndImm) & ~KnownZero) == 0"
, "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp"
, 1934, __PRETTY_FUNCTION__));

1935

Op = Op.getOperand(0);

1936

}

1937

1938

// Don't match if the SHL has more than one use, since then we'll end up

1939

// generating SHL+UBFIZ instead of just keeping SHL+AND.

1940

if (!BiggerPattern && !Op.hasOneUse())

1941

return false;

1942

1943

uint64_t ShlImm;

1944

if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))

1945

return false;

1946

Op = Op.getOperand(0);

1947

1948

if (!isShiftedMask_64(NonZeroBits))

1949

return false;

1950

1951

ShiftAmount = countTrailingZeros(NonZeroBits);

1952

MaskWidth = countTrailingOnes(NonZeroBits >> ShiftAmount);

1953

1954

// BFI encompasses sufficiently many nodes that it's worth inserting an extra

1955

// LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL

1956

// amount. BiggerPattern is true when this pattern is being matched for BFI,

1957

// BiggerPattern is false when this pattern is being matched for UBFIZ, in

1958

// which case it is not profitable to insert an extra shift.

1959

if (ShlImm - ShiftAmount != 0 && !BiggerPattern)

1960

return false;

1961

Src = getLeftShift(CurDAG, Op, ShlImm - ShiftAmount);

1962

1963

return true;

1964

}

1965

1966

// Given a OR operation, check if we have the following pattern

1967

// ubfm c, b, imm, imm2 (or something that does the same jobs, see

1968

// isBitfieldExtractOp)

1969

// d = e & mask2 ; where mask is a binary sequence of 1..10..0 and

1970

// countTrailingZeros(mask2) == imm2 - imm + 1

1971

// f = d | c

1972

// if yes, given reference arguments will be update so that one can replace

1973

// the OR instruction with:

1974

// f = Opc Opd0, Opd1, LSB, MSB ; where Opc is a BFM, LSB = imm, and MSB = imm2

1975

static bool isBitfieldInsertOpFromOr(SDNode *N, unsigned &Opc, SDValue &Dst,

1976

SDValue &Src, unsigned &ImmR,

1977

unsigned &ImmS, const APInt &UsefulBits,

1978

SelectionDAG *CurDAG) {

1979

assert(N->getOpcode() == ISD::OR && "Expect a OR operation")((N->getOpcode() == ISD::OR && "Expect a OR operation"
) ? static_cast<void> (0) : __assert_fail ("N->getOpcode() == ISD::OR && \"Expect a OR operation\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp"
, 1979, __PRETTY_FUNCTION__));

1980

1981

// Set Opc

1982

EVT VT = N->getValueType(0);

1983

if (VT == MVT::i32)

1984

Opc = AArch64::BFMWri;

1985

else if (VT == MVT::i64)

1986

Opc = AArch64::BFMXri;

1987

else

1988

return false;

1989

1990

// Because of simplify-demanded-bits in DAGCombine, involved masks may not

1991

// have the expected shape. Try to undo that.

1992

1993

unsigned NumberOfIgnoredLowBits = UsefulBits.countTrailingZeros();

1994

unsigned NumberOfIgnoredHighBits = UsefulBits.countLeadingZeros();

1995

1996

// OR is commutative, check all combinations of operand order and values of

1997

// BiggerPattern, i.e.

1998

// Opd0, Opd1, BiggerPattern=false

1999

// Opd1, Opd0, BiggerPattern=false

2000

// Opd0, Opd1, BiggerPattern=true

2001

// Opd1, Opd0, BiggerPattern=true

2002

// Several of these combinations may match, so check with BiggerPattern=false

2003

// first since that will produce better results by matching more instructions

2004

// and/or inserting fewer extra instructions.

2005

for (int I = 0; I < 4; ++I) {

2006

2007

bool BiggerPattern = I / 2;

2008

SDNode *OrOpd0 = N->getOperand(I % 2).getNode();

2009

SDValue OrOpd1Val = N->getOperand((I + 1) % 2);

2010

SDNode *OrOpd1 = OrOpd1Val.getNode();

2011

2012

unsigned BFXOpc;

2013

int DstLSB, Width;

2014

if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,

2015

NumberOfIgnoredLowBits, BiggerPattern)) {

2016

// Check that the returned opcode is compatible with the pattern,

2017

// i.e., same type and zero extended (U and not S)

2018

if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||

2019

(BFXOpc != AArch64::UBFMWri && VT == MVT::i32))

2020

continue;

2021

2022

// Compute the width of the bitfield insertion

2023

DstLSB = 0;

2024

Width = ImmS - ImmR + 1;

2025

// FIXME: This constraint is to catch bitfield insertion we may

2026

// want to widen the pattern if we want to grab general bitfied

2027

// move case

2028

if (Width <= 0)

2029

continue;

2030

2031

// If the mask on the insertee is correct, we have a BFXIL operation. We

2032

// can share the ImmR and ImmS values from the already-computed UBFM.

2033

} else if (isBitfieldPositioningOp(CurDAG, SDValue(OrOpd0, 0),

2034

BiggerPattern,

2035

Src, DstLSB, Width)) {

2036

ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();

2037

ImmS = Width - 1;

2038

} else

2039

continue;

2040

2041

// Check the second part of the pattern

2042

EVT VT = OrOpd1->getValueType(0);

2043

assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand")(((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand"
) ? static_cast<void> (0) : __assert_fail ("(VT == MVT::i32 || VT == MVT::i64) && \"unexpected OR operand\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp"
, 2043, __PRETTY_FUNCTION__));

2044

2045

// Compute the Known Zero for the candidate of the first operand.

2046

// This allows to catch more general case than just looking for

2047

// AND with imm. Indeed, simplify-demanded-bits may have removed

2048

// the AND instruction because it proves it was useless.

2049

APInt KnownZero, KnownOne;

2050

CurDAG->computeKnownBits(OrOpd1Val, KnownZero, KnownOne);

2051

2052

// Check if there is enough room for the second operand to appear

2053

// in the first one

2054

APInt BitsToBeInserted =

2055

APInt::getBitsSet(KnownZero.getBitWidth(), DstLSB, DstLSB + Width);

2056

2057

if ((BitsToBeInserted & ~KnownZero) != 0)

2058

continue;

2059

2060

// Set the first operand

2061

uint64_t Imm;

2062

if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&

2063

isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))

2064

// In that case, we can eliminate the AND

2065

Dst = OrOpd1->getOperand(0);

2066

else

2067

// Maybe the AND has been removed by simplify-demanded-bits

2068

// or is useful because it discards more bits

2069

Dst = OrOpd1Val;

2070

2071

// both parts match

2072

return true;

2073

}

2074

2075

return false;

2076

}

2077

2078

/// Select the ISD::OR node \p N as a BFM when isBitfieldInsertOpFromOr
/// recognises a bitfield insert. An OR none of whose bits are useful is
/// selected as IMPLICIT_DEF. Returns nullptr when \p N is not an OR or no
/// insert pattern matches.
SDNode *AArch64DAGToDAGISel::SelectBitfieldInsertOp(SDNode *N) {
  if (N->getOpcode() != ISD::OR)
    return nullptr;

  const EVT VT = N->getValueType(0);

  APInt NUsefulBits;
  getUsefulBits(SDValue(N, 0), NUsefulBits);

  // If all bits are not useful, just return UNDEF.
  if (!NUsefulBits)
    return CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, VT);

  unsigned Opc;
  unsigned ImmR, ImmS;
  SDValue Dst, Src;
  const bool IsInsert = isBitfieldInsertOpFromOr(N, Opc, Dst, Src, ImmR, ImmS,
                                                 NUsefulBits, CurDAG);
  if (!IsInsert)
    return nullptr;

  SDLoc dl(N);
  SDValue Ops[] = { Dst,
                    Src,
                    CurDAG->getTargetConstant(ImmR, dl, VT),
                    CurDAG->getTargetConstant(ImmS, dl, VT) };
  return CurDAG->SelectNodeTo(N, Opc, VT, Ops);
}

2104

2105

/// SelectBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the

2106

/// equivalent of a left shift by a constant amount followed by an and masking

2107

/// out a contiguous set of bits.

2108

SDNode *AArch64DAGToDAGISel::SelectBitfieldInsertInZeroOp(SDNode *N) {

2109

if (N->getOpcode() != ISD::AND)

2110

return nullptr;

2111

2112

EVT VT = N->getValueType(0);

2113

unsigned Opc;

2114

if (VT == MVT::i32)

2115

Opc = AArch64::UBFMWri;

2116

else if (VT == MVT::i64)

2117

Opc = AArch64::UBFMXri;

2118

else

2119

return nullptr;

2120

2121

SDValue Op0;

2122

int DstLSB, Width;

2123

if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,

2124

Op0, DstLSB, Width))

2125

return nullptr;

2126

2127

// ImmR is the rotate right amount.

2128

unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();

2129

// ImmS is the most significant bit of the source to be moved.

2130

unsigned ImmS = Width - 1;

2131

2132

SDLoc DL(N);

2133

SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),

2134

CurDAG->getTargetConstant(ImmS, DL, VT)};

2135

return CurDAG->SelectNodeTo(N, Opc, VT, Ops);

2136

}

2137

2138

bool

2139

AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,

2140

unsigned RegWidth) {

2141

APFloat FVal(0.0);

2142

if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))

2143

FVal = CN->getValueAPF();

2144

else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {

2145

// Some otherwise illegal constants are allowed in this case.

2146

if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||

2147

!isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))

2148

return false;

2149

2150

ConstantPoolSDNode *CN =

2151

dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));

2152

FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();

2153

} else

2154

return false;

2155

2156

// An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits

2157

// is between 1 and 32 for a destination w-register, or 1 and 64 for an

2158

// x-register.

2159

2160

// By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we

2161

// want THIS_NODE to be 2^fbits. This is much easier to deal with using

2162

// integers.

2163

bool IsExact;

2164

2165

// fbits is between 1 and 64 in the worst-case, which means the fmul

2166

// could have 2^64 as an actual operand. Need 65 bits of precision.

2167

APSInt IntVal(65, true);

2168

FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);

2169

2170

// N.b. isPowerOf2 also checks for > 0.

2171

if (!IsExact || !IntVal.isPowerOf2()) return false;

2172

unsigned FBits = IntVal.logBase2();

2173

2174

// Checks above should have guaranteed that we haven't lost information in

2175

// finding FBits, but it must still be in range.

2176

if (FBits == 0 || FBits > RegWidth) return false;

2177

2178

FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);

2179

return true;

2180

}

2181

2182

// Inspects a register string of the form o0:op1:CRn:CRm:op2 gets the fields

2183

// of the string and obtains the integer values from them and combines these

2184

// into a single value to be used in the MRS/MSR instruction.

2185

static int getIntOperandFromRegisterString(StringRef RegString) {

2186

SmallVector<StringRef, 5> Fields;

2187

RegString.split(Fields, ':');

2188

2189

if (Fields.size() == 1)

2190

return -1;

2191

2192

assert(Fields.size() == 5((Fields.size() == 5 && "Invalid number of fields in read register string"
) ? static_cast<void> (0) : __assert_fail ("Fields.size() == 5 && \"Invalid number of fields in read register string\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp"
, 2193, __PRETTY_FUNCTION__))

2193

&& "Invalid number of fields in read register string")((Fields.size() == 5 && "Invalid number of fields in read register string"
) ? static_cast<void> (0) : __assert_fail ("Fields.size() == 5 && \"Invalid number of fields in read register string\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp"
, 2193, __PRETTY_FUNCTION__));

2194

2195

SmallVector<int, 5> Ops;

2196

bool AllIntFields = true;

2197

2198

for (StringRef Field : Fields) {

2199

unsigned IntField;

2200

AllIntFields &= !Field.getAsInteger(10, IntField);

2201

Ops.push_back(IntField);

2202

}

2203

2204

assert(AllIntFields &&((AllIntFields && "Unexpected non-integer value in special register string."
) ? static_cast<void> (0) : __assert_fail ("AllIntFields && \"Unexpected non-integer value in special register string.\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp"
, 2205, __PRETTY_FUNCTION__))

2205

"Unexpected non-integer value in special register string.")((AllIntFields && "Unexpected non-integer value in special register string."
) ? static_cast<void> (0) : __assert_fail ("AllIntFields && \"Unexpected non-integer value in special register string.\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp"
, 2205, __PRETTY_FUNCTION__));

2206

2207

// Need to combine the integer fields of the string into a single value

2208

// based on the bit encoding of MRS/MSR instruction.

2209

return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |

2210

(Ops[3] << 3) | (Ops[4]);

2211

}

2212

2213

// Lower the read_register intrinsic to an MRS instruction node if the special

2214

// register string argument is either of the form detailed in the ALCE (the

2215

// form described in getIntOperandsFromRegsterString) or is a named register

2216

// known by the MRS SysReg mapper.

2217

SDNode *AArch64DAGToDAGISel::SelectReadRegister(SDNode *N) {

2218

const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));

2219

const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));

2220

SDLoc DL(N);

2221

2222

int Reg = getIntOperandFromRegisterString(RegString->getString());

2223

if (Reg != -1)

2224

return CurDAG->getMachineNode(AArch64::MRS, DL, N->getSimpleValueType(0),

2225

MVT::Other,

2226

CurDAG->getTargetConstant(Reg, DL, MVT::i32),

2227

N->getOperand(0));

2228

2229

// Use the sysreg mapper to map the remaining possible strings to the

2230

// value for the register to be used for the instruction operand.

2231

AArch64SysReg::MRSMapper mapper;

2232

bool IsValidSpecialReg;

2233

Reg = mapper.fromString(RegString->getString(),

2234

Subtarget->getFeatureBits(),

2235

IsValidSpecialReg);

2236

if (IsValidSpecialReg)

2237

return CurDAG->getMachineNode(AArch64::MRS, DL, N->getSimpleValueType(0),

2238

MVT::Other,

2239

CurDAG->getTargetConstant(Reg, DL, MVT::i32),

2240

N->getOperand(0));

2241

2242

return nullptr;

2243

}

2244

2245

// Lower the write_register intrinsic to an MSR instruction node if the special

2246

// register string argument is either of the form detailed in the ALCE (the

2247

// form described in getIntOperandsFromRegsterString) or is a named register

2248

// known by the MSR SysReg mapper.

2249

SDNode *AArch64DAGToDAGISel::SelectWriteRegister(SDNode *N) {

2250

const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));

2251

const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));

2252

SDLoc DL(N);

2253

2254

int Reg = getIntOperandFromRegisterString(RegString->getString());

2255

if (Reg != -1)

2256

return CurDAG->getMachineNode(AArch64::MSR, DL, MVT::Other,

2257

CurDAG->getTargetConstant(Reg, DL, MVT::i32),

2258

N->getOperand(2), N->getOperand(0));

2259

2260

// Check if the register was one of those allowed as the pstatefield value in

2261

// the MSR (immediate) instruction. To accept the values allowed in the

2262

// pstatefield for the MSR (immediate) instruction, we also require that an

2263

// immediate value has been provided as an argument, we know that this is

2264

// the case as it has been ensured by semantic checking.

2265

AArch64PState::PStateMapper PMapper;

2266

bool IsValidSpecialReg;

2267

Reg = PMapper.fromString(RegString->getString(),

2268

Subtarget->getFeatureBits(),

2269

IsValidSpecialReg);

2270

if (IsValidSpecialReg) {

2271

assert (isa<ConstantSDNode>(N->getOperand(2))((isa<ConstantSDNode>(N->getOperand(2)) && "Expected a constant integer expression."
) ? static_cast<void> (0) : __assert_fail ("isa<ConstantSDNode>(N->getOperand(2)) && \"Expected a constant integer expression.\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp"
, 2272, __PRETTY_FUNCTION__))

2272

&& "Expected a constant integer expression.")((isa<ConstantSDNode>(N->getOperand(2)) && "Expected a constant integer expression."
) ? static_cast<void> (0) : __assert_fail ("isa<ConstantSDNode>(N->getOperand(2)) && \"Expected a constant integer expression.\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp"
, 2272, __PRETTY_FUNCTION__));

2273

uint64_t Immed = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();

2274

unsigned State;

2275

if (Reg == AArch64PState::PAN || Reg == AArch64PState::UAO) {

2276

assert(Immed < 2 && "Bad imm")((Immed < 2 && "Bad imm") ? static_cast<void>
(0) : __assert_fail ("Immed < 2 && \"Bad imm\"", "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp"
, 2276, __PRETTY_FUNCTION__));

2277

State = AArch64::MSRpstateImm1;

2278

} else {

2279

assert(Immed < 16 && "Bad imm")((Immed < 16 && "Bad imm") ? static_cast<void>
(0) : __assert_fail ("Immed < 16 && \"Bad imm\"",
"/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp"
, 2279, __PRETTY_FUNCTION__));

2280

State = AArch64::MSRpstateImm4;

2281

}

2282

return CurDAG->getMachineNode(State, DL, MVT::Other,

2283

CurDAG->getTargetConstant(Reg, DL, MVT::i32),

2284

CurDAG->getTargetConstant(Immed, DL, MVT::i16),

2285

N->getOperand(0));

2286

}

2287

2288

// Use the sysreg mapper to attempt to map the remaining possible strings

2289

// to the value for the register to be used for the MSR (register)

2290

// instruction operand.

2291

AArch64SysReg::MSRMapper Mapper;

2292

Reg = Mapper.fromString(RegString->getString(),

2293

Subtarget->getFeatureBits(),

2294

IsValidSpecialReg);

2295

2296

if (IsValidSpecialReg)

2297

return CurDAG->getMachineNode(AArch64::MSR, DL, MVT::Other,

2298

CurDAG->getTargetConstant(Reg, DL, MVT::i32),

2299

N->getOperand(2), N->getOperand(0));

2300

2301

return nullptr;

2302

}

2303

2304

SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {

2305

// Dump information about the Node being selected

2306

DEBUG(errs() << "Selecting: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { errs() << "Selecting: "; } } while (
0);

2307

DEBUG(Node->dump(CurDAG))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { Node->dump(CurDAG); } } while (0);

2308

DEBUG(errs() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { errs() << "\n"; } } while (0);

2309

2310

// If we have a custom node, we already have selected!

2311

if (Node->isMachineOpcode()) {

2312

DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { errs() << "== "; Node->dump(CurDAG
); errs() << "\n"; } } while (0);

2313

Node->setNodeId(-1);

2314

return nullptr;

2315

}

2316

2317

// Few custom selection stuff.

2318

SDNode *ResNode = nullptr;

2319

EVT VT = Node->getValueType(0);

2320

2321

switch (Node->getOpcode()) {

2322

default:

2323

break;

2324

2325

case ISD::READ_REGISTER:

2326

if (SDNode *Res = SelectReadRegister(Node))

2327

return Res;

2328

break;

2329

2330

case ISD::WRITE_REGISTER:

2331

if (SDNode *Res = SelectWriteRegister(Node))

2332

return Res;

2333

break;

2334

2335

case ISD::ADD:

2336

if (SDNode *I = SelectMLAV64LaneV128(Node))

2337

return I;

2338

break;

2339

2340

case ISD::LOAD: {

2341

// Try to select as an indexed load. Fall through to normal processing

2342

// if we can't.

2343

bool Done = false;

2344

SDNode *I = SelectIndexedLoad(Node, Done);

2345

if (Done)

2346

return I;

2347

break;

2348

}

2349

2350

case ISD::SRL:

2351

case ISD::AND:

2352

case ISD::SRA:

2353

if (SDNode *I = SelectBitfieldExtractOp(Node))

2354

return I;

2355

if (SDNode *I = SelectBitfieldInsertInZeroOp(Node))

2356

return I;

2357

break;

2358

2359

case ISD::OR:

2360

if (SDNode *I = SelectBitfieldInsertOp(Node))

2361

return I;

2362

break;

2363

2364

case ISD::EXTRACT_VECTOR_ELT: {

2365

// Extracting lane zero is a special case where we can just use a plain

2366

// EXTRACT_SUBREG instruction, which will become FMOV. This is easier for

2367

// the rest of the compiler, especially the register allocator and copy

2368

// propagation, to reason about, so is preferred when it's possible to

2369

// use it.

2370

ConstantSDNode *LaneNode = cast<ConstantSDNode>(Node->getOperand(1));

2371

// Bail and use the default Select() for non-zero lanes.

2372

if (LaneNode->getZExtValue() != 0)

2373

break;

2374

// If the element type is not the same as the result type, likewise

2375

// bail and use the default Select(), as there's more to do than just

2376

// a cross-class COPY. This catches extracts of i8 and i16 elements

2377

// since they will need an explicit zext.

2378

if (VT != Node->getOperand(0).getValueType().getVectorElementType())

2379

break;

2380

unsigned SubReg;

2381

switch (Node->getOperand(0)

2382

.getValueType()

2383

.getVectorElementType()

2384

.getSizeInBits()) {

2385

default:

2386

llvm_unreachable("Unexpected vector element type!")::llvm::llvm_unreachable_internal("Unexpected vector element type!"
, "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp"
, 2386);

2387

case 64:

2388

SubReg = AArch64::dsub;

2389

break;

2390

case 32:

2391

SubReg = AArch64::ssub;

2392

break;

2393

case 16:

2394

SubReg = AArch64::hsub;

2395

break;

2396

case 8:

2397

llvm_unreachable("unexpected zext-requiring extract element!")::llvm::llvm_unreachable_internal("unexpected zext-requiring extract element!"
, "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp"
, 2397);

2398

}

2399

SDValue Extract = CurDAG->getTargetExtractSubreg(SubReg, SDLoc(Node), VT,

2400

Node->getOperand(0));

2401

DEBUG(dbgs() << "ISEL: Custom selection!\n=> ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "ISEL: Custom selection!\n=> "
; } } while (0);

2402

DEBUG(Extract->dumpr(CurDAG))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { Extract->dumpr(CurDAG); } } while (0);

2403

DEBUG(dbgs() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "\n"; } } while (0);

2404

return Extract.getNode();

2405

}

2406

case ISD::Constant: {

2407

// Materialize zero constants as copies from WZR/XZR. This allows

2408

// the coalescer to propagate these into other instructions.

2409

ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);

2410

if (ConstNode->isNullValue()) {

2411

if (VT == MVT::i32)

2412

return CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(Node),

2413

AArch64::WZR, MVT::i32).getNode();

2414

else if (VT == MVT::i64)

2415

return CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(Node),

2416

AArch64::XZR, MVT::i64).getNode();

2417

}

2418

break;

2419

}

2420

2421

case ISD::FrameIndex: {

2422

// Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.

2423

int FI = cast<FrameIndexSDNode>(Node)->getIndex();

2424

unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);

2425

const TargetLowering *TLI = getTargetLowering();

2426

SDValue TFI = CurDAG->getTargetFrameIndex(

2427

FI, TLI->getPointerTy(CurDAG->getDataLayout()));

2428

SDLoc DL(Node);

2429

SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),

2430

CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };

2431

return CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);

2432

}

2433

case ISD::INTRINSIC_W_CHAIN: {

2434

unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();

2435

switch (IntNo) {

2436

default:

2437

break;

2438

case Intrinsic::aarch64_ldaxp:

2439

case Intrinsic::aarch64_ldxp: {

2440

unsigned Op =

2441

IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;

2442

SDValue MemAddr = Node->getOperand(2);

2443

SDLoc DL(Node);

2444

SDValue Chain = Node->getOperand(0);

2445

2446

SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,

2447

MVT::Other, MemAddr, Chain);

2448

2449

// Transfer memoperands.

2450

MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);

2451

MemOp[0] = cast<MemIntrinsicSDNode>(Node)->getMemOperand();

2452

cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);

2453

return Ld;

2454

}

2455

case Intrinsic::aarch64_stlxp:

2456

case Intrinsic::aarch64_stxp: {

2457

unsigned Op =

2458

IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;

2459

SDLoc DL(Node);

2460

SDValue Chain = Node->getOperand(0);

2461

SDValue ValLo = Node->getOperand(2);

2462

SDValue ValHi = Node->getOperand(3);

2463

SDValue MemAddr = Node->getOperand(4);

2464

2465

// Place arguments in the right order.

2466

SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};

2467

2468

SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);

2469

// Transfer memoperands.

2470

MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);

2471

MemOp[0] = cast<MemIntrinsicSDNode>(Node)->getMemOperand();

2472

cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);

2473

2474

return St;

2475

}

2476

case Intrinsic::aarch64_neon_ld1x2:

2477

if (VT == MVT::v8i8)

2478

return SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);

2479

else if (VT == MVT::v16i8)

2480

return SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);

2481

else if (VT == MVT::v4i16 || VT == MVT::v4f16)

2482

return SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);

2483

else if (VT == MVT::v8i16 || VT == MVT::v8f16)

2484

return SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);

2485

else if (VT == MVT::v2i32 || VT == MVT::v2f32)

2486

return SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);

2487

else if (VT == MVT::v4i32 || VT == MVT::v4f32)

2488

return SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);

2489

else if (VT == MVT::v1i64 || VT == MVT::v1f64)

2490

return SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);

2491

else if (VT == MVT::v2i64 || VT == MVT::v2f64)

2492

return SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);

2493

break;

2494

case Intrinsic::aarch64_neon_ld1x3:

2495

if (VT == MVT::v8i8)

2496

return SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);

2497

else if (VT == MVT::v16i8)

2498

return SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);

2499

else if (VT == MVT::v4i16 || VT == MVT::v4f16)

2500

return SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);

2501

else if (VT == MVT::v8i16 || VT == MVT::v8f16)

2502

return SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);

2503

else if (VT == MVT::v2i32 || VT == MVT::v2f32)

2504

return SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);

2505

else if (VT == MVT::v4i32 || VT == MVT::v4f32)

2506

return SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);

2507

else if (VT == MVT::v1i64 || VT == MVT::v1f64)

2508

return SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);

2509

else if (VT == MVT::v2i64 || VT == MVT::v2f64)

2510

return SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);

2511

break;

2512

case Intrinsic::aarch64_neon_ld1x4:

2513

if (VT == MVT::v8i8)

2514

return SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);

2515

else if (VT == MVT::v16i8)

2516

return SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);

2517

else if (VT == MVT::v4i16 || VT == MVT::v4f16)

2518

return SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);

2519

else if (VT == MVT::v8i16 || VT == MVT::v8f16)

2520

return SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);

2521

else if (VT == MVT::v2i32 || VT == MVT::v2f32)

2522

return SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);

2523

else if (VT == MVT::v4i32 || VT == MVT::v4f32)

2524

return SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);

2525

else if (VT == MVT::v1i64 || VT == MVT::v1f64)

2526

return SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);

2527

else if (VT == MVT::v2i64 || VT == MVT::v2f64)

2528

return SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);

2529

break;

2530

case Intrinsic::aarch64_neon_ld2:

2531

if (VT == MVT::v8i8)

2532

return SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);

2533

else if (VT == MVT::v16i8)

2534

return SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);

2535

else if (VT == MVT::v4i16 || VT == MVT::v4f16)

2536

return SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);

2537

else if (VT == MVT::v8i16 || VT == MVT::v8f16)

2538

return SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);

2539

else if (VT == MVT::v2i32 || VT == MVT::v2f32)

2540

return SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);

2541

else if (VT == MVT::v4i32 || VT == MVT::v4f32)

2542

return SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);

2543

else if (VT == MVT::v1i64 || VT == MVT::v1f64)

2544

return SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);

2545

else if (VT == MVT::v2i64 || VT == MVT::v2f64)

2546

return SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);

2547

break;

2548

case Intrinsic::aarch64_neon_ld3:

2549

if (VT == MVT::v8i8)

2550

return SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);

2551

else if (VT == MVT::v16i8)

2552

return SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);

2553

else if (VT == MVT::v4i16 || VT == MVT::v4f16)

2554

return SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);

2555

else if (VT == MVT::v8i16 || VT == MVT::v8f16)

2556

return SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);

2557

else if (VT == MVT::v2i32 || VT == MVT::v2f32)

2558

return SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);

2559

else if (VT == MVT::v4i32 || VT == MVT::v4f32)

2560

return SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);

2561

else if (VT == MVT::v1i64 || VT == MVT::v1f64)

2562

return SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);

2563

else if (VT == MVT::v2i64 || VT == MVT::v2f64)

2564

return SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);

2565

break;

2566

case Intrinsic::aarch64_neon_ld4:

2567

if (VT == MVT::v8i8)

2568

return SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);

2569

else if (VT == MVT::v16i8)

2570

return SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);

2571

else if (VT == MVT::v4i16 || VT == MVT::v4f16)

2572

return SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);

2573

else if (VT == MVT::v8i16 || VT == MVT::v8f16)

2574

return SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);

2575

else if (VT == MVT::v2i32 || VT == MVT::v2f32)

2576

return SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);

2577

else if (VT == MVT::v4i32 || VT == MVT::v4f32)

2578

return SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);

2579

else if (VT == MVT::v1i64 || VT == MVT::v1f64)

2580

return SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);

2581

else if (VT == MVT::v2i64 || VT == MVT::v2f64)

2582

return SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);

2583

break;

2584

case Intrinsic::aarch64_neon_ld2r:

2585

if (VT == MVT::v8i8)

2586

return SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);

2587

else if (VT == MVT::v16i8)

2588

return SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);

2589

else if (VT == MVT::v4i16 || VT == MVT::v4f16)

2590

return SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);

2591

else if (VT == MVT::v8i16 || VT == MVT::v8f16)

2592

return SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);

2593

else if (VT == MVT::v2i32 || VT == MVT::v2f32)

2594

return SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);

2595

else if (VT == MVT::v4i32 || VT == MVT::v4f32)

2596

return SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);

2597

else if (VT == MVT::v1i64 || VT == MVT::v1f64)

2598

return SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);

2599

else if (VT == MVT::v2i64 || VT == MVT::v2f64)

2600

return SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);

2601

break;

2602

case Intrinsic::aarch64_neon_ld3r:

2603

if (VT == MVT::v8i8)

2604

return SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);

2605

else if (VT == MVT::v16i8)

2606

return SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);

2607

else if (VT == MVT::v4i16 || VT == MVT::v4f16)

2608

return SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);

2609

else if (VT == MVT::v8i16 || VT == MVT::v8f16)

2610

return SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);

2611

else if (VT == MVT::v2i32 || VT == MVT::v2f32)

2612

return SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);

2613

else if (VT == MVT::v4i32 || VT == MVT::v4f32)

2614

return SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);

2615

else if (VT == MVT::v1i64 || VT == MVT::v1f64)

2616

return SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);

2617

else if (VT == MVT::v2i64 || VT == MVT::v2f64)

2618

return SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);

2619

break;

2620

case Intrinsic::aarch64_neon_ld4r:

2621

if (VT == MVT::v8i8)

2622

return SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);

2623

else if (VT == MVT::v16i8)

2624

return SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);

2625

else if (VT == MVT::v4i16 || VT == MVT::v4f16)

2626

return SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);

2627

else if (VT == MVT::v8i16 || VT == MVT::v8f16)

2628

return SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);

2629

else if (VT == MVT::v2i32 || VT == MVT::v2f32)

2630

return SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);

2631

else if (VT == MVT::v4i32 || VT == MVT::v4f32)

2632

return SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);

2633

else if (VT == MVT::v1i64 || VT == MVT::v1f64)

2634

return SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);

2635

else if (VT == MVT::v2i64 || VT == MVT::v2f64)

2636

return SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);

2637

break;

2638

case Intrinsic::aarch64_neon_ld2lane:

2639

if (VT == MVT::v16i8 || VT == MVT::v8i8)

2640

return SelectLoadLane(Node, 2, AArch64::LD2i8);

2641

else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||

2642

VT == MVT::v8f16)

2643

return SelectLoadLane(Node, 2, AArch64::LD2i16);

2644

else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||

2645

VT == MVT::v2f32)

2646

return SelectLoadLane(Node, 2, AArch64::LD2i32);

2647

else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||

2648

VT == MVT::v1f64)

2649

return SelectLoadLane(Node, 2, AArch64::LD2i64);

2650

break;

2651

case Intrinsic::aarch64_neon_ld3lane:

2652

if (VT == MVT::v16i8 || VT == MVT::v8i8)

2653

return SelectLoadLane(Node, 3, AArch64::LD3i8);

2654

else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||

2655

VT == MVT::v8f16)

2656

return SelectLoadLane(Node, 3, AArch64::LD3i16);

2657

else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||

2658

VT == MVT::v2f32)

2659

return SelectLoadLane(Node, 3, AArch64::LD3i32);

2660

else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||

2661

VT == MVT::v1f64)

2662

return SelectLoadLane(Node, 3, AArch64::LD3i64);

2663

break;

2664

case Intrinsic::aarch64_neon_ld4lane:

2665

if (VT == MVT::v16i8 || VT == MVT::v8i8)

2666

return SelectLoadLane(Node, 4, AArch64::LD4i8);

2667

else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||

2668

VT == MVT::v8f16)

2669

return SelectLoadLane(Node, 4, AArch64::LD4i16);

2670

else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||

2671

VT == MVT::v2f32)

2672

return SelectLoadLane(Node, 4, AArch64::LD4i32);

2673

else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||

2674

VT == MVT::v1f64)

2675

return SelectLoadLane(Node, 4, AArch64::LD4i64);

2676

break;

2677

}

2678

} break;

2679

case ISD::INTRINSIC_WO_CHAIN: {

2680

unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();

2681

switch (IntNo) {

2682

default:

2683

break;

2684

case Intrinsic::aarch64_neon_tbl2:

2685

return SelectTable(Node, 2, VT == MVT::v8i8 ? AArch64::TBLv8i8Two

2686

: AArch64::TBLv16i8Two,

2687

false);

2688

case Intrinsic::aarch64_neon_tbl3:

2689

return SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three

2690

: AArch64::TBLv16i8Three,

2691

false);

2692

case Intrinsic::aarch64_neon_tbl4:

2693

return SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four

2694

: AArch64::TBLv16i8Four,

2695

false);

2696

case Intrinsic::aarch64_neon_tbx2:

2697

return SelectTable(Node, 2, VT == MVT::v8i8 ? AArch64::TBXv8i8Two

2698

: AArch64::TBXv16i8Two,

2699

true);

2700

case Intrinsic::aarch64_neon_tbx3:

2701

return SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three

2702

: AArch64::TBXv16i8Three,

2703

true);

2704

case Intrinsic::aarch64_neon_tbx4:

2705

return SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four

2706

: AArch64::TBXv16i8Four,

2707

true);

2708

case Intrinsic::aarch64_neon_smull:

2709

case Intrinsic::aarch64_neon_umull:

2710

if (SDNode *N = SelectMULLV64LaneV128(IntNo, Node))

2711

return N;

2712

break;

2713

}

2714

break;

2715

}

2716

case ISD::INTRINSIC_VOID: {

2717

unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();

2718

if (Node->getNumOperands() >= 3)

2719

VT = Node->getOperand(2)->getValueType(0);

2720

switch (IntNo) {

2721

default:

2722

break;

2723

case Intrinsic::aarch64_neon_st1x2: {

2724

if (VT == MVT::v8i8)

2725

return SelectStore(Node, 2, AArch64::ST1Twov8b);

2726

else if (VT == MVT::v16i8)

2727

return SelectStore(Node, 2, AArch64::ST1Twov16b);

2728

else if (VT == MVT::v4i16 || VT == MVT::v4f16)

2729

return SelectStore(Node, 2, AArch64::ST1Twov4h);

2730

else if (VT == MVT::v8i16 || VT == MVT::v8f16)

2731

return SelectStore(Node, 2, AArch64::ST1Twov8h);

2732

else if (VT == MVT::v2i32 || VT == MVT::v2f32)

2733

return SelectStore(Node, 2, AArch64::ST1Twov2s);

2734

else if (VT == MVT::v4i32 || VT == MVT::v4f32)

2735

return SelectStore(Node, 2, AArch64::ST1Twov4s);

2736

else if (VT == MVT::v2i64 || VT == MVT::v2f64)

2737

return SelectStore(Node, 2, AArch64::ST1Twov2d);

2738

else if (VT == MVT::v1i64 || VT == MVT::v1f64)

2739

return SelectStore(Node, 2, AArch64::ST1Twov1d);

2740

break;

2741

}

2742

case Intrinsic::aarch64_neon_st1x3: {

2743

if (VT == MVT::v8i8)

2744

return SelectStore(Node, 3, AArch64::ST1Threev8b);

2745

else if (VT == MVT::v16i8)

2746

return SelectStore(Node, 3, AArch64::ST1Threev16b);

2747

else if (VT == MVT::v4i16 || VT == MVT::v4f16)

2748

return SelectStore(Node, 3, AArch64::ST1Threev4h);

2749

else if (VT == MVT::v8i16 || VT == MVT::v8f16)

2750

return SelectStore(Node, 3, AArch64::ST1Threev8h);

2751

else if (VT == MVT::v2i32 || VT == MVT::v2f32)

2752

return SelectStore(Node, 3, AArch64::ST1Threev2s);

2753

else if (VT == MVT::v4i32 || VT == MVT::v4f32)

2754

return SelectStore(Node, 3, AArch64::ST1Threev4s);

2755

else if (VT == MVT::v2i64 || VT == MVT::v2f64)

2756

return SelectStore(Node, 3, AArch64::ST1Threev2d);

2757

else if (VT == MVT::v1i64 || VT == MVT::v1f64)

2758

return SelectStore(Node, 3, AArch64::ST1Threev1d);

2759

break;

2760

}

2761

case Intrinsic::aarch64_neon_st1x4: {

2762

if (VT == MVT::v8i8)

2763

return SelectStore(Node, 4, AArch64::ST1Fourv8b);

2764

else if (VT == MVT::v16i8)

2765

return SelectStore(Node, 4, AArch64::ST1Fourv16b);

2766

else if (VT == MVT::v4i16 || VT == MVT::v4f16)

2767

return SelectStore(Node, 4, AArch64::ST1Fourv4h);

2768

else if (VT == MVT::v8i16 || VT == MVT::v8f16)

2769

return SelectStore(Node, 4, AArch64::ST1Fourv8h);

2770

else if (VT == MVT::v2i32 || VT == MVT::v2f32)

2771

return SelectStore(Node, 4, AArch64::ST1Fourv2s);

2772

else if (VT == MVT::v4i32 || VT == MVT::v4f32)

2773

return SelectStore(Node, 4, AArch64::ST1Fourv4s);

2774

else if (VT == MVT::v2i64 || VT == MVT::v2f64)

2775

return SelectStore(Node, 4, AArch64::ST1Fourv2d);

2776

else if (VT == MVT::v1i64 || VT == MVT::v1f64)

2777

return SelectStore(Node, 4, AArch64::ST1Fourv1d);

2778

break;

2779

}

2780

case Intrinsic::aarch64_neon_st2: {

2781

if (VT == MVT::v8i8)

2782

return SelectStore(Node, 2, AArch64::ST2Twov8b);

2783

else if (VT == MVT::v16i8)

2784

return SelectStore(Node, 2, AArch64::ST2Twov16b);

2785

else if (VT == MVT::v4i16 || VT == MVT::v4f16)

2786

return SelectStore(Node, 2, AArch64::ST2Twov4h);

2787

else if (VT == MVT::v8i16 || VT == MVT::v8f16)

2788

return SelectStore(Node, 2, AArch64::ST2Twov8h);

2789

else if (VT == MVT::v2i32 || VT == MVT::v2f32)

2790

return SelectStore(Node, 2, AArch64::ST2Twov2s);

2791

else if (VT == MVT::v4i32 || VT == MVT::v4f32)

2792

return SelectStore(Node, 2, AArch64::ST2Twov4s);

2793

else if (VT == MVT::v2i64 || VT == MVT::v2f64)

2794

return SelectStore(Node, 2, AArch64::ST2Twov2d);

2795

else if (VT == MVT::v1i64 || VT == MVT::v1f64)

2796

return SelectStore(Node, 2, AArch64::ST1Twov1d);

2797

break;

2798

}

2799

case Intrinsic::aarch64_neon_st3: {

2800

if (VT == MVT::v8i8)

2801

return SelectStore(Node, 3, AArch64::ST3Threev8b);

2802

else if (VT == MVT::v16i8)

2803

return SelectStore(Node, 3, AArch64::ST3Threev16b);

2804

else if (VT == MVT::v4i16 || VT == MVT::v4f16)

2805

return SelectStore(Node, 3, AArch64::ST3Threev4h);

2806

else if (VT == MVT::v8i16 || VT == MVT::v8f16)

2807

return SelectStore(Node, 3, AArch64::ST3Threev8h);

2808

else if (VT == MVT::v2i32 || VT == MVT::v2f32)

2809

return SelectStore(Node, 3, AArch64::ST3Threev2s);

2810

else if (VT == MVT::v4i32 || VT == MVT::v4f32)

2811

return SelectStore(Node, 3, AArch64::ST3Threev4s);

2812

else if (VT == MVT::v2i64 || VT == MVT::v2f64)

2813

return SelectStore(Node, 3, AArch64::ST3Threev2d);

2814

else if (VT == MVT::v1i64 || VT == MVT::v1f64)

2815

return SelectStore(Node, 3, AArch64::ST1Threev1d);

2816

break;

2817

}

2818

case Intrinsic::aarch64_neon_st4: {

2819

if (VT == MVT::v8i8)

2820

return SelectStore(Node, 4, AArch64::ST4Fourv8b);

2821

else if (VT == MVT::v16i8)

2822

return SelectStore(Node, 4, AArch64::ST4Fourv16b);

2823

else if (VT == MVT::v4i16 || VT == MVT::v4f16)

2824

return SelectStore(Node, 4, AArch64::ST4Fourv4h);

2825

else if (VT == MVT::v8i16 || VT == MVT::v8f16)

2826

return SelectStore(Node, 4, AArch64::ST4Fourv8h);

2827

else if (VT == MVT::v2i32 || VT == MVT::v2f32)

2828

return SelectStore(Node, 4, AArch64::ST4Fourv2s);

2829

else if (VT == MVT::v4i32 || VT == MVT::v4f32)

2830

return SelectStore(Node, 4, AArch64::ST4Fourv4s);

2831

else if (VT == MVT::v2i64 || VT == MVT::v2f64)

2832

return SelectStore(Node, 4, AArch64::ST4Fourv2d);

2833

else if (VT == MVT::v1i64 || VT == MVT::v1f64)

2834

return SelectStore(Node, 4, AArch64::ST1Fourv1d);

2835

break;

2836

}

2837

case Intrinsic::aarch64_neon_st2lane: {

2838

if (VT == MVT::v16i8 || VT == MVT::v8i8)

2839

return SelectStoreLane(Node, 2, AArch64::ST2i8);

2840

else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||

2841

VT == MVT::v8f16)

2842

return SelectStoreLane(Node, 2, AArch64::ST2i16);

2843

else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||

2844

VT == MVT::v2f32)

2845

return SelectStoreLane(Node, 2, AArch64::ST2i32);

2846

else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||

2847

VT == MVT::v1f64)

2848

return SelectStoreLane(Node, 2, AArch64::ST2i64);

2849

break;

2850

}

2851

case Intrinsic::aarch64_neon_st3lane: {

2852

if (VT == MVT::v16i8 || VT == MVT::v8i8)

2853

return SelectStoreLane(Node, 3, AArch64::ST3i8);

2854

else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||

2855

VT == MVT::v8f16)

2856

return SelectStoreLane(Node, 3, AArch64::ST3i16);

2857

else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||

2858

VT == MVT::v2f32)

2859

return SelectStoreLane(Node, 3, AArch64::ST3i32);

2860

else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||

2861

VT == MVT::v1f64)

2862

return SelectStoreLane(Node, 3, AArch64::ST3i64);

2863

break;

2864

}

2865

case Intrinsic::aarch64_neon_st4lane: {

2866

if (VT == MVT::v16i8 || VT == MVT::v8i8)

2867

return SelectStoreLane(Node, 4, AArch64::ST4i8);

2868

else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||

2869

VT == MVT::v8f16)

2870

return SelectStoreLane(Node, 4, AArch64::ST4i16);

2871

else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||

2872

VT == MVT::v2f32)

2873

return SelectStoreLane(Node, 4, AArch64::ST4i32);

2874

else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||

2875

VT == MVT::v1f64)

2876

return SelectStoreLane(Node, 4, AArch64::ST4i64);

2877

break;

2878

}

2879

}

2880

break;

2881

}

2882

case AArch64ISD::LD2post: {

2883

if (VT == MVT::v8i8)

2884

return SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);

2885

else if (VT == MVT::v16i8)

2886

return SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);

2887

else if (VT == MVT::v4i16 || VT == MVT::v4f16)

2888

return SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);

2889

else if (VT == MVT::v8i16 || VT == MVT::v8f16)

2890

return SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);

2891

else if (VT == MVT::v2i32 || VT == MVT::v2f32)

2892

return SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);

2893

else if (VT == MVT::v4i32 || VT == MVT::v4f32)

2894

return SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);

2895

else if (VT == MVT::v1i64 || VT == MVT::v1f64)

2896

return SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);

2897

else if (VT == MVT::v2i64 || VT == MVT::v2f64)

2898

return SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);

2899

break;

2900

}

2901

case AArch64ISD::LD3post: {

2902

if (VT == MVT::v8i8)

2903

return SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);

2904

else if (VT == MVT::v16i8)

2905

return SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);

2906

else if (VT == MVT::v4i16 || VT == MVT::v4f16)

2907

return SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);

2908

else if (VT == MVT::v8i16 || VT == MVT::v8f16)

2909

return SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);

2910

else if (VT == MVT::v2i32 || VT == MVT::v2f32)

2911

return SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);

2912

else if (VT == MVT::v4i32 || VT == MVT::v4f32)

2913

return SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);

2914

else if (VT == MVT::v1i64 || VT == MVT::v1f64)

2915

return SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);

2916

else if (VT == MVT::v2i64 || VT == MVT::v2f64)

2917

return SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);

2918

break;

2919

}

2920

case AArch64ISD::LD4post: {

2921

if (VT == MVT::v8i8)

2922

return SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);

2923

else if (VT == MVT::v16i8)

2924

return SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);

2925

else if (VT == MVT::v4i16 || VT == MVT::v4f16)

2926

return SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);

2927

else if (VT == MVT::v8i16 || VT == MVT::v8f16)

2928

return SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);

2929

else if (VT == MVT::v2i32 || VT == MVT::v2f32)

2930

return SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);

2931

else if (VT == MVT::v4i32 || VT == MVT::v4f32)

2932

return SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);

2933

else if (VT == MVT::v1i64 || VT == MVT::v1f64)

2934

return SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);

2935

else if (VT == MVT::v2i64 || VT == MVT::v2f64)

2936

return SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);

2937

break;

2938

}

2939

case AArch64ISD::LD1x2post: {

2940

if (VT == MVT::v8i8)

2941

return SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);

2942

else if (VT == MVT::v16i8)

2943

return SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);

2944

else if (VT == MVT::v4i16 || VT == MVT::v4f16)

2945

return SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);

2946

else if (VT == MVT::v8i16 || VT == MVT::v8f16)

2947

return SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);

2948

else if (VT == MVT::v2i32 || VT == MVT::v2f32)

2949

return SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);

2950

else if (VT == MVT::v4i32 || VT == MVT::v4f32)

2951

return SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);

2952

else if (VT == MVT::v1i64 || VT == MVT::v1f64)

2953

return SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);

2954

else if (VT == MVT::v2i64 || VT == MVT::v2f64)

2955

return SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);

2956

break;

2957

}

2958

case AArch64ISD::LD1x3post: {

2959

if (VT == MVT::v8i8)

2960

return SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);

2961

else if (VT == MVT::v16i8)

2962

return SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);

2963

else if (VT == MVT::v4i16 || VT == MVT::v4f16)

2964

return SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);

2965

else if (VT == MVT::v8i16 || VT == MVT::v8f16)

2966

return SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);

2967

else if (VT == MVT::v2i32 || VT == MVT::v2f32)

2968

return SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);

2969

else if (VT == MVT::v4i32 || VT == MVT::v4f32)

2970

return SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);

2971

else if (VT == MVT::v1i64 || VT == MVT::v1f64)

2972

return SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);

2973

else if (VT == MVT::v2i64 || VT == MVT::v2f64)

2974

return SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);

2975

break;

2976

}

2977

case AArch64ISD::LD1x4post: {

2978

if (VT == MVT::v8i8)

2979

return SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);

2980

else if (VT == MVT::v16i8)

2981

return SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);

2982

else if (VT == MVT::v4i16 || VT == MVT::v4f16)

2983

return SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);

2984

else if (VT == MVT::v8i16 || VT == MVT::v8f16)

2985

return SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);

2986

else if (VT == MVT::v2i32 || VT == MVT::v2f32)

2987

return SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);

2988

else if (VT == MVT::v4i32 || VT == MVT::v4f32)

2989

return SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);

2990

else if (VT == MVT::v1i64 || VT == MVT::v1f64)

2991

return SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);

2992

else if (VT == MVT::v2i64 || VT == MVT::v2f64)

2993

return SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);

2994

break;

2995

}

2996

case AArch64ISD::LD1DUPpost: {

2997

if (VT == MVT::v8i8)

2998

return SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);

2999

else if (VT == MVT::v16i8)

3000

return SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);

3001

else if (VT == MVT::v4i16 || VT == MVT::v4f16)

3002

return SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);

3003

else if (VT == MVT::v8i16 || VT == MVT::v8f16)

3004

return SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);

3005

else if (VT == MVT::v2i32 || VT == MVT::v2f32)

3006

return SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);

3007

else if (VT == MVT::v4i32 || VT == MVT::v4f32)

3008

return SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);

3009

else if (VT == MVT::v1i64 || VT == MVT::v1f64)

3010

return SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);

3011

else if (VT == MVT::v2i64 || VT == MVT::v2f64)

3012

return SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);

3013

break;

3014

}

3015

case AArch64ISD::LD2DUPpost: {

3016

if (VT == MVT::v8i8)

3017

return SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);

3018

else if (VT == MVT::v16i8)

3019

return SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);

3020

else if (VT == MVT::v4i16 || VT == MVT::v4f16)

3021

return SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);

3022

else if (VT == MVT::v8i16 || VT == MVT::v8f16)

3023

return SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);

3024

else if (VT == MVT::v2i32 || VT == MVT::v2f32)

3025

return SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);

3026

else if (VT == MVT::v4i32 || VT == MVT::v4f32)

3027

return SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);

3028

else if (VT == MVT::v1i64 || VT == MVT::v1f64)

3029

return SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);

3030

else if (VT == MVT::v2i64 || VT == MVT::v2f64)

3031

return SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);

3032

break;

3033

}

3034

case AArch64ISD::LD3DUPpost: {

3035

if (VT == MVT::v8i8)

3036

return SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);

3037

else if (VT == MVT::v16i8)

3038

return SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);

3039

else if (VT == MVT::v4i16 || VT == MVT::v4f16)

3040

return SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);

3041

else if (VT == MVT::v8i16 || VT == MVT::v8f16)

3042

return SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);

3043

else if (VT == MVT::v2i32 || VT == MVT::v2f32)

3044

return SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);

3045

else if (VT == MVT::v4i32 || VT == MVT::v4f32)

3046

return SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);

3047

else if (VT == MVT::v1i64 || VT == MVT::v1f64)

3048

return SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);

3049

else if (VT == MVT::v2i64 || VT == MVT::v2f64)

3050

return SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);

3051

break;

3052

}

3053

case AArch64ISD::LD4DUPpost: {

3054

if (VT == MVT::v8i8)

3055

return SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);

3056

else if (VT == MVT::v16i8)

3057

return SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);

3058

else if (VT == MVT::v4i16 || VT == MVT::v4f16)

3059

return SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);

3060

else if (VT == MVT::v8i16 || VT == MVT::v8f16)

3061

return SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);

3062

else if (VT == MVT::v2i32 || VT == MVT::v2f32)

3063

return SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);

3064

else if (VT == MVT::v4i32 || VT == MVT::v4f32)

3065

return SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);

3066

else if (VT == MVT::v1i64 || VT == MVT::v1f64)

3067

return SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);

3068

else if (VT == MVT::v2i64 || VT == MVT::v2f64)

3069

return SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);

3070

break;

3071

}

3072

case AArch64ISD::LD1LANEpost: {

3073

if (VT == MVT::v16i8 || VT == MVT::v8i8)

3074

return SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);

3075

else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||

3076

VT == MVT::v8f16)

3077

return SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);

3078

else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||

3079

VT == MVT::v2f32)

3080

return SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);

3081

else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||

3082

VT == MVT::v1f64)

3083

return SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);

3084

break;

3085

}

3086

case AArch64ISD::LD2LANEpost: {

3087

if (VT == MVT::v16i8 || VT == MVT::v8i8)

3088

return SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);

3089

else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||

3090

VT == MVT::v8f16)

3091

return SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);

3092

else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||

3093

VT == MVT::v2f32)

3094

return SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);

3095

else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||

3096

VT == MVT::v1f64)

3097

return SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);

3098

break;

3099

}

3100

case AArch64ISD::LD3LANEpost: {

3101

if (VT == MVT::v16i8 || VT == MVT::v8i8)

3102

return SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);

3103

else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||

3104

VT == MVT::v8f16)

3105

return SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);

3106

else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||

3107

VT == MVT::v2f32)

3108

return SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);

3109

else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||

3110

VT == MVT::v1f64)

3111

return SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);

3112

break;

3113

}

3114

case AArch64ISD::LD4LANEpost: {

3115

if (VT == MVT::v16i8 || VT == MVT::v8i8)

3116

return SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);

3117

else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||

3118

VT == MVT::v8f16)

3119

return SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);

3120

else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||

3121

VT == MVT::v2f32)

3122

return SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);

3123

else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||

3124

VT == MVT::v1f64)

3125

return SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);

3126

break;

3127

}

3128

case AArch64ISD::ST2post: {

3129

VT = Node->getOperand(1).getValueType();

3130

if (VT == MVT::v8i8)

3131

return SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);

3132

else if (VT == MVT::v16i8)

3133

return SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);

3134

else if (VT == MVT::v4i16 || VT == MVT::v4f16)

3135

return SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);

3136

else if (VT == MVT::v8i16 || VT == MVT::v8f16)

3137

return SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);

3138

else if (VT == MVT::v2i32 || VT == MVT::v2f32)

3139

return SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);

3140

else if (VT == MVT::v4i32 || VT == MVT::v4f32)

3141

return SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);

3142

else if (VT == MVT::v2i64 || VT == MVT::v2f64)

3143

return SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);

3144

else if (VT == MVT::v1i64 || VT == MVT::v1f64)

3145

return SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);

3146

break;

3147

}

3148

case AArch64ISD::ST3post: {

3149

VT = Node->getOperand(1).getValueType();

3150

if (VT == MVT::v8i8)

3151

return SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);

3152

else if (VT == MVT::v16i8)

3153

return SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);

3154

else if (VT == MVT::v4i16 || VT == MVT::v4f16)

3155

return SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);

3156

else if (VT == MVT::v8i16 || VT == MVT::v8f16)

3157

return SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);

3158

else if (VT == MVT::v2i32 || VT == MVT::v2f32)

3159

return SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);

3160

else if (VT == MVT::v4i32 || VT == MVT::v4f32)

3161

return SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);

3162

else if (VT == MVT::v2i64 || VT == MVT::v2f64)

3163

return SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);

3164

else if (VT == MVT::v1i64 || VT == MVT::v1f64)

3165

return SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);

3166

break;

3167

}

3168

case AArch64ISD::ST4post: {

3169

VT = Node->getOperand(1).getValueType();

3170

if (VT == MVT::v8i8)

3171

return SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);

3172

else if (VT == MVT::v16i8)

3173

return SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);

3174

else if (VT == MVT::v4i16 || VT == MVT::v4f16)

3175

return SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);

3176

else if (VT == MVT::v8i16 || VT == MVT::v8f16)

3177

return SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);

3178

else if (VT == MVT::v2i32 || VT == MVT::v2f32)

3179

return SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);

3180

else if (VT == MVT::v4i32 || VT == MVT::v4f32)

3181

return SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);

3182

else if (VT == MVT::v2i64 || VT == MVT::v2f64)

3183

return SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);

3184

else if (VT == MVT::v1i64 || VT == MVT::v1f64)

3185

return SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);

3186

break;

3187

}

3188

case AArch64ISD::ST1x2post: {

3189

VT = Node->getOperand(1).getValueType();

3190

if (VT == MVT::v8i8)

3191

return SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);

3192

else if (VT == MVT::v16i8)

3193

return SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);

3194

else if (VT == MVT::v4i16 || VT == MVT::v4f16)

3195

return SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);

3196

else if (VT == MVT::v8i16 || VT == MVT::v8f16)

3197

return SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);

3198

else if (VT == MVT::v2i32 || VT == MVT::v2f32)

3199

return SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);

3200

else if (VT == MVT::v4i32 || VT == MVT::v4f32)

3201

return SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);

3202

else if (VT == MVT::v1i64 || VT == MVT::v1f64)

3203

return SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);

3204

else if (VT == MVT::v2i64 || VT == MVT::v2f64)

3205

return SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);

3206

break;

3207

}

3208

case AArch64ISD::ST1x3post: {

3209

VT = Node->getOperand(1).getValueType();

3210

if (VT == MVT::v8i8)

3211

return SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);

3212

else if (VT == MVT::v16i8)

3213

return SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);

3214

else if (VT == MVT::v4i16 || VT == MVT::v4f16)

3215

return SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);

3216

else if (VT == MVT::v8i16 || VT == MVT::v8f16)

3217

return SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);

3218

else if (VT == MVT::v2i32 || VT == MVT::v2f32)

3219

return SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);

3220

else if (VT == MVT::v4i32 || VT == MVT::v4f32)

3221

return SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);

3222

else if (VT == MVT::v1i64 || VT == MVT::v1f64)

3223

return SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);

3224

else if (VT == MVT::v2i64 || VT == MVT::v2f64)

3225

return SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);

3226

break;

3227

}

3228

case AArch64ISD::ST1x4post: {

3229

VT = Node->getOperand(1).getValueType();

3230

if (VT == MVT::v8i8)

3231

return SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);

3232

else if (VT == MVT::v16i8)

3233

return SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);

3234

else if (VT == MVT::v4i16 || VT == MVT::v4f16)

3235

return SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);

3236

else if (VT == MVT::v8i16 || VT == MVT::v8f16)

3237

return SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);

3238

else if (VT == MVT::v2i32 || VT == MVT::v2f32)

3239

return SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);

3240

else if (VT == MVT::v4i32 || VT == MVT::v4f32)

3241

return SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);

3242

else if (VT == MVT::v1i64 || VT == MVT::v1f64)

3243

return SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);

3244

else if (VT == MVT::v2i64 || VT == MVT::v2f64)

3245

return SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);

3246

break;

3247

}

3248

case AArch64ISD::ST2LANEpost: {

3249

VT = Node->getOperand(1).getValueType();

3250

if (VT == MVT::v16i8 || VT == MVT::v8i8)

3251

return SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);

3252

else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||

3253

VT == MVT::v8f16)

3254

return SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);

3255

else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||

3256

VT == MVT::v2f32)

3257

return SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);

3258

else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||

3259

VT == MVT::v1f64)

3260

return SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);

3261

break;

3262

}

3263

case AArch64ISD::ST3LANEpost: {

3264

VT = Node->getOperand(1).getValueType();

3265

if (VT == MVT::v16i8 || VT == MVT::v8i8)

3266

return SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);

3267

else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||

3268

VT == MVT::v8f16)

3269

return SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);

3270

else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||

3271

VT == MVT::v2f32)

3272

return SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);

3273

else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||

3274

VT == MVT::v1f64)

3275

return SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);

3276

break;

3277

}

3278

case AArch64ISD::ST4LANEpost: {

3279

VT = Node->getOperand(1).getValueType();

3280

if (VT == MVT::v16i8 || VT == MVT::v8i8)

3281

return SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);

3282

else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||

3283

VT == MVT::v8f16)

3284

return SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);

3285

else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||

3286

VT == MVT::v2f32)

3287

return SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);

3288

else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||

3289

VT == MVT::v1f64)

3290

return SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);

3291

break;

3292

}

3293

}

3294

3295

// Select the default instruction

3296

ResNode = SelectCode(Node);

3297

3298

DEBUG(errs() << "=> ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { errs() << "=> "; } } while (0);

3299

if (ResNode == nullptr || ResNode == Node)

3300

DEBUG(Node->dump(CurDAG))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { Node->dump(CurDAG); } } while (0);

3301

else

3302

DEBUG(ResNode->dump(CurDAG))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { ResNode->dump(CurDAG); } } while (0);

3303

DEBUG(errs() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { errs() << "\n"; } } while (0);

3304

3305

return ResNode;

3306

}

3307

3308

/// createAArch64ISelDag - This pass converts a legalized DAG into a

3309

/// AArch64-specific DAG, ready for instruction scheduling.

3310

FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,

3311

CodeGenOpt::Level OptLevel) {

3312

return new AArch64DAGToDAGISel(TM, OptLevel);

3313

}