/build/source/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp

1

//===-- PPCISelDAGToDAG.cpp - PPC --pattern matching inst selector --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines a pattern matching instruction selector for PowerPC,
// converting from a legalized dag to a PPC dag.
//
//===----------------------------------------------------------------------===//

13

14

#include "MCTargetDesc/PPCMCTargetDesc.h"

15

#include "MCTargetDesc/PPCPredicates.h"

16

#include "PPC.h"

17

#include "PPCISelLowering.h"

18

#include "PPCMachineFunctionInfo.h"

19

#include "PPCSubtarget.h"

20

#include "PPCTargetMachine.h"

21

#include "llvm/ADT/APInt.h"

22

#include "llvm/ADT/DenseMap.h"

23

#include "llvm/ADT/STLExtras.h"

24

#include "llvm/ADT/SmallPtrSet.h"

25

#include "llvm/ADT/SmallVector.h"

26

#include "llvm/ADT/Statistic.h"

27

#include "llvm/Analysis/BranchProbabilityInfo.h"

28

#include "llvm/CodeGen/FunctionLoweringInfo.h"

29

#include "llvm/CodeGen/ISDOpcodes.h"

30

#include "llvm/CodeGen/MachineBasicBlock.h"

31

#include "llvm/CodeGen/MachineFrameInfo.h"

32

#include "llvm/CodeGen/MachineFunction.h"

33

#include "llvm/CodeGen/MachineInstrBuilder.h"

34

#include "llvm/CodeGen/MachineRegisterInfo.h"

35

#include "llvm/CodeGen/MachineValueType.h"

36

#include "llvm/CodeGen/SelectionDAG.h"

37

#include "llvm/CodeGen/SelectionDAGISel.h"

38

#include "llvm/CodeGen/SelectionDAGNodes.h"

39

#include "llvm/CodeGen/TargetInstrInfo.h"

40

#include "llvm/CodeGen/TargetRegisterInfo.h"

41

#include "llvm/CodeGen/ValueTypes.h"

42

#include "llvm/IR/BasicBlock.h"

43

#include "llvm/IR/DebugLoc.h"

44

#include "llvm/IR/Function.h"

45

#include "llvm/IR/GlobalValue.h"

46

#include "llvm/IR/InlineAsm.h"

47

#include "llvm/IR/InstrTypes.h"

48

#include "llvm/IR/IntrinsicsPowerPC.h"

49

#include "llvm/IR/Module.h"

50

#include "llvm/Support/Casting.h"

51

#include "llvm/Support/CodeGen.h"

52

#include "llvm/Support/CommandLine.h"

53

#include "llvm/Support/Compiler.h"

54

#include "llvm/Support/Debug.h"

55

#include "llvm/Support/ErrorHandling.h"

56

#include "llvm/Support/KnownBits.h"

57

#include "llvm/Support/MathExtras.h"

58

#include "llvm/Support/raw_ostream.h"

59

#include <algorithm>

60

#include <cassert>

61

#include <cstdint>

62

#include <iterator>

63

#include <limits>

64

#include <memory>

65

#include <new>

66

#include <tuple>

67

#include <utility>

68

69

using namespace llvm;

70

71

#define DEBUG_TYPE"ppc-isel" "ppc-isel"

72

#define PASS_NAME"PowerPC DAG->DAG Pattern Instruction Selection" "PowerPC DAG->DAG Pattern Instruction Selection"

73

74

STATISTIC(NumSextSetcc,static llvm::Statistic NumSextSetcc = {"ppc-isel", "NumSextSetcc"
, "Number of (sext(setcc)) nodes expanded into GPR sequence."
}

75

"Number of (sext(setcc)) nodes expanded into GPR sequence.")static llvm::Statistic NumSextSetcc = {"ppc-isel", "NumSextSetcc"
, "Number of (sext(setcc)) nodes expanded into GPR sequence."
};

76

STATISTIC(NumZextSetcc,static llvm::Statistic NumZextSetcc = {"ppc-isel", "NumZextSetcc"
, "Number of (zext(setcc)) nodes expanded into GPR sequence."
}

77

"Number of (zext(setcc)) nodes expanded into GPR sequence.")static llvm::Statistic NumZextSetcc = {"ppc-isel", "NumZextSetcc"
, "Number of (zext(setcc)) nodes expanded into GPR sequence."
};

78

STATISTIC(SignExtensionsAdded,static llvm::Statistic SignExtensionsAdded = {"ppc-isel", "SignExtensionsAdded"
, "Number of sign extensions for compare inputs added."}

79

"Number of sign extensions for compare inputs added.")static llvm::Statistic SignExtensionsAdded = {"ppc-isel", "SignExtensionsAdded"
, "Number of sign extensions for compare inputs added."};

80

STATISTIC(ZeroExtensionsAdded,static llvm::Statistic ZeroExtensionsAdded = {"ppc-isel", "ZeroExtensionsAdded"
, "Number of zero extensions for compare inputs added."}

81

"Number of zero extensions for compare inputs added.")static llvm::Statistic ZeroExtensionsAdded = {"ppc-isel", "ZeroExtensionsAdded"
, "Number of zero extensions for compare inputs added."};

82

STATISTIC(NumLogicOpsOnComparison,static llvm::Statistic NumLogicOpsOnComparison = {"ppc-isel",
"NumLogicOpsOnComparison", "Number of logical ops on i1 values calculated in GPR."
}

83

"Number of logical ops on i1 values calculated in GPR.")static llvm::Statistic NumLogicOpsOnComparison = {"ppc-isel",
"NumLogicOpsOnComparison", "Number of logical ops on i1 values calculated in GPR."
};

84

STATISTIC(OmittedForNonExtendUses,static llvm::Statistic OmittedForNonExtendUses = {"ppc-isel",
"OmittedForNonExtendUses", "Number of compares not eliminated as they have non-extending uses."
}

85

"Number of compares not eliminated as they have non-extending uses.")static llvm::Statistic OmittedForNonExtendUses = {"ppc-isel",
"OmittedForNonExtendUses", "Number of compares not eliminated as they have non-extending uses."
};

86

STATISTIC(NumP9Setb,static llvm::Statistic NumP9Setb = {"ppc-isel", "NumP9Setb", "Number of compares lowered to setb."
}

87

"Number of compares lowered to setb.")static llvm::Statistic NumP9Setb = {"ppc-isel", "NumP9Setb", "Number of compares lowered to setb."
};

88

89

// FIXME: Remove this once the bug has been fixed!

90

cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug",

91

cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden);

92

93

static cl::opt<bool>

94

UseBitPermRewriter("ppc-use-bit-perm-rewriter", cl::init(true),

95

cl::desc("use aggressive ppc isel for bit permutations"),

96

cl::Hidden);

97

static cl::opt<bool> BPermRewriterNoMasking(

98

"ppc-bit-perm-rewriter-stress-rotates",

99

cl::desc("stress rotate selection in aggressive ppc isel for "

100

"bit permutations"),

101

cl::Hidden);

102

103

static cl::opt<bool> EnableBranchHint(

104

"ppc-use-branch-hint", cl::init(true),

105

cl::desc("Enable static hinting of branches on ppc"),

106

cl::Hidden);

107

108

static cl::opt<bool> EnableTLSOpt(

109

"ppc-tls-opt", cl::init(true),

110

cl::desc("Enable tls optimization peephole"),

111

cl::Hidden);

112

113

/// Selects which integer comparisons get GPR-only code. The enumerator order
/// (and therefore each numeric value) is significant and must not change.
enum ICmpInGPRType {
  ICGPR_All,      // All possible int comparisons in GPRs.
  ICGPR_None,     // Do not modify integer comparisons.
  ICGPR_I32,      // Only i32 comparisons in GPRs.
  ICGPR_I64,      // Only i64 comparisons in GPRs.
  ICGPR_NonExtIn, // Only comparisons where inputs don't need [sz]ext.
  ICGPR_Zext,     // Only comparisons with zext result.
  ICGPR_Sext,     // Only comparisons with sext result.
  ICGPR_ZextI32,  // Only i32 comparisons with zext result.
  ICGPR_SextI32,  // Only i32 comparisons with sext result.
  ICGPR_ZextI64,  // Only i64 comparisons with zext result.
  ICGPR_SextI64   // Only i64 comparisons with sext result.
};

116

117

// Hidden option choosing which integer comparisons are emitted as GPR-only
// code; defaults to ICGPR_All.
static cl::opt<ICmpInGPRType> CmpInGPR(
    "ppc-gpr-icmps", cl::Hidden, cl::init(ICGPR_All),
    cl::desc("Specify the types of comparisons to emit GPR-only code for."),
    cl::values(clEnumValN(ICGPR_None, "none", "Do not modify integer comparisons."),
               clEnumValN(ICGPR_All, "all", "All possible int comparisons in GPRs."),
               clEnumValN(ICGPR_I32, "i32", "Only i32 comparisons in GPRs."),
               clEnumValN(ICGPR_I64, "i64", "Only i64 comparisons in GPRs."),
               clEnumValN(ICGPR_NonExtIn, "nonextin",
                          "Only comparisons where inputs don't need [sz]ext."),
               clEnumValN(ICGPR_Zext, "zext", "Only comparisons with zext result."),
               clEnumValN(ICGPR_ZextI32, "zexti32",
                          "Only i32 comparisons with zext result."),
               clEnumValN(ICGPR_ZextI64, "zexti64",
                          "Only i64 comparisons with zext result."),
               clEnumValN(ICGPR_Sext, "sext", "Only comparisons with sext result."),
               clEnumValN(ICGPR_SextI32, "sexti32",
                          "Only i32 comparisons with sext result."),
               clEnumValN(ICGPR_SextI64, "sexti64",
                          "Only i64 comparisons with sext result.")));

136

namespace {

  //===--------------------------------------------------------------------===//
  /// PPCDAGToDAGISel - PPC specific code to select PPC machine
  /// instructions for SelectionDAG operations.
  ///
  class PPCDAGToDAGISel : public SelectionDAGISel {
    const PPCTargetMachine &TM;
    // Both pointers are refreshed at the start of runOnMachineFunction().
    const PPCSubtarget *Subtarget = nullptr;
    const PPCTargetLowering *PPCLowering = nullptr;
    // Register holding the global base; 0 means "not materialized yet" for
    // the function currently being selected.
    unsigned GlobalBaseReg = 0;

  public:
    static char ID;

    PPCDAGToDAGISel() = delete;

    explicit PPCDAGToDAGISel(PPCTargetMachine &tm, CodeGenOpt::Level OptLevel)
        : SelectionDAGISel(ID, tm, OptLevel), TM(tm) {}

    /// Reset the per-function state, reserve the ROP protection hash slot when
    /// the subtarget asks for it, then run the generic selector. Always
    /// returns true.
    bool runOnMachineFunction(MachineFunction &MF) override {
      // Make sure we re-emit a set of the global base reg if necessary
      GlobalBaseReg = 0;
      Subtarget = &MF.getSubtarget<PPCSubtarget>();
      PPCLowering = Subtarget->getTargetLowering();
      if (Subtarget->hasROPProtect()) {
        // Create a place on the stack for the ROP Protection Hash.
        // The ROP Protection Hash will always be 8 bytes and aligned to 8
        // bytes.
        MachineFrameInfo &MFI = MF.getFrameInfo();
        PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
        const int Result = MFI.CreateStackObject(8, Align(8), false);
        FI->setROPProtectionHashSaveIndex(Result);
      }
      SelectionDAGISel::runOnMachineFunction(MF);

      return true;
    }

    void PreprocessISelDAG() override;
    void PostprocessISelDAG() override;

    /// getI16Imm - Return a target constant with the specified value, of type
    /// i16.
    inline SDValue getI16Imm(unsigned Imm, const SDLoc &dl) {
      return CurDAG->getTargetConstant(Imm, dl, MVT::i16);
    }

    /// getI32Imm - Return a target constant with the specified value, of type
    /// i32.
    inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
      return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
    }

    /// getI64Imm - Return a target constant with the specified value, of type
    /// i64.
    inline SDValue getI64Imm(uint64_t Imm, const SDLoc &dl) {
      return CurDAG->getTargetConstant(Imm, dl, MVT::i64);
    }

    /// getSmallIPtrImm - Return a target constant of pointer type.
    inline SDValue getSmallIPtrImm(uint64_t Imm, const SDLoc &dl) {
      return CurDAG->getTargetConstant(
          Imm, dl, PPCLowering->getPointerTy(CurDAG->getDataLayout()));
    }

    /// isRotateAndMask - Returns true if Mask and Shift can be folded into a
    /// rotate and mask opcode and mask operation.
    static bool isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask,
                                unsigned &SH, unsigned &MB, unsigned &ME);

    /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
    /// base register.  Return the virtual register that holds this value.
    SDNode *getGlobalBaseReg();

    void selectFrameIndex(SDNode *SN, SDNode *N, uint64_t Offset = 0);

    // Select - Convert the specified operand from a target-independent to a
    // target-specific node if it hasn't already been changed.
    void Select(SDNode *N) override;

    bool tryBitfieldInsert(SDNode *N);
    bool tryBitPermutation(SDNode *N);
    bool tryIntCompareInGPR(SDNode *N);

    // tryTLSXFormLoad - Convert an ISD::LOAD fed by a PPCISD::ADD_TLS into
    // an X-Form load instruction with the offset being a relocation coming from
    // the PPCISD::ADD_TLS.
    bool tryTLSXFormLoad(LoadSDNode *N);
    // tryTLSXFormStore - Convert an ISD::STORE fed by a PPCISD::ADD_TLS into
    // an X-Form store instruction with the offset being a relocation coming from
    // the PPCISD::ADD_TLS.
    bool tryTLSXFormStore(StoreSDNode *N);
    /// SelectCC - Select a comparison of the specified values with the
    /// specified condition code, returning the CR# of the expression.
    SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                     const SDLoc &dl, SDValue Chain = SDValue());

    /// SelectAddrImmOffs - Return true if the operand is valid for a preinc
    /// immediate field.  Note that the operand at this point is already the
    /// result of a prior SelectAddressRegImm call.
    bool SelectAddrImmOffs(SDValue N, SDValue &Out) const {
      if (N.getOpcode() == ISD::TargetConstant ||
          N.getOpcode() == ISD::TargetGlobalAddress) {
        Out = N;
        return true;
      }

      return false;
    }

    /// SelectDSForm - Returns true if address N can be represented by the
    /// addressing mode of DSForm instructions (a base register, plus a signed
    /// 16-bit displacement that is a multiple of 4).
    bool SelectDSForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
      return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
                                                Align(4)) == PPC::AM_DSForm;
    }

    /// SelectDQForm - Returns true if address N can be represented by the
    /// addressing mode of DQForm instructions (a base register, plus a signed
    /// 16-bit displacement that is a multiple of 16).
    bool SelectDQForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
      return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
                                                Align(16)) == PPC::AM_DQForm;
    }

    /// SelectDForm - Returns true if address N can be represented by
    /// the addressing mode of DForm instructions (a base register, plus a
    /// signed 16-bit immediate).
    bool SelectDForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
      return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
                                                std::nullopt) == PPC::AM_DForm;
    }

    /// SelectPCRelForm - Returns true if address N can be represented by
    /// PC-Relative addressing mode.
    bool SelectPCRelForm(SDNode *Parent, SDValue N, SDValue &Disp,
                         SDValue &Base) {
      return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
                                                std::nullopt) == PPC::AM_PCRel;
    }

    /// SelectPDForm - Returns true if address N can be represented by Prefixed
    /// DForm addressing mode (a base register, plus a signed 34-bit immediate).
    bool SelectPDForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
      return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
                                                std::nullopt) ==
             PPC::AM_PrefixDForm;
    }

    /// SelectXForm - Returns true if address N can be represented by the
    /// addressing mode of XForm instructions (an indexed [r+r] operation).
    bool SelectXForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
      return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
                                                std::nullopt) == PPC::AM_XForm;
    }

    /// SelectForceXForm - Given the specified address, force it to be
    /// represented as an indexed [r+r] operation (an XForm instruction).
    bool SelectForceXForm(SDNode *Parent, SDValue N, SDValue &Disp,
                          SDValue &Base) {
      return PPCLowering->SelectForceXFormMode(N, Disp, Base, *CurDAG) ==
             PPC::AM_XForm;
    }

    /// SelectAddrIdx - Given the specified address, check to see if it can be
    /// represented as an indexed [r+r] operation.
    /// This is for xform instructions whose associated displacement form is D.
    /// The last argument, std::nullopt, means the associated D form has no
    /// requirement for its 16 bit signed displacement.
    /// Returns false if it can be represented by [r+imm], which are preferred.
    bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) {
      return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG,
                                              std::nullopt);
    }

    /// SelectAddrIdxX4 - Given the specified address, check to see if it can be
    /// represented as an indexed [r+r] operation.
    /// This is for xform instructions whose associated displacement form is DS.
    /// The last argument, Align(4), means the associated DS form 16 bit signed
    /// displacement must be a multiple of 4.
    /// Returns false if it can be represented by [r+imm], which are preferred.
    bool SelectAddrIdxX4(SDValue N, SDValue &Base, SDValue &Index) {
      return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG,
                                              Align(4));
    }

    /// SelectAddrIdxX16 - Given the specified address, check to see if it can
    /// be represented as an indexed [r+r] operation.
    /// This is for xform instructions whose associated displacement form is DQ.
    /// The last argument, Align(16), means the associated DQ form 16 bit signed
    /// displacement must be a multiple of 16.
    /// Returns false if it can be represented by [r+imm], which are preferred.
    bool SelectAddrIdxX16(SDValue N, SDValue &Base, SDValue &Index) {
      return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG,
                                              Align(16));
    }

    /// SelectAddrIdxOnly - Given the specified address, force it to be
    /// represented as an indexed [r+r] operation.
    bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) {
      return PPCLowering->SelectAddressRegRegOnly(N, Base, Index, *CurDAG);
    }

    /// SelectAddrImm - Returns true if the address N can be represented by
    /// a base register plus a signed 16-bit displacement [r+imm].
    /// The last argument, std::nullopt, means the D form has no requirement
    /// for its 16 bit signed displacement.
    bool SelectAddrImm(SDValue N, SDValue &Disp,
                       SDValue &Base) {
      return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG,
                                              std::nullopt);
    }

    /// SelectAddrImmX4 - Returns true if the address N can be represented by
    /// a base register plus a signed 16-bit displacement that is a multiple of
    /// 4 (last parameter). Suitable for use by STD and friends.
    bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) {
      return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, Align(4));
    }

    /// SelectAddrImmX16 - Returns true if the address N can be represented by
    /// a base register plus a signed 16-bit displacement that is a multiple of
    /// 16 (last parameter). Suitable for use by STXV and friends.
    bool SelectAddrImmX16(SDValue N, SDValue &Disp, SDValue &Base) {
      return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG,
                                              Align(16));
    }

    /// SelectAddrImmX34 - Returns true if the address N can be represented by
    /// a base register plus a signed 34-bit displacement. Suitable for use by
    /// PSTXVP and friends.
    bool SelectAddrImmX34(SDValue N, SDValue &Disp, SDValue &Base) {
      return PPCLowering->SelectAddressRegImm34(N, Disp, Base, *CurDAG);
    }

    // Select an address into a single register.
    bool SelectAddr(SDValue N, SDValue &Base) {
      Base = N;
      return true;
    }

    bool SelectAddrPCRel(SDValue N, SDValue &Base) {
      return PPCLowering->SelectAddressPCRel(N, Base);
    }

    /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
    /// inline asm expressions.  It is always correct to compute the value into
    /// a register.  The case of adding a (possibly relocatable) constant to a
    /// register can be improved, but it is wrong to substitute Reg+Reg for
    /// Reg in an asm, because the load or store opcode would have to change.
    ///
    /// For every handled constraint the operand is wrapped in a
    /// COPY_TO_REGCLASS node, appended to \p OutOps, and false is returned.
    /// Any other constraint ID is printed to errs() and hits llvm_unreachable.
    bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                      unsigned ConstraintID,
                                      std::vector<SDValue> &OutOps) override {
      switch(ConstraintID) {
      default:
        errs() << "ConstraintID: " << ConstraintID << "\n";
        llvm_unreachable("Unexpected asm memory constraint");
      case InlineAsm::Constraint_es:
      case InlineAsm::Constraint_m:
      case InlineAsm::Constraint_o:
      case InlineAsm::Constraint_Q:
      case InlineAsm::Constraint_Z:
      case InlineAsm::Constraint_Zy:
        // We need to make sure that this one operand does not end up in r0
        // (because we might end up lowering this as 0(%op)).
        const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
        const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF, /*Kind=*/1);
        SDLoc dl(Op);
        SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i32);
        SDValue NewOp =
          SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
                                         dl, Op.getValueType(),
                                         Op, RC), 0);

        OutOps.push_back(NewOp);
        return false;
      }
      return true;
    }

// Include the pieces autogenerated from the target description.
#include "PPCGenDAGISel.inc"

private:
    bool trySETCC(SDNode *N);
    bool tryFoldSWTestBRCC(SDNode *N);
    bool trySelectLoopCountIntrinsic(SDNode *N);
    bool tryAsSingleRLDICL(SDNode *N);
    bool tryAsSingleRLDICR(SDNode *N);
    bool tryAsSingleRLWINM(SDNode *N);
    bool tryAsSingleRLWINM8(SDNode *N);
    bool tryAsSingleRLWIMI(SDNode *N);
    bool tryAsPairOfRLDICL(SDNode *N);
    bool tryAsSingleRLDIMI(SDNode *N);

    void PeepholePPC64();
    void PeepholePPC64ZExt();
    void PeepholeCROps();

    SDValue combineToCMPB(SDNode *N);
    void foldBoolExts(SDValue &Res, SDNode *&N);

    bool AllUsersSelectZero(SDNode *N);
    void SwapAllSelectUsers(SDNode *N);

    bool isOffsetMultipleOf(SDNode *N, unsigned Val) const;
    void transferMemOperands(SDNode *N, SDNode *Result);
  };

} // end anonymous namespace

448

449

char PPCDAGToDAGISel::ID = 0;

450

451

INITIALIZE_PASS(PPCDAGToDAGISel, DEBUG_TYPE, PASS_NAME, false, false)static void *initializePPCDAGToDAGISelPassOnce(PassRegistry &
Registry) { PassInfo *PI = new PassInfo( "PowerPC DAG->DAG Pattern Instruction Selection"
, "ppc-isel", &PPCDAGToDAGISel::ID, PassInfo::NormalCtor_t
(callDefaultCtor<PPCDAGToDAGISel>), false, false); Registry
.registerPass(*PI, true); return PI; } static llvm::once_flag
InitializePPCDAGToDAGISelPassFlag; void llvm::initializePPCDAGToDAGISelPass
(PassRegistry &Registry) { llvm::call_once(InitializePPCDAGToDAGISelPassFlag
, initializePPCDAGToDAGISelPassOnce, std::ref(Registry)); }

452

453

/// getGlobalBaseReg - Output the instructions required to put the

454

/// base address to use for accessing globals into a register.

455

///

456

SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {

457

if (!GlobalBaseReg) {

458

const TargetInstrInfo &TII = *Subtarget->getInstrInfo();

459

// Insert the set of GlobalBaseReg into the first MBB of the function

460

MachineBasicBlock &FirstMBB = MF->front();

461

MachineBasicBlock::iterator MBBI = FirstMBB.begin();

462

const Module *M = MF->getFunction().getParent();

463

DebugLoc dl;

464

465

if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) == MVT::i32) {

466

if (Subtarget->isTargetELF()) {

467

GlobalBaseReg = PPC::R30;

468

if (!Subtarget->isSecurePlt() &&

469

M->getPICLevel() == PICLevel::SmallPIC) {

470

BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MoveGOTtoLR));

471

BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);

472

MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);

473

} else {

474

BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));

475

BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);

476

Register TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);

477

BuildMI(FirstMBB, MBBI, dl,

478

TII.get(PPC::UpdateGBR), GlobalBaseReg)

479

.addReg(TempReg, RegState::Define).addReg(GlobalBaseReg);

480

MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);

481

}

482

} else {

483

GlobalBaseReg =

484

RegInfo->createVirtualRegister(&PPC::GPRC_and_GPRC_NOR0RegClass);

485

BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));

486

BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);

487

}

488

} else {

489

// We must ensure that this sequence is dominated by the prologue.

490

// FIXME: This is a bit of a big hammer since we don't get the benefits

491

// of shrink-wrapping whenever we emit this instruction. Considering

492

// this is used in any function where we emit a jump table, this may be

493

// a significant limitation. We should consider inserting this in the

494

// block where it is used and then commoning this sequence up if it

495

// appears in multiple places.

496

// Note: on ISA 3.0 cores, we can use lnia (addpcis) instead of

497

// MovePCtoLR8.

498

MF->getInfo<PPCFunctionInfo>()->setShrinkWrapDisabled(true);

499

GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);

500

BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8));

501

BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR8), GlobalBaseReg);

502

}

503

}

504

return CurDAG->getRegister(GlobalBaseReg,

505

PPCLowering->getPointerTy(CurDAG->getDataLayout()))

506

.getNode();

507

}

508

509

// Check if a SDValue has the toc-data attribute.

510

static bool hasTocDataAttr(SDValue Val, unsigned PointerSize) {

511

GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Val);

512

if (!GA)

513

return false;

514

515

const GlobalVariable *GV = dyn_cast_or_null<GlobalVariable>(GA->getGlobal());

516

if (!GV)

517

return false;

518

519

if (!GV->hasAttribute("toc-data"))

520

return false;

521

522

// TODO: These asserts should be updated as more support for the toc data

523

// transformation is added (struct support, etc.).

524

525

assert((static_cast <bool> (PointerSize >= GV->getAlign(
).valueOrOne().value() && "GlobalVariables with an alignment requirement stricter than TOC entry "
"size not supported by the toc data transformation.") ? void
(0) : __assert_fail ("PointerSize >= GV->getAlign().valueOrOne().value() && \"GlobalVariables with an alignment requirement stricter than TOC entry \" \"size not supported by the toc data transformation.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 528, __extension__
__PRETTY_FUNCTION__))

526

PointerSize >= GV->getAlign().valueOrOne().value() &&(static_cast <bool> (PointerSize >= GV->getAlign(
).valueOrOne().value() && "GlobalVariables with an alignment requirement stricter than TOC entry "
"size not supported by the toc data transformation.") ? void
(0) : __assert_fail ("PointerSize >= GV->getAlign().valueOrOne().value() && \"GlobalVariables with an alignment requirement stricter than TOC entry \" \"size not supported by the toc data transformation.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 528, __extension__
__PRETTY_FUNCTION__))

527

"GlobalVariables with an alignment requirement stricter than TOC entry "(static_cast <bool> (PointerSize >= GV->getAlign(
).valueOrOne().value() && "GlobalVariables with an alignment requirement stricter than TOC entry "
"size not supported by the toc data transformation.") ? void
(0) : __assert_fail ("PointerSize >= GV->getAlign().valueOrOne().value() && \"GlobalVariables with an alignment requirement stricter than TOC entry \" \"size not supported by the toc data transformation.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 528, __extension__
__PRETTY_FUNCTION__))

528

"size not supported by the toc data transformation.")(static_cast <bool> (PointerSize >= GV->getAlign(
).valueOrOne().value() && "GlobalVariables with an alignment requirement stricter than TOC entry "
"size not supported by the toc data transformation.") ? void
(0) : __assert_fail ("PointerSize >= GV->getAlign().valueOrOne().value() && \"GlobalVariables with an alignment requirement stricter than TOC entry \" \"size not supported by the toc data transformation.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 528, __extension__
__PRETTY_FUNCTION__));

529

530

Type *GVType = GV->getValueType();

531

532

assert(GVType->isSized() && "A GlobalVariable's size must be known to be "(static_cast <bool> (GVType->isSized() && "A GlobalVariable's size must be known to be "
"supported by the toc data transformation.") ? void (0) : __assert_fail
("GVType->isSized() && \"A GlobalVariable's size must be known to be \" \"supported by the toc data transformation.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 533, __extension__
__PRETTY_FUNCTION__))

533

"supported by the toc data transformation.")(static_cast <bool> (GVType->isSized() && "A GlobalVariable's size must be known to be "
"supported by the toc data transformation.") ? void (0) : __assert_fail
("GVType->isSized() && \"A GlobalVariable's size must be known to be \" \"supported by the toc data transformation.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 533, __extension__
__PRETTY_FUNCTION__));

534

535

if (GVType->isVectorTy())

536

report_fatal_error("A GlobalVariable of Vector type is not currently "

537

"supported by the toc data transformation.");

538

539

if (GVType->isArrayTy())

540

report_fatal_error("A GlobalVariable of Array type is not currently "

541

"supported by the toc data transformation.");

542

543

if (GVType->isStructTy())

544

report_fatal_error("A GlobalVariable of Struct type is not currently "

545

"supported by the toc data transformation.");

546

547

assert(GVType->getPrimitiveSizeInBits() <= PointerSize * 8 &&(static_cast <bool> (GVType->getPrimitiveSizeInBits(
) <= PointerSize * 8 && "A GlobalVariable with size larger than a TOC entry is not currently "
"supported by the toc data transformation.") ? void (0) : __assert_fail
("GVType->getPrimitiveSizeInBits() <= PointerSize * 8 && \"A GlobalVariable with size larger than a TOC entry is not currently \" \"supported by the toc data transformation.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 549, __extension__
__PRETTY_FUNCTION__))

548

"A GlobalVariable with size larger than a TOC entry is not currently "(static_cast <bool> (GVType->getPrimitiveSizeInBits(
) <= PointerSize * 8 && "A GlobalVariable with size larger than a TOC entry is not currently "
"supported by the toc data transformation.") ? void (0) : __assert_fail
("GVType->getPrimitiveSizeInBits() <= PointerSize * 8 && \"A GlobalVariable with size larger than a TOC entry is not currently \" \"supported by the toc data transformation.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 549, __extension__
__PRETTY_FUNCTION__))

549

"supported by the toc data transformation.")(static_cast <bool> (GVType->getPrimitiveSizeInBits(
) <= PointerSize * 8 && "A GlobalVariable with size larger than a TOC entry is not currently "
"supported by the toc data transformation.") ? void (0) : __assert_fail
("GVType->getPrimitiveSizeInBits() <= PointerSize * 8 && \"A GlobalVariable with size larger than a TOC entry is not currently \" \"supported by the toc data transformation.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 549, __extension__
__PRETTY_FUNCTION__));

550

551

if (GV->hasLocalLinkage() || GV->hasPrivateLinkage())

552

report_fatal_error("A GlobalVariable with private or local linkage is not "

553

"currently supported by the toc data transformation.");

554

555

assert(!GV->hasCommonLinkage() &&(static_cast <bool> (!GV->hasCommonLinkage() &&
"Tentative definitions cannot have the mapping class XMC_TD."
) ? void (0) : __assert_fail ("!GV->hasCommonLinkage() && \"Tentative definitions cannot have the mapping class XMC_TD.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 556, __extension__
__PRETTY_FUNCTION__))

556

"Tentative definitions cannot have the mapping class XMC_TD.")(static_cast <bool> (!GV->hasCommonLinkage() &&
"Tentative definitions cannot have the mapping class XMC_TD."
) ? void (0) : __assert_fail ("!GV->hasCommonLinkage() && \"Tentative definitions cannot have the mapping class XMC_TD.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 556, __extension__
__PRETTY_FUNCTION__));

557

558

return true;

559

}

560

561

/// isInt32Immediate - This method tests to see if the node is a 32-bit constant

562

/// operand. If so Imm will receive the 32-bit value.

563

static bool isInt32Immediate(SDNode *N, unsigned &Imm) {

564

if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {

565

Imm = cast<ConstantSDNode>(N)->getZExtValue();

566

return true;

567

}

568

return false;

569

}

570

571

/// isInt64Immediate - This method tests to see if the node is a 64-bit constant

572

/// operand. If so Imm will receive the 64-bit value.

573

static bool isInt64Immediate(SDNode *N, uint64_t &Imm) {

574

if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i64) {

575

Imm = cast<ConstantSDNode>(N)->getZExtValue();

576

return true;

577

}

578

return false;

579

}

580

581

// isInt32Immediate - This method tests to see if a constant operand.

582

// If so Imm will receive the 32 bit value.

583

static bool isInt32Immediate(SDValue N, unsigned &Imm) {

584

return isInt32Immediate(N.getNode(), Imm);

585

}

586

587

/// isInt64Immediate - This method tests to see if the value is a 64-bit

588

/// constant operand. If so Imm will receive the 64-bit value.

589

static bool isInt64Immediate(SDValue N, uint64_t &Imm) {

590

return isInt64Immediate(N.getNode(), Imm);

591

}

592

593

static unsigned getBranchHint(unsigned PCC,

594

const FunctionLoweringInfo &FuncInfo,

595

const SDValue &DestMBB) {

596

assert(isa<BasicBlockSDNode>(DestMBB))(static_cast <bool> (isa<BasicBlockSDNode>(DestMBB
)) ? void (0) : __assert_fail ("isa<BasicBlockSDNode>(DestMBB)"
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 596, __extension__
__PRETTY_FUNCTION__));

597

598

if (!FuncInfo.BPI) return PPC::BR_NO_HINT;

599

600

const BasicBlock *BB = FuncInfo.MBB->getBasicBlock();

601

const Instruction *BBTerm = BB->getTerminator();

602

603

if (BBTerm->getNumSuccessors() != 2) return PPC::BR_NO_HINT;

604

605

const BasicBlock *TBB = BBTerm->getSuccessor(0);

606

const BasicBlock *FBB = BBTerm->getSuccessor(1);

607

608

auto TProb = FuncInfo.BPI->getEdgeProbability(BB, TBB);

609

auto FProb = FuncInfo.BPI->getEdgeProbability(BB, FBB);

610

611

// We only want to handle cases which are easy to predict at static time, e.g.

612

// C++ throw statement, that is very likely not taken, or calling never

613

// returned function, e.g. stdlib exit(). So we set Threshold to filter

614

// unwanted cases.

615

//

616

// Below is LLVM branch weight table, we only want to handle case 1, 2

617

//

618

// Case Taken:Nontaken Example

619

// 1. Unreachable 1048575:1 C++ throw, stdlib exit(),

620

// 2. Invoke-terminating 1:1048575

621

// 3. Coldblock 4:64 __builtin_expect

622

// 4. Loop Branch 124:4 For loop

623

// 5. PH/ZH/FPH 20:12

624

const uint32_t Threshold = 10000;

625

626

if (std::max(TProb, FProb) / Threshold < std::min(TProb, FProb))

627

return PPC::BR_NO_HINT;

628

629

LLVM_DEBUG(dbgs() << "Use branch hint for '" << FuncInfo.Fn->getName()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-isel")) { dbgs() << "Use branch hint for '" <<
FuncInfo.Fn->getName() << "::" << BB->getName
() << "'\n" << " -> " << TBB->getName
() << ": " << TProb << "\n" << " -> "
<< FBB->getName() << ": " << FProb <<
"\n"; } } while (false)

630

<< "::" << BB->getName() << "'\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-isel")) { dbgs() << "Use branch hint for '" <<
FuncInfo.Fn->getName() << "::" << BB->getName
() << "'\n" << " -> " << TBB->getName
() << ": " << TProb << "\n" << " -> "
<< FBB->getName() << ": " << FProb <<
"\n"; } } while (false)

631

<< " -> " << TBB->getName() << ": " << TProb << "\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-isel")) { dbgs() << "Use branch hint for '" <<
FuncInfo.Fn->getName() << "::" << BB->getName
() << "'\n" << " -> " << TBB->getName
() << ": " << TProb << "\n" << " -> "
<< FBB->getName() << ": " << FProb <<
"\n"; } } while (false)

632

<< " -> " << FBB->getName() << ": " << FProb << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-isel")) { dbgs() << "Use branch hint for '" <<
FuncInfo.Fn->getName() << "::" << BB->getName
() << "'\n" << " -> " << TBB->getName
() << ": " << TProb << "\n" << " -> "
<< FBB->getName() << ": " << FProb <<
"\n"; } } while (false);

633

634

const BasicBlockSDNode *BBDN = cast<BasicBlockSDNode>(DestMBB);

635

636

// If Dest BasicBlock is False-BasicBlock (FBB), swap branch probabilities,

637

// because we want 'TProb' stands for 'branch probability' to Dest BasicBlock

638

if (BBDN->getBasicBlock()->getBasicBlock() != TBB)

639

std::swap(TProb, FProb);

640

641

return (TProb > FProb) ? PPC::BR_TAKEN_HINT : PPC::BR_NONTAKEN_HINT;

642

}

643

644

// isOpcWithIntImmediate - This method tests to see if the node is a specific

645

// opcode and that it has a immediate integer right operand.

646

// If so Imm will receive the 32 bit value.

647

static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {

648

return N->getOpcode() == Opc

649

&& isInt32Immediate(N->getOperand(1).getNode(), Imm);

650

}

651

652

/// Select \p SN as an ADDI (i32) or ADDI8 (otherwise) that adds \p Offset to
/// the target frame index taken from the FrameIndexSDNode \p N.
///
/// When \p SN has a single use it is morphed in place with SelectNodeTo;
/// otherwise a fresh machine node is created and \p SN is replaced by it.
void PPCDAGToDAGISel::selectFrameIndex(SDNode *SN, SDNode *N, uint64_t Offset) {
  SDLoc dl(SN);
  const int FI = cast<FrameIndexSDNode>(N)->getIndex();
  const EVT PtrVT = N->getValueType(0);
  SDValue TFI = CurDAG->getTargetFrameIndex(FI, PtrVT);
  const unsigned Opc = (PtrVT == MVT::i32) ? PPC::ADDI : PPC::ADDI8;

  if (!SN->hasOneUse()) {
    ReplaceNode(SN, CurDAG->getMachineNode(Opc, dl, PtrVT, TFI,
                                           getSmallIPtrImm(Offset, dl)));
    return;
  }

  CurDAG->SelectNodeTo(SN, Opc, PtrVT, TFI, getSmallIPtrImm(Offset, dl));
}

664

665

bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask,

666

bool isShiftMask, unsigned &SH,

667

unsigned &MB, unsigned &ME) {

668

// Don't even go down this path for i64, since different logic will be

669

// necessary for rldicl/rldicr/rldimi.

670

if (N->getValueType(0) != MVT::i32)

671

return false;

672

673

unsigned Shift = 32;

674

unsigned Indeterminant = ~0; // bit mask marking indeterminant results

675

unsigned Opcode = N->getOpcode();

676

if (N->getNumOperands() != 2 ||

677

!isInt32Immediate(N->getOperand(1).getNode(), Shift) || (Shift > 31))

678

return false;

679

680

if (Opcode == ISD::SHL) {

681

// apply shift left to mask if it comes first

682

if (isShiftMask) Mask = Mask << Shift;

683

// determine which bits are made indeterminant by shift

684

Indeterminant = ~(0xFFFFFFFFu << Shift);

685

} else if (Opcode == ISD::SRL) {

686

// apply shift right to mask if it comes first

687

if (isShiftMask) Mask = Mask >> Shift;

688

// determine which bits are made indeterminant by shift

689

Indeterminant = ~(0xFFFFFFFFu >> Shift);

690

// adjust for the left rotate

691

Shift = 32 - Shift;

692

} else if (Opcode == ISD::ROTL) {

693

Indeterminant = 0;

694

} else {

695

return false;

696

}

697

698

// if the mask doesn't intersect any Indeterminant bits

699

if (Mask && !(Mask & Indeterminant)) {

700

SH = Shift & 31;

701

// make sure the mask is still a mask (wrap arounds may not be)

702

return isRunOfOnes(Mask, MB, ME);

703

}

704

return false;

705

}

706

707

/// Try to select the store \p ST as an X-form TLS store (ST*XTLS).
///
/// This applies only when the base pointer is a PPCISD::ADD_TLS node, the
/// store's offset operand is undef, and the second ADD_TLS operand is not a
/// PPCISD::TLS_LOCAL_EXEC_MAT_ADDR node. The opcode is chosen from the memory
/// type (i8/i16/i32/i64) and, for the sub-64-bit cases, from whether the
/// stored value is i32.
///
/// \returns true if \p ST was replaced by a machine node, false otherwise.
bool PPCDAGToDAGISel::tryTLSXFormStore(StoreSDNode *ST) {
  SDValue Base = ST->getBasePtr();
  if (Base.getOpcode() != PPCISD::ADD_TLS || !ST->getOffset().isUndef() ||
      Base.getOperand(1).getOpcode() == PPCISD::TLS_LOCAL_EXEC_MAT_ADDR)
    return false;

  SDLoc dl(ST);
  const EVT MemVT = ST->getMemoryVT();
  const bool StoresI32Reg = ST->getValue().getValueType() == MVT::i32;

  unsigned Opcode;
  switch (MemVT.getSimpleVT().SimpleTy) {
  case MVT::i8:
    Opcode = StoresI32Reg ? PPC::STBXTLS_32 : PPC::STBXTLS;
    break;
  case MVT::i16:
    Opcode = StoresI32Reg ? PPC::STHXTLS_32 : PPC::STHXTLS;
    break;
  case MVT::i32:
    Opcode = StoresI32Reg ? PPC::STWXTLS_32 : PPC::STWXTLS;
    break;
  case MVT::i64:
    Opcode = PPC::STDXTLS;
    break;
  default:
    return false;
  }

  // Operands: stored value, the two ADD_TLS operands, then the chain.
  SDValue Ops[] = {ST->getValue(), Base.getOperand(0), Base.getOperand(1),
                   ST->getChain()};
  SDNode *MN = CurDAG->getMachineNode(Opcode, dl, ST->getVTList(), Ops);
  transferMemOperands(ST, MN);
  ReplaceNode(ST, MN);
  return true;
}

751

752

/// Try to select the load \p LD as an X-form TLS load (L*XTLS).
///
/// This applies only when the base pointer is a PPCISD::ADD_TLS node, the
/// load's offset operand is undef, and the second ADD_TLS operand is not a
/// PPCISD::TLS_LOCAL_EXEC_MAT_ADDR node. The opcode is chosen from the memory
/// type (i8/i16/i32/i64) and, for the sub-64-bit cases, from whether the
/// loaded result is i32.
///
/// \returns true if \p LD was replaced by a machine node, false otherwise.
bool PPCDAGToDAGISel::tryTLSXFormLoad(LoadSDNode *LD) {
  SDValue Base = LD->getBasePtr();
  if (Base.getOpcode() != PPCISD::ADD_TLS || !LD->getOffset().isUndef() ||
      Base.getOperand(1).getOpcode() == PPCISD::TLS_LOCAL_EXEC_MAT_ADDR)
    return false;

  SDLoc dl(LD);
  const EVT MemVT = LD->getMemoryVT();
  const bool LoadsI32Reg = LD->getValueType(0) == MVT::i32;

  unsigned Opcode;
  switch (MemVT.getSimpleVT().SimpleTy) {
  case MVT::i8:
    Opcode = LoadsI32Reg ? PPC::LBZXTLS_32 : PPC::LBZXTLS;
    break;
  case MVT::i16:
    Opcode = LoadsI32Reg ? PPC::LHZXTLS_32 : PPC::LHZXTLS;
    break;
  case MVT::i32:
    Opcode = LoadsI32Reg ? PPC::LWZXTLS_32 : PPC::LWZXTLS;
    break;
  case MVT::i64:
    Opcode = PPC::LDXTLS;
    break;
  default:
    return false;
  }

  // Operands: the two ADD_TLS operands, then the chain.
  SDValue Ops[] = {Base.getOperand(0), Base.getOperand(1), LD->getChain()};
  SDNode *MN = CurDAG->getMachineNode(Opcode, dl, LD->getVTList(), Ops);
  transferMemOperands(LD, MN);
  ReplaceNode(LD, MN);
  return true;
}

794

795

/// Turn an or of two masked values into the rotate left word immediate then

796

/// mask insert (rlwimi) instruction.

797

bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) {

798

SDValue Op0 = N->getOperand(0);

799

SDValue Op1 = N->getOperand(1);

800

SDLoc dl(N);

801

802

KnownBits LKnown = CurDAG->computeKnownBits(Op0);

803

KnownBits RKnown = CurDAG->computeKnownBits(Op1);

804

805

unsigned TargetMask = LKnown.Zero.getZExtValue();

806

unsigned InsertMask = RKnown.Zero.getZExtValue();

807

808

if ((TargetMask | InsertMask) == 0xFFFFFFFF) {

809

unsigned Op0Opc = Op0.getOpcode();

810

unsigned Op1Opc = Op1.getOpcode();

811

unsigned Value, SH = 0;

812

TargetMask = ~TargetMask;

813

InsertMask = ~InsertMask;

814

815

// If the LHS has a foldable shift and the RHS does not, then swap it to the

816

// RHS so that we can fold the shift into the insert.

817

if (Op0Opc == ISD::AND && Op1Opc == ISD::AND) {

818

if (Op0.getOperand(0).getOpcode() == ISD::SHL ||

819

Op0.getOperand(0).getOpcode() == ISD::SRL) {

820

if (Op1.getOperand(0).getOpcode() != ISD::SHL &&

821

Op1.getOperand(0).getOpcode() != ISD::SRL) {

822

std::swap(Op0, Op1);

823

std::swap(Op0Opc, Op1Opc);

824

std::swap(TargetMask, InsertMask);

825

}

826

}

827

} else if (Op0Opc == ISD::SHL || Op0Opc == ISD::SRL) {

828

if (Op1Opc == ISD::AND && Op1.getOperand(0).getOpcode() != ISD::SHL &&

829

Op1.getOperand(0).getOpcode() != ISD::SRL) {

830

std::swap(Op0, Op1);

831

std::swap(Op0Opc, Op1Opc);

832

std::swap(TargetMask, InsertMask);

833

}

834

}

835

836

unsigned MB, ME;

837

if (isRunOfOnes(InsertMask, MB, ME)) {

838

if ((Op1Opc == ISD::SHL || Op1Opc == ISD::SRL) &&

839

isInt32Immediate(Op1.getOperand(1), Value)) {

840

Op1 = Op1.getOperand(0);

841

SH = (Op1Opc == ISD::SHL) ? Value : 32 - Value;

842

}

843

if (Op1Opc == ISD::AND) {

844

// The AND mask might not be a constant, and we need to make sure that

845

// if we're going to fold the masking with the insert, all bits not

846

// know to be zero in the mask are known to be one.

847

KnownBits MKnown = CurDAG->computeKnownBits(Op1.getOperand(1));

848

bool CanFoldMask = InsertMask == MKnown.One.getZExtValue();

849

850

unsigned SHOpc = Op1.getOperand(0).getOpcode();

851

if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) && CanFoldMask &&

852

isInt32Immediate(Op1.getOperand(0).getOperand(1), Value)) {

853

// Note that Value must be in range here (less than 32) because

854

// otherwise there would not be any bits set in InsertMask.

855

Op1 = Op1.getOperand(0).getOperand(0);

856

SH = (SHOpc == ISD::SHL) ? Value : 32 - Value;

857

}

858

}

859

860

SH &= 31;

861

SDValue Ops[] = { Op0, Op1, getI32Imm(SH, dl), getI32Imm(MB, dl),

862

getI32Imm(ME, dl) };

863

ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));

864

return true;

865

}

866

}

867

return false;

868

}

869

870

/// Examine every use of \p N and report the widest width (in bits) that any of
/// them keeps of its value.
///
/// A use counts as truncating when it is an ISD::TRUNCATE, an ISD::STORE with
/// a non-64-bit memory type that uses \p N as operand 0, or one of the listed
/// PPC byte/halfword/word store machine opcodes that uses \p N as operand 0.
///
/// \returns the largest such width, or 0 as soon as any use does not qualify
///          (0 is also returned when \p N has no uses).
static unsigned allUsesTruncate(SelectionDAG *CurDAG, SDNode *N) {
  unsigned MaxTruncation = 0;
  // Cannot use range-based for loop here as we need the actual use (i.e. we
  // need the operand number corresponding to the use). A range-based for
  // will unbox the use and provide an SDNode*.
  for (SDNode::use_iterator Use = N->use_begin(), UseEnd = N->use_end();
       Use != UseEnd; ++Use) {
    const bool IsMachineNode = Use->isMachineOpcode();
    const unsigned Opc =
        IsMachineNode ? Use->getMachineOpcode() : Use->getOpcode();

    // Width kept by this particular use; filled in by the matching case.
    unsigned UseWidth = 0;
    switch (Opc) {
    default:
      return 0;
    case ISD::TRUNCATE:
      // A machine opcode that merely shares the numeric value does not count.
      if (IsMachineNode)
        return 0;
      UseWidth = (unsigned)Use->getValueType(0).getSizeInBits();
      break;
    case ISD::STORE: {
      if (IsMachineNode)
        return 0;
      StoreSDNode *STN = cast<StoreSDNode>(*Use);
      UseWidth = STN->getMemoryVT().getSizeInBits();
      if (UseWidth == 64 || Use.getOperandNo() != 0)
        return 0;
      break;
    }
    case PPC::STW8:
    case PPC::STWX8:
    case PPC::STWU8:
    case PPC::STWUX8:
      if (Use.getOperandNo() != 0)
        return 0;
      UseWidth = 32u;
      break;
    case PPC::STH8:
    case PPC::STHX8:
    case PPC::STHU8:
    case PPC::STHUX8:
      if (Use.getOperandNo() != 0)
        return 0;
      UseWidth = 16u;
      break;
    case PPC::STB8:
    case PPC::STBX8:
    case PPC::STBU8:
    case PPC::STBUX8:
      if (Use.getOperandNo() != 0)
        return 0;
      UseWidth = 8u;
      break;
    }

    MaxTruncation = std::max(MaxTruncation, UseWidth);
  }
  return MaxTruncation;
}

925

926

// For any 32 < Num < 64, check if the Imm contains at least Num consecutive

927

// zeros and return the number of bits by the left of these consecutive zeros.

928

static int findContiguousZerosAtLeast(uint64_t Imm, unsigned Num) {

929

unsigned HiTZ = llvm::countr_zero<uint32_t>(Hi_32(Imm));

930

unsigned LoLZ = llvm::countl_zero<uint32_t>(Lo_32(Imm));

931

if ((HiTZ + LoLZ) >= Num)

932

return (32 + HiTZ);

933

return 0;

934

}

935

936

// Direct materialization of 64-bit constants by enumerated patterns.

937

static SDNode *selectI64ImmDirect(SelectionDAG *CurDAG, const SDLoc &dl,

938

uint64_t Imm, unsigned &InstCnt) {

939

unsigned TZ = llvm::countr_zero<uint64_t>(Imm);

940

unsigned LZ = llvm::countl_zero<uint64_t>(Imm);

941

unsigned TO = llvm::countr_one<uint64_t>(Imm);

942

unsigned LO = llvm::countl_one<uint64_t>(Imm);

943

unsigned Hi32 = Hi_32(Imm);

944

unsigned Lo32 = Lo_32(Imm);

945

SDNode *Result = nullptr;

946

unsigned Shift = 0;

947

948

auto getI32Imm = [CurDAG, dl](unsigned Imm) {

949

return CurDAG->getTargetConstant(Imm, dl, MVT::i32);

950

};

951

952

// Following patterns use 1 instructions to materialize the Imm.

953

InstCnt = 1;

954

// 1-1) Patterns : {zeros}{15-bit valve}

955

// {ones}{15-bit valve}

956

if (isInt<16>(Imm)) {

957

SDValue SDImm = CurDAG->getTargetConstant(Imm, dl, MVT::i64);

958

return CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm);

959

}

960

// 1-2) Patterns : {zeros}{15-bit valve}{16 zeros}

961

// {ones}{15-bit valve}{16 zeros}

962

if (TZ > 15 && (LZ > 32 || LO > 32))

963

return CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64,

964

getI32Imm((Imm >> 16) & 0xffff));

965

966

// Following patterns use 2 instructions to materialize the Imm.

967

InstCnt = 2;

968

assert(LZ < 64 && "Unexpected leading zeros here.")(static_cast <bool> (LZ < 64 && "Unexpected leading zeros here."
) ? void (0) : __assert_fail ("LZ < 64 && \"Unexpected leading zeros here.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 968, __extension__
__PRETTY_FUNCTION__));

969

// Count of ones follwing the leading zeros.

970

unsigned FO = llvm::countl_one<uint64_t>(Imm << LZ);

971

// 2-1) Patterns : {zeros}{31-bit value}

972

// {ones}{31-bit value}

973

if (isInt<32>(Imm)) {

974

uint64_t ImmHi16 = (Imm >> 16) & 0xffff;

975

unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;

976

Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));

977

return CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),

978

getI32Imm(Imm & 0xffff));

979

}

980

// 2-2) Patterns : {zeros}{ones}{15-bit value}{zeros}

981

// {zeros}{15-bit value}{zeros}

982

// {zeros}{ones}{15-bit value}

983

// {ones}{15-bit value}{zeros}

984

// We can take advantage of LI's sign-extension semantics to generate leading

985

// ones, and then use RLDIC to mask off the ones in both sides after rotation.

986

if ((LZ + FO + TZ) > 48) {

987

Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,

988

getI32Imm((Imm >> TZ) & 0xffff));

989

return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0),

990

getI32Imm(TZ), getI32Imm(LZ));

991

}

992

// 2-3) Pattern : {zeros}{15-bit value}{ones}

993

// Shift right the Imm by (48 - LZ) bits to construct a negtive 16 bits value,

994

// therefore we can take advantage of LI's sign-extension semantics, and then

995

// mask them off after rotation.

996

//

997

// +--LZ--||-15-bit-||--TO--+ +-------------|--16-bit--+

998

// |00000001bbbbbbbbb1111111| -> |00000000000001bbbbbbbbb1|

999

// +------------------------+ +------------------------+

1000

// 63 0 63 0

1001

// Imm (Imm >> (48 - LZ) & 0xffff)

1002

// +----sext-----|--16-bit--+ +clear-|-----------------+

1003

// |11111111111111bbbbbbbbb1| -> |00000001bbbbbbbbb1111111|

1004

// +------------------------+ +------------------------+

1005

// 63 0 63 0

1006

// LI8: sext many leading zeros RLDICL: rotate left (48 - LZ), clear left LZ

1007

if ((LZ + TO) > 48) {

1008

// Since the immediates with (LZ > 32) have been handled by previous

1009

// patterns, here we have (LZ <= 32) to make sure we will not shift right

1010

// the Imm by a negative value.

1011

assert(LZ <= 32 && "Unexpected shift value.")(static_cast <bool> (LZ <= 32 && "Unexpected shift value."
) ? void (0) : __assert_fail ("LZ <= 32 && \"Unexpected shift value.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 1011, __extension__
__PRETTY_FUNCTION__));

1012

Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,

1013

getI32Imm((Imm >> (48 - LZ) & 0xffff)));

1014

return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),

1015

getI32Imm(48 - LZ), getI32Imm(LZ));

1016

}

1017

// 2-4) Patterns : {zeros}{ones}{15-bit value}{ones}

1018

// {ones}{15-bit value}{ones}

1019

// We can take advantage of LI's sign-extension semantics to generate leading

1020

// ones, and then use RLDICL to mask off the ones in left sides (if required)

1021

// after rotation.

1022

//

1023

// +-LZ-FO||-15-bit-||--TO--+ +-------------|--16-bit--+

1024

// |00011110bbbbbbbbb1111111| -> |000000000011110bbbbbbbbb|

1025

// +------------------------+ +------------------------+

1026

// 63 0 63 0

1027

// Imm (Imm >> TO) & 0xffff

1028

// +----sext-----|--16-bit--+ +LZ|---------------------+

1029

// |111111111111110bbbbbbbbb| -> |00011110bbbbbbbbb1111111|

1030

// +------------------------+ +------------------------+

1031

// 63 0 63 0

1032

// LI8: sext many leading zeros RLDICL: rotate left TO, clear left LZ

1033

if ((LZ + FO + TO) > 48) {

1034

Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,

1035

getI32Imm((Imm >> TO) & 0xffff));

1036

return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),

1037

getI32Imm(TO), getI32Imm(LZ));

1038

}

1039

// 2-5) Pattern : {32 zeros}{****}{0}{15-bit value}

1040

// If Hi32 is zero and the Lo16(in Lo32) can be presented as a positive 16 bit

1041

// value, we can use LI for Lo16 without generating leading ones then add the

1042

// Hi16(in Lo32).

1043

if (LZ == 32 && ((Lo32 & 0x8000) == 0)) {

1044

Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,

1045

getI32Imm(Lo32 & 0xffff));

1046

return CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64, SDValue(Result, 0),

1047

getI32Imm(Lo32 >> 16));

1048

}

1049

// 2-6) Patterns : {******}{49 zeros}{******}

1050

// {******}{49 ones}{******}

1051

// If the Imm contains 49 consecutive zeros/ones, it means that a total of 15

1052

// bits remain on both sides. Rotate right the Imm to construct an int<16>

1053

// value, use LI for int<16> value and then use RLDICL without mask to rotate

1054

// it back.

1055

//

1056

// 1) findContiguousZerosAtLeast(Imm, 49)

1057

// +------|--zeros-|------+ +---ones--||---15 bit--+

1058

// |bbbbbb0000000000aaaaaa| -> |0000000000aaaaaabbbbbb|

1059

// +----------------------+ +----------------------+

1060

// 63 0 63 0

1061

//

1062

// 2) findContiguousZerosAtLeast(~Imm, 49)

1063

// +------|--ones--|------+ +---ones--||---15 bit--+

1064

// |bbbbbb1111111111aaaaaa| -> |1111111111aaaaaabbbbbb|

1065

// +----------------------+ +----------------------+

1066

// 63 0 63 0

1067

if ((Shift = findContiguousZerosAtLeast(Imm, 49)) ||

1068

(Shift = findContiguousZerosAtLeast(~Imm, 49))) {

1069

uint64_t RotImm = APInt(64, Imm).rotr(Shift).getZExtValue();

1070

Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,

1071

getI32Imm(RotImm & 0xffff));

1072

return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),

1073

getI32Imm(Shift), getI32Imm(0));

1074

}

1075

1076

// Following patterns use 3 instructions to materialize the Imm.

1077

InstCnt = 3;

1078

// 3-1) Patterns : {zeros}{ones}{31-bit value}{zeros}

1079

// {zeros}{31-bit value}{zeros}

1080

// {zeros}{ones}{31-bit value}

1081

// {ones}{31-bit value}{zeros}

1082

// We can take advantage of LIS's sign-extension semantics to generate leading

1083

// ones, add the remaining bits with ORI, and then use RLDIC to mask off the

1084

// ones in both sides after rotation.

1085

if ((LZ + FO + TZ) > 32) {

1086

uint64_t ImmHi16 = (Imm >> (TZ + 16)) & 0xffff;

1087

unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;

1088

Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));

1089

Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),

1090

getI32Imm((Imm >> TZ) & 0xffff));

1091

return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0),

1092

getI32Imm(TZ), getI32Imm(LZ));

1093

}

1094

// 3-2) Pattern : {zeros}{31-bit value}{ones}

1095

// Shift right the Imm by (32 - LZ) bits to construct a negative 32 bits

1096

// value, therefore we can take advantage of LIS's sign-extension semantics,

1097

// add the remaining bits with ORI, and then mask them off after rotation.

1098

// This is similar to Pattern 2-3, please refer to the diagram there.

1099

if ((LZ + TO) > 32) {

1100

// Since the immediates with (LZ > 32) have been handled by previous

1101

// patterns, here we have (LZ <= 32) to make sure we will not shift right

1102

// the Imm by a negative value.

1103

assert(LZ <= 32 && "Unexpected shift value.")(static_cast <bool> (LZ <= 32 && "Unexpected shift value."
) ? void (0) : __assert_fail ("LZ <= 32 && \"Unexpected shift value.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 1103, __extension__
__PRETTY_FUNCTION__));

1104

Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64,

1105

getI32Imm((Imm >> (48 - LZ)) & 0xffff));

1106

Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),

1107

getI32Imm((Imm >> (32 - LZ)) & 0xffff));

1108

return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),

1109

getI32Imm(32 - LZ), getI32Imm(LZ));

1110

}

1111

// 3-3) Patterns : {zeros}{ones}{31-bit value}{ones}

1112

// {ones}{31-bit value}{ones}

1113

// We can take advantage of LIS's sign-extension semantics to generate leading

1114

// ones, add the remaining bits with ORI, and then use RLDICL to mask off the

1115

// ones in left sides (if required) after rotation.

1116

// This is similar to Pattern 2-4, please refer to the diagram there.

1117

if ((LZ + FO + TO) > 32) {

1118

Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64,

1119

getI32Imm((Imm >> (TO + 16)) & 0xffff));

1120

Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),

1121

getI32Imm((Imm >> TO) & 0xffff));

1122

return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),

1123

getI32Imm(TO), getI32Imm(LZ));

1124

}

1125

// 3-4) Patterns : High word == Low word

1126

if (Hi32 == Lo32) {

1127

// Handle the first 32 bits.

1128

uint64_t ImmHi16 = (Lo32 >> 16) & 0xffff;

1129

unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;

1130

Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));

1131

Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),

1132

getI32Imm(Lo32 & 0xffff));

1133

// Use rldimi to insert the Low word into High word.

1134

SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(32),

1135

getI32Imm(0)};

1136

return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);

1137

}

1138

// 3-5) Patterns : {******}{33 zeros}{******}

1139

// {******}{33 ones}{******}

1140

// If the Imm contains 33 consecutive zeros/ones, it means that a total of 31

1141

// bits remain on both sides. Rotate right the Imm to construct an int<32>

1142

// value, use LIS + ORI for int<32> value and then use RLDICL without mask to

1143

// rotate it back.

1144

// This is similar to Pattern 2-6, please refer to the diagram there.

1145

if ((Shift = findContiguousZerosAtLeast(Imm, 33)) ||

1146

(Shift = findContiguousZerosAtLeast(~Imm, 33))) {

1147

uint64_t RotImm = APInt(64, Imm).rotr(Shift).getZExtValue();

1148

uint64_t ImmHi16 = (RotImm >> 16) & 0xffff;

1149

unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;

1150

Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));

1151

Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),

1152

getI32Imm(RotImm & 0xffff));

1153

return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),

1154

getI32Imm(Shift), getI32Imm(0));

1155

}

1156

1157

InstCnt = 0;

1158

return nullptr;

1159

}

1160

1161

// Try to select instructions to generate a 64 bit immediate using prefix as

1162

// well as non prefix instructions. The function will return the SDNode

1163

// to materialize that constant or it will return nullptr if it does not

1164

// find one. The variable InstCnt is set to the number of instructions that

1165

// were selected.

1166

static SDNode *selectI64ImmDirectPrefix(SelectionDAG *CurDAG, const SDLoc &dl,

1167

uint64_t Imm, unsigned &InstCnt) {

1168

unsigned TZ = llvm::countr_zero<uint64_t>(Imm);

1169

unsigned LZ = llvm::countl_zero<uint64_t>(Imm);

1170

unsigned TO = llvm::countr_one<uint64_t>(Imm);

1171

unsigned FO = llvm::countl_one<uint64_t>(LZ == 64 ? 0 : (Imm << LZ));

1172

unsigned Hi32 = Hi_32(Imm);

1173

unsigned Lo32 = Lo_32(Imm);

1174

1175

auto getI32Imm = [CurDAG, dl](unsigned Imm) {

1176

return CurDAG->getTargetConstant(Imm, dl, MVT::i32);

1177

};

1178

1179

auto getI64Imm = [CurDAG, dl](uint64_t Imm) {

1180

return CurDAG->getTargetConstant(Imm, dl, MVT::i64);

1181

};

1182

1183

// Following patterns use 1 instruction to materialize Imm.

1184

InstCnt = 1;

1185

1186

// The pli instruction can materialize up to 34 bits directly.

1187

// If a constant fits within 34-bits, emit the pli instruction here directly.

1188

if (isInt<34>(Imm))

1189

return CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,

1190

CurDAG->getTargetConstant(Imm, dl, MVT::i64));

1191

1192

// Require at least two instructions.

1193

InstCnt = 2;

1194

SDNode *Result = nullptr;

1195

// Patterns : {zeros}{ones}{33-bit value}{zeros}

1196

// {zeros}{33-bit value}{zeros}

1197

// {zeros}{ones}{33-bit value}

1198

// {ones}{33-bit value}{zeros}

1199

// We can take advantage of PLI's sign-extension semantics to generate leading

1200

// ones, and then use RLDIC to mask off the ones on both sides after rotation.

1201

if ((LZ + FO + TZ) > 30) {

1202

APInt SignedInt34 = APInt(34, (Imm >> TZ) & 0x3ffffffff);

1203

APInt Extended = SignedInt34.sext(64);

1204

Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,

1205

getI64Imm(*Extended.getRawData()));

1206

return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0),

1207

getI32Imm(TZ), getI32Imm(LZ));

1208

}

1209

// Pattern : {zeros}{33-bit value}{ones}

1210

// Shift right the Imm by (30 - LZ) bits to construct a negative 34 bit value,

1211

// therefore we can take advantage of PLI's sign-extension semantics, and then

1212

// mask them off after rotation.

1213

//

1214

// +--LZ--||-33-bit-||--TO--+ +-------------|--34-bit--+

1215

// |00000001bbbbbbbbb1111111| -> |00000000000001bbbbbbbbb1|

1216

// +------------------------+ +------------------------+

1217

// 63 0 63 0

1218

//

1219

// +----sext-----|--34-bit--+ +clear-|-----------------+

1220

// |11111111111111bbbbbbbbb1| -> |00000001bbbbbbbbb1111111|

1221

// +------------------------+ +------------------------+

1222

// 63 0 63 0

1223

if ((LZ + TO) > 30) {

1224

APInt SignedInt34 = APInt(34, (Imm >> (30 - LZ)) & 0x3ffffffff);

1225

APInt Extended = SignedInt34.sext(64);

1226

Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,

1227

getI64Imm(*Extended.getRawData()));

1228

return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),

1229

getI32Imm(30 - LZ), getI32Imm(LZ));

1230

}

1231

// Patterns : {zeros}{ones}{33-bit value}{ones}

1232

// {ones}{33-bit value}{ones}

1233

// Similar to LI we can take advantage of PLI's sign-extension semantics to

1234

// generate leading ones, and then use RLDICL to mask off the ones in left

1235

// sides (if required) after rotation.

1236

if ((LZ + FO + TO) > 30) {

1237

APInt SignedInt34 = APInt(34, (Imm >> TO) & 0x3ffffffff);

1238

APInt Extended = SignedInt34.sext(64);

1239

Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,

1240

getI64Imm(*Extended.getRawData()));

1241

return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),

1242

getI32Imm(TO), getI32Imm(LZ));

1243

}

1244

// Patterns : {******}{31 zeros}{******}

1245

// : {******}{31 ones}{******}

1246

// If Imm contains 31 consecutive zeros/ones then the remaining bit count

1247

// is 33. Rotate right the Imm to construct a int<33> value, we can use PLI

1248

// for the int<33> value and then use RLDICL without a mask to rotate it back.

1249

//

1250

// +------|--ones--|------+ +---ones--||---33 bit--+

1251

// |bbbbbb1111111111aaaaaa| -> |1111111111aaaaaabbbbbb|

1252

// +----------------------+ +----------------------+

1253

// 63 0 63 0

1254

for (unsigned Shift = 0; Shift < 63; ++Shift) {

1255

uint64_t RotImm = APInt(64, Imm).rotr(Shift).getZExtValue();

1256

if (isInt<34>(RotImm)) {

1257

Result =

1258

CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(RotImm));

1259

return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,

1260

SDValue(Result, 0), getI32Imm(Shift),

1261

getI32Imm(0));

1262

}

1263

}

1264

1265

// Patterns : High word == Low word

1266

// This is basically a splat of a 32 bit immediate.

1267

if (Hi32 == Lo32) {

1268

Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Hi32));

1269

SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(32),

1270

getI32Imm(0)};

1271

return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);

1272

}

1273

1274

InstCnt = 3;

1275

// Catch-all

1276

// This pattern can form any 64 bit immediate in 3 instructions.

1277

SDNode *ResultHi =

1278

CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Hi32));

1279

SDNode *ResultLo =

1280

CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Lo32));

1281

SDValue Ops[] = {SDValue(ResultLo, 0), SDValue(ResultHi, 0), getI32Imm(32),

1282

getI32Imm(0)};

1283

return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);

1284

}

1285

1286

// Materialize the 64-bit immediate Imm as a chain of machine nodes and return
// the node producing the final value.
//
// CurDAG  - DAG in which the machine nodes are created.
// dl      - debug location attached to every created node.
// Imm     - the constant to materialize.
// InstCnt - optional out-parameter; when non-null it receives the number of
//           instructions in the returned sequence.
//
// Strategies are tried in this order:
//   1. direct selection (selectI64ImmDirect), or the prefixed variant when the
//      subtarget has prefixed instructions and it is strictly shorter (or the
//      only one that succeeded);
//   2. a 4-instruction sequence for values that are one 16-bit patch away from
//      a splat of a 32-bit word;
//   3. direct selection of the upper 32 bits followed by ORIS8/ORI8 for the
//      non-zero 16-bit halves of the lower word.
static SDNode *selectI64Imm(SelectionDAG *CurDAG, const SDLoc &dl, uint64_t Imm,
                            unsigned *InstCnt = nullptr) {
  // Publish the instruction count only when the caller asked for it.
  auto ReportCount = [InstCnt](unsigned Count) {
    if (InstCnt)
      *InstCnt = Count;
  };

  // No more than 3 instructions are used when direct selection succeeds.
  unsigned DirectCount = 0;
  SDNode *Direct = selectI64ImmDirect(CurDAG, dl, Imm, DirectCount);

  const PPCSubtarget &Subtarget =
      CurDAG->getMachineFunction().getSubtarget<PPCSubtarget>();

  // A single non-prefixed instruction cannot be beaten, so the prefixed path
  // is only explored when the direct path needed more than one instruction or
  // produced nothing.
  if (Subtarget.hasPrefixInstrs() && DirectCount != 1) {
    unsigned PrefixCount = 0;
    SDNode *Prefixed = selectI64ImmDirectPrefix(CurDAG, dl, Imm, PrefixCount);
    // On a tie the non-prefixed sequence is preferred.
    const bool PrefixWins = !Direct || PrefixCount < DirectCount;
    if (Prefixed && PrefixWins) {
      ReportCount(PrefixCount);
      return Prefixed;
    }
  }

  if (Direct) {
    ReportCount(DirectCount);
    return Direct;
  }

  auto MakeI32 = [CurDAG, dl](unsigned Val) {
    return CurDAG->getTargetConstant(Val, dl, MVT::i32);
  };

  const uint32_t LoUpper16 = (Lo_32(Imm) >> 16) & 0xffff;
  const uint32_t LoLower16 = Lo_32(Imm) & 0xffff;

  // "Almost splat": three of the four 16-bit quarters agree in a way that lets
  // a splatted 32-bit word be repaired with one rotate-and-insert.
  if (LoUpper16 && LoLower16) {
    const uint32_t HiUpper16 = (Hi_32(Imm) >> 16) & 0xffff;
    const uint32_t HiLower16 = Hi_32(Imm) & 0xffff;

    // Builds {Upper:Lower} with LIS8+ORI8 and copies it into the high word
    // with RLDIMI (3 instructions).
    auto BuildSplat = [CurDAG, dl, MakeI32](uint32_t Upper, uint32_t Lower) {
      SDNode *Word =
          CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, MakeI32(Upper));
      Word = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Word, 0),
                                    MakeI32(Lower));
      SDValue SplatOps[] = {SDValue(Word, 0), SDValue(Word, 0), MakeI32(32),
                            MakeI32(0)};
      return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, SplatOps);
    };

    SDNode *Patched = nullptr;
    if (HiUpper16 == HiLower16 && HiLower16 == LoLower16) {
      // Splat the low word, then overwrite the topmost quarter.
      SDNode *Splat = BuildSplat(LoUpper16, LoLower16);
      SDValue PatchOps[] = {SDValue(Splat, 0), SDValue(Splat, 0), MakeI32(48),
                            MakeI32(0)};
      Patched = CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, PatchOps);
    } else if (HiUpper16 == LoUpper16 && LoUpper16 == LoLower16) {
      // Splat the high word, then overwrite the lowest quarter.
      SDNode *Splat = BuildSplat(HiUpper16, HiLower16);
      SDValue PatchOps[] = {SDValue(Splat, 0), SDValue(Splat, 0), MakeI32(16),
                            MakeI32(16), MakeI32(31)};
      Patched = CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64, PatchOps);
    } else if (HiLower16 == LoLower16 && LoUpper16 == LoLower16) {
      // Splat the high word, then overwrite the upper quarter of the low word.
      SDNode *Splat = BuildSplat(HiUpper16, HiLower16);
      SDValue PatchOps[] = {SDValue(Splat, 0), SDValue(Splat, 0), MakeI32(16),
                            MakeI32(0), MakeI32(15)};
      Patched = CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64, PatchOps);
    }

    if (Patched) {
      ReportCount(4);
      return Patched;
    }
  }

  // General fallback: materialize the upper 32 bits, then OR in whichever
  // 16-bit halves of the lower word are non-zero.
  SDNode *Chain =
      selectI64ImmDirect(CurDAG, dl, Imm & 0xffffffff00000000, DirectCount);
  if (LoUpper16) {
    Chain = CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64, SDValue(Chain, 0),
                                   MakeI32(LoUpper16));
    ++DirectCount;
  }
  if (LoLower16) {
    Chain = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Chain, 0),
                                   MakeI32(LoLower16));
    ++DirectCount;
  }
  ReportCount(DirectCount);
  return Chain;
}

1390

1391

// Select a 64-bit constant.

1392

// Materialize the constant held by node N (which must be a ConstantSDNode).
// When allUsesTruncate reports a non-zero width and the constant,
// sign-extended from that width, fits in a signed 16-bit field, a single LI8
// is emitted; otherwise selection is delegated to the immediate-based
// overload using the original value.
static SDNode *selectI64Imm(SelectionDAG *CurDAG, SDNode *N) {
  SDLoc Loc(N);

  // Raw 64-bit payload of the constant node.
  int64_t RawImm = cast<ConstantSDNode>(N)->getZExtValue();

  const unsigned TruncBits = allUsesTruncate(CurDAG, N);
  if (TruncBits == 0)
    return selectI64Imm(CurDAG, Loc, RawImm);

  const uint64_t Narrowed = SignExtend64(RawImm, TruncBits);
  SDValue NarrowedOp = CurDAG->getTargetConstant(Narrowed, Loc, MVT::i64);
  if (isInt<16>(Narrowed))
    return CurDAG->getMachineNode(PPC::LI8, Loc, MVT::i64, NarrowedOp);

  return selectI64Imm(CurDAG, Loc, RawImm);
}

1405

1406

namespace {

1407

1408

class BitPermutationSelector {

1409

struct ValueBit {

1410

SDValue V;

1411

1412

// The bit number in the value, using a convention where bit 0 is the

1413

// lowest-order bit.

1414

unsigned Idx;

1415

1416

// ConstZero means a bit we need to mask off.

1417

// Variable is a bit comes from an input variable.

1418

// VariableKnownToBeZero is also a bit comes from an input variable,

1419

// but it is known to be already zero. So we do not need to mask them.

1420

enum Kind {

1421

ConstZero,

1422

Variable,

1423

VariableKnownToBeZero

1424

} K;

1425

1426

ValueBit(SDValue V, unsigned I, Kind K = Variable)

1427

: V(V), Idx(I), K(K) {}

1428

ValueBit(Kind K = Variable) : Idx(UINT32_MAX(4294967295U)), K(K) {}

1429

1430

bool isZero() const {

1431

return K == ConstZero || K == VariableKnownToBeZero;

1432

}

1433

1434

bool hasValue() const {

1435

return K == Variable || K == VariableKnownToBeZero;

1436

}

1437

1438

SDValue getValue() const {

1439

assert(hasValue() && "Cannot get the value of a constant bit")(static_cast <bool> (hasValue() && "Cannot get the value of a constant bit"
) ? void (0) : __assert_fail ("hasValue() && \"Cannot get the value of a constant bit\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 1439, __extension__
__PRETTY_FUNCTION__));

1440

return V;

1441

}

1442

1443

unsigned getValueBitIndex() const {

1444

assert(hasValue() && "Cannot get the value bit index of a constant bit")(static_cast <bool> (hasValue() && "Cannot get the value bit index of a constant bit"
) ? void (0) : __assert_fail ("hasValue() && \"Cannot get the value bit index of a constant bit\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 1444, __extension__
__PRETTY_FUNCTION__));

1445

return Idx;

1446

}

1447

};

1448

1449

// A bit group has the same underlying value and the same rotate factor.

1450

struct BitGroup {

1451

SDValue V;

1452

unsigned RLAmt;

1453

unsigned StartIdx, EndIdx;

1454

1455

// This rotation amount assumes that the lower 32 bits of the quantity are

1456

// replicated in the high 32 bits by the rotation operator (which is done

1457

// by rlwinm and friends in 64-bit mode).

1458

bool Repl32;

1459

// Did converting to Repl32 == true change the rotation factor? If it did,

1460

// it decreased it by 32.

1461

bool Repl32CR;

1462

// Was this group coalesced after setting Repl32 to true?

1463

bool Repl32Coalesced;

1464

1465

BitGroup(SDValue V, unsigned R, unsigned S, unsigned E)

1466

: V(V), RLAmt(R), StartIdx(S), EndIdx(E), Repl32(false), Repl32CR(false),

1467

Repl32Coalesced(false) {

1468

LLVM_DEBUG(dbgs() << "\tbit group for " << V.getNode() << " RLAmt = " << Rdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-isel")) { dbgs() << "\tbit group for " << V
.getNode() << " RLAmt = " << R << " [" <<
S << ", " << E << "]\n"; } } while (false)

1469

<< " [" << S << ", " << E << "]\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-isel")) { dbgs() << "\tbit group for " << V
.getNode() << " RLAmt = " << R << " [" <<
S << ", " << E << "]\n"; } } while (false);

1470

}

1471

};

1472

1473

// Information on each (Value, RLAmt) pair (like the number of groups

1474

// associated with each) used to choose the lowering method.

1475

struct ValueRotInfo {

1476

SDValue V;

1477

unsigned RLAmt = std::numeric_limits<unsigned>::max();

1478

unsigned NumGroups = 0;

1479

unsigned FirstGroupStartIdx = std::numeric_limits<unsigned>::max();

1480

bool Repl32 = false;

1481

1482

ValueRotInfo() = default;

1483

1484

// For sorting (in reverse order) by NumGroups, and then by

1485

// FirstGroupStartIdx.

1486

bool operator < (const ValueRotInfo &Other) const {

1487

// We need to sort so that the non-Repl32 come first because, when we're

1488

// doing masking, the Repl32 bit groups might be subsumed into the 64-bit

1489

// masking operation.

1490

if (Repl32 < Other.Repl32)

1491

return true;

1492

else if (Repl32 > Other.Repl32)

1493

return false;

1494

else if (NumGroups > Other.NumGroups)

1495

return true;

1496

else if (NumGroups < Other.NumGroups)

1497

return false;

1498

else if (RLAmt == 0 && Other.RLAmt != 0)

1499

return true;

1500

else if (RLAmt != 0 && Other.RLAmt == 0)

1501

return false;

1502

else if (FirstGroupStartIdx < Other.FirstGroupStartIdx)

1503

return true;

1504

return false;

1505

}

1506

};

1507

1508

using ValueBitsMemoizedValue = std::pair<bool, SmallVector<ValueBit, 64>>;

1509

using ValueBitsMemoizer =

1510

DenseMap<SDValue, std::unique_ptr<ValueBitsMemoizedValue>>;

1511

ValueBitsMemoizer Memoizer;

1512

1513

// Return a pair of bool and a SmallVector pointer to a memoization entry.

1514

// The bool is true if something interesting was deduced, otherwise if we're

1515

// providing only a generic representation of V (or something else likewise

1516

// uninteresting for instruction selection) through the SmallVector.

1517

// Compute, and memoize per SDValue, the provenance of each of the NumBits
// low-order bits of V.
//
// Returns {Interesting, Bits}. Bits points at the memoized vector (owned by
// Memoizer); Interesting is true when one of the recognized rotate / shift /
// mask / or patterns was looked through, and false when the vector is just the
// generic "bit i of V" description (or information-only, as for a
// zero-extending load).
std::pair<bool, SmallVector<ValueBit, 64> *> getValueBits(SDValue V,
                                                          unsigned NumBits) {
  auto &ValueEntry = Memoizer[V];
  if (ValueEntry)
    return std::make_pair(ValueEntry->first, &ValueEntry->second);
  ValueEntry.reset(new ValueBitsMemoizedValue());
  // From here on only the heap object is referenced, never the map slot:
  // the recursive calls below add further entries to Memoizer.
  bool &Interesting = ValueEntry->first;
  SmallVector<ValueBit, 64> &Bits = ValueEntry->second;
  Bits.resize(NumBits);

  switch (V.getOpcode()) {
  default: break;
  case ISD::ROTL:
    if (isa<ConstantSDNode>(V.getOperand(1))) {
      // A rotate by the full width (or more) is a rotate by the remainder.
      // Reducing it first keeps the index arithmetic below from wrapping.
      const unsigned RotAmt = V.getConstantOperandVal(1) % NumBits;

      const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;

      for (unsigned i = 0; i < NumBits; ++i)
        Bits[i] = LHSBits[i < RotAmt ? i + (NumBits - RotAmt) : i - RotAmt];

      return std::make_pair(Interesting = true, &Bits);
    }
    break;
  case ISD::SHL:
  case PPCISD::SHL:
    if (isa<ConstantSDNode>(V.getOperand(1))) {
      // Keep one bit more than log2(NumBits) of the amount (sld takes 7 bits,
      // slw takes 6); an amount at or above the width shifts everything out.
      const unsigned ShiftAmt =
          V.getConstantOperandVal(1) & ((NumBits << 1) - 1);

      const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;

      if (ShiftAmt >= NumBits) {
        for (unsigned i = 0; i < NumBits; ++i)
          Bits[i] = ValueBit(ValueBit::ConstZero);
      } else {
        for (unsigned i = ShiftAmt; i < NumBits; ++i)
          Bits[i] = LHSBits[i - ShiftAmt];

        for (unsigned i = 0; i < ShiftAmt; ++i)
          Bits[i] = ValueBit(ValueBit::ConstZero);
      }

      return std::make_pair(Interesting = true, &Bits);
    }
    break;
  case ISD::SRL:
  case PPCISD::SRL:
    if (isa<ConstantSDNode>(V.getOperand(1))) {
      // Same amount handling as for the left shifts above (srd takes 7 bits,
      // srw takes 6).
      const unsigned ShiftAmt =
          V.getConstantOperandVal(1) & ((NumBits << 1) - 1);

      const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;

      if (ShiftAmt >= NumBits) {
        for (unsigned i = 0; i < NumBits; ++i)
          Bits[i] = ValueBit(ValueBit::ConstZero);
      } else {
        for (unsigned i = 0; i < NumBits - ShiftAmt; ++i)
          Bits[i] = LHSBits[i + ShiftAmt];

        for (unsigned i = NumBits - ShiftAmt; i < NumBits; ++i)
          Bits[i] = ValueBit(ValueBit::ConstZero);
      }

      return std::make_pair(Interesting = true, &Bits);
    }
    break;
  case ISD::AND:
    if (isa<ConstantSDNode>(V.getOperand(1))) {
      uint64_t Mask = V.getConstantOperandVal(1);

      const SmallVector<ValueBit, 64> *LHSBits;
      // Mark this as interesting, only if the LHS was also interesting. This
      // prevents the overall procedure from matching a single immediate 'and'
      // (which is non-optimal because such an and might be folded with other
      // things if we don't select it here).
      std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0), NumBits);

      for (unsigned i = 0; i < NumBits; ++i)
        if (((Mask >> i) & 1) == 1)
          Bits[i] = (*LHSBits)[i];
        else {
          // AND instruction masks this bit. If the input is already zero,
          // we have nothing to do here. Otherwise, make the bit ConstZero.
          if ((*LHSBits)[i].isZero())
            Bits[i] = (*LHSBits)[i];
          else
            Bits[i] = ValueBit(ValueBit::ConstZero);
        }

      return std::make_pair(Interesting, &Bits);
    }
    break;
  case ISD::OR: {
    const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
    const auto &RHSBits = *getValueBits(V.getOperand(1), NumBits).second;

    // The OR can only be looked through when, at every position, at least one
    // side is zero.
    bool AllDisjoint = true;
    SDValue LastVal = SDValue();
    unsigned LastIdx = 0;
    for (unsigned i = 0; i < NumBits; ++i) {
      if (LHSBits[i].isZero() && RHSBits[i].isZero()) {
        // If both inputs are known to be zero and one is ConstZero and
        // another is VariableKnownToBeZero, we can select whichever
        // we like. To minimize the number of bit groups, we select
        // VariableKnownToBeZero if this bit is the next bit of the same
        // input variable from the previous bit. Otherwise, we select
        // ConstZero.
        if (LHSBits[i].hasValue() && LHSBits[i].getValue() == LastVal &&
            LHSBits[i].getValueBitIndex() == LastIdx + 1)
          Bits[i] = LHSBits[i];
        else if (RHSBits[i].hasValue() && RHSBits[i].getValue() == LastVal &&
                 RHSBits[i].getValueBitIndex() == LastIdx + 1)
          Bits[i] = RHSBits[i];
        else
          Bits[i] = ValueBit(ValueBit::ConstZero);
      }
      else if (LHSBits[i].isZero())
        Bits[i] = RHSBits[i];
      else if (RHSBits[i].isZero())
        Bits[i] = LHSBits[i];
      else {
        AllDisjoint = false;
        break;
      }
      // We remember the value and bit index of this bit.
      if (Bits[i].hasValue()) {
        LastVal = Bits[i].getValue();
        LastIdx = Bits[i].getValueBitIndex();
      }
      else {
        if (LastVal) LastVal = SDValue();
        LastIdx = 0;
      }
    }

    // Overlapping operands: fall back to the generic description below.
    if (!AllDisjoint)
      break;

    return std::make_pair(Interesting = true, &Bits);
  }
  case ISD::ZERO_EXTEND: {
    // We support only the case with zero extension from i32 to i64 so far.
    if (V.getValueType() != MVT::i64 ||
        V.getOperand(0).getValueType() != MVT::i32)
      break;

    const SmallVector<ValueBit, 64> *LHSBits;
    const unsigned NumOperandBits = 32;
    std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0),
                                                  NumOperandBits);

    for (unsigned i = 0; i < NumOperandBits; ++i)
      Bits[i] = (*LHSBits)[i];

    for (unsigned i = NumOperandBits; i < NumBits; ++i)
      Bits[i] = ValueBit(ValueBit::ConstZero);

    return std::make_pair(Interesting, &Bits);
  }
  case ISD::TRUNCATE: {
    EVT FromType = V.getOperand(0).getValueType();
    EVT ToType = V.getValueType();
    // We support only the case with truncate from i64 to i32.
    if (FromType != MVT::i64 || ToType != MVT::i32)
      break;
    const unsigned NumAllBits = FromType.getSizeInBits();
    SmallVector<ValueBit, 64> *InBits;
    std::tie(Interesting, InBits) = getValueBits(V.getOperand(0),
                                                 NumAllBits);
    const unsigned NumValidBits = ToType.getSizeInBits();

    // A 32-bit instruction cannot touch upper 32-bit part of 64-bit value.
    // So, we cannot include this truncate.
    bool UseUpper32bit = false;
    for (unsigned i = 0; i < NumValidBits; ++i)
      if ((*InBits)[i].hasValue() && (*InBits)[i].getValueBitIndex() >= 32) {
        UseUpper32bit = true;
        break;
      }
    if (UseUpper32bit)
      break;

    for (unsigned i = 0; i < NumValidBits; ++i)
      Bits[i] = (*InBits)[i];

    return std::make_pair(Interesting, &Bits);
  }
  case ISD::AssertZext: {
    // For AssertZext, we look through the operand and
    // mark the bits known to be zero.
    const SmallVector<ValueBit, 64> *LHSBits;
    std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0),
                                                  NumBits);

    EVT FromType = cast<VTSDNode>(V.getOperand(1))->getVT();
    const unsigned NumValidBits = FromType.getSizeInBits();
    for (unsigned i = 0; i < NumValidBits; ++i)
      Bits[i] = (*LHSBits)[i];

    // These bits are known to be zero but the AssertZext may be from a value
    // that already has some constant zero bits (i.e. from a masking and).
    for (unsigned i = NumValidBits; i < NumBits; ++i)
      Bits[i] = (*LHSBits)[i].hasValue()
                    ? ValueBit((*LHSBits)[i].getValue(),
                               (*LHSBits)[i].getValueBitIndex(),
                               ValueBit::VariableKnownToBeZero)
                    : ValueBit(ValueBit::ConstZero);

    return std::make_pair(Interesting, &Bits);
  }
  case ISD::LOAD: {
    LoadSDNode *LD = cast<LoadSDNode>(V);
    if (ISD::isZEXTLoad(V.getNode()) && V.getResNo() == 0) {
      EVT VT = LD->getMemoryVT();
      const unsigned NumValidBits = VT.getSizeInBits();

      for (unsigned i = 0; i < NumValidBits; ++i)
        Bits[i] = ValueBit(V, i);

      // These bits are known to be zero.
      for (unsigned i = NumValidBits; i < NumBits; ++i)
        Bits[i] = ValueBit(V, i, ValueBit::VariableKnownToBeZero);

      // Zero-extending load itself cannot be optimized. So, it is not
      // interesting by itself though it gives useful information.
      return std::make_pair(Interesting = false, &Bits);
    }
    break;
  }
  }

  // Nothing recognized: every bit simply comes from the same position of V.
  for (unsigned i = 0; i < NumBits; ++i)
    Bits[i] = ValueBit(V, i);

  return std::make_pair(Interesting = false, &Bits);
}

1742

1743

// For each value (except the constant ones), compute the left-rotate amount

1744

// to get it from its original to final position.

1745

void computeRotationAmounts() {

1746

NeedMask = false;

1747

RLAmt.resize(Bits.size());

1748

for (unsigned i = 0; i < Bits.size(); ++i)

1749

if (Bits[i].hasValue()) {

1750

unsigned VBI = Bits[i].getValueBitIndex();

1751

if (i >= VBI)

1752

RLAmt[i] = i - VBI;

1753

else

1754

RLAmt[i] = Bits.size() - (VBI - i);

1755

} else if (Bits[i].isZero()) {

1756

NeedMask = true;

1757

RLAmt[i] = UINT32_MAX(4294967295U);

1758

} else {

1759

llvm_unreachable("Unknown value bit type")::llvm::llvm_unreachable_internal("Unknown value bit type", "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 1759);

1760

}

1761

}

1762

1763

// Collect groups of consecutive bits with the same underlying value and

1764

// rotation factor. If we're doing late masking, we ignore zeros, otherwise

1765

// they break up groups.

1766

void collectBitGroups(bool LateMask) {

1767

BitGroups.clear();

1768

1769

unsigned LastRLAmt = RLAmt[0];

1770

SDValue LastValue = Bits[0].hasValue() ? Bits[0].getValue() : SDValue();

1771

unsigned LastGroupStartIdx = 0;

1772

bool IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue();

1773

for (unsigned i = 1; i < Bits.size(); ++i) {

1774

unsigned ThisRLAmt = RLAmt[i];

1775

SDValue ThisValue = Bits[i].hasValue() ? Bits[i].getValue() : SDValue();

1776

if (LateMask && !ThisValue) {

1777

ThisValue = LastValue;

1778

ThisRLAmt = LastRLAmt;

1779

// If we're doing late masking, then the first bit group always starts

1780

// at zero (even if the first bits were zero).

1781

if (BitGroups.empty())

1782

LastGroupStartIdx = 0;

1783

}

1784

1785

// If this bit is known to be zero and the current group is a bit group

1786

// of zeros, we do not need to terminate the current bit group even the

1787

// Value or RLAmt does not match here. Instead, we terminate this group

1788

// when the first non-zero bit appears later.

1789

if (IsGroupOfZeros && Bits[i].isZero())

1790

continue;

1791

1792

// If this bit has the same underlying value and the same rotate factor as

1793

// the last one, then they're part of the same group.

1794

if (ThisRLAmt == LastRLAmt && ThisValue == LastValue)

1795

// We cannot continue the current group if this bits is not known to

1796

// be zero in a bit group of zeros.

1797

if (!(IsGroupOfZeros && ThisValue && !Bits[i].isZero()))

1798

continue;

1799

1800

if (LastValue.getNode())

1801

BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,

1802

i-1));

1803

LastRLAmt = ThisRLAmt;

1804

LastValue = ThisValue;

1805

LastGroupStartIdx = i;

1806

IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue();

1807

}

1808

if (LastValue.getNode())

1809

BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,

1810

Bits.size()-1));

1811

1812

if (BitGroups.empty())

1813

return;

1814

1815

// We might be able to combine the first and last groups.

1816

if (BitGroups.size() > 1) {

1817

// If the first and last groups are the same, then remove the first group

1818

// in favor of the last group, making the ending index of the last group

1819

// equal to the ending index of the to-be-removed first group.

1820

if (BitGroups[0].StartIdx == 0 &&

1821

BitGroups[BitGroups.size()-1].EndIdx == Bits.size()-1 &&

1822

BitGroups[0].V == BitGroups[BitGroups.size()-1].V &&

1823

BitGroups[0].RLAmt == BitGroups[BitGroups.size()-1].RLAmt) {

1824

LLVM_DEBUG(dbgs() << "\tcombining final bit group with initial one\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-isel")) { dbgs() << "\tcombining final bit group with initial one\n"
; } } while (false);

1825

BitGroups[BitGroups.size()-1].EndIdx = BitGroups[0].EndIdx;

1826

BitGroups.erase(BitGroups.begin());

1827

}

1828

}

1829

}

1830

1831

// Take all (SDValue, RLAmt) pairs and sort them by the number of groups

1832

// associated with each. If the number of groups are same, we prefer a group

1833

// which does not require rotate, i.e. RLAmt is 0, to avoid the first rotate

1834

// instruction. If there is a degeneracy, pick the one that occurs

1835

// first (in the final value).

1836

void collectValueRotInfo() {

1837

ValueRots.clear();

1838

1839

for (auto &BG : BitGroups) {

1840

unsigned RLAmtKey = BG.RLAmt + (BG.Repl32 ? 64 : 0);

1841

ValueRotInfo &VRI = ValueRots[std::make_pair(BG.V, RLAmtKey)];

1842

VRI.V = BG.V;

1843

VRI.RLAmt = BG.RLAmt;

1844

VRI.Repl32 = BG.Repl32;

1845

VRI.NumGroups += 1;

1846

VRI.FirstGroupStartIdx = std::min(VRI.FirstGroupStartIdx, BG.StartIdx);

1847

}

1848

1849

// Now that we've collected the various ValueRotInfo instances, we need to

1850

// sort them.

1851

ValueRotsVec.clear();

1852

for (auto &I : ValueRots) {

1853

ValueRotsVec.push_back(I.second);

1854

}

1855

llvm::sort(ValueRotsVec);

1856

}

1857

1858

// In 64-bit mode, rlwinm and friends have a rotation operator that

1859

// replicates the low-order 32 bits into the high-order 32-bits. The mask

1860

// indices of these instructions can only be in the lower 32 bits, so they

1861

// can only represent some 64-bit bit groups. However, when they can be used,

1862

// the 32-bit replication can be used to represent, as a single bit group,

1863

// otherwise separate bit groups. We'll convert to replicated-32-bit bit

1864

// groups when possible. Groups are converted in place; the function itself

1865

// returns nothing.

1866

void assignRepl32BitGroups() {

1867

// If we have bits like this:

1868

//

1869

// Indices: 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0

1870

// V bits: ... 7 6 5 4 3 2 1 0 31 30 29 28 27 26 25 24

1871

// Groups: | RLAmt = 8 | RLAmt = 40 |

1872

//

1873

// But, making use of a 32-bit operation that replicates the low-order 32

1874

// bits into the high-order 32 bits, this can be one bit group with a RLAmt

1875

// of 8.

1876

1877

auto IsAllLow32 = [this](BitGroup & BG) {

1878

if (BG.StartIdx <= BG.EndIdx) {

1879

for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i) {

1880

if (!Bits[i].hasValue())

1881

continue;

1882

if (Bits[i].getValueBitIndex() >= 32)

1883

return false;

1884

}

1885

} else {

1886

for (unsigned i = BG.StartIdx; i < Bits.size(); ++i) {

1887

if (!Bits[i].hasValue())

1888

continue;

1889

if (Bits[i].getValueBitIndex() >= 32)

1890

return false;

1891

}

1892

for (unsigned i = 0; i <= BG.EndIdx; ++i) {

1893

if (!Bits[i].hasValue())

1894

continue;

1895

if (Bits[i].getValueBitIndex() >= 32)

1896

return false;

1897

}

1898

}

1899

1900

return true;

1901

};

1902

1903

for (auto &BG : BitGroups) {

1904

// If this bit group has RLAmt of 0 and will not be merged with

1905

// another bit group, we don't benefit from Repl32. We don't mark

1906

// such group to give more freedom for later instruction selection.

1907

if (BG.RLAmt == 0) {

1908

auto PotentiallyMerged = [this](BitGroup & BG) {

1909

for (auto &BG2 : BitGroups)

1910

if (&BG != &BG2 && BG.V == BG2.V &&

1911

(BG2.RLAmt == 0 || BG2.RLAmt == 32))

1912

return true;

1913

return false;

1914

};

1915

if (!PotentiallyMerged(BG))

1916

continue;

1917

}

1918

if (BG.StartIdx < 32 && BG.EndIdx < 32) {

1919

if (IsAllLow32(BG)) {

1920

if (BG.RLAmt >= 32) {

1921

BG.RLAmt -= 32;

1922

BG.Repl32CR = true;

1923

}

1924

1925

BG.Repl32 = true;

1926

1927

LLVM_DEBUG(dbgs() << "\t32-bit replicated bit group for "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-isel")) { dbgs() << "\t32-bit replicated bit group for "
<< BG.V.getNode() << " RLAmt = " << BG.RLAmt
<< " [" << BG.StartIdx << ", " << BG
.EndIdx << "]\n"; } } while (false)

1928

<< BG.V.getNode() << " RLAmt = " << BG.RLAmt << " ["do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-isel")) { dbgs() << "\t32-bit replicated bit group for "
<< BG.V.getNode() << " RLAmt = " << BG.RLAmt
<< " [" << BG.StartIdx << ", " << BG
.EndIdx << "]\n"; } } while (false)

1929

<< BG.StartIdx << ", " << BG.EndIdx << "]\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-isel")) { dbgs() << "\t32-bit replicated bit group for "
<< BG.V.getNode() << " RLAmt = " << BG.RLAmt
<< " [" << BG.StartIdx << ", " << BG
.EndIdx << "]\n"; } } while (false);

1930

}

1931

}

1932

}

1933

1934

// Now walk through the bit groups, consolidating where possible.

1935

for (auto I = BitGroups.begin(); I != BitGroups.end();) {

1936

// We might want to remove this bit group by merging it with the previous

1937

// group (which might be the ending group).

1938

auto IP = (I == BitGroups.begin()) ?

1939

std::prev(BitGroups.end()) : std::prev(I);

1940

if (I->Repl32 && IP->Repl32 && I->V == IP->V && I->RLAmt == IP->RLAmt &&

1941

I->StartIdx == (IP->EndIdx + 1) % 64 && I != IP) {

1942

1943

LLVM_DEBUG(dbgs() << "\tcombining 32-bit replicated bit group for "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-isel")) { dbgs() << "\tcombining 32-bit replicated bit group for "
<< I->V.getNode() << " RLAmt = " << I->
RLAmt << " [" << I->StartIdx << ", " <<
I->EndIdx << "] with group with range [" << IP
->StartIdx << ", " << IP->EndIdx << "]\n"
; } } while (false)

1944

<< I->V.getNode() << " RLAmt = " << I->RLAmt << " ["do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-isel")) { dbgs() << "\tcombining 32-bit replicated bit group for "
<< I->V.getNode() << " RLAmt = " << I->
RLAmt << " [" << I->StartIdx << ", " <<
I->EndIdx << "] with group with range [" << IP
->StartIdx << ", " << IP->EndIdx << "]\n"
; } } while (false)

1945

<< I->StartIdx << ", " << I->EndIdxdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-isel")) { dbgs() << "\tcombining 32-bit replicated bit group for "
<< I->V.getNode() << " RLAmt = " << I->
RLAmt << " [" << I->StartIdx << ", " <<
I->EndIdx << "] with group with range [" << IP
->StartIdx << ", " << IP->EndIdx << "]\n"
; } } while (false)

1946

<< "] with group with range [" << IP->StartIdx << ", "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-isel")) { dbgs() << "\tcombining 32-bit replicated bit group for "
<< I->V.getNode() << " RLAmt = " << I->
RLAmt << " [" << I->StartIdx << ", " <<
I->EndIdx << "] with group with range [" << IP
->StartIdx << ", " << IP->EndIdx << "]\n"
; } } while (false)

1947

<< IP->EndIdx << "]\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-isel")) { dbgs() << "\tcombining 32-bit replicated bit group for "
<< I->V.getNode() << " RLAmt = " << I->
RLAmt << " [" << I->StartIdx << ", " <<
I->EndIdx << "] with group with range [" << IP
->StartIdx << ", " << IP->EndIdx << "]\n"
; } } while (false);

1948

1949

IP->EndIdx = I->EndIdx;

1950

IP->Repl32CR = IP->Repl32CR || I->Repl32CR;

1951

IP->Repl32Coalesced = true;

1952

I = BitGroups.erase(I);

1953

continue;

1954

} else {

1955

// There is a special case worth handling: If there is a single group

1956

// covering the entire upper 32 bits, and it can be merged with both

1957

// the next and previous groups (which might be the same group), then

1958

// do so. If it is the same group (so there will be only one group in

1959

// total), then we need to reverse the order of the range so that it

1960

// covers the entire 64 bits.

1961

if (I->StartIdx == 32 && I->EndIdx == 63) {

1962

assert(std::next(I) == BitGroups.end() &&(static_cast <bool> (std::next(I) == BitGroups.end() &&
"bit group ends at index 63 but there is another?") ? void (
0) : __assert_fail ("std::next(I) == BitGroups.end() && \"bit group ends at index 63 but there is another?\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 1963, __extension__
__PRETTY_FUNCTION__))

1963

"bit group ends at index 63 but there is another?")(static_cast <bool> (std::next(I) == BitGroups.end() &&
"bit group ends at index 63 but there is another?") ? void (
0) : __assert_fail ("std::next(I) == BitGroups.end() && \"bit group ends at index 63 but there is another?\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 1963, __extension__
__PRETTY_FUNCTION__));

1964

auto IN = BitGroups.begin();

1965

1966

if (IP->Repl32 && IN->Repl32 && I->V == IP->V && I->V == IN->V &&

1967

(I->RLAmt % 32) == IP->RLAmt && (I->RLAmt % 32) == IN->RLAmt &&

1968

IP->EndIdx == 31 && IN->StartIdx == 0 && I != IP &&

1969

IsAllLow32(*I)) {

1970

1971

LLVM_DEBUG(dbgs() << "\tcombining bit group for " << I->V.getNode()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-isel")) { dbgs() << "\tcombining bit group for " <<
I->V.getNode() << " RLAmt = " << I->RLAmt <<
" [" << I->StartIdx << ", " << I->EndIdx
<< "] with 32-bit replicated groups with ranges [" <<
IP->StartIdx << ", " << IP->EndIdx <<
"] and [" << IN->StartIdx << ", " << IN
->EndIdx << "]\n"; } } while (false)

1972

<< " RLAmt = " << I->RLAmt << " [" << I->StartIdxdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-isel")) { dbgs() << "\tcombining bit group for " <<
I->V.getNode() << " RLAmt = " << I->RLAmt <<
" [" << I->StartIdx << ", " << I->EndIdx
<< "] with 32-bit replicated groups with ranges [" <<
IP->StartIdx << ", " << IP->EndIdx <<
"] and [" << IN->StartIdx << ", " << IN
->EndIdx << "]\n"; } } while (false)

1973

<< ", " << I->EndIdxdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-isel")) { dbgs() << "\tcombining bit group for " <<
I->V.getNode() << " RLAmt = " << I->RLAmt <<
" [" << I->StartIdx << ", " << I->EndIdx
<< "] with 32-bit replicated groups with ranges [" <<
IP->StartIdx << ", " << IP->EndIdx <<
"] and [" << IN->StartIdx << ", " << IN
->EndIdx << "]\n"; } } while (false)

1974

<< "] with 32-bit replicated groups with ranges ["do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-isel")) { dbgs() << "\tcombining bit group for " <<
I->V.getNode() << " RLAmt = " << I->RLAmt <<
" [" << I->StartIdx << ", " << I->EndIdx
<< "] with 32-bit replicated groups with ranges [" <<
IP->StartIdx << ", " << IP->EndIdx <<
"] and [" << IN->StartIdx << ", " << IN
->EndIdx << "]\n"; } } while (false)

1975

<< IP->StartIdx << ", " << IP->EndIdx << "] and ["do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-isel")) { dbgs() << "\tcombining bit group for " <<
I->V.getNode() << " RLAmt = " << I->RLAmt <<
" [" << I->StartIdx << ", " << I->EndIdx
<< "] with 32-bit replicated groups with ranges [" <<
IP->StartIdx << ", " << IP->EndIdx <<
"] and [" << IN->StartIdx << ", " << IN
->EndIdx << "]\n"; } } while (false)

1976

<< IN->StartIdx << ", " << IN->EndIdx << "]\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-isel")) { dbgs() << "\tcombining bit group for " <<
I->V.getNode() << " RLAmt = " << I->RLAmt <<
" [" << I->StartIdx << ", " << I->EndIdx
<< "] with 32-bit replicated groups with ranges [" <<
IP->StartIdx << ", " << IP->EndIdx <<
"] and [" << IN->StartIdx << ", " << IN
->EndIdx << "]\n"; } } while (false);

1977

1978

if (IP == IN) {

1979

// There is only one other group; change it to cover the whole

1980

// range (backward, so that it can still be Repl32 but cover the

1981

// whole 64-bit range).

1982

IP->StartIdx = 31;

1983

IP->EndIdx = 30;

1984

IP->Repl32CR = IP->Repl32CR || I->RLAmt >= 32;

1985

IP->Repl32Coalesced = true;

1986

I = BitGroups.erase(I);

1987

} else {

1988

// There are two separate groups, one before this group and one

1989

// after us (at the beginning). We're going to remove this group,

1990

// but also the group at the very beginning.

1991

IP->EndIdx = IN->EndIdx;

1992

IP->Repl32CR = IP->Repl32CR || IN->Repl32CR || I->RLAmt >= 32;

1993

IP->Repl32Coalesced = true;

1994

I = BitGroups.erase(I);

1995

BitGroups.erase(BitGroups.begin());

1996

}

1997

1998

// This must be the last group in the vector (and we might have

1999

// just invalidated the iterator above), so break here.

2000

break;

2001

}

2002

}

2003

}

2004

2005

++I;

2006

}

2007

}

2008

2009

// Convenience wrapper: builds an i32 target-constant node holding Imm,
// attached to the source location dl, in the DAG currently being selected.
SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
  return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
}

2012

2013

// Returns a 64-bit mask that has a 0 in every position i (i < Bits.size())
// for which Bits[i] reports no value, and a 1 in every other position
// (including any positions at or above Bits.size()).
uint64_t getZerosMask() {
  uint64_t Mask = 0;
  for (unsigned i = 0; i < Bits.size(); ++i) {
    if (Bits[i].hasValue())
      continue;
    // Collect the value-less positions first; the result is the complement.
    Mask |= (UINT64_C(1) << i);
  }

  return ~Mask;
}

2023

2024

// This method extends an input value to 64 bit if input is 32-bit integer.

2025

// While selecting instructions in BitPermutationSelector in 64-bit mode,

2026

// an input value can be a 32-bit integer if a ZERO_EXTEND node is included.

2027

// In such case, we extend it to 64 bit to be consistent with other values.

2028

SDValue ExtendToInt64(SDValue V, const SDLoc &dl) {

2029

if (V.getValueSizeInBits() == 64)

2030

return V;

2031

2032

assert(V.getValueSizeInBits() == 32)(static_cast <bool> (V.getValueSizeInBits() == 32) ? void
(0) : __assert_fail ("V.getValueSizeInBits() == 32", "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 2032, __extension__ __PRETTY_FUNCTION__));

2033

SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);

2034

SDValue ImDef = SDValue(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl,

2035

MVT::i64), 0);

2036

SDValue ExtVal = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl,

2037

MVT::i64, ImDef, V,

2038

SubRegIdx), 0);

2039

return ExtVal;

2040

}

2041

2042

// Counterpart of ExtendToInt64: returns V unchanged when it is already 32
// bits wide; otherwise V must be 64 bits wide (asserted) and its sub_32
// subregister is taken with EXTRACT_SUBREG, yielding an i32 value.
SDValue TruncateToInt32(SDValue V, const SDLoc &dl) {
  if (V.getValueSizeInBits() == 32)
    return V;

  assert(V.getValueSizeInBits() == 64);
  SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
  SDValue SubVal = SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl,
                                                  MVT::i32, V, SubRegIdx),
                           0);
  return SubVal;
}

2052

2053

// Depending on the number of groups for a particular value, it might be

2054

// better to rotate, mask explicitly (using andi/andis), and then or the

2055

// result. Select this part of the result first.

2056

void SelectAndParts32(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) {

2057

if (BPermRewriterNoMasking)

2058

return;

2059

2060

for (ValueRotInfo &VRI : ValueRotsVec) {

2061

unsigned Mask = 0;

2062

for (unsigned i = 0; i < Bits.size(); ++i) {

2063

if (!Bits[i].hasValue() || Bits[i].getValue() != VRI.V)

2064

continue;

2065

if (RLAmt[i] != VRI.RLAmt)

2066

continue;

2067

Mask |= (1u << i);

2068

}

2069

2070

// Compute the masks for andi/andis that would be necessary.

2071

unsigned ANDIMask = (Mask & UINT16_MAX(65535)), ANDISMask = Mask >> 16;

2072

assert((ANDIMask != 0 || ANDISMask != 0) &&(static_cast <bool> ((ANDIMask != 0 || ANDISMask != 0) &&
"No set bits in mask for value bit groups") ? void (0) : __assert_fail
("(ANDIMask != 0 || ANDISMask != 0) && \"No set bits in mask for value bit groups\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 2073, __extension__
__PRETTY_FUNCTION__))

2073

"No set bits in mask for value bit groups")(static_cast <bool> ((ANDIMask != 0 || ANDISMask != 0) &&
"No set bits in mask for value bit groups") ? void (0) : __assert_fail
("(ANDIMask != 0 || ANDISMask != 0) && \"No set bits in mask for value bit groups\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 2073, __extension__
__PRETTY_FUNCTION__));

2074

bool NeedsRotate = VRI.RLAmt != 0;

2075

2076

// We're trying to minimize the number of instructions. If we have one

2077

// group, using one of andi/andis can break even. If we have three

2078

// groups, we can use both andi and andis and break even (to use both

2079

// andi and andis we also need to or the results together). We need four

2080

// groups if we also need to rotate. To use andi/andis we need to do more

2081

// than break even because rotate-and-mask instructions tend to be easier

2082

// to schedule.

2083

2084

// FIXME: We've biased here against using andi/andis, which is right for

2085

// POWER cores, but not optimal everywhere. For example, on the A2,

2086

// andi/andis have single-cycle latency whereas the rotate-and-mask

2087

// instructions take two cycles, and it would be better to bias toward

2088

// andi/andis in break-even cases.

2089

2090

unsigned NumAndInsts = (unsigned) NeedsRotate +

2091

(unsigned) (ANDIMask != 0) +

2092

(unsigned) (ANDISMask != 0) +

2093

(unsigned) (ANDIMask != 0 && ANDISMask != 0) +

2094

(unsigned) (bool) Res;

2095

2096

LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-isel")) { dbgs() << "\t\trotation groups for " <<
VRI.V.getNode() << " RL: " << VRI.RLAmt <<
":" << "\n\t\t\tisel using masking: " << NumAndInsts
<< " using rotates: " << VRI.NumGroups << "\n"
; } } while (false)

2097

<< " RL: " << VRI.RLAmt << ":"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-isel")) { dbgs() << "\t\trotation groups for " <<
VRI.V.getNode() << " RL: " << VRI.RLAmt <<
":" << "\n\t\t\tisel using masking: " << NumAndInsts
<< " using rotates: " << VRI.NumGroups << "\n"
; } } while (false)

2098

<< "\n\t\t\tisel using masking: " << NumAndInstsdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-isel")) { dbgs() << "\t\trotation groups for " <<
VRI.V.getNode() << " RL: " << VRI.RLAmt <<
":" << "\n\t\t\tisel using masking: " << NumAndInsts
<< " using rotates: " << VRI.NumGroups << "\n"
; } } while (false)

2099

<< " using rotates: " << VRI.NumGroups << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-isel")) { dbgs() << "\t\trotation groups for " <<
VRI.V.getNode() << " RL: " << VRI.RLAmt <<
":" << "\n\t\t\tisel using masking: " << NumAndInsts
<< " using rotates: " << VRI.NumGroups << "\n"
; } } while (false);

2100

2101

if (NumAndInsts >= VRI.NumGroups)

2102

continue;

2103

2104

LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-isel")) { dbgs() << "\t\t\t\tusing masking\n"; } }
while (false);

2105

2106

if (InstCnt) *InstCnt += NumAndInsts;

2107

2108

SDValue VRot;

2109

if (VRI.RLAmt) {

2110

SDValue Ops[] =

2111

{ TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl),

2112

getI32Imm(0, dl), getI32Imm(31, dl) };

2113

VRot = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,

2114

Ops), 0);

2115

} else {

2116

VRot = TruncateToInt32(VRI.V, dl);

2117

}

2118

2119

SDValue ANDIVal, ANDISVal;

2120

if (ANDIMask != 0)

2121

ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI_rec, dl, MVT::i32,

2122

VRot, getI32Imm(ANDIMask, dl)),

2123

0);

2124

if (ANDISMask != 0)

2125

ANDISVal =

2126

SDValue(CurDAG->getMachineNode(PPC::ANDIS_rec, dl, MVT::i32, VRot,

2127

getI32Imm(ANDISMask, dl)),

2128

0);

2129

2130

SDValue TotalVal;

2131

if (!ANDIVal)

2132

TotalVal = ANDISVal;

2133

else if (!ANDISVal)

2134

TotalVal = ANDIVal;

2135

else

2136

TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,

2137

ANDIVal, ANDISVal), 0);

2138

2139

if (!Res)

2140

Res = TotalVal;

2141

else

2142

Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,

2143

Res, TotalVal), 0);

2144

2145

// Now, remove all groups with this underlying value and rotation

2146

// factor.

2147

eraseMatchingBitGroups([VRI](const BitGroup &BG) {

2148

return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;

2149

});

2150

}

2151

}

2152

2153

// Instruction selection for the 32-bit case.

2154

SDNode *Select32(SDNode *N, bool LateMask, unsigned *InstCnt) {

2155

SDLoc dl(N);

2156

SDValue Res;

2157

2158

if (InstCnt) *InstCnt = 0;

2159

2160

// Take care of cases that should use andi/andis first.

2161

SelectAndParts32(dl, Res, InstCnt);

2162

2163

// If we've not yet selected a 'starting' instruction, and we have no zeros

2164

// to fill in, select the (Value, RLAmt) with the highest priority (largest

2165

// number of groups), and start with this rotated value.

2166

if ((!NeedMask || LateMask) && !Res) {

2167

ValueRotInfo &VRI = ValueRotsVec[0];

2168

if (VRI.RLAmt) {

2169

if (InstCnt) *InstCnt += 1;

2170

SDValue Ops[] =

2171

{ TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl),

2172

getI32Imm(0, dl), getI32Imm(31, dl) };

2173

Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops),

2174

0);

2175

} else {

2176

Res = TruncateToInt32(VRI.V, dl);

2177

}

2178

2179

// Now, remove all groups with this underlying value and rotation factor.

2180

eraseMatchingBitGroups([VRI](const BitGroup &BG) {

2181

return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;

2182

});

2183

}

2184

2185

if (InstCnt) *InstCnt += BitGroups.size();

2186

2187

// Insert the other groups (one at a time).

2188

for (auto &BG : BitGroups) {

2189

if (!Res) {

2190

SDValue Ops[] =

2191

{ TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl),

2192

getI32Imm(Bits.size() - BG.EndIdx - 1, dl),

2193

getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };

2194

Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);

2195

} else {

2196

SDValue Ops[] =

2197

{ Res, TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl),

2198

getI32Imm(Bits.size() - BG.EndIdx - 1, dl),

2199

getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };

2200

Res = SDValue(CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops), 0);

2201

}

2202

}

2203

2204

if (LateMask) {

2205

unsigned Mask = (unsigned) getZerosMask();

2206

2207

unsigned ANDIMask = (Mask & UINT16_MAX(65535)), ANDISMask = Mask >> 16;

2208

assert((ANDIMask != 0 || ANDISMask != 0) &&(static_cast <bool> ((ANDIMask != 0 || ANDISMask != 0) &&
"No set bits in zeros mask?") ? void (0) : __assert_fail ("(ANDIMask != 0 || ANDISMask != 0) && \"No set bits in zeros mask?\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 2209, __extension__
__PRETTY_FUNCTION__))

2209

"No set bits in zeros mask?")(static_cast <bool> ((ANDIMask != 0 || ANDISMask != 0) &&
"No set bits in zeros mask?") ? void (0) : __assert_fail ("(ANDIMask != 0 || ANDISMask != 0) && \"No set bits in zeros mask?\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 2209, __extension__
__PRETTY_FUNCTION__));

2210

2211

if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) +

2212

(unsigned) (ANDISMask != 0) +

2213

(unsigned) (ANDIMask != 0 && ANDISMask != 0);

2214

2215

SDValue ANDIVal, ANDISVal;

2216

if (ANDIMask != 0)

2217

ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI_rec, dl, MVT::i32,

2218

Res, getI32Imm(ANDIMask, dl)),

2219

0);

2220

if (ANDISMask != 0)

2221

ANDISVal =

2222

SDValue(CurDAG->getMachineNode(PPC::ANDIS_rec, dl, MVT::i32, Res,

2223

getI32Imm(ANDISMask, dl)),

2224

0);

2225

2226

if (!ANDIVal)

2227

Res = ANDISVal;

2228

else if (!ANDISVal)

2229

Res = ANDIVal;

2230

else

2231

Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,

2232

ANDIVal, ANDISVal), 0);

2233

}

2234

2235

return Res.getNode();

2236

}

2237

2238

// Returns how many instructions (1 or 2) are counted for a 64-bit
// rotate-and-mask (IsIns == false) or rotate-mask-and-insert (IsIns == true)
// with rotation RLAmt and mask bit range [MaskStart, MaskEnd].
// A Repl32 group always counts as one instruction.
unsigned SelectRotMask64Count(unsigned RLAmt, bool Repl32,
                              unsigned MaskStart, unsigned MaskEnd,
                              bool IsIns) {
  // In the notation used by the instructions, 'start' and 'end' are reversed
  // because bits are counted from high to low order.
  unsigned InstMaskStart = 64 - MaskEnd - 1,
           InstMaskEnd   = 64 - MaskStart - 1;

  if (Repl32)
    return 1;

  // A single instruction suffices when the mask touches either end of the
  // register (non-insert forms only), or when the mask end is tied to the
  // rotation amount.
  if ((!IsIns && (InstMaskEnd == 63 || InstMaskStart == 0)) ||
      InstMaskEnd == 63 - RLAmt)
    return 1;

  return 2;
}

2255

2256

// For 64-bit values, not all combinations of rotates and masks are

2257

// available. Produce one if it is available.

2258

SDValue SelectRotMask64(SDValue V, const SDLoc &dl, unsigned RLAmt,

2259

bool Repl32, unsigned MaskStart, unsigned MaskEnd,

2260

unsigned *InstCnt = nullptr) {

2261

// In the notation used by the instructions, 'start' and 'end' are reversed

2262

// because bits are counted from high to low order.

2263

unsigned InstMaskStart = 64 - MaskEnd - 1,

2264

InstMaskEnd = 64 - MaskStart - 1;

2265

2266

if (InstCnt) *InstCnt += 1;

2267

2268

if (Repl32) {

2269

// This rotation amount assumes that the lower 32 bits of the quantity

2270

// are replicated in the high 32 bits by the rotation operator (which is

2271

// done by rlwinm and friends).

2272

assert(InstMaskStart >= 32 && "Mask cannot start out of range")(static_cast <bool> (InstMaskStart >= 32 && "Mask cannot start out of range"
) ? void (0) : __assert_fail ("InstMaskStart >= 32 && \"Mask cannot start out of range\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 2272, __extension__
__PRETTY_FUNCTION__));

2273

assert(InstMaskEnd >= 32 && "Mask cannot end out of range")(static_cast <bool> (InstMaskEnd >= 32 && "Mask cannot end out of range"
) ? void (0) : __assert_fail ("InstMaskEnd >= 32 && \"Mask cannot end out of range\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 2273, __extension__
__PRETTY_FUNCTION__));

2274

SDValue Ops[] =

2275

{ ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),

2276

getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) };

2277

return SDValue(CurDAG->getMachineNode(PPC::RLWINM8, dl, MVT::i64,

2278

Ops), 0);

2279

}

2280

2281

if (InstMaskEnd == 63) {

2282

SDValue Ops[] =

2283

{ ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),

2284

getI32Imm(InstMaskStart, dl) };

2285

return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Ops), 0);

2286

}

2287

2288

if (InstMaskStart == 0) {

2289

SDValue Ops[] =

2290

{ ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),

2291

getI32Imm(InstMaskEnd, dl) };

2292

return SDValue(CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Ops), 0);

2293

}

2294

2295

if (InstMaskEnd == 63 - RLAmt) {

2296

SDValue Ops[] =

2297

{ ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),

2298

getI32Imm(InstMaskStart, dl) };

2299

return SDValue(CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, Ops), 0);

2300

}

2301

2302

// We cannot do this with a single instruction, so we'll use two. The

2303

// problem is that we're not free to choose both a rotation amount and mask

2304

// start and end independently. We can choose an arbitrary mask start and

2305

// end, but then the rotation amount is fixed. Rotation, however, can be

2306

// inverted, and so by applying an "inverse" rotation first, we can get the

2307

// desired result.

2308

if (InstCnt) *InstCnt += 1;

2309

2310

// The rotation mask for the second instruction must be MaskStart.

2311

unsigned RLAmt2 = MaskStart;

2312

// The first instruction must rotate V so that the overall rotation amount

2313

// is RLAmt.

2314

unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;

2315

if (RLAmt1)

2316

V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63);

2317

return SelectRotMask64(V, dl, RLAmt2, false, MaskStart, MaskEnd);

2318

}

2319

2320

// For 64-bit values, not all combinations of rotates and masks are

2321

// available. Produce a rotate-mask-and-insert if one is available.

2322

SDValue SelectRotMaskIns64(SDValue Base, SDValue V, const SDLoc &dl,

2323

unsigned RLAmt, bool Repl32, unsigned MaskStart,

2324

unsigned MaskEnd, unsigned *InstCnt = nullptr) {

2325

// In the notation used by the instructions, 'start' and 'end' are reversed

2326

// because bits are counted from high to low order.

2327

unsigned InstMaskStart = 64 - MaskEnd - 1,

2328

InstMaskEnd = 64 - MaskStart - 1;

2329

2330

if (InstCnt) *InstCnt += 1;

2331

2332

if (Repl32) {

2333

// This rotation amount assumes that the lower 32 bits of the quantity

2334

// are replicated in the high 32 bits by the rotation operator (which is

2335

// done by rlwinm and friends).

2336

assert(InstMaskStart >= 32 && "Mask cannot start out of range")(static_cast <bool> (InstMaskStart >= 32 && "Mask cannot start out of range"
) ? void (0) : __assert_fail ("InstMaskStart >= 32 && \"Mask cannot start out of range\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 2336, __extension__
__PRETTY_FUNCTION__));

2337

assert(InstMaskEnd >= 32 && "Mask cannot end out of range")(static_cast <bool> (InstMaskEnd >= 32 && "Mask cannot end out of range"
) ? void (0) : __assert_fail ("InstMaskEnd >= 32 && \"Mask cannot end out of range\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 2337, __extension__
__PRETTY_FUNCTION__));

2338

SDValue Ops[] =

2339

{ ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),

2340

getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) };

2341

return SDValue(CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64,

2342

Ops), 0);

2343

}

2344

2345

if (InstMaskEnd == 63 - RLAmt) {

2346

SDValue Ops[] =

2347

{ ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),

2348

getI32Imm(InstMaskStart, dl) };

2349

return SDValue(CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops), 0);

2350

}

2351

2352

// We cannot do this with a single instruction, so we'll use two. The

2353

// problem is that we're not free to choose both a rotation amount and mask

2354

// start and end independently. We can choose an arbitrary mask start and

2355

// end, but then the rotation amount is fixed. Rotation, however, can be

2356

// inverted, and so by applying an "inverse" rotation first, we can get the

2357

// desired result.

2358

if (InstCnt) *InstCnt += 1;

2359

2360

// The rotation mask for the second instruction must be MaskStart.

2361

unsigned RLAmt2 = MaskStart;

2362

// The first instruction must rotate V so that the overall rotation amount

2363

// is RLAmt.

2364

unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;

2365

if (RLAmt1)

2366

V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63);

2367

return SelectRotMaskIns64(Base, V, dl, RLAmt2, false, MaskStart, MaskEnd);

2368

}

2369

2370

void SelectAndParts64(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) {

2371

if (BPermRewriterNoMasking)

2372

return;

2373

2374

// The idea here is the same as in the 32-bit version, but with additional

2375

// complications from the fact that Repl32 might be true. Because we

2376

// aggressively convert bit groups to Repl32 form (which, for small

2377

// rotation factors, involves no other change), and then coalesce, it might

2378

// be the case that a single 64-bit masking operation could handle both

2379

// some Repl32 groups and some non-Repl32 groups. If converting to Repl32

2380

// form allowed coalescing, then we must use a 32-bit rotaton in order to

2381

// completely capture the new combined bit group.

2382

2383

for (ValueRotInfo &VRI : ValueRotsVec) {

2384

uint64_t Mask = 0;

2385

2386

// We need to add to the mask all bits from the associated bit groups.

2387

// If Repl32 is false, we need to add bits from bit groups that have

2388

// Repl32 true, but are trivially convertable to Repl32 false. Such a

2389

// group is trivially convertable if it overlaps only with the lower 32

2390

// bits, and the group has not been coalesced.

2391

auto MatchingBG = [VRI](const BitGroup &BG) {

2392

if (VRI.V != BG.V)

2393

return false;

2394

2395

unsigned EffRLAmt = BG.RLAmt;

2396

if (!VRI.Repl32 && BG.Repl32) {

2397

if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx <= BG.EndIdx &&

2398

!BG.Repl32Coalesced) {

2399

if (BG.Repl32CR)

2400

EffRLAmt += 32;

2401

} else {

2402

return false;

2403

}

2404

} else if (VRI.Repl32 != BG.Repl32) {

2405

return false;

2406

}

2407

2408

return VRI.RLAmt == EffRLAmt;

2409

};

2410

2411

for (auto &BG : BitGroups) {

2412

if (!MatchingBG(BG))

2413

continue;

2414

2415

if (BG.StartIdx <= BG.EndIdx) {

2416

for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i)

2417

Mask |= (UINT64_C(1)1UL << i);

2418

} else {

2419

for (unsigned i = BG.StartIdx; i < Bits.size(); ++i)

2420

Mask |= (UINT64_C(1)1UL << i);

2421

for (unsigned i = 0; i <= BG.EndIdx; ++i)

2422

Mask |= (UINT64_C(1)1UL << i);

2423

}

2424

}

2425

2426

// We can use the 32-bit andi/andis technique if the mask does not

2427

// require any higher-order bits. This can save an instruction compared

2428

// to always using the general 64-bit technique.

2429

bool Use32BitInsts = isUInt<32>(Mask);

2430

// Compute the masks for andi/andis that would be necessary.

2431

unsigned ANDIMask = (Mask & UINT16_MAX(65535)),

2432

ANDISMask = (Mask >> 16) & UINT16_MAX(65535);

2433

2434

bool NeedsRotate = VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask));

2435

2436

unsigned NumAndInsts = (unsigned) NeedsRotate +

2437

(unsigned) (bool) Res;

2438

unsigned NumOfSelectInsts = 0;

2439

selectI64Imm(CurDAG, dl, Mask, &NumOfSelectInsts);

2440

assert(NumOfSelectInsts > 0 && "Failed to select an i64 constant.")(static_cast <bool> (NumOfSelectInsts > 0 &&
"Failed to select an i64 constant.") ? void (0) : __assert_fail
("NumOfSelectInsts > 0 && \"Failed to select an i64 constant.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 2440, __extension__
__PRETTY_FUNCTION__));

2441

if (Use32BitInsts)

2442

NumAndInsts += (unsigned) (ANDIMask != 0) + (unsigned) (ANDISMask != 0) +

2443

(unsigned) (ANDIMask != 0 && ANDISMask != 0);

2444

else

2445

NumAndInsts += NumOfSelectInsts + /* and */ 1;

2446

2447

unsigned NumRLInsts = 0;

2448

bool FirstBG = true;

2449

bool MoreBG = false;

2450

for (auto &BG : BitGroups) {

2451

if (!MatchingBG(BG)) {

2452

MoreBG = true;

2453

continue;

2454

}

2455

NumRLInsts +=

2456

SelectRotMask64Count(BG.RLAmt, BG.Repl32, BG.StartIdx, BG.EndIdx,

2457

!FirstBG);

2458

FirstBG = false;

2459

}

2460

2461

LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-isel")) { dbgs() << "\t\trotation groups for " <<
VRI.V.getNode() << " RL: " << VRI.RLAmt <<
(VRI.Repl32 ? " (32):" : ":") << "\n\t\t\tisel using masking: "
<< NumAndInsts << " using rotates: " << NumRLInsts
<< "\n"; } } while (false)

2462

<< " RL: " << VRI.RLAmt << (VRI.Repl32 ? " (32):" : ":")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-isel")) { dbgs() << "\t\trotation groups for " <<
VRI.V.getNode() << " RL: " << VRI.RLAmt <<
(VRI.Repl32 ? " (32):" : ":") << "\n\t\t\tisel using masking: "
<< NumAndInsts << " using rotates: " << NumRLInsts
<< "\n"; } } while (false)

2463

<< "\n\t\t\tisel using masking: " << NumAndInstsdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-isel")) { dbgs() << "\t\trotation groups for " <<
VRI.V.getNode() << " RL: " << VRI.RLAmt <<
(VRI.Repl32 ? " (32):" : ":") << "\n\t\t\tisel using masking: "
<< NumAndInsts << " using rotates: " << NumRLInsts
<< "\n"; } } while (false)

2464

<< " using rotates: " << NumRLInsts << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-isel")) { dbgs() << "\t\trotation groups for " <<
VRI.V.getNode() << " RL: " << VRI.RLAmt <<
(VRI.Repl32 ? " (32):" : ":") << "\n\t\t\tisel using masking: "
<< NumAndInsts << " using rotates: " << NumRLInsts
<< "\n"; } } while (false);

2465

2466

// When we'd use andi/andis, we bias toward using the rotates (andi only

2467

// has a record form, and is cracked on POWER cores). However, when using

2468

// general 64-bit constant formation, bias toward the constant form,

2469

// because that exposes more opportunities for CSE.

2470

if (NumAndInsts > NumRLInsts)

2471

continue;

2472

// When merging multiple bit groups, instruction or is used.

2473

// But when rotate is used, rldimi can inert the rotated value into any

2474

// register, so instruction or can be avoided.

2475

if ((Use32BitInsts || MoreBG) && NumAndInsts == NumRLInsts)

2476

continue;

2477

2478

LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-isel")) { dbgs() << "\t\t\t\tusing masking\n"; } }
while (false);

2479

2480

if (InstCnt) *InstCnt += NumAndInsts;

2481

2482

SDValue VRot;

2483

// We actually need to generate a rotation if we have a non-zero rotation

2484

// factor or, in the Repl32 case, if we care about any of the

2485

// higher-order replicated bits. In the latter case, we generate a mask

2486

// backward so that it actually includes the entire 64 bits.

2487

if (VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask)))

2488

VRot = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,

2489

VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63);

2490

else

2491

VRot = VRI.V;

2492

2493

SDValue TotalVal;

2494

if (Use32BitInsts) {

2495

assert((ANDIMask != 0 || ANDISMask != 0) &&(static_cast <bool> ((ANDIMask != 0 || ANDISMask != 0) &&
"No set bits in mask when using 32-bit ands for 64-bit value"
) ? void (0) : __assert_fail ("(ANDIMask != 0 || ANDISMask != 0) && \"No set bits in mask when using 32-bit ands for 64-bit value\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 2496, __extension__
__PRETTY_FUNCTION__))

2496

"No set bits in mask when using 32-bit ands for 64-bit value")(static_cast <bool> ((ANDIMask != 0 || ANDISMask != 0) &&
"No set bits in mask when using 32-bit ands for 64-bit value"
) ? void (0) : __assert_fail ("(ANDIMask != 0 || ANDISMask != 0) && \"No set bits in mask when using 32-bit ands for 64-bit value\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 2496, __extension__
__PRETTY_FUNCTION__));

2497

2498

SDValue ANDIVal, ANDISVal;

2499

if (ANDIMask != 0)

2500

ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI8_rec, dl, MVT::i64,

2501

ExtendToInt64(VRot, dl),

2502

getI32Imm(ANDIMask, dl)),

2503

0);

2504

if (ANDISMask != 0)

2505

ANDISVal =

2506

SDValue(CurDAG->getMachineNode(PPC::ANDIS8_rec, dl, MVT::i64,

2507

ExtendToInt64(VRot, dl),

2508

getI32Imm(ANDISMask, dl)),

2509

0);

2510

2511

if (!ANDIVal)

2512

TotalVal = ANDISVal;

2513

else if (!ANDISVal)

2514

TotalVal = ANDIVal;

2515

else

2516

TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,

2517

ExtendToInt64(ANDIVal, dl), ANDISVal), 0);

2518

} else {

2519

TotalVal = SDValue(selectI64Imm(CurDAG, dl, Mask), 0);

2520

TotalVal =

2521

SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,

2522

ExtendToInt64(VRot, dl), TotalVal),

2523

0);

2524

}

2525

2526

if (!Res)

2527

Res = TotalVal;

2528

else

2529

Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,

2530

ExtendToInt64(Res, dl), TotalVal),

2531

0);

2532

2533

// Now, remove all groups with this underlying value and rotation

2534

// factor.

2535

eraseMatchingBitGroups(MatchingBG);

2536

}

2537

}

2538

2539

// Instruction selection for the 64-bit case.

2540

SDNode *Select64(SDNode *N, bool LateMask, unsigned *InstCnt) {
  // Emits the machine nodes implementing the collected bit groups for a
  // 64-bit value and returns the node producing the final result.
  //   N        - node being selected; only used for its debug location.
  //   LateMask - when true, zeros are applied with a final AND of the zeros
  //              mask instead of being handled while inserting each group.
  //   InstCnt  - optional; when non-null it is reset to zero and then
  //              accumulates the number of instructions emitted.
  SDLoc dl(N);
  SDValue Res;

  if (InstCnt)
    *InstCnt = 0;

  // Take care of cases that should use andi/andis first.
  SelectAndParts64(dl, Res, InstCnt);

  // If we've not yet selected a 'starting' instruction, and we have no zeros
  // to fill in, select the (Value, RLAmt) with the highest priority (largest
  // number of groups), and start with this rotated value.
  if (!Res && (!NeedMask || LateMask)) {
    // If we have both Repl32 groups and non-Repl32 groups, the non-Repl32
    // groups will come first, and so the VRI representing the largest number
    // of groups might not be first (it might be the first Repl32 groups).
    unsigned SeedIdx = 0;
    if (!ValueRotsVec[0].Repl32) {
      for (unsigned Idx = 0, NumVRs = ValueRotsVec.size(); Idx != NumVRs;
           ++Idx) {
        if (!ValueRotsVec[Idx].Repl32)
          continue;
        // Only the first Repl32 entry is ever considered.
        if (ValueRotsVec[Idx].NumGroups > ValueRotsVec[0].NumGroups)
          SeedIdx = Idx;
        break;
      }
    }

    ValueRotInfo &VRI = ValueRotsVec[SeedIdx];

    // Matches bit groups that share the seed's value, rotation amount and
    // Repl32 flag. VRI is captured by copy.
    auto SharesSeedValueRot = [VRI](const BitGroup &BG) {
      return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt &&
             BG.Repl32 == VRI.Repl32;
    };

    bool NeedsRotate = VRI.RLAmt != 0;
    if (!NeedsRotate && VRI.Repl32) {
      NeedsRotate = llvm::any_of(BitGroups, [&](const BitGroup &BG) {
        if (!SharesSeedValueRot(BG))
          return false;
        // We don't need a rotate if the bit group is confined to the lower
        // 32 bits.
        const bool ConfinedToLowWord =
            BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx < BG.EndIdx;
        return !ConfinedToLowWord;
      });
    }

    if (NeedsRotate)
      Res = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,
                            VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63,
                            InstCnt);
    else
      Res = VRI.V;

    // Now, remove all groups with this underlying value and rotation factor.
    if (Res)
      eraseMatchingBitGroups(SharesSeedValueRot);
  }

  // Because 64-bit rotates are more flexible than inserts, we might have a
  // preference regarding which one we do first (to save one instruction).
  if (!Res) {
    auto CheaperAsRotate = [&](const BitGroup &BG) {
      return SelectRotMask64Count(BG.RLAmt, BG.Repl32, BG.StartIdx, BG.EndIdx,
                                  false) <
             SelectRotMask64Count(BG.RLAmt, BG.Repl32, BG.StartIdx, BG.EndIdx,
                                  true);
    };
    auto Preferred = llvm::find_if(BitGroups, CheaperAsRotate);
    if (Preferred != BitGroups.end() && Preferred != BitGroups.begin()) {
      // Move the preferred group to the front so it is emitted first.
      BitGroup FirstGroup = *Preferred;
      BitGroups.erase(Preferred);
      BitGroups.insert(BitGroups.begin(), FirstGroup);
    }
  }

  // Insert the other groups (one at a time).
  for (auto &BG : BitGroups) {
    if (!Res)
      Res = SelectRotMask64(BG.V, dl, BG.RLAmt, BG.Repl32, BG.StartIdx,
                            BG.EndIdx, InstCnt);
    else
      Res = SelectRotMaskIns64(Res, BG.V, dl, BG.RLAmt, BG.Repl32,
                               BG.StartIdx, BG.EndIdx, InstCnt);
  }

  if (!LateMask)
    return Res.getNode();

  uint64_t Mask = getZerosMask();

  // We can use the 32-bit andi/andis technique if the mask does not
  // require any higher-order bits. This can save an instruction compared
  // to always using the general 64-bit technique.
  bool Use32BitInsts = isUInt<32>(Mask);
  // Compute the masks for andi/andis that would be necessary.
  unsigned ANDIMask = (Mask & UINT16_MAX),
           ANDISMask = (Mask >> 16) & UINT16_MAX;

  if (!Use32BitInsts) {
    // General case: materialize the mask and AND it in.
    unsigned NumOfSelectInsts = 0;
    SDValue MaskVal =
        SDValue(selectI64Imm(CurDAG, dl, Mask, &NumOfSelectInsts), 0);
    Res = SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
                                         ExtendToInt64(Res, dl), MaskVal),
                  0);
    if (InstCnt)
      *InstCnt += NumOfSelectInsts + /* and */ 1;
    return Res.getNode();
  }

  assert((ANDIMask != 0 || ANDISMask != 0) &&
         "No set bits in mask when using 32-bit ands for 64-bit value");

  const bool NeedsLowAnd = ANDIMask != 0;
  const bool NeedsHighAnd = ANDISMask != 0;

  // One instruction per non-zero half, plus an OR when both are needed.
  if (InstCnt)
    *InstCnt += unsigned(NeedsLowAnd) + unsigned(NeedsHighAnd) +
                unsigned(NeedsLowAnd && NeedsHighAnd);

  SDValue ANDIVal, ANDISVal;
  if (NeedsLowAnd)
    ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI8_rec, dl, MVT::i64,
                                             ExtendToInt64(Res, dl),
                                             getI32Imm(ANDIMask, dl)),
                      0);
  if (NeedsHighAnd)
    ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDIS8_rec, dl, MVT::i64,
                                              ExtendToInt64(Res, dl),
                                              getI32Imm(ANDISMask, dl)),
                       0);

  if (!ANDIVal)
    Res = ANDISVal;
  else if (!ANDISVal)
    Res = ANDIVal;
  else
    Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
                                         ExtendToInt64(ANDIVal, dl), ANDISVal),
                  0);

  return Res.getNode();
}

2682

2683

SDNode *Select(SDNode *N, bool LateMask, unsigned *InstCnt = nullptr) {
  // Runs one selection attempt with the given masking strategy: collects the
  // bit groups and value/rotation info, then dispatches on the bit width.
  // Returns nullptr when no bit groups were collected. InstCnt is forwarded
  // unchanged to the width-specific selector.

  // Fill in BitGroups.
  collectBitGroups(LateMask);
  if (BitGroups.empty())
    return nullptr;

  // For 64-bit values, figure out when we can use 32-bit instructions.
  if (Bits.size() == 64)
    assignRepl32BitGroups();

  // Fill in ValueRotsVec.
  collectValueRotInfo();

  if (Bits.size() == 32)
    return Select32(N, LateMask, InstCnt);

  assert(Bits.size() == 64 && "Not 64 bits here?");
  return Select64(N, LateMask, InstCnt);
}

2705

2706

void eraseMatchingBitGroups(function_ref<bool(const BitGroup &)> F) {
  // Drops every bit group for which F returns true; the relative order of
  // the remaining groups is preserved.
  BitGroups.erase(llvm::remove_if(BitGroups, F), BitGroups.end());
}

2709

2710

SmallVector<ValueBit, 64> Bits;

2711

2712

bool NeedMask = false;

2713

SmallVector<unsigned, 64> RLAmt;

2714

2715

SmallVector<BitGroup, 16> BitGroups;

2716

2717

DenseMap<std::pair<SDValue, unsigned>, ValueRotInfo> ValueRots;

2718

SmallVector<ValueRotInfo, 16> ValueRotsVec;

2719

2720

SelectionDAG *CurDAG = nullptr;

2721

2722

public:

2723

BitPermutationSelector(SelectionDAG *DAG)

2724

: CurDAG(DAG) {}

2725

2726

// Here we try to match complex bit permutations into a set of

2727

// rotate-and-shift/shift/and/or instructions, using a set of heuristics

2728

// known to produce optimal code for common cases (like i32 byte swapping).

2729

SDNode *Select(SDNode *N) {
  // Entry point. Returns the selected node, or nullptr when the bits of N
  // cannot be analysed (getValueBits fails) or no bit groups are found.
  Memoizer.clear();
  auto ValueBitsInfo =
      getValueBits(SDValue(N, 0), N->getValueType(0).getSizeInBits());
  if (!ValueBitsInfo.first)
    return nullptr;
  Bits = std::move(*ValueBitsInfo.second);

  LLVM_DEBUG(dbgs() << "Considering bit-permutation-based instruction"
                       " selection for: ");
  LLVM_DEBUG(N->dump(CurDAG));

  // Fill in RLAmt and set NeedMask.
  computeRotationAmounts();

  // Without zeros in the result there is only one strategy to try.
  if (!NeedMask)
    return Select(N, false);

  // We currently have two techniques for handling results with zeros: early
  // masking (the default) and late masking. Late masking is sometimes more
  // efficient, but because the structure of the bit groups is different, it
  // is hard to tell without generating both and comparing the results. With
  // late masking, we ignore zeros in the resulting value when inserting each
  // set of bit groups, and then mask in the zeros at the end. With early
  // masking, we only insert the non-zero parts of the result at every step.

  unsigned EarlyMaskCnt = 0, LateMaskCnt = 0;

  LLVM_DEBUG(dbgs() << "\tEarly masking:\n");
  SDNode *EarlyMaskNode = Select(N, false, &EarlyMaskCnt);
  LLVM_DEBUG(dbgs() << "\t\tisel would use " << EarlyMaskCnt
                    << " instructions\n");

  LLVM_DEBUG(dbgs() << "\tLate masking:\n");
  SDNode *LateMaskNode = Select(N, true, &LateMaskCnt);
  LLVM_DEBUG(dbgs() << "\t\tisel would use " << LateMaskCnt
                    << " instructions\n");

  // Ties go to early masking.
  const bool PreferEarlyMask = EarlyMaskCnt <= LateMaskCnt;
  LLVM_DEBUG(dbgs() << (PreferEarlyMask ? "\tUsing early-masking for isel\n"
                                        : "\tUsing late-masking for isel\n"));
  return PreferEarlyMask ? EarlyMaskNode : LateMaskNode;
}

2773

};

2774

2775

class IntegerCompareEliminator {

2776

SelectionDAG *CurDAG;

2777

PPCDAGToDAGISel *S;

2778

// Conversion type for interpreting results of a 32-bit instruction as

2779

// a 64-bit value or vice versa.

2780

enum ExtOrTruncConversion { Ext, Trunc };

2781

2782

// Modifiers to guide how an ISD::SETCC node's result is to be computed

2783

// in a GPR.

2784

// ZExtOrig - use the original condition code, zero-extend value

2785

// ZExtInvert - invert the condition code, zero-extend value

2786

// SExtOrig - use the original condition code, sign-extend value

2787

// SExtInvert - invert the condition code, sign-extend value

2788

enum SetccInGPROpts { ZExtOrig, ZExtInvert, SExtOrig, SExtInvert };

2789

2790

// Comparisons against zero to emit GPR code sequences for. Each of these

2791

// sequences may need to be emitted for two or more equivalent patterns.

2792

// For example (a >= 0) == (a > -1). The direction of the comparison (</>)

2793

// matters as well as the extension type: sext (-1/0), zext (1/0).

2794

// GEZExt - (zext (LHS >= 0))

2795

// GESExt - (sext (LHS >= 0))

2796

// LEZExt - (zext (LHS <= 0))

2797

// LESExt - (sext (LHS <= 0))

2798

enum ZeroCompare { GEZExt, GESExt, LEZExt, LESExt };

2799

2800

SDNode *tryEXTEND(SDNode *N);

2801

SDNode *tryLogicOpOfCompares(SDNode *N);

2802

SDValue computeLogicOpInGPR(SDValue LogicOp);

2803

SDValue signExtendInputIfNeeded(SDValue Input);

2804

SDValue zeroExtendInputIfNeeded(SDValue Input);

2805

SDValue addExtOrTrunc(SDValue NatWidthRes, ExtOrTruncConversion Conv);

2806

SDValue getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl,

2807

ZeroCompare CmpTy);

2808

SDValue get32BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,

2809

int64_t RHSValue, SDLoc dl);

2810

SDValue get32BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,

2811

int64_t RHSValue, SDLoc dl);

2812

SDValue get64BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,

2813

int64_t RHSValue, SDLoc dl);

2814

SDValue get64BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,

2815

int64_t RHSValue, SDLoc dl);

2816

SDValue getSETCCInGPR(SDValue Compare, SetccInGPROpts ConvOpts);

2817

2818

public:

2819

IntegerCompareEliminator(SelectionDAG *DAG,

2820

PPCDAGToDAGISel *Sel) : CurDAG(DAG), S(Sel) {

2821

assert(CurDAG->getTargetLoweringInfo()(static_cast <bool> (CurDAG->getTargetLoweringInfo()
.getPointerTy(CurDAG->getDataLayout()).getSizeInBits() ==
64 && "Only expecting to use this on 64 bit targets."
) ? void (0) : __assert_fail ("CurDAG->getTargetLoweringInfo() .getPointerTy(CurDAG->getDataLayout()).getSizeInBits() == 64 && \"Only expecting to use this on 64 bit targets.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 2823, __extension__
__PRETTY_FUNCTION__))

2822

.getPointerTy(CurDAG->getDataLayout()).getSizeInBits() == 64 &&(static_cast <bool> (CurDAG->getTargetLoweringInfo()
.getPointerTy(CurDAG->getDataLayout()).getSizeInBits() ==
64 && "Only expecting to use this on 64 bit targets."
) ? void (0) : __assert_fail ("CurDAG->getTargetLoweringInfo() .getPointerTy(CurDAG->getDataLayout()).getSizeInBits() == 64 && \"Only expecting to use this on 64 bit targets.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 2823, __extension__
__PRETTY_FUNCTION__))

2823

"Only expecting to use this on 64 bit targets.")(static_cast <bool> (CurDAG->getTargetLoweringInfo()
.getPointerTy(CurDAG->getDataLayout()).getSizeInBits() ==
64 && "Only expecting to use this on 64 bit targets."
) ? void (0) : __assert_fail ("CurDAG->getTargetLoweringInfo() .getPointerTy(CurDAG->getDataLayout()).getSizeInBits() == 64 && \"Only expecting to use this on 64 bit targets.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 2823, __extension__
__PRETTY_FUNCTION__));

2824

}

2825

SDNode *Select(SDNode *N) {

2826

if (CmpInGPR == ICGPR_None)

2827

return nullptr;

2828

switch (N->getOpcode()) {

2829

default: break;

2830

case ISD::ZERO_EXTEND:

2831

if (CmpInGPR == ICGPR_Sext || CmpInGPR == ICGPR_SextI32 ||

2832

CmpInGPR == ICGPR_SextI64)

2833

return nullptr;

2834

[[fallthrough]];

2835

case ISD::SIGN_EXTEND:

2836

if (CmpInGPR == ICGPR_Zext || CmpInGPR == ICGPR_ZextI32 ||

2837

CmpInGPR == ICGPR_ZextI64)

2838

return nullptr;

2839

return tryEXTEND(N);

2840

case ISD::AND:

2841

case ISD::OR:

2842

case ISD::XOR:

2843

return tryLogicOpOfCompares(N);

2844

}

2845

return nullptr;

2846

}

2847

};

2848

2849

// The obvious case for wanting to keep the value in a GPR. Namely, the

2850

// result of the comparison is actually needed in a GPR.

2851

SDNode *IntegerCompareEliminator::tryEXTEND(SDNode *N) {

2852

assert((N->getOpcode() == ISD::ZERO_EXTEND ||(static_cast <bool> ((N->getOpcode() == ISD::ZERO_EXTEND
|| N->getOpcode() == ISD::SIGN_EXTEND) && "Expecting a zero/sign extend node!"
) ? void (0) : __assert_fail ("(N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::SIGN_EXTEND) && \"Expecting a zero/sign extend node!\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 2854, __extension__
__PRETTY_FUNCTION__))

2853

N->getOpcode() == ISD::SIGN_EXTEND) &&(static_cast <bool> ((N->getOpcode() == ISD::ZERO_EXTEND
|| N->getOpcode() == ISD::SIGN_EXTEND) && "Expecting a zero/sign extend node!"
) ? void (0) : __assert_fail ("(N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::SIGN_EXTEND) && \"Expecting a zero/sign extend node!\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 2854, __extension__
__PRETTY_FUNCTION__))

2854

"Expecting a zero/sign extend node!")(static_cast <bool> ((N->getOpcode() == ISD::ZERO_EXTEND
|| N->getOpcode() == ISD::SIGN_EXTEND) && "Expecting a zero/sign extend node!"
) ? void (0) : __assert_fail ("(N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::SIGN_EXTEND) && \"Expecting a zero/sign extend node!\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 2854, __extension__
__PRETTY_FUNCTION__));

2855

SDValue WideRes;

2856

// If we are zero-extending the result of a logical operation on i1

2857

// values, we can keep the values in GPRs.

2858

if (ISD::isBitwiseLogicOp(N->getOperand(0).getOpcode()) &&

2859

N->getOperand(0).getValueType() == MVT::i1 &&

2860

N->getOpcode() == ISD::ZERO_EXTEND)

2861

WideRes = computeLogicOpInGPR(N->getOperand(0));

2862

else if (N->getOperand(0).getOpcode() != ISD::SETCC)

2863

return nullptr;

2864

else

2865

WideRes =

2866

getSETCCInGPR(N->getOperand(0),

2867

N->getOpcode() == ISD::SIGN_EXTEND ?

2868

SetccInGPROpts::SExtOrig : SetccInGPROpts::ZExtOrig);

2869

2870

if (!WideRes)

2871

return nullptr;

2872

2873

SDLoc dl(N);

2874

bool Input32Bit = WideRes.getValueType() == MVT::i32;

2875

bool Output32Bit = N->getValueType(0) == MVT::i32;

2876

2877

NumSextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 1 : 0;

2878

NumZextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 0 : 1;

2879

2880

SDValue ConvOp = WideRes;

2881

if (Input32Bit != Output32Bit)

2882

ConvOp = addExtOrTrunc(WideRes, Input32Bit ? ExtOrTruncConversion::Ext :

2883

ExtOrTruncConversion::Trunc);

2884

return ConvOp.getNode();

2885

}

2886

2887

// Attempt to perform logical operations on the results of comparisons while

2888

// keeping the values in GPRs. Without doing so, these would end up being

2889

// lowered to CR-logical operations which suffer from significant latency and

2890

// low ILP.

2891

SDNode *IntegerCompareEliminator::tryLogicOpOfCompares(SDNode *N) {

2892

if (N->getValueType(0) != MVT::i1)

2893

return nullptr;

2894

assert(ISD::isBitwiseLogicOp(N->getOpcode()) &&(static_cast <bool> (ISD::isBitwiseLogicOp(N->getOpcode
()) && "Expected a logic operation on setcc results."
) ? void (0) : __assert_fail ("ISD::isBitwiseLogicOp(N->getOpcode()) && \"Expected a logic operation on setcc results.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 2895, __extension__
__PRETTY_FUNCTION__))

2895

"Expected a logic operation on setcc results.")(static_cast <bool> (ISD::isBitwiseLogicOp(N->getOpcode
()) && "Expected a logic operation on setcc results."
) ? void (0) : __assert_fail ("ISD::isBitwiseLogicOp(N->getOpcode()) && \"Expected a logic operation on setcc results.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 2895, __extension__
__PRETTY_FUNCTION__));

2896

SDValue LoweredLogical = computeLogicOpInGPR(SDValue(N, 0));

2897

if (!LoweredLogical)

2898

return nullptr;

2899

2900

SDLoc dl(N);

2901

bool IsBitwiseNegate = LoweredLogical.getMachineOpcode() == PPC::XORI8;

2902

unsigned SubRegToExtract = IsBitwiseNegate ? PPC::sub_eq : PPC::sub_gt;

2903

SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);

2904

SDValue LHS = LoweredLogical.getOperand(0);

2905

SDValue RHS = LoweredLogical.getOperand(1);

2906

SDValue WideOp;

2907

SDValue OpToConvToRecForm;

2908

2909

// Look through any 32-bit to 64-bit implicit extend nodes to find the

2910

// opcode that is input to the XORI.

2911

if (IsBitwiseNegate &&

2912

LoweredLogical.getOperand(0).getMachineOpcode() == PPC::INSERT_SUBREG)

2913

OpToConvToRecForm = LoweredLogical.getOperand(0).getOperand(1);

2914

else if (IsBitwiseNegate)

2915

// If the input to the XORI isn't an extension, that's what we're after.

2916

OpToConvToRecForm = LoweredLogical.getOperand(0);

2917

else

2918

// If this is not an XORI, it is a reg-reg logical op and we can convert

2919

// it to record-form.

2920

OpToConvToRecForm = LoweredLogical;

2921

2922

// Get the record-form version of the node we're looking to use to get the

2923

// CR result from.

2924

uint16_t NonRecOpc = OpToConvToRecForm.getMachineOpcode();

2925

int NewOpc = PPCInstrInfo::getRecordFormOpcode(NonRecOpc);

2926

2927

// Convert the right node to record-form. This is either the logical we're

2928

// looking at or it is the input node to the negation (if we're looking at

2929

// a bitwise negation).

2930

if (NewOpc != -1 && IsBitwiseNegate) {

2931

// The input to the XORI has a record-form. Use it.

2932

assert(LoweredLogical.getConstantOperandVal(1) == 1 &&(static_cast <bool> (LoweredLogical.getConstantOperandVal
(1) == 1 && "Expected a PPC::XORI8 only for bitwise negation."
) ? void (0) : __assert_fail ("LoweredLogical.getConstantOperandVal(1) == 1 && \"Expected a PPC::XORI8 only for bitwise negation.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 2933, __extension__
__PRETTY_FUNCTION__))

2933

"Expected a PPC::XORI8 only for bitwise negation.")(static_cast <bool> (LoweredLogical.getConstantOperandVal
(1) == 1 && "Expected a PPC::XORI8 only for bitwise negation."
) ? void (0) : __assert_fail ("LoweredLogical.getConstantOperandVal(1) == 1 && \"Expected a PPC::XORI8 only for bitwise negation.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 2933, __extension__
__PRETTY_FUNCTION__));

2934

// Emit the record-form instruction.

2935

std::vector<SDValue> Ops;

2936

for (int i = 0, e = OpToConvToRecForm.getNumOperands(); i < e; i++)

2937

Ops.push_back(OpToConvToRecForm.getOperand(i));

2938

2939

WideOp =

2940

SDValue(CurDAG->getMachineNode(NewOpc, dl,

2941

OpToConvToRecForm.getValueType(),

2942

MVT::Glue, Ops), 0);

2943

} else {

2944

assert((NewOpc != -1 || !IsBitwiseNegate) &&(static_cast <bool> ((NewOpc != -1 || !IsBitwiseNegate)
&& "No record form available for AND8/OR8/XOR8?") ? void
(0) : __assert_fail ("(NewOpc != -1 || !IsBitwiseNegate) && \"No record form available for AND8/OR8/XOR8?\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 2945, __extension__
__PRETTY_FUNCTION__))

2945

"No record form available for AND8/OR8/XOR8?")(static_cast <bool> ((NewOpc != -1 || !IsBitwiseNegate)
&& "No record form available for AND8/OR8/XOR8?") ? void
(0) : __assert_fail ("(NewOpc != -1 || !IsBitwiseNegate) && \"No record form available for AND8/OR8/XOR8?\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 2945, __extension__
__PRETTY_FUNCTION__));

2946

WideOp =

2947

SDValue(CurDAG->getMachineNode(NewOpc == -1 ? PPC::ANDI8_rec : NewOpc,

2948

dl, MVT::i64, MVT::Glue, LHS, RHS),

2949

0);

2950

}

2951

2952

// Select this node to a single bit from CR0 set by the record-form node

2953

// just created. For bitwise negation, use the EQ bit which is the equivalent

2954

// of negating the result (i.e. it is a bit set when the result of the

2955

// operation is zero).

2956

SDValue SRIdxVal =

2957

CurDAG->getTargetConstant(SubRegToExtract, dl, MVT::i32);

2958

SDValue CRBit =

2959

SDValue(CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,

2960

MVT::i1, CR0Reg, SRIdxVal,

2961

WideOp.getValue(1)), 0);

2962

return CRBit.getNode();

2963

}

2964

2965

// Lower a logical operation on i1 values into a GPR sequence if possible.

2966

// The result can be kept in a GPR if requested.

2967

// Three types of inputs can be handled:

2968

// - SETCC

2969

// - TRUNCATE

2970

// - Logical operation (AND/OR/XOR)

2971

// There is also a special case that is handled (namely a complement operation

2972

// achieved with xor %a, -1).

2973

SDValue IntegerCompareEliminator::computeLogicOpInGPR(SDValue LogicOp) {

2974

assert(ISD::isBitwiseLogicOp(LogicOp.getOpcode()) &&(static_cast <bool> (ISD::isBitwiseLogicOp(LogicOp.getOpcode
()) && "Can only handle logic operations here.") ? void
(0) : __assert_fail ("ISD::isBitwiseLogicOp(LogicOp.getOpcode()) && \"Can only handle logic operations here.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 2975, __extension__
__PRETTY_FUNCTION__))

2975

"Can only handle logic operations here.")(static_cast <bool> (ISD::isBitwiseLogicOp(LogicOp.getOpcode
()) && "Can only handle logic operations here.") ? void
(0) : __assert_fail ("ISD::isBitwiseLogicOp(LogicOp.getOpcode()) && \"Can only handle logic operations here.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 2975, __extension__
__PRETTY_FUNCTION__));

2976

assert(LogicOp.getValueType() == MVT::i1 &&(static_cast <bool> (LogicOp.getValueType() == MVT::i1 &&
"Can only handle logic operations on i1 values here.") ? void
(0) : __assert_fail ("LogicOp.getValueType() == MVT::i1 && \"Can only handle logic operations on i1 values here.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 2977, __extension__
__PRETTY_FUNCTION__))

2977

"Can only handle logic operations on i1 values here.")(static_cast <bool> (LogicOp.getValueType() == MVT::i1 &&
"Can only handle logic operations on i1 values here.") ? void
(0) : __assert_fail ("LogicOp.getValueType() == MVT::i1 && \"Can only handle logic operations on i1 values here.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 2977, __extension__
__PRETTY_FUNCTION__));

2978

SDLoc dl(LogicOp);

2979

SDValue LHS, RHS;

2980

2981

// Special case: xor %a, -1

2982

bool IsBitwiseNegation = isBitwiseNot(LogicOp);

2983

2984

// Produces a GPR sequence for each operand of the binary logic operation.

2985

// For SETCC, it produces the respective comparison, for TRUNCATE it truncates

2986

// the value in a GPR and for logic operations, it will recursively produce

2987

// a GPR sequence for the operation.

2988

auto getLogicOperand = [&] (SDValue Operand) -> SDValue {

2989

unsigned OperandOpcode = Operand.getOpcode();

2990

if (OperandOpcode == ISD::SETCC)

2991

return getSETCCInGPR(Operand, SetccInGPROpts::ZExtOrig);

2992

else if (OperandOpcode == ISD::TRUNCATE) {

2993

SDValue InputOp = Operand.getOperand(0);

2994

EVT InVT = InputOp.getValueType();

2995

return SDValue(CurDAG->getMachineNode(InVT == MVT::i32 ? PPC::RLDICL_32 :

2996

PPC::RLDICL, dl, InVT, InputOp,

2997

S->getI64Imm(0, dl),

2998

S->getI64Imm(63, dl)), 0);

2999

} else if (ISD::isBitwiseLogicOp(OperandOpcode))

3000

return computeLogicOpInGPR(Operand);

3001

return SDValue();

3002

};

3003

LHS = getLogicOperand(LogicOp.getOperand(0));

3004

RHS = getLogicOperand(LogicOp.getOperand(1));

3005

3006

// If a GPR sequence can't be produced for the LHS we can't proceed.

3007

// Not producing a GPR sequence for the RHS is only a problem if this isn't

3008

// a bitwise negation operation.

3009

if (!LHS || (!RHS && !IsBitwiseNegation))

3010

return SDValue();

3011

3012

NumLogicOpsOnComparison++;

3013

3014

// We will use the inputs as 64-bit values.

3015

if (LHS.getValueType() == MVT::i32)

3016

LHS = addExtOrTrunc(LHS, ExtOrTruncConversion::Ext);

3017

if (!IsBitwiseNegation && RHS.getValueType() == MVT::i32)

3018

RHS = addExtOrTrunc(RHS, ExtOrTruncConversion::Ext);

3019

3020

unsigned NewOpc;

3021

switch (LogicOp.getOpcode()) {

3022

default: llvm_unreachable("Unknown logic operation.")::llvm::llvm_unreachable_internal("Unknown logic operation.",
"llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 3022);

3023

case ISD::AND: NewOpc = PPC::AND8; break;

3024

case ISD::OR: NewOpc = PPC::OR8; break;

3025

case ISD::XOR: NewOpc = PPC::XOR8; break;

3026

}

3027

3028

if (IsBitwiseNegation) {

3029

RHS = S->getI64Imm(1, dl);

3030

NewOpc = PPC::XORI8;

3031

}

3032

3033

return SDValue(CurDAG->getMachineNode(NewOpc, dl, MVT::i64, LHS, RHS), 0);

3034

3035

}

3036

3037

/// If the value isn't guaranteed to be sign-extended to 64-bits, extend it.

3038

/// Otherwise just reinterpret it as a 64-bit value.

3039

/// Useful when emitting comparison code for 32-bit values without using

3040

/// the compare instruction (which only considers the lower 32-bits).

3041

SDValue IntegerCompareEliminator::signExtendInputIfNeeded(SDValue Input) {

3042

assert(Input.getValueType() == MVT::i32 &&(static_cast <bool> (Input.getValueType() == MVT::i32 &&
"Can only sign-extend 32-bit values here.") ? void (0) : __assert_fail
("Input.getValueType() == MVT::i32 && \"Can only sign-extend 32-bit values here.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 3043, __extension__
__PRETTY_FUNCTION__))

3043

"Can only sign-extend 32-bit values here.")(static_cast <bool> (Input.getValueType() == MVT::i32 &&
"Can only sign-extend 32-bit values here.") ? void (0) : __assert_fail
("Input.getValueType() == MVT::i32 && \"Can only sign-extend 32-bit values here.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 3043, __extension__
__PRETTY_FUNCTION__));

3044

unsigned Opc = Input.getOpcode();

3045

3046

// The value was sign extended and then truncated to 32-bits. No need to

3047

// sign extend it again.

3048

if (Opc == ISD::TRUNCATE &&

3049

(Input.getOperand(0).getOpcode() == ISD::AssertSext ||

3050

Input.getOperand(0).getOpcode() == ISD::SIGN_EXTEND))

3051

return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);

3052

3053

LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input);

3054

// The input is a sign-extending load. All ppc sign-extending loads

3055

// sign-extend to the full 64-bits.

3056

if (InputLoad && InputLoad->getExtensionType() == ISD::SEXTLOAD)

3057

return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);

3058

3059

ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input);

3060

// We don't sign-extend constants.

3061

if (InputConst)

3062

return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);

3063

3064

SDLoc dl(Input);

3065

SignExtensionsAdded++;

3066

return SDValue(CurDAG->getMachineNode(PPC::EXTSW_32_64, dl,

3067

MVT::i64, Input), 0);

3068

}

3069

3070

/// If the value isn't guaranteed to be zero-extended to 64-bits, extend it.

3071

/// Otherwise just reinterpret it as a 64-bit value.

3072

/// Useful when emitting comparison code for 32-bit values without using

3073

/// the compare instruction (which only considers the lower 32-bits).

3074

SDValue IntegerCompareEliminator::zeroExtendInputIfNeeded(SDValue Input) {

3075

assert(Input.getValueType() == MVT::i32 &&(static_cast <bool> (Input.getValueType() == MVT::i32 &&
"Can only zero-extend 32-bit values here.") ? void (0) : __assert_fail
("Input.getValueType() == MVT::i32 && \"Can only zero-extend 32-bit values here.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 3076, __extension__
__PRETTY_FUNCTION__))

3076

"Can only zero-extend 32-bit values here.")(static_cast <bool> (Input.getValueType() == MVT::i32 &&
"Can only zero-extend 32-bit values here.") ? void (0) : __assert_fail
("Input.getValueType() == MVT::i32 && \"Can only zero-extend 32-bit values here.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 3076, __extension__
__PRETTY_FUNCTION__));

3077

unsigned Opc = Input.getOpcode();

3078

3079

// The only condition under which we can omit the actual extend instruction:

3080

// - The value is a positive constant

3081

// - The value comes from a load that isn't a sign-extending load

3082

// An ISD::TRUNCATE needs to be zero-extended unless it is fed by a zext.

3083

bool IsTruncateOfZExt = Opc == ISD::TRUNCATE &&

3084

(Input.getOperand(0).getOpcode() == ISD::AssertZext ||

3085

Input.getOperand(0).getOpcode() == ISD::ZERO_EXTEND);

3086

if (IsTruncateOfZExt)

3087

return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);

3088

3089

ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input);

3090

if (InputConst && InputConst->getSExtValue() >= 0)

3091

return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);

3092

3093

LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input);

3094

// The input is a load that doesn't sign-extend (it will be zero-extended).

3095

if (InputLoad && InputLoad->getExtensionType() != ISD::SEXTLOAD)

3096

return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);

3097

3098

// None of the above, need to zero-extend.

3099

SDLoc dl(Input);

3100

ZeroExtensionsAdded++;

3101

return SDValue(CurDAG->getMachineNode(PPC::RLDICL_32_64, dl, MVT::i64, Input,

3102

S->getI64Imm(0, dl),

3103

S->getI64Imm(32, dl)), 0);

3104

}

3105

3106

// Handle a 32-bit value in a 64-bit register and vice-versa. These are of

3107

// course not actual zero/sign extensions that will generate machine code,

3108

// they're just a way to reinterpret a 32 bit value in a register as a

3109

// 64 bit value and vice-versa.

3110

SDValue IntegerCompareEliminator::addExtOrTrunc(SDValue NatWidthRes,

3111

ExtOrTruncConversion Conv) {

3112

SDLoc dl(NatWidthRes);

3113

3114

// For reinterpreting 32-bit values as 64 bit values, we generate

3115

// INSERT_SUBREG IMPLICIT_DEF:i64, <input>, TargetConstant:i32<1>

3116

if (Conv == ExtOrTruncConversion::Ext) {

3117

SDValue ImDef(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, MVT::i64), 0);

3118

SDValue SubRegIdx =

3119

CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);

3120

return SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, MVT::i64,

3121

ImDef, NatWidthRes, SubRegIdx), 0);

3122

}

3123

3124

assert(Conv == ExtOrTruncConversion::Trunc &&(static_cast <bool> (Conv == ExtOrTruncConversion::Trunc
&& "Unknown convertion between 32 and 64 bit values."
) ? void (0) : __assert_fail ("Conv == ExtOrTruncConversion::Trunc && \"Unknown convertion between 32 and 64 bit values.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 3125, __extension__
__PRETTY_FUNCTION__))

3125

"Unknown convertion between 32 and 64 bit values.")(static_cast <bool> (Conv == ExtOrTruncConversion::Trunc
&& "Unknown convertion between 32 and 64 bit values."
) ? void (0) : __assert_fail ("Conv == ExtOrTruncConversion::Trunc && \"Unknown convertion between 32 and 64 bit values.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 3125, __extension__
__PRETTY_FUNCTION__));

3126

// For reinterpreting 64-bit values as 32-bit values, we just need to

3127

// EXTRACT_SUBREG (i.e. extract the low word).

3128

SDValue SubRegIdx =

3129

CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);

3130

return SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl, MVT::i32,

3131

NatWidthRes, SubRegIdx), 0);

3132

}

3133

3134

// Produce a GPR sequence for compound comparisons (<=, >=) against zero.

3135

// Handle both zero-extensions and sign-extensions.

3136

SDValue

3137

IntegerCompareEliminator::getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl,

3138

ZeroCompare CmpTy) {

3139

EVT InVT = LHS.getValueType();

3140

bool Is32Bit = InVT == MVT::i32;

3141

SDValue ToExtend;

3142

3143

// Produce the value that needs to be either zero or sign extended.

3144

switch (CmpTy) {

3145

case ZeroCompare::GEZExt:

3146

case ZeroCompare::GESExt:

3147

ToExtend = SDValue(CurDAG->getMachineNode(Is32Bit ? PPC::NOR : PPC::NOR8,

3148

dl, InVT, LHS, LHS), 0);

3149

break;

3150

case ZeroCompare::LEZExt:

3151

case ZeroCompare::LESExt: {

3152

if (Is32Bit) {

3153

// Upper 32 bits cannot be undefined for this sequence.

3154

LHS = signExtendInputIfNeeded(LHS);

3155

SDValue Neg =

3156

SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);

3157

ToExtend =

3158

SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,

3159

Neg, S->getI64Imm(1, dl),

3160

S->getI64Imm(63, dl)), 0);

3161

} else {

3162

SDValue Addi =

3163

SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,

3164

S->getI64Imm(~0ULL, dl)), 0);

3165

ToExtend = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,

3166

Addi, LHS), 0);

3167

}

3168

break;

3169

}

3170

}

3171

3172

// For 64-bit sequences, the extensions are the same for the GE/LE cases.

3173

if (!Is32Bit &&

3174

(CmpTy == ZeroCompare::GEZExt || CmpTy == ZeroCompare::LEZExt))

3175

return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,

3176

ToExtend, S->getI64Imm(1, dl),

3177

S->getI64Imm(63, dl)), 0);

3178

if (!Is32Bit &&

3179

(CmpTy == ZeroCompare::GESExt || CmpTy == ZeroCompare::LESExt))

3180

return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, ToExtend,

3181

S->getI64Imm(63, dl)), 0);

3182

3183

assert(Is32Bit && "Should have handled the 32-bit sequences above.")(static_cast <bool> (Is32Bit && "Should have handled the 32-bit sequences above."
) ? void (0) : __assert_fail ("Is32Bit && \"Should have handled the 32-bit sequences above.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 3183, __extension__
__PRETTY_FUNCTION__));

3184

// For 32-bit sequences, the extensions differ between GE/LE cases.

3185

switch (CmpTy) {

3186

case ZeroCompare::GEZExt: {

3187

SDValue ShiftOps[] = { ToExtend, S->getI32Imm(1, dl), S->getI32Imm(31, dl),

3188

S->getI32Imm(31, dl) };

3189

return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,

3190

ShiftOps), 0);

3191

}

3192

case ZeroCompare::GESExt:

3193

return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, ToExtend,

3194

S->getI32Imm(31, dl)), 0);

3195

case ZeroCompare::LEZExt:

3196

return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, ToExtend,

3197

S->getI32Imm(1, dl)), 0);

3198

case ZeroCompare::LESExt:

3199

return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, ToExtend,

3200

S->getI32Imm(-1, dl)), 0);

3201

}

3202

3203

// The above case covers all the enumerators so it can't have a default clause

3204

// to avoid compiler warnings.

3205

llvm_unreachable("Unknown zero-comparison type.")::llvm::llvm_unreachable_internal("Unknown zero-comparison type."
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 3205);

3206

}

3207

3208

/// Produces a zero-extended result of comparing two 32-bit values according to

3209

/// the passed condition code.

3210

SDValue

3211

IntegerCompareEliminator::get32BitZExtCompare(SDValue LHS, SDValue RHS,

3212

ISD::CondCode CC,

3213

int64_t RHSValue, SDLoc dl) {

3214

if (CmpInGPR == ICGPR_I64 || CmpInGPR == ICGPR_SextI64 ||

3215

CmpInGPR == ICGPR_ZextI64 || CmpInGPR == ICGPR_Sext)

3216

return SDValue();

3217

bool IsRHSZero = RHSValue == 0;

3218

bool IsRHSOne = RHSValue == 1;

3219

bool IsRHSNegOne = RHSValue == -1LL;

3220

switch (CC) {

3221

default: return SDValue();

3222

case ISD::SETEQ: {

3223

// (zext (setcc %a, %b, seteq)) -> (lshr (cntlzw (xor %a, %b)), 5)

3224

// (zext (setcc %a, 0, seteq)) -> (lshr (cntlzw %a), 5)

3225

SDValue Xor = IsRHSZero ? LHS :

3226

SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);

3227

SDValue Clz =

3228

SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);

3229

SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl),

3230

S->getI32Imm(31, dl) };

3231

return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,

3232

ShiftOps), 0);

3233

}

3234

case ISD::SETNE: {

3235

// (zext (setcc %a, %b, setne)) -> (xor (lshr (cntlzw (xor %a, %b)), 5), 1)

3236

// (zext (setcc %a, 0, setne)) -> (xor (lshr (cntlzw %a), 5), 1)

3237

SDValue Xor = IsRHSZero ? LHS :

3238

SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);

3239

SDValue Clz =

3240

SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);

3241

SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl),

3242

S->getI32Imm(31, dl) };

3243

SDValue Shift =

3244

SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0);

3245

return SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift,

3246

S->getI32Imm(1, dl)), 0);

3247

}

3248

case ISD::SETGE: {

3249

// (zext (setcc %a, %b, setge)) -> (xor (lshr (sub %a, %b), 63), 1)

3250

// (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 31)

3251

if(IsRHSZero)

3252

return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);

3253

3254

// Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a)

3255

// by swapping inputs and falling through.

3256

std::swap(LHS, RHS);

3257

ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);

3258

IsRHSZero = RHSConst && RHSConst->isZero();

3259

[[fallthrough]];

3260

}

3261

case ISD::SETLE: {

3262

if (CmpInGPR == ICGPR_NonExtIn)

3263

return SDValue();

3264

// (zext (setcc %a, %b, setle)) -> (xor (lshr (sub %b, %a), 63), 1)

3265

// (zext (setcc %a, 0, setle)) -> (xor (lshr (- %a), 63), 1)

3266

if(IsRHSZero) {

3267

if (CmpInGPR == ICGPR_NonExtIn)

3268

return SDValue();

3269

return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);

3270

}

3271

3272

// The upper 32-bits of the register can't be undefined for this sequence.

3273

LHS = signExtendInputIfNeeded(LHS);

3274

RHS = signExtendInputIfNeeded(RHS);

3275

SDValue Sub =

3276

SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);

3277

SDValue Shift =

3278

SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Sub,

3279

S->getI64Imm(1, dl), S->getI64Imm(63, dl)),

3280

0);

3281

return

3282

SDValue(CurDAG->getMachineNode(PPC::XORI8, dl,

3283

MVT::i64, Shift, S->getI32Imm(1, dl)), 0);

3284

}

3285

case ISD::SETGT: {

3286

// (zext (setcc %a, %b, setgt)) -> (lshr (sub %b, %a), 63)

3287

// (zext (setcc %a, -1, setgt)) -> (lshr (~ %a), 31)

3288

// (zext (setcc %a, 0, setgt)) -> (lshr (- %a), 63)

3289

// Handle SETLT -1 (which is equivalent to SETGE 0).

3290

if (IsRHSNegOne)

3291

return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);

3292

3293

if (IsRHSZero) {

3294

if (CmpInGPR == ICGPR_NonExtIn)

3295

return SDValue();

3296

// The upper 32-bits of the register can't be undefined for this sequence.

3297

LHS = signExtendInputIfNeeded(LHS);

3298

RHS = signExtendInputIfNeeded(RHS);

3299

SDValue Neg =

3300

SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);

3301

return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,

3302

Neg, S->getI32Imm(1, dl), S->getI32Imm(63, dl)), 0);

3303

}

3304

// Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as

3305

// (%b < %a) by swapping inputs and falling through.

3306

std::swap(LHS, RHS);

3307

ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);

3308

IsRHSZero = RHSConst && RHSConst->isZero();

3309

IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;

3310

[[fallthrough]];

3311

}

3312

case ISD::SETLT: {

3313

// (zext (setcc %a, %b, setlt)) -> (lshr (sub %a, %b), 63)

3314

// (zext (setcc %a, 1, setlt)) -> (xor (lshr (- %a), 63), 1)

3315

// (zext (setcc %a, 0, setlt)) -> (lshr %a, 31)

3316

// Handle SETLT 1 (which is equivalent to SETLE 0).

3317

if (IsRHSOne) {

3318

if (CmpInGPR == ICGPR_NonExtIn)

3319

return SDValue();

3320

return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);

3321

}

3322

3323

if (IsRHSZero) {

3324

SDValue ShiftOps[] = { LHS, S->getI32Imm(1, dl), S->getI32Imm(31, dl),

3325

S->getI32Imm(31, dl) };

3326

return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,

3327

ShiftOps), 0);

3328

}

3329

3330

if (CmpInGPR == ICGPR_NonExtIn)

3331

return SDValue();

3332

// The upper 32-bits of the register can't be undefined for this sequence.

3333

LHS = signExtendInputIfNeeded(LHS);

3334

RHS = signExtendInputIfNeeded(RHS);

3335

SDValue SUBFNode =

3336

SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);

3337

return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,

3338

SUBFNode, S->getI64Imm(1, dl),

3339

S->getI64Imm(63, dl)), 0);

3340

}

3341

case ISD::SETUGE:

3342

// (zext (setcc %a, %b, setuge)) -> (xor (lshr (sub %b, %a), 63), 1)

3343

// (zext (setcc %a, %b, setule)) -> (xor (lshr (sub %a, %b), 63), 1)

3344

std::swap(LHS, RHS);

3345

[[fallthrough]];

3346

case ISD::SETULE: {

3347

if (CmpInGPR == ICGPR_NonExtIn)

3348

return SDValue();

3349

// The upper 32-bits of the register can't be undefined for this sequence.

3350

LHS = zeroExtendInputIfNeeded(LHS);

3351

RHS = zeroExtendInputIfNeeded(RHS);

3352

SDValue Subtract =

3353

SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);

3354

SDValue SrdiNode =

3355

SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,

3356

Subtract, S->getI64Imm(1, dl),

3357

S->getI64Imm(63, dl)), 0);

3358

return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, SrdiNode,

3359

S->getI32Imm(1, dl)), 0);

3360

}

3361

case ISD::SETUGT:

3362

// (zext (setcc %a, %b, setugt)) -> (lshr (sub %b, %a), 63)

3363

// (zext (setcc %a, %b, setult)) -> (lshr (sub %a, %b), 63)

3364

std::swap(LHS, RHS);

3365

[[fallthrough]];

3366

case ISD::SETULT: {

3367

if (CmpInGPR == ICGPR_NonExtIn)

3368

return SDValue();

3369

// The upper 32-bits of the register can't be undefined for this sequence.

3370

LHS = zeroExtendInputIfNeeded(LHS);

3371

RHS = zeroExtendInputIfNeeded(RHS);

3372

SDValue Subtract =

3373

SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);

3374

return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,

3375

Subtract, S->getI64Imm(1, dl),

3376

S->getI64Imm(63, dl)), 0);

3377

}

3378

}

3379

}

3380

3381

/// Produces a sign-extended result of comparing two 32-bit values according to

3382

/// the passed condition code.

3383

SDValue

3384

IntegerCompareEliminator::get32BitSExtCompare(SDValue LHS, SDValue RHS,

3385

ISD::CondCode CC,

3386

int64_t RHSValue, SDLoc dl) {

3387

if (CmpInGPR == ICGPR_I64 || CmpInGPR == ICGPR_SextI64 ||

3388

CmpInGPR == ICGPR_ZextI64 || CmpInGPR == ICGPR_Zext)

3389

return SDValue();

3390

bool IsRHSZero = RHSValue == 0;

3391

bool IsRHSOne = RHSValue == 1;

3392

bool IsRHSNegOne = RHSValue == -1LL;

3393

3394

switch (CC) {

3395

default: return SDValue();

3396

case ISD::SETEQ: {

3397

// (sext (setcc %a, %b, seteq)) ->

3398

// (ashr (shl (ctlz (xor %a, %b)), 58), 63)

3399

// (sext (setcc %a, 0, seteq)) ->

3400

// (ashr (shl (ctlz %a), 58), 63)

3401

SDValue CountInput = IsRHSZero ? LHS :

3402

SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);

3403

SDValue Cntlzw =

3404

SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, CountInput), 0);

3405

SDValue SHLOps[] = { Cntlzw, S->getI32Imm(27, dl),

3406

S->getI32Imm(5, dl), S->getI32Imm(31, dl) };

3407

SDValue Slwi =

3408

SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, SHLOps), 0);

3409

return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Slwi), 0);

3410

}

3411

case ISD::SETNE: {

3412

// Bitwise xor the operands, count leading zeros, shift right by 5 bits and

3413

// flip the bit, finally take 2's complement.

3414

// (sext (setcc %a, %b, setne)) ->

3415

// (neg (xor (lshr (ctlz (xor %a, %b)), 5), 1))

3416

// Same as above, but the first xor is not needed.

3417

// (sext (setcc %a, 0, setne)) ->

3418

// (neg (xor (lshr (ctlz %a), 5), 1))

3419

SDValue Xor = IsRHSZero ? LHS :

3420

SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);

3421

SDValue Clz =

3422

SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);

3423

SDValue ShiftOps[] =

3424

{ Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl), S->getI32Imm(31, dl) };

3425

SDValue Shift =

3426

SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0);

3427

SDValue Xori =

3428

SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift,

3429

S->getI32Imm(1, dl)), 0);

3430

return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Xori), 0);

3431

}

3432

case ISD::SETGE: {

3433

// (sext (setcc %a, %b, setge)) -> (add (lshr (sub %a, %b), 63), -1)

3434

// (sext (setcc %a, 0, setge)) -> (ashr (~ %a), 31)

3435

if (IsRHSZero)

3436

return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);

3437

3438

// Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a)

3439

// by swapping inputs and falling through.

3440

std::swap(LHS, RHS);

3441

ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);

3442

IsRHSZero = RHSConst && RHSConst->isZero();

3443

[[fallthrough]];

3444

}

3445

case ISD::SETLE: {

3446

if (CmpInGPR == ICGPR_NonExtIn)

3447

return SDValue();

3448

// (sext (setcc %a, %b, setge)) -> (add (lshr (sub %b, %a), 63), -1)

3449

// (sext (setcc %a, 0, setle)) -> (add (lshr (- %a), 63), -1)

3450

if (IsRHSZero)

3451

return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);

3452

3453

// The upper 32-bits of the register can't be undefined for this sequence.

3454

LHS = signExtendInputIfNeeded(LHS);

3455

RHS = signExtendInputIfNeeded(RHS);

3456

SDValue SUBFNode =

3457

SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, MVT::Glue,

3458

LHS, RHS), 0);

3459

SDValue Srdi =

3460

SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,

3461

SUBFNode, S->getI64Imm(1, dl),

3462

S->getI64Imm(63, dl)), 0);

3463

return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Srdi,

3464

S->getI32Imm(-1, dl)), 0);

3465

}

3466

case ISD::SETGT: {

3467

// (sext (setcc %a, %b, setgt)) -> (ashr (sub %b, %a), 63)

3468

// (sext (setcc %a, -1, setgt)) -> (ashr (~ %a), 31)

3469

// (sext (setcc %a, 0, setgt)) -> (ashr (- %a), 63)

3470

if (IsRHSNegOne)

3471

return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);

3472

if (IsRHSZero) {

3473

if (CmpInGPR == ICGPR_NonExtIn)

3474

return SDValue();

3475

// The upper 32-bits of the register can't be undefined for this sequence.

3476

LHS = signExtendInputIfNeeded(LHS);

3477

RHS = signExtendInputIfNeeded(RHS);

3478

SDValue Neg =

3479

SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);

3480

return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Neg,

3481

S->getI64Imm(63, dl)), 0);

3482

}

3483

// Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as

3484

// (%b < %a) by swapping inputs and falling through.

3485

std::swap(LHS, RHS);

3486

ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);

3487

IsRHSZero = RHSConst && RHSConst->isZero();

3488

IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;

3489

[[fallthrough]];

3490

}

3491

case ISD::SETLT: {

3492

// (sext (setcc %a, %b, setgt)) -> (ashr (sub %a, %b), 63)

3493

// (sext (setcc %a, 1, setgt)) -> (add (lshr (- %a), 63), -1)

3494

// (sext (setcc %a, 0, setgt)) -> (ashr %a, 31)

3495

if (IsRHSOne) {

3496

if (CmpInGPR == ICGPR_NonExtIn)

3497

return SDValue();

3498

return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);

3499

}

3500

if (IsRHSZero)

3501

return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, LHS,

3502

S->getI32Imm(31, dl)), 0);

3503

3504

if (CmpInGPR == ICGPR_NonExtIn)

3505

return SDValue();

3506

// The upper 32-bits of the register can't be undefined for this sequence.

3507

LHS = signExtendInputIfNeeded(LHS);

3508

RHS = signExtendInputIfNeeded(RHS);

3509

SDValue SUBFNode =

3510

SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);

3511

return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,

3512

SUBFNode, S->getI64Imm(63, dl)), 0);

3513

}

3514

case ISD::SETUGE:

3515

// (sext (setcc %a, %b, setuge)) -> (add (lshr (sub %a, %b), 63), -1)

3516

// (sext (setcc %a, %b, setule)) -> (add (lshr (sub %b, %a), 63), -1)

3517

std::swap(LHS, RHS);

3518

[[fallthrough]];

3519

case ISD::SETULE: {

3520

if (CmpInGPR == ICGPR_NonExtIn)

3521

return SDValue();

3522

// The upper 32-bits of the register can't be undefined for this sequence.

3523

LHS = zeroExtendInputIfNeeded(LHS);

3524

RHS = zeroExtendInputIfNeeded(RHS);

3525

SDValue Subtract =

3526

SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);

3527

SDValue Shift =

3528

SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Subtract,

3529

S->getI32Imm(1, dl), S->getI32Imm(63,dl)),

3530

0);

3531

return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Shift,

3532

S->getI32Imm(-1, dl)), 0);

3533

}

3534

case ISD::SETUGT:

3535

// (sext (setcc %a, %b, setugt)) -> (ashr (sub %b, %a), 63)

3536

// (sext (setcc %a, %b, setugt)) -> (ashr (sub %a, %b), 63)

3537

std::swap(LHS, RHS);

3538

[[fallthrough]];

3539

case ISD::SETULT: {

3540

if (CmpInGPR == ICGPR_NonExtIn)

3541

return SDValue();

3542

// The upper 32-bits of the register can't be undefined for this sequence.

3543

LHS = zeroExtendInputIfNeeded(LHS);

3544

RHS = zeroExtendInputIfNeeded(RHS);

3545

SDValue Subtract =

3546

SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);

3547

return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,

3548

Subtract, S->getI64Imm(63, dl)), 0);

3549

}

3550

}

3551

}

3552

3553

/// Produces a zero-extended result of comparing two 64-bit values according to

3554

/// the passed condition code.

3555

SDValue

3556

IntegerCompareEliminator::get64BitZExtCompare(SDValue LHS, SDValue RHS,

3557

ISD::CondCode CC,

3558

int64_t RHSValue, SDLoc dl) {

3559

if (CmpInGPR == ICGPR_I32 || CmpInGPR == ICGPR_SextI32 ||

3560

CmpInGPR == ICGPR_ZextI32 || CmpInGPR == ICGPR_Sext)

3561

return SDValue();

3562

bool IsRHSZero = RHSValue == 0;

3563

bool IsRHSOne = RHSValue == 1;

3564

bool IsRHSNegOne = RHSValue == -1LL;

3565

switch (CC) {

3566

default: return SDValue();

3567

case ISD::SETEQ: {

3568

// (zext (setcc %a, %b, seteq)) -> (lshr (ctlz (xor %a, %b)), 6)

3569

// (zext (setcc %a, 0, seteq)) -> (lshr (ctlz %a), 6)

3570

SDValue Xor = IsRHSZero ? LHS :

3571

SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);

3572

SDValue Clz =

3573

SDValue(CurDAG->getMachineNode(PPC::CNTLZD, dl, MVT::i64, Xor), 0);

3574

return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Clz,

3575

S->getI64Imm(58, dl),

3576

S->getI64Imm(63, dl)), 0);

3577

}

3578

case ISD::SETNE: {

3579

// {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)

3580

// (zext (setcc %a, %b, setne)) -> (sube addc.reg, addc.reg, addc.CA)

3581

// {addcz.reg, addcz.CA} = (addcarry %a, -1)

3582

// (zext (setcc %a, 0, setne)) -> (sube addcz.reg, addcz.reg, addcz.CA)

3583

SDValue Xor = IsRHSZero ? LHS :

3584

SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);

3585

SDValue AC =

3586

SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue,

3587

Xor, S->getI32Imm(~0U, dl)), 0);

3588

return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, AC,

3589

Xor, AC.getValue(1)), 0);

3590

}

3591

case ISD::SETGE: {

3592

// {subc.reg, subc.CA} = (subcarry %a, %b)

3593

// (zext (setcc %a, %b, setge)) ->

3594

// (adde (lshr %b, 63), (ashr %a, 63), subc.CA)

3595

// (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 63)

3596

if (IsRHSZero)

3597

return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);

3598

std::swap(LHS, RHS);

3599

ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);

3600

IsRHSZero = RHSConst && RHSConst->isZero();

3601

[[fallthrough]];

3602

}

3603

case ISD::SETLE: {

3604

// {subc.reg, subc.CA} = (subcarry %b, %a)

3605

// (zext (setcc %a, %b, setge)) ->

3606

// (adde (lshr %a, 63), (ashr %b, 63), subc.CA)

3607

// (zext (setcc %a, 0, setge)) -> (lshr (or %a, (add %a, -1)), 63)

3608

if (IsRHSZero)

3609

return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);

3610

SDValue ShiftL =

3611

SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,

3612

S->getI64Imm(1, dl),

3613

S->getI64Imm(63, dl)), 0);

3614

SDValue ShiftR =

3615

SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS,

3616

S->getI64Imm(63, dl)), 0);

3617

SDValue SubtractCarry =

3618

SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,

3619

LHS, RHS), 1);

3620

return SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,

3621

ShiftR, ShiftL, SubtractCarry), 0);

3622

}

3623

case ISD::SETGT: {

3624

// {subc.reg, subc.CA} = (subcarry %b, %a)

3625

// (zext (setcc %a, %b, setgt)) ->

3626

// (xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)

3627

// (zext (setcc %a, 0, setgt)) -> (lshr (nor (add %a, -1), %a), 63)

3628

if (IsRHSNegOne)

3629

return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);

3630

if (IsRHSZero) {

3631

SDValue Addi =

3632

SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,

3633

S->getI64Imm(~0ULL, dl)), 0);

3634

SDValue Nor =

3635

SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Addi, LHS), 0);

3636

return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Nor,

3637

S->getI64Imm(1, dl),

3638

S->getI64Imm(63, dl)), 0);

3639

}

3640

std::swap(LHS, RHS);

3641

ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);

3642

IsRHSZero = RHSConst && RHSConst->isZero();

3643

IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;

3644

[[fallthrough]];

3645

}

3646

case ISD::SETLT: {

3647

// {subc.reg, subc.CA} = (subcarry %a, %b)

3648

// (zext (setcc %a, %b, setlt)) ->

3649

// (xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1)

3650

// (zext (setcc %a, 0, setlt)) -> (lshr %a, 63)

3651

if (IsRHSOne)

3652

return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);

3653

if (IsRHSZero)

3654

return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,

3655

S->getI64Imm(1, dl),

3656

S->getI64Imm(63, dl)), 0);

3657

SDValue SRADINode =

3658

SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,

3659

LHS, S->getI64Imm(63, dl)), 0);

3660

SDValue SRDINode =

3661

SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,

3662

RHS, S->getI64Imm(1, dl),

3663

S->getI64Imm(63, dl)), 0);

3664

SDValue SUBFC8Carry =

3665

SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,

3666

RHS, LHS), 1);

3667

SDValue ADDE8Node =

3668

SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,

3669

SRDINode, SRADINode, SUBFC8Carry), 0);

3670

return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,

3671

ADDE8Node, S->getI64Imm(1, dl)), 0);

3672

}

3673

case ISD::SETUGE:

3674

// {subc.reg, subc.CA} = (subcarry %a, %b)

3675

// (zext (setcc %a, %b, setuge)) -> (add (sube %b, %b, subc.CA), 1)

3676

std::swap(LHS, RHS);

3677

[[fallthrough]];

3678

case ISD::SETULE: {

3679

// {subc.reg, subc.CA} = (subcarry %b, %a)

3680

// (zext (setcc %a, %b, setule)) -> (add (sube %a, %a, subc.CA), 1)

3681

SDValue SUBFC8Carry =

3682

SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,

3683

LHS, RHS), 1);

3684

SDValue SUBFE8Node =

3685

SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue,

3686

LHS, LHS, SUBFC8Carry), 0);

3687

return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64,

3688

SUBFE8Node, S->getI64Imm(1, dl)), 0);

3689

}

3690

case ISD::SETUGT:

3691

// {subc.reg, subc.CA} = (subcarry %b, %a)

3692

// (zext (setcc %a, %b, setugt)) -> -(sube %b, %b, subc.CA)

3693

std::swap(LHS, RHS);

3694

[[fallthrough]];

3695

case ISD::SETULT: {

3696

// {subc.reg, subc.CA} = (subcarry %a, %b)

3697

// (zext (setcc %a, %b, setult)) -> -(sube %a, %a, subc.CA)

3698

SDValue SubtractCarry =

3699

SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,

3700

RHS, LHS), 1);

3701

SDValue ExtSub =

3702

SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64,

3703

LHS, LHS, SubtractCarry), 0);

3704

return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64,

3705

ExtSub), 0);

3706

}

3707

}

3708

}

3709

3710

/// Produces a sign-extended result of comparing two 64-bit values according to

3711

/// the passed condition code.

3712

SDValue

3713

IntegerCompareEliminator::get64BitSExtCompare(SDValue LHS, SDValue RHS,

3714

ISD::CondCode CC,

3715

int64_t RHSValue, SDLoc dl) {

3716

if (CmpInGPR == ICGPR_I32 || CmpInGPR == ICGPR_SextI32 ||

3717

CmpInGPR == ICGPR_ZextI32 || CmpInGPR == ICGPR_Zext)

3718

return SDValue();

3719

bool IsRHSZero = RHSValue == 0;

3720

bool IsRHSOne = RHSValue == 1;

3721

bool IsRHSNegOne = RHSValue == -1LL;

3722

switch (CC) {

3723

default: return SDValue();

3724

case ISD::SETEQ: {

3725

// {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)

3726

// (sext (setcc %a, %b, seteq)) -> (sube addc.reg, addc.reg, addc.CA)

3727

// {addcz.reg, addcz.CA} = (addcarry %a, -1)

3728

// (sext (setcc %a, 0, seteq)) -> (sube addcz.reg, addcz.reg, addcz.CA)

3729

SDValue AddInput = IsRHSZero ? LHS :

3730

SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);

3731

SDValue Addic =

3732

SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue,

3733

AddInput, S->getI32Imm(~0U, dl)), 0);

3734

return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, Addic,

3735

Addic, Addic.getValue(1)), 0);

3736

}

3737

case ISD::SETNE: {

3738

// {subfc.reg, subfc.CA} = (subcarry 0, (xor %a, %b))

3739

// (sext (setcc %a, %b, setne)) -> (sube subfc.reg, subfc.reg, subfc.CA)

3740

// {subfcz.reg, subfcz.CA} = (subcarry 0, %a)

3741

// (sext (setcc %a, 0, setne)) -> (sube subfcz.reg, subfcz.reg, subfcz.CA)

3742

SDValue Xor = IsRHSZero ? LHS :

3743

SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);

3744

SDValue SC =

3745

SDValue(CurDAG->getMachineNode(PPC::SUBFIC8, dl, MVT::i64, MVT::Glue,

3746

Xor, S->getI32Imm(0, dl)), 0);

3747

return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, SC,

3748

SC, SC.getValue(1)), 0);

3749

}

3750

case ISD::SETGE: {

3751

// {subc.reg, subc.CA} = (subcarry %a, %b)

3752

// (zext (setcc %a, %b, setge)) ->

3753

// (- (adde (lshr %b, 63), (ashr %a, 63), subc.CA))

3754

// (zext (setcc %a, 0, setge)) -> (~ (ashr %a, 63))

3755

if (IsRHSZero)

3756

return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);

3757

std::swap(LHS, RHS);

3758

ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);

3759

IsRHSZero = RHSConst && RHSConst->isZero();

3760

[[fallthrough]];

3761

}

3762

case ISD::SETLE: {

3763

// {subc.reg, subc.CA} = (subcarry %b, %a)

3764

// (zext (setcc %a, %b, setge)) ->

3765

// (- (adde (lshr %a, 63), (ashr %b, 63), subc.CA))

3766

// (zext (setcc %a, 0, setge)) -> (ashr (or %a, (add %a, -1)), 63)

3767

if (IsRHSZero)

3768

return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);

3769

SDValue ShiftR =

3770

SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS,

3771

S->getI64Imm(63, dl)), 0);

3772

SDValue ShiftL =

3773

SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,

3774

S->getI64Imm(1, dl),

3775

S->getI64Imm(63, dl)), 0);

3776

SDValue SubtractCarry =

3777

SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,

3778

LHS, RHS), 1);

3779

SDValue Adde =

3780

SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,

3781

ShiftR, ShiftL, SubtractCarry), 0);

3782

return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, Adde), 0);

3783

}

3784

case ISD::SETGT: {

3785

// {subc.reg, subc.CA} = (subcarry %b, %a)

3786

// (zext (setcc %a, %b, setgt)) ->

3787

// -(xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)

3788

// (zext (setcc %a, 0, setgt)) -> (ashr (nor (add %a, -1), %a), 63)

3789

if (IsRHSNegOne)

3790

return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);

3791

if (IsRHSZero) {

3792

SDValue Add =

3793

SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,

3794

S->getI64Imm(-1, dl)), 0);

3795

SDValue Nor =

3796

SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Add, LHS), 0);

3797

return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Nor,

3798

S->getI64Imm(63, dl)), 0);

3799

}

3800

std::swap(LHS, RHS);

3801

ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);

3802

IsRHSZero = RHSConst && RHSConst->isZero();

3803

IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;

3804

[[fallthrough]];

3805

}

3806

case ISD::SETLT: {

3807

// {subc.reg, subc.CA} = (subcarry %a, %b)

3808

// (zext (setcc %a, %b, setlt)) ->

3809

// -(xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1)

3810

// (zext (setcc %a, 0, setlt)) -> (ashr %a, 63)

3811

if (IsRHSOne)

3812

return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);

3813

if (IsRHSZero) {

3814

return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, LHS,

3815

S->getI64Imm(63, dl)), 0);

3816

}

3817

SDValue SRADINode =

3818

SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,

3819

LHS, S->getI64Imm(63, dl)), 0);

3820

SDValue SRDINode =

3821

SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,

3822

RHS, S->getI64Imm(1, dl),

3823

S->getI64Imm(63, dl)), 0);

3824

SDValue SUBFC8Carry =

3825

SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,

3826

RHS, LHS), 1);

3827

SDValue ADDE8Node =

3828

SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64,

3829

SRDINode, SRADINode, SUBFC8Carry), 0);

3830

SDValue XORI8Node =

3831

SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,

3832

ADDE8Node, S->getI64Imm(1, dl)), 0);

3833

return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64,

3834

XORI8Node), 0);

3835

}

3836

case ISD::SETUGE:

3837

// {subc.reg, subc.CA} = (subcarry %a, %b)

3838

// (sext (setcc %a, %b, setuge)) -> ~(sube %b, %b, subc.CA)

3839

std::swap(LHS, RHS);

3840

[[fallthrough]];

3841

case ISD::SETULE: {

3842

// {subc.reg, subc.CA} = (subcarry %b, %a)

3843

// (sext (setcc %a, %b, setule)) -> ~(sube %a, %a, subc.CA)

3844

SDValue SubtractCarry =

3845

SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,

3846

LHS, RHS), 1);

3847

SDValue ExtSub =

3848

SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue, LHS,

3849

LHS, SubtractCarry), 0);

3850

return SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64,

3851

ExtSub, ExtSub), 0);

3852

}

3853

case ISD::SETUGT:

3854

// {subc.reg, subc.CA} = (subcarry %b, %a)

3855

// (sext (setcc %a, %b, setugt)) -> (sube %b, %b, subc.CA)

3856

std::swap(LHS, RHS);

3857

[[fallthrough]];

3858

case ISD::SETULT: {

3859

// {subc.reg, subc.CA} = (subcarry %a, %b)

3860

// (sext (setcc %a, %b, setult)) -> (sube %a, %a, subc.CA)

3861

SDValue SubCarry =

3862

SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,

3863

RHS, LHS), 1);

3864

return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64,

3865

LHS, LHS, SubCarry), 0);

3866

}

3867

}

3868

}

3869

3870

/// Do all uses of this SDValue need the result in a GPR?

3871

/// This is meant to be used on values that have type i1 since

3872

/// it is somewhat meaningless to ask if values of other types

3873

/// should be kept in GPR's.

3874

static bool allUsesExtend(SDValue Compare, SelectionDAG *CurDAG) {

3875

assert(Compare.getOpcode() == ISD::SETCC &&(static_cast <bool> (Compare.getOpcode() == ISD::SETCC &&
"An ISD::SETCC node required here.") ? void (0) : __assert_fail
("Compare.getOpcode() == ISD::SETCC && \"An ISD::SETCC node required here.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 3876, __extension__
__PRETTY_FUNCTION__))

3876

"An ISD::SETCC node required here.")(static_cast <bool> (Compare.getOpcode() == ISD::SETCC &&
"An ISD::SETCC node required here.") ? void (0) : __assert_fail
("Compare.getOpcode() == ISD::SETCC && \"An ISD::SETCC node required here.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 3876, __extension__
__PRETTY_FUNCTION__));

3877

3878

// For values that have a single use, the caller should obviously already have

3879

// checked if that use is an extending use. We check the other uses here.

3880

if (Compare.hasOneUse())

3881

return true;

3882

// We want the value in a GPR if it is being extended, used for a select, or

3883

// used in logical operations.

3884

for (auto *CompareUse : Compare.getNode()->uses())

3885

if (CompareUse->getOpcode() != ISD::SIGN_EXTEND &&

3886

CompareUse->getOpcode() != ISD::ZERO_EXTEND &&

3887

CompareUse->getOpcode() != ISD::SELECT &&

3888

!ISD::isBitwiseLogicOp(CompareUse->getOpcode())) {

3889

OmittedForNonExtendUses++;

3890

return false;

3891

}

3892

return true;

3893

}

3894

3895

/// Returns an equivalent of a SETCC node but with the result the same width as

3896

/// the inputs. This can also be used for SELECT_CC if either the true or false

3897

/// values is a power of two while the other is zero.

3898

SDValue IntegerCompareEliminator::getSETCCInGPR(SDValue Compare,

3899

SetccInGPROpts ConvOpts) {

3900

assert((Compare.getOpcode() == ISD::SETCC ||(static_cast <bool> ((Compare.getOpcode() == ISD::SETCC
|| Compare.getOpcode() == ISD::SELECT_CC) && "An ISD::SETCC node required here."
) ? void (0) : __assert_fail ("(Compare.getOpcode() == ISD::SETCC || Compare.getOpcode() == ISD::SELECT_CC) && \"An ISD::SETCC node required here.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 3902, __extension__
__PRETTY_FUNCTION__))

3901

Compare.getOpcode() == ISD::SELECT_CC) &&(static_cast <bool> ((Compare.getOpcode() == ISD::SETCC
|| Compare.getOpcode() == ISD::SELECT_CC) && "An ISD::SETCC node required here."
) ? void (0) : __assert_fail ("(Compare.getOpcode() == ISD::SETCC || Compare.getOpcode() == ISD::SELECT_CC) && \"An ISD::SETCC node required here.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 3902, __extension__
__PRETTY_FUNCTION__))

3902

"An ISD::SETCC node required here.")(static_cast <bool> ((Compare.getOpcode() == ISD::SETCC
|| Compare.getOpcode() == ISD::SELECT_CC) && "An ISD::SETCC node required here."
) ? void (0) : __assert_fail ("(Compare.getOpcode() == ISD::SETCC || Compare.getOpcode() == ISD::SELECT_CC) && \"An ISD::SETCC node required here.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 3902, __extension__
__PRETTY_FUNCTION__));

3903

3904

// Don't convert this comparison to a GPR sequence because there are uses

3905

// of the i1 result (i.e. uses that require the result in the CR).

3906

if ((Compare.getOpcode() == ISD::SETCC) && !allUsesExtend(Compare, CurDAG))

3907

return SDValue();

3908

3909

SDValue LHS = Compare.getOperand(0);

3910

SDValue RHS = Compare.getOperand(1);

3911

3912

// The condition code is operand 2 for SETCC and operand 4 for SELECT_CC.

3913

int CCOpNum = Compare.getOpcode() == ISD::SELECT_CC ? 4 : 2;

3914

ISD::CondCode CC =

3915

cast<CondCodeSDNode>(Compare.getOperand(CCOpNum))->get();

3916

EVT InputVT = LHS.getValueType();

3917

if (InputVT != MVT::i32 && InputVT != MVT::i64)

3918

return SDValue();

3919

3920

if (ConvOpts == SetccInGPROpts::ZExtInvert ||

3921

ConvOpts == SetccInGPROpts::SExtInvert)

3922

CC = ISD::getSetCCInverse(CC, InputVT);

3923

3924

bool Inputs32Bit = InputVT == MVT::i32;

3925

3926

SDLoc dl(Compare);

3927

ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);

3928

int64_t RHSValue = RHSConst ? RHSConst->getSExtValue() : INT64_MAX(9223372036854775807L);

3929

bool IsSext = ConvOpts == SetccInGPROpts::SExtOrig ||

3930

ConvOpts == SetccInGPROpts::SExtInvert;

3931

3932

if (IsSext && Inputs32Bit)

3933

return get32BitSExtCompare(LHS, RHS, CC, RHSValue, dl);

3934

else if (Inputs32Bit)

3935

return get32BitZExtCompare(LHS, RHS, CC, RHSValue, dl);

3936

else if (IsSext)

3937

return get64BitSExtCompare(LHS, RHS, CC, RHSValue, dl);

3938

return get64BitZExtCompare(LHS, RHS, CC, RHSValue, dl);

3939

}

3940

3941

} // end anonymous namespace

3942

3943

/// Try to select \p N through IntegerCompareEliminator, which produces the
/// comparison result in a GPR.
///
/// \returns true if \p N was replaced, false if the node was left untouched.
bool PPCDAGToDAGISel::tryIntCompareInGPR(SDNode *N) {
  const EVT ResVT = N->getValueType(0);
  if (ResVT != MVT::i32 && ResVT != MVT::i64)
    return false;

  // The emitted code assumes 64-bit registers, so skip 32-bit mode. Functions
  // that are not to be optimized are skipped as well.
  if (TM.getOptLevel() == CodeGenOpt::None || !TM.isPPC64())
    return false;

  // On POWER10 the set boolean extension instructions are more profitable
  // than this transformation. Passing `--ppc-gpr-icmps` explicitly on the
  // command line overrides that default.
  const bool OptionGivenByUser = CmpInGPR.getNumOccurrences() > 0;
  if (!OptionGivenByUser && Subtarget->isISA3_1())
    return false;

  // Only extensions and bitwise logic nodes are candidates.
  switch (N->getOpcode()) {
  case ISD::ZERO_EXTEND:
  case ISD::SIGN_EXTEND:
  case ISD::AND:
  case ISD::OR:
  case ISD::XOR:
    break;
  default:
    return false;
  }

  IntegerCompareEliminator ICmpElim(CurDAG, this);
  SDNode *New = ICmpElim.Select(N);
  if (!New)
    return false;

  ReplaceNode(N, New);
  return true;
}

3976

3977

/// Try to select \p N through BitPermutationSelector.
///
/// \returns true if \p N was replaced, false if the node was left untouched.
bool PPCDAGToDAGISel::tryBitPermutation(SDNode *N) {
  const EVT ResVT = N->getValueType(0);
  if (ResVT != MVT::i32 && ResVT != MVT::i64)
    return false;

  if (!UseBitPermRewriter)
    return false;

  // Only shifts, rotates and and/or nodes are candidates.
  const unsigned Opc = N->getOpcode();
  switch (Opc) {
  case ISD::SRL:
  case ISD::ROTL:
  case ISD::SHL:
  case ISD::AND:
  case ISD::OR:
    break;
  default:
    return false;
  }

  // On P10 there is a pattern for 32-bit (srl (bswap r), 16) that uses the
  // BRH instruction, so leave that exact shape alone.
  if (Opc == ISD::SRL && Subtarget->isISA3_1() && ResVT == MVT::i32 &&
      N->getOperand(0).getOpcode() == ISD::BSWAP) {
    auto *ShiftAmt = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (ShiftAmt && ShiftAmt->getSExtValue() == 16)
      return false;
  }

  BitPermutationSelector BPS(CurDAG);
  SDNode *New = BPS.Select(N);
  if (!New)
    return false;

  ReplaceNode(N, New);
  return true;
}

4013

4014

/// SelectCC - Select a comparison of the specified values with the specified

4015

/// condition code, returning the CR# of the expression.

4016

SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,

4017

const SDLoc &dl, SDValue Chain) {

4018

// Always select the LHS.

4019

unsigned Opc;

4020

4021

if (LHS.getValueType() == MVT::i32) {

4022

unsigned Imm;

4023

if (CC == ISD::SETEQ || CC == ISD::SETNE) {

4024

if (isInt32Immediate(RHS, Imm)) {

4025

// SETEQ/SETNE comparison with 16-bit immediate, fold it.

4026

if (isUInt<16>(Imm))

4027

return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,

4028

getI32Imm(Imm & 0xFFFF, dl)),

4029

0);

4030

// If this is a 16-bit signed immediate, fold it.

4031

if (isInt<16>((int)Imm))

4032

return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,

4033

getI32Imm(Imm & 0xFFFF, dl)),

4034

0);

4035

4036

// For non-equality comparisons, the default code would materialize the

4037

// constant, then compare against it, like this:

4038

// lis r2, 4660

4039

// ori r2, r2, 22136

4040

// cmpw cr0, r3, r2

4041

// Since we are just comparing for equality, we can emit this instead:

4042

// xoris r0,r3,0x1234

4043

// cmplwi cr0,r0,0x5678

4044

// beq cr0,L6

4045

SDValue Xor(CurDAG->getMachineNode(PPC::XORIS, dl, MVT::i32, LHS,

4046

getI32Imm(Imm >> 16, dl)), 0);

4047

return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, Xor,

4048

getI32Imm(Imm & 0xFFFF, dl)), 0);

4049

}

4050

Opc = PPC::CMPLW;

4051

} else if (ISD::isUnsignedIntSetCC(CC)) {

4052

if (isInt32Immediate(RHS, Imm) && isUInt<16>(Imm))

4053

return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,

4054

getI32Imm(Imm & 0xFFFF, dl)), 0);

4055

Opc = PPC::CMPLW;

4056

} else {

4057

int16_t SImm;

4058

if (isIntS16Immediate(RHS, SImm))

4059

return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,

4060

getI32Imm((int)SImm & 0xFFFF,

4061

dl)),

4062

0);

4063

Opc = PPC::CMPW;

4064

}

4065

} else if (LHS.getValueType() == MVT::i64) {

4066

uint64_t Imm;

4067

if (CC == ISD::SETEQ || CC == ISD::SETNE) {

4068

if (isInt64Immediate(RHS.getNode(), Imm)) {

4069

// SETEQ/SETNE comparison with 16-bit immediate, fold it.

4070

if (isUInt<16>(Imm))

4071

return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,

4072

getI32Imm(Imm & 0xFFFF, dl)),

4073

0);

4074

// If this is a 16-bit signed immediate, fold it.

4075

if (isInt<16>(Imm))

4076

return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,

4077

getI32Imm(Imm & 0xFFFF, dl)),

4078

0);

4079

4080

// For non-equality comparisons, the default code would materialize the

4081

// constant, then compare against it, like this:

4082

// lis r2, 4660

4083

// ori r2, r2, 22136

4084

// cmpd cr0, r3, r2

4085

// Since we are just comparing for equality, we can emit this instead:

4086

// xoris r0,r3,0x1234

4087

// cmpldi cr0,r0,0x5678

4088

// beq cr0,L6

4089

if (isUInt<32>(Imm)) {

4090

SDValue Xor(CurDAG->getMachineNode(PPC::XORIS8, dl, MVT::i64, LHS,

4091

getI64Imm(Imm >> 16, dl)), 0);

4092

return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, Xor,

4093

getI64Imm(Imm & 0xFFFF, dl)),

4094

0);

4095

}

4096

}

4097

Opc = PPC::CMPLD;

4098

} else if (ISD::isUnsignedIntSetCC(CC)) {

4099

if (isInt64Immediate(RHS.getNode(), Imm) && isUInt<16>(Imm))

4100

return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,

4101

getI64Imm(Imm & 0xFFFF, dl)), 0);

4102

Opc = PPC::CMPLD;

4103

} else {

4104

int16_t SImm;

4105

if (isIntS16Immediate(RHS, SImm))

4106

return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,

4107

getI64Imm(SImm & 0xFFFF, dl)),

4108

0);

4109

Opc = PPC::CMPD;

4110

}

4111

} else if (LHS.getValueType() == MVT::f32) {

4112

if (Subtarget->hasSPE()) {

4113

switch (CC) {

4114

default:

4115

case ISD::SETEQ:

4116

case ISD::SETNE:

4117

Opc = PPC::EFSCMPEQ;

4118

break;

4119

case ISD::SETLT:

4120

case ISD::SETGE:

4121

case ISD::SETOLT:

4122

case ISD::SETOGE:

4123

case ISD::SETULT:

4124

case ISD::SETUGE:

4125

Opc = PPC::EFSCMPLT;

4126

break;

4127

case ISD::SETGT:

4128

case ISD::SETLE:

4129

case ISD::SETOGT:

4130

case ISD::SETOLE:

4131

case ISD::SETUGT:

4132

case ISD::SETULE:

4133

Opc = PPC::EFSCMPGT;

4134

break;

4135

}

4136

} else

4137

Opc = PPC::FCMPUS;

4138

} else if (LHS.getValueType() == MVT::f64) {

4139

if (Subtarget->hasSPE()) {

4140

switch (CC) {

4141

default:

4142

case ISD::SETEQ:

4143

case ISD::SETNE:

4144

Opc = PPC::EFDCMPEQ;

4145

break;

4146

case ISD::SETLT:

4147

case ISD::SETGE:

4148

case ISD::SETOLT:

4149

case ISD::SETOGE:

4150

case ISD::SETULT:

4151

case ISD::SETUGE:

4152

Opc = PPC::EFDCMPLT;

4153

break;

4154

case ISD::SETGT:

4155

case ISD::SETLE:

4156

case ISD::SETOGT:

4157

case ISD::SETOLE:

4158

case ISD::SETUGT:

4159

case ISD::SETULE:

4160

Opc = PPC::EFDCMPGT;

4161

break;

4162

}

4163

} else

4164

Opc = Subtarget->hasVSX() ? PPC::XSCMPUDP : PPC::FCMPUD;

4165

} else {

4166

assert(LHS.getValueType() == MVT::f128 && "Unknown vt!")(static_cast <bool> (LHS.getValueType() == MVT::f128 &&
"Unknown vt!") ? void (0) : __assert_fail ("LHS.getValueType() == MVT::f128 && \"Unknown vt!\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 4166, __extension__
__PRETTY_FUNCTION__));

4167

assert(Subtarget->hasP9Vector() && "XSCMPUQP requires Power9 Vector")(static_cast <bool> (Subtarget->hasP9Vector() &&
"XSCMPUQP requires Power9 Vector") ? void (0) : __assert_fail
("Subtarget->hasP9Vector() && \"XSCMPUQP requires Power9 Vector\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 4167, __extension__
__PRETTY_FUNCTION__));

4168

Opc = PPC::XSCMPUQP;

4169

}

4170

if (Chain)

4171

return SDValue(

4172

CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::Other, LHS, RHS, Chain),

4173

0);

4174

else

4175

return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0);

4176

}

4177

4178

/// Map an ISD condition code to the PPC branch predicate that tests it.
///
/// \param CC        Condition code; SETUEQ, SETONE, SETOLE and SETOGE are not
///                  accepted because they should have been lowered by
///                  legalization.
/// \param VT        Type of the compared values.
/// \param Subtarget Consulted for SPE support.
/// \returns the predicate. When the subtarget has SPE and \p VT is a
/// floating-point type, the eq/ne/lt/ge families are expressed through the GT
/// bit (PRED_GT / PRED_LE).
static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC, const EVT &VT,
                                           const PPCSubtarget *Subtarget) {
  // For SPE instructions, the result is in GT bit of the CR.
  const bool UseSPE = Subtarget->hasSPE() && VT.isFloatingPoint();
  auto SPEOr = [UseSPE](PPC::Predicate SPEPred, PPC::Predicate RegularPred) {
    return UseSPE ? SPEPred : RegularPred;
  };

  switch (CC) {
  case ISD::SETEQ:
  case ISD::SETOEQ:
    return SPEOr(PPC::PRED_GT, PPC::PRED_EQ);
  case ISD::SETNE:
  case ISD::SETUNE:
    return SPEOr(PPC::PRED_LE, PPC::PRED_NE);
  case ISD::SETLT:
  case ISD::SETOLT:
    return SPEOr(PPC::PRED_GT, PPC::PRED_LT);
  case ISD::SETGE:
  case ISD::SETUGE:
    return SPEOr(PPC::PRED_LE, PPC::PRED_GE);
  case ISD::SETLE:
  case ISD::SETULE:
    return PPC::PRED_LE;
  case ISD::SETGT:
  case ISD::SETOGT:
    return PPC::PRED_GT;
  case ISD::SETO:
    return PPC::PRED_NU;
  case ISD::SETUO:
    return PPC::PRED_UN;
  // These two are invalid for floating point. Assume we have int.
  case ISD::SETULT:
    return PPC::PRED_LT;
  case ISD::SETUGT:
    return PPC::PRED_GT;
  case ISD::SETUEQ:
  case ISD::SETONE:
  case ISD::SETOLE:
  case ISD::SETOGE:
    llvm_unreachable("Should be lowered by legalize!");
  default:
    llvm_unreachable("Unknown condition!");
  }
}

4215

4216

/// getCRIdxForSetCC - Return the index of the condition register field

4217

/// associated with the SetCC condition, and whether or not the field is

4218

/// treated as inverted. That is, lt = 0; ge = 0 inverted.

4219

static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) {

4220

Invert = false;

4221

switch (CC) {

4222

default: llvm_unreachable("Unknown condition!")::llvm::llvm_unreachable_internal("Unknown condition!", "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 4222);

4223

case ISD::SETOLT:

4224

case ISD::SETLT: return 0; // Bit #0 = SETOLT

4225

case ISD::SETOGT:

4226

case ISD::SETGT: return 1; // Bit #1 = SETOGT

4227

case ISD::SETOEQ:

4228

case ISD::SETEQ: return 2; // Bit #2 = SETOEQ

4229

case ISD::SETUO: return 3; // Bit #3 = SETUO

4230

case ISD::SETUGE:

4231

case ISD::SETGE: Invert = true; return 0; // !Bit #0 = SETUGE

4232

case ISD::SETULE:

4233

case ISD::SETLE: Invert = true; return 1; // !Bit #1 = SETULE

4234

case ISD::SETUNE:

4235

case ISD::SETNE: Invert = true; return 2; // !Bit #2 = SETUNE

4236

case ISD::SETO: Invert = true; return 3; // !Bit #3 = SETO

4237

case ISD::SETUEQ:

4238

case ISD::SETOGE:

4239

case ISD::SETOLE:

4240

case ISD::SETONE:

4241

llvm_unreachable("Invalid branch code: should be expanded by legalize")::llvm::llvm_unreachable_internal("Invalid branch code: should be expanded by legalize"
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 4241);

4242

// These are invalid for floating point. Assume integer.

4243

case ISD::SETULT: return 0;

4244

case ISD::SETUGT: return 1;

4245

}

4246

}

4247

4248

// getVCmpInst: return the vector compare instruction for the specified

4249

// vector type and condition code. Since this is for altivec specific code,

4250

// only support the altivec types (v16i8, v8i16, v4i32, v2i64, v1i128,

4251

// and v4f32).

4252

static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,

4253

bool HasVSX, bool &Swap, bool &Negate) {

4254

Swap = false;

4255

Negate = false;

4256

4257

if (VecVT.isFloatingPoint()) {

4258

/* Handle some cases by swapping input operands. */

4259

switch (CC) {

4260

case ISD::SETLE: CC = ISD::SETGE; Swap = true; break;

4261

case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;

4262

case ISD::SETOLE: CC = ISD::SETOGE; Swap = true; break;

4263

case ISD::SETOLT: CC = ISD::SETOGT; Swap = true; break;

4264

case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;

4265

case ISD::SETUGT: CC = ISD::SETULT; Swap = true; break;

4266

default: break;

4267

}

4268

/* Handle some cases by negating the result. */

4269

switch (CC) {

4270

case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;

4271

case ISD::SETUNE: CC = ISD::SETOEQ; Negate = true; break;

4272

case ISD::SETULE: CC = ISD::SETOGT; Negate = true; break;

4273

case ISD::SETULT: CC = ISD::SETOGE; Negate = true; break;

4274

default: break;

4275

}

4276

/* We have instructions implementing the remaining cases. */

4277

switch (CC) {

4278

case ISD::SETEQ:

4279

case ISD::SETOEQ:

4280

if (VecVT == MVT::v4f32)

4281

return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;

4282

else if (VecVT == MVT::v2f64)

4283

return PPC::XVCMPEQDP;

4284

break;

4285

case ISD::SETGT:

4286

case ISD::SETOGT:

4287

if (VecVT == MVT::v4f32)

4288

return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP;

4289

else if (VecVT == MVT::v2f64)

4290

return PPC::XVCMPGTDP;

4291

break;

4292

case ISD::SETGE:

4293

case ISD::SETOGE:

4294

if (VecVT == MVT::v4f32)

4295

return HasVSX ? PPC::XVCMPGESP : PPC::VCMPGEFP;

4296

else if (VecVT == MVT::v2f64)

4297

return PPC::XVCMPGEDP;

4298

break;

4299

default:

4300

break;

4301

}

4302

llvm_unreachable("Invalid floating-point vector compare condition")::llvm::llvm_unreachable_internal("Invalid floating-point vector compare condition"
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 4302);

4303

} else {

4304

/* Handle some cases by swapping input operands. */

4305

switch (CC) {

4306

case ISD::SETGE: CC = ISD::SETLE; Swap = true; break;

4307

case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;

4308

case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;

4309

case ISD::SETULT: CC = ISD::SETUGT; Swap = true; break;

4310

default: break;

4311

}

4312

/* Handle some cases by negating the result. */

4313

switch (CC) {

4314

case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;

4315

case ISD::SETUNE: CC = ISD::SETUEQ; Negate = true; break;

4316

case ISD::SETLE: CC = ISD::SETGT; Negate = true; break;

4317

case ISD::SETULE: CC = ISD::SETUGT; Negate = true; break;

4318

default: break;

4319

}

4320

/* We have instructions implementing the remaining cases. */

4321

switch (CC) {

4322

case ISD::SETEQ:

4323

case ISD::SETUEQ:

4324

if (VecVT == MVT::v16i8)

4325

return PPC::VCMPEQUB;

4326

else if (VecVT == MVT::v8i16)

4327

return PPC::VCMPEQUH;

4328

else if (VecVT == MVT::v4i32)

4329

return PPC::VCMPEQUW;

4330

else if (VecVT == MVT::v2i64)

4331

return PPC::VCMPEQUD;

4332

else if (VecVT == MVT::v1i128)

4333

return PPC::VCMPEQUQ;

4334

break;

4335

case ISD::SETGT:

4336

if (VecVT == MVT::v16i8)

4337

return PPC::VCMPGTSB;

4338

else if (VecVT == MVT::v8i16)

4339

return PPC::VCMPGTSH;

4340

else if (VecVT == MVT::v4i32)

4341

return PPC::VCMPGTSW;

4342

else if (VecVT == MVT::v2i64)

4343

return PPC::VCMPGTSD;

4344

else if (VecVT == MVT::v1i128)

4345

return PPC::VCMPGTSQ;

4346

break;

4347

case ISD::SETUGT:

4348

if (VecVT == MVT::v16i8)

4349

return PPC::VCMPGTUB;

4350

else if (VecVT == MVT::v8i16)

4351

return PPC::VCMPGTUH;

4352

else if (VecVT == MVT::v4i32)

4353

return PPC::VCMPGTUW;

4354

else if (VecVT == MVT::v2i64)

4355

return PPC::VCMPGTUD;

4356

else if (VecVT == MVT::v1i128)

4357

return PPC::VCMPGTUQ;

4358

break;

4359

default:

4360

break;

4361

}

4362

llvm_unreachable("Invalid integer vector compare condition")::llvm::llvm_unreachable_internal("Invalid integer vector compare condition"
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 4362);

4363

}

4364

}

4365

4366

/// Try to select a SETCC node (or its strict floating-point counterpart)
/// without going through CR bits.
///
/// Three strategies are attempted in order:
///  1. a compare of a 32-bit immediate 0 or -1 with a short GPR sequence,
///  2. a vector compare instruction for vector operands,
///  3. a generic compare whose CR field is copied into CR7, read back with
///     MFOCRF and reduced to the requested bit.
///
/// \returns true if \p N was selected, false if it is left for the default
/// selection.
bool PPCDAGToDAGISel::trySETCC(SDNode *N) {
  SDLoc dl(N);
  const bool IsStrict = N->isStrictFPOpcode();

  // Strict nodes carry the chain in operand 0, which shifts the remaining
  // operands (lhs, rhs, condition code) by one.
  const unsigned OpBase = IsStrict ? 1 : 0;
  SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
  SDValue LHS = N->getOperand(OpBase);
  SDValue RHS = N->getOperand(OpBase + 1);
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(OpBase + 2))->get();

  EVT PtrVT =
      CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout());
  const bool isPPC64 = (PtrVT == MVT::i64);

  // Selects N to "rlwinm Src, 1, 31, 31".
  auto SelectSignBitOf = [&](SDValue Src) {
    SDValue Ops[] = {Src, getI32Imm(1, dl), getI32Imm(31, dl),
                     getI32Imm(31, dl)};
    CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
    return true;
  };

  // We can codegen setcc op, imm very efficiently compared to a brcond.
  // Check for those cases here.
  unsigned Imm;
  if (!IsStrict && !Subtarget->useCRBits() && isInt32Immediate(RHS, Imm)) {
    if (Imm == 0) {
      // setcc op, 0
      switch (CC) {
      case ISD::SETEQ: {
        SDValue LeadingZeros =
            SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, LHS), 0);
        SDValue Ops[] = {LeadingZeros, getI32Imm(27, dl), getI32Imm(5, dl),
                         getI32Imm(31, dl)};
        CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
        return true;
      }
      case ISD::SETNE:
        if (!isPPC64) {
          SDValue AddCarry =
              SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32,
                                             MVT::Glue, LHS,
                                             getI32Imm(~0U, dl)),
                      0);
          CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, AddCarry, LHS,
                               AddCarry.getValue(1));
          return true;
        }
        break;
      case ISD::SETLT:
        return SelectSignBitOf(LHS);
      case ISD::SETGT: {
        SDValue Neg =
            SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, LHS), 0);
        SDValue AndC = SDValue(
            CurDAG->getMachineNode(PPC::ANDC, dl, MVT::i32, Neg, LHS), 0);
        return SelectSignBitOf(AndC);
      }
      default:
        break;
      }
    } else if (Imm == ~0U) {
      // setcc op, -1
      switch (CC) {
      case ISD::SETEQ:
        if (!isPPC64) {
          SDValue AddCarry =
              SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32,
                                             MVT::Glue, LHS, getI32Imm(1, dl)),
                      0);
          SDValue Zero = SDValue(
              CurDAG->getMachineNode(PPC::LI, dl, MVT::i32, getI32Imm(0, dl)),
              0);
          CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, Zero,
                               AddCarry.getValue(1));
          return true;
        }
        break;
      case ISD::SETNE:
        if (!isPPC64) {
          SDValue NotOp = SDValue(
              CurDAG->getMachineNode(PPC::NOR, dl, MVT::i32, LHS, LHS), 0);
          SDNode *AddCarry = CurDAG->getMachineNode(
              PPC::ADDIC, dl, MVT::i32, MVT::Glue, NotOp, getI32Imm(~0U, dl));
          CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(AddCarry, 0),
                               NotOp, SDValue(AddCarry, 1));
          return true;
        }
        break;
      case ISD::SETLT: {
        SDValue PlusOne = SDValue(
            CurDAG->getMachineNode(PPC::ADDI, dl, MVT::i32, LHS,
                                   getI32Imm(1, dl)),
            0);
        SDValue Masked = SDValue(
            CurDAG->getMachineNode(PPC::AND, dl, MVT::i32, PlusOne, LHS), 0);
        return SelectSignBitOf(Masked);
      }
      case ISD::SETGT: {
        SDValue Ops[] = {LHS, getI32Imm(1, dl), getI32Imm(31, dl),
                         getI32Imm(31, dl)};
        SDValue SignBit = SDValue(
            CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
        CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, SignBit,
                             getI32Imm(1, dl));
        return true;
      }
      default:
        break;
      }
    }
  }

  // Altivec Vector compare instructions do not set any CR register by default
  // and vector compare operations return the same type as the operands.
  if (!IsStrict && LHS.getValueType().isVector()) {
    if (Subtarget->hasSPE())
      return false;

    EVT VecVT = LHS.getValueType();
    bool Swap, Negate;
    unsigned int VCmpInst =
        getVCmpInst(VecVT.getSimpleVT(), CC, Subtarget->hasVSX(), Swap, Negate);
    if (Swap)
      std::swap(LHS, RHS);

    EVT ResVT = VecVT.changeVectorElementTypeToInteger();
    if (!Negate) {
      CurDAG->SelectNodeTo(N, VCmpInst, ResVT, LHS, RHS);
      return true;
    }

    // Negation is expressed as a NOR of the compare result with itself.
    SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, ResVT, LHS, RHS), 0);
    CurDAG->SelectNodeTo(N, Subtarget->hasVSX() ? PPC::XXLNOR : PPC::VNOR,
                         ResVT, VCmp, VCmp);
    return true;
  }

  if (Subtarget->useCRBits())
    return false;

  bool Inv;
  unsigned Idx = getCRIdxForSetCC(CC, Inv);
  SDValue CCReg = SelectCC(LHS, RHS, CC, dl, Chain);
  // A strict node also produces a chain; forward it from the compare.
  if (IsStrict)
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), CCReg.getValue(1));

  // SPE e*cmp* instructions only set the 'gt' bit, so hard-code that.
  // The correct compare instruction is already set by SelectCC().
  if (Subtarget->hasSPE() && LHS.getValueType().isFloatingPoint())
    Idx = 1;

  // Force the ccreg into CR7.
  SDValue CR7Reg = CurDAG->getRegister(PPC::CR7, MVT::i32);
  SDValue NoGlue; // Null incoming flag value.
  CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, CR7Reg, CCReg,
                               NoGlue)
              .getValue(1);

  SDValue IntCR = SDValue(
      CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg, CCReg), 0);

  // Rotate-and-mask operands that extract the specified bit.
  SDValue Ops[] = {IntCR, getI32Imm((32 - (3 - Idx)) & 31, dl),
                   getI32Imm(31, dl), getI32Imm(31, dl)};
  if (Inv) {
    // Extract the bit, then flip it.
    SDValue Bit =
        SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
    CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Bit, getI32Imm(1, dl));
    return true;
  }

  CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
  return true;
}

4527

4528

/// Does this node represent a load/store node whose address can be represented

4529

/// with a register plus an immediate that's a multiple of \p Val:

4530

bool PPCDAGToDAGISel::isOffsetMultipleOf(SDNode *N, unsigned Val) const {

4531

LoadSDNode *LDN = dyn_cast<LoadSDNode>(N);

4532

StoreSDNode *STN = dyn_cast<StoreSDNode>(N);

4533

MemIntrinsicSDNode *MIN = dyn_cast<MemIntrinsicSDNode>(N);

4534

SDValue AddrOp;

4535

if (LDN || (MIN && MIN->getOpcode() == PPCISD::LD_SPLAT))

4536

AddrOp = N->getOperand(1);

4537

else if (STN)

4538

AddrOp = STN->getOperand(2);

4539

4540

// If the address points a frame object or a frame object with an offset,

4541

// we need to check the object alignment.

4542

short Imm = 0;

4543

if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(

4544

AddrOp.getOpcode() == ISD::ADD ? AddrOp.getOperand(0) :

4545

AddrOp)) {

4546

// If op0 is a frame index that is under aligned, we can't do it either,

4547

// because it is translated to r31 or r1 + slot + offset. We won't know the

4548

// slot number until the stack frame is finalized.

4549

const MachineFrameInfo &MFI = CurDAG->getMachineFunction().getFrameInfo();

4550

unsigned SlotAlign = MFI.getObjectAlign(FI->getIndex()).value();

4551

if ((SlotAlign % Val) != 0)

4552

return false;

4553

4554

// If we have an offset, we need further check on the offset.

4555

if (AddrOp.getOpcode() != ISD::ADD)

4556

return true;

4557

}

4558

4559

if (AddrOp.getOpcode() == ISD::ADD)

4560

return isIntS16Immediate(AddrOp.getOperand(1), Imm) && !(Imm % Val);

4561

4562

// If the address comes from the outside, the offset will be zero.

4563

return AddrOp.getOpcode() == ISD::CopyFromReg;

4564

}

4565

4566

/// Copy the memory operand of \p N onto the machine node \p Result.
///
/// \param N      Source node; must be a MemSDNode.
/// \param Result Destination node; must be a MachineSDNode. Its memory
///               references are set to the single operand taken from \p N.
void PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
  auto *MemNode = cast<MemSDNode>(N);
  auto *MachineResult = cast<MachineSDNode>(Result);
  CurDAG->setNodeMemRefs(MachineResult, {MemNode->getMemOperand()});
}

4571

4572

static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG,

4573

bool &NeedSwapOps, bool &IsUnCmp) {

4574

4575

assert(N->getOpcode() == ISD::SELECT_CC && "Expecting a SELECT_CC here.")(static_cast <bool> (N->getOpcode() == ISD::SELECT_CC
&& "Expecting a SELECT_CC here.") ? void (0) : __assert_fail
("N->getOpcode() == ISD::SELECT_CC && \"Expecting a SELECT_CC here.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 4575, __extension__
__PRETTY_FUNCTION__));

4576

4577

SDValue LHS = N->getOperand(0);

4578

SDValue RHS = N->getOperand(1);

4579

SDValue TrueRes = N->getOperand(2);

4580

SDValue FalseRes = N->getOperand(3);

4581

ConstantSDNode *TrueConst = dyn_cast<ConstantSDNode>(TrueRes);

4582

if (!TrueConst || (N->getSimpleValueType(0) != MVT::i64 &&

4583

N->getSimpleValueType(0) != MVT::i32))

4584

return false;

4585

4586

// We are looking for any of:

4587

// (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, cc2)), cc1)

4588

// (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, cc2)), cc1)

4589

// (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, 1, -1, cc2), seteq)

4590

// (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, -1, 1, cc2), seteq)

4591

int64_t TrueResVal = TrueConst->getSExtValue();

4592

if ((TrueResVal < -1 || TrueResVal > 1) ||

4593

(TrueResVal == -1 && FalseRes.getOpcode() != ISD::ZERO_EXTEND) ||

4594

(TrueResVal == 1 && FalseRes.getOpcode() != ISD::SIGN_EXTEND) ||

4595

(TrueResVal == 0 &&

4596

(FalseRes.getOpcode() != ISD::SELECT_CC || CC != ISD::SETEQ)))

4597

return false;

4598

4599

SDValue SetOrSelCC = FalseRes.getOpcode() == ISD::SELECT_CC

4600

? FalseRes

4601

: FalseRes.getOperand(0);

4602

bool InnerIsSel = SetOrSelCC.getOpcode() == ISD::SELECT_CC;

4603

if (SetOrSelCC.getOpcode() != ISD::SETCC &&

4604

SetOrSelCC.getOpcode() != ISD::SELECT_CC)

4605

return false;

4606

4607

// Without this setb optimization, the outer SELECT_CC will be manually

4608

// selected to SELECT_CC_I4/SELECT_CC_I8 Pseudo, then expand-isel-pseudos pass

4609

// transforms pseudo instruction to isel instruction. When there are more than

4610

// one use for result like zext/sext, with current optimization we only see

4611

// isel is replaced by setb but can't see any significant gain. Since

4612

// setb has longer latency than original isel, we should avoid this. Another

4613

// point is that setb requires comparison always kept, it can break the

4614

// opportunity to get the comparison away if we have in future.

4615

if (!SetOrSelCC.hasOneUse() || (!InnerIsSel && !FalseRes.hasOneUse()))

4616

return false;

4617

4618

SDValue InnerLHS = SetOrSelCC.getOperand(0);

4619

SDValue InnerRHS = SetOrSelCC.getOperand(1);

4620

ISD::CondCode InnerCC =

4621

cast<CondCodeSDNode>(SetOrSelCC.getOperand(InnerIsSel ? 4 : 2))->get();

4622

// If the inner comparison is a select_cc, make sure the true/false values are

4623

// 1/-1 and canonicalize it if needed.

4624

if (InnerIsSel) {

4625

ConstantSDNode *SelCCTrueConst =

4626

dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(2));

4627

ConstantSDNode *SelCCFalseConst =

4628

dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(3));

4629

if (!SelCCTrueConst || !SelCCFalseConst)

4630

return false;

4631

int64_t SelCCTVal = SelCCTrueConst->getSExtValue();

4632

int64_t SelCCFVal = SelCCFalseConst->getSExtValue();

4633

// The values must be -1/1 (requiring a swap) or 1/-1.

4634

if (SelCCTVal == -1 && SelCCFVal == 1) {

4635

std::swap(InnerLHS, InnerRHS);

4636

} else if (SelCCTVal != 1 || SelCCFVal != -1)

4637

return false;

4638

}

4639

4640

// Canonicalize unsigned case

4641

if (InnerCC == ISD::SETULT || InnerCC == ISD::SETUGT) {

4642

IsUnCmp = true;

4643

InnerCC = (InnerCC == ISD::SETULT) ? ISD::SETLT : ISD::SETGT;

4644

}

4645

4646

bool InnerSwapped = false;

4647

if (LHS == InnerRHS && RHS == InnerLHS)

4648

InnerSwapped = true;

4649

else if (LHS != InnerLHS || RHS != InnerRHS)

4650

return false;

4651

4652

switch (CC) {

4653

// (select_cc lhs, rhs, 0, \

4654

// (select_cc [lr]hs, [lr]hs, 1, -1, setlt/setgt), seteq)

4655

case ISD::SETEQ:

4656

if (!InnerIsSel)

4657

return false;

4658

if (InnerCC != ISD::SETLT && InnerCC != ISD::SETGT)

4659

return false;

4660

NeedSwapOps = (InnerCC == ISD::SETGT) ? InnerSwapped : !InnerSwapped;

4661

break;

4662

4663

// (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?lt)

4664

// (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setgt)), setu?lt)

4665

// (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setlt)), setu?lt)

4666

// (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?lt)

4667

// (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setgt)), setu?lt)

4668

// (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setlt)), setu?lt)

4669

case ISD::SETULT:

4670

if (!IsUnCmp && InnerCC != ISD::SETNE)

4671

return false;

4672

IsUnCmp = true;

4673

[[fallthrough]];

4674

case ISD::SETLT:

4675

if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETGT && !InnerSwapped) ||

4676

(InnerCC == ISD::SETLT && InnerSwapped))

4677

NeedSwapOps = (TrueResVal == 1);

4678

else

4679

return false;

4680

break;

4681

4682

// (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?gt)

4683

// (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setlt)), setu?gt)

4684

// (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setgt)), setu?gt)

4685

// (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?gt)

4686

// (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setlt)), setu?gt)

4687

// (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setgt)), setu?gt)

4688

case ISD::SETUGT:

4689

if (!IsUnCmp && InnerCC != ISD::SETNE)

4690

return false;

4691

IsUnCmp = true;

4692

[[fallthrough]];

4693

case ISD::SETGT:

4694

if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETLT && !InnerSwapped) ||

4695

(InnerCC == ISD::SETGT && InnerSwapped))

4696

NeedSwapOps = (TrueResVal == -1);

4697

else

4698

return false;

4699

break;

4700

4701

default:

4702

return false;

4703

}

4704

4705

LLVM_DEBUG(dbgs() << "Found a node that can be lowered to a SETB: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-isel")) { dbgs() << "Found a node that can be lowered to a SETB: "
; } } while (false);

4706

LLVM_DEBUG(N->dump())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-isel")) { N->dump(); } } while (false);

4707

4708

return true;

4709

}

4710

4711

// Return true if it's a software square-root/divide operand.

4712

static bool isSWTestOp(SDValue N) {

4713

if (N.getOpcode() == PPCISD::FTSQRT)

4714

return true;

4715

if (N.getNumOperands() < 1 || !isa<ConstantSDNode>(N.getOperand(0)) ||

4716

N.getOpcode() != ISD::INTRINSIC_WO_CHAIN)

4717

return false;

4718

switch (N.getConstantOperandVal(0)) {

4719

case Intrinsic::ppc_vsx_xvtdivdp:

4720

case Intrinsic::ppc_vsx_xvtdivsp:

4721

case Intrinsic::ppc_vsx_xvtsqrtdp:

4722

case Intrinsic::ppc_vsx_xvtsqrtsp:

4723

return true;

4724

}

4725

return false;

4726

}

4727

4728

/// Fold a BR_CC that tests a single bit of a software divide/sqrt test result
/// into a PPC::BCC that consumes the test result directly.
///
/// Matched patterns (`truncate to i1` has the same semantic as `and 1`):
///   (br_cc seteq, (truncateToi1 SWTestOp), 0) -> (BCC PRED_NU, SWTestOp)
///   (br_cc seteq, (and SWTestOp, 2), 0)       -> (BCC PRED_NE, SWTestOp)
///   (br_cc seteq, (and SWTestOp, 4), 0)       -> (BCC PRED_LE, SWTestOp)
///   (br_cc seteq, (and SWTestOp, 8), 0)       -> (BCC PRED_GE, SWTestOp)
///   (br_cc setne, (truncateToi1 SWTestOp), 0) -> (BCC PRED_UN, SWTestOp)
///   (br_cc setne, (and SWTestOp, 2), 0)       -> (BCC PRED_EQ, SWTestOp)
///   (br_cc setne, (and SWTestOp, 4), 0)       -> (BCC PRED_GT, SWTestOp)
///   (br_cc setne, (and SWTestOp, 8), 0)       -> (BCC PRED_LT, SWTestOp)
///
/// \param N the ISD::BR_CC node to examine (asserted).
/// \returns true if \p N was selected to PPC::BCC; false if nothing matched,
///          in which case \p N is left untouched.
bool PPCDAGToDAGISel::tryFoldSWTestBRCC(SDNode *N) {
  assert(N->getOpcode() == ISD::BR_CC && "ISD::BR_CC is expected.");

  // Only equality / inequality comparisons are handled.
  const ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(1))->get();
  const bool IsNE = Cond == ISD::SETNE;
  if (!IsNE && Cond != ISD::SETEQ)
    return false;

  // The right-hand side of the comparison must be the constant zero.
  auto *RHSConst = dyn_cast<ConstantSDNode>(N->getOperand(3));
  if (!RHSConst || RHSConst->getSExtValue() != 0)
    return false;

  // The left-hand side must wrap a software test operand as its operand 0.
  SDValue Tested = N->getOperand(2);
  if (Tested.getNumOperands() == 0 || !isSWTestOp(Tested.getOperand(0)))
    return false;

  // Picks the predicate for the setne form or the seteq form.
  auto Pick = [IsNE](unsigned OnNE, unsigned OnEQ) {
    return IsNE ? OnNE : OnEQ;
  };

  // PCC stays 0 when the left-hand side is neither a recognised `and` nor a
  // truncate to i1.
  unsigned PCC = 0;
  const unsigned TestedOpc = Tested.getOpcode();
  if (TestedOpc == ISD::AND && isa<ConstantSDNode>(Tested.getOperand(1))) {
    switch (Tested.getConstantOperandVal(1)) {
    case 1:
      PCC = Pick(PPC::PRED_UN, PPC::PRED_NU);
      break;
    case 2:
      PCC = Pick(PPC::PRED_EQ, PPC::PRED_NE);
      break;
    case 4:
      PCC = Pick(PPC::PRED_GT, PPC::PRED_LE);
      break;
    case 8:
      PCC = Pick(PPC::PRED_LT, PPC::PRED_GE);
      break;
    default:
      // Masks selecting anything other than a single one of these bits are
      // not folded.
      return false;
    }
  } else if (TestedOpc == ISD::TRUNCATE && Tested.getValueType() == MVT::i1) {
    PCC = Pick(PPC::PRED_UN, PPC::PRED_NU);
  }

  if (!PCC)
    return false;

  SDLoc dl(N);
  SDValue Ops[] = {getI32Imm(PCC, dl), Tested.getOperand(0), N->getOperand(4),
                   N->getOperand(0)};
  CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
  return true;
}

4786

4787

/// Select a conditional branch whose condition is a masked loop_decrement
/// intrinsic result compared against a constant.
///
/// The node must have the shape
///   N(op0, cc, (and (intrinsic_w_chain loop_decrement ...), C), K, op4)
/// where C is a non-zero constant and K is a constant. On a match the
/// intrinsic is replaced by DecreaseCTRloop / DecreaseCTR8loop (chosen by
/// Subtarget->isPPC64()) and \p N is selected to PPC::BC or PPC::BCn.
///
/// \param N the branch node to examine; presumably an ISD::BR_CC, which is not
///          asserted here — confirm against the caller.
/// \returns true if \p N was selected, false if the pattern did not match.
bool PPCDAGToDAGISel::trySelectLoopCountIntrinsic(SDNode *N) {
  // Sometimes the promoted value of the intrinsic is ANDed by some non-zero
  // value, for example when crbits is disabled. If so, select the
  // loop_decrement intrinsics now.
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
  SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);

  if (LHS.getOpcode() != ISD::AND || !isa<ConstantSDNode>(LHS.getOperand(1)) ||
      isNullConstant(LHS.getOperand(1)))
    return false;

  if (LHS.getOperand(0).getOpcode() != ISD::INTRINSIC_W_CHAIN ||
      cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() !=
          Intrinsic::loop_decrement)
    return false;

  if (!isa<ConstantSDNode>(RHS))
    return false;

  assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
         "Counter decrement comparison is not EQ or NE");

  SDValue OldDecrement = LHS.getOperand(0);
  assert(OldDecrement.hasOneUse() && "loop decrement has more than one use!");

  // Build the machine-level CTR decrement; it produces the i1 branch condition.
  SDLoc DecrementLoc(OldDecrement);
  SDValue ChainInput = OldDecrement.getOperand(0);
  SDValue DecrementOps[] = {Subtarget->isPPC64() ? getI64Imm(1, DecrementLoc)
                                                 : getI32Imm(1, DecrementLoc)};
  unsigned DecrementOpcode =
      Subtarget->isPPC64() ? PPC::DecreaseCTR8loop : PPC::DecreaseCTRloop;
  SDNode *NewDecrement = CurDAG->getMachineNode(DecrementOpcode, DecrementLoc,
                                                MVT::i1, DecrementOps);

  // Classify the comparison constant as zero / non-zero on its full width.
  // Storing getZExtValue() in a 32-bit `unsigned` would drop the upper bits of
  // a 64-bit constant, so a non-zero value with zero low 32 bits would be
  // mistaken for zero and the branch sense would be inverted.
  const bool RHSIsZero = isNullConstant(RHS);
  bool IsBranchOnTrue =
      (CC == ISD::SETEQ && !RHSIsZero) || (CC == ISD::SETNE && RHSIsZero);
  unsigned Opcode = IsBranchOnTrue ? PPC::BC : PPC::BCn;

  // The AND is no longer needed: forward its uses to its constant operand and
  // delete it.
  ReplaceUses(LHS.getValue(0), LHS.getOperand(1));
  CurDAG->RemoveDeadNode(LHS.getNode());

  // Mark the old loop_decrement intrinsic as dead.
  ReplaceUses(OldDecrement.getValue(1), ChainInput);
  CurDAG->RemoveDeadNode(OldDecrement.getNode());

  // Join the intrinsic's incoming chain with the branch's own operand 0.
  SDValue Chain = CurDAG->getNode(ISD::TokenFactor, SDLoc(N), MVT::Other,
                                  ChainInput, N->getOperand(0));

  CurDAG->SelectNodeTo(N, Opcode, MVT::Other, SDValue(NewDecrement, 0),
                       N->getOperand(4), Chain);
  return true;
}

4839

4840

/// Try to select the AND node \p N, whose second operand is a 32-bit
/// immediate, as a single PPC::RLWINM.
///
/// \param N the ISD::AND node to examine (asserted).
/// \returns true if \p N was selected to RLWINM or, for a zero mask, had its
///          uses replaced by the constant operand; false otherwise.
bool PPCDAGToDAGISel::tryAsSingleRLWINM(SDNode *N) {
  assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");

  unsigned Mask;
  if (!isInt32Immediate(N->getOperand(1), Mask))
    return false;

  SDLoc dl(N);
  SDValue Src = N->getOperand(0);
  unsigned SH, MB, ME;

  // An AND of a value rotated by 0..31 bits with a mask folds into a single
  // rlwinm applied to the un-rotated input.
  if (isRotateAndMask(Src.getNode(), Mask, false, SH, MB, ME)) {
    SDValue RotOps[] = {Src.getOperand(0), getI32Imm(SH, dl),
                        getI32Imm(MB, dl), getI32Imm(ME, dl)};
    CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, RotOps);
    return true;
  }

  // A plain run-of-ones mask becomes rlwinm with a zero rotate. A rotate-left
  // input is deliberately excluded: a pattern in the .td file handles it.
  if (isRunOfOnes(Mask, MB, ME) && Src.getOpcode() != ISD::ROTL) {
    SDValue MaskOps[] = {Src, getI32Imm(0, dl), getI32Imm(MB, dl),
                         getI32Imm(ME, dl)};
    CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, MaskOps);
    return true;
  }

  if (Mask != 0)
    return false;

  // AND X, 0 -> 0, not "rlwinm 32".
  ReplaceUses(SDValue(N, 0), N->getOperand(1));
  return true;
}

4876

4877

/// Try to select the 64-bit AND node \p N with an immediate mask as a single
/// PPC::RLWINM8.
///
/// \param N the ISD::AND node to examine (asserted).
/// \returns true if \p N was selected to RLWINM8, false otherwise.
bool PPCDAGToDAGISel::tryAsSingleRLWINM8(SDNode *N) {
  assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");

  uint64_t Mask;
  if (!isInt64Immediate(N->getOperand(1).getNode(), Mask))
    return false;

  // The mask must be a run of ones that starts at bit 32 or later and does
  // not wrap (MB <= ME), i.e. it lies entirely inside [32, 64):
  //
  //   |xxxxxxxxxxx 000 11111 000|
  //   0           32  MB   ME  64
  //
  // RLWINM replaces the contents of [0, 32) with [32, 64) even without a
  // rotate, so masks outside that range cannot use it.
  unsigned MB, ME;
  if (!isRunOfOnes64(Mask, MB, ME) || MB < 32 || MB > ME)
    return false;

  SDLoc dl(N);
  // Mask bounds are rebased by 32 for the rlwinm operands.
  SDValue Ops[] = {N->getOperand(0), getI64Imm(0, dl), getI64Imm(MB - 32, dl),
                   getI64Imm(ME - 32, dl)};
  CurDAG->SelectNodeTo(N, PPC::RLWINM8, MVT::i64, Ops);
  return true;
}

4902

4903

/// Try to select the 64-bit AND node \p N with an immediate mask as a pair of
/// PPC::RLDICL instructions.
///
/// The mask is first extended with ones over its leading zeros; if the result
/// is a (wrapped) run of ones, the AND is done as rotate+clear followed by
/// rotate-back+clear.
///
/// \param N the ISD::AND node to examine (asserted).
/// \returns true if \p N was selected, false otherwise.
bool PPCDAGToDAGISel::tryAsPairOfRLDICL(SDNode *N) {
  assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");

  // 16-bit immediates are left alone: the pattern in the .td file handles
  // them well with "andi.".
  uint64_t Mask;
  if (!isInt64Immediate(N->getOperand(1).getNode(), Mask) ||
      isUInt<16>(Mask))
    return false;

  SDLoc Loc(N);

  // Add the missing bits on the left of the mask and check that the result is
  // a wrapped run of ones, i.e. change
  //   |0001111100000011111111|
  // into
  //   |1111111100000011111111|.
  const unsigned LeadingZeros = llvm::countl_zero(Mask);
  if (LeadingZeros != 0)
    Mask |= maskLeadingOnes<uint64_t>(LeadingZeros);

  unsigned MB, ME;
  if (!isRunOfOnes64(Mask, MB, ME))
    return false;

  // There are ME + 1 ones on the left and (MB - ME + 63) & 63 zeros in
  // between.
  const unsigned OnesOnLeft = ME + 1;
  const unsigned ZerosInBetween = (MB - ME + 63) & 63;

  // First rldicl: rotate left by OnesOnLeft (so the leading ones become
  // trailing ones) and clear on the left the bits that are already zeros in
  // the mask:
  //   |1111111100000011111111| -> |0000001111111111111111|
  SDValue Rotated =
      SDValue(CurDAG->getMachineNode(PPC::RLDICL, Loc, MVT::i64,
                                     N->getOperand(0),
                                     getI64Imm(OnesOnLeft, Loc),
                                     getI64Imm(ZerosInBetween, Loc)),
              0);

  // Second rldicl: rotate back by 64 - OnesOnLeft to undo the previous rotate,
  // then clear on the left the number of ones that were added above:
  //   |0000001111111111111111| -> |0001111100000011111111|
  SDValue Ops[] = {Rotated, getI64Imm(64 - OnesOnLeft, Loc),
                   getI64Imm(LeadingZeros, Loc)};
  CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
  return true;
}

4956

4957

/// Try to select (and (or x, c1), c2) with 32-bit immediates c1 and c2 as a
/// single PPC::RLWIMI bitfield insert.
///
/// ISD::OR doesn't get all the bitfield insertion fun; this handles the AND
/// side of it.
///
/// \param N the ISD::AND node to examine (asserted).
/// \returns true if \p N was replaced by an RLWIMI node, false otherwise.
bool PPCDAGToDAGISel::tryAsSingleRLWIMI(SDNode *N) {
  assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");

  unsigned AndImm; // c2
  if (!isInt32Immediate(N->getOperand(1), AndImm))
    return false;

  SDValue OrNode = N->getOperand(0);
  unsigned OrImm; // c1
  if (OrNode.getOpcode() != ISD::OR ||
      !isInt32Immediate(OrNode.getOperand(1), OrImm))
    return false;

  // The idea is to check whether the expression is equivalent to
  //   (c1 & m) | (x & ~m)
  // where m is a run-of-ones mask. For each bit position:
  //  - c1 = 1, c2 = 1: the output is 1.
  //  - c1 = 0, c2 = 0: the output is 0.
  //  - c1 = 0, c2 = 1: the output comes from x.
  //  - c1 = 1, c2 = 0: the output is 0.
  // If the last case never occurs, m can be formed from the bits on which c1
  // and c2 agree.
  const unsigned AgreeingBits = ~(AndImm ^ OrImm);
  const unsigned SetInOrClearedByAnd = ~AndImm & OrImm;

  unsigned MB, ME;
  if (!isRunOfOnes(AgreeingBits, MB, ME) || SetInOrClearedByAnd != 0)
    return false;

  SDLoc dl(N);
  SDValue Ops[] = {OrNode.getOperand(0), OrNode.getOperand(1),
                   getI32Imm(0, dl), getI32Imm(MB, dl), getI32Imm(ME, dl)};
  ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
  return true;
}

4994

4995

/// Try to select the 64-bit AND node \p N, whose immediate is a low-bit mask
/// (isMask_64), as a single PPC::RLDICL, folding a preceding logical right
/// shift into the rotate amount when possible.
///
/// \param N the ISD::AND node to examine (asserted).
/// \returns true if \p N was selected to RLDICL, false if the immediate is
///          not a 64-bit constant satisfying isMask_64.
bool PPCDAGToDAGISel::tryAsSingleRLDICL(SDNode *N) {
  assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");

  uint64_t Mask;
  if (!isInt64Immediate(N->getOperand(1).getNode(), Mask) || !isMask_64(Mask))
    return false;

  // If this is a 64-bit zero-extension mask, emit rldicl.
  const unsigned MB = 64 - llvm::countr_one(Mask);
  unsigned SH = 0;
  unsigned ShAmt;
  SDValue Src = N->getOperand(0);
  SDLoc dl(N);

  // (and (any_extend (srl x, n)), mask) with n <= MB: place x into an
  // IMPLICIT_DEF of the extended type via INSERT_SUBREG (sub-register index
  // 1) and fold the shift into the rotate amount.
  if (Src.getOpcode() == ISD::ANY_EXTEND) {
    SDValue Narrow = Src.getOperand(0);
    if (Narrow.getOpcode() == ISD::SRL &&
        isInt32Immediate(Narrow.getOperand(1).getNode(), ShAmt) &&
        ShAmt <= MB) {
      EVT WideVT = Src.getNode()->getValueType(0);
      SDNode *ImplicitDef =
          CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, WideVT);
      SDValue Undef(ImplicitDef, 0);

      SDNode *Inserted =
          CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, WideVT, Undef,
                                 Narrow.getOperand(0), getI32Imm(1, dl));
      Src = SDValue(Inserted, 0);
      SH = 64 - ShAmt;
    }
  }

  // If the operand is a logical right shift, we can fold it into this
  // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb)
  // for n <= mb. The right shift is really a left rotate followed by a
  // mask, and this mask is a more-restrictive sub-mask of the mask implied
  // by the shift.
  if (Src.getOpcode() == ISD::SRL &&
      isInt32Immediate(Src.getOperand(1).getNode(), ShAmt) && ShAmt <= MB) {
    assert(ShAmt < 64 && "Illegal shift amount");
    Src = Src.getOperand(0);
    SH = 64 - ShAmt;
  }

  SDValue Ops[] = {Src, getI32Imm(SH, dl), getI32Imm(MB, dl)};
  CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
  return true;
}

5041

5042

/// Try to select the 64-bit AND node \p N as a single PPC::RLDICR.
///
/// This applies when the immediate is a negated 64-bit zero-extension mask,
/// i.e. a sequence of ones from the most significant side with all zeros for
/// the remainder.
///
/// \param N the ISD::AND node to examine (asserted).
/// \returns true if \p N was selected to RLDICR, false otherwise.
bool PPCDAGToDAGISel::tryAsSingleRLDICR(SDNode *N) {
  assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");

  uint64_t Imm64;
  if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64))
    return false;

  // The complement of the immediate must be a low-bit mask.
  const uint64_t ClearedBits = ~Imm64;
  if (!isMask_64(ClearedBits))
    return false;

  // Mask operand for rldicr: 63 minus the number of low bits being cleared.
  // No rotation is needed, so the shift operand is 0.
  const unsigned MaskEnd = 63 - llvm::countr_one(ClearedBits);

  SDLoc dl(N);
  SDValue Ops[] = {N->getOperand(0), getI32Imm(0, dl), getI32Imm(MaskEnd, dl)};
  CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops);
  return true;
}

5059

5060

/// Try to select the 64-bit OR node \p N with an immediate as a single
/// PPC::RLDIMI that inserts bits from an all-ones value.
///
/// \param N the ISD::OR node to examine (asserted).
/// \returns true if \p N was selected to RLDIMI, false otherwise.
bool PPCDAGToDAGISel::tryAsSingleRLDIMI(SDNode *N) {
  assert(N->getOpcode() == ISD::OR && "ISD::OR SDNode expected");

  SDValue N0 = N->getOperand(0);

  uint64_t Imm64;
  if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64))
    return false;

  // We won't get fewer instructions if the imm is a 32-bit integer.
  if (isUInt<32>(Imm64))
    return false;

  // rldimi requires the imm to have consecutive ones with both sides zero.
  unsigned MB, ME;
  if (!isRunOfOnes64(Imm64, MB, ME))
    return false;

  // The first operand must have only one use, otherwise this may increase
  // register pressure since rldimi is destructive.
  if (!N0.hasOneUse())
    return false;

  const unsigned SH = 63 - ME;
  SDLoc Dl(N);

  // Use selectI64Imm to materialize the all-ones value (an LI instruction)
  // instead of putting Imm64 in directly.
  SDValue AllOnes =
      SDValue(selectI64Imm(CurDAG, getI64Imm(-1, Dl).getNode()), 0);

  SDValue Ops[] = {N0, AllOnes, getI32Imm(SH, Dl), getI32Imm(MB, Dl)};
  CurDAG->SelectNodeTo(N, PPC::RLDIMI, MVT::i64, Ops);
  return true;
}

5084

5085

// Select - Convert the specified operand from a target-independent to a

5086

// target-specific node if it hasn't already been changed.

5087

void PPCDAGToDAGISel::Select(SDNode *N) {

5088

SDLoc dl(N);

5089

if (N->isMachineOpcode()) {

5090

N->setNodeId(-1);

5091

return; // Already selected.

5092

}

5093

5094

// In case any misguided DAG-level optimizations form an ADD with a

5095

// TargetConstant operand, crash here instead of miscompiling (by selecting

5096

// an r+r add instead of some kind of r+i add).

5097

if (N->getOpcode() == ISD::ADD &&

5098

N->getOperand(1).getOpcode() == ISD::TargetConstant)

5099

llvm_unreachable("Invalid ADD with TargetConstant operand")::llvm::llvm_unreachable_internal("Invalid ADD with TargetConstant operand"
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 5099);

5100

5101

// Try matching complex bit permutations before doing anything else.

5102

if (tryBitPermutation(N))

5103

return;

5104

5105

// Try to emit integer compares as GPR-only sequences (i.e. no use of CR).

5106

if (tryIntCompareInGPR(N))

5107

return;

5108

5109

switch (N->getOpcode()) {

5110

default: break;

5111

5112

case ISD::Constant:

5113

if (N->getValueType(0) == MVT::i64) {

5114

ReplaceNode(N, selectI64Imm(CurDAG, N));

5115

return;

5116

}

5117

break;

5118

5119

case ISD::INTRINSIC_VOID: {

5120

auto IntrinsicID = N->getConstantOperandVal(1);

5121

if (IntrinsicID != Intrinsic::ppc_tdw && IntrinsicID != Intrinsic::ppc_tw &&

5122

IntrinsicID != Intrinsic::ppc_trapd &&

5123

IntrinsicID != Intrinsic::ppc_trap)

5124

break;

5125

unsigned Opcode = (IntrinsicID == Intrinsic::ppc_tdw ||

5126

IntrinsicID == Intrinsic::ppc_trapd)

5127

? PPC::TDI

5128

: PPC::TWI;

5129

SmallVector<SDValue, 4> OpsWithMD;

5130

unsigned MDIndex;

5131

if (IntrinsicID == Intrinsic::ppc_tdw ||

5132

IntrinsicID == Intrinsic::ppc_tw) {

5133

SDValue Ops[] = {N->getOperand(4), N->getOperand(2), N->getOperand(3)};

5134

int16_t SImmOperand2;

5135

int16_t SImmOperand3;

5136

int16_t SImmOperand4;

5137

bool isOperand2IntS16Immediate =

5138

isIntS16Immediate(N->getOperand(2), SImmOperand2);

5139

bool isOperand3IntS16Immediate =

5140

isIntS16Immediate(N->getOperand(3), SImmOperand3);

5141

// We will emit PPC::TD or PPC::TW if the 2nd and 3rd operands are reg +

5142

// reg or imm + imm. The imm + imm form will be optimized to either an

5143

// unconditional trap or a nop in a later pass.

5144

if (isOperand2IntS16Immediate == isOperand3IntS16Immediate)

5145

Opcode = IntrinsicID == Intrinsic::ppc_tdw ? PPC::TD : PPC::TW;

5146

else if (isOperand3IntS16Immediate)

5147

// The 2nd and 3rd operands are reg + imm.

5148

Ops[2] = getI32Imm(int(SImmOperand3) & 0xFFFF, dl);

5149

else {

5150

// The 2nd and 3rd operands are imm + reg.

5151

bool isOperand4IntS16Immediate =

5152

isIntS16Immediate(N->getOperand(4), SImmOperand4);

5153

(void)isOperand4IntS16Immediate;

5154

assert(isOperand4IntS16Immediate &&(static_cast <bool> (isOperand4IntS16Immediate &&
"The 4th operand is not an Immediate") ? void (0) : __assert_fail
("isOperand4IntS16Immediate && \"The 4th operand is not an Immediate\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 5155, __extension__
__PRETTY_FUNCTION__))

5155

"The 4th operand is not an Immediate")(static_cast <bool> (isOperand4IntS16Immediate &&
"The 4th operand is not an Immediate") ? void (0) : __assert_fail
("isOperand4IntS16Immediate && \"The 4th operand is not an Immediate\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 5155, __extension__
__PRETTY_FUNCTION__));

5156

// We need to flip the condition immediate TO.

5157

int16_t TO = int(SImmOperand4) & 0x1F;

5158

// We swap the first and second bit of TO if they are not same.

5159

if ((TO & 0x1) != ((TO & 0x2) >> 1))

5160

TO = (TO & 0x1) ? TO + 1 : TO - 1;

5161

// We swap the fourth and fifth bit of TO if they are not same.

5162

if ((TO & 0x8) != ((TO & 0x10) >> 1))

5163

TO = (TO & 0x8) ? TO + 8 : TO - 8;

5164

Ops[0] = getI32Imm(TO, dl);

5165

Ops[1] = N->getOperand(3);

5166

Ops[2] = getI32Imm(int(SImmOperand2) & 0xFFFF, dl);

5167

}

5168

OpsWithMD = {Ops[0], Ops[1], Ops[2]};

5169

MDIndex = 5;

5170

} else {

5171

OpsWithMD = {getI32Imm(24, dl), N->getOperand(2), getI32Imm(0, dl)};

5172

MDIndex = 3;

5173

}

5174

5175

if (N->getNumOperands() > MDIndex) {

5176

SDValue MDV = N->getOperand(MDIndex);

5177

const MDNode *MD = cast<MDNodeSDNode>(MDV)->getMD();

5178

assert(MD->getNumOperands() != 0 && "Empty MDNode in operands!")(static_cast <bool> (MD->getNumOperands() != 0 &&
"Empty MDNode in operands!") ? void (0) : __assert_fail ("MD->getNumOperands() != 0 && \"Empty MDNode in operands!\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 5178, __extension__
__PRETTY_FUNCTION__));

5179

assert((isa<MDString>(MD->getOperand(0)) && cast<MDString>((static_cast <bool> ((isa<MDString>(MD->getOperand
(0)) && cast<MDString>( MD->getOperand(0))->
getString().equals("ppc-trap-reason")) && "Unsupported annotation data type!"
) ? void (0) : __assert_fail ("(isa<MDString>(MD->getOperand(0)) && cast<MDString>( MD->getOperand(0))->getString().equals(\"ppc-trap-reason\")) && \"Unsupported annotation data type!\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 5181, __extension__
__PRETTY_FUNCTION__))

5180

MD->getOperand(0))->getString().equals("ppc-trap-reason"))(static_cast <bool> ((isa<MDString>(MD->getOperand
(0)) && cast<MDString>( MD->getOperand(0))->
getString().equals("ppc-trap-reason")) && "Unsupported annotation data type!"
) ? void (0) : __assert_fail ("(isa<MDString>(MD->getOperand(0)) && cast<MDString>( MD->getOperand(0))->getString().equals(\"ppc-trap-reason\")) && \"Unsupported annotation data type!\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 5181, __extension__
__PRETTY_FUNCTION__))

5181

&& "Unsupported annotation data type!")(static_cast <bool> ((isa<MDString>(MD->getOperand
(0)) && cast<MDString>( MD->getOperand(0))->
getString().equals("ppc-trap-reason")) && "Unsupported annotation data type!"
) ? void (0) : __assert_fail ("(isa<MDString>(MD->getOperand(0)) && cast<MDString>( MD->getOperand(0))->getString().equals(\"ppc-trap-reason\")) && \"Unsupported annotation data type!\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 5181, __extension__
__PRETTY_FUNCTION__));

5182

for (unsigned i = 1; i < MD->getNumOperands(); i++) {

5183

assert(isa<MDString>(MD->getOperand(i)) &&(static_cast <bool> (isa<MDString>(MD->getOperand
(i)) && "Invalid data type for annotation ppc-trap-reason!"
) ? void (0) : __assert_fail ("isa<MDString>(MD->getOperand(i)) && \"Invalid data type for annotation ppc-trap-reason!\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 5184, __extension__
__PRETTY_FUNCTION__))

5184

"Invalid data type for annotation ppc-trap-reason!")(static_cast <bool> (isa<MDString>(MD->getOperand
(i)) && "Invalid data type for annotation ppc-trap-reason!"
) ? void (0) : __assert_fail ("isa<MDString>(MD->getOperand(i)) && \"Invalid data type for annotation ppc-trap-reason!\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 5184, __extension__
__PRETTY_FUNCTION__));

5185

OpsWithMD.push_back(

5186

getI32Imm(std::stoi(cast<MDString>(

5187

MD->getOperand(i))->getString().str()), dl));

5188

}

5189

}

5190

OpsWithMD.push_back(N->getOperand(0)); // chain

5191

CurDAG->SelectNodeTo(N, Opcode, MVT::Other, OpsWithMD);

5192

return;

5193

}

5194

5195

case ISD::INTRINSIC_WO_CHAIN: {

5196

// We emit the PPC::FSELS instruction here because of type conflicts with

5197

// the comparison operand. The FSELS instruction is defined to use an 8-byte

5198

// comparison like the FSELD version. The fsels intrinsic takes a 4-byte

5199

// value for the comparison. When selecting through a .td file, a type

5200

// error is raised. Must check this first so we never break on the

5201

// !Subtarget->isISA3_1() check.

5202

auto IntID = N->getConstantOperandVal(0);

5203

if (IntID == Intrinsic::ppc_fsels) {

5204

SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3)};

5205

CurDAG->SelectNodeTo(N, PPC::FSELS, MVT::f32, Ops);

5206

return;

5207

}

5208

5209

if (IntID == Intrinsic::ppc_bcdadd_p || IntID == Intrinsic::ppc_bcdsub_p) {

5210

auto Pred = N->getConstantOperandVal(1);

5211

unsigned Opcode =

5212

IntID == Intrinsic::ppc_bcdadd_p ? PPC::BCDADD_rec : PPC::BCDSUB_rec;

5213

unsigned SubReg = 0;

5214

unsigned ShiftVal = 0;

5215

bool Reverse = false;

5216

switch (Pred) {

5217

case 0:

5218

SubReg = PPC::sub_eq;

5219

ShiftVal = 1;

5220

break;

5221

case 1:

5222

SubReg = PPC::sub_eq;

5223

ShiftVal = 1;

5224

Reverse = true;

5225

break;

5226

case 2:

5227

SubReg = PPC::sub_lt;

5228

ShiftVal = 3;

5229

break;

5230

case 3:

5231

SubReg = PPC::sub_lt;

5232

ShiftVal = 3;

5233

Reverse = true;

5234

break;

5235

case 4:

5236

SubReg = PPC::sub_gt;

5237

ShiftVal = 2;

5238

break;

5239

case 5:

5240

SubReg = PPC::sub_gt;

5241

ShiftVal = 2;

5242

Reverse = true;

5243

break;

5244

case 6:

5245

SubReg = PPC::sub_un;

5246

break;

5247

case 7:

5248

SubReg = PPC::sub_un;

5249

Reverse = true;

5250

break;

5251

}

5252

5253

EVT VTs[] = {MVT::v16i8, MVT::Glue};

5254

SDValue Ops[] = {N->getOperand(2), N->getOperand(3),

5255

CurDAG->getTargetConstant(0, dl, MVT::i32)};

5256

SDValue BCDOp = SDValue(CurDAG->getMachineNode(Opcode, dl, VTs, Ops), 0);

5257

SDValue CR6Reg = CurDAG->getRegister(PPC::CR6, MVT::i32);

5258

// On Power10, we can use SETBC[R]. On prior architectures, we have to use

5259

// MFOCRF and shift/negate the value.

5260

if (Subtarget->isISA3_1()) {

5261

SDValue SubRegIdx = CurDAG->getTargetConstant(SubReg, dl, MVT::i32);

5262

SDValue CRBit = SDValue(

5263

CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,

5264

CR6Reg, SubRegIdx, BCDOp.getValue(1)),

5265

0);

5266

CurDAG->SelectNodeTo(N, Reverse ? PPC::SETBCR : PPC::SETBC, MVT::i32,

5267

CRBit);

5268

} else {

5269

SDValue Move =

5270

SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR6Reg,

5271

BCDOp.getValue(1)),

5272

0);

5273

SDValue Ops[] = {Move, getI32Imm((32 - (4 + ShiftVal)) & 31, dl),

5274

getI32Imm(31, dl), getI32Imm(31, dl)};

5275

if (!Reverse)

5276

CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);

5277

else {

5278

SDValue Shift = SDValue(

5279

CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);

5280

CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Shift, getI32Imm(1, dl));

5281

}

5282

}

5283

return;

5284

}

5285

5286

if (!Subtarget->isISA3_1())

5287

break;

5288

unsigned Opcode = 0;

5289

switch (IntID) {

5290

default:

5291

break;

5292

case Intrinsic::ppc_altivec_vstribr_p:

5293

Opcode = PPC::VSTRIBR_rec;

5294

break;

5295

case Intrinsic::ppc_altivec_vstribl_p:

5296

Opcode = PPC::VSTRIBL_rec;

5297

break;

5298

case Intrinsic::ppc_altivec_vstrihr_p:

5299

Opcode = PPC::VSTRIHR_rec;

5300

break;

5301

case Intrinsic::ppc_altivec_vstrihl_p:

5302

Opcode = PPC::VSTRIHL_rec;

5303

break;

5304

}

5305

if (!Opcode)

5306

break;

5307

5308

// Generate the appropriate vector string isolate intrinsic to match.

5309

EVT VTs[] = {MVT::v16i8, MVT::Glue};

5310

SDValue VecStrOp =

5311

SDValue(CurDAG->getMachineNode(Opcode, dl, VTs, N->getOperand(2)), 0);

5312

// Vector string isolate instructions update the EQ bit of CR6.

5313

// Generate a SETBC instruction to extract the bit and place it in a GPR.

5314

SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_eq, dl, MVT::i32);

5315

SDValue CR6Reg = CurDAG->getRegister(PPC::CR6, MVT::i32);

5316

SDValue CRBit = SDValue(

5317

CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,

5318

CR6Reg, SubRegIdx, VecStrOp.getValue(1)),

5319

0);

5320

CurDAG->SelectNodeTo(N, PPC::SETBC, MVT::i32, CRBit);

5321

return;

5322

}

5323

5324

case ISD::SETCC:

5325

case ISD::STRICT_FSETCC:

5326

case ISD::STRICT_FSETCCS:

5327

if (trySETCC(N))

5328

return;

5329

break;

5330

// These nodes will be transformed into GETtlsADDR32 node, which

5331

// later becomes BL_TLS __tls_get_addr(sym@tlsgd)@PLT

5332

case PPCISD::ADDI_TLSLD_L_ADDR:

5333

case PPCISD::ADDI_TLSGD_L_ADDR: {

5334

const Module *Mod = MF->getFunction().getParent();

5335

if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 ||

5336

!Subtarget->isSecurePlt() || !Subtarget->isTargetELF() ||

5337

Mod->getPICLevel() == PICLevel::SmallPIC)

5338

break;

5339

// Attach global base pointer on GETtlsADDR32 node in order to

5340

// generate secure plt code for TLS symbols.

5341

getGlobalBaseReg();

5342

} break;

5343

case PPCISD::CALL: {

5344

if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 ||

5345

!TM.isPositionIndependent() || !Subtarget->isSecurePlt() ||

5346

!Subtarget->isTargetELF())

5347

break;

5348

5349

SDValue Op = N->getOperand(1);

5350

5351

if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) {

5352

if (GA->getTargetFlags() == PPCII::MO_PLT)

5353

getGlobalBaseReg();

5354

}

5355

else if (ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) {

5356

if (ES->getTargetFlags() == PPCII::MO_PLT)

5357

getGlobalBaseReg();

5358

}

5359

}

5360

break;

5361

5362

case PPCISD::GlobalBaseReg:

5363

ReplaceNode(N, getGlobalBaseReg());

5364

return;

5365

5366

case ISD::FrameIndex:

5367

selectFrameIndex(N, N);

5368

return;

5369

5370

case PPCISD::MFOCRF: {

5371

SDValue InGlue = N->getOperand(1);

5372

ReplaceNode(N, CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32,

5373

N->getOperand(0), InGlue));

5374

return;

5375

}

5376

5377

case PPCISD::READ_TIME_BASE:

5378

ReplaceNode(N, CurDAG->getMachineNode(PPC::ReadTB, dl, MVT::i32, MVT::i32,

5379

MVT::Other, N->getOperand(0)));

5380

return;

5381

5382

case PPCISD::SRA_ADDZE: {

5383

SDValue N0 = N->getOperand(0);

5384

SDValue ShiftAmt =

5385

CurDAG->getTargetConstant(*cast<ConstantSDNode>(N->getOperand(1))->

5386

getConstantIntValue(), dl,

5387

N->getValueType(0));

5388

if (N->getValueType(0) == MVT::i64) {

5389

SDNode *Op =

5390

CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, MVT::Glue,

5391

N0, ShiftAmt);

5392

CurDAG->SelectNodeTo(N, PPC::ADDZE8, MVT::i64, SDValue(Op, 0),

5393

SDValue(Op, 1));

5394

return;

5395

} else {

5396

assert(N->getValueType(0) == MVT::i32 &&(static_cast <bool> (N->getValueType(0) == MVT::i32 &&
"Expecting i64 or i32 in PPCISD::SRA_ADDZE") ? void (0) : __assert_fail
("N->getValueType(0) == MVT::i32 && \"Expecting i64 or i32 in PPCISD::SRA_ADDZE\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 5397, __extension__
__PRETTY_FUNCTION__))

5397

"Expecting i64 or i32 in PPCISD::SRA_ADDZE")(static_cast <bool> (N->getValueType(0) == MVT::i32 &&
"Expecting i64 or i32 in PPCISD::SRA_ADDZE") ? void (0) : __assert_fail
("N->getValueType(0) == MVT::i32 && \"Expecting i64 or i32 in PPCISD::SRA_ADDZE\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 5397, __extension__
__PRETTY_FUNCTION__));

5398

SDNode *Op =

5399

CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue,

5400

N0, ShiftAmt);

5401

CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, SDValue(Op, 0),

5402

SDValue(Op, 1));

5403

return;

5404

}

5405

}

5406

5407

case ISD::STORE: {

5408

// Change TLS initial-exec D-form stores to X-form stores.

5409

StoreSDNode *ST = cast<StoreSDNode>(N);

5410

if (EnableTLSOpt && Subtarget->isELFv2ABI() &&

5411

ST->getAddressingMode() != ISD::PRE_INC)

5412

if (tryTLSXFormStore(ST))

5413

return;

5414

break;

5415

}

5416

case ISD::LOAD: {

5417

// Handle preincrement loads.

5418

LoadSDNode *LD = cast<LoadSDNode>(N);

5419

EVT LoadedVT = LD->getMemoryVT();

5420

5421

// Normal loads are handled by code generated from the .td file.

5422

if (LD->getAddressingMode() != ISD::PRE_INC) {

5423

// Change TLS initial-exec D-form loads to X-form loads.

5424

if (EnableTLSOpt && Subtarget->isELFv2ABI())

5425

if (tryTLSXFormLoad(LD))

5426

return;

5427

break;

5428

}

5429

5430

SDValue Offset = LD->getOffset();

5431

if (Offset.getOpcode() == ISD::TargetConstant ||

5432

Offset.getOpcode() == ISD::TargetGlobalAddress) {

5433

5434

unsigned Opcode;

5435

bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;

5436

if (LD->getValueType(0) != MVT::i64) {

5437

// Handle PPC32 integer and normal FP loads.

5438

assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load")(static_cast <bool> ((!isSExt || LoadedVT == MVT::i16) &&
"Invalid sext update load") ? void (0) : __assert_fail ("(!isSExt || LoadedVT == MVT::i16) && \"Invalid sext update load\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 5438, __extension__
__PRETTY_FUNCTION__));

5439

switch (LoadedVT.getSimpleVT().SimpleTy) {

5440

default: llvm_unreachable("Invalid PPC load type!")::llvm::llvm_unreachable_internal("Invalid PPC load type!", "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 5440);

5441

case MVT::f64: Opcode = PPC::LFDU; break;

5442

case MVT::f32: Opcode = PPC::LFSU; break;

5443

case MVT::i32: Opcode = PPC::LWZU; break;

5444

case MVT::i16: Opcode = isSExt ? PPC::LHAU : PPC::LHZU; break;

5445

case MVT::i1:

5446

case MVT::i8: Opcode = PPC::LBZU; break;

5447

}

5448

} else {

5449

assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!")(static_cast <bool> (LD->getValueType(0) == MVT::i64
&& "Unknown load result type!") ? void (0) : __assert_fail
("LD->getValueType(0) == MVT::i64 && \"Unknown load result type!\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 5449, __extension__
__PRETTY_FUNCTION__));

5450

assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load")(static_cast <bool> ((!isSExt || LoadedVT == MVT::i16) &&
"Invalid sext update load") ? void (0) : __assert_fail ("(!isSExt || LoadedVT == MVT::i16) && \"Invalid sext update load\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 5450, __extension__
__PRETTY_FUNCTION__));

5451

switch (LoadedVT.getSimpleVT().SimpleTy) {

5452

default: llvm_unreachable("Invalid PPC load type!")::llvm::llvm_unreachable_internal("Invalid PPC load type!", "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 5452);

5453

case MVT::i64: Opcode = PPC::LDU; break;

5454

case MVT::i32: Opcode = PPC::LWZU8; break;

5455

case MVT::i16: Opcode = isSExt ? PPC::LHAU8 : PPC::LHZU8; break;

5456

case MVT::i1:

5457

case MVT::i8: Opcode = PPC::LBZU8; break;

5458

}

5459

}

5460

5461

SDValue Chain = LD->getChain();

5462

SDValue Base = LD->getBasePtr();

5463

SDValue Ops[] = { Offset, Base, Chain };

5464

SDNode *MN = CurDAG->getMachineNode(

5465

Opcode, dl, LD->getValueType(0),

5466

PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops);

5467

transferMemOperands(N, MN);

5468

ReplaceNode(N, MN);

5469

return;

5470

} else {

5471

unsigned Opcode;

5472

bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;

5473

if (LD->getValueType(0) != MVT::i64) {

5474

// Handle PPC32 integer and normal FP loads.

5475

assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load")(static_cast <bool> ((!isSExt || LoadedVT == MVT::i16) &&
"Invalid sext update load") ? void (0) : __assert_fail ("(!isSExt || LoadedVT == MVT::i16) && \"Invalid sext update load\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 5475, __extension__
__PRETTY_FUNCTION__));

5476

switch (LoadedVT.getSimpleVT().SimpleTy) {

5477

default: llvm_unreachable("Invalid PPC load type!")::llvm::llvm_unreachable_internal("Invalid PPC load type!", "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 5477);

5478

case MVT::f64: Opcode = PPC::LFDUX; break;

5479

case MVT::f32: Opcode = PPC::LFSUX; break;

5480

case MVT::i32: Opcode = PPC::LWZUX; break;

5481

case MVT::i16: Opcode = isSExt ? PPC::LHAUX : PPC::LHZUX; break;

5482

case MVT::i1:

5483

case MVT::i8: Opcode = PPC::LBZUX; break;

5484

}

5485

} else {

5486

assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!")(static_cast <bool> (LD->getValueType(0) == MVT::i64
&& "Unknown load result type!") ? void (0) : __assert_fail
("LD->getValueType(0) == MVT::i64 && \"Unknown load result type!\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 5486, __extension__
__PRETTY_FUNCTION__));

5487

assert((!isSExt || LoadedVT == MVT::i16 || LoadedVT == MVT::i32) &&(static_cast <bool> ((!isSExt || LoadedVT == MVT::i16 ||
LoadedVT == MVT::i32) && "Invalid sext update load")
? void (0) : __assert_fail ("(!isSExt || LoadedVT == MVT::i16 || LoadedVT == MVT::i32) && \"Invalid sext update load\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 5488, __extension__
__PRETTY_FUNCTION__))

5488

"Invalid sext update load")(static_cast <bool> ((!isSExt || LoadedVT == MVT::i16 ||
LoadedVT == MVT::i32) && "Invalid sext update load")
? void (0) : __assert_fail ("(!isSExt || LoadedVT == MVT::i16 || LoadedVT == MVT::i32) && \"Invalid sext update load\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 5488, __extension__
__PRETTY_FUNCTION__));

5489

switch (LoadedVT.getSimpleVT().SimpleTy) {

5490

default: llvm_unreachable("Invalid PPC load type!")::llvm::llvm_unreachable_internal("Invalid PPC load type!", "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 5490);

5491

case MVT::i64: Opcode = PPC::LDUX; break;

5492

case MVT::i32: Opcode = isSExt ? PPC::LWAUX : PPC::LWZUX8; break;

5493

case MVT::i16: Opcode = isSExt ? PPC::LHAUX8 : PPC::LHZUX8; break;

5494

case MVT::i1:

5495

case MVT::i8: Opcode = PPC::LBZUX8; break;

5496

}

5497

}

5498

5499

SDValue Chain = LD->getChain();

5500

SDValue Base = LD->getBasePtr();

5501

SDValue Ops[] = { Base, Offset, Chain };

5502

SDNode *MN = CurDAG->getMachineNode(

5503

Opcode, dl, LD->getValueType(0),

5504

PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops);

5505

transferMemOperands(N, MN);

5506

ReplaceNode(N, MN);

5507

return;

5508

}

5509

}

5510

5511

case ISD::AND:

5512

// If this is an 'and' with a mask, try to emit rlwinm/rldicl/rldicr

5513

if (tryAsSingleRLWINM(N) || tryAsSingleRLWIMI(N) || tryAsSingleRLDICL(N) ||

5514

tryAsSingleRLDICR(N) || tryAsSingleRLWINM8(N) || tryAsPairOfRLDICL(N))

5515

return;

5516

5517

// Other cases are autogenerated.

5518

break;

5519

case ISD::OR: {

5520

if (N->getValueType(0) == MVT::i32)

5521

if (tryBitfieldInsert(N))

5522

return;

5523

5524

int16_t Imm;

5525

if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&

5526

isIntS16Immediate(N->getOperand(1), Imm)) {

5527

KnownBits LHSKnown = CurDAG->computeKnownBits(N->getOperand(0));

5528

5529

// If this is equivalent to an add, then we can fold it with the

5530

// FrameIndex calculation.

5531

if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)Imm) == ~0ULL) {

5532

selectFrameIndex(N, N->getOperand(0).getNode(), (int64_t)Imm);

5533

return;

5534

}

5535

}

5536

5537

// If this is 'or' against an imm with consecutive ones and both sides zero,

5538

// try to emit rldimi

5539

if (tryAsSingleRLDIMI(N))

5540

return;

5541

5542

// OR with a 32-bit immediate can be handled by ori + oris

5543

// without creating an immediate in a GPR.

5544

uint64_t Imm64 = 0;

5545

bool IsPPC64 = Subtarget->isPPC64();

5546

if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) &&

5547

(Imm64 & ~0xFFFFFFFFuLL) == 0) {

5548

// If ImmHi (ImmLo) is zero, only one ori (oris) is generated later.

5549

uint64_t ImmHi = Imm64 >> 16;

5550

uint64_t ImmLo = Imm64 & 0xFFFF;

5551

if (ImmHi != 0 && ImmLo != 0) {

5552

SDNode *Lo = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,

5553

N->getOperand(0),

5554

getI16Imm(ImmLo, dl));

5555

SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)};

5556

CurDAG->SelectNodeTo(N, PPC::ORIS8, MVT::i64, Ops1);

5557

return;

5558

}

5559

}

5560

5561

// Other cases are autogenerated.

5562

break;

5563

}

5564

case ISD::XOR: {

5565

// XOR with a 32-bit immediate can be handled by xori + xoris

5566

// without creating an immediate in a GPR.

5567

uint64_t Imm64 = 0;

5568

bool IsPPC64 = Subtarget->isPPC64();

5569

if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) &&

5570

(Imm64 & ~0xFFFFFFFFuLL) == 0) {

5571

// If ImmHi (ImmLo) is zero, only one xori (xoris) is generated later.

5572

uint64_t ImmHi = Imm64 >> 16;

5573

uint64_t ImmLo = Imm64 & 0xFFFF;

5574

if (ImmHi != 0 && ImmLo != 0) {

5575

SDNode *Lo = CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,

5576

N->getOperand(0),

5577

getI16Imm(ImmLo, dl));

5578

SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)};

5579

CurDAG->SelectNodeTo(N, PPC::XORIS8, MVT::i64, Ops1);

5580

return;

5581

}

5582

}

5583

5584

break;

5585

}

5586

case ISD::ADD: {

5587

int16_t Imm;

5588

if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&

5589

isIntS16Immediate(N->getOperand(1), Imm)) {

5590

selectFrameIndex(N, N->getOperand(0).getNode(), (int64_t)Imm);

5591

return;

5592

}

5593

5594

break;

5595

}

5596

case ISD::SHL: {

5597

unsigned Imm, SH, MB, ME;

5598

if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&

5599

isRotateAndMask(N, Imm, true, SH, MB, ME)) {

5600

SDValue Ops[] = { N->getOperand(0).getOperand(0),

5601

getI32Imm(SH, dl), getI32Imm(MB, dl),

5602

getI32Imm(ME, dl) };

5603

CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);

5604

return;

5605

}

5606

5607

// Other cases are autogenerated.

5608

break;

5609

}

5610

case ISD::SRL: {

5611

unsigned Imm, SH, MB, ME;

5612

if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&

5613

isRotateAndMask(N, Imm, true, SH, MB, ME)) {

5614

SDValue Ops[] = { N->getOperand(0).getOperand(0),

5615

getI32Imm(SH, dl), getI32Imm(MB, dl),

5616

getI32Imm(ME, dl) };

5617

CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);

5618

return;

5619

}

5620

5621

// Other cases are autogenerated.

5622

break;

5623

}

5624

case ISD::MUL: {

5625

SDValue Op1 = N->getOperand(1);

5626

if (Op1.getOpcode() != ISD::Constant ||

5627

(Op1.getValueType() != MVT::i64 && Op1.getValueType() != MVT::i32))

5628

break;

5629

5630

// If the multiplier fits int16, we can handle it with mulli.

5631

int64_t Imm = cast<ConstantSDNode>(Op1)->getZExtValue();

5632

unsigned Shift = llvm::countr_zero<uint64_t>(Imm);

5633

if (isInt<16>(Imm) || !Shift)

5634

break;

5635

5636

// If the shifted value fits int16, we can do this transformation:

5637

// (mul X, c1 << c2) -> (rldicr (mulli X, c1) c2). We do this in ISEL due to

5638

// DAGCombiner prefers (shl (mul X, c1), c2) -> (mul X, c1 << c2).

5639

uint64_t ImmSh = Imm >> Shift;

5640

if (!isInt<16>(ImmSh))

5641

break;

5642

5643

uint64_t SextImm = SignExtend64(ImmSh & 0xFFFF, 16);

5644

if (Op1.getValueType() == MVT::i64) {

5645

SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64);

5646

SDNode *MulNode = CurDAG->getMachineNode(PPC::MULLI8, dl, MVT::i64,

5647

N->getOperand(0), SDImm);

5648

5649

SDValue Ops[] = {SDValue(MulNode, 0), getI32Imm(Shift, dl),

5650

getI32Imm(63 - Shift, dl)};

5651

CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops);

5652

return;

5653

} else {

5654

SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i32);

5655

SDNode *MulNode = CurDAG->getMachineNode(PPC::MULLI, dl, MVT::i32,

5656

N->getOperand(0), SDImm);

5657

5658

SDValue Ops[] = {SDValue(MulNode, 0), getI32Imm(Shift, dl),

5659

getI32Imm(0, dl), getI32Imm(31 - Shift, dl)};

5660

CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);

5661

return;

5662

}

5663

break;

5664

}

5665

// FIXME: Remove this once the ANDI glue bug is fixed:

5666

case PPCISD::ANDI_rec_1_EQ_BIT:

5667

case PPCISD::ANDI_rec_1_GT_BIT: {

5668

if (!ANDIGlueBug)

5669

break;

5670

5671

EVT InVT = N->getOperand(0).getValueType();

5672

assert((InVT == MVT::i64 || InVT == MVT::i32) &&(static_cast <bool> ((InVT == MVT::i64 || InVT == MVT::
i32) && "Invalid input type for ANDI_rec_1_EQ_BIT") ?
void (0) : __assert_fail ("(InVT == MVT::i64 || InVT == MVT::i32) && \"Invalid input type for ANDI_rec_1_EQ_BIT\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 5673, __extension__
__PRETTY_FUNCTION__))

5673

"Invalid input type for ANDI_rec_1_EQ_BIT")(static_cast <bool> ((InVT == MVT::i64 || InVT == MVT::
i32) && "Invalid input type for ANDI_rec_1_EQ_BIT") ?
void (0) : __assert_fail ("(InVT == MVT::i64 || InVT == MVT::i32) && \"Invalid input type for ANDI_rec_1_EQ_BIT\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 5673, __extension__
__PRETTY_FUNCTION__));

5674

5675

unsigned Opcode = (InVT == MVT::i64) ? PPC::ANDI8_rec : PPC::ANDI_rec;

5676

SDValue AndI(CurDAG->getMachineNode(Opcode, dl, InVT, MVT::Glue,

5677

N->getOperand(0),

5678

CurDAG->getTargetConstant(1, dl, InVT)),

5679

0);

5680

SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);

5681

SDValue SRIdxVal = CurDAG->getTargetConstant(

5682

N->getOpcode() == PPCISD::ANDI_rec_1_EQ_BIT ? PPC::sub_eq : PPC::sub_gt,

5683

dl, MVT::i32);

5684

5685

CurDAG->SelectNodeTo(N, TargetOpcode::EXTRACT_SUBREG, MVT::i1, CR0Reg,

5686

SRIdxVal, SDValue(AndI.getNode(), 1) /* glue */);

5687

return;

5688

}

5689

case ISD::SELECT_CC: {

5690

ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();

5691

EVT PtrVT =

5692

CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout());

5693

bool isPPC64 = (PtrVT == MVT::i64);

5694

5695

// If this is a select of i1 operands, we'll pattern match it.

5696

if (Subtarget->useCRBits() && N->getOperand(0).getValueType() == MVT::i1)

5697

break;

5698

5699

if (Subtarget->isISA3_0() && Subtarget->isPPC64()) {

5700

bool NeedSwapOps = false;

5701

bool IsUnCmp = false;

5702

if (mayUseP9Setb(N, CC, CurDAG, NeedSwapOps, IsUnCmp)) {

5703

SDValue LHS = N->getOperand(0);

5704

SDValue RHS = N->getOperand(1);

5705

if (NeedSwapOps)

5706

std::swap(LHS, RHS);

5707

5708

// Use SelectCC to generate the comparison that sets the CR bits. For

5709

// equality comparisons with one literal operand, SelectCC probably

5710

// doesn't materialize the whole literal and just uses xoris to check

5711

// it first, so the resulting comparison can't exactly represent the

5712

// GT/LT relationship. To avoid this, we specify SETGT/SETUGT here

5713

// instead of SETEQ.

5714

SDValue GenCC =

5715

SelectCC(LHS, RHS, IsUnCmp ? ISD::SETUGT : ISD::SETGT, dl);

5716

CurDAG->SelectNodeTo(

5717

N, N->getSimpleValueType(0) == MVT::i64 ? PPC::SETB8 : PPC::SETB,

5718

N->getValueType(0), GenCC);

5719

NumP9Setb++;

5720

return;

5721

}

5722

}

5723

5724

// Handle the setcc cases here. select_cc lhs, 0, 1, 0, cc

5725

if (!isPPC64 && isNullConstant(N->getOperand(1)) &&

5726

isOneConstant(N->getOperand(2)) && isNullConstant(N->getOperand(3)) &&

5727

CC == ISD::SETNE &&

5728

// FIXME: Implement this optimization for PPC64.

5729

N->getValueType(0) == MVT::i32) {

5730

SDNode *Tmp =

5731

CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,

5732

N->getOperand(0), getI32Imm(~0U, dl));

5733

CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(Tmp, 0),

5734

N->getOperand(0), SDValue(Tmp, 1));

5735

return;

5736

}

5737

5738

SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl);

5739

5740

if (N->getValueType(0) == MVT::i1) {

5741

// An i1 select is: (c & t) | (!c & f).

5742

bool Inv;

5743

unsigned Idx = getCRIdxForSetCC(CC, Inv);

5744

5745

unsigned SRI;

5746

switch (Idx) {

5747

default: llvm_unreachable("Invalid CC index")::llvm::llvm_unreachable_internal("Invalid CC index", "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 5747);

5748

case 0: SRI = PPC::sub_lt; break;

5749

case 1: SRI = PPC::sub_gt; break;

5750

case 2: SRI = PPC::sub_eq; break;

5751

case 3: SRI = PPC::sub_un; break;

5752

}

5753

5754

SDValue CCBit = CurDAG->getTargetExtractSubreg(SRI, dl, MVT::i1, CCReg);

5755

5756

SDValue NotCCBit(CurDAG->getMachineNode(PPC::CRNOR, dl, MVT::i1,

5757

CCBit, CCBit), 0);

5758

SDValue C = Inv ? NotCCBit : CCBit,

5759

NotC = Inv ? CCBit : NotCCBit;

5760

5761

SDValue CAndT(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,

5762

C, N->getOperand(2)), 0);

5763

SDValue NotCAndF(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,

5764

NotC, N->getOperand(3)), 0);

5765

5766

CurDAG->SelectNodeTo(N, PPC::CROR, MVT::i1, CAndT, NotCAndF);

5767

return;

5768

}

5769

5770

unsigned BROpc =

5771

getPredicateForSetCC(CC, N->getOperand(0).getValueType(), Subtarget);

5772

5773

unsigned SelectCCOp;

5774

if (N->getValueType(0) == MVT::i32)

5775

SelectCCOp = PPC::SELECT_CC_I4;

5776

else if (N->getValueType(0) == MVT::i64)

5777

SelectCCOp = PPC::SELECT_CC_I8;

5778

else if (N->getValueType(0) == MVT::f32) {

5779

if (Subtarget->hasP8Vector())

5780

SelectCCOp = PPC::SELECT_CC_VSSRC;

5781

else if (Subtarget->hasSPE())

5782

SelectCCOp = PPC::SELECT_CC_SPE4;

5783

else

5784

SelectCCOp = PPC::SELECT_CC_F4;

5785

} else if (N->getValueType(0) == MVT::f64) {

5786

if (Subtarget->hasVSX())

5787

SelectCCOp = PPC::SELECT_CC_VSFRC;

5788

else if (Subtarget->hasSPE())

5789

SelectCCOp = PPC::SELECT_CC_SPE;

5790

else

5791

SelectCCOp = PPC::SELECT_CC_F8;

5792

} else if (N->getValueType(0) == MVT::f128)

5793

SelectCCOp = PPC::SELECT_CC_F16;

5794

else if (Subtarget->hasSPE())

5795

SelectCCOp = PPC::SELECT_CC_SPE;

5796

else if (N->getValueType(0) == MVT::v2f64 ||

5797

N->getValueType(0) == MVT::v2i64)

5798

SelectCCOp = PPC::SELECT_CC_VSRC;

5799

else

5800

SelectCCOp = PPC::SELECT_CC_VRRC;

5801

5802

SDValue Ops[] = { CCReg, N->getOperand(2), N->getOperand(3),

5803

getI32Imm(BROpc, dl) };

5804

CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops);

5805

return;

5806

}

5807

case ISD::VECTOR_SHUFFLE:

5808

if (Subtarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 ||

5809

N->getValueType(0) == MVT::v2i64)) {

5810

ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);

5811

5812

SDValue Op1 = N->getOperand(SVN->getMaskElt(0) < 2 ? 0 : 1),

5813

Op2 = N->getOperand(SVN->getMaskElt(1) < 2 ? 0 : 1);

5814

unsigned DM[2];

5815

5816

for (int i = 0; i < 2; ++i)

5817

if (SVN->getMaskElt(i) <= 0 || SVN->getMaskElt(i) == 2)

5818

DM[i] = 0;

5819

else

5820

DM[i] = 1;

5821

5822

if (Op1 == Op2 && DM[0] == 0 && DM[1] == 0 &&

5823

Op1.getOpcode() == ISD::SCALAR_TO_VECTOR &&

5824

isa<LoadSDNode>(Op1.getOperand(0))) {

5825

LoadSDNode *LD = cast<LoadSDNode>(Op1.getOperand(0));

5826

SDValue Base, Offset;

5827

5828

if (LD->isUnindexed() && LD->hasOneUse() && Op1.hasOneUse() &&

5829

(LD->getMemoryVT() == MVT::f64 ||

5830

LD->getMemoryVT() == MVT::i64) &&

5831

SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) {

5832

SDValue Chain = LD->getChain();

5833

SDValue Ops[] = { Base, Offset, Chain };

5834

MachineMemOperand *MemOp = LD->getMemOperand();

5835

SDNode *NewN = CurDAG->SelectNodeTo(N, PPC::LXVDSX,

5836

N->getValueType(0), Ops);

5837

CurDAG->setNodeMemRefs(cast<MachineSDNode>(NewN), {MemOp});

5838

return;

5839

}

5840

}

5841

5842

// For little endian, we must swap the input operands and adjust

5843

// the mask elements (reverse and invert them).

5844

if (Subtarget->isLittleEndian()) {

5845

std::swap(Op1, Op2);

5846

unsigned tmp = DM[0];

5847

DM[0] = 1 - DM[1];

5848

DM[1] = 1 - tmp;

5849

}

5850

5851

SDValue DMV = CurDAG->getTargetConstant(DM[1] | (DM[0] << 1), dl,

5852

MVT::i32);

5853

SDValue Ops[] = { Op1, Op2, DMV };

5854

CurDAG->SelectNodeTo(N, PPC::XXPERMDI, N->getValueType(0), Ops);

5855

return;

5856

}

5857

5858

break;

5859

case PPCISD::BDNZ:

5860

case PPCISD::BDZ: {

5861

bool IsPPC64 = Subtarget->isPPC64();

5862

SDValue Ops[] = { N->getOperand(1), N->getOperand(0) };

5863

CurDAG->SelectNodeTo(N, N->getOpcode() == PPCISD::BDNZ

5864

? (IsPPC64 ? PPC::BDNZ8 : PPC::BDNZ)

5865

: (IsPPC64 ? PPC::BDZ8 : PPC::BDZ),

5866

MVT::Other, Ops);

5867

return;

5868

}

5869

case PPCISD::COND_BRANCH: {

5870

// Op #0 is the Chain.

5871

// Op #1 is the PPC::PRED_* number.

5872

// Op #2 is the CR#

5873

// Op #3 is the Dest MBB

5874

// Op #4 is the Flag.

5875

// Prevent PPC::PRED_* from being selected into LI.

5876

unsigned PCC = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();

5877

if (EnableBranchHint)

5878

PCC |= getBranchHint(PCC, *FuncInfo, N->getOperand(3));

5879

5880

SDValue Pred = getI32Imm(PCC, dl);

5881

SDValue Ops[] = { Pred, N->getOperand(2), N->getOperand(3),

5882

N->getOperand(0), N->getOperand(4) };

5883

CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);

5884

return;

5885

}

5886

case ISD::BR_CC: {

5887

if (tryFoldSWTestBRCC(N))

5888

return;

5889

if (trySelectLoopCountIntrinsic(N))

5890

return;

5891

ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();

5892

unsigned PCC =

5893

getPredicateForSetCC(CC, N->getOperand(2).getValueType(), Subtarget);

5894

5895

if (N->getOperand(2).getValueType() == MVT::i1) {

5896

unsigned Opc;

5897

bool Swap;

5898

switch (PCC) {

5899

default: llvm_unreachable("Unexpected Boolean-operand predicate")::llvm::llvm_unreachable_internal("Unexpected Boolean-operand predicate"
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 5899);

5900

case PPC::PRED_LT: Opc = PPC::CRANDC; Swap = true; break;

5901

case PPC::PRED_LE: Opc = PPC::CRORC; Swap = true; break;

5902

case PPC::PRED_EQ: Opc = PPC::CREQV; Swap = false; break;

5903

case PPC::PRED_GE: Opc = PPC::CRORC; Swap = false; break;

5904

case PPC::PRED_GT: Opc = PPC::CRANDC; Swap = false; break;

5905

case PPC::PRED_NE: Opc = PPC::CRXOR; Swap = false; break;

5906

}

5907

5908

// A signed comparison of i1 values produces the opposite result to an

5909

// unsigned one if the condition code includes less-than or greater-than.

5910

// This is because 1 is the most negative signed i1 number and the most

5911

// positive unsigned i1 number. The CR-logical operations used for such

5912

// comparisons are non-commutative so for signed comparisons vs. unsigned

5913

// ones, the input operands just need to be swapped.

5914

if (ISD::isSignedIntSetCC(CC))

5915

Swap = !Swap;

5916

5917

SDValue BitComp(CurDAG->getMachineNode(Opc, dl, MVT::i1,

5918

N->getOperand(Swap ? 3 : 2),

5919

N->getOperand(Swap ? 2 : 3)), 0);

5920

CurDAG->SelectNodeTo(N, PPC::BC, MVT::Other, BitComp, N->getOperand(4),

5921

N->getOperand(0));

5922

return;

5923

}

5924

5925

if (EnableBranchHint)

5926

PCC |= getBranchHint(PCC, *FuncInfo, N->getOperand(4));

5927

5928

SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl);

5929

SDValue Ops[] = { getI32Imm(PCC, dl), CondCode,

5930

N->getOperand(4), N->getOperand(0) };

5931

CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);

5932

return;

5933

}

5934

case ISD::BRIND: {

5935

// FIXME: Should custom lower this.

5936

SDValue Chain = N->getOperand(0);

5937

SDValue Target = N->getOperand(1);

5938

unsigned Opc = Target.getValueType() == MVT::i32 ? PPC::MTCTR : PPC::MTCTR8;

5939

unsigned Reg = Target.getValueType() == MVT::i32 ? PPC::BCTR : PPC::BCTR8;

5940

Chain = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Target,

5941

Chain), 0);

5942

CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain);

5943

return;

5944

}

5945

case PPCISD::TOC_ENTRY: {

5946

const bool isPPC64 = Subtarget->isPPC64();

5947

const bool isELFABI = Subtarget->isSVR4ABI();

5948

const bool isAIXABI = Subtarget->isAIXABI();

5949

5950

// PowerPC only supports the small, medium and large code models.

5951

const CodeModel::Model CModel = TM.getCodeModel();

5952

assert(!(CModel == CodeModel::Tiny || CModel == CodeModel::Kernel) &&(static_cast <bool> (!(CModel == CodeModel::Tiny || CModel
== CodeModel::Kernel) && "PowerPC doesn't support tiny or kernel code models."
) ? void (0) : __assert_fail ("!(CModel == CodeModel::Tiny || CModel == CodeModel::Kernel) && \"PowerPC doesn't support tiny or kernel code models.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 5953, __extension__
__PRETTY_FUNCTION__))

5953

"PowerPC doesn't support tiny or kernel code models.")(static_cast <bool> (!(CModel == CodeModel::Tiny || CModel
== CodeModel::Kernel) && "PowerPC doesn't support tiny or kernel code models."
) ? void (0) : __assert_fail ("!(CModel == CodeModel::Tiny || CModel == CodeModel::Kernel) && \"PowerPC doesn't support tiny or kernel code models.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 5953, __extension__
__PRETTY_FUNCTION__));

5954

5955

if (isAIXABI && CModel == CodeModel::Medium)

5956

report_fatal_error("Medium code model is not supported on AIX.");

5957

5958

// For 64-bit ELF small code model, we allow SelectCodeCommon to handle

5959

// this, selecting one of LDtoc, LDtocJTI, LDtocCPT, and LDtocBA. For AIX

5960

// small code model, we need to check for a toc-data attribute.

5961

if (isPPC64 && !isAIXABI && CModel == CodeModel::Small)

5962

break;

5963

5964

auto replaceWith = [this, &dl](unsigned OpCode, SDNode *TocEntry,

5965

EVT OperandTy) {

5966

SDValue GA = TocEntry->getOperand(0);

5967

SDValue TocBase = TocEntry->getOperand(1);

5968

SDNode *MN = CurDAG->getMachineNode(OpCode, dl, OperandTy, GA, TocBase);

5969

transferMemOperands(TocEntry, MN);

5970

ReplaceNode(TocEntry, MN);

5971

};

5972

5973

// Handle 32-bit small code model.

5974

if (!isPPC64 && CModel == CodeModel::Small) {

5975

// Transforms the ISD::TOC_ENTRY node to passed in Opcode, either

5976

// PPC::ADDItoc, or PPC::LWZtoc

5977

if (isELFABI) {

5978

assert(TM.isPositionIndependent() &&(static_cast <bool> (TM.isPositionIndependent() &&
"32-bit ELF can only have TOC entries in position independent"
" code.") ? void (0) : __assert_fail ("TM.isPositionIndependent() && \"32-bit ELF can only have TOC entries in position independent\" \" code.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 5980, __extension__
__PRETTY_FUNCTION__))

5979

"32-bit ELF can only have TOC entries in position independent"(static_cast <bool> (TM.isPositionIndependent() &&
"32-bit ELF can only have TOC entries in position independent"
" code.") ? void (0) : __assert_fail ("TM.isPositionIndependent() && \"32-bit ELF can only have TOC entries in position independent\" \" code.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 5980, __extension__
__PRETTY_FUNCTION__))

5980

" code.")(static_cast <bool> (TM.isPositionIndependent() &&
"32-bit ELF can only have TOC entries in position independent"
" code.") ? void (0) : __assert_fail ("TM.isPositionIndependent() && \"32-bit ELF can only have TOC entries in position independent\" \" code.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 5980, __extension__
__PRETTY_FUNCTION__));

5981

// 32-bit ELF always uses a small code model toc access.

5982

replaceWith(PPC::LWZtoc, N, MVT::i32);

5983

return;

5984

}

5985

5986

assert(isAIXABI && "ELF ABI already handled")(static_cast <bool> (isAIXABI && "ELF ABI already handled"
) ? void (0) : __assert_fail ("isAIXABI && \"ELF ABI already handled\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 5986, __extension__
__PRETTY_FUNCTION__));

5987

5988

if (hasTocDataAttr(N->getOperand(0),

5989

CurDAG->getDataLayout().getPointerSize())) {

5990

replaceWith(PPC::ADDItoc, N, MVT::i32);

5991

return;

5992

}

5993

5994

replaceWith(PPC::LWZtoc, N, MVT::i32);

5995

return;

5996

}

5997

5998

if (isPPC64 && CModel == CodeModel::Small) {

5999

assert(isAIXABI && "ELF ABI handled in common SelectCode")(static_cast <bool> (isAIXABI && "ELF ABI handled in common SelectCode"
) ? void (0) : __assert_fail ("isAIXABI && \"ELF ABI handled in common SelectCode\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 5999, __extension__
__PRETTY_FUNCTION__));

6000

6001

if (hasTocDataAttr(N->getOperand(0),

6002

CurDAG->getDataLayout().getPointerSize())) {

6003

replaceWith(PPC::ADDItoc8, N, MVT::i64);

6004

return;

6005

}

6006

// Break if it doesn't have toc data attribute. Proceed with common

6007

// SelectCode.

6008

break;

6009

}

6010

6011

assert(CModel != CodeModel::Small && "All small code models handled.")(static_cast <bool> (CModel != CodeModel::Small &&
"All small code models handled.") ? void (0) : __assert_fail
("CModel != CodeModel::Small && \"All small code models handled.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 6011, __extension__
__PRETTY_FUNCTION__));

6012

6013

assert((isPPC64 || (isAIXABI && !isPPC64)) && "We are dealing with 64-bit"(static_cast <bool> ((isPPC64 || (isAIXABI && !
isPPC64)) && "We are dealing with 64-bit" " ELF/AIX or 32-bit AIX in the following."
) ? void (0) : __assert_fail ("(isPPC64 || (isAIXABI && !isPPC64)) && \"We are dealing with 64-bit\" \" ELF/AIX or 32-bit AIX in the following.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 6014, __extension__
__PRETTY_FUNCTION__))

6014

" ELF/AIX or 32-bit AIX in the following.")(static_cast <bool> ((isPPC64 || (isAIXABI && !
isPPC64)) && "We are dealing with 64-bit" " ELF/AIX or 32-bit AIX in the following."
) ? void (0) : __assert_fail ("(isPPC64 || (isAIXABI && !isPPC64)) && \"We are dealing with 64-bit\" \" ELF/AIX or 32-bit AIX in the following.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 6014, __extension__
__PRETTY_FUNCTION__));

6015

6016

// Transforms the ISD::TOC_ENTRY node for 32-bit AIX large code model mode

6017

// or 64-bit medium (ELF-only) or large (ELF and AIX) code model code. We

6018

// generate two instructions as described below. The first source operand

6019

// is a symbol reference. If it must be toc-referenced according to

6020

// Subtarget, we generate:

6021

// [32-bit AIX]

6022

// LWZtocL(@sym, ADDIStocHA(%r2, @sym))

6023

// [64-bit ELF/AIX]

6024

// LDtocL(@sym, ADDIStocHA8(%x2, @sym))

6025

// Otherwise we generate:

6026

// ADDItocL(ADDIStocHA8(%x2, @sym), @sym)

6027

SDValue GA = N->getOperand(0);

6028

SDValue TOCbase = N->getOperand(1);

6029

6030

EVT VT = isPPC64 ? MVT::i64 : MVT::i32;

6031

SDNode *Tmp = CurDAG->getMachineNode(

6032

isPPC64 ? PPC::ADDIStocHA8 : PPC::ADDIStocHA, dl, VT, TOCbase, GA);

6033

6034

if (PPCLowering->isAccessedAsGotIndirect(GA)) {

6035

// If it is accessed as got-indirect, we need an extra LWZ/LD to load

6036

// the address.

6037

SDNode *MN = CurDAG->getMachineNode(

6038

isPPC64 ? PPC::LDtocL : PPC::LWZtocL, dl, VT, GA, SDValue(Tmp, 0));

6039

6040

transferMemOperands(N, MN);

6041

ReplaceNode(N, MN);

6042

return;

6043

}

6044

6045

// Build the address relative to the TOC-pointer.

6046

ReplaceNode(N, CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64,

6047

SDValue(Tmp, 0), GA));

6048

return;

6049

}

6050

case PPCISD::PPC32_PICGOT:

6051

// Generate a PIC-safe GOT reference.

6052

assert(Subtarget->is32BitELFABI() &&(static_cast <bool> (Subtarget->is32BitELFABI() &&
"PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4") ? void
(0) : __assert_fail ("Subtarget->is32BitELFABI() && \"PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 6053, __extension__
__PRETTY_FUNCTION__))

6053

"PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4")(static_cast <bool> (Subtarget->is32BitELFABI() &&
"PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4") ? void
(0) : __assert_fail ("Subtarget->is32BitELFABI() && \"PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 6053, __extension__
__PRETTY_FUNCTION__));

6054

CurDAG->SelectNodeTo(N, PPC::PPC32PICGOT,

6055

PPCLowering->getPointerTy(CurDAG->getDataLayout()),

6056

MVT::i32);

6057

return;

6058

6059

case PPCISD::VADD_SPLAT: {

6060

// This expands into one of three sequences, depending on whether

6061

// the first operand is odd or even, positive or negative.

6062

assert(isa<ConstantSDNode>(N->getOperand(0)) &&(static_cast <bool> (isa<ConstantSDNode>(N->getOperand
(0)) && isa<ConstantSDNode>(N->getOperand(1)
) && "Invalid operand on VADD_SPLAT!") ? void (0) : __assert_fail
("isa<ConstantSDNode>(N->getOperand(0)) && isa<ConstantSDNode>(N->getOperand(1)) && \"Invalid operand on VADD_SPLAT!\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 6064, __extension__
__PRETTY_FUNCTION__))

6063

isa<ConstantSDNode>(N->getOperand(1)) &&(static_cast <bool> (isa<ConstantSDNode>(N->getOperand
(0)) && isa<ConstantSDNode>(N->getOperand(1)
) && "Invalid operand on VADD_SPLAT!") ? void (0) : __assert_fail
("isa<ConstantSDNode>(N->getOperand(0)) && isa<ConstantSDNode>(N->getOperand(1)) && \"Invalid operand on VADD_SPLAT!\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 6064, __extension__
__PRETTY_FUNCTION__))

6064

"Invalid operand on VADD_SPLAT!")(static_cast <bool> (isa<ConstantSDNode>(N->getOperand
(0)) && isa<ConstantSDNode>(N->getOperand(1)
) && "Invalid operand on VADD_SPLAT!") ? void (0) : __assert_fail
("isa<ConstantSDNode>(N->getOperand(0)) && isa<ConstantSDNode>(N->getOperand(1)) && \"Invalid operand on VADD_SPLAT!\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 6064, __extension__
__PRETTY_FUNCTION__));

6065

6066

int Elt = N->getConstantOperandVal(0);

6067

int EltSize = N->getConstantOperandVal(1);

6068

unsigned Opc1, Opc2, Opc3;

6069

EVT VT;

6070

6071

if (EltSize == 1) {

6072

Opc1 = PPC::VSPLTISB;

6073

Opc2 = PPC::VADDUBM;

6074

Opc3 = PPC::VSUBUBM;

6075

VT = MVT::v16i8;

6076

} else if (EltSize == 2) {

6077

Opc1 = PPC::VSPLTISH;

6078

Opc2 = PPC::VADDUHM;

6079

Opc3 = PPC::VSUBUHM;

6080

VT = MVT::v8i16;

6081

} else {

6082

assert(EltSize == 4 && "Invalid element size on VADD_SPLAT!")(static_cast <bool> (EltSize == 4 && "Invalid element size on VADD_SPLAT!"
) ? void (0) : __assert_fail ("EltSize == 4 && \"Invalid element size on VADD_SPLAT!\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 6082, __extension__
__PRETTY_FUNCTION__));

6083

Opc1 = PPC::VSPLTISW;

6084

Opc2 = PPC::VADDUWM;

6085

Opc3 = PPC::VSUBUWM;

6086

VT = MVT::v4i32;

6087

}

6088

6089

if ((Elt & 1) == 0) {

6090

// Elt is even, in the range [-32,-18] + [16,30].

6091

//

6092

// Convert: VADD_SPLAT elt, size

6093

// Into: tmp = VSPLTIS[BHW] elt

6094

// VADDU[BHW]M tmp, tmp

6095

// Where: [BHW] = B for size = 1, H for size = 2, W for size = 4

6096

SDValue EltVal = getI32Imm(Elt >> 1, dl);

6097

SDNode *Tmp = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);

6098

SDValue TmpVal = SDValue(Tmp, 0);

6099

ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal));

6100

return;

6101

} else if (Elt > 0) {

6102

// Elt is odd and positive, in the range [17,31].

6103

//

6104

// Convert: VADD_SPLAT elt, size

6105

// Into: tmp1 = VSPLTIS[BHW] elt-16

6106

// tmp2 = VSPLTIS[BHW] -16

6107

// VSUBU[BHW]M tmp1, tmp2

6108

SDValue EltVal = getI32Imm(Elt - 16, dl);

6109

SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);

6110

EltVal = getI32Imm(-16, dl);

6111

SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);

6112

ReplaceNode(N, CurDAG->getMachineNode(Opc3, dl, VT, SDValue(Tmp1, 0),

6113

SDValue(Tmp2, 0)));

6114

return;

6115

} else {

6116

// Elt is odd and negative, in the range [-31,-17].

6117

//

6118

// Convert: VADD_SPLAT elt, size

6119

// Into: tmp1 = VSPLTIS[BHW] elt+16

6120

// tmp2 = VSPLTIS[BHW] -16

6121

// VADDU[BHW]M tmp1, tmp2

6122

SDValue EltVal = getI32Imm(Elt + 16, dl);

6123

SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);

6124

EltVal = getI32Imm(-16, dl);

6125

SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);

6126

ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, SDValue(Tmp1, 0),

6127

SDValue(Tmp2, 0)));

6128

return;

6129

}

6130

}

6131

case PPCISD::LD_SPLAT: {

6132

// Here we want to handle splat load for type v16i8 and v8i16 when there is

6133

// no direct move, we don't need to use stack for this case. If target has

6134

// direct move, we should be able to get the best selection in the .td file.

6135

if (!Subtarget->hasAltivec() || Subtarget->hasDirectMove())

6136

break;

6137

6138

EVT Type = N->getValueType(0);

6139

if (Type != MVT::v16i8 && Type != MVT::v8i16)

6140

break;

6141

6142

// If the alignment for the load is 16 or bigger, we don't need the

6143

// permutated mask to get the required value. The value must be the 0

6144

// element in big endian target or 7/15 in little endian target in the

6145

// result vsx register of lvx instruction.

6146

// Select the instruction in the .td file.

6147

if (cast<MemIntrinsicSDNode>(N)->getAlign() >= Align(16) &&

6148

isOffsetMultipleOf(N, 16))

6149

break;

6150

6151

SDValue ZeroReg =

6152

CurDAG->getRegister(Subtarget->isPPC64() ? PPC::ZERO8 : PPC::ZERO,

6153

Subtarget->isPPC64() ? MVT::i64 : MVT::i32);

6154

unsigned LIOpcode = Subtarget->isPPC64() ? PPC::LI8 : PPC::LI;

6155

// v16i8 LD_SPLAT addr

6156

// ======>

6157

// Mask = LVSR/LVSL 0, addr

6158

// LoadLow = LVX 0, addr

6159

// Perm = VPERM LoadLow, LoadLow, Mask

6160

// Splat = VSPLTB 15/0, Perm

6161

//

6162

// v8i16 LD_SPLAT addr

6163

// ======>

6164

// Mask = LVSR/LVSL 0, addr

6165

// LoadLow = LVX 0, addr

6166

// LoadHigh = LVX (LI, 1), addr

6167

// Perm = VPERM LoadLow, LoadHigh, Mask

6168

// Splat = VSPLTH 7/0, Perm

6169

unsigned SplatOp = (Type == MVT::v16i8) ? PPC::VSPLTB : PPC::VSPLTH;

6170

unsigned SplatElemIndex =

6171

Subtarget->isLittleEndian() ? ((Type == MVT::v16i8) ? 15 : 7) : 0;

6172

6173

SDNode *Mask = CurDAG->getMachineNode(

6174

Subtarget->isLittleEndian() ? PPC::LVSR : PPC::LVSL, dl, Type, ZeroReg,

6175

N->getOperand(1));

6176

6177

SDNode *LoadLow =

6178

CurDAG->getMachineNode(PPC::LVX, dl, MVT::v16i8, MVT::Other,

6179

{ZeroReg, N->getOperand(1), N->getOperand(0)});

6180

6181

SDNode *LoadHigh = LoadLow;

6182

if (Type == MVT::v8i16) {

6183

LoadHigh = CurDAG->getMachineNode(

6184

PPC::LVX, dl, MVT::v16i8, MVT::Other,

6185

{SDValue(CurDAG->getMachineNode(

6186

LIOpcode, dl, MVT::i32,

6187

CurDAG->getTargetConstant(1, dl, MVT::i8)),

6188

0),

6189

N->getOperand(1), SDValue(LoadLow, 1)});

6190

}

6191

6192

CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), SDValue(LoadHigh, 1));

6193

transferMemOperands(N, LoadHigh);

6194

6195

SDNode *Perm =

6196

CurDAG->getMachineNode(PPC::VPERM, dl, Type, SDValue(LoadLow, 0),

6197

SDValue(LoadHigh, 0), SDValue(Mask, 0));

6198

CurDAG->SelectNodeTo(N, SplatOp, Type,

6199

CurDAG->getTargetConstant(SplatElemIndex, dl, MVT::i8),

6200

SDValue(Perm, 0));

6201

return;

6202

}

6203

}

6204

6205

SelectCode(N);

6206

}

6207

6208

// If the target supports the cmpb instruction, do the idiom recognition here.

6209

// We don't do this as a DAG combine because we don't want to do it as nodes

6210

// are being combined (because we might miss part of the eventual idiom). We

6211

// don't want to do it during instruction selection because we want to reuse

6212

// the logic for lowering the masking operations already part of the

6213

// instruction selector.

6214

SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) {

6215

SDLoc dl(N);

6216

6217

assert(N->getOpcode() == ISD::OR &&(static_cast <bool> (N->getOpcode() == ISD::OR &&
"Only OR nodes are supported for CMPB") ? void (0) : __assert_fail
("N->getOpcode() == ISD::OR && \"Only OR nodes are supported for CMPB\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 6218, __extension__
__PRETTY_FUNCTION__))

6218

"Only OR nodes are supported for CMPB")(static_cast <bool> (N->getOpcode() == ISD::OR &&
"Only OR nodes are supported for CMPB") ? void (0) : __assert_fail
("N->getOpcode() == ISD::OR && \"Only OR nodes are supported for CMPB\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 6218, __extension__
__PRETTY_FUNCTION__));

6219

6220

SDValue Res;

6221

if (!Subtarget->hasCMPB())

6222

return Res;

6223

6224

if (N->getValueType(0) != MVT::i32 &&

6225

N->getValueType(0) != MVT::i64)

6226

return Res;

6227

6228

EVT VT = N->getValueType(0);

6229

6230

SDValue RHS, LHS;

6231

bool BytesFound[8] = {false, false, false, false, false, false, false, false};

6232

uint64_t Mask = 0, Alt = 0;

6233

6234

auto IsByteSelectCC = [this](SDValue O, unsigned &b,

6235

uint64_t &Mask, uint64_t &Alt,

6236

SDValue &LHS, SDValue &RHS) {

6237

if (O.getOpcode() != ISD::SELECT_CC)

6238

return false;

6239

ISD::CondCode CC = cast<CondCodeSDNode>(O.getOperand(4))->get();

6240

6241

if (!isa<ConstantSDNode>(O.getOperand(2)) ||

6242

!isa<ConstantSDNode>(O.getOperand(3)))

6243

return false;

6244

6245

uint64_t PM = O.getConstantOperandVal(2);

6246

uint64_t PAlt = O.getConstantOperandVal(3);

6247

for (b = 0; b < 8; ++b) {

6248

uint64_t Mask = UINT64_C(0xFF)0xFFUL << (8*b);

6249

if (PM && (PM & Mask) == PM && (PAlt & Mask) == PAlt)

6250

break;

6251

}

6252

6253

if (b == 8)

6254

return false;

6255

Mask |= PM;

6256

Alt |= PAlt;

6257

6258

if (!isa<ConstantSDNode>(O.getOperand(1)) ||

6259

O.getConstantOperandVal(1) != 0) {

6260

SDValue Op0 = O.getOperand(0), Op1 = O.getOperand(1);

6261

if (Op0.getOpcode() == ISD::TRUNCATE)

6262

Op0 = Op0.getOperand(0);

6263

if (Op1.getOpcode() == ISD::TRUNCATE)

6264

Op1 = Op1.getOperand(0);

6265

6266

if (Op0.getOpcode() == ISD::SRL && Op1.getOpcode() == ISD::SRL &&

6267

Op0.getOperand(1) == Op1.getOperand(1) && CC == ISD::SETEQ &&

6268

isa<ConstantSDNode>(Op0.getOperand(1))) {

6269

6270

unsigned Bits = Op0.getValueSizeInBits();

6271

if (b != Bits/8-1)

6272

return false;

6273

if (Op0.getConstantOperandVal(1) != Bits-8)

6274

return false;

6275

6276

LHS = Op0.getOperand(0);

6277

RHS = Op1.getOperand(0);

6278

return true;

6279

}

6280

6281

// When we have small integers (i16 to be specific), the form present

6282

// post-legalization uses SETULT in the SELECT_CC for the

6283

// higher-order byte, depending on the fact that the

6284

// even-higher-order bytes are known to all be zero, for example:

6285

// select_cc (xor $lhs, $rhs), 256, 65280, 0, setult

6286

// (so when the second byte is the same, because all higher-order

6287

// bits from bytes 3 and 4 are known to be zero, the result of the

6288

// xor can be at most 255)

6289

if (Op0.getOpcode() == ISD::XOR && CC == ISD::SETULT &&

6290

isa<ConstantSDNode>(O.getOperand(1))) {

6291

6292

uint64_t ULim = O.getConstantOperandVal(1);

6293

if (ULim != (UINT64_C(1)1UL << b*8))

6294

return false;

6295

6296

// Now we need to make sure that the upper bytes are known to be

6297

// zero.

6298

unsigned Bits = Op0.getValueSizeInBits();

6299

if (!CurDAG->MaskedValueIsZero(

6300

Op0, APInt::getHighBitsSet(Bits, Bits - (b + 1) * 8)))

6301

return false;

6302

6303

LHS = Op0.getOperand(0);

6304

RHS = Op0.getOperand(1);

6305

return true;

6306

}

6307

6308

return false;

6309

}

6310

6311

if (CC != ISD::SETEQ)

6312

return false;

6313

6314

SDValue Op = O.getOperand(0);

6315

if (Op.getOpcode() == ISD::AND) {

6316

if (!isa<ConstantSDNode>(Op.getOperand(1)))

6317

return false;

6318

if (Op.getConstantOperandVal(1) != (UINT64_C(0xFF)0xFFUL << (8*b)))

6319

return false;

6320

6321

SDValue XOR = Op.getOperand(0);

6322

if (XOR.getOpcode() == ISD::TRUNCATE)

6323

XOR = XOR.getOperand(0);

6324

if (XOR.getOpcode() != ISD::XOR)

6325

return false;

6326

6327

LHS = XOR.getOperand(0);

6328

RHS = XOR.getOperand(1);

6329

return true;

6330

} else if (Op.getOpcode() == ISD::SRL) {

6331

if (!isa<ConstantSDNode>(Op.getOperand(1)))

6332

return false;

6333

unsigned Bits = Op.getValueSizeInBits();

6334

if (b != Bits/8-1)

6335

return false;

6336

if (Op.getConstantOperandVal(1) != Bits-8)

6337

return false;

6338

6339

SDValue XOR = Op.getOperand(0);

6340

if (XOR.getOpcode() == ISD::TRUNCATE)

6341

XOR = XOR.getOperand(0);

6342

if (XOR.getOpcode() != ISD::XOR)

6343

return false;

6344

6345

LHS = XOR.getOperand(0);

6346

RHS = XOR.getOperand(1);

6347

return true;

6348

}

6349

6350

return false;

6351

};

6352

6353

SmallVector<SDValue, 8> Queue(1, SDValue(N, 0));

6354

while (!Queue.empty()) {

6355

SDValue V = Queue.pop_back_val();

6356

6357

for (const SDValue &O : V.getNode()->ops()) {

6358

unsigned b = 0;

6359

uint64_t M = 0, A = 0;

6360

SDValue OLHS, ORHS;

6361

if (O.getOpcode() == ISD::OR) {

6362

Queue.push_back(O);

6363

} else if (IsByteSelectCC(O, b, M, A, OLHS, ORHS)) {

6364

if (!LHS) {

6365

LHS = OLHS;

6366

RHS = ORHS;

6367

BytesFound[b] = true;

6368

Mask |= M;

6369

Alt |= A;

6370

} else if ((LHS == ORHS && RHS == OLHS) ||

6371

(RHS == ORHS && LHS == OLHS)) {

6372

BytesFound[b] = true;

6373

Mask |= M;

6374

Alt |= A;

6375

} else {

6376

return Res;

6377

}

6378

} else {

6379

return Res;

6380

}

6381

}

6382

}

6383

6384

unsigned LastB = 0, BCnt = 0;

6385

for (unsigned i = 0; i < 8; ++i)

6386

if (BytesFound[LastB]) {

6387

++BCnt;

6388

LastB = i;

6389

}

6390

6391

if (!LastB || BCnt < 2)

6392

return Res;

6393

6394

// Because we'll be zero-extending the output anyway if don't have a specific

6395

// value for each input byte (via the Mask), we can 'anyext' the inputs.

6396

if (LHS.getValueType() != VT) {

6397

LHS = CurDAG->getAnyExtOrTrunc(LHS, dl, VT);

6398

RHS = CurDAG->getAnyExtOrTrunc(RHS, dl, VT);

6399

}

6400

6401

Res = CurDAG->getNode(PPCISD::CMPB, dl, VT, LHS, RHS);

6402

6403

bool NonTrivialMask = ((int64_t) Mask) != INT64_C(-1)-1L;

6404

if (NonTrivialMask && !Alt) {

6405

// Res = Mask & CMPB

6406

Res = CurDAG->getNode(ISD::AND, dl, VT, Res,

6407

CurDAG->getConstant(Mask, dl, VT));

6408

} else if (Alt) {

6409

// Res = (CMPB & Mask) | (~CMPB & Alt)

6410

// Which, as suggested here:

6411

// https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge

6412

// can be written as:

6413

// Res = Alt ^ ((Alt ^ Mask) & CMPB)

6414

// useful because the (Alt ^ Mask) can be pre-computed.

6415

Res = CurDAG->getNode(ISD::AND, dl, VT, Res,

6416

CurDAG->getConstant(Mask ^ Alt, dl, VT));

6417

Res = CurDAG->getNode(ISD::XOR, dl, VT, Res,

6418

CurDAG->getConstant(Alt, dl, VT));

6419

}

6420

6421

return Res;

6422

}

6423

6424

// When CR bit registers are enabled, an extension of an i1 variable to a i32

6425

// or i64 value is lowered in terms of a SELECT_I[48] operation, and thus

6426

// involves constant materialization of a 0 or a 1 or both. If the result of

6427

// the extension is then operated upon by some operator that can be constant

6428

// folded with a constant 0 or 1, and that constant can be materialized using

6429

// only one instruction (like a zero or one), then we should fold in those

6430

// operations with the select.

6431

void PPCDAGToDAGISel::foldBoolExts(SDValue &Res, SDNode *&N) {

6432

if (!Subtarget->useCRBits())

6433

return;

6434

6435

if (N->getOpcode() != ISD::ZERO_EXTEND &&

6436

N->getOpcode() != ISD::SIGN_EXTEND &&

6437

N->getOpcode() != ISD::ANY_EXTEND)

6438

return;

6439

6440

if (N->getOperand(0).getValueType() != MVT::i1)

6441

return;

6442

6443

if (!N->hasOneUse())

6444

return;

6445

6446

SDLoc dl(N);

6447

EVT VT = N->getValueType(0);

6448

SDValue Cond = N->getOperand(0);

6449

SDValue ConstTrue =

6450

CurDAG->getConstant(N->getOpcode() == ISD::SIGN_EXTEND ? -1 : 1, dl, VT);

6451

SDValue ConstFalse = CurDAG->getConstant(0, dl, VT);

6452

6453

do {

6454

SDNode *User = *N->use_begin();

6455

if (User->getNumOperands() != 2)

6456

break;

6457

6458

auto TryFold = [this, N, User, dl](SDValue Val) {

6459

SDValue UserO0 = User->getOperand(0), UserO1 = User->getOperand(1);

6460

SDValue O0 = UserO0.getNode() == N ? Val : UserO0;

6461

SDValue O1 = UserO1.getNode() == N ? Val : UserO1;

6462

6463

return CurDAG->FoldConstantArithmetic(User->getOpcode(), dl,

6464

User->getValueType(0), {O0, O1});

6465

};

6466

6467

// FIXME: When the semantics of the interaction between select and undef

6468

// are clearly defined, it may turn out to be unnecessary to break here.

6469

SDValue TrueRes = TryFold(ConstTrue);

6470

if (!TrueRes || TrueRes.isUndef())

6471

break;

6472

SDValue FalseRes = TryFold(ConstFalse);

6473

if (!FalseRes || FalseRes.isUndef())

6474

break;

6475

6476

// For us to materialize these using one instruction, we must be able to

6477

// represent them as signed 16-bit integers.

6478

uint64_t True = cast<ConstantSDNode>(TrueRes)->getZExtValue(),

6479

False = cast<ConstantSDNode>(FalseRes)->getZExtValue();

6480

if (!isInt<16>(True) || !isInt<16>(False))

6481

break;

6482

6483

// We can replace User with a new SELECT node, and try again to see if we

6484

// can fold the select with its user.

6485

Res = CurDAG->getSelect(dl, User->getValueType(0), Cond, TrueRes, FalseRes);

6486

N = User;

6487

ConstTrue = TrueRes;

6488

ConstFalse = FalseRes;

6489

} while (N->hasOneUse());

6490

}

6491

6492

void PPCDAGToDAGISel::PreprocessISelDAG() {

6493

SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

6494

6495

bool MadeChange = false;

6496

while (Position != CurDAG->allnodes_begin()) {

6497

SDNode *N = &*--Position;

6498

if (N->use_empty())

6499

continue;

6500

6501

SDValue Res;

6502

switch (N->getOpcode()) {

6503

default: break;

6504

case ISD::OR:

6505

Res = combineToCMPB(N);

6506

break;

6507

}

6508

6509

if (!Res)

6510

foldBoolExts(Res, N);

6511

6512

if (Res) {

6513

LLVM_DEBUG(dbgs() << "PPC DAG preprocessing replacing:\nOld: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-isel")) { dbgs() << "PPC DAG preprocessing replacing:\nOld: "
; } } while (false);

6514

LLVM_DEBUG(N->dump(CurDAG))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-isel")) { N->dump(CurDAG); } } while (false);

6515

LLVM_DEBUG(dbgs() << "\nNew: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-isel")) { dbgs() << "\nNew: "; } } while (false);

6516

LLVM_DEBUG(Res.getNode()->dump(CurDAG))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-isel")) { Res.getNode()->dump(CurDAG); } } while (false
);

6517

LLVM_DEBUG(dbgs() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-isel")) { dbgs() << "\n"; } } while (false);

6518

6519

CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);

6520

MadeChange = true;

6521

}

6522

}

6523

6524

if (MadeChange)

6525

CurDAG->RemoveDeadNodes();

6526

}

6527

6528

/// PostprocessISelDAG - Perform some late peephole optimizations

6529

/// on the DAG representation.

6530

void PPCDAGToDAGISel::PostprocessISelDAG() {

6531

// Skip peepholes at -O0.

6532

if (TM.getOptLevel() == CodeGenOpt::None)

6533

return;

6534

6535

PeepholePPC64();

6536

PeepholeCROps();

6537

PeepholePPC64ZExt();

6538

}

6539

6540

// Check if all users of this node will become isel where the second operand

6541

// is the constant zero. If this is so, and if we can negate the condition,

6542

// then we can flip the true and false operands. This will allow the zero to

6543

// be folded with the isel so that we don't need to materialize a register

6544

// containing zero.

6545

bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) {

6546

for (const SDNode *User : N->uses()) {

6547

if (!User->isMachineOpcode())

6548

return false;

6549

if (User->getMachineOpcode() != PPC::SELECT_I4 &&

6550

User->getMachineOpcode() != PPC::SELECT_I8)

6551

return false;

6552

6553

SDNode *Op1 = User->getOperand(1).getNode();

6554

SDNode *Op2 = User->getOperand(2).getNode();

6555

// If we have a degenerate select with two equal operands, swapping will

6556

// not do anything, and we may run into an infinite loop.

6557

if (Op1 == Op2)

6558

return false;

6559

6560

if (!Op2->isMachineOpcode())

6561

return false;

6562

6563

if (Op2->getMachineOpcode() != PPC::LI &&

6564

Op2->getMachineOpcode() != PPC::LI8)

6565

return false;

6566

6567

ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op2->getOperand(0));

6568

if (!C)

6569

return false;

6570

6571

if (!C->isZero())

6572

return false;

6573

}

6574

6575

return true;

6576

}

6577

6578

// Replaces every user of N, each of which must be a SELECT_I4 or SELECT_I8
// machine node, with a copy of that select whose operands 1 and 2 are
// exchanged.
void PPCDAGToDAGISel::SwapAllSelectUsers(SDNode *N) {
  // Gather the users up front; the rewriting loop below works from this list
  // rather than from N's use list.
  SmallVector<SDNode *, 4> Selects;
  for (SDNode *Sel : N->uses()) {
    assert((Sel->getMachineOpcode() == PPC::SELECT_I4 ||
            Sel->getMachineOpcode() == PPC::SELECT_I8) &&
           "Must have all select users");
    Selects.push_back(Sel);
  }

  for (SDNode *OldSel : Selects) {
    const SDValue Cond = OldSel->getOperand(0);
    const SDValue FirstVal = OldSel->getOperand(1);
    const SDValue SecondVal = OldSel->getOperand(2);

    // Same opcode, type and condition; the two value operands trade places.
    SDNode *NewSel = CurDAG->getMachineNode(
        OldSel->getMachineOpcode(), SDLoc(OldSel), OldSel->getValueType(0),
        Cond, SecondVal, FirstVal);

    LLVM_DEBUG(dbgs() << "CR Peephole replacing:\nOld:    ");
    LLVM_DEBUG(OldSel->dump(CurDAG));
    LLVM_DEBUG(dbgs() << "\nNew: ");
    LLVM_DEBUG(NewSel->dump(CurDAG));
    LLVM_DEBUG(dbgs() << "\n");

    ReplaceUses(OldSel, NewSel);
  }
}

6603

6604

void PPCDAGToDAGISel::PeepholeCROps() {

6605

bool IsModified;

6606

do {

6607

IsModified = false;

6608

for (SDNode &Node : CurDAG->allnodes()) {

6609

MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node);

6610

if (!MachineNode || MachineNode->use_empty())

6611

continue;

6612

SDNode *ResNode = MachineNode;

6613

6614

bool Op1Set = false, Op1Unset = false,

6615

Op1Not = false,

6616

Op2Set = false, Op2Unset = false,

6617

Op2Not = false;

6618

6619

unsigned Opcode = MachineNode->getMachineOpcode();

6620

switch (Opcode) {

6621

default: break;

6622

case PPC::CRAND:

6623

case PPC::CRNAND:

6624

case PPC::CROR:

6625

case PPC::CRXOR:

6626

case PPC::CRNOR:

6627

case PPC::CREQV:

6628

case PPC::CRANDC:

6629

case PPC::CRORC: {

6630

SDValue Op = MachineNode->getOperand(1);

6631

if (Op.isMachineOpcode()) {

6632

if (Op.getMachineOpcode() == PPC::CRSET)

6633

Op2Set = true;

6634

else if (Op.getMachineOpcode() == PPC::CRUNSET)

6635

Op2Unset = true;

6636

else if ((Op.getMachineOpcode() == PPC::CRNOR &&

6637

Op.getOperand(0) == Op.getOperand(1)) ||

6638

Op.getMachineOpcode() == PPC::CRNOT)

6639

Op2Not = true;

6640

}

6641

[[fallthrough]];

6642

}

6643

case PPC::BC:

6644

case PPC::BCn:

6645

case PPC::SELECT_I4:

6646

case PPC::SELECT_I8:

6647

case PPC::SELECT_F4:

6648

case PPC::SELECT_F8:

6649

case PPC::SELECT_SPE:

6650

case PPC::SELECT_SPE4:

6651

case PPC::SELECT_VRRC:

6652

case PPC::SELECT_VSFRC:

6653

case PPC::SELECT_VSSRC:

6654

case PPC::SELECT_VSRC: {

6655

SDValue Op = MachineNode->getOperand(0);

6656

if (Op.isMachineOpcode()) {

6657

if (Op.getMachineOpcode() == PPC::CRSET)

6658

Op1Set = true;

6659

else if (Op.getMachineOpcode() == PPC::CRUNSET)

6660

Op1Unset = true;

6661

else if ((Op.getMachineOpcode() == PPC::CRNOR &&

6662

Op.getOperand(0) == Op.getOperand(1)) ||

6663

Op.getMachineOpcode() == PPC::CRNOT)

6664

Op1Not = true;

6665

}

6666

}

6667

break;

6668

}

6669

6670

bool SelectSwap = false;

6671

switch (Opcode) {

6672

default: break;

6673

case PPC::CRAND:

6674

if (MachineNode->getOperand(0) == MachineNode->getOperand(1))

6675

// x & x = x

6676

ResNode = MachineNode->getOperand(0).getNode();

6677

else if (Op1Set)

6678

// 1 & y = y

6679

ResNode = MachineNode->getOperand(1).getNode();

6680

else if (Op2Set)

6681

// x & 1 = x

6682

ResNode = MachineNode->getOperand(0).getNode();

6683

else if (Op1Unset || Op2Unset)

6684

// x & 0 = 0 & y = 0

6685

ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),

6686

MVT::i1);

6687

else if (Op1Not)

6688

// ~x & y = andc(y, x)

6689

ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),

6690

MVT::i1, MachineNode->getOperand(1),

6691

MachineNode->getOperand(0).

6692

getOperand(0));

6693

else if (Op2Not)

6694

// x & ~y = andc(x, y)

6695

ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),

6696

MVT::i1, MachineNode->getOperand(0),

6697

MachineNode->getOperand(1).

6698

getOperand(0));

6699

else if (AllUsersSelectZero(MachineNode)) {

6700

ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode),

6701

MVT::i1, MachineNode->getOperand(0),

6702

MachineNode->getOperand(1));

6703

SelectSwap = true;

6704

}

6705

break;

6706

case PPC::CRNAND:

6707

if (MachineNode->getOperand(0) == MachineNode->getOperand(1))

6708

// nand(x, x) -> nor(x, x)

6709

ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),

6710

MVT::i1, MachineNode->getOperand(0),

6711

MachineNode->getOperand(0));

6712

else if (Op1Set)

6713

// nand(1, y) -> nor(y, y)

6714

ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),

6715

MVT::i1, MachineNode->getOperand(1),

6716

MachineNode->getOperand(1));

6717

else if (Op2Set)

6718

// nand(x, 1) -> nor(x, x)

6719

ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),

6720

MVT::i1, MachineNode->getOperand(0),

6721

MachineNode->getOperand(0));

6722

else if (Op1Unset || Op2Unset)

6723

// nand(x, 0) = nand(0, y) = 1

6724

ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),

6725

MVT::i1);

6726

else if (Op1Not)

6727

// nand(~x, y) = ~(~x & y) = x | ~y = orc(x, y)

6728

ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),

6729

MVT::i1, MachineNode->getOperand(0).

6730

getOperand(0),

6731

MachineNode->getOperand(1));

6732

else if (Op2Not)

6733

// nand(x, ~y) = ~x | y = orc(y, x)

6734

ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),

6735

MVT::i1, MachineNode->getOperand(1).

6736

getOperand(0),

6737

MachineNode->getOperand(0));

6738

else if (AllUsersSelectZero(MachineNode)) {

6739

ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode),

6740

MVT::i1, MachineNode->getOperand(0),

6741

MachineNode->getOperand(1));

6742

SelectSwap = true;

6743

}

6744

break;

6745

case PPC::CROR:

6746

if (MachineNode->getOperand(0) == MachineNode->getOperand(1))

6747

// x | x = x

6748

ResNode = MachineNode->getOperand(0).getNode();

6749

else if (Op1Set || Op2Set)

6750

// x | 1 = 1 | y = 1

6751

ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),

6752

MVT::i1);

6753

else if (Op1Unset)

6754

// 0 | y = y

6755

ResNode = MachineNode->getOperand(1).getNode();

6756

else if (Op2Unset)

6757

// x | 0 = x

6758

ResNode = MachineNode->getOperand(0).getNode();

6759

else if (Op1Not)

6760

// ~x | y = orc(y, x)

6761

ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),

6762

MVT::i1, MachineNode->getOperand(1),

6763

MachineNode->getOperand(0).

6764

getOperand(0));

6765

else if (Op2Not)

6766

// x | ~y = orc(x, y)

6767

ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),

6768

MVT::i1, MachineNode->getOperand(0),

6769

MachineNode->getOperand(1).

6770

getOperand(0));

6771

else if (AllUsersSelectZero(MachineNode)) {

6772

ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),

6773

MVT::i1, MachineNode->getOperand(0),

6774

MachineNode->getOperand(1));

6775

SelectSwap = true;

6776

}

6777

break;

6778

case PPC::CRXOR:

6779

if (MachineNode->getOperand(0) == MachineNode->getOperand(1))

6780

// xor(x, x) = 0

6781

ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),

6782

MVT::i1);

6783

else if (Op1Set)

6784

// xor(1, y) -> nor(y, y)

6785

ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),

6786

MVT::i1, MachineNode->getOperand(1),

6787

MachineNode->getOperand(1));

6788

else if (Op2Set)

6789

// xor(x, 1) -> nor(x, x)

6790

ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),

6791

MVT::i1, MachineNode->getOperand(0),

6792

MachineNode->getOperand(0));

6793

else if (Op1Unset)

6794

// xor(0, y) = y

6795

ResNode = MachineNode->getOperand(1).getNode();

6796

else if (Op2Unset)

6797

// xor(x, 0) = x

6798

ResNode = MachineNode->getOperand(0).getNode();

6799

else if (Op1Not)

6800

// xor(~x, y) = eqv(x, y)

6801

ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),

6802

MVT::i1, MachineNode->getOperand(0).

6803

getOperand(0),

6804

MachineNode->getOperand(1));

6805

else if (Op2Not)

6806

// xor(x, ~y) = eqv(x, y)

6807

ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),

6808

MVT::i1, MachineNode->getOperand(0),

6809

MachineNode->getOperand(1).

6810

getOperand(0));

6811

else if (AllUsersSelectZero(MachineNode)) {

6812

ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),

6813

MVT::i1, MachineNode->getOperand(0),

6814

MachineNode->getOperand(1));

6815

SelectSwap = true;

6816

}

6817

break;

6818

case PPC::CRNOR:

6819

if (Op1Set || Op2Set)

6820

// nor(1, y) -> 0

6821

ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),

6822

MVT::i1);

6823

else if (Op1Unset)

6824

// nor(0, y) = ~y -> nor(y, y)

6825

ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),

6826

MVT::i1, MachineNode->getOperand(1),

6827

MachineNode->getOperand(1));

6828

else if (Op2Unset)

6829

// nor(x, 0) = ~x

6830

ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),

6831

MVT::i1, MachineNode->getOperand(0),

6832

MachineNode->getOperand(0));

6833

else if (Op1Not)

6834

// nor(~x, y) = andc(x, y)

6835

ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),

6836

MVT::i1, MachineNode->getOperand(0).

6837

getOperand(0),

6838

MachineNode->getOperand(1));

6839

else if (Op2Not)

6840

// nor(x, ~y) = andc(y, x)

6841

ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),

6842

MVT::i1, MachineNode->getOperand(1).

6843

getOperand(0),

6844

MachineNode->getOperand(0));

6845

else if (AllUsersSelectZero(MachineNode)) {

6846

ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode),

6847

MVT::i1, MachineNode->getOperand(0),

6848

MachineNode->getOperand(1));

6849

SelectSwap = true;

6850

}

6851

break;

6852

case PPC::CREQV:

6853

if (MachineNode->getOperand(0) == MachineNode->getOperand(1))

6854

// eqv(x, x) = 1

6855

ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),

6856

MVT::i1);

6857

else if (Op1Set)

6858

// eqv(1, y) = y

6859

ResNode = MachineNode->getOperand(1).getNode();

6860

else if (Op2Set)

6861

// eqv(x, 1) = x

6862

ResNode = MachineNode->getOperand(0).getNode();

6863

else if (Op1Unset)

6864

// eqv(0, y) = ~y -> nor(y, y)

6865

ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),

6866

MVT::i1, MachineNode->getOperand(1),

6867

MachineNode->getOperand(1));

6868

else if (Op2Unset)

6869

// eqv(x, 0) = ~x

6870

ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),

6871

MVT::i1, MachineNode->getOperand(0),

6872

MachineNode->getOperand(0));

6873

else if (Op1Not)

6874

// eqv(~x, y) = xor(x, y)

6875

ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),

6876

MVT::i1, MachineNode->getOperand(0).

6877

getOperand(0),

6878

MachineNode->getOperand(1));

6879

else if (Op2Not)

6880

// eqv(x, ~y) = xor(x, y)

6881

ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),

6882

MVT::i1, MachineNode->getOperand(0),

6883

MachineNode->getOperand(1).

6884

getOperand(0));

6885

else if (AllUsersSelectZero(MachineNode)) {

6886

ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),

6887

MVT::i1, MachineNode->getOperand(0),

6888

MachineNode->getOperand(1));

6889

SelectSwap = true;

6890

}

6891

break;

6892

case PPC::CRANDC:

6893

if (MachineNode->getOperand(0) == MachineNode->getOperand(1))

6894

// andc(x, x) = 0

6895

ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),

6896

MVT::i1);

6897

else if (Op1Set)

6898

// andc(1, y) = ~y

6899

ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),

6900

MVT::i1, MachineNode->getOperand(1),

6901

MachineNode->getOperand(1));

6902

else if (Op1Unset || Op2Set)

6903

// andc(0, y) = andc(x, 1) = 0

6904

ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),

6905

MVT::i1);

6906

else if (Op2Unset)

6907

// andc(x, 0) = x

6908

ResNode = MachineNode->getOperand(0).getNode();

6909

else if (Op1Not)

6910

// andc(~x, y) = ~(x | y) = nor(x, y)

6911

ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),

6912

MVT::i1, MachineNode->getOperand(0).

6913

getOperand(0),

6914

MachineNode->getOperand(1));

6915

else if (Op2Not)

6916

// andc(x, ~y) = x & y

6917

ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode),

6918

MVT::i1, MachineNode->getOperand(0),

6919

MachineNode->getOperand(1).

6920

getOperand(0));

6921

else if (AllUsersSelectZero(MachineNode)) {

6922

ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),

6923

MVT::i1, MachineNode->getOperand(1),

6924

MachineNode->getOperand(0));

6925

SelectSwap = true;

6926

}

6927

break;

6928

case PPC::CRORC:

6929

if (MachineNode->getOperand(0) == MachineNode->getOperand(1))

6930

// orc(x, x) = 1

6931

ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),

6932

MVT::i1);

6933

else if (Op1Set || Op2Unset)

6934

// orc(1, y) = orc(x, 0) = 1

6935

ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),

6936

MVT::i1);

6937

else if (Op2Set)

6938

// orc(x, 1) = x

6939

ResNode = MachineNode->getOperand(0).getNode();

6940

else if (Op1Unset)

6941

// orc(0, y) = ~y

6942

ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),

6943

MVT::i1, MachineNode->getOperand(1),

6944

MachineNode->getOperand(1));

6945

else if (Op1Not)

6946

// orc(~x, y) = ~(x & y) = nand(x, y)

6947

ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode),

6948

MVT::i1, MachineNode->getOperand(0).

6949

getOperand(0),

6950

MachineNode->getOperand(1));

6951

else if (Op2Not)

6952

// orc(x, ~y) = x | y

6953

ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode),

6954

MVT::i1, MachineNode->getOperand(0),

6955

MachineNode->getOperand(1).

6956

getOperand(0));

6957

else if (AllUsersSelectZero(MachineNode)) {

6958

ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),

6959

MVT::i1, MachineNode->getOperand(1),

6960

MachineNode->getOperand(0));

6961

SelectSwap = true;

6962

}

6963

break;

6964

case PPC::SELECT_I4:

6965

case PPC::SELECT_I8:

6966

case PPC::SELECT_F4:

6967

case PPC::SELECT_F8:

6968

case PPC::SELECT_SPE:

6969

case PPC::SELECT_SPE4:

6970

case PPC::SELECT_VRRC:

6971

case PPC::SELECT_VSFRC:

6972

case PPC::SELECT_VSSRC:

6973

case PPC::SELECT_VSRC:

6974

if (Op1Set)

6975

ResNode = MachineNode->getOperand(1).getNode();

6976

else if (Op1Unset)

6977

ResNode = MachineNode->getOperand(2).getNode();

6978

else if (Op1Not)

6979

ResNode = CurDAG->getMachineNode(MachineNode->getMachineOpcode(),

6980

SDLoc(MachineNode),

6981

MachineNode->getValueType(0),

6982

MachineNode->getOperand(0).

6983

getOperand(0),

6984

MachineNode->getOperand(2),

6985

MachineNode->getOperand(1));

6986

break;

6987

case PPC::BC:

6988

case PPC::BCn:

6989

if (Op1Not)

6990

ResNode = CurDAG->getMachineNode(Opcode == PPC::BC ? PPC::BCn :

6991

PPC::BC,

6992

SDLoc(MachineNode),

6993

MVT::Other,

6994

MachineNode->getOperand(0).

6995

getOperand(0),

6996

MachineNode->getOperand(1),

6997

MachineNode->getOperand(2));

6998

// FIXME: Handle Op1Set, Op1Unset here too.

6999

break;

7000

}

7001

7002

// If we're inverting this node because it is used only by selects that

7003

// we'd like to swap, then swap the selects before the node replacement.

7004

if (SelectSwap)

7005

SwapAllSelectUsers(MachineNode);

7006

7007

if (ResNode != MachineNode) {

7008

LLVM_DEBUG(dbgs() << "CR Peephole replacing:\nOld: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-isel")) { dbgs() << "CR Peephole replacing:\nOld: "
; } } while (false);

7009

LLVM_DEBUG(MachineNode->dump(CurDAG))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-isel")) { MachineNode->dump(CurDAG); } } while (false
);

7010

LLVM_DEBUG(dbgs() << "\nNew: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-isel")) { dbgs() << "\nNew: "; } } while (false);

7011

LLVM_DEBUG(ResNode->dump(CurDAG))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-isel")) { ResNode->dump(CurDAG); } } while (false);

7012

LLVM_DEBUG(dbgs() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-isel")) { dbgs() << "\n"; } } while (false);

7013

7014

ReplaceUses(MachineNode, ResNode);

7015

IsModified = true;

7016

}

7017

}

7018

if (IsModified)

7019

CurDAG->RemoveDeadNodes();

7020

} while (IsModified);

7021

}

7022

7023

// Gather the set of 32-bit operations that are known to have their

7024

// higher-order 32 bits zero, where ToPromote contains all such operations.

7025

static bool PeepholePPC64ZExtGather(SDValue Op32,

7026

SmallPtrSetImpl<SDNode *> &ToPromote) {

7027

if (!Op32.isMachineOpcode())

7028

return false;

7029

7030

// First, check for the "frontier" instructions (those that will clear the

7031

// higher-order 32 bits.

7032

7033

// For RLWINM and RLWNM, we need to make sure that the mask does not wrap

7034

// around. If it does not, then these instructions will clear the

7035

// higher-order bits.

7036

if ((Op32.getMachineOpcode() == PPC::RLWINM ||

7037

Op32.getMachineOpcode() == PPC::RLWNM) &&

7038

Op32.getConstantOperandVal(2) <= Op32.getConstantOperandVal(3)) {

7039

ToPromote.insert(Op32.getNode());

7040

return true;

7041

}

7042

7043

// SLW and SRW always clear the higher-order bits.

7044

if (Op32.getMachineOpcode() == PPC::SLW ||

7045

Op32.getMachineOpcode() == PPC::SRW) {

7046

ToPromote.insert(Op32.getNode());

7047

return true;

7048

}

7049

7050

// For LI and LIS, we need the immediate to be positive (so that it is not

7051

// sign extended).

7052

if (Op32.getMachineOpcode() == PPC::LI ||

7053

Op32.getMachineOpcode() == PPC::LIS) {

7054

if (!isUInt<15>(Op32.getConstantOperandVal(0)))

7055

return false;

7056

7057

ToPromote.insert(Op32.getNode());

7058

return true;

7059

}

7060

7061

// LHBRX and LWBRX always clear the higher-order bits.

7062

if (Op32.getMachineOpcode() == PPC::LHBRX ||

7063

Op32.getMachineOpcode() == PPC::LWBRX) {

7064

ToPromote.insert(Op32.getNode());

7065

return true;

7066

}

7067

7068

// CNT[LT]ZW always produce a 64-bit value in [0,32], and so is zero extended.

7069

if (Op32.getMachineOpcode() == PPC::CNTLZW ||

7070

Op32.getMachineOpcode() == PPC::CNTTZW) {

7071

ToPromote.insert(Op32.getNode());

7072

return true;

7073

}

7074

7075

// Next, check for those instructions we can look through.

7076

7077

// Assuming the mask does not wrap around, then the higher-order bits are

7078

// taken directly from the first operand.

7079

if (Op32.getMachineOpcode() == PPC::RLWIMI &&

7080

Op32.getConstantOperandVal(3) <= Op32.getConstantOperandVal(4)) {

7081

SmallPtrSet<SDNode *, 16> ToPromote1;

7082

if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1))

7083

return false;

7084

7085

ToPromote.insert(Op32.getNode());

7086

ToPromote.insert(ToPromote1.begin(), ToPromote1.end());

7087

return true;

7088

}

7089

7090

// For OR, the higher-order bits are zero if that is true for both operands.

7091

// For SELECT_I4, the same is true (but the relevant operand numbers are

7092

// shifted by 1).

7093

if (Op32.getMachineOpcode() == PPC::OR ||

7094

Op32.getMachineOpcode() == PPC::SELECT_I4) {

7095

unsigned B = Op32.getMachineOpcode() == PPC::SELECT_I4 ? 1 : 0;

7096

SmallPtrSet<SDNode *, 16> ToPromote1;

7097

if (!PeepholePPC64ZExtGather(Op32.getOperand(B+0), ToPromote1))

7098

return false;

7099

if (!PeepholePPC64ZExtGather(Op32.getOperand(B+1), ToPromote1))

7100

return false;

7101

7102

ToPromote.insert(Op32.getNode());

7103

ToPromote.insert(ToPromote1.begin(), ToPromote1.end());

7104

return true;

7105

}

7106

7107

// For ORI and ORIS, we need the higher-order bits of the first operand to be

7108

// zero, and also for the constant to be positive (so that it is not sign

7109

// extended).

7110

if (Op32.getMachineOpcode() == PPC::ORI ||

7111

Op32.getMachineOpcode() == PPC::ORIS) {

7112

SmallPtrSet<SDNode *, 16> ToPromote1;

7113

if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1))

7114

return false;

7115

if (!isUInt<15>(Op32.getConstantOperandVal(1)))

7116

return false;

7117

7118

ToPromote.insert(Op32.getNode());

7119

ToPromote.insert(ToPromote1.begin(), ToPromote1.end());

7120

return true;

7121

}

7122

7123

// The higher-order bits of AND are zero if that is true for at least one of

7124

// the operands.

7125

if (Op32.getMachineOpcode() == PPC::AND) {

7126

SmallPtrSet<SDNode *, 16> ToPromote1, ToPromote2;

7127

bool Op0OK =

7128

PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1);

7129

bool Op1OK =

7130

PeepholePPC64ZExtGather(Op32.getOperand(1), ToPromote2);

7131

if (!Op0OK && !Op1OK)

7132

return false;

7133

7134

ToPromote.insert(Op32.getNode());

7135

7136

if (Op0OK)

7137

ToPromote.insert(ToPromote1.begin(), ToPromote1.end());

7138

7139

if (Op1OK)

7140

ToPromote.insert(ToPromote2.begin(), ToPromote2.end());

7141

7142

return true;

7143

}

7144

7145

// For ANDI and ANDIS, the higher-order bits are zero if either that is true

7146

// of the first operand, or if the second operand is positive (so that it is

7147

// not sign extended).

7148

if (Op32.getMachineOpcode() == PPC::ANDI_rec ||

7149

Op32.getMachineOpcode() == PPC::ANDIS_rec) {

7150

SmallPtrSet<SDNode *, 16> ToPromote1;

7151

bool Op0OK =

7152

PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1);

7153

bool Op1OK = isUInt<15>(Op32.getConstantOperandVal(1));

7154

if (!Op0OK && !Op1OK)

7155

return false;

7156

7157

ToPromote.insert(Op32.getNode());

7158

7159

if (Op0OK)

7160

ToPromote.insert(ToPromote1.begin(), ToPromote1.end());

7161

7162

return true;

7163

}

7164

7165

return false;

7166

}

7167

7168

void PPCDAGToDAGISel::PeepholePPC64ZExt() {

7169

if (!Subtarget->isPPC64())

7170

return;

7171

7172

// When we zero-extend from i32 to i64, we use a pattern like this:

7173

// def : Pat<(i64 (zext i32:$in)),

7174

// (RLDICL (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $in, sub_32),

7175

// 0, 32)>;

7176

// There are several 32-bit shift/rotate instructions, however, that will

7177

// clear the higher-order bits of their output, rendering the RLDICL

7178

// unnecessary. When that happens, we remove it here, and redefine the

7179

// relevant 32-bit operation to be a 64-bit operation.

7180

7181

SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

7182

7183

bool MadeChange = false;

7184

while (Position != CurDAG->allnodes_begin()) {

7185

SDNode *N = &*--Position;

7186

// Skip dead nodes and any non-machine opcodes.

7187

if (N->use_empty() || !N->isMachineOpcode())

7188

continue;

7189

7190

if (N->getMachineOpcode() != PPC::RLDICL)

7191

continue;

7192

7193

if (N->getConstantOperandVal(1) != 0 ||

7194

N->getConstantOperandVal(2) != 32)

7195

continue;

7196

7197

SDValue ISR = N->getOperand(0);

7198

if (!ISR.isMachineOpcode() ||

7199

ISR.getMachineOpcode() != TargetOpcode::INSERT_SUBREG)

7200

continue;

7201

7202

if (!ISR.hasOneUse())

7203

continue;

7204

7205

if (ISR.getConstantOperandVal(2) != PPC::sub_32)

7206

continue;

7207

7208

SDValue IDef = ISR.getOperand(0);

7209

if (!IDef.isMachineOpcode() ||

7210

IDef.getMachineOpcode() != TargetOpcode::IMPLICIT_DEF)

7211

continue;

7212

7213

// We now know that we're looking at a canonical i32 -> i64 zext. See if we

7214

// can get rid of it.

7215

7216

SDValue Op32 = ISR->getOperand(1);

7217

if (!Op32.isMachineOpcode())

7218

continue;

7219

7220

// There are some 32-bit instructions that always clear the high-order 32

7221

// bits, there are also some instructions (like AND) that we can look

7222

// through.

7223

SmallPtrSet<SDNode *, 16> ToPromote;

7224

if (!PeepholePPC64ZExtGather(Op32, ToPromote))

7225

continue;

7226

7227

// If the ToPromote set contains nodes that have uses outside of the set

7228

// (except for the original INSERT_SUBREG), then abort the transformation.

7229

bool OutsideUse = false;

7230

for (SDNode *PN : ToPromote) {

7231

for (SDNode *UN : PN->uses()) {

7232

if (!ToPromote.count(UN) && UN != ISR.getNode()) {

7233

OutsideUse = true;

7234

break;

7235

}

7236

}

7237

7238

if (OutsideUse)

7239

break;

7240

}

7241

if (OutsideUse)

7242

continue;

7243

7244

MadeChange = true;

7245

7246

// We now know that this zero extension can be removed by promoting to

7247

// nodes in ToPromote to 64-bit operations, where for operations in the

7248

// frontier of the set, we need to insert INSERT_SUBREGs for their

7249

// operands.

7250

for (SDNode *PN : ToPromote) {

7251

unsigned NewOpcode;

7252

switch (PN->getMachineOpcode()) {

7253

default:

7254

llvm_unreachable("Don't know the 64-bit variant of this instruction")::llvm::llvm_unreachable_internal("Don't know the 64-bit variant of this instruction"
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 7254);

7255

case PPC::RLWINM: NewOpcode = PPC::RLWINM8; break;

7256

case PPC::RLWNM: NewOpcode = PPC::RLWNM8; break;

7257

case PPC::SLW: NewOpcode = PPC::SLW8; break;

7258

case PPC::SRW: NewOpcode = PPC::SRW8; break;

7259

case PPC::LI: NewOpcode = PPC::LI8; break;

7260

case PPC::LIS: NewOpcode = PPC::LIS8; break;

7261

case PPC::LHBRX: NewOpcode = PPC::LHBRX8; break;

7262

case PPC::LWBRX: NewOpcode = PPC::LWBRX8; break;

7263

case PPC::CNTLZW: NewOpcode = PPC::CNTLZW8; break;

7264

case PPC::CNTTZW: NewOpcode = PPC::CNTTZW8; break;

7265

case PPC::RLWIMI: NewOpcode = PPC::RLWIMI8; break;

7266

case PPC::OR: NewOpcode = PPC::OR8; break;

7267

case PPC::SELECT_I4: NewOpcode = PPC::SELECT_I8; break;

7268

case PPC::ORI: NewOpcode = PPC::ORI8; break;

7269

case PPC::ORIS: NewOpcode = PPC::ORIS8; break;

7270

case PPC::AND: NewOpcode = PPC::AND8; break;

7271

case PPC::ANDI_rec:

7272

NewOpcode = PPC::ANDI8_rec;

7273

break;

7274

case PPC::ANDIS_rec:

7275

NewOpcode = PPC::ANDIS8_rec;

7276

break;

7277

}

7278

7279

// Note: During the replacement process, the nodes will be in an

7280

// inconsistent state (some instructions will have operands with values

7281

// of the wrong type). Once done, however, everything should be right

7282

// again.

7283

7284

SmallVector<SDValue, 4> Ops;

7285

for (const SDValue &V : PN->ops()) {

7286

if (!ToPromote.count(V.getNode()) && V.getValueType() == MVT::i32 &&

7287

!isa<ConstantSDNode>(V)) {

7288

SDValue ReplOpOps[] = { ISR.getOperand(0), V, ISR.getOperand(2) };

7289

SDNode *ReplOp =

7290

CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, SDLoc(V),

7291

ISR.getNode()->getVTList(), ReplOpOps);

7292

Ops.push_back(SDValue(ReplOp, 0));

7293

} else {

7294

Ops.push_back(V);

7295

}

7296

}

7297

7298

// Because all to-be-promoted nodes only have users that are other

7299

// promoted nodes (or the original INSERT_SUBREG), we can safely replace

7300

// the i32 result value type with i64.

7301

7302

SmallVector<EVT, 2> NewVTs;

7303

SDVTList VTs = PN->getVTList();

7304

for (unsigned i = 0, ie = VTs.NumVTs; i != ie; ++i)

7305

if (VTs.VTs[i] == MVT::i32)

7306

NewVTs.push_back(MVT::i64);

7307

else

7308

NewVTs.push_back(VTs.VTs[i]);

7309

7310

LLVM_DEBUG(dbgs() << "PPC64 ZExt Peephole morphing:\nOld: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-isel")) { dbgs() << "PPC64 ZExt Peephole morphing:\nOld: "
; } } while (false);

7311

LLVM_DEBUG(PN->dump(CurDAG))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-isel")) { PN->dump(CurDAG); } } while (false);

7312

7313

CurDAG->SelectNodeTo(PN, NewOpcode, CurDAG->getVTList(NewVTs), Ops);

7314

7315

LLVM_DEBUG(dbgs() << "\nNew: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-isel")) { dbgs() << "\nNew: "; } } while (false);

7316

LLVM_DEBUG(PN->dump(CurDAG))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-isel")) { PN->dump(CurDAG); } } while (false);

7317

LLVM_DEBUG(dbgs() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-isel")) { dbgs() << "\n"; } } while (false);

7318

}

7319

7320

// Now we replace the original zero extend and its associated INSERT_SUBREG

7321

// with the value feeding the INSERT_SUBREG (which has now been promoted to

7322

// return an i64).

7323

7324

LLVM_DEBUG(dbgs() << "PPC64 ZExt Peephole replacing:\nOld: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-isel")) { dbgs() << "PPC64 ZExt Peephole replacing:\nOld: "
; } } while (false);

7325

LLVM_DEBUG(N->dump(CurDAG))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-isel")) { N->dump(CurDAG); } } while (false);

7326

LLVM_DEBUG(dbgs() << "\nNew: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-isel")) { dbgs() << "\nNew: "; } } while (false);

7327

LLVM_DEBUG(Op32.getNode()->dump(CurDAG))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-isel")) { Op32.getNode()->dump(CurDAG); } } while (false
);

7328

LLVM_DEBUG(dbgs() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-isel")) { dbgs() << "\n"; } } while (false);

7329

7330

ReplaceUses(N, Op32.getNode());

7331

}

7332

7333

if (MadeChange)

7334

CurDAG->RemoveDeadNodes();

7335

}

7336

7337

// Return true if N is a machine node of one of the forms recognised here as
// a VSX swap: a single-operand XXPERMDI, or a regular XXPERMDI/XXSLDWI with
// identical source operands, where the immediate operand is the constant 2.
static bool isVSXSwap(SDValue N) {
  if (!N->isMachineOpcode())
    return false;

  switch (N->getMachineOpcode()) {
  case PPC::XXPERMDIs:
    // Single-source form: only the immediate needs checking.
    return isa<ConstantSDNode>(N->getOperand(1)) &&
           N->getConstantOperandVal(1) == 2;

  case PPC::XXPERMDI:
  case PPC::XXSLDWI:
    // Two-source forms: both sources must be the same value.
    return N->getOperand(0) == N->getOperand(1) &&
           isa<ConstantSDNode>(N->getOperand(2)) &&
           N->getConstantOperandVal(2) == 2;

  default:
    return false;
  }
}

7355

7356

// TODO: Make this complete and replace with a table-gen bit.

7357

static bool isLaneInsensitive(SDValue N) {

7358

if (!N->isMachineOpcode())

7359

return false;

7360

unsigned Opc = N->getMachineOpcode();

7361

7362

switch (Opc) {

7363

default:

7364

return false;

7365

case PPC::VAVGSB:

7366

case PPC::VAVGUB:

7367

case PPC::VAVGSH:

7368

case PPC::VAVGUH:

7369

case PPC::VAVGSW:

7370

case PPC::VAVGUW:

7371

case PPC::VMAXFP:

7372

case PPC::VMAXSB:

7373

case PPC::VMAXUB:

7374

case PPC::VMAXSH:

7375

case PPC::VMAXUH:

7376

case PPC::VMAXSW:

7377

case PPC::VMAXUW:

7378

case PPC::VMINFP:

7379

case PPC::VMINSB:

7380

case PPC::VMINUB:

7381

case PPC::VMINSH:

7382

case PPC::VMINUH:

7383

case PPC::VMINSW:

7384

case PPC::VMINUW:

7385

case PPC::VADDFP:

7386

case PPC::VADDUBM:

7387

case PPC::VADDUHM:

7388

case PPC::VADDUWM:

7389

case PPC::VSUBFP:

7390

case PPC::VSUBUBM:

7391

case PPC::VSUBUHM:

7392

case PPC::VSUBUWM:

7393

case PPC::VAND:

7394

case PPC::VANDC:

7395

case PPC::VOR:

7396

case PPC::VORC:

7397

case PPC::VXOR:

7398

case PPC::VNOR:

7399

case PPC::VMULUWM:

7400

return true;

7401

}

7402

}

7403

7404

// Try to simplify (xxswap (vec-op (xxswap) (xxswap))) where vec-op is

7405

// lane-insensitive.

7406

static void reduceVSXSwap(SDNode *N, SelectionDAG *DAG) {

7407

// Our desired xxswap might be source of COPY_TO_REGCLASS.

7408

// TODO: Can we put this a common method for DAG?

7409

auto SkipRCCopy = [](SDValue V) {

7410

while (V->isMachineOpcode() &&

7411

V->getMachineOpcode() == TargetOpcode::COPY_TO_REGCLASS) {

7412

// All values in the chain should have single use.

7413

if (V->use_empty() || !V->use_begin()->isOnlyUserOf(V.getNode()))

7414

return SDValue();

7415

V = V->getOperand(0);

7416

}

7417

return V.hasOneUse() ? V : SDValue();

7418

};

7419

7420

SDValue VecOp = SkipRCCopy(N->getOperand(0));

7421

if (!VecOp || !isLaneInsensitive(VecOp))

7422

return;

7423

7424

SDValue LHS = SkipRCCopy(VecOp.getOperand(0)),

7425

RHS = SkipRCCopy(VecOp.getOperand(1));

7426

if (!LHS || !RHS || !isVSXSwap(LHS) || !isVSXSwap(RHS))

7427

return;

7428

7429

// These swaps may still have chain-uses here, count on dead code elimination

7430

// in following passes to remove them.

7431

DAG->ReplaceAllUsesOfValueWith(LHS, LHS.getOperand(0));

7432

DAG->ReplaceAllUsesOfValueWith(RHS, RHS.getOperand(0));

7433

DAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), N->getOperand(0));

7434

}

7435

7436

void PPCDAGToDAGISel::PeepholePPC64() {

7437

SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

7438

7439

while (Position != CurDAG->allnodes_begin()) {

7440

SDNode *N = &*--Position;

7441

// Skip dead nodes and any non-machine opcodes.

7442

if (N->use_empty() || !N->isMachineOpcode())

7443

continue;

7444

7445

if (isVSXSwap(SDValue(N, 0)))

7446

reduceVSXSwap(N, CurDAG);

7447

7448

unsigned FirstOp;

7449

unsigned StorageOpcode = N->getMachineOpcode();

7450

bool RequiresMod4Offset = false;

7451

7452

switch (StorageOpcode) {

7453

default: continue;

7454

7455

case PPC::LWA:

7456

case PPC::LD:

7457

case PPC::DFLOADf64:

7458

case PPC::DFLOADf32:

7459

RequiresMod4Offset = true;

7460

[[fallthrough]];

7461

case PPC::LBZ:

7462

case PPC::LBZ8:

7463

case PPC::LFD:

7464

case PPC::LFS:

7465

case PPC::LHA:

7466

case PPC::LHA8:

7467

case PPC::LHZ:

7468

case PPC::LHZ8:

7469

case PPC::LWZ:

7470

case PPC::LWZ8:

7471

FirstOp = 0;

7472

break;

7473

7474

case PPC::STD:

7475

case PPC::DFSTOREf64:

7476

case PPC::DFSTOREf32:

7477

RequiresMod4Offset = true;

7478

[[fallthrough]];

7479

case PPC::STB:

7480

case PPC::STB8:

7481

case PPC::STFD:

7482

case PPC::STFS:

7483

case PPC::STH:

7484

case PPC::STH8:

7485

case PPC::STW:

7486

case PPC::STW8:

7487

FirstOp = 1;

7488

break;

7489

}

7490

7491

// If this is a load or store with a zero offset, or within the alignment,

7492

// we may be able to fold an add-immediate into the memory operation.

7493

// The check against alignment is below, as it can't occur until we check

7494

// the arguments to N

7495

if (!isa<ConstantSDNode>(N->getOperand(FirstOp)))

7496

continue;

7497

7498

SDValue Base = N->getOperand(FirstOp + 1);

7499

if (!Base.isMachineOpcode())

7500

continue;

7501

7502

unsigned Flags = 0;

7503

bool ReplaceFlags = true;

7504

7505

// When the feeding operation is an add-immediate of some sort,

7506

// determine whether we need to add relocation information to the

7507

// target flags on the immediate operand when we fold it into the

7508

// load instruction.

7509

//

7510

// For something like ADDItocL, the relocation information is

7511

// inferred from the opcode; when we process it in the AsmPrinter,

7512

// we add the necessary relocation there. A load, though, can receive

7513

// relocation from various flavors of ADDIxxx, so we need to carry

7514

// the relocation information in the target flags.

7515

switch (Base.getMachineOpcode()) {

7516

default: continue;

7517

7518

case PPC::ADDI8:

7519

case PPC::ADDI:

7520

// In some cases (such as TLS) the relocation information

7521

// is already in place on the operand, so copying the operand

7522

// is sufficient.

7523

ReplaceFlags = false;

7524

// For these cases, the immediate may not be divisible by 4, in

7525

// which case the fold is illegal for DS-form instructions. (The

7526

// other cases provide aligned addresses and are always safe.)

7527

if (RequiresMod4Offset &&

7528

(!isa<ConstantSDNode>(Base.getOperand(1)) ||

7529

Base.getConstantOperandVal(1) % 4 != 0))

7530

continue;

7531

break;

7532

case PPC::ADDIdtprelL:

7533

Flags = PPCII::MO_DTPREL_LO;

7534

break;

7535

case PPC::ADDItlsldL:

7536

Flags = PPCII::MO_TLSLD_LO;

7537

break;

7538

case PPC::ADDItocL:

7539

Flags = PPCII::MO_TOC_LO;

7540

break;

7541

}

7542

7543

SDValue ImmOpnd = Base.getOperand(1);

7544

7545

// On PPC64, the TOC base pointer is guaranteed by the ABI only to have

7546

// 8-byte alignment, and so we can only use offsets less than 8 (otherwise,

7547

// we might have needed different @ha relocation values for the offset

7548

// pointers).

7549

int MaxDisplacement = 7;

7550

if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {

7551

const GlobalValue *GV = GA->getGlobal();

7552

Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout());

7553

MaxDisplacement = std::min((int)Alignment.value() - 1, MaxDisplacement);

7554

}

7555

7556

bool UpdateHBase = false;

7557

SDValue HBase = Base.getOperand(0);

7558

7559

int Offset = N->getConstantOperandVal(FirstOp);

7560

if (ReplaceFlags) {

7561

if (Offset < 0 || Offset > MaxDisplacement) {

7562

// If we have a addi(toc@l)/addis(toc@ha) pair, and the addis has only

7563

// one use, then we can do this for any offset, we just need to also

7564

// update the offset (i.e. the symbol addend) on the addis also.

7565

if (Base.getMachineOpcode() != PPC::ADDItocL)

7566

continue;

7567

7568

if (!HBase.isMachineOpcode() ||

7569

HBase.getMachineOpcode() != PPC::ADDIStocHA8)

7570

continue;

7571

7572

if (!Base.hasOneUse() || !HBase.hasOneUse())

7573

continue;

7574

7575

SDValue HImmOpnd = HBase.getOperand(1);

7576

if (HImmOpnd != ImmOpnd)

7577

continue;

7578

7579

UpdateHBase = true;

7580

}

7581

} else {

7582

// If we're directly folding the addend from an addi instruction, then:

7583

// 1. In general, the offset on the memory access must be zero.

7584

// 2. If the addend is a constant, then it can be combined with a

7585

// non-zero offset, but only if the result meets the encoding

7586

// requirements.

7587

if (auto *C = dyn_cast<ConstantSDNode>(ImmOpnd)) {

7588

Offset += C->getSExtValue();

7589

7590

if (RequiresMod4Offset && (Offset % 4) != 0)

7591

continue;

7592

7593

if (!isInt<16>(Offset))

7594

continue;

7595

7596

ImmOpnd = CurDAG->getTargetConstant(Offset, SDLoc(ImmOpnd),

7597

ImmOpnd.getValueType());

7598

} else if (Offset != 0) {

7599

continue;

7600

}

7601

}

7602

7603

// We found an opportunity. Reverse the operands from the add

7604

// immediate and substitute them into the load or store. If

7605

// needed, update the target flags for the immediate operand to

7606

// reflect the necessary relocation information.

7607

LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-isel")) { dbgs() << "Folding add-immediate into mem-op:\nBase: "
; } } while (false);

7608

LLVM_DEBUG(Base->dump(CurDAG))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-isel")) { Base->dump(CurDAG); } } while (false);

7609

LLVM_DEBUG(dbgs() << "\nN: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-isel")) { dbgs() << "\nN: "; } } while (false);

7610

LLVM_DEBUG(N->dump(CurDAG))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-isel")) { N->dump(CurDAG); } } while (false);

7611

LLVM_DEBUG(dbgs() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-isel")) { dbgs() << "\n"; } } while (false);

7612

7613

// If the relocation information isn't already present on the

7614

// immediate operand, add it now.

7615

if (ReplaceFlags) {

7616

if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {

7617

SDLoc dl(GA);

7618

const GlobalValue *GV = GA->getGlobal();

7619

Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout());

7620

// We can't perform this optimization for data whose alignment

7621

// is insufficient for the instruction encoding.

7622

if (Alignment < 4 && (RequiresMod4Offset || (Offset % 4) != 0)) {

7623

LLVM_DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-isel")) { dbgs() << "Rejected this candidate for alignment.\n\n"
; } } while (false);

7624

continue;

7625

}

7626

ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, Offset, Flags);

7627

} else if (ConstantPoolSDNode *CP =

7628

dyn_cast<ConstantPoolSDNode>(ImmOpnd)) {

7629

const Constant *C = CP->getConstVal();

7630

ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64, CP->getAlign(),

7631

Offset, Flags);

7632

}

7633

}

7634

7635

if (FirstOp == 1) // Store

7636

(void)CurDAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd,

7637

Base.getOperand(0), N->getOperand(3));

7638

else // Load

7639

(void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0),

7640

N->getOperand(2));

7641

7642

if (UpdateHBase)

7643

(void)CurDAG->UpdateNodeOperands(HBase.getNode(), HBase.getOperand(0),

7644

ImmOpnd);

7645

7646

// The add-immediate may now be dead, in which case remove it.

7647

if (Base.getNode()->use_empty())

7648

CurDAG->RemoveDeadNode(Base.getNode());

7649

}

7650

}

7651

7652

/// createPPCISelDag - This pass converts a legalized DAG into a

7653

/// PowerPC-specific DAG, ready for instruction scheduling.

7654

///

7655

FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM,

7656

CodeGenOpt::Level OptLevel) {

7657

return new PPCDAGToDAGISel(TM, OptLevel);

7658

}

File:	build/source/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
Warning:	line 1994, column 15 Value stored to 'I' is never read

Bug Summary

Annotated Source Code