/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp

1

//===-- PPCISelDAGToDAG.cpp - PPC --pattern matching inst selector --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines a pattern matching instruction selector for PowerPC,
// converting from a legalized dag to a PPC dag.
//
//===----------------------------------------------------------------------===//

13

14

#include "MCTargetDesc/PPCMCTargetDesc.h"

15

#include "MCTargetDesc/PPCPredicates.h"

16

#include "PPC.h"

17

#include "PPCISelLowering.h"

18

#include "PPCMachineFunctionInfo.h"

19

#include "PPCSubtarget.h"

20

#include "PPCTargetMachine.h"

21

#include "llvm/ADT/APInt.h"

22

#include "llvm/ADT/DenseMap.h"

23

#include "llvm/ADT/STLExtras.h"

24

#include "llvm/ADT/SmallPtrSet.h"

25

#include "llvm/ADT/SmallVector.h"

26

#include "llvm/ADT/Statistic.h"

27

#include "llvm/Analysis/BranchProbabilityInfo.h"

28

#include "llvm/CodeGen/FunctionLoweringInfo.h"

29

#include "llvm/CodeGen/ISDOpcodes.h"

30

#include "llvm/CodeGen/MachineBasicBlock.h"

31

#include "llvm/CodeGen/MachineFunction.h"

32

#include "llvm/CodeGen/MachineInstrBuilder.h"

33

#include "llvm/CodeGen/MachineRegisterInfo.h"

34

#include "llvm/CodeGen/SelectionDAG.h"

35

#include "llvm/CodeGen/SelectionDAGISel.h"

36

#include "llvm/CodeGen/SelectionDAGNodes.h"

37

#include "llvm/CodeGen/TargetInstrInfo.h"

38

#include "llvm/CodeGen/TargetRegisterInfo.h"

39

#include "llvm/CodeGen/ValueTypes.h"

40

#include "llvm/IR/BasicBlock.h"

41

#include "llvm/IR/DebugLoc.h"

42

#include "llvm/IR/Function.h"

43

#include "llvm/IR/GlobalValue.h"

44

#include "llvm/IR/InlineAsm.h"

45

#include "llvm/IR/InstrTypes.h"

46

#include "llvm/IR/IntrinsicsPowerPC.h"

47

#include "llvm/IR/Module.h"

48

#include "llvm/Support/Casting.h"

49

#include "llvm/Support/CodeGen.h"

50

#include "llvm/Support/CommandLine.h"

51

#include "llvm/Support/Compiler.h"

52

#include "llvm/Support/Debug.h"

53

#include "llvm/Support/ErrorHandling.h"

54

#include "llvm/Support/KnownBits.h"

55

#include "llvm/Support/MachineValueType.h"

56

#include "llvm/Support/MathExtras.h"

57

#include "llvm/Support/raw_ostream.h"

58

#include <algorithm>

59

#include <cassert>

60

#include <cstdint>

61

#include <iterator>

62

#include <limits>

63

#include <memory>

64

#include <new>

65

#include <tuple>

66

#include <utility>

67

68

using namespace llvm;

69

70

#define DEBUG_TYPE"ppc-codegen" "ppc-codegen"

71

72

STATISTIC(NumSextSetcc,static llvm::Statistic NumSextSetcc = {"ppc-codegen", "NumSextSetcc"
, "Number of (sext(setcc)) nodes expanded into GPR sequence."
}

73

"Number of (sext(setcc)) nodes expanded into GPR sequence.")static llvm::Statistic NumSextSetcc = {"ppc-codegen", "NumSextSetcc"
, "Number of (sext(setcc)) nodes expanded into GPR sequence."
};

74

STATISTIC(NumZextSetcc,static llvm::Statistic NumZextSetcc = {"ppc-codegen", "NumZextSetcc"
, "Number of (zext(setcc)) nodes expanded into GPR sequence."
}

75

"Number of (zext(setcc)) nodes expanded into GPR sequence.")static llvm::Statistic NumZextSetcc = {"ppc-codegen", "NumZextSetcc"
, "Number of (zext(setcc)) nodes expanded into GPR sequence."
};

76

STATISTIC(SignExtensionsAdded,static llvm::Statistic SignExtensionsAdded = {"ppc-codegen", "SignExtensionsAdded"
, "Number of sign extensions for compare inputs added."}

77

"Number of sign extensions for compare inputs added.")static llvm::Statistic SignExtensionsAdded = {"ppc-codegen", "SignExtensionsAdded"
, "Number of sign extensions for compare inputs added."};

78

STATISTIC(ZeroExtensionsAdded,static llvm::Statistic ZeroExtensionsAdded = {"ppc-codegen", "ZeroExtensionsAdded"
, "Number of zero extensions for compare inputs added."}

79

"Number of zero extensions for compare inputs added.")static llvm::Statistic ZeroExtensionsAdded = {"ppc-codegen", "ZeroExtensionsAdded"
, "Number of zero extensions for compare inputs added."};

80

STATISTIC(NumLogicOpsOnComparison,static llvm::Statistic NumLogicOpsOnComparison = {"ppc-codegen"
, "NumLogicOpsOnComparison", "Number of logical ops on i1 values calculated in GPR."
}

81

"Number of logical ops on i1 values calculated in GPR.")static llvm::Statistic NumLogicOpsOnComparison = {"ppc-codegen"
, "NumLogicOpsOnComparison", "Number of logical ops on i1 values calculated in GPR."
};

82

STATISTIC(OmittedForNonExtendUses,static llvm::Statistic OmittedForNonExtendUses = {"ppc-codegen"
, "OmittedForNonExtendUses", "Number of compares not eliminated as they have non-extending uses."
}

83

"Number of compares not eliminated as they have non-extending uses.")static llvm::Statistic OmittedForNonExtendUses = {"ppc-codegen"
, "OmittedForNonExtendUses", "Number of compares not eliminated as they have non-extending uses."
};

84

STATISTIC(NumP9Setb,static llvm::Statistic NumP9Setb = {"ppc-codegen", "NumP9Setb"
, "Number of compares lowered to setb."}

85

"Number of compares lowered to setb.")static llvm::Statistic NumP9Setb = {"ppc-codegen", "NumP9Setb"
, "Number of compares lowered to setb."};

86

87

// FIXME: Remove this once the bug has been fixed!

88

cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug",

89

cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden);

90

91

static cl::opt<bool>

92

UseBitPermRewriter("ppc-use-bit-perm-rewriter", cl::init(true),

93

cl::desc("use aggressive ppc isel for bit permutations"),

94

cl::Hidden);

95

static cl::opt<bool> BPermRewriterNoMasking(

96

"ppc-bit-perm-rewriter-stress-rotates",

97

cl::desc("stress rotate selection in aggressive ppc isel for "

98

"bit permutations"),

99

cl::Hidden);

100

101

static cl::opt<bool> EnableBranchHint(

102

"ppc-use-branch-hint", cl::init(true),

103

cl::desc("Enable static hinting of branches on ppc"),

104

cl::Hidden);

105

106

static cl::opt<bool> EnableTLSOpt(

107

"ppc-tls-opt", cl::init(true),

108

cl::desc("Enable tls optimization peephole"),

109

cl::Hidden);

110

111

enum ICmpInGPRType { ICGPR_All, ICGPR_None, ICGPR_I32, ICGPR_I64,

112

ICGPR_NonExtIn, ICGPR_Zext, ICGPR_Sext, ICGPR_ZextI32,

113

ICGPR_SextI32, ICGPR_ZextI64, ICGPR_SextI64 };

114

115

static cl::opt<ICmpInGPRType> CmpInGPR(

116

"ppc-gpr-icmps", cl::Hidden, cl::init(ICGPR_All),

117

cl::desc("Specify the types of comparisons to emit GPR-only code for."),

118

cl::values(clEnumValN(ICGPR_None, "none", "Do not modify integer comparisons.")llvm::cl::OptionEnumValue { "none", int(ICGPR_None), "Do not modify integer comparisons."
},

119

clEnumValN(ICGPR_All, "all", "All possible int comparisons in GPRs.")llvm::cl::OptionEnumValue { "all", int(ICGPR_All), "All possible int comparisons in GPRs."
},

120

clEnumValN(ICGPR_I32, "i32", "Only i32 comparisons in GPRs.")llvm::cl::OptionEnumValue { "i32", int(ICGPR_I32), "Only i32 comparisons in GPRs."
},

121

clEnumValN(ICGPR_I64, "i64", "Only i64 comparisons in GPRs.")llvm::cl::OptionEnumValue { "i64", int(ICGPR_I64), "Only i64 comparisons in GPRs."
},

122

clEnumValN(ICGPR_NonExtIn, "nonextin",llvm::cl::OptionEnumValue { "nonextin", int(ICGPR_NonExtIn), "Only comparisons where inputs don't need [sz]ext."
}

123

"Only comparisons where inputs don't need [sz]ext.")llvm::cl::OptionEnumValue { "nonextin", int(ICGPR_NonExtIn), "Only comparisons where inputs don't need [sz]ext."
},

124

clEnumValN(ICGPR_Zext, "zext", "Only comparisons with zext result.")llvm::cl::OptionEnumValue { "zext", int(ICGPR_Zext), "Only comparisons with zext result."
},

125

clEnumValN(ICGPR_ZextI32, "zexti32",llvm::cl::OptionEnumValue { "zexti32", int(ICGPR_ZextI32), "Only i32 comparisons with zext result."
}

126

"Only i32 comparisons with zext result.")llvm::cl::OptionEnumValue { "zexti32", int(ICGPR_ZextI32), "Only i32 comparisons with zext result."
},

127

clEnumValN(ICGPR_ZextI64, "zexti64",llvm::cl::OptionEnumValue { "zexti64", int(ICGPR_ZextI64), "Only i64 comparisons with zext result."
}

128

"Only i64 comparisons with zext result.")llvm::cl::OptionEnumValue { "zexti64", int(ICGPR_ZextI64), "Only i64 comparisons with zext result."
},

129

clEnumValN(ICGPR_Sext, "sext", "Only comparisons with sext result.")llvm::cl::OptionEnumValue { "sext", int(ICGPR_Sext), "Only comparisons with sext result."
},

130

clEnumValN(ICGPR_SextI32, "sexti32",llvm::cl::OptionEnumValue { "sexti32", int(ICGPR_SextI32), "Only i32 comparisons with sext result."
}

131

"Only i32 comparisons with sext result.")llvm::cl::OptionEnumValue { "sexti32", int(ICGPR_SextI32), "Only i32 comparisons with sext result."
},

132

clEnumValN(ICGPR_SextI64, "sexti64",llvm::cl::OptionEnumValue { "sexti64", int(ICGPR_SextI64), "Only i64 comparisons with sext result."
}

133

"Only i64 comparisons with sext result.")llvm::cl::OptionEnumValue { "sexti64", int(ICGPR_SextI64), "Only i64 comparisons with sext result."
}));

134

namespace {

135

136

//===--------------------------------------------------------------------===//
/// PPCDAGToDAGISel - PPC specific code to select PPC machine
/// instructions for SelectionDAG operations.
///
class PPCDAGToDAGISel : public SelectionDAGISel {
  const PPCTargetMachine &TM;
  // Refreshed at the start of every runOnMachineFunction call.
  const PPCSubtarget *Subtarget = nullptr;
  const PPCTargetLowering *PPCLowering = nullptr;
  // 0 means "not materialized yet for the current function".
  unsigned GlobalBaseReg = 0;

public:
  explicit PPCDAGToDAGISel(PPCTargetMachine &tm, CodeGenOpt::Level OptLevel)
      : SelectionDAGISel(tm, OptLevel), TM(tm) {}

  /// Reset the per-function state, reserve the ROP protection hash slot when
  /// the subtarget asks for it, then run the generic selector. Always returns
  /// true.
  bool runOnMachineFunction(MachineFunction &MF) override {
    // Make sure we re-emit a set of the global base reg if necessary
    GlobalBaseReg = 0;
    Subtarget = &MF.getSubtarget<PPCSubtarget>();
    PPCLowering = Subtarget->getTargetLowering();
    if (Subtarget->hasROPProtect()) {
      // Create a place on the stack for the ROP Protection Hash.
      // The ROP Protection Hash will always be 8 bytes and aligned to 8
      // bytes.
      MachineFrameInfo &MFI = MF.getFrameInfo();
      PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
      const int Result = MFI.CreateStackObject(8, Align(8), false);
      FI->setROPProtectionHashSaveIndex(Result);
    }
    SelectionDAGISel::runOnMachineFunction(MF);

    return true;
  }

  void PreprocessISelDAG() override;
  void PostprocessISelDAG() override;

  /// getI16Imm - Return a target constant with the specified value, of type
  /// i16.
  SDValue getI16Imm(unsigned Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i16);
  }

  /// getI32Imm - Return a target constant with the specified value, of type
  /// i32.
  SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  }

  /// getI64Imm - Return a target constant with the specified value, of type
  /// i64.
  SDValue getI64Imm(uint64_t Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i64);
  }

  /// getSmallIPtrImm - Return a target constant of pointer type.
  SDValue getSmallIPtrImm(unsigned Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(
        Imm, dl, PPCLowering->getPointerTy(CurDAG->getDataLayout()));
  }

  /// isRotateAndMask - Returns true if Mask and Shift can be folded into a
  /// rotate and mask opcode and mask operation.
  static bool isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask,
                              unsigned &SH, unsigned &MB, unsigned &ME);

  /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
  /// base register.  Return the virtual register that holds this value.
  SDNode *getGlobalBaseReg();

  void selectFrameIndex(SDNode *SN, SDNode *N, unsigned Offset = 0);

  // Select - Convert the specified operand from a target-independent to a
  // target-specific node if it hasn't already been changed.
  void Select(SDNode *N) override;

  bool tryBitfieldInsert(SDNode *N);
  bool tryBitPermutation(SDNode *N);
  bool tryIntCompareInGPR(SDNode *N);

  // tryTLSXFormLoad - Convert an ISD::LOAD fed by a PPCISD::ADD_TLS into
  // an X-Form load instruction with the offset being a relocation coming from
  // the PPCISD::ADD_TLS.
  bool tryTLSXFormLoad(LoadSDNode *N);
  // tryTLSXFormStore - Convert an ISD::STORE fed by a PPCISD::ADD_TLS into
  // an X-Form store instruction with the offset being a relocation coming from
  // the PPCISD::ADD_TLS.
  bool tryTLSXFormStore(StoreSDNode *N);
  /// SelectCC - Select a comparison of the specified values with the
  /// specified condition code, returning the CR# of the expression.
  SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                   const SDLoc &dl, SDValue Chain = SDValue());

  /// SelectAddrImmOffs - Return true if the operand is valid for a preinc
  /// immediate field.  Note that the operand at this point is already the
  /// result of a prior SelectAddressRegImm call.
  bool SelectAddrImmOffs(SDValue N, SDValue &Out) const {
    if (N.getOpcode() == ISD::TargetConstant ||
        N.getOpcode() == ISD::TargetGlobalAddress) {
      Out = N;
      return true;
    }

    return false;
  }

  /// SelectDSForm - Returns true if address N can be represented by the
  /// addressing mode of DSForm instructions (a base register, plus a signed
  /// 16-bit displacement that is a multiple of 4).
  bool SelectDSForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
    return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
                                              Align(4)) == PPC::AM_DSForm;
  }

  /// SelectDQForm - Returns true if address N can be represented by the
  /// addressing mode of DQForm instructions (a base register, plus a signed
  /// 16-bit displacement that is a multiple of 16).
  bool SelectDQForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
    return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
                                              Align(16)) == PPC::AM_DQForm;
  }

  /// SelectDForm - Returns true if address N can be represented by
  /// the addressing mode of DForm instructions (a base register, plus a
  /// signed 16-bit immediate).
  bool SelectDForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
    return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
                                              None) == PPC::AM_DForm;
  }

  /// SelectXForm - Returns true if address N can be represented by the
  /// addressing mode of XForm instructions (an indexed [r+r] operation).
  bool SelectXForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
    return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
                                              None) == PPC::AM_XForm;
  }

  /// SelectForceXForm - Given the specified address, force it to be
  /// represented as an indexed [r+r] operation (an XForm instruction).
  /// \p Parent is accepted for signature parity with the other Select*Form
  /// helpers but is not forwarded.
  bool SelectForceXForm(SDNode *Parent, SDValue N, SDValue &Disp,
                        SDValue &Base) {
    return PPCLowering->SelectForceXFormMode(N, Disp, Base, *CurDAG) ==
           PPC::AM_XForm;
  }

  /// SelectAddrIdx - Given the specified address, check to see if it can be
  /// represented as an indexed [r+r] operation.
  /// This is for xform instructions whose associated displacement form is D.
  /// Passing None as the last argument means the associated D form has no
  /// alignment requirement for the 16 bit signed displacement.
  /// Returns false if it can be represented by [r+imm], which are preferred.
  bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) {
    return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, None);
  }

  /// SelectAddrIdxX4 - Given the specified address, check to see if it can be
  /// represented as an indexed [r+r] operation.
  /// This is for xform instructions whose associated displacement form is DS.
  /// The last argument Align(4) means the associated DS form 16 bit signed
  /// displacement must be a multiple of 4.
  /// Returns false if it can be represented by [r+imm], which are preferred.
  bool SelectAddrIdxX4(SDValue N, SDValue &Base, SDValue &Index) {
    return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG,
                                            Align(4));
  }

  /// SelectAddrIdxX16 - Given the specified address, check to see if it can be
  /// represented as an indexed [r+r] operation.
  /// This is for xform instructions whose associated displacement form is DQ.
  /// The last argument Align(16) means the associated DQ form 16 bit signed
  /// displacement must be a multiple of 16.
  /// Returns false if it can be represented by [r+imm], which are preferred.
  bool SelectAddrIdxX16(SDValue N, SDValue &Base, SDValue &Index) {
    return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG,
                                            Align(16));
  }

  /// SelectAddrIdxOnly - Given the specified address, force it to be
  /// represented as an indexed [r+r] operation.
  bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) {
    return PPCLowering->SelectAddressRegRegOnly(N, Base, Index, *CurDAG);
  }

  /// SelectAddrImm - Returns true if the address N can be represented by
  /// a base register plus a signed 16-bit displacement [r+imm].
  /// Passing None as the last argument means the D form has no alignment
  /// requirement for the 16 bit signed displacement.
  bool SelectAddrImm(SDValue N, SDValue &Disp,
                     SDValue &Base) {
    return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, None);
  }

  /// SelectAddrImmX4 - Returns true if the address N can be represented by
  /// a base register plus a signed 16-bit displacement that is a multiple of
  /// 4 (last parameter). Suitable for use by STD and friends.
  bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) {
    return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, Align(4));
  }

  /// SelectAddrImmX16 - Returns true if the address N can be represented by
  /// a base register plus a signed 16-bit displacement that is a multiple of
  /// 16 (last parameter). Suitable for use by STXV and friends.
  bool SelectAddrImmX16(SDValue N, SDValue &Disp, SDValue &Base) {
    return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG,
                                            Align(16));
  }

  /// SelectAddrImmX34 - Returns true if the address N can be represented by
  /// a base register plus a signed 34-bit displacement. Suitable for use by
  /// PSTXVP and friends.
  bool SelectAddrImmX34(SDValue N, SDValue &Disp, SDValue &Base) {
    return PPCLowering->SelectAddressRegImm34(N, Disp, Base, *CurDAG);
  }

  // Select an address into a single register.
  bool SelectAddr(SDValue N, SDValue &Base) {
    Base = N;
    return true;
  }

  bool SelectAddrPCRel(SDValue N, SDValue &Base) {
    return PPCLowering->SelectAddressPCRel(N, Base);
  }

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.  It is always correct to compute the value into
  /// a register.  The case of adding a (possibly relocatable) constant to a
  /// register can be improved, but it is wrong to substitute Reg+Reg for
  /// Reg in an asm, because the load or store opcode would have to change.
  ///
  /// Returns false after appending the rewritten operand to \p OutOps for
  /// every handled constraint; any other constraint ID is reported on errs()
  /// and treated as unreachable.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                    unsigned ConstraintID,
                                    std::vector<SDValue> &OutOps) override {
    switch(ConstraintID) {
    default:
      errs() << "ConstraintID: " << ConstraintID << "\n";
      llvm_unreachable("Unexpected asm memory constraint");
    case InlineAsm::Constraint_es:
    case InlineAsm::Constraint_m:
    case InlineAsm::Constraint_o:
    case InlineAsm::Constraint_Q:
    case InlineAsm::Constraint_Z:
    case InlineAsm::Constraint_Zy:
      // We need to make sure that this one operand does not end up in r0
      // (because we might end up lowering this as 0(%op)).
      const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
      const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF, /*Kind=*/1);
      SDLoc dl(Op);
      SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i32);
      SDValue NewOp =
        SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
                                       dl, Op.getValueType(),
                                       Op, RC), 0);

      OutOps.push_back(NewOp);
      return false;
    }
    return true;
  }

  StringRef getPassName() const override {
    return "PowerPC DAG->DAG Pattern Instruction Selection";
  }

// Include the pieces autogenerated from the target description.
#include "PPCGenDAGISel.inc"

private:
  bool trySETCC(SDNode *N);
  bool tryFoldSWTestBRCC(SDNode *N);
  bool tryAsSingleRLDICL(SDNode *N);
  bool tryAsSingleRLDICR(SDNode *N);
  bool tryAsSingleRLWINM(SDNode *N);
  bool tryAsSingleRLWINM8(SDNode *N);
  bool tryAsSingleRLWIMI(SDNode *N);
  bool tryAsPairOfRLDICL(SDNode *N);
  bool tryAsSingleRLDIMI(SDNode *N);

  void PeepholePPC64();
  void PeepholePPC64ZExt();
  void PeepholeCROps();

  SDValue combineToCMPB(SDNode *N);
  void foldBoolExts(SDValue &Res, SDNode *&N);

  bool AllUsersSelectZero(SDNode *N);
  void SwapAllSelectUsers(SDNode *N);

  bool isOffsetMultipleOf(SDNode *N, unsigned Val) const;
  void transferMemOperands(SDNode *N, SDNode *Result);
};

425

426

} // end anonymous namespace

427

428

/// getGlobalBaseReg - Output the instructions required to put the

429

/// base address to use for accessing globals into a register.

430

///

431

SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {

432

if (!GlobalBaseReg) {

433

const TargetInstrInfo &TII = *Subtarget->getInstrInfo();

434

// Insert the set of GlobalBaseReg into the first MBB of the function

435

MachineBasicBlock &FirstMBB = MF->front();

436

MachineBasicBlock::iterator MBBI = FirstMBB.begin();

437

const Module *M = MF->getFunction().getParent();

438

DebugLoc dl;

439

440

if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) == MVT::i32) {

441

if (Subtarget->isTargetELF()) {

442

GlobalBaseReg = PPC::R30;

443

if (!Subtarget->isSecurePlt() &&

444

M->getPICLevel() == PICLevel::SmallPIC) {

445

BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MoveGOTtoLR));

446

BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);

447

MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);

448

} else {

449

BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));

450

BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);

451

Register TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);

452

BuildMI(FirstMBB, MBBI, dl,

453

TII.get(PPC::UpdateGBR), GlobalBaseReg)

454

.addReg(TempReg, RegState::Define).addReg(GlobalBaseReg);

455

MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);

456

}

457

} else {

458

GlobalBaseReg =

459

RegInfo->createVirtualRegister(&PPC::GPRC_and_GPRC_NOR0RegClass);

460

BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));

461

BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);

462

}

463

} else {

464

// We must ensure that this sequence is dominated by the prologue.

465

// FIXME: This is a bit of a big hammer since we don't get the benefits

466

// of shrink-wrapping whenever we emit this instruction. Considering

467

// this is used in any function where we emit a jump table, this may be

468

// a significant limitation. We should consider inserting this in the

469

// block where it is used and then commoning this sequence up if it

470

// appears in multiple places.

471

// Note: on ISA 3.0 cores, we can use lnia (addpcis) instead of

472

// MovePCtoLR8.

473

MF->getInfo<PPCFunctionInfo>()->setShrinkWrapDisabled(true);

474

GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);

475

BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8));

476

BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR8), GlobalBaseReg);

477

}

478

}

479

return CurDAG->getRegister(GlobalBaseReg,

480

PPCLowering->getPointerTy(CurDAG->getDataLayout()))

481

.getNode();

482

}

483

484

// Check if a SDValue has the toc-data attribute.

485

static bool hasTocDataAttr(SDValue Val, unsigned PointerSize) {

486

GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Val);

487

if (!GA)

488

return false;

489

490

const GlobalVariable *GV = dyn_cast_or_null<GlobalVariable>(GA->getGlobal());

491

if (!GV)

492

return false;

493

494

if (!GV->hasAttribute("toc-data"))

495

return false;

496

497

// TODO: These asserts should be updated as more support for the toc data

498

// transformation is added (64 bit, struct support, etc.).

499

500

assert(PointerSize == 4 && "Only 32 Bit Codegen is currently supported by "(static_cast <bool> (PointerSize == 4 && "Only 32 Bit Codegen is currently supported by "
"the toc data transformation.") ? void (0) : __assert_fail (
"PointerSize == 4 && \"Only 32 Bit Codegen is currently supported by \" \"the toc data transformation.\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 501, __extension__ __PRETTY_FUNCTION__))

501

"the toc data transformation.")(static_cast <bool> (PointerSize == 4 && "Only 32 Bit Codegen is currently supported by "
"the toc data transformation.") ? void (0) : __assert_fail (
"PointerSize == 4 && \"Only 32 Bit Codegen is currently supported by \" \"the toc data transformation.\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 501, __extension__ __PRETTY_FUNCTION__));

502

503

assert(PointerSize >= GV->getAlign().valueOrOne().value() &&(static_cast <bool> (PointerSize >= GV->getAlign(
).valueOrOne().value() && "GlobalVariables with an alignment requirement stricter then 4-bytes "
"not supported by the toc data transformation.") ? void (0) :
__assert_fail ("PointerSize >= GV->getAlign().valueOrOne().value() && \"GlobalVariables with an alignment requirement stricter then 4-bytes \" \"not supported by the toc data transformation.\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 505, __extension__ __PRETTY_FUNCTION__))

504

"GlobalVariables with an alignment requirement stricter then 4-bytes "(static_cast <bool> (PointerSize >= GV->getAlign(
).valueOrOne().value() && "GlobalVariables with an alignment requirement stricter then 4-bytes "
"not supported by the toc data transformation.") ? void (0) :
__assert_fail ("PointerSize >= GV->getAlign().valueOrOne().value() && \"GlobalVariables with an alignment requirement stricter then 4-bytes \" \"not supported by the toc data transformation.\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 505, __extension__ __PRETTY_FUNCTION__))

505

"not supported by the toc data transformation.")(static_cast <bool> (PointerSize >= GV->getAlign(
).valueOrOne().value() && "GlobalVariables with an alignment requirement stricter then 4-bytes "
"not supported by the toc data transformation.") ? void (0) :
__assert_fail ("PointerSize >= GV->getAlign().valueOrOne().value() && \"GlobalVariables with an alignment requirement stricter then 4-bytes \" \"not supported by the toc data transformation.\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 505, __extension__ __PRETTY_FUNCTION__));

506

507

Type *GVType = GV->getValueType();

508

509

assert(GVType->isSized() && "A GlobalVariable's size must be known to be "(static_cast <bool> (GVType->isSized() && "A GlobalVariable's size must be known to be "
"supported by the toc data transformation.") ? void (0) : __assert_fail
("GVType->isSized() && \"A GlobalVariable's size must be known to be \" \"supported by the toc data transformation.\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 510, __extension__ __PRETTY_FUNCTION__))

510

"supported by the toc data transformation.")(static_cast <bool> (GVType->isSized() && "A GlobalVariable's size must be known to be "
"supported by the toc data transformation.") ? void (0) : __assert_fail
("GVType->isSized() && \"A GlobalVariable's size must be known to be \" \"supported by the toc data transformation.\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 510, __extension__ __PRETTY_FUNCTION__));

511

512

if (GVType->isVectorTy())

513

report_fatal_error("A GlobalVariable of Vector type is not currently "

514

"supported by the toc data transformation.");

515

516

if (GVType->isArrayTy())

517

report_fatal_error("A GlobalVariable of Array type is not currently "

518

"supported by the toc data transformation.");

519

520

if (GVType->isStructTy())

521

report_fatal_error("A GlobalVariable of Struct type is not currently "

522

"supported by the toc data transformation.");

523

524

assert(GVType->getPrimitiveSizeInBits() <= PointerSize * 8 &&(static_cast <bool> (GVType->getPrimitiveSizeInBits(
) <= PointerSize * 8 && "A GlobalVariable with size larger than 32 bits is not currently "
"supported by the toc data transformation.") ? void (0) : __assert_fail
("GVType->getPrimitiveSizeInBits() <= PointerSize * 8 && \"A GlobalVariable with size larger than 32 bits is not currently \" \"supported by the toc data transformation.\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 526, __extension__ __PRETTY_FUNCTION__))

525

"A GlobalVariable with size larger than 32 bits is not currently "(static_cast <bool> (GVType->getPrimitiveSizeInBits(
) <= PointerSize * 8 && "A GlobalVariable with size larger than 32 bits is not currently "
"supported by the toc data transformation.") ? void (0) : __assert_fail
("GVType->getPrimitiveSizeInBits() <= PointerSize * 8 && \"A GlobalVariable with size larger than 32 bits is not currently \" \"supported by the toc data transformation.\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 526, __extension__ __PRETTY_FUNCTION__))

526

"supported by the toc data transformation.")(static_cast <bool> (GVType->getPrimitiveSizeInBits(
) <= PointerSize * 8 && "A GlobalVariable with size larger than 32 bits is not currently "
"supported by the toc data transformation.") ? void (0) : __assert_fail
("GVType->getPrimitiveSizeInBits() <= PointerSize * 8 && \"A GlobalVariable with size larger than 32 bits is not currently \" \"supported by the toc data transformation.\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 526, __extension__ __PRETTY_FUNCTION__));

527

528

if (GV->hasLocalLinkage() || GV->hasPrivateLinkage())

529

report_fatal_error("A GlobalVariable with private or local linkage is not "

530

"currently supported by the toc data transformation.");

531

532

assert(!GV->hasCommonLinkage() &&(static_cast <bool> (!GV->hasCommonLinkage() &&
"Tentative definitions cannot have the mapping class XMC_TD."
) ? void (0) : __assert_fail ("!GV->hasCommonLinkage() && \"Tentative definitions cannot have the mapping class XMC_TD.\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 533, __extension__ __PRETTY_FUNCTION__))

533

"Tentative definitions cannot have the mapping class XMC_TD.")(static_cast <bool> (!GV->hasCommonLinkage() &&
"Tentative definitions cannot have the mapping class XMC_TD."
) ? void (0) : __assert_fail ("!GV->hasCommonLinkage() && \"Tentative definitions cannot have the mapping class XMC_TD.\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 533, __extension__ __PRETTY_FUNCTION__));

534

535

return true;

536

}

537

538

/// isInt32Immediate - This method tests to see if the node is a 32-bit constant

539

/// operand. If so Imm will receive the 32-bit value.

540

static bool isInt32Immediate(SDNode *N, unsigned &Imm) {

541

if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {

542

Imm = cast<ConstantSDNode>(N)->getZExtValue();

543

return true;

544

}

545

return false;

546

}

547

548

/// isInt64Immediate - This method tests to see if the node is a 64-bit constant

549

/// operand. If so Imm will receive the 64-bit value.

550

static bool isInt64Immediate(SDNode *N, uint64_t &Imm) {

551

if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i64) {

552

Imm = cast<ConstantSDNode>(N)->getZExtValue();

553

return true;

554

}

555

return false;

556

}

557

558

// isInt32Immediate - This method tests to see if a constant operand.

559

// If so Imm will receive the 32 bit value.

560

static bool isInt32Immediate(SDValue N, unsigned &Imm) {

561

return isInt32Immediate(N.getNode(), Imm);

562

}

563

564

/// isInt64Immediate - This method tests to see if the value is a 64-bit

565

/// constant operand. If so Imm will receive the 64-bit value.

566

static bool isInt64Immediate(SDValue N, uint64_t &Imm) {

567

return isInt64Immediate(N.getNode(), Imm);

568

}

569

570

static unsigned getBranchHint(unsigned PCC,

571

const FunctionLoweringInfo &FuncInfo,

572

const SDValue &DestMBB) {

573

assert(isa<BasicBlockSDNode>(DestMBB))(static_cast <bool> (isa<BasicBlockSDNode>(DestMBB
)) ? void (0) : __assert_fail ("isa<BasicBlockSDNode>(DestMBB)"
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 573, __extension__ __PRETTY_FUNCTION__));

574

575

if (!FuncInfo.BPI) return PPC::BR_NO_HINT;

576

577

const BasicBlock *BB = FuncInfo.MBB->getBasicBlock();

578

const Instruction *BBTerm = BB->getTerminator();

579

580

if (BBTerm->getNumSuccessors() != 2) return PPC::BR_NO_HINT;

581

582

const BasicBlock *TBB = BBTerm->getSuccessor(0);

583

const BasicBlock *FBB = BBTerm->getSuccessor(1);

584

585

auto TProb = FuncInfo.BPI->getEdgeProbability(BB, TBB);

586

auto FProb = FuncInfo.BPI->getEdgeProbability(BB, FBB);

587

588

// We only want to handle cases which are easy to predict at static time, e.g.

589

// C++ throw statement, that is very likely not taken, or calling never

590

// returned function, e.g. stdlib exit(). So we set Threshold to filter

591

// unwanted cases.

592

//

593

// Below is LLVM branch weight table, we only want to handle case 1, 2

594

//

595

// Case Taken:Nontaken Example

596

// 1. Unreachable 1048575:1 C++ throw, stdlib exit(),

597

// 2. Invoke-terminating 1:1048575

598

// 3. Coldblock 4:64 __builtin_expect

599

// 4. Loop Branch 124:4 For loop

600

// 5. PH/ZH/FPH 20:12

601

const uint32_t Threshold = 10000;

602

603

if (std::max(TProb, FProb) / Threshold < std::min(TProb, FProb))

604

return PPC::BR_NO_HINT;

605

606

LLVM_DEBUG(dbgs() << "Use branch hint for '" << FuncInfo.Fn->getName()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "Use branch hint for '" <<
FuncInfo.Fn->getName() << "::" << BB->getName
() << "'\n" << " -> " << TBB->getName
() << ": " << TProb << "\n" << " -> "
<< FBB->getName() << ": " << FProb <<
"\n"; } } while (false)

607

<< "::" << BB->getName() << "'\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "Use branch hint for '" <<
FuncInfo.Fn->getName() << "::" << BB->getName
() << "'\n" << " -> " << TBB->getName
() << ": " << TProb << "\n" << " -> "
<< FBB->getName() << ": " << FProb <<
"\n"; } } while (false)

608

<< " -> " << TBB->getName() << ": " << TProb << "\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "Use branch hint for '" <<
FuncInfo.Fn->getName() << "::" << BB->getName
() << "'\n" << " -> " << TBB->getName
() << ": " << TProb << "\n" << " -> "
<< FBB->getName() << ": " << FProb <<
"\n"; } } while (false)

609

<< " -> " << FBB->getName() << ": " << FProb << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "Use branch hint for '" <<
FuncInfo.Fn->getName() << "::" << BB->getName
() << "'\n" << " -> " << TBB->getName
() << ": " << TProb << "\n" << " -> "
<< FBB->getName() << ": " << FProb <<
"\n"; } } while (false);

610

611

const BasicBlockSDNode *BBDN = cast<BasicBlockSDNode>(DestMBB);

612

613

// If Dest BasicBlock is False-BasicBlock (FBB), swap branch probabilities,

614

// because we want 'TProb' stands for 'branch probability' to Dest BasicBlock

615

if (BBDN->getBasicBlock()->getBasicBlock() != TBB)

616

std::swap(TProb, FProb);

617

618

return (TProb > FProb) ? PPC::BR_TAKEN_HINT : PPC::BR_NONTAKEN_HINT;

619

}

620

621

// isOpcWithIntImmediate - This method tests to see if the node is a specific

622

// opcode and that it has a immediate integer right operand.

623

// If so Imm will receive the 32 bit value.

624

static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {

625

return N->getOpcode() == Opc

626

&& isInt32Immediate(N->getOperand(1).getNode(), Imm);

627

}

628

629

/// Select \p SN as an ADDI (i32) or ADDI8 (otherwise) of the target frame
/// index taken from the FrameIndexSDNode \p N plus the immediate \p Offset.
/// When \p SN has a single use it is morphed in place; otherwise a new machine
/// node is created and \p SN is replaced by it.
void PPCDAGToDAGISel::selectFrameIndex(SDNode *SN, SDNode *N, unsigned Offset) {
  SDLoc dl(SN);
  const EVT VT = N->getValueType(0);
  const int FrameIdx = cast<FrameIndexSDNode>(N)->getIndex();

  SDValue FrameIdxOp = CurDAG->getTargetFrameIndex(FrameIdx, VT);
  SDValue OffsetOp = getSmallIPtrImm(Offset, dl);
  const unsigned AddOpc = (VT == MVT::i32) ? PPC::ADDI : PPC::ADDI8;

  if (!SN->hasOneUse()) {
    ReplaceNode(SN,
                CurDAG->getMachineNode(AddOpc, dl, VT, FrameIdxOp, OffsetOp));
    return;
  }
  CurDAG->SelectNodeTo(SN, AddOpc, VT, FrameIdxOp, OffsetOp);
}

641

642

bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask,

643

bool isShiftMask, unsigned &SH,

644

unsigned &MB, unsigned &ME) {

645

// Don't even go down this path for i64, since different logic will be

646

// necessary for rldicl/rldicr/rldimi.

647

if (N->getValueType(0) != MVT::i32)

648

return false;

649

650

unsigned Shift = 32;

651

unsigned Indeterminant = ~0; // bit mask marking indeterminant results

652

unsigned Opcode = N->getOpcode();

653

if (N->getNumOperands() != 2 ||

654

!isInt32Immediate(N->getOperand(1).getNode(), Shift) || (Shift > 31))

655

return false;

656

657

if (Opcode == ISD::SHL) {

658

// apply shift left to mask if it comes first

659

if (isShiftMask) Mask = Mask << Shift;

660

// determine which bits are made indeterminant by shift

661

Indeterminant = ~(0xFFFFFFFFu << Shift);

662

} else if (Opcode == ISD::SRL) {

663

// apply shift right to mask if it comes first

664

if (isShiftMask) Mask = Mask >> Shift;

665

// determine which bits are made indeterminant by shift

666

Indeterminant = ~(0xFFFFFFFFu >> Shift);

667

// adjust for the left rotate

668

Shift = 32 - Shift;

669

} else if (Opcode == ISD::ROTL) {

670

Indeterminant = 0;

671

} else {

672

return false;

673

}

674

675

// if the mask doesn't intersect any Indeterminant bits

676

if (Mask && !(Mask & Indeterminant)) {

677

SH = Shift & 31;

678

// make sure the mask is still a mask (wrap arounds may not be)

679

return isRunOfOnes(Mask, MB, ME);

680

}

681

return false;

682

}

683

684

/// Try to select the store \p ST as an X-form TLS store (ST*XTLS).
///
/// This only applies when the base pointer is a PPCISD::ADD_TLS whose second
/// operand is not PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, the store has an undef
/// offset, and the memory type is i8, i16, i32 or i64. On success the store is
/// replaced by the machine node and true is returned; otherwise nothing is
/// changed and false is returned.
bool PPCDAGToDAGISel::tryTLSXFormStore(StoreSDNode *ST) {
  SDValue TLSAdd = ST->getBasePtr();
  if (TLSAdd.getOpcode() != PPCISD::ADD_TLS || !ST->getOffset().isUndef() ||
      TLSAdd.getOperand(1).getOpcode() == PPCISD::TLS_LOCAL_EXEC_MAT_ADDR)
    return false;

  SDLoc dl(ST);
  const EVT MemVT = ST->getMemoryVT();
  // The *_32 opcode variants are used when the stored value is an i32.
  const bool StoresI32Reg = ST->getValue().getValueType() == MVT::i32;

  unsigned Opcode;
  switch (MemVT.getSimpleVT().SimpleTy) {
  case MVT::i8:
    Opcode = StoresI32Reg ? PPC::STBXTLS_32 : PPC::STBXTLS;
    break;
  case MVT::i16:
    Opcode = StoresI32Reg ? PPC::STHXTLS_32 : PPC::STHXTLS;
    break;
  case MVT::i32:
    Opcode = StoresI32Reg ? PPC::STWXTLS_32 : PPC::STWXTLS;
    break;
  case MVT::i64:
    Opcode = PPC::STDXTLS;
    break;
  default:
    return false;
  }

  // Operands: stored value, the two ADD_TLS operands, then the chain.
  SDValue Ops[] = {ST->getValue(), TLSAdd.getOperand(0), TLSAdd.getOperand(1),
                   ST->getChain()};
  SDNode *NewStore = CurDAG->getMachineNode(Opcode, dl, ST->getVTList(), Ops);
  transferMemOperands(ST, NewStore);
  ReplaceNode(ST, NewStore);
  return true;
}

728

729

/// Try to select the load \p LD as an X-form TLS load (L*XTLS).
///
/// This only applies when the base pointer is a PPCISD::ADD_TLS whose second
/// operand is not PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, the load has an undef
/// offset, and the memory type is i8, i16, i32 or i64. On success the load is
/// replaced by the machine node and true is returned; otherwise nothing is
/// changed and false is returned.
bool PPCDAGToDAGISel::tryTLSXFormLoad(LoadSDNode *LD) {
  SDValue TLSAdd = LD->getBasePtr();
  if (TLSAdd.getOpcode() != PPCISD::ADD_TLS || !LD->getOffset().isUndef() ||
      TLSAdd.getOperand(1).getOpcode() == PPCISD::TLS_LOCAL_EXEC_MAT_ADDR)
    return false;

  SDLoc dl(LD);
  const EVT MemVT = LD->getMemoryVT();
  // The *_32 opcode variants are used when the loaded result is an i32.
  const bool LoadsI32Reg = LD->getValueType(0) == MVT::i32;

  unsigned Opcode;
  switch (MemVT.getSimpleVT().SimpleTy) {
  case MVT::i8:
    Opcode = LoadsI32Reg ? PPC::LBZXTLS_32 : PPC::LBZXTLS;
    break;
  case MVT::i16:
    Opcode = LoadsI32Reg ? PPC::LHZXTLS_32 : PPC::LHZXTLS;
    break;
  case MVT::i32:
    Opcode = LoadsI32Reg ? PPC::LWZXTLS_32 : PPC::LWZXTLS;
    break;
  case MVT::i64:
    Opcode = PPC::LDXTLS;
    break;
  default:
    return false;
  }

  // Operands: the two ADD_TLS operands, then the chain.
  SDValue Ops[] = {TLSAdd.getOperand(0), TLSAdd.getOperand(1), LD->getChain()};
  SDNode *NewLoad = CurDAG->getMachineNode(Opcode, dl, LD->getVTList(), Ops);
  transferMemOperands(LD, NewLoad);
  ReplaceNode(LD, NewLoad);
  return true;
}

771

772

/// Turn an or of two masked values into the rotate left word immediate then

773

/// mask insert (rlwimi) instruction.

774

bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) {

775

SDValue Op0 = N->getOperand(0);

776

SDValue Op1 = N->getOperand(1);

777

SDLoc dl(N);

778

779

KnownBits LKnown = CurDAG->computeKnownBits(Op0);

780

KnownBits RKnown = CurDAG->computeKnownBits(Op1);

781

782

unsigned TargetMask = LKnown.Zero.getZExtValue();

783

unsigned InsertMask = RKnown.Zero.getZExtValue();

784

785

if ((TargetMask | InsertMask) == 0xFFFFFFFF) {

786

unsigned Op0Opc = Op0.getOpcode();

787

unsigned Op1Opc = Op1.getOpcode();

788

unsigned Value, SH = 0;

789

TargetMask = ~TargetMask;

790

InsertMask = ~InsertMask;

791

792

// If the LHS has a foldable shift and the RHS does not, then swap it to the

793

// RHS so that we can fold the shift into the insert.

794

if (Op0Opc == ISD::AND && Op1Opc == ISD::AND) {

795

if (Op0.getOperand(0).getOpcode() == ISD::SHL ||

796

Op0.getOperand(0).getOpcode() == ISD::SRL) {

797

if (Op1.getOperand(0).getOpcode() != ISD::SHL &&

798

Op1.getOperand(0).getOpcode() != ISD::SRL) {

799

std::swap(Op0, Op1);

800

std::swap(Op0Opc, Op1Opc);

801

std::swap(TargetMask, InsertMask);

802

}

803

}

804

} else if (Op0Opc == ISD::SHL || Op0Opc == ISD::SRL) {

805

if (Op1Opc == ISD::AND && Op1.getOperand(0).getOpcode() != ISD::SHL &&

806

Op1.getOperand(0).getOpcode() != ISD::SRL) {

807

std::swap(Op0, Op1);

808

std::swap(Op0Opc, Op1Opc);

809

std::swap(TargetMask, InsertMask);

810

}

811

}

812

813

unsigned MB, ME;

814

if (isRunOfOnes(InsertMask, MB, ME)) {

815

if ((Op1Opc == ISD::SHL || Op1Opc == ISD::SRL) &&

816

isInt32Immediate(Op1.getOperand(1), Value)) {

817

Op1 = Op1.getOperand(0);

818

SH = (Op1Opc == ISD::SHL) ? Value : 32 - Value;

819

}

820

if (Op1Opc == ISD::AND) {

821

// The AND mask might not be a constant, and we need to make sure that

822

// if we're going to fold the masking with the insert, all bits not

823

// know to be zero in the mask are known to be one.

824

KnownBits MKnown = CurDAG->computeKnownBits(Op1.getOperand(1));

825

bool CanFoldMask = InsertMask == MKnown.One.getZExtValue();

826

827

unsigned SHOpc = Op1.getOperand(0).getOpcode();

828

if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) && CanFoldMask &&

829

isInt32Immediate(Op1.getOperand(0).getOperand(1), Value)) {

830

// Note that Value must be in range here (less than 32) because

831

// otherwise there would not be any bits set in InsertMask.

832

Op1 = Op1.getOperand(0).getOperand(0);

833

SH = (SHOpc == ISD::SHL) ? Value : 32 - Value;

834

}

835

}

836

837

SH &= 31;

838

SDValue Ops[] = { Op0, Op1, getI32Imm(SH, dl), getI32Imm(MB, dl),

839

getI32Imm(ME, dl) };

840

ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));

841

return true;

842

}

843

}

844

return false;

845

}

846

847

/// Check whether every use of \p N only consumes a truncated form of its
/// value: an ISD::TRUNCATE, an ISD::STORE of \p N as the stored value with a
/// memory type other than 64 bits, or one of the listed PPC word / halfword /
/// byte store opcodes with \p N as operand 0.
///
/// Returns the widest bit width consumed by any such use, or 0 as soon as a
/// use that does not fit this description is found.
static unsigned allUsesTruncate(SelectionDAG *CurDAG, SDNode *N) {
  unsigned WidestUse = 0;
  // Cannot use a range-based for loop here as we need the actual use (i.e. we
  // need the operand number corresponding to the use). A range-based for
  // will unbox the use and provide an SDNode*.
  for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); UI != UE;
       ++UI) {
    const bool IsMachineNode = UI->isMachineOpcode();
    const unsigned Opc =
        IsMachineNode ? UI->getMachineOpcode() : UI->getOpcode();

    // Bit width consumed by this particular use.
    unsigned UseWidth = 0;
    switch (Opc) {
    default:
      return 0;
    case ISD::TRUNCATE:
      if (IsMachineNode)
        return 0;
      UseWidth = (unsigned)UI->getValueType(0).getSizeInBits();
      break;
    case ISD::STORE: {
      if (IsMachineNode)
        return 0;
      StoreSDNode *STN = cast<StoreSDNode>(*UI);
      UseWidth = STN->getMemoryVT().getSizeInBits();
      // Reject 64-bit stores and uses of N as anything but operand 0.
      if (UseWidth == 64 || UI.getOperandNo() != 0)
        return 0;
      break;
    }
    case PPC::STW8:
    case PPC::STWX8:
    case PPC::STWU8:
    case PPC::STWUX8:
      if (UI.getOperandNo() != 0)
        return 0;
      UseWidth = 32u;
      break;
    case PPC::STH8:
    case PPC::STHX8:
    case PPC::STHU8:
    case PPC::STHUX8:
      if (UI.getOperandNo() != 0)
        return 0;
      UseWidth = 16u;
      break;
    case PPC::STB8:
    case PPC::STBX8:
    case PPC::STBU8:
    case PPC::STBUX8:
      if (UI.getOperandNo() != 0)
        return 0;
      UseWidth = 8u;
      break;
    }
    WidestUse = std::max(WidestUse, UseWidth);
  }
  return WidestUse;
}

902

903

// For any 32 < Num < 64, check if the Imm contains at least Num consecutive

904

// zeros and return the number of bits by the left of these consecutive zeros.

905

static int findContiguousZerosAtLeast(uint64_t Imm, unsigned Num) {

906

unsigned HiTZ = countTrailingZeros<uint32_t>(Hi_32(Imm));

907

unsigned LoLZ = countLeadingZeros<uint32_t>(Lo_32(Imm));

908

if ((HiTZ + LoLZ) >= Num)

909

return (32 + HiTZ);

910

return 0;

911

}

912

913

// Direct materialization of 64-bit constants by enumerated patterns.

914

static SDNode *selectI64ImmDirect(SelectionDAG *CurDAG, const SDLoc &dl,

915

uint64_t Imm, unsigned &InstCnt) {

916

unsigned TZ = countTrailingZeros<uint64_t>(Imm);

917

unsigned LZ = countLeadingZeros<uint64_t>(Imm);

918

unsigned TO = countTrailingOnes<uint64_t>(Imm);

919

unsigned LO = countLeadingOnes<uint64_t>(Imm);

920

unsigned Hi32 = Hi_32(Imm);

921

unsigned Lo32 = Lo_32(Imm);

922

SDNode *Result = nullptr;

923

unsigned Shift = 0;

924

925

auto getI32Imm = [CurDAG, dl](unsigned Imm) {

926

return CurDAG->getTargetConstant(Imm, dl, MVT::i32);

927

};

928

929

// Following patterns use 1 instructions to materialize the Imm.

930

InstCnt = 1;

931

// 1-1) Patterns : {zeros}{15-bit valve}

932

// {ones}{15-bit valve}

933

if (isInt<16>(Imm)) {

934

SDValue SDImm = CurDAG->getTargetConstant(Imm, dl, MVT::i64);

935

return CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm);

936

}

937

// 1-2) Patterns : {zeros}{15-bit valve}{16 zeros}

938

// {ones}{15-bit valve}{16 zeros}

939

if (TZ > 15 && (LZ > 32 || LO > 32))

940

return CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64,

941

getI32Imm((Imm >> 16) & 0xffff));

942

943

// Following patterns use 2 instructions to materialize the Imm.

944

InstCnt = 2;

945

assert(LZ < 64 && "Unexpected leading zeros here.")(static_cast <bool> (LZ < 64 && "Unexpected leading zeros here."
) ? void (0) : __assert_fail ("LZ < 64 && \"Unexpected leading zeros here.\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 945, __extension__ __PRETTY_FUNCTION__));

946

// Count of ones follwing the leading zeros.

947

unsigned FO = countLeadingOnes<uint64_t>(Imm << LZ);

948

// 2-1) Patterns : {zeros}{31-bit value}

949

// {ones}{31-bit value}

950

if (isInt<32>(Imm)) {

951

uint64_t ImmHi16 = (Imm >> 16) & 0xffff;

952

unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;

953

Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));

954

return CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),

955

getI32Imm(Imm & 0xffff));

956

}

957

// 2-2) Patterns : {zeros}{ones}{15-bit value}{zeros}

958

// {zeros}{15-bit value}{zeros}

959

// {zeros}{ones}{15-bit value}

960

// {ones}{15-bit value}{zeros}

961

// We can take advantage of LI's sign-extension semantics to generate leading

962

// ones, and then use RLDIC to mask off the ones in both sides after rotation.

963

if ((LZ + FO + TZ) > 48) {

964

Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,

965

getI32Imm((Imm >> TZ) & 0xffff));

966

return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0),

967

getI32Imm(TZ), getI32Imm(LZ));

968

}

969

// 2-3) Pattern : {zeros}{15-bit value}{ones}

970

// Shift right the Imm by (48 - LZ) bits to construct a negtive 16 bits value,

971

// therefore we can take advantage of LI's sign-extension semantics, and then

972

// mask them off after rotation.

973

//

974

// +--LZ--||-15-bit-||--TO--+ +-------------|--16-bit--+

975

// |00000001bbbbbbbbb1111111| -> |00000000000001bbbbbbbbb1|

976

// +------------------------+ +------------------------+

977

// 63 0 63 0

978

// Imm (Imm >> (48 - LZ) & 0xffff)

979

// +----sext-----|--16-bit--+ +clear-|-----------------+

980

// |11111111111111bbbbbbbbb1| -> |00000001bbbbbbbbb1111111|

981

// +------------------------+ +------------------------+

982

// 63 0 63 0

983

// LI8: sext many leading zeros RLDICL: rotate left (48 - LZ), clear left LZ

984

if ((LZ + TO) > 48) {

985

// Since the immediates with (LZ > 32) have been handled by previous

986

// patterns, here we have (LZ <= 32) to make sure we will not shift right

987

// the Imm by a negative value.

988

assert(LZ <= 32 && "Unexpected shift value.")(static_cast <bool> (LZ <= 32 && "Unexpected shift value."
) ? void (0) : __assert_fail ("LZ <= 32 && \"Unexpected shift value.\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 988, __extension__ __PRETTY_FUNCTION__));

989

Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,

990

getI32Imm((Imm >> (48 - LZ) & 0xffff)));

991

return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),

992

getI32Imm(48 - LZ), getI32Imm(LZ));

993

}

994

// 2-4) Patterns : {zeros}{ones}{15-bit value}{ones}

995

// {ones}{15-bit value}{ones}

996

// We can take advantage of LI's sign-extension semantics to generate leading

997

// ones, and then use RLDICL to mask off the ones in left sides (if required)

998

// after rotation.

999

//

1000

// +-LZ-FO||-15-bit-||--TO--+ +-------------|--16-bit--+

1001

// |00011110bbbbbbbbb1111111| -> |000000000011110bbbbbbbbb|

1002

// +------------------------+ +------------------------+

1003

// 63 0 63 0

1004

// Imm (Imm >> TO) & 0xffff

1005

// +----sext-----|--16-bit--+ +LZ|---------------------+

1006

// |111111111111110bbbbbbbbb| -> |00011110bbbbbbbbb1111111|

1007

// +------------------------+ +------------------------+

1008

// 63 0 63 0

1009

// LI8: sext many leading zeros RLDICL: rotate left TO, clear left LZ

1010

if ((LZ + FO + TO) > 48) {

1011

Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,

1012

getI32Imm((Imm >> TO) & 0xffff));

1013

return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),

1014

getI32Imm(TO), getI32Imm(LZ));

1015

}

1016

// 2-5) Pattern : {32 zeros}{****}{0}{15-bit value}

1017

// If Hi32 is zero and the Lo16(in Lo32) can be presented as a positive 16 bit

1018

// value, we can use LI for Lo16 without generating leading ones then add the

1019

// Hi16(in Lo32).

1020

if (LZ == 32 && ((Lo32 & 0x8000) == 0)) {

1021

Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,

1022

getI32Imm(Lo32 & 0xffff));

1023

return CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64, SDValue(Result, 0),

1024

getI32Imm(Lo32 >> 16));

1025

}

1026

// 2-6) Patterns : {******}{49 zeros}{******}

1027

// {******}{49 ones}{******}

1028

// If the Imm contains 49 consecutive zeros/ones, it means that a total of 15

1029

// bits remain on both sides. Rotate right the Imm to construct an int<16>

1030

// value, use LI for int<16> value and then use RLDICL without mask to rotate

1031

// it back.

1032

//

1033

// 1) findContiguousZerosAtLeast(Imm, 49)

1034

// +------|--zeros-|------+ +---ones--||---15 bit--+

1035

// |bbbbbb0000000000aaaaaa| -> |0000000000aaaaaabbbbbb|

1036

// +----------------------+ +----------------------+

1037

// 63 0 63 0

1038

//

1039

// 2) findContiguousZerosAtLeast(~Imm, 49)

1040

// +------|--ones--|------+ +---ones--||---15 bit--+

1041

// |bbbbbb1111111111aaaaaa| -> |1111111111aaaaaabbbbbb|

1042

// +----------------------+ +----------------------+

1043

// 63 0 63 0

1044

if ((Shift = findContiguousZerosAtLeast(Imm, 49)) ||

1045

(Shift = findContiguousZerosAtLeast(~Imm, 49))) {

1046

uint64_t RotImm = APInt(64, Imm).rotr(Shift).getZExtValue();

1047

Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,

1048

getI32Imm(RotImm & 0xffff));

1049

return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),

1050

getI32Imm(Shift), getI32Imm(0));

1051

}

1052

1053

// Following patterns use 3 instructions to materialize the Imm.

1054

InstCnt = 3;

1055

// 3-1) Patterns : {zeros}{ones}{31-bit value}{zeros}

1056

// {zeros}{31-bit value}{zeros}

1057

// {zeros}{ones}{31-bit value}

1058

// {ones}{31-bit value}{zeros}

1059

// We can take advantage of LIS's sign-extension semantics to generate leading

1060

// ones, add the remaining bits with ORI, and then use RLDIC to mask off the

1061

// ones in both sides after rotation.

1062

if ((LZ + FO + TZ) > 32) {

1063

uint64_t ImmHi16 = (Imm >> (TZ + 16)) & 0xffff;

1064

unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;

1065

Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));

1066

Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),

1067

getI32Imm((Imm >> TZ) & 0xffff));

1068

return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0),

1069

getI32Imm(TZ), getI32Imm(LZ));

1070

}

1071

// 3-2) Pattern : {zeros}{31-bit value}{ones}

1072

// Shift right the Imm by (32 - LZ) bits to construct a negtive 32 bits value,

1073

// therefore we can take advantage of LIS's sign-extension semantics, add

1074

// the remaining bits with ORI, and then mask them off after rotation.

1075

// This is similar to Pattern 2-3, please refer to the diagram there.

1076

if ((LZ + TO) > 32) {

1077

// Since the immediates with (LZ > 32) have been handled by previous

1078

// patterns, here we have (LZ <= 32) to make sure we will not shift right

1079

// the Imm by a negative value.

1080

assert(LZ <= 32 && "Unexpected shift value.")(static_cast <bool> (LZ <= 32 && "Unexpected shift value."
) ? void (0) : __assert_fail ("LZ <= 32 && \"Unexpected shift value.\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 1080, __extension__ __PRETTY_FUNCTION__));

1081

Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64,

1082

getI32Imm((Imm >> (48 - LZ)) & 0xffff));

1083

Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),

1084

getI32Imm((Imm >> (32 - LZ)) & 0xffff));

1085

return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),

1086

getI32Imm(32 - LZ), getI32Imm(LZ));

1087

}

1088

// 3-3) Patterns : {zeros}{ones}{31-bit value}{ones}

1089

// {ones}{31-bit value}{ones}

1090

// We can take advantage of LIS's sign-extension semantics to generate leading

1091

// ones, add the remaining bits with ORI, and then use RLDICL to mask off the

1092

// ones in left sides (if required) after rotation.

1093

// This is similar to Pattern 2-4, please refer to the diagram there.

1094

if ((LZ + FO + TO) > 32) {

1095

Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64,

1096

getI32Imm((Imm >> (TO + 16)) & 0xffff));

1097

Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),

1098

getI32Imm((Imm >> TO) & 0xffff));

1099

return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),

1100

getI32Imm(TO), getI32Imm(LZ));

1101

}

1102

// 3-4) Patterns : High word == Low word

1103

if (Hi32 == Lo32) {

1104

// Handle the first 32 bits.

1105

uint64_t ImmHi16 = (Lo32 >> 16) & 0xffff;

1106

unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;

1107

Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));

1108

Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),

1109

getI32Imm(Lo32 & 0xffff));

1110

// Use rldimi to insert the Low word into High word.

1111

SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(32),

1112

getI32Imm(0)};

1113

return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);

1114

}

1115

// 3-5) Patterns : {******}{33 zeros}{******}

1116

// {******}{33 ones}{******}

1117

// If the Imm contains 33 consecutive zeros/ones, it means that a total of 31

1118

// bits remain on both sides. Rotate right the Imm to construct an int<32>

1119

// value, use LIS + ORI for int<32> value and then use RLDICL without mask to

1120

// rotate it back.

1121

// This is similar to Pattern 2-6, please refer to the diagram there.

1122

if ((Shift = findContiguousZerosAtLeast(Imm, 33)) ||

1123

(Shift = findContiguousZerosAtLeast(~Imm, 33))) {

1124

uint64_t RotImm = APInt(64, Imm).rotr(Shift).getZExtValue();

1125

uint64_t ImmHi16 = (RotImm >> 16) & 0xffff;

1126

unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;

1127

Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));

1128

Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),

1129

getI32Imm(RotImm & 0xffff));

1130

return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),

1131

getI32Imm(Shift), getI32Imm(0));

1132

}

1133

1134

InstCnt = 0;

1135

return nullptr;

1136

}

1137

1138

// Try to select instructions to generate a 64 bit immediate using prefix as

1139

// well as non prefix instructions. The function will return the SDNode

1140

// to materialize that constant or it will return nullptr if it does not

1141

// find one. The variable InstCnt is set to the number of instructions that

1142

// were selected.

1143

static SDNode *selectI64ImmDirectPrefix(SelectionDAG *CurDAG, const SDLoc &dl,

1144

uint64_t Imm, unsigned &InstCnt) {

1145

unsigned TZ = countTrailingZeros<uint64_t>(Imm);

1146

unsigned LZ = countLeadingZeros<uint64_t>(Imm);

1147

unsigned TO = countTrailingOnes<uint64_t>(Imm);

1148

unsigned FO = countLeadingOnes<uint64_t>(LZ == 64 ? 0 : (Imm << LZ));

1149

unsigned Hi32 = Hi_32(Imm);

1150

unsigned Lo32 = Lo_32(Imm);

1151

1152

auto getI32Imm = [CurDAG, dl](unsigned Imm) {

1153

return CurDAG->getTargetConstant(Imm, dl, MVT::i32);

1154

};

1155

1156

auto getI64Imm = [CurDAG, dl](uint64_t Imm) {

1157

return CurDAG->getTargetConstant(Imm, dl, MVT::i64);

1158

};

1159

1160

// Following patterns use 1 instruction to materialize Imm.

1161

InstCnt = 1;

1162

1163

// The pli instruction can materialize up to 34 bits directly.

1164

// If a constant fits within 34-bits, emit the pli instruction here directly.

1165

if (isInt<34>(Imm))

1166

return CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,

1167

CurDAG->getTargetConstant(Imm, dl, MVT::i64));

1168

1169

// Require at least two instructions.

1170

InstCnt = 2;

1171

SDNode *Result = nullptr;

1172

// Patterns : {zeros}{ones}{33-bit value}{zeros}

1173

// {zeros}{33-bit value}{zeros}

1174

// {zeros}{ones}{33-bit value}

1175

// {ones}{33-bit value}{zeros}

1176

// We can take advantage of PLI's sign-extension semantics to generate leading

1177

// ones, and then use RLDIC to mask off the ones on both sides after rotation.

1178

if ((LZ + FO + TZ) > 30) {

1179

APInt SignedInt34 = APInt(34, (Imm >> TZ) & 0x3ffffffff);

1180

APInt Extended = SignedInt34.sext(64);

1181

Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,

1182

getI64Imm(*Extended.getRawData()));

1183

return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0),

1184

getI32Imm(TZ), getI32Imm(LZ));

1185

}

1186

// Pattern : {zeros}{33-bit value}{ones}

1187

// Shift right the Imm by (30 - LZ) bits to construct a negative 34 bit value,

1188

// therefore we can take advantage of PLI's sign-extension semantics, and then

1189

// mask them off after rotation.

1190

//

1191

// +--LZ--||-33-bit-||--TO--+ +-------------|--34-bit--+

1192

// |00000001bbbbbbbbb1111111| -> |00000000000001bbbbbbbbb1|

1193

// +------------------------+ +------------------------+

1194

// 63 0 63 0

1195

//

1196

// +----sext-----|--34-bit--+ +clear-|-----------------+

1197

// |11111111111111bbbbbbbbb1| -> |00000001bbbbbbbbb1111111|

1198

// +------------------------+ +------------------------+

1199

// 63 0 63 0

1200

if ((LZ + TO) > 30) {

1201

APInt SignedInt34 = APInt(34, (Imm >> (30 - LZ)) & 0x3ffffffff);

1202

APInt Extended = SignedInt34.sext(64);

1203

Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,

1204

getI64Imm(*Extended.getRawData()));

1205

return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),

1206

getI32Imm(30 - LZ), getI32Imm(LZ));

1207

}

1208

// Patterns : {zeros}{ones}{33-bit value}{ones}

1209

// {ones}{33-bit value}{ones}

1210

// Similar to LI we can take advantage of PLI's sign-extension semantics to

1211

// generate leading ones, and then use RLDICL to mask off the ones in left

1212

// sides (if required) after rotation.

1213

if ((LZ + FO + TO) > 30) {

1214

APInt SignedInt34 = APInt(34, (Imm >> TO) & 0x3ffffffff);

1215

APInt Extended = SignedInt34.sext(64);

1216

Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,

1217

getI64Imm(*Extended.getRawData()));

1218

return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),

1219

getI32Imm(TO), getI32Imm(LZ));

1220

}

1221

// Patterns : {******}{31 zeros}{******}

1222

// : {******}{31 ones}{******}

1223

// If Imm contains 31 consecutive zeros/ones then the remaining bit count

1224

// is 33. Rotate right the Imm to construct a int<33> value, we can use PLI

1225

// for the int<33> value and then use RLDICL without a mask to rotate it back.

1226

//

1227

// +------|--ones--|------+ +---ones--||---33 bit--+

1228

// |bbbbbb1111111111aaaaaa| -> |1111111111aaaaaabbbbbb|

1229

// +----------------------+ +----------------------+

1230

// 63 0 63 0

1231

for (unsigned Shift = 0; Shift < 63; ++Shift) {

1232

uint64_t RotImm = APInt(64, Imm).rotr(Shift).getZExtValue();

1233

if (isInt<34>(RotImm)) {

1234

Result =

1235

CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(RotImm));

1236

return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,

1237

SDValue(Result, 0), getI32Imm(Shift),

1238

getI32Imm(0));

1239

}

1240

}

1241

1242

// Patterns : High word == Low word

1243

// This is basically a splat of a 32 bit immediate.

1244

if (Hi32 == Lo32) {

1245

Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Hi32));

1246

SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(32),

1247

getI32Imm(0)};

1248

return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);

1249

}

1250

1251

InstCnt = 3;

1252

// Catch-all

1253

// This pattern can form any 64 bit immediate in 3 instructions.

1254

SDNode *ResultHi =

1255

CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Hi32));

1256

SDNode *ResultLo =

1257

CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Lo32));

1258

SDValue Ops[] = {SDValue(ResultLo, 0), SDValue(ResultHi, 0), getI32Imm(32),

1259

getI32Imm(0)};

1260

return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);

1261

}

1262

1263

// Materialize the 64-bit immediate Imm and return the node producing it.
//
// Three strategies are tried in order:
//   1. selectI64ImmDirect on the whole value.
//   2. If the subtarget has prefixed instructions and (1) did not finish in
//      exactly one instruction, selectI64ImmDirectPrefix; it is used only when
//      (1) produced nothing or needed strictly more instructions. On a tie the
//      non-prefixed sequence is kept.
//   3. Otherwise build the upper 32 bits with selectI64ImmDirect and OR the
//      two 16-bit halves of the low word in with ORIS8 / ORI8.
//
// If InstCnt is non-null it receives the instruction count of the sequence
// that was chosen.
static SDNode *selectI64Imm(SelectionDAG *CurDAG, const SDLoc &dl, uint64_t Imm,
                            unsigned *InstCnt = nullptr) {
  // Hands the instruction count back to the caller when one was requested.
  const auto ReportCount = [InstCnt](unsigned Count) {
    if (InstCnt)
      *InstCnt = Count;
  };

  // The direct (non-prefixed) selection never uses more than 3 instructions.
  unsigned NumDirect = 0;
  SDNode *Direct = selectI64ImmDirect(CurDAG, dl, Imm, NumDirect);

  const PPCSubtarget &ST =
      CurDAG->getMachineFunction().getSubtarget<PPCSubtarget>();

  // A single-instruction direct result cannot be beaten, so only consult the
  // prefixed selector when there is something to gain.
  if (ST.hasPrefixInstrs() && NumDirect != 1) {
    unsigned NumPrefixed = 0;
    SDNode *Prefixed = selectI64ImmDirectPrefix(CurDAG, dl, Imm, NumPrefixed);
    const bool PrefixedWins =
        Prefixed && (!Direct || NumPrefixed < NumDirect);
    if (PrefixedWins) {
      ReportCount(NumPrefixed);
      return Prefixed;
    }
  }

  if (Direct) {
    ReportCount(NumDirect);
    return Direct;
  }

  // Fallback: materialize the upper 32 bits first. The node returned here is
  // used without a null check, exactly as the surrounding code expects.
  SDNode *Node =
      selectI64ImmDirect(CurDAG, dl, Imm & 0xffffffff00000000, NumDirect);

  // Then OR in whichever 16-bit halves of the low word are non-zero.
  if (const uint32_t UpperHalf = (Lo_32(Imm) >> 16) & 0xffff) {
    Node = CurDAG->getMachineNode(
        PPC::ORIS8, dl, MVT::i64, SDValue(Node, 0),
        CurDAG->getTargetConstant(UpperHalf, dl, MVT::i32));
    ++NumDirect;
  }
  if (const uint32_t LowerHalf = Lo_32(Imm) & 0xffff) {
    Node = CurDAG->getMachineNode(
        PPC::ORI8, dl, MVT::i64, SDValue(Node, 0),
        CurDAG->getTargetConstant(LowerHalf, dl, MVT::i32));
    ++NumDirect;
  }

  ReportCount(NumDirect);
  return Node;
}

1317

1318

// Select a 64-bit constant.

1319

static SDNode *selectI64Imm(SelectionDAG *CurDAG, SDNode *N) {

1320

SDLoc dl(N);

1321

1322

// Get 64 bit value.

1323

int64_t Imm = cast<ConstantSDNode>(N)->getZExtValue();

1324

if (unsigned MinSize = allUsesTruncate(CurDAG, N)) {

1325

uint64_t SextImm = SignExtend64(Imm, MinSize);

1326

SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64);

1327

if (isInt<16>(SextImm))

1328

return CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm);

1329

}

1330

return selectI64Imm(CurDAG, dl, Imm);

1331

}

1332

1333

namespace {

1334

1335

class BitPermutationSelector {

1336

struct ValueBit {

1337

SDValue V;

1338

1339

// The bit number in the value, using a convention where bit 0 is the

1340

// lowest-order bit.

1341

unsigned Idx;

1342

1343

// ConstZero means a bit we need to mask off.

1344

// Variable is a bit comes from an input variable.

1345

// VariableKnownToBeZero is also a bit comes from an input variable,

1346

// but it is known to be already zero. So we do not need to mask them.

1347

enum Kind {

1348

ConstZero,

1349

Variable,

1350

VariableKnownToBeZero

1351

} K;

1352

1353

ValueBit(SDValue V, unsigned I, Kind K = Variable)

1354

: V(V), Idx(I), K(K) {}

1355

ValueBit(Kind K = Variable)

1356

: V(SDValue(nullptr, 0)), Idx(UINT32_MAX(4294967295U)), K(K) {}

1357

1358

bool isZero() const {

1359

return K == ConstZero || K == VariableKnownToBeZero;

1360

}

1361

1362

bool hasValue() const {

1363

return K == Variable || K == VariableKnownToBeZero;

1364

}

1365

1366

SDValue getValue() const {

1367

assert(hasValue() && "Cannot get the value of a constant bit")(static_cast <bool> (hasValue() && "Cannot get the value of a constant bit"
) ? void (0) : __assert_fail ("hasValue() && \"Cannot get the value of a constant bit\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 1367, __extension__ __PRETTY_FUNCTION__));

1368

return V;

1369

}

1370

1371

unsigned getValueBitIndex() const {

1372

assert(hasValue() && "Cannot get the value bit index of a constant bit")(static_cast <bool> (hasValue() && "Cannot get the value bit index of a constant bit"
) ? void (0) : __assert_fail ("hasValue() && \"Cannot get the value bit index of a constant bit\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 1372, __extension__ __PRETTY_FUNCTION__));

1373

return Idx;

1374

}

1375

};

1376

1377

// A bit group has the same underlying value and the same rotate factor.

1378

struct BitGroup {

1379

SDValue V;

1380

unsigned RLAmt;

1381

unsigned StartIdx, EndIdx;

1382

1383

// This rotation amount assumes that the lower 32 bits of the quantity are

1384

// replicated in the high 32 bits by the rotation operator (which is done

1385

// by rlwinm and friends in 64-bit mode).

1386

bool Repl32;

1387

// Did converting to Repl32 == true change the rotation factor? If it did,

1388

// it decreased it by 32.

1389

bool Repl32CR;

1390

// Was this group coalesced after setting Repl32 to true?

1391

bool Repl32Coalesced;

1392

1393

BitGroup(SDValue V, unsigned R, unsigned S, unsigned E)

1394

: V(V), RLAmt(R), StartIdx(S), EndIdx(E), Repl32(false), Repl32CR(false),

1395

Repl32Coalesced(false) {

1396

LLVM_DEBUG(dbgs() << "\tbit group for " << V.getNode() << " RLAmt = " << Rdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\tbit group for " <<
V.getNode() << " RLAmt = " << R << " [" <<
S << ", " << E << "]\n"; } } while (false)

1397

<< " [" << S << ", " << E << "]\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\tbit group for " <<
V.getNode() << " RLAmt = " << R << " [" <<
S << ", " << E << "]\n"; } } while (false);

1398

}

1399

};

1400

1401

// Information on each (Value, RLAmt) pair (like the number of groups

1402

// associated with each) used to choose the lowering method.

1403

struct ValueRotInfo {

1404

SDValue V;

1405

unsigned RLAmt = std::numeric_limits<unsigned>::max();

1406

unsigned NumGroups = 0;

1407

unsigned FirstGroupStartIdx = std::numeric_limits<unsigned>::max();

1408

bool Repl32 = false;

1409

1410

ValueRotInfo() = default;

1411

1412

// For sorting (in reverse order) by NumGroups, and then by

1413

// FirstGroupStartIdx.

1414

bool operator < (const ValueRotInfo &Other) const {

1415

// We need to sort so that the non-Repl32 come first because, when we're

1416

// doing masking, the Repl32 bit groups might be subsumed into the 64-bit

1417

// masking operation.

1418

if (Repl32 < Other.Repl32)

1419

return true;

1420

else if (Repl32 > Other.Repl32)

1421

return false;

1422

else if (NumGroups > Other.NumGroups)

1423

return true;

1424

else if (NumGroups < Other.NumGroups)

1425

return false;

1426

else if (RLAmt == 0 && Other.RLAmt != 0)

1427

return true;

1428

else if (RLAmt != 0 && Other.RLAmt == 0)

1429

return false;

1430

else if (FirstGroupStartIdx < Other.FirstGroupStartIdx)

1431

return true;

1432

return false;

1433

}

1434

};

1435

1436

using ValueBitsMemoizedValue = std::pair<bool, SmallVector<ValueBit, 64>>;

1437

using ValueBitsMemoizer =

1438

DenseMap<SDValue, std::unique_ptr<ValueBitsMemoizedValue>>;

1439

ValueBitsMemoizer Memoizer;

1440

1441

// Return a pair of bool and a SmallVector pointer to a memoization entry.

1442

// The bool is true if something interesting was deduced, otherwise if we're

1443

// providing only a generic representation of V (or something else likewise

1444

// uninteresting for instruction selection) through the SmallVector.

1445

std::pair<bool, SmallVector<ValueBit, 64> *> getValueBits(SDValue V,

1446

unsigned NumBits) {

1447

auto &ValueEntry = Memoizer[V];

1448

if (ValueEntry)

1449

return std::make_pair(ValueEntry->first, &ValueEntry->second);

1450

ValueEntry.reset(new ValueBitsMemoizedValue());

1451

bool &Interesting = ValueEntry->first;

1452

SmallVector<ValueBit, 64> &Bits = ValueEntry->second;

1453

Bits.resize(NumBits);

1454

1455

switch (V.getOpcode()) {

1456

default: break;

1457

case ISD::ROTL:

1458

if (isa<ConstantSDNode>(V.getOperand(1))) {

1459

unsigned RotAmt = V.getConstantOperandVal(1);

1460

1461

const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;

1462

1463

for (unsigned i = 0; i < NumBits; ++i)

1464

Bits[i] = LHSBits[i < RotAmt ? i + (NumBits - RotAmt) : i - RotAmt];

1465

1466

return std::make_pair(Interesting = true, &Bits);

1467

}

1468

break;

1469

case ISD::SHL:

1470

case PPCISD::SHL:

1471

if (isa<ConstantSDNode>(V.getOperand(1))) {

1472

unsigned ShiftAmt = V.getConstantOperandVal(1);

1473

1474

const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;

1475

1476

for (unsigned i = ShiftAmt; i < NumBits; ++i)

1477

Bits[i] = LHSBits[i - ShiftAmt];

1478

1479

for (unsigned i = 0; i < ShiftAmt; ++i)

1480

Bits[i] = ValueBit(ValueBit::ConstZero);

1481

1482

return std::make_pair(Interesting = true, &Bits);

1483

}

1484

break;

1485

case ISD::SRL:

1486

case PPCISD::SRL:

1487

if (isa<ConstantSDNode>(V.getOperand(1))) {

1488

unsigned ShiftAmt = V.getConstantOperandVal(1);

1489

1490

const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;

1491

1492

for (unsigned i = 0; i < NumBits - ShiftAmt; ++i)

1493

Bits[i] = LHSBits[i + ShiftAmt];

1494

1495

for (unsigned i = NumBits - ShiftAmt; i < NumBits; ++i)

1496

Bits[i] = ValueBit(ValueBit::ConstZero);

1497

1498

return std::make_pair(Interesting = true, &Bits);

1499

}

1500

break;

1501

case ISD::AND:

1502

if (isa<ConstantSDNode>(V.getOperand(1))) {

1503

uint64_t Mask = V.getConstantOperandVal(1);

1504

1505

const SmallVector<ValueBit, 64> *LHSBits;

1506

// Mark this as interesting, only if the LHS was also interesting. This

1507

// prevents the overall procedure from matching a single immediate 'and'

1508

// (which is non-optimal because such an and might be folded with other

1509

// things if we don't select it here).

1510

std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0), NumBits);

1511

1512

for (unsigned i = 0; i < NumBits; ++i)

1513

if (((Mask >> i) & 1) == 1)

1514

Bits[i] = (*LHSBits)[i];

1515

else {

1516

// AND instruction masks this bit. If the input is already zero,

1517

// we have nothing to do here. Otherwise, make the bit ConstZero.

1518

if ((*LHSBits)[i].isZero())

1519

Bits[i] = (*LHSBits)[i];

1520

else

1521

Bits[i] = ValueBit(ValueBit::ConstZero);

1522

}

1523

1524

return std::make_pair(Interesting, &Bits);

1525

}

1526

break;

1527

case ISD::OR: {

1528

const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;

1529

const auto &RHSBits = *getValueBits(V.getOperand(1), NumBits).second;

1530

1531

bool AllDisjoint = true;

1532

SDValue LastVal = SDValue();

1533

unsigned LastIdx = 0;

1534

for (unsigned i = 0; i < NumBits; ++i) {

1535

if (LHSBits[i].isZero() && RHSBits[i].isZero()) {

1536

// If both inputs are known to be zero and one is ConstZero and

1537

// another is VariableKnownToBeZero, we can select whichever

1538

// we like. To minimize the number of bit groups, we select

1539

// VariableKnownToBeZero if this bit is the next bit of the same

1540

// input variable from the previous bit. Otherwise, we select

1541

// ConstZero.

1542

if (LHSBits[i].hasValue() && LHSBits[i].getValue() == LastVal &&

1543

LHSBits[i].getValueBitIndex() == LastIdx + 1)

1544

Bits[i] = LHSBits[i];

1545

else if (RHSBits[i].hasValue() && RHSBits[i].getValue() == LastVal &&

1546

RHSBits[i].getValueBitIndex() == LastIdx + 1)

1547

Bits[i] = RHSBits[i];

1548

else

1549

Bits[i] = ValueBit(ValueBit::ConstZero);

1550

}

1551

else if (LHSBits[i].isZero())

1552

Bits[i] = RHSBits[i];

1553

else if (RHSBits[i].isZero())

1554

Bits[i] = LHSBits[i];

1555

else {

1556

AllDisjoint = false;

1557

break;

1558

}

1559

// We remember the value and bit index of this bit.

1560

if (Bits[i].hasValue()) {

1561

LastVal = Bits[i].getValue();

1562

LastIdx = Bits[i].getValueBitIndex();

1563

}

1564

else {

1565

if (LastVal) LastVal = SDValue();

1566

LastIdx = 0;

1567

}

1568

}

1569

1570

if (!AllDisjoint)

1571

break;

1572

1573

return std::make_pair(Interesting = true, &Bits);

1574

}

1575

case ISD::ZERO_EXTEND: {

1576

// We support only the case with zero extension from i32 to i64 so far.

1577

if (V.getValueType() != MVT::i64 ||

1578

V.getOperand(0).getValueType() != MVT::i32)

1579

break;

1580

1581

const SmallVector<ValueBit, 64> *LHSBits;

1582

const unsigned NumOperandBits = 32;

1583

std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0),

1584

NumOperandBits);

1585

1586

for (unsigned i = 0; i < NumOperandBits; ++i)

1587

Bits[i] = (*LHSBits)[i];

1588

1589

for (unsigned i = NumOperandBits; i < NumBits; ++i)

1590

Bits[i] = ValueBit(ValueBit::ConstZero);

1591

1592

return std::make_pair(Interesting, &Bits);

1593

}

1594

case ISD::TRUNCATE: {

1595

EVT FromType = V.getOperand(0).getValueType();

1596

EVT ToType = V.getValueType();

1597

// We support only the case with truncate from i64 to i32.

1598

if (FromType != MVT::i64 || ToType != MVT::i32)

1599

break;

1600

const unsigned NumAllBits = FromType.getSizeInBits();

1601

SmallVector<ValueBit, 64> *InBits;

1602

std::tie(Interesting, InBits) = getValueBits(V.getOperand(0),

1603

NumAllBits);

1604

const unsigned NumValidBits = ToType.getSizeInBits();

1605

1606

// A 32-bit instruction cannot touch upper 32-bit part of 64-bit value.

1607

// So, we cannot include this truncate.

1608

bool UseUpper32bit = false;

1609

for (unsigned i = 0; i < NumValidBits; ++i)

1610

if ((*InBits)[i].hasValue() && (*InBits)[i].getValueBitIndex() >= 32) {

1611

UseUpper32bit = true;

1612

break;

1613

}

1614

if (UseUpper32bit)

1615

break;

1616

1617

for (unsigned i = 0; i < NumValidBits; ++i)

1618

Bits[i] = (*InBits)[i];

1619

1620

return std::make_pair(Interesting, &Bits);

1621

}

1622

case ISD::AssertZext: {

1623

// For AssertZext, we look through the operand and

1624

// mark the bits known to be zero.

1625

const SmallVector<ValueBit, 64> *LHSBits;

1626

std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0),

1627

NumBits);

1628

1629

EVT FromType = cast<VTSDNode>(V.getOperand(1))->getVT();

1630

const unsigned NumValidBits = FromType.getSizeInBits();

1631

for (unsigned i = 0; i < NumValidBits; ++i)

1632

Bits[i] = (*LHSBits)[i];

1633

1634

// These bits are known to be zero but the AssertZext may be from a value

1635

// that already has some constant zero bits (i.e. from a masking and).

1636

for (unsigned i = NumValidBits; i < NumBits; ++i)

1637

Bits[i] = (*LHSBits)[i].hasValue()

1638

? ValueBit((*LHSBits)[i].getValue(),

1639

(*LHSBits)[i].getValueBitIndex(),

1640

ValueBit::VariableKnownToBeZero)

1641

: ValueBit(ValueBit::ConstZero);

1642

1643

return std::make_pair(Interesting, &Bits);

1644

}

1645

case ISD::LOAD:

1646

LoadSDNode *LD = cast<LoadSDNode>(V);

1647

if (ISD::isZEXTLoad(V.getNode()) && V.getResNo() == 0) {

1648

EVT VT = LD->getMemoryVT();

1649

const unsigned NumValidBits = VT.getSizeInBits();

1650

1651

for (unsigned i = 0; i < NumValidBits; ++i)

1652

Bits[i] = ValueBit(V, i);

1653

1654

// These bits are known to be zero.

1655

for (unsigned i = NumValidBits; i < NumBits; ++i)

1656

Bits[i] = ValueBit(V, i, ValueBit::VariableKnownToBeZero);

1657

1658

// Zero-extending load itself cannot be optimized. So, it is not

1659

// interesting by itself though it gives useful information.

1660

return std::make_pair(Interesting = false, &Bits);

1661

}

1662

break;

1663

}

1664

1665

for (unsigned i = 0; i < NumBits; ++i)

1666

Bits[i] = ValueBit(V, i);

1667

1668

return std::make_pair(Interesting = false, &Bits);

1669

}

1670

1671

// For each value (except the constant ones), compute the left-rotate amount

1672

// to get it from its original to final position.

1673

void computeRotationAmounts() {

1674

NeedMask = false;

1675

RLAmt.resize(Bits.size());

1676

for (unsigned i = 0; i < Bits.size(); ++i)

1677

if (Bits[i].hasValue()) {

1678

unsigned VBI = Bits[i].getValueBitIndex();

1679

if (i >= VBI)

1680

RLAmt[i] = i - VBI;

1681

else

1682

RLAmt[i] = Bits.size() - (VBI - i);

1683

} else if (Bits[i].isZero()) {

1684

NeedMask = true;

1685

RLAmt[i] = UINT32_MAX(4294967295U);

1686

} else {

1687

llvm_unreachable("Unknown value bit type")::llvm::llvm_unreachable_internal("Unknown value bit type", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 1687);

1688

}

1689

}

1690

1691

// Collect groups of consecutive bits with the same underlying value and

1692

// rotation factor. If we're doing late masking, we ignore zeros, otherwise

1693

// they break up groups.

1694

void collectBitGroups(bool LateMask) {

1695

BitGroups.clear();

1696

1697

unsigned LastRLAmt = RLAmt[0];

1698

SDValue LastValue = Bits[0].hasValue() ? Bits[0].getValue() : SDValue();

1699

unsigned LastGroupStartIdx = 0;

1700

bool IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue();

1701

for (unsigned i = 1; i < Bits.size(); ++i) {

1702

unsigned ThisRLAmt = RLAmt[i];

1703

SDValue ThisValue = Bits[i].hasValue() ? Bits[i].getValue() : SDValue();

1704

if (LateMask && !ThisValue) {

1705

ThisValue = LastValue;

1706

ThisRLAmt = LastRLAmt;

1707

// If we're doing late masking, then the first bit group always starts

1708

// at zero (even if the first bits were zero).

1709

if (BitGroups.empty())

1710

LastGroupStartIdx = 0;

1711

}

1712

1713

// If this bit is known to be zero and the current group is a bit group

1714

// of zeros, we do not need to terminate the current bit group even the

1715

// Value or RLAmt does not match here. Instead, we terminate this group

1716

// when the first non-zero bit appears later.

1717

if (IsGroupOfZeros && Bits[i].isZero())

1718

continue;

1719

1720

// If this bit has the same underlying value and the same rotate factor as

1721

// the last one, then they're part of the same group.

1722

if (ThisRLAmt == LastRLAmt && ThisValue == LastValue)

1723

// We cannot continue the current group if this bits is not known to

1724

// be zero in a bit group of zeros.

1725

if (!(IsGroupOfZeros && ThisValue && !Bits[i].isZero()))

1726

continue;

1727

1728

if (LastValue.getNode())

1729

BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,

1730

i-1));

1731

LastRLAmt = ThisRLAmt;

1732

LastValue = ThisValue;

1733

LastGroupStartIdx = i;

1734

IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue();

1735

}

1736

if (LastValue.getNode())

1737

BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,

1738

Bits.size()-1));

1739

1740

if (BitGroups.empty())

1741

return;

1742

1743

// We might be able to combine the first and last groups.

1744

if (BitGroups.size() > 1) {

1745

// If the first and last groups are the same, then remove the first group

1746

// in favor of the last group, making the ending index of the last group

1747

// equal to the ending index of the to-be-removed first group.

1748

if (BitGroups[0].StartIdx == 0 &&

1749

BitGroups[BitGroups.size()-1].EndIdx == Bits.size()-1 &&

1750

BitGroups[0].V == BitGroups[BitGroups.size()-1].V &&

1751

BitGroups[0].RLAmt == BitGroups[BitGroups.size()-1].RLAmt) {

1752

LLVM_DEBUG(dbgs() << "\tcombining final bit group with initial one\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\tcombining final bit group with initial one\n"
; } } while (false);

1753

BitGroups[BitGroups.size()-1].EndIdx = BitGroups[0].EndIdx;

1754

BitGroups.erase(BitGroups.begin());

1755

}

1756

}

1757

}

1758

1759

// Take all (SDValue, RLAmt) pairs and sort them by the number of groups

1760

// associated with each. If the number of groups are same, we prefer a group

1761

// which does not require rotate, i.e. RLAmt is 0, to avoid the first rotate

1762

// instruction. If there is a degeneracy, pick the one that occurs

1763

// first (in the final value).

1764

void collectValueRotInfo() {

1765

ValueRots.clear();

1766

1767

for (auto &BG : BitGroups) {

1768

unsigned RLAmtKey = BG.RLAmt + (BG.Repl32 ? 64 : 0);

1769

ValueRotInfo &VRI = ValueRots[std::make_pair(BG.V, RLAmtKey)];

1770

VRI.V = BG.V;

1771

VRI.RLAmt = BG.RLAmt;

1772

VRI.Repl32 = BG.Repl32;

1773

VRI.NumGroups += 1;

1774

VRI.FirstGroupStartIdx = std::min(VRI.FirstGroupStartIdx, BG.StartIdx);

1775

}

1776

1777

// Now that we've collected the various ValueRotInfo instances, we need to

1778

// sort them.

1779

ValueRotsVec.clear();

1780

for (auto &I : ValueRots) {

1781

ValueRotsVec.push_back(I.second);

1782

}

1783

llvm::sort(ValueRotsVec);

1784

}

1785

1786

// In 64-bit mode, rlwinm and friends have a rotation operator that

1787

// replicates the low-order 32 bits into the high-order 32-bits. The mask

1788

// indices of these instructions can only be in the lower 32 bits, so they

1789

// can only represent some 64-bit bit groups. However, when they can be used,

1790

// the 32-bit replication can be used to represent, as a single bit group,

1791

// otherwise separate bit groups. We'll convert to replicated-32-bit bit

1792

// groups when possible. Returns true if any of the bit groups were

1793

// converted.

1794

void assignRepl32BitGroups() {

1795

// If we have bits like this:

1796

//

1797

// Indices: 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0

1798

// V bits: ... 7 6 5 4 3 2 1 0 31 30 29 28 27 26 25 24

1799

// Groups: | RLAmt = 8 | RLAmt = 40 |

1800

//

1801

// But, making use of a 32-bit operation that replicates the low-order 32

1802

// bits into the high-order 32 bits, this can be one bit group with a RLAmt

1803

// of 8.

1804

1805

auto IsAllLow32 = [this](BitGroup & BG) {

1806

if (BG.StartIdx <= BG.EndIdx) {

1807

for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i) {

1808

if (!Bits[i].hasValue())

1809

continue;

1810

if (Bits[i].getValueBitIndex() >= 32)

1811

return false;

1812

}

1813

} else {

1814

for (unsigned i = BG.StartIdx; i < Bits.size(); ++i) {

1815

if (!Bits[i].hasValue())

1816

continue;

1817

if (Bits[i].getValueBitIndex() >= 32)

1818

return false;

1819

}

1820

for (unsigned i = 0; i <= BG.EndIdx; ++i) {

1821

if (!Bits[i].hasValue())

1822

continue;

1823

if (Bits[i].getValueBitIndex() >= 32)

1824

return false;

1825

}

1826

}

1827

1828

return true;

1829

};

1830

1831

for (auto &BG : BitGroups) {

1832

// If this bit group has RLAmt of 0 and will not be merged with

1833

// another bit group, we don't benefit from Repl32. We don't mark

1834

// such group to give more freedom for later instruction selection.

1835

if (BG.RLAmt == 0) {

1836

auto PotentiallyMerged = [this](BitGroup & BG) {

1837

for (auto &BG2 : BitGroups)

1838

if (&BG != &BG2 && BG.V == BG2.V &&

1839

(BG2.RLAmt == 0 || BG2.RLAmt == 32))

1840

return true;

1841

return false;

1842

};

1843

if (!PotentiallyMerged(BG))

1844

continue;

1845

}

1846

if (BG.StartIdx < 32 && BG.EndIdx < 32) {

1847

if (IsAllLow32(BG)) {

1848

if (BG.RLAmt >= 32) {

1849

BG.RLAmt -= 32;

1850

BG.Repl32CR = true;

1851

}

1852

1853

BG.Repl32 = true;

1854

1855

LLVM_DEBUG(dbgs() << "\t32-bit replicated bit group for "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\t32-bit replicated bit group for "
<< BG.V.getNode() << " RLAmt = " << BG.RLAmt
<< " [" << BG.StartIdx << ", " << BG
.EndIdx << "]\n"; } } while (false)

1856

<< BG.V.getNode() << " RLAmt = " << BG.RLAmt << " ["do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\t32-bit replicated bit group for "
<< BG.V.getNode() << " RLAmt = " << BG.RLAmt
<< " [" << BG.StartIdx << ", " << BG
.EndIdx << "]\n"; } } while (false)

1857

<< BG.StartIdx << ", " << BG.EndIdx << "]\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\t32-bit replicated bit group for "
<< BG.V.getNode() << " RLAmt = " << BG.RLAmt
<< " [" << BG.StartIdx << ", " << BG
.EndIdx << "]\n"; } } while (false);

1858

}

1859

}

1860

}

1861

1862

// Now walk through the bit groups, consolidating where possible.

1863

for (auto I = BitGroups.begin(); I != BitGroups.end();) {

1864

// We might want to remove this bit group by merging it with the previous

1865

// group (which might be the ending group).

1866

auto IP = (I == BitGroups.begin()) ?

1867

std::prev(BitGroups.end()) : std::prev(I);

1868

if (I->Repl32 && IP->Repl32 && I->V == IP->V && I->RLAmt == IP->RLAmt &&

1869

I->StartIdx == (IP->EndIdx + 1) % 64 && I != IP) {

1870

1871

LLVM_DEBUG(dbgs() << "\tcombining 32-bit replicated bit group for "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\tcombining 32-bit replicated bit group for "
<< I->V.getNode() << " RLAmt = " << I->
RLAmt << " [" << I->StartIdx << ", " <<
I->EndIdx << "] with group with range [" << IP
->StartIdx << ", " << IP->EndIdx << "]\n"
; } } while (false)

1872

<< I->V.getNode() << " RLAmt = " << I->RLAmt << " ["do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\tcombining 32-bit replicated bit group for "
<< I->V.getNode() << " RLAmt = " << I->
RLAmt << " [" << I->StartIdx << ", " <<
I->EndIdx << "] with group with range [" << IP
->StartIdx << ", " << IP->EndIdx << "]\n"
; } } while (false)

1873

<< I->StartIdx << ", " << I->EndIdxdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\tcombining 32-bit replicated bit group for "
<< I->V.getNode() << " RLAmt = " << I->
RLAmt << " [" << I->StartIdx << ", " <<
I->EndIdx << "] with group with range [" << IP
->StartIdx << ", " << IP->EndIdx << "]\n"
; } } while (false)

1874

<< "] with group with range [" << IP->StartIdx << ", "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\tcombining 32-bit replicated bit group for "
<< I->V.getNode() << " RLAmt = " << I->
RLAmt << " [" << I->StartIdx << ", " <<
I->EndIdx << "] with group with range [" << IP
->StartIdx << ", " << IP->EndIdx << "]\n"
; } } while (false)

1875

<< IP->EndIdx << "]\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\tcombining 32-bit replicated bit group for "
<< I->V.getNode() << " RLAmt = " << I->
RLAmt << " [" << I->StartIdx << ", " <<
I->EndIdx << "] with group with range [" << IP
->StartIdx << ", " << IP->EndIdx << "]\n"
; } } while (false);

1876

1877

IP->EndIdx = I->EndIdx;

1878

IP->Repl32CR = IP->Repl32CR || I->Repl32CR;

1879

IP->Repl32Coalesced = true;

1880

I = BitGroups.erase(I);

1881

continue;

1882

} else {

1883

// There is a special case worth handling: If there is a single group

1884

// covering the entire upper 32 bits, and it can be merged with both

1885

// the next and previous groups (which might be the same group), then

1886

// do so. If it is the same group (so there will be only one group in

1887

// total), then we need to reverse the order of the range so that it

1888

// covers the entire 64 bits.

1889

if (I->StartIdx == 32 && I->EndIdx == 63) {

1890

assert(std::next(I) == BitGroups.end() &&(static_cast <bool> (std::next(I) == BitGroups.end() &&
"bit group ends at index 63 but there is another?") ? void (
0) : __assert_fail ("std::next(I) == BitGroups.end() && \"bit group ends at index 63 but there is another?\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 1891, __extension__ __PRETTY_FUNCTION__))

1891

"bit group ends at index 63 but there is another?")(static_cast <bool> (std::next(I) == BitGroups.end() &&
"bit group ends at index 63 but there is another?") ? void (
0) : __assert_fail ("std::next(I) == BitGroups.end() && \"bit group ends at index 63 but there is another?\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 1891, __extension__ __PRETTY_FUNCTION__));

1892

auto IN = BitGroups.begin();

1893

1894

if (IP->Repl32 && IN->Repl32 && I->V == IP->V && I->V == IN->V &&

1895

(I->RLAmt % 32) == IP->RLAmt && (I->RLAmt % 32) == IN->RLAmt &&

1896

IP->EndIdx == 31 && IN->StartIdx == 0 && I != IP &&

1897

IsAllLow32(*I)) {

1898

1899

LLVM_DEBUG(dbgs() << "\tcombining bit group for " << I->V.getNode()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\tcombining bit group for "
<< I->V.getNode() << " RLAmt = " << I->
RLAmt << " [" << I->StartIdx << ", " <<
I->EndIdx << "] with 32-bit replicated groups with ranges ["
<< IP->StartIdx << ", " << IP->EndIdx
<< "] and [" << IN->StartIdx << ", " <<
IN->EndIdx << "]\n"; } } while (false)

1900

<< " RLAmt = " << I->RLAmt << " [" << I->StartIdxdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\tcombining bit group for "
<< I->V.getNode() << " RLAmt = " << I->
RLAmt << " [" << I->StartIdx << ", " <<
I->EndIdx << "] with 32-bit replicated groups with ranges ["
<< IP->StartIdx << ", " << IP->EndIdx
<< "] and [" << IN->StartIdx << ", " <<
IN->EndIdx << "]\n"; } } while (false)

1901

<< ", " << I->EndIdxdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\tcombining bit group for "
<< I->V.getNode() << " RLAmt = " << I->
RLAmt << " [" << I->StartIdx << ", " <<
I->EndIdx << "] with 32-bit replicated groups with ranges ["
<< IP->StartIdx << ", " << IP->EndIdx
<< "] and [" << IN->StartIdx << ", " <<
IN->EndIdx << "]\n"; } } while (false)

1902

<< "] with 32-bit replicated groups with ranges ["do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\tcombining bit group for "
<< I->V.getNode() << " RLAmt = " << I->
RLAmt << " [" << I->StartIdx << ", " <<
I->EndIdx << "] with 32-bit replicated groups with ranges ["
<< IP->StartIdx << ", " << IP->EndIdx
<< "] and [" << IN->StartIdx << ", " <<
IN->EndIdx << "]\n"; } } while (false)

1903

<< IP->StartIdx << ", " << IP->EndIdx << "] and ["do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\tcombining bit group for "
<< I->V.getNode() << " RLAmt = " << I->
RLAmt << " [" << I->StartIdx << ", " <<
I->EndIdx << "] with 32-bit replicated groups with ranges ["
<< IP->StartIdx << ", " << IP->EndIdx
<< "] and [" << IN->StartIdx << ", " <<
IN->EndIdx << "]\n"; } } while (false)

1904

<< IN->StartIdx << ", " << IN->EndIdx << "]\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\tcombining bit group for "
<< I->V.getNode() << " RLAmt = " << I->
RLAmt << " [" << I->StartIdx << ", " <<
I->EndIdx << "] with 32-bit replicated groups with ranges ["
<< IP->StartIdx << ", " << IP->EndIdx
<< "] and [" << IN->StartIdx << ", " <<
IN->EndIdx << "]\n"; } } while (false);

1905

1906

if (IP == IN) {

1907

// There is only one other group; change it to cover the whole

1908

// range (backward, so that it can still be Repl32 but cover the

1909

// whole 64-bit range).

1910

IP->StartIdx = 31;

1911

IP->EndIdx = 30;

1912

IP->Repl32CR = IP->Repl32CR || I->RLAmt >= 32;

1913

IP->Repl32Coalesced = true;

1914

I = BitGroups.erase(I);

1915

} else {

1916

// There are two separate groups, one before this group and one

1917

// after us (at the beginning). We're going to remove this group,

1918

// but also the group at the very beginning.

1919

IP->EndIdx = IN->EndIdx;

1920

IP->Repl32CR = IP->Repl32CR || IN->Repl32CR || I->RLAmt >= 32;

1921

IP->Repl32Coalesced = true;

1922

I = BitGroups.erase(I);

1923

BitGroups.erase(BitGroups.begin());

1924

}

1925

1926

// This must be the last group in the vector (and we might have

1927

// just invalidated the iterator above), so break here.

1928

break;

1929

}

1930

}

1931

}

1932

1933

++I;

1934

}

1935

}

1936

1937

/// Wrap \p Imm in an i32 target-constant node created through CurDAG, using
/// \p dl as the node's location.
SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
  SDValue ImmNode = CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  return ImmNode;
}

1940

1941

/// Compute a 64-bit mask that has a zero at every index i for which Bits[i]
/// holds no value and a one at every other position (this includes all
/// positions at or above Bits.size(), since the result is a bitwise
/// complement of the collected "no value" positions).
uint64_t getZerosMask() {
  uint64_t NoValueBits = 0;
  for (unsigned Idx = 0; Idx < Bits.size(); ++Idx)
    if (!Bits[Idx].hasValue())
      NoValueBits |= (UINT64_C(1) << Idx);

  return ~NoValueBits;
}

1951

1952

// This method extends an input value to 64 bit if input is 32-bit integer.

1953

// While selecting instructions in BitPermutationSelector in 64-bit mode,

1954

// an input value can be a 32-bit integer if a ZERO_EXTEND node is included.

1955

// In such case, we extend it to 64 bit to be consistent with other values.

1956

SDValue ExtendToInt64(SDValue V, const SDLoc &dl) {
  // A value that is already 64 bits wide is returned unchanged.
  const bool IsAlready64 = V.getValueSizeInBits() == 64;
  if (IsAlready64)
    return V;

  // Anything else must be exactly 32 bits wide.
  assert(V.getValueSizeInBits() == 32);

  // Place V into the sub_32 sub-register of an otherwise undefined
  // (IMPLICIT_DEF) i64 value.
  SDValue Sub32Idx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
  SDNode *Undef64 = CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, MVT::i64);
  SDNode *Widened =
      CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, MVT::i64,
                             SDValue(Undef64, 0), V, Sub32Idx);
  return SDValue(Widened, 0);
}

1969

1970

/// Counterpart of ExtendToInt64: return \p V unchanged when it is already
/// 32 bits wide; otherwise \p V must be 64 bits wide (asserted) and its
/// sub_32 sub-register is extracted as an i32 value.
SDValue TruncateToInt32(SDValue V, const SDLoc &dl) {
  const bool IsAlready32 = V.getValueSizeInBits() == 32;
  if (IsAlready32)
    return V;

  assert(V.getValueSizeInBits() == 64);

  SDValue Sub32Idx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
  SDNode *Narrowed = CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl, MVT::i32,
                                            V, Sub32Idx);
  return SDValue(Narrowed, 0);
}

1980

1981

// Depending on the number of groups for a particular value, it might be

1982

// better to rotate, mask explicitly (using andi/andis), and then or the

1983

// result. Select this part of the result first.

1984

void SelectAndParts32(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) {

1985

if (BPermRewriterNoMasking)

1986

return;

1987

1988

for (ValueRotInfo &VRI : ValueRotsVec) {

1989

unsigned Mask = 0;

1990

for (unsigned i = 0; i < Bits.size(); ++i) {

1991

if (!Bits[i].hasValue() || Bits[i].getValue() != VRI.V)

1992

continue;

1993

if (RLAmt[i] != VRI.RLAmt)

1994

continue;

1995

Mask |= (1u << i);

1996

}

1997

1998

// Compute the masks for andi/andis that would be necessary.

1999

unsigned ANDIMask = (Mask & UINT16_MAX(65535)), ANDISMask = Mask >> 16;

2000

assert((ANDIMask != 0 || ANDISMask != 0) &&(static_cast <bool> ((ANDIMask != 0 || ANDISMask != 0) &&
"No set bits in mask for value bit groups") ? void (0) : __assert_fail
("(ANDIMask != 0 || ANDISMask != 0) && \"No set bits in mask for value bit groups\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 2001, __extension__ __PRETTY_FUNCTION__))

2001

"No set bits in mask for value bit groups")(static_cast <bool> ((ANDIMask != 0 || ANDISMask != 0) &&
"No set bits in mask for value bit groups") ? void (0) : __assert_fail
("(ANDIMask != 0 || ANDISMask != 0) && \"No set bits in mask for value bit groups\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 2001, __extension__ __PRETTY_FUNCTION__));

2002

bool NeedsRotate = VRI.RLAmt != 0;

2003

2004

// We're trying to minimize the number of instructions. If we have one

2005

// group, using one of andi/andis can break even. If we have three

2006

// groups, we can use both andi and andis and break even (to use both

2007

// andi and andis we also need to or the results together). We need four

2008

// groups if we also need to rotate. To use andi/andis we need to do more

2009

// than break even because rotate-and-mask instructions tend to be easier

2010

// to schedule.

2011

2012

// FIXME: We've biased here against using andi/andis, which is right for

2013

// POWER cores, but not optimal everywhere. For example, on the A2,

2014

// andi/andis have single-cycle latency whereas the rotate-and-mask

2015

// instructions take two cycles, and it would be better to bias toward

2016

// andi/andis in break-even cases.

2017

2018

unsigned NumAndInsts = (unsigned) NeedsRotate +

2019

(unsigned) (ANDIMask != 0) +

2020

(unsigned) (ANDISMask != 0) +

2021

(unsigned) (ANDIMask != 0 && ANDISMask != 0) +

2022

(unsigned) (bool) Res;

2023

2024

LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\t\trotation groups for "
<< VRI.V.getNode() << " RL: " << VRI.RLAmt
<< ":" << "\n\t\t\tisel using masking: " <<
NumAndInsts << " using rotates: " << VRI.NumGroups
<< "\n"; } } while (false)

2025

<< " RL: " << VRI.RLAmt << ":"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\t\trotation groups for "
<< VRI.V.getNode() << " RL: " << VRI.RLAmt
<< ":" << "\n\t\t\tisel using masking: " <<
NumAndInsts << " using rotates: " << VRI.NumGroups
<< "\n"; } } while (false)

2026

<< "\n\t\t\tisel using masking: " << NumAndInstsdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\t\trotation groups for "
<< VRI.V.getNode() << " RL: " << VRI.RLAmt
<< ":" << "\n\t\t\tisel using masking: " <<
NumAndInsts << " using rotates: " << VRI.NumGroups
<< "\n"; } } while (false)

2027

<< " using rotates: " << VRI.NumGroups << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\t\trotation groups for "
<< VRI.V.getNode() << " RL: " << VRI.RLAmt
<< ":" << "\n\t\t\tisel using masking: " <<
NumAndInsts << " using rotates: " << VRI.NumGroups
<< "\n"; } } while (false);

2028

2029

if (NumAndInsts >= VRI.NumGroups)

2030

continue;

2031

2032

LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\t\t\t\tusing masking\n";
} } while (false);

2033

2034

if (InstCnt) *InstCnt += NumAndInsts;

2035

2036

SDValue VRot;

2037

if (VRI.RLAmt) {

2038

SDValue Ops[] =

2039

{ TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl),

2040

getI32Imm(0, dl), getI32Imm(31, dl) };

2041

VRot = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,

2042

Ops), 0);

2043

} else {

2044

VRot = TruncateToInt32(VRI.V, dl);

2045

}

2046

2047

SDValue ANDIVal, ANDISVal;

2048

if (ANDIMask != 0)

2049

ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI_rec, dl, MVT::i32,

2050

VRot, getI32Imm(ANDIMask, dl)),

2051

0);

2052

if (ANDISMask != 0)

2053

ANDISVal =

2054

SDValue(CurDAG->getMachineNode(PPC::ANDIS_rec, dl, MVT::i32, VRot,

2055

getI32Imm(ANDISMask, dl)),

2056

0);

2057

2058

SDValue TotalVal;

2059

if (!ANDIVal)

2060

TotalVal = ANDISVal;

2061

else if (!ANDISVal)

2062

TotalVal = ANDIVal;

2063

else

2064

TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,

2065

ANDIVal, ANDISVal), 0);

2066

2067

if (!Res)

2068

Res = TotalVal;

2069

else

2070

Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,

2071

Res, TotalVal), 0);

2072

2073

// Now, remove all groups with this underlying value and rotation

2074

// factor.

2075

eraseMatchingBitGroups([VRI](const BitGroup &BG) {

2076

return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;

2077

});

2078

}

2079

}

2080

2081

// Instruction selection for the 32-bit case.

2082

SDNode *Select32(SDNode *N, bool LateMask, unsigned *InstCnt) {
  SDLoc dl(N);
  SDValue Res;

  // The instruction counter, when supplied, is restarted from zero.
  if (InstCnt)
    *InstCnt = 0;

  // Give the andi/andis strategy the first chance.
  SelectAndParts32(dl, Res, InstCnt);

  // With no 'starting' instruction selected yet and no zeros that must be
  // filled in up front, begin from the highest-priority (Value, RLAmt) pair,
  // i.e. the first entry of ValueRotsVec.
  const bool CanStartFromValue = !NeedMask || LateMask;
  if (CanStartFromValue && !Res) {
    ValueRotInfo &VRI = ValueRotsVec[0];
    if (VRI.RLAmt) {
      if (InstCnt)
        *InstCnt += 1;
      SDValue RotOps[] = {TruncateToInt32(VRI.V, dl),
                          getI32Imm(VRI.RLAmt, dl), getI32Imm(0, dl),
                          getI32Imm(31, dl)};
      Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, RotOps),
                    0);
    } else {
      Res = TruncateToInt32(VRI.V, dl);
    }

    // Every group with this value and rotation factor is now covered.
    eraseMatchingBitGroups(
        [DoneV = VRI.V, DoneRL = VRI.RLAmt](const BitGroup &BG) {
          return BG.V == DoneV && BG.RLAmt == DoneRL;
        });
  }

  // Each remaining group costs one instruction.
  if (InstCnt)
    *InstCnt += BitGroups.size();

  // Handle the remaining groups one by one: the first creates the result
  // with rlwinm when nothing exists yet, the rest are inserted with rlwimi.
  for (auto &BG : BitGroups) {
    SDValue Src = TruncateToInt32(BG.V, dl);
    SDValue RotAmt = getI32Imm(BG.RLAmt, dl);
    // Mask bounds are mirrored and swapped relative to the group's indices.
    SDValue MaskBegin = getI32Imm(Bits.size() - BG.EndIdx - 1, dl);
    SDValue MaskEnd = getI32Imm(Bits.size() - BG.StartIdx - 1, dl);

    if (Res) {
      SDValue InsOps[] = {Res, Src, RotAmt, MaskBegin, MaskEnd};
      Res = SDValue(CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, InsOps),
                    0);
    } else {
      SDValue RotOps[] = {Src, RotAmt, MaskBegin, MaskEnd};
      Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, RotOps),
                    0);
    }
  }

  if (LateMask) {
    // Apply the zeros mask at the very end, split into andi/andis halves.
    const unsigned Mask = static_cast<unsigned>(getZerosMask());

    const unsigned ANDIMask = (Mask & UINT16_MAX);
    const unsigned ANDISMask = Mask >> 16;
    assert((ANDIMask != 0 || ANDISMask != 0) &&
           "No set bits in zeros mask?");
    const bool HasLowPart = ANDIMask != 0;
    const bool HasHighPart = ANDISMask != 0;

    // One and per non-empty half plus an or when both halves are present.
    if (InstCnt)
      *InstCnt += static_cast<unsigned>(HasLowPart) +
                  static_cast<unsigned>(HasHighPart) +
                  static_cast<unsigned>(HasLowPart && HasHighPart);

    SDValue ANDIVal, ANDISVal;
    if (HasLowPart)
      ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI_rec, dl, MVT::i32,
                                               Res, getI32Imm(ANDIMask, dl)),
                        0);
    if (HasHighPart)
      ANDISVal =
          SDValue(CurDAG->getMachineNode(PPC::ANDIS_rec, dl, MVT::i32, Res,
                                         getI32Imm(ANDISMask, dl)),
                  0);

    if (ANDIVal && ANDISVal)
      Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32, ANDIVal,
                                           ANDISVal),
                    0);
    else
      Res = ANDIVal ? ANDIVal : ANDISVal;
  }

  return Res.getNode();
}

2165

2166

/// Report how many instructions (1 or 2) a 64-bit rotate-and-mask with the
/// given parameters is counted as.
///
/// \param RLAmt     rotate-left amount.
/// \param Repl32    when true the answer is always 1.
/// \param MaskStart first bit index of the mask (low-to-high numbering).
/// \param MaskEnd   last bit index of the mask (low-to-high numbering).
/// \param IsIns     true when counting for an insert rather than a plain
///                  rotate; the "mask touches either end" shortcut is then
///                  not available.
unsigned SelectRotMask64Count(unsigned RLAmt, bool Repl32,
                              unsigned MaskStart, unsigned MaskEnd,
                              bool IsIns) {
  if (Repl32)
    return 1;

  // The instructions number bits from the high-order end, so the bounds are
  // mirrored and 'start'/'end' trade places.
  const unsigned InstMaskStart = 64 - MaskEnd - 1;
  const unsigned InstMaskEnd = 64 - MaskStart - 1;

  const bool MaskTouchesAnEnd = InstMaskEnd == 63 || InstMaskStart == 0;
  const bool MaskEndFollowsRotate = InstMaskEnd == 63 - RLAmt;

  const bool FitsInOne = (!IsIns && MaskTouchesAnEnd) || MaskEndFollowsRotate;
  return FitsInOne ? 1 : 2;
}

2183

2184

// For 64-bit values, not all combinations of rotates and masks are

2185

// available. Produce one if it is available.

2186

SDValue SelectRotMask64(SDValue V, const SDLoc &dl, unsigned RLAmt,
                        bool Repl32, unsigned MaskStart, unsigned MaskEnd,
                        unsigned *InstCnt = nullptr) {
  // The instructions number bits from the high-order end, so the bounds are
  // mirrored and 'start'/'end' trade places.
  const unsigned InstMaskStart = 64 - MaskEnd - 1;
  const unsigned InstMaskEnd = 64 - MaskStart - 1;

  // Every path below emits at least one instruction.
  if (InstCnt)
    *InstCnt += 1;

  if (Repl32) {
    // Here the rotation amount presumes that the rotate operator replicates
    // the low 32 bits of the quantity into the high 32 bits (as rlwinm and
    // friends do), so the mask bounds are rebased by 32.
    assert(InstMaskStart >= 32 && "Mask cannot start out of range");
    assert(InstMaskEnd >= 32 && "Mask cannot end out of range");
    SDValue WordOps[] = {ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
                         getI32Imm(InstMaskStart - 32, dl),
                         getI32Imm(InstMaskEnd - 32, dl)};
    return SDValue(
        CurDAG->getMachineNode(PPC::RLWINM8, dl, MVT::i64, WordOps), 0);
  }

  // Emits one 64-bit rotate taking (value, rotate amount, one mask bound).
  auto EmitRotate = [&](unsigned Opcode, unsigned MaskBound) {
    SDValue RotOps[] = {ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
                        getI32Imm(MaskBound, dl)};
    return SDValue(CurDAG->getMachineNode(Opcode, dl, MVT::i64, RotOps), 0);
  };

  // Single-instruction forms, tried in this order.
  if (InstMaskEnd == 63)
    return EmitRotate(PPC::RLDICL, InstMaskStart);

  if (InstMaskStart == 0)
    return EmitRotate(PPC::RLDICR, InstMaskEnd);

  if (InstMaskEnd == 63 - RLAmt)
    return EmitRotate(PPC::RLDIC, InstMaskStart);

  // No single instruction fits, so two are used. The rotation amount and
  // both mask bounds cannot all be picked independently: once an arbitrary
  // mask start and end are chosen, the rotation amount is fixed. Rotations
  // compose, though, so a compensating rotation applied first yields the
  // requested overall rotation.
  if (InstCnt)
    *InstCnt += 1;

  // The second instruction rotates by MaskStart ...
  const unsigned SecondRot = MaskStart;
  // ... and the first one supplies the remainder so the total is RLAmt.
  const unsigned FirstRot = (64 + RLAmt - SecondRot) % 64;
  if (FirstRot)
    V = SelectRotMask64(V, dl, FirstRot, false, 0, 63);
  return SelectRotMask64(V, dl, SecondRot, false, MaskStart, MaskEnd);
}

2247

2248

// For 64-bit values, not all combinations of rotates and masks are

2249

// available. Produce a rotate-mask-and-insert if one is available.

2250

SDValue SelectRotMaskIns64(SDValue Base, SDValue V, const SDLoc &dl,
                           unsigned RLAmt, bool Repl32, unsigned MaskStart,
                           unsigned MaskEnd, unsigned *InstCnt = nullptr) {
  // The instructions number bits from the high-order end, so the bounds are
  // mirrored and 'start'/'end' trade places.
  const unsigned InstMaskStart = 64 - MaskEnd - 1;
  const unsigned InstMaskEnd = 64 - MaskStart - 1;

  // Every path below emits at least one instruction.
  if (InstCnt)
    *InstCnt += 1;

  if (Repl32) {
    // Here the rotation amount presumes that the rotate operator replicates
    // the low 32 bits of the quantity into the high 32 bits (as rlwinm and
    // friends do), so the mask bounds are rebased by 32.
    assert(InstMaskStart >= 32 && "Mask cannot start out of range");
    assert(InstMaskEnd >= 32 && "Mask cannot end out of range");
    SDValue WordOps[] = {ExtendToInt64(Base, dl), ExtendToInt64(V, dl),
                         getI32Imm(RLAmt, dl),
                         getI32Imm(InstMaskStart - 32, dl),
                         getI32Imm(InstMaskEnd - 32, dl)};
    return SDValue(
        CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64, WordOps), 0);
  }

  // The only single-instruction 64-bit form: the mask end is dictated by
  // the rotation amount.
  if (InstMaskEnd == 63 - RLAmt) {
    SDValue InsOps[] = {ExtendToInt64(Base, dl), ExtendToInt64(V, dl),
                        getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart, dl)};
    return SDValue(CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, InsOps),
                   0);
  }

  // No single instruction fits, so two are used. The rotation amount and
  // both mask bounds cannot all be picked independently: once an arbitrary
  // mask start and end are chosen, the rotation amount is fixed. Rotations
  // compose, though, so a compensating rotation applied first yields the
  // requested overall rotation.
  if (InstCnt)
    *InstCnt += 1;

  // The inserting instruction rotates by MaskStart ...
  const unsigned SecondRot = MaskStart;
  // ... and a plain rotate beforehand supplies the remainder so the total
  // is RLAmt.
  const unsigned FirstRot = (64 + RLAmt - SecondRot) % 64;
  if (FirstRot)
    V = SelectRotMask64(V, dl, FirstRot, false, 0, 63);
  return SelectRotMaskIns64(Base, V, dl, SecondRot, false, MaskStart, MaskEnd);
}

2297

2298

void SelectAndParts64(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) {

2299

if (BPermRewriterNoMasking)

2300

return;

2301

2302

// The idea here is the same as in the 32-bit version, but with additional

2303

// complications from the fact that Repl32 might be true. Because we

2304

// aggressively convert bit groups to Repl32 form (which, for small

2305

// rotation factors, involves no other change), and then coalesce, it might

2306

// be the case that a single 64-bit masking operation could handle both

2307

// some Repl32 groups and some non-Repl32 groups. If converting to Repl32

2308

// form allowed coalescing, then we must use a 32-bit rotaton in order to

2309

// completely capture the new combined bit group.

2310

2311

for (ValueRotInfo &VRI : ValueRotsVec) {

2312

uint64_t Mask = 0;

2313

2314

// We need to add to the mask all bits from the associated bit groups.

2315

// If Repl32 is false, we need to add bits from bit groups that have

2316

// Repl32 true, but are trivially convertable to Repl32 false. Such a

2317

// group is trivially convertable if it overlaps only with the lower 32

2318

// bits, and the group has not been coalesced.

2319

auto MatchingBG = [VRI](const BitGroup &BG) {

2320

if (VRI.V != BG.V)

2321

return false;

2322

2323

unsigned EffRLAmt = BG.RLAmt;

2324

if (!VRI.Repl32 && BG.Repl32) {

2325

if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx <= BG.EndIdx &&

2326

!BG.Repl32Coalesced) {

2327

if (BG.Repl32CR)

2328

EffRLAmt += 32;

2329

} else {

2330

return false;

2331

}

2332

} else if (VRI.Repl32 != BG.Repl32) {

2333

return false;

2334

}

2335

2336

return VRI.RLAmt == EffRLAmt;

2337

};

2338

2339

for (auto &BG : BitGroups) {

2340

if (!MatchingBG(BG))

2341

continue;

2342

2343

if (BG.StartIdx <= BG.EndIdx) {

2344

for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i)

2345

Mask |= (UINT64_C(1)1UL << i);

2346

} else {

2347

for (unsigned i = BG.StartIdx; i < Bits.size(); ++i)

2348

Mask |= (UINT64_C(1)1UL << i);

2349

for (unsigned i = 0; i <= BG.EndIdx; ++i)

2350

Mask |= (UINT64_C(1)1UL << i);

2351

}

2352

}

2353

2354

// We can use the 32-bit andi/andis technique if the mask does not

2355

// require any higher-order bits. This can save an instruction compared

2356

// to always using the general 64-bit technique.

2357

bool Use32BitInsts = isUInt<32>(Mask);

2358

// Compute the masks for andi/andis that would be necessary.

2359

unsigned ANDIMask = (Mask & UINT16_MAX(65535)),

2360

ANDISMask = (Mask >> 16) & UINT16_MAX(65535);

2361

2362

bool NeedsRotate = VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask));

2363

2364

unsigned NumAndInsts = (unsigned) NeedsRotate +

2365

(unsigned) (bool) Res;

2366

unsigned NumOfSelectInsts = 0;

2367

selectI64Imm(CurDAG, dl, Mask, &NumOfSelectInsts);

2368

assert(NumOfSelectInsts > 0 && "Failed to select an i64 constant.")(static_cast <bool> (NumOfSelectInsts > 0 &&
"Failed to select an i64 constant.") ? void (0) : __assert_fail
("NumOfSelectInsts > 0 && \"Failed to select an i64 constant.\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 2368, __extension__ __PRETTY_FUNCTION__));

2369

if (Use32BitInsts)

2370

NumAndInsts += (unsigned) (ANDIMask != 0) + (unsigned) (ANDISMask != 0) +

2371

(unsigned) (ANDIMask != 0 && ANDISMask != 0);

2372

else

2373

NumAndInsts += NumOfSelectInsts + /* and */ 1;

2374

2375

unsigned NumRLInsts = 0;

2376

bool FirstBG = true;

2377

bool MoreBG = false;

2378

for (auto &BG : BitGroups) {

2379

if (!MatchingBG(BG)) {

2380

MoreBG = true;

2381

continue;

2382

}

2383

NumRLInsts +=

2384

SelectRotMask64Count(BG.RLAmt, BG.Repl32, BG.StartIdx, BG.EndIdx,

2385

!FirstBG);

2386

FirstBG = false;

2387

}

2388

2389

LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\t\trotation groups for "
<< VRI.V.getNode() << " RL: " << VRI.RLAmt
<< (VRI.Repl32 ? " (32):" : ":") << "\n\t\t\tisel using masking: "
<< NumAndInsts << " using rotates: " << NumRLInsts
<< "\n"; } } while (false)

2390

<< " RL: " << VRI.RLAmt << (VRI.Repl32 ? " (32):" : ":")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\t\trotation groups for "
<< VRI.V.getNode() << " RL: " << VRI.RLAmt
<< (VRI.Repl32 ? " (32):" : ":") << "\n\t\t\tisel using masking: "
<< NumAndInsts << " using rotates: " << NumRLInsts
<< "\n"; } } while (false)

2391

<< "\n\t\t\tisel using masking: " << NumAndInstsdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\t\trotation groups for "
<< VRI.V.getNode() << " RL: " << VRI.RLAmt
<< (VRI.Repl32 ? " (32):" : ":") << "\n\t\t\tisel using masking: "
<< NumAndInsts << " using rotates: " << NumRLInsts
<< "\n"; } } while (false)

2392

<< " using rotates: " << NumRLInsts << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\t\trotation groups for "
<< VRI.V.getNode() << " RL: " << VRI.RLAmt
<< (VRI.Repl32 ? " (32):" : ":") << "\n\t\t\tisel using masking: "
<< NumAndInsts << " using rotates: " << NumRLInsts
<< "\n"; } } while (false);

2393

2394

// When we'd use andi/andis, we bias toward using the rotates (andi only

2395

// has a record form, and is cracked on POWER cores). However, when using

2396

// general 64-bit constant formation, bias toward the constant form,

2397

// because that exposes more opportunities for CSE.

2398

if (NumAndInsts > NumRLInsts)

2399

continue;

2400

// When merging multiple bit groups, instruction or is used.

2401

// But when rotate is used, rldimi can inert the rotated value into any

2402

// register, so instruction or can be avoided.

2403

if ((Use32BitInsts || MoreBG) && NumAndInsts == NumRLInsts)

2404

continue;

2405

2406

LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\t\t\t\tusing masking\n";
} } while (false);

2407

2408

if (InstCnt) *InstCnt += NumAndInsts;

2409

2410

SDValue VRot;

2411

// We actually need to generate a rotation if we have a non-zero rotation

2412

// factor or, in the Repl32 case, if we care about any of the

2413

// higher-order replicated bits. In the latter case, we generate a mask

2414

// backward so that it actually includes the entire 64 bits.

2415

if (VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask)))

2416

VRot = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,

2417

VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63);

2418

else

2419

VRot = VRI.V;

2420

2421

SDValue TotalVal;

2422

if (Use32BitInsts) {

2423

assert((ANDIMask != 0 || ANDISMask != 0) &&(static_cast <bool> ((ANDIMask != 0 || ANDISMask != 0) &&
"No set bits in mask when using 32-bit ands for 64-bit value"
) ? void (0) : __assert_fail ("(ANDIMask != 0 || ANDISMask != 0) && \"No set bits in mask when using 32-bit ands for 64-bit value\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 2424, __extension__ __PRETTY_FUNCTION__))

2424

"No set bits in mask when using 32-bit ands for 64-bit value")(static_cast <bool> ((ANDIMask != 0 || ANDISMask != 0) &&
"No set bits in mask when using 32-bit ands for 64-bit value"
) ? void (0) : __assert_fail ("(ANDIMask != 0 || ANDISMask != 0) && \"No set bits in mask when using 32-bit ands for 64-bit value\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 2424, __extension__ __PRETTY_FUNCTION__));

2425

2426

SDValue ANDIVal, ANDISVal;

2427

if (ANDIMask != 0)

2428

ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI8_rec, dl, MVT::i64,

2429

ExtendToInt64(VRot, dl),

2430

getI32Imm(ANDIMask, dl)),

2431

0);

2432

if (ANDISMask != 0)

2433

ANDISVal =

2434

SDValue(CurDAG->getMachineNode(PPC::ANDIS8_rec, dl, MVT::i64,

2435

ExtendToInt64(VRot, dl),

2436

getI32Imm(ANDISMask, dl)),

2437

0);

2438

2439

if (!ANDIVal)

2440

TotalVal = ANDISVal;

2441

else if (!ANDISVal)

2442

TotalVal = ANDIVal;

2443

else

2444

TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,

2445

ExtendToInt64(ANDIVal, dl), ANDISVal), 0);

2446

} else {

2447

TotalVal = SDValue(selectI64Imm(CurDAG, dl, Mask), 0);

2448

TotalVal =

2449

SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,

2450

ExtendToInt64(VRot, dl), TotalVal),

2451

0);

2452

}

2453

2454

if (!Res)

2455

Res = TotalVal;

2456

else

2457

Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,

2458

ExtendToInt64(Res, dl), TotalVal),

2459

0);

2460

2461

// Now, remove all groups with this underlying value and rotation

2462

// factor.

2463

eraseMatchingBitGroups(MatchingBG);

2464

}

2465

}

2466

2467

// Instruction selection for the 64-bit case.

2468

SDNode *Select64(SDNode *N, bool LateMask, unsigned *InstCnt) {

2469

SDLoc dl(N);

2470

SDValue Res;

2471

2472

if (InstCnt) *InstCnt = 0;

2473

2474

// Take care of cases that should use andi/andis first.

2475

SelectAndParts64(dl, Res, InstCnt);

2476

2477

// If we've not yet selected a 'starting' instruction, and we have no zeros

2478

// to fill in, select the (Value, RLAmt) with the highest priority (largest

2479

// number of groups), and start with this rotated value.

2480

if ((!NeedMask || LateMask) && !Res) {

2481

// If we have both Repl32 groups and non-Repl32 groups, the non-Repl32

2482

// groups will come first, and so the VRI representing the largest number

2483

// of groups might not be first (it might be the first Repl32 groups).

2484

unsigned MaxGroupsIdx = 0;

2485

if (!ValueRotsVec[0].Repl32) {

2486

for (unsigned i = 0, ie = ValueRotsVec.size(); i < ie; ++i)

2487

if (ValueRotsVec[i].Repl32) {

2488

if (ValueRotsVec[i].NumGroups > ValueRotsVec[0].NumGroups)

2489

MaxGroupsIdx = i;

2490

break;

2491

}

2492

}

2493

2494

ValueRotInfo &VRI = ValueRotsVec[MaxGroupsIdx];

2495

bool NeedsRotate = false;

2496

if (VRI.RLAmt) {

2497

NeedsRotate = true;

2498

} else if (VRI.Repl32) {

2499

for (auto &BG : BitGroups) {

2500

if (BG.V != VRI.V || BG.RLAmt != VRI.RLAmt ||

2501

BG.Repl32 != VRI.Repl32)

2502

continue;

2503

2504

// We don't need a rotate if the bit group is confined to the lower

2505

// 32 bits.

2506

if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx < BG.EndIdx)

2507

continue;

2508

2509

NeedsRotate = true;

2510

break;

2511

}

2512

}

2513

2514

if (NeedsRotate)

2515

Res = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,

2516

VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63,

2517

InstCnt);

2518

else

2519

Res = VRI.V;

2520

2521

// Now, remove all groups with this underlying value and rotation factor.

2522

if (Res)

2523

eraseMatchingBitGroups([VRI](const BitGroup &BG) {

2524

return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt &&

2525

BG.Repl32 == VRI.Repl32;

2526

});

2527

}

2528

2529

// Because 64-bit rotates are more flexible than inserts, we might have a

2530

// preference regarding which one we do first (to save one instruction).

2531

if (!Res)

2532

for (auto I = BitGroups.begin(), IE = BitGroups.end(); I != IE; ++I) {

2533

if (SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx,

2534

false) <

2535

SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx,

2536

true)) {

2537

if (I != BitGroups.begin()) {

2538

BitGroup BG = *I;

2539

BitGroups.erase(I);

2540

BitGroups.insert(BitGroups.begin(), BG);

2541

}

2542

2543

break;

2544

}

2545

}

2546

2547

// Insert the other groups (one at a time).

2548

for (auto &BG : BitGroups) {

2549

if (!Res)

2550

Res = SelectRotMask64(BG.V, dl, BG.RLAmt, BG.Repl32, BG.StartIdx,

2551

BG.EndIdx, InstCnt);

2552

else

2553

Res = SelectRotMaskIns64(Res, BG.V, dl, BG.RLAmt, BG.Repl32,

2554

BG.StartIdx, BG.EndIdx, InstCnt);

2555

}

2556

2557

if (LateMask) {

2558

uint64_t Mask = getZerosMask();

2559

2560

// We can use the 32-bit andi/andis technique if the mask does not

2561

// require any higher-order bits. This can save an instruction compared

2562

// to always using the general 64-bit technique.

2563

bool Use32BitInsts = isUInt<32>(Mask);

2564

// Compute the masks for andi/andis that would be necessary.

2565

unsigned ANDIMask = (Mask & UINT16_MAX(65535)),

2566

ANDISMask = (Mask >> 16) & UINT16_MAX(65535);

2567

2568

if (Use32BitInsts) {

2569

assert((ANDIMask != 0 || ANDISMask != 0) &&(static_cast <bool> ((ANDIMask != 0 || ANDISMask != 0) &&
"No set bits in mask when using 32-bit ands for 64-bit value"
) ? void (0) : __assert_fail ("(ANDIMask != 0 || ANDISMask != 0) && \"No set bits in mask when using 32-bit ands for 64-bit value\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 2570, __extension__ __PRETTY_FUNCTION__))

2570

"No set bits in mask when using 32-bit ands for 64-bit value")(static_cast <bool> ((ANDIMask != 0 || ANDISMask != 0) &&
"No set bits in mask when using 32-bit ands for 64-bit value"
) ? void (0) : __assert_fail ("(ANDIMask != 0 || ANDISMask != 0) && \"No set bits in mask when using 32-bit ands for 64-bit value\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 2570, __extension__ __PRETTY_FUNCTION__));

2571

2572

if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) +

2573

(unsigned) (ANDISMask != 0) +

2574

(unsigned) (ANDIMask != 0 && ANDISMask != 0);

2575

2576

SDValue ANDIVal, ANDISVal;

2577

if (ANDIMask != 0)

2578

ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI8_rec, dl, MVT::i64,

2579

ExtendToInt64(Res, dl),

2580

getI32Imm(ANDIMask, dl)),

2581

0);

2582

if (ANDISMask != 0)

2583

ANDISVal =

2584

SDValue(CurDAG->getMachineNode(PPC::ANDIS8_rec, dl, MVT::i64,

2585

ExtendToInt64(Res, dl),

2586

getI32Imm(ANDISMask, dl)),

2587

0);

2588

2589

if (!ANDIVal)

2590

Res = ANDISVal;

2591

else if (!ANDISVal)

2592

Res = ANDIVal;

2593

else

2594

Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,

2595

ExtendToInt64(ANDIVal, dl), ANDISVal), 0);

2596

} else {

2597

unsigned NumOfSelectInsts = 0;

2598

SDValue MaskVal =

2599

SDValue(selectI64Imm(CurDAG, dl, Mask, &NumOfSelectInsts), 0);

2600

Res = SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,

2601

ExtendToInt64(Res, dl), MaskVal),

2602

0);

2603

if (InstCnt)

2604

*InstCnt += NumOfSelectInsts + /* and */ 1;

2605

}

2606

}

2607

2608

return Res.getNode();

2609

}

2610

2611

/// Run one selection attempt over the bit description currently held in Bits.
///
/// \param N        Node being selected; forwarded to Select32/Select64.
/// \param LateMask Forwarded to collectBitGroups() and to the width-specific
///                 selector.
/// \param InstCnt  Optional counter forwarded to the width-specific selector.
/// \returns The node produced by Select32/Select64, or nullptr when no bit
///          groups were collected.
SDNode *Select(SDNode *N, bool LateMask, unsigned *InstCnt = nullptr) {
  // Fill in BitGroups.
  collectBitGroups(LateMask);
  if (BitGroups.empty())
    return nullptr;

  // For 64-bit values, figure out when we can use 32-bit instructions.
  if (Bits.size() == 64)
    assignRepl32BitGroups();

  // Fill in ValueRotsVec.
  collectValueRotInfo();

  if (Bits.size() == 32)
    return Select32(N, LateMask, InstCnt);

  // Only 32- and 64-bit values are handled; anything else is a caller bug.
  assert(Bits.size() == 64 && "Not 64 bits here?");
  return Select64(N, LateMask, InstCnt);
}

2633

2634

/// Drop from BitGroups every group for which the predicate \p F returns true.
void eraseMatchingBitGroups(function_ref<bool(const BitGroup &)> F) {
  erase_if(BitGroups, F);
}

2637

2638

SmallVector<ValueBit, 64> Bits;

2639

2640

bool NeedMask = false;

2641

SmallVector<unsigned, 64> RLAmt;

2642

2643

SmallVector<BitGroup, 16> BitGroups;

2644

2645

DenseMap<std::pair<SDValue, unsigned>, ValueRotInfo> ValueRots;

2646

SmallVector<ValueRotInfo, 16> ValueRotsVec;

2647

2648

SelectionDAG *CurDAG = nullptr;

2649

2650

public:

2651

/// Create a selector that builds its machine nodes in \p DAG.
BitPermutationSelector(SelectionDAG *DAG) : CurDAG(DAG) {}

2653

2654

// Here we try to match complex bit permutations into a set of

2655

// rotate-and-shift/shift/and/or instructions, using a set of heuristics

2656

// known to produce optimal code for common cases (like i32 byte swapping).

2657

SDNode *Select(SDNode *N) {
  // Start from a clean memoization table for this node.
  Memoizer.clear();
  auto Result =
      getValueBits(SDValue(N, 0), N->getValueType(0).getSizeInBits());
  // getValueBits() reports failure through the first member of the pair.
  if (!Result.first)
    return nullptr;
  Bits = std::move(*Result.second);

  LLVM_DEBUG(dbgs() << "Considering bit-permutation-based instruction"
                       " selection for: ");
  LLVM_DEBUG(N->dump(CurDAG));

  // Fill it RLAmt and set NeedMask.
  computeRotationAmounts();

  // Without a mask there is nothing to compare: do a single pass.
  if (!NeedMask)
    return Select(N, /*LateMask=*/false);

  // We currently have two techniques for handling results with zeros: early
  // masking (the default) and late masking. Late masking is sometimes more
  // efficient, but because the structure of the bit groups is different, it
  // is hard to tell without generating both and comparing the results. With
  // late masking, we ignore zeros in the resulting value when inserting each
  // set of bit groups, and then mask in the zeros at the end. With early
  // masking, we only insert the non-zero parts of the result at every step.

  unsigned InstCnt = 0, InstCntLateMask = 0;
  LLVM_DEBUG(dbgs() << "\tEarly masking:\n");
  SDNode *RN = Select(N, /*LateMask=*/false, &InstCnt);
  LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCnt << " instructions\n");

  LLVM_DEBUG(dbgs() << "\tLate masking:\n");
  SDNode *RNLM = Select(N, /*LateMask=*/true, &InstCntLateMask);
  LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCntLateMask
                    << " instructions\n");

  // Ties go to early masking.
  if (InstCnt <= InstCntLateMask) {
    LLVM_DEBUG(dbgs() << "\tUsing early-masking for isel\n");
    return RN;
  }

  LLVM_DEBUG(dbgs() << "\tUsing late-masking for isel\n");
  return RNLM;
}

2701

};

2702

2703

class IntegerCompareEliminator {

2704

SelectionDAG *CurDAG;

2705

PPCDAGToDAGISel *S;

2706

// Conversion type for interpreting results of a 32-bit instruction as

2707

// a 64-bit value or vice versa.

2708

enum ExtOrTruncConversion { Ext, Trunc };

2709

2710

// Modifiers to guide how an ISD::SETCC node's result is to be computed

2711

// in a GPR.

2712

// ZExtOrig - use the original condition code, zero-extend value

2713

// ZExtInvert - invert the condition code, zero-extend value

2714

// SExtOrig - use the original condition code, sign-extend value

2715

// SExtInvert - invert the condition code, sign-extend value

2716

enum SetccInGPROpts { ZExtOrig, ZExtInvert, SExtOrig, SExtInvert };

2717

2718

// Comparisons against zero to emit GPR code sequences for. Each of these

2719

// sequences may need to be emitted for two or more equivalent patterns.

2720

// For example (a >= 0) == (a > -1). The direction of the comparison (</>)

2721

// matters as well as the extension type: sext (-1/0), zext (1/0).

2722

// GEZExt - (zext (LHS >= 0))

2723

// GESExt - (sext (LHS >= 0))

2724

// LEZExt - (zext (LHS <= 0))

2725

// LESExt - (sext (LHS <= 0))

2726

enum ZeroCompare { GEZExt, GESExt, LEZExt, LESExt };

2727

2728

SDNode *tryEXTEND(SDNode *N);

2729

SDNode *tryLogicOpOfCompares(SDNode *N);

2730

SDValue computeLogicOpInGPR(SDValue LogicOp);

2731

SDValue signExtendInputIfNeeded(SDValue Input);

2732

SDValue zeroExtendInputIfNeeded(SDValue Input);

2733

SDValue addExtOrTrunc(SDValue NatWidthRes, ExtOrTruncConversion Conv);

2734

SDValue getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl,

2735

ZeroCompare CmpTy);

2736

SDValue get32BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,

2737

int64_t RHSValue, SDLoc dl);

2738

SDValue get32BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,

2739

int64_t RHSValue, SDLoc dl);

2740

SDValue get64BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,

2741

int64_t RHSValue, SDLoc dl);

2742

SDValue get64BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,

2743

int64_t RHSValue, SDLoc dl);

2744

SDValue getSETCCInGPR(SDValue Compare, SetccInGPROpts ConvOpts);

2745

2746

public:

2747

IntegerCompareEliminator(SelectionDAG *DAG,

2748

PPCDAGToDAGISel *Sel) : CurDAG(DAG), S(Sel) {

2749

assert(CurDAG->getTargetLoweringInfo()(static_cast <bool> (CurDAG->getTargetLoweringInfo()
.getPointerTy(CurDAG->getDataLayout()).getSizeInBits() ==
64 && "Only expecting to use this on 64 bit targets."
) ? void (0) : __assert_fail ("CurDAG->getTargetLoweringInfo() .getPointerTy(CurDAG->getDataLayout()).getSizeInBits() == 64 && \"Only expecting to use this on 64 bit targets.\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 2751, __extension__ __PRETTY_FUNCTION__))

2750

.getPointerTy(CurDAG->getDataLayout()).getSizeInBits() == 64 &&(static_cast <bool> (CurDAG->getTargetLoweringInfo()
.getPointerTy(CurDAG->getDataLayout()).getSizeInBits() ==
64 && "Only expecting to use this on 64 bit targets."
) ? void (0) : __assert_fail ("CurDAG->getTargetLoweringInfo() .getPointerTy(CurDAG->getDataLayout()).getSizeInBits() == 64 && \"Only expecting to use this on 64 bit targets.\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 2751, __extension__ __PRETTY_FUNCTION__))

2751

"Only expecting to use this on 64 bit targets.")(static_cast <bool> (CurDAG->getTargetLoweringInfo()
.getPointerTy(CurDAG->getDataLayout()).getSizeInBits() ==
64 && "Only expecting to use this on 64 bit targets."
) ? void (0) : __assert_fail ("CurDAG->getTargetLoweringInfo() .getPointerTy(CurDAG->getDataLayout()).getSizeInBits() == 64 && \"Only expecting to use this on 64 bit targets.\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 2751, __extension__ __PRETTY_FUNCTION__));

2752

}

2753

SDNode *Select(SDNode *N) {

2754

if (CmpInGPR == ICGPR_None)

2755

return nullptr;

2756

switch (N->getOpcode()) {

2757

default: break;

2758

case ISD::ZERO_EXTEND:

2759

if (CmpInGPR == ICGPR_Sext || CmpInGPR == ICGPR_SextI32 ||

2760

CmpInGPR == ICGPR_SextI64)

2761

return nullptr;

2762

LLVM_FALLTHROUGH[[gnu::fallthrough]];

2763

case ISD::SIGN_EXTEND:

2764

if (CmpInGPR == ICGPR_Zext || CmpInGPR == ICGPR_ZextI32 ||

2765

CmpInGPR == ICGPR_ZextI64)

2766

return nullptr;

2767

return tryEXTEND(N);

2768

case ISD::AND:

2769

case ISD::OR:

2770

case ISD::XOR:

2771

return tryLogicOpOfCompares(N);

2772

}

2773

return nullptr;

2774

}

2775

};

2776

2777

static bool isLogicOp(unsigned Opc) {

2778

return Opc == ISD::AND || Opc == ISD::OR || Opc == ISD::XOR;

2779

}

2780

// The obvious case for wanting to keep the value in a GPR. Namely, the

2781

// result of the comparison is actually needed in a GPR.

2782

SDNode *IntegerCompareEliminator::tryEXTEND(SDNode *N) {

2783

assert((N->getOpcode() == ISD::ZERO_EXTEND ||(static_cast <bool> ((N->getOpcode() == ISD::ZERO_EXTEND
|| N->getOpcode() == ISD::SIGN_EXTEND) && "Expecting a zero/sign extend node!"
) ? void (0) : __assert_fail ("(N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::SIGN_EXTEND) && \"Expecting a zero/sign extend node!\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 2785, __extension__ __PRETTY_FUNCTION__))

2784

N->getOpcode() == ISD::SIGN_EXTEND) &&(static_cast <bool> ((N->getOpcode() == ISD::ZERO_EXTEND
|| N->getOpcode() == ISD::SIGN_EXTEND) && "Expecting a zero/sign extend node!"
) ? void (0) : __assert_fail ("(N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::SIGN_EXTEND) && \"Expecting a zero/sign extend node!\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 2785, __extension__ __PRETTY_FUNCTION__))

2785

"Expecting a zero/sign extend node!")(static_cast <bool> ((N->getOpcode() == ISD::ZERO_EXTEND
|| N->getOpcode() == ISD::SIGN_EXTEND) && "Expecting a zero/sign extend node!"
) ? void (0) : __assert_fail ("(N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::SIGN_EXTEND) && \"Expecting a zero/sign extend node!\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 2785, __extension__ __PRETTY_FUNCTION__));

2786

SDValue WideRes;

2787

// If we are zero-extending the result of a logical operation on i1

2788

// values, we can keep the values in GPRs.

2789

if (isLogicOp(N->getOperand(0).getOpcode()) &&

2790

N->getOperand(0).getValueType() == MVT::i1 &&

2791

N->getOpcode() == ISD::ZERO_EXTEND)

2792

WideRes = computeLogicOpInGPR(N->getOperand(0));

2793

else if (N->getOperand(0).getOpcode() != ISD::SETCC)

2794

return nullptr;

2795

else

2796

WideRes =

2797

getSETCCInGPR(N->getOperand(0),

2798

N->getOpcode() == ISD::SIGN_EXTEND ?

2799

SetccInGPROpts::SExtOrig : SetccInGPROpts::ZExtOrig);

2800

2801

if (!WideRes)

2802

return nullptr;

2803

2804

SDLoc dl(N);

2805

bool Input32Bit = WideRes.getValueType() == MVT::i32;

2806

bool Output32Bit = N->getValueType(0) == MVT::i32;

2807

2808

NumSextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 1 : 0;

2809

NumZextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 0 : 1;

2810

2811

SDValue ConvOp = WideRes;

2812

if (Input32Bit != Output32Bit)

2813

ConvOp = addExtOrTrunc(WideRes, Input32Bit ? ExtOrTruncConversion::Ext :

2814

ExtOrTruncConversion::Trunc);

2815

return ConvOp.getNode();

2816

}

2817

2818

// Attempt to perform logical operations on the results of comparisons while

2819

// keeping the values in GPRs. Without doing so, these would end up being

2820

// lowered to CR-logical operations which suffer from significant latency and

2821

// low ILP.

2822

SDNode *IntegerCompareEliminator::tryLogicOpOfCompares(SDNode *N) {

2823

if (N->getValueType(0) != MVT::i1)

2824

return nullptr;

2825

assert(isLogicOp(N->getOpcode()) &&(static_cast <bool> (isLogicOp(N->getOpcode()) &&
"Expected a logic operation on setcc results.") ? void (0) :
__assert_fail ("isLogicOp(N->getOpcode()) && \"Expected a logic operation on setcc results.\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 2826, __extension__ __PRETTY_FUNCTION__))

2826

"Expected a logic operation on setcc results.")(static_cast <bool> (isLogicOp(N->getOpcode()) &&
"Expected a logic operation on setcc results.") ? void (0) :
__assert_fail ("isLogicOp(N->getOpcode()) && \"Expected a logic operation on setcc results.\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 2826, __extension__ __PRETTY_FUNCTION__));

2827

SDValue LoweredLogical = computeLogicOpInGPR(SDValue(N, 0));

2828

if (!LoweredLogical)

2829

return nullptr;

2830

2831

SDLoc dl(N);

2832

bool IsBitwiseNegate = LoweredLogical.getMachineOpcode() == PPC::XORI8;

2833

unsigned SubRegToExtract = IsBitwiseNegate ? PPC::sub_eq : PPC::sub_gt;

2834

SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);

2835

SDValue LHS = LoweredLogical.getOperand(0);

2836

SDValue RHS = LoweredLogical.getOperand(1);

2837

SDValue WideOp;

2838

SDValue OpToConvToRecForm;

2839

2840

// Look through any 32-bit to 64-bit implicit extend nodes to find the

2841

// opcode that is input to the XORI.

2842

if (IsBitwiseNegate &&

2843

LoweredLogical.getOperand(0).getMachineOpcode() == PPC::INSERT_SUBREG)

2844

OpToConvToRecForm = LoweredLogical.getOperand(0).getOperand(1);

2845

else if (IsBitwiseNegate)

2846

// If the input to the XORI isn't an extension, that's what we're after.

2847

OpToConvToRecForm = LoweredLogical.getOperand(0);

2848

else

2849

// If this is not an XORI, it is a reg-reg logical op and we can convert

2850

// it to record-form.

2851

OpToConvToRecForm = LoweredLogical;

2852

2853

// Get the record-form version of the node we're looking to use to get the

2854

// CR result from.

2855

uint16_t NonRecOpc = OpToConvToRecForm.getMachineOpcode();

2856

int NewOpc = PPCInstrInfo::getRecordFormOpcode(NonRecOpc);

2857

2858

// Convert the right node to record-form. This is either the logical we're

2859

// looking at or it is the input node to the negation (if we're looking at

2860

// a bitwise negation).

2861

if (NewOpc != -1 && IsBitwiseNegate) {

2862

// The input to the XORI has a record-form. Use it.

2863

assert(LoweredLogical.getConstantOperandVal(1) == 1 &&(static_cast <bool> (LoweredLogical.getConstantOperandVal
(1) == 1 && "Expected a PPC::XORI8 only for bitwise negation."
) ? void (0) : __assert_fail ("LoweredLogical.getConstantOperandVal(1) == 1 && \"Expected a PPC::XORI8 only for bitwise negation.\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 2864, __extension__ __PRETTY_FUNCTION__))

2864

"Expected a PPC::XORI8 only for bitwise negation.")(static_cast <bool> (LoweredLogical.getConstantOperandVal
(1) == 1 && "Expected a PPC::XORI8 only for bitwise negation."
) ? void (0) : __assert_fail ("LoweredLogical.getConstantOperandVal(1) == 1 && \"Expected a PPC::XORI8 only for bitwise negation.\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 2864, __extension__ __PRETTY_FUNCTION__));

2865

// Emit the record-form instruction.

2866

std::vector<SDValue> Ops;

2867

for (int i = 0, e = OpToConvToRecForm.getNumOperands(); i < e; i++)

2868

Ops.push_back(OpToConvToRecForm.getOperand(i));

2869

2870

WideOp =

2871

SDValue(CurDAG->getMachineNode(NewOpc, dl,

2872

OpToConvToRecForm.getValueType(),

2873

MVT::Glue, Ops), 0);

2874

} else {

2875

assert((NewOpc != -1 || !IsBitwiseNegate) &&(static_cast <bool> ((NewOpc != -1 || !IsBitwiseNegate)
&& "No record form available for AND8/OR8/XOR8?") ? void
(0) : __assert_fail ("(NewOpc != -1 || !IsBitwiseNegate) && \"No record form available for AND8/OR8/XOR8?\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 2876, __extension__ __PRETTY_FUNCTION__))

2876

"No record form available for AND8/OR8/XOR8?")(static_cast <bool> ((NewOpc != -1 || !IsBitwiseNegate)
&& "No record form available for AND8/OR8/XOR8?") ? void
(0) : __assert_fail ("(NewOpc != -1 || !IsBitwiseNegate) && \"No record form available for AND8/OR8/XOR8?\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 2876, __extension__ __PRETTY_FUNCTION__));

2877

WideOp =

2878

SDValue(CurDAG->getMachineNode(NewOpc == -1 ? PPC::ANDI8_rec : NewOpc,

2879

dl, MVT::i64, MVT::Glue, LHS, RHS),

2880

0);

2881

}

2882

2883

// Select this node to a single bit from CR0 set by the record-form node

2884

// just created. For bitwise negation, use the EQ bit which is the equivalent

2885

// of negating the result (i.e. it is a bit set when the result of the

2886

// operation is zero).

2887

SDValue SRIdxVal =

2888

CurDAG->getTargetConstant(SubRegToExtract, dl, MVT::i32);

2889

SDValue CRBit =

2890

SDValue(CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,

2891

MVT::i1, CR0Reg, SRIdxVal,

2892

WideOp.getValue(1)), 0);

2893

return CRBit.getNode();

2894

}

2895

2896

// Lower a logical operation on i1 values into a GPR sequence if possible.

2897

// The result can be kept in a GPR if requested.

2898

// Three types of inputs can be handled:

2899

// - SETCC

2900

// - TRUNCATE

2901

// - Logical operation (AND/OR/XOR)

2902

// There is also a special case that is handled (namely a complement operation

2903

// achieved with xor %a, -1).

2904

SDValue IntegerCompareEliminator::computeLogicOpInGPR(SDValue LogicOp) {

2905

assert(isLogicOp(LogicOp.getOpcode()) &&(static_cast <bool> (isLogicOp(LogicOp.getOpcode()) &&
"Can only handle logic operations here.") ? void (0) : __assert_fail
("isLogicOp(LogicOp.getOpcode()) && \"Can only handle logic operations here.\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 2906, __extension__ __PRETTY_FUNCTION__))

2906

"Can only handle logic operations here.")(static_cast <bool> (isLogicOp(LogicOp.getOpcode()) &&
"Can only handle logic operations here.") ? void (0) : __assert_fail
("isLogicOp(LogicOp.getOpcode()) && \"Can only handle logic operations here.\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 2906, __extension__ __PRETTY_FUNCTION__));

2907

assert(LogicOp.getValueType() == MVT::i1 &&(static_cast <bool> (LogicOp.getValueType() == MVT::i1 &&
"Can only handle logic operations on i1 values here.") ? void
(0) : __assert_fail ("LogicOp.getValueType() == MVT::i1 && \"Can only handle logic operations on i1 values here.\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 2908, __extension__ __PRETTY_FUNCTION__))

2908

"Can only handle logic operations on i1 values here.")(static_cast <bool> (LogicOp.getValueType() == MVT::i1 &&
"Can only handle logic operations on i1 values here.") ? void
(0) : __assert_fail ("LogicOp.getValueType() == MVT::i1 && \"Can only handle logic operations on i1 values here.\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 2908, __extension__ __PRETTY_FUNCTION__));

2909

SDLoc dl(LogicOp);

2910

SDValue LHS, RHS;

2911

2912

// Special case: xor %a, -1

2913

bool IsBitwiseNegation = isBitwiseNot(LogicOp);

2914

2915

// Produces a GPR sequence for each operand of the binary logic operation.

2916

// For SETCC, it produces the respective comparison, for TRUNCATE it truncates

2917

// the value in a GPR and for logic operations, it will recursively produce

2918

// a GPR sequence for the operation.

2919

auto getLogicOperand = [&] (SDValue Operand) -> SDValue {

2920

unsigned OperandOpcode = Operand.getOpcode();

2921

if (OperandOpcode == ISD::SETCC)

2922

return getSETCCInGPR(Operand, SetccInGPROpts::ZExtOrig);

2923

else if (OperandOpcode == ISD::TRUNCATE) {

2924

SDValue InputOp = Operand.getOperand(0);

2925

EVT InVT = InputOp.getValueType();

2926

return SDValue(CurDAG->getMachineNode(InVT == MVT::i32 ? PPC::RLDICL_32 :

2927

PPC::RLDICL, dl, InVT, InputOp,

2928

S->getI64Imm(0, dl),

2929

S->getI64Imm(63, dl)), 0);

2930

} else if (isLogicOp(OperandOpcode))

2931

return computeLogicOpInGPR(Operand);

2932

return SDValue();

2933

};

2934

LHS = getLogicOperand(LogicOp.getOperand(0));

2935

RHS = getLogicOperand(LogicOp.getOperand(1));

2936

2937

// If a GPR sequence can't be produced for the LHS we can't proceed.

2938

// Not producing a GPR sequence for the RHS is only a problem if this isn't

2939

// a bitwise negation operation.

2940

if (!LHS || (!RHS && !IsBitwiseNegation))

2941

return SDValue();

2942

2943

NumLogicOpsOnComparison++;

2944

2945

// We will use the inputs as 64-bit values.

2946

if (LHS.getValueType() == MVT::i32)

2947

LHS = addExtOrTrunc(LHS, ExtOrTruncConversion::Ext);

2948

if (!IsBitwiseNegation && RHS.getValueType() == MVT::i32)

2949

RHS = addExtOrTrunc(RHS, ExtOrTruncConversion::Ext);

2950

2951

unsigned NewOpc;

2952

switch (LogicOp.getOpcode()) {

2953

default: llvm_unreachable("Unknown logic operation.")::llvm::llvm_unreachable_internal("Unknown logic operation.",
"/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 2953);

2954

case ISD::AND: NewOpc = PPC::AND8; break;

2955

case ISD::OR: NewOpc = PPC::OR8; break;

2956

case ISD::XOR: NewOpc = PPC::XOR8; break;

2957

}

2958

2959

if (IsBitwiseNegation) {

2960

RHS = S->getI64Imm(1, dl);

2961

NewOpc = PPC::XORI8;

2962

}

2963

2964

return SDValue(CurDAG->getMachineNode(NewOpc, dl, MVT::i64, LHS, RHS), 0);

2965

2966

}

2967

2968

/// If the value isn't guaranteed to be sign-extended to 64-bits, extend it.

2969

/// Otherwise just reinterpret it as a 64-bit value.

2970

/// Useful when emitting comparison code for 32-bit values without using

2971

/// the compare instruction (which only considers the lower 32-bits).

2972

SDValue IntegerCompareEliminator::signExtendInputIfNeeded(SDValue Input) {

2973

assert(Input.getValueType() == MVT::i32 &&(static_cast <bool> (Input.getValueType() == MVT::i32 &&
"Can only sign-extend 32-bit values here.") ? void (0) : __assert_fail
("Input.getValueType() == MVT::i32 && \"Can only sign-extend 32-bit values here.\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 2974, __extension__ __PRETTY_FUNCTION__))

2974

"Can only sign-extend 32-bit values here.")(static_cast <bool> (Input.getValueType() == MVT::i32 &&
"Can only sign-extend 32-bit values here.") ? void (0) : __assert_fail
("Input.getValueType() == MVT::i32 && \"Can only sign-extend 32-bit values here.\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 2974, __extension__ __PRETTY_FUNCTION__));

2975

unsigned Opc = Input.getOpcode();

2976

2977

// The value was sign extended and then truncated to 32-bits. No need to

2978

// sign extend it again.

2979

if (Opc == ISD::TRUNCATE &&

2980

(Input.getOperand(0).getOpcode() == ISD::AssertSext ||

2981

Input.getOperand(0).getOpcode() == ISD::SIGN_EXTEND))

2982

return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);

2983

2984

LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input);

2985

// The input is a sign-extending load. All ppc sign-extending loads

2986

// sign-extend to the full 64-bits.

2987

if (InputLoad && InputLoad->getExtensionType() == ISD::SEXTLOAD)

2988

return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);

2989

2990

ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input);

2991

// We don't sign-extend constants.

2992

if (InputConst)

2993

return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);

2994

2995

SDLoc dl(Input);

2996

SignExtensionsAdded++;

2997

return SDValue(CurDAG->getMachineNode(PPC::EXTSW_32_64, dl,

2998

MVT::i64, Input), 0);

2999

}

3000

3001

/// If the value isn't guaranteed to be zero-extended to 64-bits, extend it.

3002

/// Otherwise just reinterpret it as a 64-bit value.

3003

/// Useful when emitting comparison code for 32-bit values without using

3004

/// the compare instruction (which only considers the lower 32-bits).

3005

SDValue IntegerCompareEliminator::zeroExtendInputIfNeeded(SDValue Input) {

3006

assert(Input.getValueType() == MVT::i32 &&(static_cast <bool> (Input.getValueType() == MVT::i32 &&
"Can only zero-extend 32-bit values here.") ? void (0) : __assert_fail
("Input.getValueType() == MVT::i32 && \"Can only zero-extend 32-bit values here.\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 3007, __extension__ __PRETTY_FUNCTION__))

3007

"Can only zero-extend 32-bit values here.")(static_cast <bool> (Input.getValueType() == MVT::i32 &&
"Can only zero-extend 32-bit values here.") ? void (0) : __assert_fail
("Input.getValueType() == MVT::i32 && \"Can only zero-extend 32-bit values here.\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 3007, __extension__ __PRETTY_FUNCTION__));

3008

unsigned Opc = Input.getOpcode();

3009

3010

// The only condition under which we can omit the actual extend instruction:

3011

// - The value is a positive constant

3012

// - The value comes from a load that isn't a sign-extending load

3013

// An ISD::TRUNCATE needs to be zero-extended unless it is fed by a zext.

3014

bool IsTruncateOfZExt = Opc == ISD::TRUNCATE &&

3015

(Input.getOperand(0).getOpcode() == ISD::AssertZext ||

3016

Input.getOperand(0).getOpcode() == ISD::ZERO_EXTEND);

3017

if (IsTruncateOfZExt)

3018

return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);

3019

3020

ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input);

3021

if (InputConst && InputConst->getSExtValue() >= 0)

3022

return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);

3023

3024

LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input);

3025

// The input is a load that doesn't sign-extend (it will be zero-extended).

3026

if (InputLoad && InputLoad->getExtensionType() != ISD::SEXTLOAD)

3027

return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);

3028

3029

// None of the above, need to zero-extend.

3030

SDLoc dl(Input);

3031

ZeroExtensionsAdded++;

3032

return SDValue(CurDAG->getMachineNode(PPC::RLDICL_32_64, dl, MVT::i64, Input,

3033

S->getI64Imm(0, dl),

3034

S->getI64Imm(32, dl)), 0);

3035

}

3036

3037

// Handle a 32-bit value in a 64-bit register and vice-versa. These are of

3038

// course not actual zero/sign extensions that will generate machine code,

3039

// they're just a way to reinterpret a 32 bit value in a register as a

3040

// 64 bit value and vice-versa.

3041

SDValue IntegerCompareEliminator::addExtOrTrunc(SDValue NatWidthRes,

3042

ExtOrTruncConversion Conv) {

3043

SDLoc dl(NatWidthRes);

3044

3045

// For reinterpreting 32-bit values as 64 bit values, we generate

3046

// INSERT_SUBREG IMPLICIT_DEF:i64, <input>, TargetConstant:i32<1>

3047

if (Conv == ExtOrTruncConversion::Ext) {

3048

SDValue ImDef(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, MVT::i64), 0);

3049

SDValue SubRegIdx =

3050

CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);

3051

return SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, MVT::i64,

3052

ImDef, NatWidthRes, SubRegIdx), 0);

3053

}

3054

3055

assert(Conv == ExtOrTruncConversion::Trunc &&(static_cast <bool> (Conv == ExtOrTruncConversion::Trunc
&& "Unknown convertion between 32 and 64 bit values."
) ? void (0) : __assert_fail ("Conv == ExtOrTruncConversion::Trunc && \"Unknown convertion between 32 and 64 bit values.\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 3056, __extension__ __PRETTY_FUNCTION__))

3056

"Unknown convertion between 32 and 64 bit values.")(static_cast <bool> (Conv == ExtOrTruncConversion::Trunc
&& "Unknown convertion between 32 and 64 bit values."
) ? void (0) : __assert_fail ("Conv == ExtOrTruncConversion::Trunc && \"Unknown convertion between 32 and 64 bit values.\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 3056, __extension__ __PRETTY_FUNCTION__));

3057

// For reinterpreting 64-bit values as 32-bit values, we just need to

3058

// EXTRACT_SUBREG (i.e. extract the low word).

3059

SDValue SubRegIdx =

3060

CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);

3061

return SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl, MVT::i32,

3062

NatWidthRes, SubRegIdx), 0);

3063

}

3064

3065

// Produce a GPR sequence for compound comparisons (<=, >=) against zero.

3066

// Handle both zero-extensions and sign-extensions.

3067

SDValue

3068

IntegerCompareEliminator::getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl,

3069

ZeroCompare CmpTy) {

3070

EVT InVT = LHS.getValueType();

3071

bool Is32Bit = InVT == MVT::i32;

3072

SDValue ToExtend;

3073

3074

// Produce the value that needs to be either zero or sign extended.

3075

switch (CmpTy) {

3076

case ZeroCompare::GEZExt:

3077

case ZeroCompare::GESExt:

3078

ToExtend = SDValue(CurDAG->getMachineNode(Is32Bit ? PPC::NOR : PPC::NOR8,

3079

dl, InVT, LHS, LHS), 0);

3080

break;

3081

case ZeroCompare::LEZExt:

3082

case ZeroCompare::LESExt: {

3083

if (Is32Bit) {

3084

// Upper 32 bits cannot be undefined for this sequence.

3085

LHS = signExtendInputIfNeeded(LHS);

3086

SDValue Neg =

3087

SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);

3088

ToExtend =

3089

SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,

3090

Neg, S->getI64Imm(1, dl),

3091

S->getI64Imm(63, dl)), 0);

3092

} else {

3093

SDValue Addi =

3094

SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,

3095

S->getI64Imm(~0ULL, dl)), 0);

3096

ToExtend = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,

3097

Addi, LHS), 0);

3098

}

3099

break;

3100

}

3101

}

3102

3103

// For 64-bit sequences, the extensions are the same for the GE/LE cases.

3104

if (!Is32Bit &&

3105

(CmpTy == ZeroCompare::GEZExt || CmpTy == ZeroCompare::LEZExt))

3106

return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,

3107

ToExtend, S->getI64Imm(1, dl),

3108

S->getI64Imm(63, dl)), 0);

3109

if (!Is32Bit &&

3110

(CmpTy == ZeroCompare::GESExt || CmpTy == ZeroCompare::LESExt))

3111

return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, ToExtend,

3112

S->getI64Imm(63, dl)), 0);

3113

3114

assert(Is32Bit && "Should have handled the 32-bit sequences above.")(static_cast <bool> (Is32Bit && "Should have handled the 32-bit sequences above."
) ? void (0) : __assert_fail ("Is32Bit && \"Should have handled the 32-bit sequences above.\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 3114, __extension__ __PRETTY_FUNCTION__));

3115

// For 32-bit sequences, the extensions differ between GE/LE cases.

3116

switch (CmpTy) {

3117

case ZeroCompare::GEZExt: {

3118

SDValue ShiftOps[] = { ToExtend, S->getI32Imm(1, dl), S->getI32Imm(31, dl),

3119

S->getI32Imm(31, dl) };

3120

return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,

3121

ShiftOps), 0);

3122

}

3123

case ZeroCompare::GESExt:

3124

return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, ToExtend,

3125

S->getI32Imm(31, dl)), 0);

3126

case ZeroCompare::LEZExt:

3127

return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, ToExtend,

3128

S->getI32Imm(1, dl)), 0);

3129

case ZeroCompare::LESExt:

3130

return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, ToExtend,

3131

S->getI32Imm(-1, dl)), 0);

3132

}

3133

3134

// The above case covers all the enumerators so it can't have a default clause

3135

// to avoid compiler warnings.

3136

llvm_unreachable("Unknown zero-comparison type.")::llvm::llvm_unreachable_internal("Unknown zero-comparison type."
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 3136);

3137

}

3138

3139

/// Produces a zero-extended result of comparing two 32-bit values according to

3140

/// the passed condition code.

3141

SDValue

3142

IntegerCompareEliminator::get32BitZExtCompare(SDValue LHS, SDValue RHS,

3143

ISD::CondCode CC,

3144

int64_t RHSValue, SDLoc dl) {

3145

if (CmpInGPR == ICGPR_I64 || CmpInGPR == ICGPR_SextI64 ||

3146

CmpInGPR == ICGPR_ZextI64 || CmpInGPR == ICGPR_Sext)

3147

return SDValue();

3148

bool IsRHSZero = RHSValue == 0;

3149

bool IsRHSOne = RHSValue == 1;

3150

bool IsRHSNegOne = RHSValue == -1LL;

3151

switch (CC) {

3152

default: return SDValue();

3153

case ISD::SETEQ: {

3154

// (zext (setcc %a, %b, seteq)) -> (lshr (cntlzw (xor %a, %b)), 5)

3155

// (zext (setcc %a, 0, seteq)) -> (lshr (cntlzw %a), 5)

3156

SDValue Xor = IsRHSZero ? LHS :

3157

SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);

3158

SDValue Clz =

3159

SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);

3160

SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl),

3161

S->getI32Imm(31, dl) };

3162

return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,

3163

ShiftOps), 0);

3164

}

3165

case ISD::SETNE: {

3166

// (zext (setcc %a, %b, setne)) -> (xor (lshr (cntlzw (xor %a, %b)), 5), 1)

3167

// (zext (setcc %a, 0, setne)) -> (xor (lshr (cntlzw %a), 5), 1)

3168

SDValue Xor = IsRHSZero ? LHS :

3169

SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);

3170

SDValue Clz =

3171

SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);

3172

SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl),

3173

S->getI32Imm(31, dl) };

3174

SDValue Shift =

3175

SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0);

3176

return SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift,

3177

S->getI32Imm(1, dl)), 0);

3178

}

3179

case ISD::SETGE: {

3180

// (zext (setcc %a, %b, setge)) -> (xor (lshr (sub %a, %b), 63), 1)

3181

// (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 31)

3182

if(IsRHSZero)

3183

return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);

3184

3185

// Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a)

3186

// by swapping inputs and falling through.

3187

std::swap(LHS, RHS);

3188

ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);

3189

IsRHSZero = RHSConst && RHSConst->isNullValue();

3190

LLVM_FALLTHROUGH[[gnu::fallthrough]];

3191

}

3192

case ISD::SETLE: {

3193

if (CmpInGPR == ICGPR_NonExtIn)

3194

return SDValue();

3195

// (zext (setcc %a, %b, setle)) -> (xor (lshr (sub %b, %a), 63), 1)

3196

// (zext (setcc %a, 0, setle)) -> (xor (lshr (- %a), 63), 1)

3197

if(IsRHSZero) {

3198

if (CmpInGPR == ICGPR_NonExtIn)

3199

return SDValue();

3200

return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);

3201

}

3202

3203

// The upper 32-bits of the register can't be undefined for this sequence.

3204

LHS = signExtendInputIfNeeded(LHS);

3205

RHS = signExtendInputIfNeeded(RHS);

3206

SDValue Sub =

3207

SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);

3208

SDValue Shift =

3209

SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Sub,

3210

S->getI64Imm(1, dl), S->getI64Imm(63, dl)),

3211

0);

3212

return

3213

SDValue(CurDAG->getMachineNode(PPC::XORI8, dl,

3214

MVT::i64, Shift, S->getI32Imm(1, dl)), 0);

3215

}

3216

case ISD::SETGT: {

3217

// (zext (setcc %a, %b, setgt)) -> (lshr (sub %b, %a), 63)

3218

// (zext (setcc %a, -1, setgt)) -> (lshr (~ %a), 31)

3219

// (zext (setcc %a, 0, setgt)) -> (lshr (- %a), 63)

3220

// Handle SETLT -1 (which is equivalent to SETGE 0).

3221

if (IsRHSNegOne)

3222

return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);

3223

3224

if (IsRHSZero) {

3225

if (CmpInGPR == ICGPR_NonExtIn)

3226

return SDValue();

3227

// The upper 32-bits of the register can't be undefined for this sequence.

3228

LHS = signExtendInputIfNeeded(LHS);

3229

RHS = signExtendInputIfNeeded(RHS);

3230

SDValue Neg =

3231

SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);

3232

return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,

3233

Neg, S->getI32Imm(1, dl), S->getI32Imm(63, dl)), 0);

3234

}

3235

// Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as

3236

// (%b < %a) by swapping inputs and falling through.

3237

std::swap(LHS, RHS);

3238

ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);

3239

IsRHSZero = RHSConst && RHSConst->isNullValue();

3240

IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;

3241

LLVM_FALLTHROUGH[[gnu::fallthrough]];

3242

}

3243

case ISD::SETLT: {

3244

// (zext (setcc %a, %b, setlt)) -> (lshr (sub %a, %b), 63)

3245

// (zext (setcc %a, 1, setlt)) -> (xor (lshr (- %a), 63), 1)

3246

// (zext (setcc %a, 0, setlt)) -> (lshr %a, 31)

3247

// Handle SETLT 1 (which is equivalent to SETLE 0).

3248

if (IsRHSOne) {

3249

if (CmpInGPR == ICGPR_NonExtIn)

3250

return SDValue();

3251

return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);

3252

}

3253

3254

if (IsRHSZero) {

3255

SDValue ShiftOps[] = { LHS, S->getI32Imm(1, dl), S->getI32Imm(31, dl),

3256

S->getI32Imm(31, dl) };

3257

return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,

3258

ShiftOps), 0);

3259

}

3260

3261

if (CmpInGPR == ICGPR_NonExtIn)

3262

return SDValue();

3263

// The upper 32-bits of the register can't be undefined for this sequence.

3264

LHS = signExtendInputIfNeeded(LHS);

3265

RHS = signExtendInputIfNeeded(RHS);

3266

SDValue SUBFNode =

3267

SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);

3268

return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,

3269

SUBFNode, S->getI64Imm(1, dl),

3270

S->getI64Imm(63, dl)), 0);

3271

}

3272

case ISD::SETUGE:

3273

// (zext (setcc %a, %b, setuge)) -> (xor (lshr (sub %b, %a), 63), 1)

3274

// (zext (setcc %a, %b, setule)) -> (xor (lshr (sub %a, %b), 63), 1)

3275

std::swap(LHS, RHS);

3276

LLVM_FALLTHROUGH[[gnu::fallthrough]];

3277

case ISD::SETULE: {

3278

if (CmpInGPR == ICGPR_NonExtIn)

3279

return SDValue();

3280

// The upper 32-bits of the register can't be undefined for this sequence.

3281

LHS = zeroExtendInputIfNeeded(LHS);

3282

RHS = zeroExtendInputIfNeeded(RHS);

3283

SDValue Subtract =

3284

SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);

3285

SDValue SrdiNode =

3286

SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,

3287

Subtract, S->getI64Imm(1, dl),

3288

S->getI64Imm(63, dl)), 0);

3289

return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, SrdiNode,

3290

S->getI32Imm(1, dl)), 0);

3291

}

3292

case ISD::SETUGT:

3293

// (zext (setcc %a, %b, setugt)) -> (lshr (sub %b, %a), 63)

3294

// (zext (setcc %a, %b, setult)) -> (lshr (sub %a, %b), 63)

3295

std::swap(LHS, RHS);

3296

LLVM_FALLTHROUGH[[gnu::fallthrough]];

3297

case ISD::SETULT: {

3298

if (CmpInGPR == ICGPR_NonExtIn)

3299

return SDValue();

3300

// The upper 32-bits of the register can't be undefined for this sequence.

3301

LHS = zeroExtendInputIfNeeded(LHS);

3302

RHS = zeroExtendInputIfNeeded(RHS);

3303

SDValue Subtract =

3304

SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);

3305

return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,

3306

Subtract, S->getI64Imm(1, dl),

3307

S->getI64Imm(63, dl)), 0);

3308

}

3309

}

3310

}

3311

3312

/// Produces a sign-extended result of comparing two 32-bit values according to

3313

/// the passed condition code.

3314

SDValue

3315

IntegerCompareEliminator::get32BitSExtCompare(SDValue LHS, SDValue RHS,

3316

ISD::CondCode CC,

3317

int64_t RHSValue, SDLoc dl) {

3318

if (CmpInGPR == ICGPR_I64 || CmpInGPR == ICGPR_SextI64 ||

3319

CmpInGPR == ICGPR_ZextI64 || CmpInGPR == ICGPR_Zext)

3320

return SDValue();

3321

bool IsRHSZero = RHSValue == 0;

3322

bool IsRHSOne = RHSValue == 1;

3323

bool IsRHSNegOne = RHSValue == -1LL;

3324

3325

switch (CC) {

3326

default: return SDValue();

3327

case ISD::SETEQ: {

3328

// (sext (setcc %a, %b, seteq)) ->

3329

// (ashr (shl (ctlz (xor %a, %b)), 58), 63)

3330

// (sext (setcc %a, 0, seteq)) ->

3331

// (ashr (shl (ctlz %a), 58), 63)

3332

SDValue CountInput = IsRHSZero ? LHS :

3333

SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);

3334

SDValue Cntlzw =

3335

SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, CountInput), 0);

3336

SDValue SHLOps[] = { Cntlzw, S->getI32Imm(27, dl),

3337

S->getI32Imm(5, dl), S->getI32Imm(31, dl) };

3338

SDValue Slwi =

3339

SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, SHLOps), 0);

3340

return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Slwi), 0);

3341

}

3342

case ISD::SETNE: {

3343

// Bitwise xor the operands, count leading zeros, shift right by 5 bits and

3344

// flip the bit, finally take 2's complement.

3345

// (sext (setcc %a, %b, setne)) ->

3346

// (neg (xor (lshr (ctlz (xor %a, %b)), 5), 1))

3347

// Same as above, but the first xor is not needed.

3348

// (sext (setcc %a, 0, setne)) ->

3349

// (neg (xor (lshr (ctlz %a), 5), 1))

3350

SDValue Xor = IsRHSZero ? LHS :

3351

SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);

3352

SDValue Clz =

3353

SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);

3354

SDValue ShiftOps[] =

3355

{ Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl), S->getI32Imm(31, dl) };

3356

SDValue Shift =

3357

SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0);

3358

SDValue Xori =

3359

SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift,

3360

S->getI32Imm(1, dl)), 0);

3361

return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Xori), 0);

3362

}

3363

case ISD::SETGE: {

3364

// (sext (setcc %a, %b, setge)) -> (add (lshr (sub %a, %b), 63), -1)

3365

// (sext (setcc %a, 0, setge)) -> (ashr (~ %a), 31)

3366

if (IsRHSZero)

3367

return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);

3368

3369

// Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a)

3370

// by swapping inputs and falling through.

3371

std::swap(LHS, RHS);

3372

ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);

3373

IsRHSZero = RHSConst && RHSConst->isNullValue();

3374

LLVM_FALLTHROUGH[[gnu::fallthrough]];

3375

}

3376

case ISD::SETLE: {

3377

if (CmpInGPR == ICGPR_NonExtIn)

3378

return SDValue();

3379

// (sext (setcc %a, %b, setge)) -> (add (lshr (sub %b, %a), 63), -1)

3380

// (sext (setcc %a, 0, setle)) -> (add (lshr (- %a), 63), -1)

3381

if (IsRHSZero)

3382

return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);

3383

3384

// The upper 32-bits of the register can't be undefined for this sequence.

3385

LHS = signExtendInputIfNeeded(LHS);

3386

RHS = signExtendInputIfNeeded(RHS);

3387

SDValue SUBFNode =

3388

SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, MVT::Glue,

3389

LHS, RHS), 0);

3390

SDValue Srdi =

3391

SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,

3392

SUBFNode, S->getI64Imm(1, dl),

3393

S->getI64Imm(63, dl)), 0);

3394

return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Srdi,

3395

S->getI32Imm(-1, dl)), 0);

3396

}

3397

case ISD::SETGT: {

3398

// (sext (setcc %a, %b, setgt)) -> (ashr (sub %b, %a), 63)

3399

// (sext (setcc %a, -1, setgt)) -> (ashr (~ %a), 31)

3400

// (sext (setcc %a, 0, setgt)) -> (ashr (- %a), 63)

3401

if (IsRHSNegOne)

3402

return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);

3403

if (IsRHSZero) {

3404

if (CmpInGPR == ICGPR_NonExtIn)

3405

return SDValue();

3406

// The upper 32-bits of the register can't be undefined for this sequence.

3407

LHS = signExtendInputIfNeeded(LHS);

3408

RHS = signExtendInputIfNeeded(RHS);

3409

SDValue Neg =

3410

SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);

3411

return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Neg,

3412

S->getI64Imm(63, dl)), 0);

3413

}

3414

// Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as

3415

// (%b < %a) by swapping inputs and falling through.

3416

std::swap(LHS, RHS);

3417

ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);

3418

IsRHSZero = RHSConst && RHSConst->isNullValue();

3419

IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;

3420

LLVM_FALLTHROUGH[[gnu::fallthrough]];

3421

}

3422

case ISD::SETLT: {

3423

// (sext (setcc %a, %b, setgt)) -> (ashr (sub %a, %b), 63)

3424

// (sext (setcc %a, 1, setgt)) -> (add (lshr (- %a), 63), -1)

3425

// (sext (setcc %a, 0, setgt)) -> (ashr %a, 31)

3426

if (IsRHSOne) {

3427

if (CmpInGPR == ICGPR_NonExtIn)

3428

return SDValue();

3429

return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);

3430

}

3431

if (IsRHSZero)

3432

return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, LHS,

3433

S->getI32Imm(31, dl)), 0);

3434

3435

if (CmpInGPR == ICGPR_NonExtIn)

3436

return SDValue();

3437

// The upper 32-bits of the register can't be undefined for this sequence.

3438

LHS = signExtendInputIfNeeded(LHS);

3439

RHS = signExtendInputIfNeeded(RHS);

3440

SDValue SUBFNode =

3441

SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);

3442

return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,

3443

SUBFNode, S->getI64Imm(63, dl)), 0);

3444

}

3445

case ISD::SETUGE:

3446

// (sext (setcc %a, %b, setuge)) -> (add (lshr (sub %a, %b), 63), -1)

3447

// (sext (setcc %a, %b, setule)) -> (add (lshr (sub %b, %a), 63), -1)

3448

std::swap(LHS, RHS);

3449

LLVM_FALLTHROUGH[[gnu::fallthrough]];

3450

case ISD::SETULE: {

3451

if (CmpInGPR == ICGPR_NonExtIn)

3452

return SDValue();

3453

// The upper 32-bits of the register can't be undefined for this sequence.

3454

LHS = zeroExtendInputIfNeeded(LHS);

3455

RHS = zeroExtendInputIfNeeded(RHS);

3456

SDValue Subtract =

3457

SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);

3458

SDValue Shift =

3459

SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Subtract,

3460

S->getI32Imm(1, dl), S->getI32Imm(63,dl)),

3461

0);

3462

return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Shift,

3463

S->getI32Imm(-1, dl)), 0);

3464

}

3465

case ISD::SETUGT:

3466

// (sext (setcc %a, %b, setugt)) -> (ashr (sub %b, %a), 63)

3467

// (sext (setcc %a, %b, setugt)) -> (ashr (sub %a, %b), 63)

3468

std::swap(LHS, RHS);

3469

LLVM_FALLTHROUGH[[gnu::fallthrough]];

3470

case ISD::SETULT: {

3471

if (CmpInGPR == ICGPR_NonExtIn)

3472

return SDValue();

3473

// The upper 32-bits of the register can't be undefined for this sequence.

3474

LHS = zeroExtendInputIfNeeded(LHS);

3475

RHS = zeroExtendInputIfNeeded(RHS);

3476

SDValue Subtract =

3477

SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);

3478

return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,

3479

Subtract, S->getI64Imm(63, dl)), 0);

3480

}

3481

}

3482

}

3483

3484

/// Produces a zero-extended result of comparing two 64-bit values according to

3485

/// the passed condition code.

3486

SDValue

3487

IntegerCompareEliminator::get64BitZExtCompare(SDValue LHS, SDValue RHS,

3488

ISD::CondCode CC,

3489

int64_t RHSValue, SDLoc dl) {

3490

if (CmpInGPR == ICGPR_I32 || CmpInGPR == ICGPR_SextI32 ||

3491

CmpInGPR == ICGPR_ZextI32 || CmpInGPR == ICGPR_Sext)

3492

return SDValue();

3493

bool IsRHSZero = RHSValue == 0;

3494

bool IsRHSOne = RHSValue == 1;

3495

bool IsRHSNegOne = RHSValue == -1LL;

3496

switch (CC) {

3497

default: return SDValue();

3498

case ISD::SETEQ: {

3499

// (zext (setcc %a, %b, seteq)) -> (lshr (ctlz (xor %a, %b)), 6)

3500

// (zext (setcc %a, 0, seteq)) -> (lshr (ctlz %a), 6)

3501

SDValue Xor = IsRHSZero ? LHS :

3502

SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);

3503

SDValue Clz =

3504

SDValue(CurDAG->getMachineNode(PPC::CNTLZD, dl, MVT::i64, Xor), 0);

3505

return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Clz,

3506

S->getI64Imm(58, dl),

3507

S->getI64Imm(63, dl)), 0);

3508

}

3509

case ISD::SETNE: {

3510

// {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)

3511

// (zext (setcc %a, %b, setne)) -> (sube addc.reg, addc.reg, addc.CA)

3512

// {addcz.reg, addcz.CA} = (addcarry %a, -1)

3513

// (zext (setcc %a, 0, setne)) -> (sube addcz.reg, addcz.reg, addcz.CA)

3514

SDValue Xor = IsRHSZero ? LHS :

3515

SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);

3516

SDValue AC =

3517

SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue,

3518

Xor, S->getI32Imm(~0U, dl)), 0);

3519

return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, AC,

3520

Xor, AC.getValue(1)), 0);

3521

}

3522

case ISD::SETGE: {

3523

// {subc.reg, subc.CA} = (subcarry %a, %b)

3524

// (zext (setcc %a, %b, setge)) ->

3525

// (adde (lshr %b, 63), (ashr %a, 63), subc.CA)

3526

// (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 63)

3527

if (IsRHSZero)

3528

return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);

3529

std::swap(LHS, RHS);

3530

ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);

3531

IsRHSZero = RHSConst && RHSConst->isNullValue();

3532

LLVM_FALLTHROUGH[[gnu::fallthrough]];

3533

}

3534

case ISD::SETLE: {

3535

// {subc.reg, subc.CA} = (subcarry %b, %a)

3536

// (zext (setcc %a, %b, setge)) ->

3537

// (adde (lshr %a, 63), (ashr %b, 63), subc.CA)

3538

// (zext (setcc %a, 0, setge)) -> (lshr (or %a, (add %a, -1)), 63)

3539

if (IsRHSZero)

3540

return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);

3541

SDValue ShiftL =

3542

SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,

3543

S->getI64Imm(1, dl),

3544

S->getI64Imm(63, dl)), 0);

3545

SDValue ShiftR =

3546

SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS,

3547

S->getI64Imm(63, dl)), 0);

3548

SDValue SubtractCarry =

3549

SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,

3550

LHS, RHS), 1);

3551

return SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,

3552

ShiftR, ShiftL, SubtractCarry), 0);

3553

}

3554

case ISD::SETGT: {

3555

// {subc.reg, subc.CA} = (subcarry %b, %a)

3556

// (zext (setcc %a, %b, setgt)) ->

3557

// (xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)

3558

// (zext (setcc %a, 0, setgt)) -> (lshr (nor (add %a, -1), %a), 63)

3559

if (IsRHSNegOne)

3560

return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);

3561

if (IsRHSZero) {

3562

SDValue Addi =

3563

SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,

3564

S->getI64Imm(~0ULL, dl)), 0);

3565

SDValue Nor =

3566

SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Addi, LHS), 0);

3567

return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Nor,

3568

S->getI64Imm(1, dl),

3569

S->getI64Imm(63, dl)), 0);

3570

}

3571

std::swap(LHS, RHS);

3572

ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);

3573

IsRHSZero = RHSConst && RHSConst->isNullValue();

3574

IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;

3575

LLVM_FALLTHROUGH[[gnu::fallthrough]];

3576

}

3577

case ISD::SETLT: {

3578

// {subc.reg, subc.CA} = (subcarry %a, %b)

3579

// (zext (setcc %a, %b, setlt)) ->

3580

// (xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1)

3581

// (zext (setcc %a, 0, setlt)) -> (lshr %a, 63)

3582

if (IsRHSOne)

3583

return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);

3584

if (IsRHSZero)

3585

return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,

3586

S->getI64Imm(1, dl),

3587

S->getI64Imm(63, dl)), 0);

3588

SDValue SRADINode =

3589

SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,

3590

LHS, S->getI64Imm(63, dl)), 0);

3591

SDValue SRDINode =

3592

SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,

3593

RHS, S->getI64Imm(1, dl),

3594

S->getI64Imm(63, dl)), 0);

3595

SDValue SUBFC8Carry =

3596

SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,

3597

RHS, LHS), 1);

3598

SDValue ADDE8Node =

3599

SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,

3600

SRDINode, SRADINode, SUBFC8Carry), 0);

3601

return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,

3602

ADDE8Node, S->getI64Imm(1, dl)), 0);

3603

}

3604

case ISD::SETUGE:

3605

// {subc.reg, subc.CA} = (subcarry %a, %b)

3606

// (zext (setcc %a, %b, setuge)) -> (add (sube %b, %b, subc.CA), 1)

3607

std::swap(LHS, RHS);

3608

LLVM_FALLTHROUGH[[gnu::fallthrough]];

3609

case ISD::SETULE: {

3610

// {subc.reg, subc.CA} = (subcarry %b, %a)

3611

// (zext (setcc %a, %b, setule)) -> (add (sube %a, %a, subc.CA), 1)

3612

SDValue SUBFC8Carry =

3613

SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,

3614

LHS, RHS), 1);

3615

SDValue SUBFE8Node =

3616

SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue,

3617

LHS, LHS, SUBFC8Carry), 0);

3618

return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64,

3619

SUBFE8Node, S->getI64Imm(1, dl)), 0);

3620

}

3621

case ISD::SETUGT:

3622

// {subc.reg, subc.CA} = (subcarry %b, %a)

3623

// (zext (setcc %a, %b, setugt)) -> -(sube %b, %b, subc.CA)

3624

std::swap(LHS, RHS);

3625

LLVM_FALLTHROUGH[[gnu::fallthrough]];

3626

case ISD::SETULT: {

3627

// {subc.reg, subc.CA} = (subcarry %a, %b)

3628

// (zext (setcc %a, %b, setult)) -> -(sube %a, %a, subc.CA)

3629

SDValue SubtractCarry =

3630

SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,

3631

RHS, LHS), 1);

3632

SDValue ExtSub =

3633

SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64,

3634

LHS, LHS, SubtractCarry), 0);

3635

return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64,

3636

ExtSub), 0);

3637

}

3638

}

3639

}

3640

3641

/// Produces a sign-extended result of comparing two 64-bit values according to

3642

/// the passed condition code.

3643

SDValue

3644

IntegerCompareEliminator::get64BitSExtCompare(SDValue LHS, SDValue RHS,

3645

ISD::CondCode CC,

3646

int64_t RHSValue, SDLoc dl) {

3647

if (CmpInGPR == ICGPR_I32 || CmpInGPR == ICGPR_SextI32 ||

3648

CmpInGPR == ICGPR_ZextI32 || CmpInGPR == ICGPR_Zext)

3649

return SDValue();

3650

bool IsRHSZero = RHSValue == 0;

3651

bool IsRHSOne = RHSValue == 1;

3652

bool IsRHSNegOne = RHSValue == -1LL;

3653

switch (CC) {

3654

default: return SDValue();

3655

case ISD::SETEQ: {

3656

// {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)

3657

// (sext (setcc %a, %b, seteq)) -> (sube addc.reg, addc.reg, addc.CA)

3658

// {addcz.reg, addcz.CA} = (addcarry %a, -1)

3659

// (sext (setcc %a, 0, seteq)) -> (sube addcz.reg, addcz.reg, addcz.CA)

3660

SDValue AddInput = IsRHSZero ? LHS :

3661

SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);

3662

SDValue Addic =

3663

SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue,

3664

AddInput, S->getI32Imm(~0U, dl)), 0);

3665

return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, Addic,

3666

Addic, Addic.getValue(1)), 0);

3667

}

3668

case ISD::SETNE: {

3669

// {subfc.reg, subfc.CA} = (subcarry 0, (xor %a, %b))

3670

// (sext (setcc %a, %b, setne)) -> (sube subfc.reg, subfc.reg, subfc.CA)

3671

// {subfcz.reg, subfcz.CA} = (subcarry 0, %a)

3672

// (sext (setcc %a, 0, setne)) -> (sube subfcz.reg, subfcz.reg, subfcz.CA)

3673

SDValue Xor = IsRHSZero ? LHS :

3674

SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);

3675

SDValue SC =

3676

SDValue(CurDAG->getMachineNode(PPC::SUBFIC8, dl, MVT::i64, MVT::Glue,

3677

Xor, S->getI32Imm(0, dl)), 0);

3678

return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, SC,

3679

SC, SC.getValue(1)), 0);

3680

}

3681

case ISD::SETGE: {

3682

// {subc.reg, subc.CA} = (subcarry %a, %b)

3683

// (zext (setcc %a, %b, setge)) ->

3684

// (- (adde (lshr %b, 63), (ashr %a, 63), subc.CA))

3685

// (zext (setcc %a, 0, setge)) -> (~ (ashr %a, 63))

3686

if (IsRHSZero)

3687

return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);

3688

std::swap(LHS, RHS);

3689

ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);

3690

IsRHSZero = RHSConst && RHSConst->isNullValue();

3691

LLVM_FALLTHROUGH[[gnu::fallthrough]];

3692

}

3693

case ISD::SETLE: {

3694

// {subc.reg, subc.CA} = (subcarry %b, %a)

3695

// (zext (setcc %a, %b, setge)) ->

3696

// (- (adde (lshr %a, 63), (ashr %b, 63), subc.CA))

3697

// (zext (setcc %a, 0, setge)) -> (ashr (or %a, (add %a, -1)), 63)

3698

if (IsRHSZero)

3699

return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);

3700

SDValue ShiftR =

3701

SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS,

3702

S->getI64Imm(63, dl)), 0);

3703

SDValue ShiftL =

3704

SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,

3705

S->getI64Imm(1, dl),

3706

S->getI64Imm(63, dl)), 0);

3707

SDValue SubtractCarry =

3708

SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,

3709

LHS, RHS), 1);

3710

SDValue Adde =

3711

SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,

3712

ShiftR, ShiftL, SubtractCarry), 0);

3713

return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, Adde), 0);

3714

}

3715

case ISD::SETGT: {

3716

// {subc.reg, subc.CA} = (subcarry %b, %a)

3717

// (zext (setcc %a, %b, setgt)) ->

3718

// -(xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)

3719

// (zext (setcc %a, 0, setgt)) -> (ashr (nor (add %a, -1), %a), 63)

3720

if (IsRHSNegOne)

3721

return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);

3722

if (IsRHSZero) {

3723

SDValue Add =

3724

SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,

3725

S->getI64Imm(-1, dl)), 0);

3726

SDValue Nor =

3727

SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Add, LHS), 0);

3728

return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Nor,

3729

S->getI64Imm(63, dl)), 0);

3730

}

3731

std::swap(LHS, RHS);

3732

ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);

3733

IsRHSZero = RHSConst && RHSConst->isNullValue();

3734

IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;

3735

LLVM_FALLTHROUGH[[gnu::fallthrough]];

3736

}

3737

case ISD::SETLT: {

3738

// {subc.reg, subc.CA} = (subcarry %a, %b)

3739

// (zext (setcc %a, %b, setlt)) ->

3740

// -(xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1)

3741

// (zext (setcc %a, 0, setlt)) -> (ashr %a, 63)

3742

if (IsRHSOne)

3743

return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);

3744

if (IsRHSZero) {

3745

return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, LHS,

3746

S->getI64Imm(63, dl)), 0);

3747

}

3748

SDValue SRADINode =

3749

SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,

3750

LHS, S->getI64Imm(63, dl)), 0);

3751

SDValue SRDINode =

3752

SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,

3753

RHS, S->getI64Imm(1, dl),

3754

S->getI64Imm(63, dl)), 0);

3755

SDValue SUBFC8Carry =

3756

SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,

3757

RHS, LHS), 1);

3758

SDValue ADDE8Node =

3759

SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64,

3760

SRDINode, SRADINode, SUBFC8Carry), 0);

3761

SDValue XORI8Node =

3762

SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,

3763

ADDE8Node, S->getI64Imm(1, dl)), 0);

3764

return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64,

3765

XORI8Node), 0);

3766

}

3767

case ISD::SETUGE:

3768

// {subc.reg, subc.CA} = (subcarry %a, %b)

3769

// (sext (setcc %a, %b, setuge)) -> ~(sube %b, %b, subc.CA)

3770

std::swap(LHS, RHS);

3771

LLVM_FALLTHROUGH[[gnu::fallthrough]];

3772

case ISD::SETULE: {

3773

// {subc.reg, subc.CA} = (subcarry %b, %a)

3774

// (sext (setcc %a, %b, setule)) -> ~(sube %a, %a, subc.CA)

3775

SDValue SubtractCarry =

3776

SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,

3777

LHS, RHS), 1);

3778

SDValue ExtSub =

3779

SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue, LHS,

3780

LHS, SubtractCarry), 0);

3781

return SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64,

3782

ExtSub, ExtSub), 0);

3783

}

3784

case ISD::SETUGT:

3785

// {subc.reg, subc.CA} = (subcarry %b, %a)

3786

// (sext (setcc %a, %b, setugt)) -> (sube %b, %b, subc.CA)

3787

std::swap(LHS, RHS);

3788

LLVM_FALLTHROUGH[[gnu::fallthrough]];

3789

case ISD::SETULT: {

3790

// {subc.reg, subc.CA} = (subcarry %a, %b)

3791

// (sext (setcc %a, %b, setult)) -> (sube %a, %a, subc.CA)

3792

SDValue SubCarry =

3793

SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,

3794

RHS, LHS), 1);

3795

return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64,

3796

LHS, LHS, SubCarry), 0);

3797

}

3798

}

3799

}

3800

3801

/// Do all uses of this SDValue need the result in a GPR?

3802

/// This is meant to be used on values that have type i1 since

3803

/// it is somewhat meaningless to ask if values of other types

3804

/// should be kept in GPR's.

3805

static bool allUsesExtend(SDValue Compare, SelectionDAG *CurDAG) {

3806

assert(Compare.getOpcode() == ISD::SETCC &&(static_cast <bool> (Compare.getOpcode() == ISD::SETCC &&
"An ISD::SETCC node required here.") ? void (0) : __assert_fail
("Compare.getOpcode() == ISD::SETCC && \"An ISD::SETCC node required here.\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 3807, __extension__ __PRETTY_FUNCTION__))

3807

"An ISD::SETCC node required here.")(static_cast <bool> (Compare.getOpcode() == ISD::SETCC &&
"An ISD::SETCC node required here.") ? void (0) : __assert_fail
("Compare.getOpcode() == ISD::SETCC && \"An ISD::SETCC node required here.\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 3807, __extension__ __PRETTY_FUNCTION__));

3808

3809

// For values that have a single use, the caller should obviously already have

3810

// checked if that use is an extending use. We check the other uses here.

3811

if (Compare.hasOneUse())

3812

return true;

3813

// We want the value in a GPR if it is being extended, used for a select, or

3814

// used in logical operations.

3815

for (auto CompareUse : Compare.getNode()->uses())

3816

if (CompareUse->getOpcode() != ISD::SIGN_EXTEND &&

3817

CompareUse->getOpcode() != ISD::ZERO_EXTEND &&

3818

CompareUse->getOpcode() != ISD::SELECT &&

3819

!isLogicOp(CompareUse->getOpcode())) {

3820

OmittedForNonExtendUses++;

3821

return false;

3822

}

3823

return true;

3824

}

3825

3826

/// Returns an equivalent of a SETCC node but with the result the same width as

3827

/// the inputs. This can also be used for SELECT_CC if either the true or false

3828

/// values is a power of two while the other is zero.

3829

SDValue IntegerCompareEliminator::getSETCCInGPR(SDValue Compare,

3830

SetccInGPROpts ConvOpts) {

3831

assert((Compare.getOpcode() == ISD::SETCC ||(static_cast <bool> ((Compare.getOpcode() == ISD::SETCC
|| Compare.getOpcode() == ISD::SELECT_CC) && "An ISD::SETCC node required here."
) ? void (0) : __assert_fail ("(Compare.getOpcode() == ISD::SETCC || Compare.getOpcode() == ISD::SELECT_CC) && \"An ISD::SETCC node required here.\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 3833, __extension__ __PRETTY_FUNCTION__))

3832

Compare.getOpcode() == ISD::SELECT_CC) &&(static_cast <bool> ((Compare.getOpcode() == ISD::SETCC
|| Compare.getOpcode() == ISD::SELECT_CC) && "An ISD::SETCC node required here."
) ? void (0) : __assert_fail ("(Compare.getOpcode() == ISD::SETCC || Compare.getOpcode() == ISD::SELECT_CC) && \"An ISD::SETCC node required here.\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 3833, __extension__ __PRETTY_FUNCTION__))

3833

"An ISD::SETCC node required here.")(static_cast <bool> ((Compare.getOpcode() == ISD::SETCC
|| Compare.getOpcode() == ISD::SELECT_CC) && "An ISD::SETCC node required here."
) ? void (0) : __assert_fail ("(Compare.getOpcode() == ISD::SETCC || Compare.getOpcode() == ISD::SELECT_CC) && \"An ISD::SETCC node required here.\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 3833, __extension__ __PRETTY_FUNCTION__));

3834

3835

// Don't convert this comparison to a GPR sequence because there are uses

3836

// of the i1 result (i.e. uses that require the result in the CR).

3837

if ((Compare.getOpcode() == ISD::SETCC) && !allUsesExtend(Compare, CurDAG))

3838

return SDValue();

3839

3840

SDValue LHS = Compare.getOperand(0);

3841

SDValue RHS = Compare.getOperand(1);

3842

3843

// The condition code is operand 2 for SETCC and operand 4 for SELECT_CC.

3844

int CCOpNum = Compare.getOpcode() == ISD::SELECT_CC ? 4 : 2;

3845

ISD::CondCode CC =

3846

cast<CondCodeSDNode>(Compare.getOperand(CCOpNum))->get();

3847

EVT InputVT = LHS.getValueType();

3848

if (InputVT != MVT::i32 && InputVT != MVT::i64)

3849

return SDValue();

3850

3851

if (ConvOpts == SetccInGPROpts::ZExtInvert ||

3852

ConvOpts == SetccInGPROpts::SExtInvert)

3853

CC = ISD::getSetCCInverse(CC, InputVT);

3854

3855

bool Inputs32Bit = InputVT == MVT::i32;

3856

3857

SDLoc dl(Compare);

3858

ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);

3859

int64_t RHSValue = RHSConst ? RHSConst->getSExtValue() : INT64_MAX(9223372036854775807L);

3860

bool IsSext = ConvOpts == SetccInGPROpts::SExtOrig ||

3861

ConvOpts == SetccInGPROpts::SExtInvert;

3862

3863

if (IsSext && Inputs32Bit)

3864

return get32BitSExtCompare(LHS, RHS, CC, RHSValue, dl);

3865

else if (Inputs32Bit)

3866

return get32BitZExtCompare(LHS, RHS, CC, RHSValue, dl);

3867

else if (IsSext)

3868

return get64BitSExtCompare(LHS, RHS, CC, RHSValue, dl);

3869

return get64BitZExtCompare(LHS, RHS, CC, RHSValue, dl);

3870

}

3871

3872

} // end anonymous namespace

3873

3874

/// Try to select \p N through IntegerCompareEliminator.
///
/// Only i32/i64-valued ZERO_EXTEND, SIGN_EXTEND, AND, OR and XOR nodes are
/// handed to the eliminator.
///
/// \returns true if \p N was replaced with a new node, false otherwise.
bool PPCDAGToDAGISel::tryIntCompareInGPR(SDNode *N) {
  if (N->getValueType(0) != MVT::i32 &&
      N->getValueType(0) != MVT::i64)
    return false;

  // This optimization will emit code that assumes 64-bit registers
  // so we don't want to run it in 32-bit mode. Also don't run it
  // on functions that are not to be optimized.
  if (TM.getOptLevel() == CodeGenOpt::None || !TM.isPPC64())
    return false;

  // For POWER10, it is more profitable to use the set boolean extension
  // instructions rather than the integer compare elimination codegen.
  // Users can override this via the command line option, `--ppc-gpr-icmps`.
  if (!(CmpInGPR.getNumOccurrences() > 0) && Subtarget->isISA3_1())
    return false;

  switch (N->getOpcode()) {
  default: break;
  case ISD::ZERO_EXTEND:
  case ISD::SIGN_EXTEND:
  case ISD::AND:
  case ISD::OR:
  case ISD::XOR: {
    IntegerCompareEliminator ICmpElim(CurDAG, this);
    if (SDNode *New = ICmpElim.Select(N)) {
      ReplaceNode(N, New);
      return true;
    }
    // No replacement was produced: leave the switch and report failure.
  }
  }
  return false;
}

3907

3908

/// Try to select \p N through BitPermutationSelector.
///
/// Only i32/i64-valued ROTL, SHL, SRL, AND and OR nodes are considered, and
/// only when UseBitPermRewriter is set.
///
/// \returns true if \p N was replaced with a new node, false otherwise.
bool PPCDAGToDAGISel::tryBitPermutation(SDNode *N) {
  if (N->getValueType(0) != MVT::i32 &&
      N->getValueType(0) != MVT::i64)
    return false;

  if (!UseBitPermRewriter)
    return false;

  switch (N->getOpcode()) {
  default: break;
  case ISD::ROTL:
  case ISD::SHL:
  case ISD::SRL:
  case ISD::AND:
  case ISD::OR: {
    BitPermutationSelector BPS(CurDAG);
    if (SDNode *New = BPS.Select(N)) {
      ReplaceNode(N, New);
      return true;
    }
    return false;
  }
  }

  return false;
}

3934

3935

/// SelectCC - Select a comparison of the specified values with the specified

3936

/// condition code, returning the CR# of the expression.

3937

SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,

3938

const SDLoc &dl, SDValue Chain) {

3939

// Always select the LHS.

3940

unsigned Opc;

3941

3942

if (LHS.getValueType() == MVT::i32) {

3943

unsigned Imm;

3944

if (CC == ISD::SETEQ || CC == ISD::SETNE) {

3945

if (isInt32Immediate(RHS, Imm)) {

3946

// SETEQ/SETNE comparison with 16-bit immediate, fold it.

3947

if (isUInt<16>(Imm))

3948

return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,

3949

getI32Imm(Imm & 0xFFFF, dl)),

3950

0);

3951

// If this is a 16-bit signed immediate, fold it.

3952

if (isInt<16>((int)Imm))

3953

return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,

3954

getI32Imm(Imm & 0xFFFF, dl)),

3955

0);

3956

3957

// For non-equality comparisons, the default code would materialize the

3958

// constant, then compare against it, like this:

3959

// lis r2, 4660

3960

// ori r2, r2, 22136

3961

// cmpw cr0, r3, r2

3962

// Since we are just comparing for equality, we can emit this instead:

3963

// xoris r0,r3,0x1234

3964

// cmplwi cr0,r0,0x5678

3965

// beq cr0,L6

3966

SDValue Xor(CurDAG->getMachineNode(PPC::XORIS, dl, MVT::i32, LHS,

3967

getI32Imm(Imm >> 16, dl)), 0);

3968

return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, Xor,

3969

getI32Imm(Imm & 0xFFFF, dl)), 0);

3970

}

3971

Opc = PPC::CMPLW;

3972

} else if (ISD::isUnsignedIntSetCC(CC)) {

3973

if (isInt32Immediate(RHS, Imm) && isUInt<16>(Imm))

3974

return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,

3975

getI32Imm(Imm & 0xFFFF, dl)), 0);

3976

Opc = PPC::CMPLW;

3977

} else {

3978

int16_t SImm;

3979

if (isIntS16Immediate(RHS, SImm))

3980

return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,

3981

getI32Imm((int)SImm & 0xFFFF,

3982

dl)),

3983

0);

3984

Opc = PPC::CMPW;

3985

}

3986

} else if (LHS.getValueType() == MVT::i64) {

3987

uint64_t Imm;

3988

if (CC == ISD::SETEQ || CC == ISD::SETNE) {

3989

if (isInt64Immediate(RHS.getNode(), Imm)) {

3990

// SETEQ/SETNE comparison with 16-bit immediate, fold it.

3991

if (isUInt<16>(Imm))

3992

return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,

3993

getI32Imm(Imm & 0xFFFF, dl)),

3994

0);

3995

// If this is a 16-bit signed immediate, fold it.

3996

if (isInt<16>(Imm))

3997

return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,

3998

getI32Imm(Imm & 0xFFFF, dl)),

3999

0);

4000

4001

// For non-equality comparisons, the default code would materialize the

4002

// constant, then compare against it, like this:

4003

// lis r2, 4660

4004

// ori r2, r2, 22136

4005

// cmpd cr0, r3, r2

4006

// Since we are just comparing for equality, we can emit this instead:

4007

// xoris r0,r3,0x1234

4008

// cmpldi cr0,r0,0x5678

4009

// beq cr0,L6

4010

if (isUInt<32>(Imm)) {

4011

SDValue Xor(CurDAG->getMachineNode(PPC::XORIS8, dl, MVT::i64, LHS,

4012

getI64Imm(Imm >> 16, dl)), 0);

4013

return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, Xor,

4014

getI64Imm(Imm & 0xFFFF, dl)),

4015

0);

4016

}

4017

}

4018

Opc = PPC::CMPLD;

4019

} else if (ISD::isUnsignedIntSetCC(CC)) {

4020

if (isInt64Immediate(RHS.getNode(), Imm) && isUInt<16>(Imm))

4021

return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,

4022

getI64Imm(Imm & 0xFFFF, dl)), 0);

4023

Opc = PPC::CMPLD;

4024

} else {

4025

int16_t SImm;

4026

if (isIntS16Immediate(RHS, SImm))

4027

return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,

4028

getI64Imm(SImm & 0xFFFF, dl)),

4029

0);

4030

Opc = PPC::CMPD;

4031

}

4032

} else if (LHS.getValueType() == MVT::f32) {

4033

if (Subtarget->hasSPE()) {

4034

switch (CC) {

4035

default:

4036

case ISD::SETEQ:

4037

case ISD::SETNE:

4038

Opc = PPC::EFSCMPEQ;

4039

break;

4040

case ISD::SETLT:

4041

case ISD::SETGE:

4042

case ISD::SETOLT:

4043

case ISD::SETOGE:

4044

case ISD::SETULT:

4045

case ISD::SETUGE:

4046

Opc = PPC::EFSCMPLT;

4047

break;

4048

case ISD::SETGT:

4049

case ISD::SETLE:

4050

case ISD::SETOGT:

4051

case ISD::SETOLE:

4052

case ISD::SETUGT:

4053

case ISD::SETULE:

4054

Opc = PPC::EFSCMPGT;

4055

break;

4056

}

4057

} else

4058

Opc = PPC::FCMPUS;

4059

} else if (LHS.getValueType() == MVT::f64) {

4060

if (Subtarget->hasSPE()) {

4061

switch (CC) {

4062

default:

4063

case ISD::SETEQ:

4064

case ISD::SETNE:

4065

Opc = PPC::EFDCMPEQ;

4066

break;

4067

case ISD::SETLT:

4068

case ISD::SETGE:

4069

case ISD::SETOLT:

4070

case ISD::SETOGE:

4071

case ISD::SETULT:

4072

case ISD::SETUGE:

4073

Opc = PPC::EFDCMPLT;

4074

break;

4075

case ISD::SETGT:

4076

case ISD::SETLE:

4077

case ISD::SETOGT:

4078

case ISD::SETOLE:

4079

case ISD::SETUGT:

4080

case ISD::SETULE:

4081

Opc = PPC::EFDCMPGT;

4082

break;

4083

}

4084

} else

4085

Opc = Subtarget->hasVSX() ? PPC::XSCMPUDP : PPC::FCMPUD;

4086

} else {

4087

assert(LHS.getValueType() == MVT::f128 && "Unknown vt!")(static_cast <bool> (LHS.getValueType() == MVT::f128 &&
"Unknown vt!") ? void (0) : __assert_fail ("LHS.getValueType() == MVT::f128 && \"Unknown vt!\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 4087, __extension__ __PRETTY_FUNCTION__));

4088

assert(Subtarget->hasP9Vector() && "XSCMPUQP requires Power9 Vector")(static_cast <bool> (Subtarget->hasP9Vector() &&
"XSCMPUQP requires Power9 Vector") ? void (0) : __assert_fail
("Subtarget->hasP9Vector() && \"XSCMPUQP requires Power9 Vector\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 4088, __extension__ __PRETTY_FUNCTION__));

4089

Opc = PPC::XSCMPUQP;

4090

}

4091

if (Chain)

4092

return SDValue(

4093

CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::Other, LHS, RHS, Chain),

4094

0);

4095

else

4096

return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0);

4097

}

4098

4099

/// Map an ISD condition code to the PPC::Predicate used for it.
///
/// \param CC         Condition code to translate. SETUEQ, SETONE, SETOLE and
///                   SETOGE are expected to have been lowered earlier and are
///                   unreachable here.
/// \param VT         Type of the compared values; only consulted to decide
///                   whether the SPE mapping applies.
/// \param Subtarget  Queried for SPE support.
/// \returns The predicate for \p CC. When the subtarget has SPE and \p VT is
///          floating point, the EQ and LT families map to PRED_GT and the NE
///          and GE families map to PRED_LE.
static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC, const EVT &VT,
                                           const PPCSubtarget *Subtarget) {
  // For SPE instructions, the result is in GT bit of the CR
  bool UseSPE = Subtarget->hasSPE() && VT.isFloatingPoint();

  switch (CC) {
  case ISD::SETUEQ:
  case ISD::SETONE:
  case ISD::SETOLE:
  case ISD::SETOGE:
    llvm_unreachable("Should be lowered by legalize!");
  default: llvm_unreachable("Unknown condition!");
  case ISD::SETOEQ:
  case ISD::SETEQ:
    return UseSPE ? PPC::PRED_GT : PPC::PRED_EQ;
  case ISD::SETUNE:
  case ISD::SETNE:
    return UseSPE ? PPC::PRED_LE : PPC::PRED_NE;
  case ISD::SETOLT:
  case ISD::SETLT:
    return UseSPE ? PPC::PRED_GT : PPC::PRED_LT;
  case ISD::SETULE:
  case ISD::SETLE:
    return PPC::PRED_LE;
  case ISD::SETOGT:
  case ISD::SETGT:
    return PPC::PRED_GT;
  case ISD::SETUGE:
  case ISD::SETGE:
    return UseSPE ? PPC::PRED_LE : PPC::PRED_GE;
  case ISD::SETO:   return PPC::PRED_NU;
  case ISD::SETUO:  return PPC::PRED_UN;
    // These two are invalid for floating point.  Assume we have int.
  case ISD::SETULT: return PPC::PRED_LT;
  case ISD::SETUGT: return PPC::PRED_GT;
  }
}

4136

4137

/// getCRIdxForSetCC - Return the index of the condition register field

4138

/// associated with the SetCC condition, and whether or not the field is

4139

/// treated as inverted. That is, lt = 0; ge = 0 inverted.

4140

static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) {

4141

Invert = false;

4142

switch (CC) {

4143

default: llvm_unreachable("Unknown condition!")::llvm::llvm_unreachable_internal("Unknown condition!", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 4143);

4144

case ISD::SETOLT:

4145

case ISD::SETLT: return 0; // Bit #0 = SETOLT

4146

case ISD::SETOGT:

4147

case ISD::SETGT: return 1; // Bit #1 = SETOGT

4148

case ISD::SETOEQ:

4149

case ISD::SETEQ: return 2; // Bit #2 = SETOEQ

4150

case ISD::SETUO: return 3; // Bit #3 = SETUO

4151

case ISD::SETUGE:

4152

case ISD::SETGE: Invert = true; return 0; // !Bit #0 = SETUGE

4153

case ISD::SETULE:

4154

case ISD::SETLE: Invert = true; return 1; // !Bit #1 = SETULE

4155

case ISD::SETUNE:

4156

case ISD::SETNE: Invert = true; return 2; // !Bit #2 = SETUNE

4157

case ISD::SETO: Invert = true; return 3; // !Bit #3 = SETO

4158

case ISD::SETUEQ:

4159

case ISD::SETOGE:

4160

case ISD::SETOLE:

4161

case ISD::SETONE:

4162

llvm_unreachable("Invalid branch code: should be expanded by legalize")::llvm::llvm_unreachable_internal("Invalid branch code: should be expanded by legalize"
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 4162);

4163

// These are invalid for floating point. Assume integer.

4164

case ISD::SETULT: return 0;

4165

case ISD::SETUGT: return 1;

4166

}

4167

}

4168

4169

// getVCmpInst: return the vector compare instruction for the specified

4170

// vector type and condition code. Since this is for altivec specific code,

4171

// only support the altivec types (v16i8, v8i16, v4i32, v2i64, v1i128,

4172

// and v4f32).

4173

static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,

4174

bool HasVSX, bool &Swap, bool &Negate) {

4175

Swap = false;

4176

Negate = false;

4177

4178

if (VecVT.isFloatingPoint()) {

4179

/* Handle some cases by swapping input operands. */

4180

switch (CC) {

4181

case ISD::SETLE: CC = ISD::SETGE; Swap = true; break;

4182

case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;

4183

case ISD::SETOLE: CC = ISD::SETOGE; Swap = true; break;

4184

case ISD::SETOLT: CC = ISD::SETOGT; Swap = true; break;

4185

case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;

4186

case ISD::SETUGT: CC = ISD::SETULT; Swap = true; break;

4187

default: break;

4188

}

4189

/* Handle some cases by negating the result. */

4190

switch (CC) {

4191

case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;

4192

case ISD::SETUNE: CC = ISD::SETOEQ; Negate = true; break;

4193

case ISD::SETULE: CC = ISD::SETOGT; Negate = true; break;

4194

case ISD::SETULT: CC = ISD::SETOGE; Negate = true; break;

4195

default: break;

4196

}

4197

/* We have instructions implementing the remaining cases. */

4198

switch (CC) {

4199

case ISD::SETEQ:

4200

case ISD::SETOEQ:

4201

if (VecVT == MVT::v4f32)

4202

return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;

4203

else if (VecVT == MVT::v2f64)

4204

return PPC::XVCMPEQDP;

4205

break;

4206

case ISD::SETGT:

4207

case ISD::SETOGT:

4208

if (VecVT == MVT::v4f32)

4209

return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP;

4210

else if (VecVT == MVT::v2f64)

4211

return PPC::XVCMPGTDP;

4212

break;

4213

case ISD::SETGE:

4214

case ISD::SETOGE:

4215

if (VecVT == MVT::v4f32)

4216

return HasVSX ? PPC::XVCMPGESP : PPC::VCMPGEFP;

4217

else if (VecVT == MVT::v2f64)

4218

return PPC::XVCMPGEDP;

4219

break;

4220

default:

4221

break;

4222

}

4223

llvm_unreachable("Invalid floating-point vector compare condition")::llvm::llvm_unreachable_internal("Invalid floating-point vector compare condition"
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 4223);

4224

} else {

4225

/* Handle some cases by swapping input operands. */

4226

switch (CC) {

4227

case ISD::SETGE: CC = ISD::SETLE; Swap = true; break;

4228

case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;

4229

case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;

4230

case ISD::SETULT: CC = ISD::SETUGT; Swap = true; break;

4231

default: break;

4232

}

4233

/* Handle some cases by negating the result. */

4234

switch (CC) {

4235

case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;

4236

case ISD::SETUNE: CC = ISD::SETUEQ; Negate = true; break;

4237

case ISD::SETLE: CC = ISD::SETGT; Negate = true; break;

4238

case ISD::SETULE: CC = ISD::SETUGT; Negate = true; break;

4239

default: break;

4240

}

4241

/* We have instructions implementing the remaining cases. */

4242

switch (CC) {

4243

case ISD::SETEQ:

4244

case ISD::SETUEQ:

4245

if (VecVT == MVT::v16i8)

4246

return PPC::VCMPEQUB;

4247

else if (VecVT == MVT::v8i16)

4248

return PPC::VCMPEQUH;

4249

else if (VecVT == MVT::v4i32)

4250

return PPC::VCMPEQUW;

4251

else if (VecVT == MVT::v2i64)

4252

return PPC::VCMPEQUD;

4253

else if (VecVT == MVT::v1i128)

4254

return PPC::VCMPEQUQ;

4255

break;

4256

case ISD::SETGT:

4257

if (VecVT == MVT::v16i8)

4258

return PPC::VCMPGTSB;

4259

else if (VecVT == MVT::v8i16)

4260

return PPC::VCMPGTSH;

4261

else if (VecVT == MVT::v4i32)

4262

return PPC::VCMPGTSW;

4263

else if (VecVT == MVT::v2i64)

4264

return PPC::VCMPGTSD;

4265

else if (VecVT == MVT::v1i128)

4266

return PPC::VCMPGTSQ;

4267

break;

4268

case ISD::SETUGT:

4269

if (VecVT == MVT::v16i8)

4270

return PPC::VCMPGTUB;

4271

else if (VecVT == MVT::v8i16)

4272

return PPC::VCMPGTUH;

4273

else if (VecVT == MVT::v4i32)

4274

return PPC::VCMPGTUW;

4275

else if (VecVT == MVT::v2i64)

4276

return PPC::VCMPGTUD;

4277

else if (VecVT == MVT::v1i128)

4278

return PPC::VCMPGTUQ;

4279

break;

4280

default:

4281

break;

4282

}

4283

llvm_unreachable("Invalid integer vector compare condition")::llvm::llvm_unreachable_internal("Invalid integer vector compare condition"
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 4283);

4284

}

4285

}

4286

4287

/// Try to select a SETCC-style node \p N directly.
///
/// Three strategies are attempted in order:
///  1. For non-strict nodes, when CR bits are not in use and the RHS is a
///     32-bit immediate equal to 0 or -1, a short GPR-only sequence for
///     SETEQ/SETNE/SETLT/SETGT (some of them skipped when pointers are i64).
///  2. For non-strict vector compares, a vector compare instruction, with
///     operands swapped and/or the result complemented as getVCmpInst
///     requests. Nothing is selected if the subtarget has SPE.
///  3. Otherwise, unless CR bits are in use, a compare from SelectCC copied
///     into CR7, read back with MFOCRF, and reduced to one bit with RLWINM
///     (followed by XORI 1 when the condition is the inverse of the CR bit).
///
/// For strict FP opcodes the operands are shifted by one to make room for the
/// incoming chain, and the node's chain result is rewired to the compare.
///
/// \returns true if \p N was selected here, false if the caller must handle
///          it.
bool PPCDAGToDAGISel::trySETCC(SDNode *N) {
  SDLoc dl(N);
  unsigned Imm;
  bool IsStrict = N->isStrictFPOpcode();
  ISD::CondCode CC =
      cast<CondCodeSDNode>(N->getOperand(IsStrict ? 3 : 2))->get();
  EVT PtrVT =
      CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout());
  bool isPPC64 = (PtrVT == MVT::i64);
  SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();

  SDValue LHS = N->getOperand(IsStrict ? 1 : 0);
  SDValue RHS = N->getOperand(IsStrict ? 2 : 1);

  if (!IsStrict && !Subtarget->useCRBits() && isInt32Immediate(RHS, Imm)) {
    // We can codegen setcc op, imm very efficiently compared to a brcond.
    // Check for those cases here.
    // setcc op, 0
    if (Imm == 0) {
      SDValue Op = LHS;
      switch (CC) {
      default: break;
      case ISD::SETEQ: {
        Op = SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Op), 0);
        SDValue Ops[] = { Op, getI32Imm(27, dl), getI32Imm(5, dl),
                          getI32Imm(31, dl) };
        CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
        return true;
      }
      case ISD::SETNE: {
        if (isPPC64) break;
        SDValue AD =
          SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
                                         Op, getI32Imm(~0U, dl)), 0);
        CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, AD, Op, AD.getValue(1));
        return true;
      }
      case ISD::SETLT: {
        SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl),
                          getI32Imm(31, dl) };
        CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
        return true;
      }
      case ISD::SETGT: {
        SDValue T =
          SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Op), 0);
        T = SDValue(CurDAG->getMachineNode(PPC::ANDC, dl, MVT::i32, T, Op), 0);
        SDValue Ops[] = { T, getI32Imm(1, dl), getI32Imm(31, dl),
                          getI32Imm(31, dl) };
        CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
        return true;
      }
      }
    } else if (Imm == ~0U) {        // setcc op, -1
      SDValue Op = LHS;
      switch (CC) {
      default: break;
      case ISD::SETEQ:
        if (isPPC64) break;
        Op = SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
                                            Op, getI32Imm(1, dl)), 0);
        CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
                             SDValue(CurDAG->getMachineNode(PPC::LI, dl,
                                                            MVT::i32,
                                                            getI32Imm(0, dl)),
                                     0), Op.getValue(1));
        return true;
      case ISD::SETNE: {
        if (isPPC64) break;
        Op = SDValue(CurDAG->getMachineNode(PPC::NOR, dl, MVT::i32, Op, Op), 0);
        SDNode *AD = CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
                                            Op, getI32Imm(~0U, dl));
        CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(AD, 0), Op,
                             SDValue(AD, 1));
        return true;
      }
      case ISD::SETLT: {
        SDValue AD = SDValue(CurDAG->getMachineNode(PPC::ADDI, dl, MVT::i32, Op,
                                                    getI32Imm(1, dl)), 0);
        SDValue AN = SDValue(CurDAG->getMachineNode(PPC::AND, dl, MVT::i32, AD,
                                                    Op), 0);
        SDValue Ops[] = { AN, getI32Imm(1, dl), getI32Imm(31, dl),
                          getI32Imm(31, dl) };
        CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
        return true;
      }
      case ISD::SETGT: {
        SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl),
                          getI32Imm(31, dl) };
        Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
        CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op, getI32Imm(1, dl));
        return true;
      }
      }
    }
  }

  // Altivec Vector compare instructions do not set any CR register by default and
  // vector compare operations return the same type as the operands.
  if (!IsStrict && LHS.getValueType().isVector()) {
    if (Subtarget->hasSPE())
      return false;

    EVT VecVT = LHS.getValueType();
    bool Swap, Negate;
    unsigned int VCmpInst =
        getVCmpInst(VecVT.getSimpleVT(), CC, Subtarget->hasVSX(), Swap, Negate);
    if (Swap)
      std::swap(LHS, RHS);

    EVT ResVT = VecVT.changeVectorElementTypeToInteger();
    if (Negate) {
      // Complement the compare result by NOR-ing it with itself.
      SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, ResVT, LHS, RHS), 0);
      CurDAG->SelectNodeTo(N, Subtarget->hasVSX() ? PPC::XXLNOR : PPC::VNOR,
                           ResVT, VCmp, VCmp);
      return true;
    }

    CurDAG->SelectNodeTo(N, VCmpInst, ResVT, LHS, RHS);
    return true;
  }

  if (Subtarget->useCRBits())
    return false;

  bool Inv;
  unsigned Idx = getCRIdxForSetCC(CC, Inv);
  SDValue CCReg = SelectCC(LHS, RHS, CC, dl, Chain);
  // For strict nodes, users of N's chain result now depend on the compare.
  if (IsStrict)
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), CCReg.getValue(1));
  SDValue IntCR;

  // SPE e*cmp* instructions only set the 'gt' bit, so hard-code that
  // The correct compare instruction is already set by SelectCC()
  if (Subtarget->hasSPE() && LHS.getValueType().isFloatingPoint()) {
    Idx = 1;
  }

  // Force the ccreg into CR7.
  SDValue CR7Reg = CurDAG->getRegister(PPC::CR7, MVT::i32);

  SDValue InFlag(nullptr, 0);  // Null incoming flag value.
  CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, CR7Reg, CCReg,
                               InFlag).getValue(1);

  IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg,
                                         CCReg), 0);

  SDValue Ops[] = { IntCR, getI32Imm((32 - (3 - Idx)) & 31, dl),
                      getI32Imm(31, dl), getI32Imm(31, dl) };
  if (!Inv) {
    CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
    return true;
  }

  // Get the specified bit.
  SDValue Tmp =
    SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
  CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1, dl));
  return true;
}

4448

4449

/// Does this node represent a load/store node whose address can be represented

4450

/// with a register plus an immediate that's a multiple of \p Val:

4451

bool PPCDAGToDAGISel::isOffsetMultipleOf(SDNode *N, unsigned Val) const {

4452

LoadSDNode *LDN = dyn_cast<LoadSDNode>(N);

4453

StoreSDNode *STN = dyn_cast<StoreSDNode>(N);

4454

SDValue AddrOp;

4455

if (LDN)

4456

AddrOp = LDN->getOperand(1);

4457

else if (STN)

4458

AddrOp = STN->getOperand(2);

4459

4460

// If the address points a frame object or a frame object with an offset,

4461

// we need to check the object alignment.

4462

short Imm = 0;

4463

if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(

4464

AddrOp.getOpcode() == ISD::ADD ? AddrOp.getOperand(0) :

4465

AddrOp)) {

4466

// If op0 is a frame index that is under aligned, we can't do it either,

4467

// because it is translated to r31 or r1 + slot + offset. We won't know the

4468

// slot number until the stack frame is finalized.

4469

const MachineFrameInfo &MFI = CurDAG->getMachineFunction().getFrameInfo();

4470

unsigned SlotAlign = MFI.getObjectAlign(FI->getIndex()).value();

4471

if ((SlotAlign % Val) != 0)

4472

return false;

4473

4474

// If we have an offset, we need further check on the offset.

4475

if (AddrOp.getOpcode() != ISD::ADD)

4476

return true;

4477

}

4478

4479

if (AddrOp.getOpcode() == ISD::ADD)

4480

return isIntS16Immediate(AddrOp.getOperand(1), Imm) && !(Imm % Val);

4481

4482

// If the address comes from the outside, the offset will be zero.

4483

return AddrOp.getOpcode() == ISD::CopyFromReg;

4484

}

4485

4486

/// Copy the memory operand of \p N onto \p Result.
///
/// \param N       Source node; cast<> requires it to be a MemSDNode.
/// \param Result  Destination node; cast<> requires it to be a MachineSDNode.
///                Its memory references are set to the single operand taken
///                from \p N.
void PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
}

4491

4492

/// Check whether the SELECT_CC node \p N (with outer condition code \p CC)
/// matches one of the three-way-compare shapes listed in the body, i.e. a
/// node that can be lowered to a SETB.
///
/// \param N            The SELECT_CC node being examined.
/// \param CC           Condition code of \p N.
/// \param DAG          Not used by this function.
/// \param NeedSwapOps  Written on success: whether the comparison operands
///                     have to be swapped.
/// \param IsUnCmp      Set to true when the comparison is unsigned. It is only
///                     ever written with `true` (possibly even when the
///                     function goes on to return false), so the caller is
///                     expected to initialize it to false.
/// \returns true if \p N matches one of the supported patterns.
static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG,
                         bool &NeedSwapOps, bool &IsUnCmp) {

  assert(N->getOpcode() == ISD::SELECT_CC && "Expecting a SELECT_CC here.");

  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDValue TrueRes = N->getOperand(2);
  SDValue FalseRes = N->getOperand(3);
  ConstantSDNode *TrueConst = dyn_cast<ConstantSDNode>(TrueRes);
  if (!TrueConst || (N->getSimpleValueType(0) != MVT::i64 &&
                     N->getSimpleValueType(0) != MVT::i32))
    return false;

  // We are looking for any of:
  // (select_cc lhs, rhs,  1, (sext (setcc [lr]hs, [lr]hs, cc2)), cc1)
  // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, cc2)), cc1)
  // (select_cc lhs, rhs,  0, (select_cc [lr]hs, [lr]hs,  1, -1, cc2), seteq)
  // (select_cc lhs, rhs,  0, (select_cc [lr]hs, [lr]hs, -1,  1, cc2), seteq)
  int64_t TrueResVal = TrueConst->getSExtValue();
  if ((TrueResVal < -1 || TrueResVal > 1) ||
      (TrueResVal == -1 && FalseRes.getOpcode() != ISD::ZERO_EXTEND) ||
      (TrueResVal == 1 && FalseRes.getOpcode() != ISD::SIGN_EXTEND) ||
      (TrueResVal == 0 &&
       (FalseRes.getOpcode() != ISD::SELECT_CC || CC != ISD::SETEQ)))
    return false;

  SDValue SetOrSelCC = FalseRes.getOpcode() == ISD::SELECT_CC
                           ? FalseRes
                           : FalseRes.getOperand(0);
  bool InnerIsSel = SetOrSelCC.getOpcode() == ISD::SELECT_CC;
  if (SetOrSelCC.getOpcode() != ISD::SETCC &&
      SetOrSelCC.getOpcode() != ISD::SELECT_CC)
    return false;

  // An inner select_cc is only part of the patterns above when it is the false
  // operand itself (the TrueResVal == 0 forms). Under a sext/zext the inner
  // node must be a setcc: a select_cc there yields 1/-1 rather than a boolean,
  // so treating it like a setcc would accept nodes that do not compute the
  // -1/0/1 result of a three-way compare.
  if (InnerIsSel && TrueResVal != 0)
    return false;

  // Without this setb optimization, the outer SELECT_CC will be manually
  // selected to SELECT_CC_I4/SELECT_CC_I8 Pseudo, then expand-isel-pseudos pass
  // transforms pseudo instruction to isel instruction. When there are more than
  // one use for result like zext/sext, with current optimization we only see
  // isel is replaced by setb but can't see any significant gain. Since
  // setb has longer latency than original isel, we should avoid this. Another
  // point is that setb requires comparison always kept, it can break the
  // opportunity to get the comparison away if we have in future.
  if (!SetOrSelCC.hasOneUse() || (!InnerIsSel && !FalseRes.hasOneUse()))
    return false;

  SDValue InnerLHS = SetOrSelCC.getOperand(0);
  SDValue InnerRHS = SetOrSelCC.getOperand(1);
  // The condition code is operand 4 of a select_cc and operand 2 of a setcc.
  ISD::CondCode InnerCC =
      cast<CondCodeSDNode>(SetOrSelCC.getOperand(InnerIsSel ? 4 : 2))->get();
  // If the inner comparison is a select_cc, make sure the true/false values are
  // 1/-1 and canonicalize it if needed.
  if (InnerIsSel) {
    ConstantSDNode *SelCCTrueConst =
        dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(2));
    ConstantSDNode *SelCCFalseConst =
        dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(3));
    if (!SelCCTrueConst || !SelCCFalseConst)
      return false;
    int64_t SelCCTVal = SelCCTrueConst->getSExtValue();
    int64_t SelCCFVal = SelCCFalseConst->getSExtValue();
    // The values must be -1/1 (requiring a swap) or 1/-1.
    if (SelCCTVal == -1 && SelCCFVal == 1) {
      std::swap(InnerLHS, InnerRHS);
    } else if (SelCCTVal != 1 || SelCCFVal != -1)
      return false;
  }

  // Canonicalize unsigned case
  if (InnerCC == ISD::SETULT || InnerCC == ISD::SETUGT) {
    IsUnCmp = true;
    InnerCC = (InnerCC == ISD::SETULT) ? ISD::SETLT : ISD::SETGT;
  }

  // The inner comparison must use the same two operands as the outer one,
  // either in the same order or exchanged.
  bool InnerSwapped = false;
  if (LHS == InnerRHS && RHS == InnerLHS)
    InnerSwapped = true;
  else if (LHS != InnerLHS || RHS != InnerRHS)
    return false;

  switch (CC) {
  // (select_cc lhs, rhs,  0,
  //     (select_cc [lr]hs, [lr]hs, 1, -1, setlt/setgt), seteq)
  case ISD::SETEQ:
    if (!InnerIsSel)
      return false;
    if (InnerCC != ISD::SETLT && InnerCC != ISD::SETGT)
      return false;
    NeedSwapOps = (InnerCC == ISD::SETGT) ? InnerSwapped : !InnerSwapped;
    break;

  // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?lt)
  // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setgt)), setu?lt)
  // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setlt)), setu?lt)
  // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?lt)
  // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setgt)), setu?lt)
  // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setlt)), setu?lt)
  case ISD::SETULT:
    if (!IsUnCmp && InnerCC != ISD::SETNE)
      return false;
    IsUnCmp = true;
    LLVM_FALLTHROUGH;
  case ISD::SETLT:
    if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETGT && !InnerSwapped) ||
        (InnerCC == ISD::SETLT && InnerSwapped))
      NeedSwapOps = (TrueResVal == 1);
    else
      return false;
    break;

  // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?gt)
  // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setlt)), setu?gt)
  // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setgt)), setu?gt)
  // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?gt)
  // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setlt)), setu?gt)
  // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setgt)), setu?gt)
  case ISD::SETUGT:
    if (!IsUnCmp && InnerCC != ISD::SETNE)
      return false;
    IsUnCmp = true;
    LLVM_FALLTHROUGH;
  case ISD::SETGT:
    if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETLT && !InnerSwapped) ||
        (InnerCC == ISD::SETGT && InnerSwapped))
      NeedSwapOps = (TrueResVal == -1);
    else
      return false;
    break;

  default:
    return false;
  }

  LLVM_DEBUG(dbgs() << "Found a node that can be lowered to a SETB: ");
  LLVM_DEBUG(N->dump());

  return true;
}

4630

4631

// Return true if it's a software square-root/divide operand.

4632

static bool isSWTestOp(SDValue N) {

4633

if (N.getOpcode() == PPCISD::FTSQRT)

4634

return true;

4635

if (N.getNumOperands() < 1 || !isa<ConstantSDNode>(N.getOperand(0)))

4636

return false;

4637

switch (N.getConstantOperandVal(0)) {

4638

case Intrinsic::ppc_vsx_xvtdivdp:

4639

case Intrinsic::ppc_vsx_xvtdivsp:

4640

case Intrinsic::ppc_vsx_xvtsqrtdp:

4641

case Intrinsic::ppc_vsx_xvtsqrtsp:

4642

return true;

4643

}

4644

return false;

4645

}

4646

4647

/// Try to select a BR_CC that tests a single bit of a software test operand
/// (see isSWTestOp) against zero as one PPC::BCC on that operand.
///
/// \returns true if \p N was selected to a BCC, false if the pattern did not
/// match and \p N was left untouched.
bool PPCDAGToDAGISel::tryFoldSWTestBRCC(SDNode *N) {
  assert(N->getOpcode() == ISD::BR_CC && "ISD::BR_CC is expected.");
  // We are looking for following patterns, where `truncate to i1` actually has
  // the same semantic with `and 1`.
  // (br_cc seteq, (truncateToi1 SWTestOp), 0) -> (BCC PRED_NU, SWTestOp)
  // (br_cc seteq, (and SWTestOp, 2), 0) -> (BCC PRED_NE, SWTestOp)
  // (br_cc seteq, (and SWTestOp, 4), 0) -> (BCC PRED_LE, SWTestOp)
  // (br_cc seteq, (and SWTestOp, 8), 0) -> (BCC PRED_GE, SWTestOp)
  // (br_cc setne, (truncateToi1 SWTestOp), 0) -> (BCC PRED_UN, SWTestOp)
  // (br_cc setne, (and SWTestOp, 2), 0) -> (BCC PRED_EQ, SWTestOp)
  // (br_cc setne, (and SWTestOp, 4), 0) -> (BCC PRED_GT, SWTestOp)
  // (br_cc setne, (and SWTestOp, 8), 0) -> (BCC PRED_LT, SWTestOp)
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
  if (CC != ISD::SETEQ && CC != ISD::SETNE)
    return false;

  // The right-hand side of the comparison must be the constant zero.
  SDValue CmpRHS = N->getOperand(3);
  if (!isa<ConstantSDNode>(CmpRHS) ||
      cast<ConstantSDNode>(CmpRHS)->getSExtValue() != 0)
    return false;

  // The left-hand side must be an operation applied to a software test op.
  SDValue CmpLHS = N->getOperand(2);
  if (CmpLHS.getNumOperands() < 1 || !isSWTestOp(CmpLHS.getOperand(0)))
    return false;

  // PCC stays 0 when no pattern matched.
  unsigned PCC = 0;
  bool IsCCNE = CC == ISD::SETNE;
  if (CmpLHS.getOpcode() == ISD::AND &&
      isa<ConstantSDNode>(CmpLHS.getOperand(1))) {
    switch (CmpLHS.getConstantOperandVal(1)) {
    case 1:
      PCC = IsCCNE ? PPC::PRED_UN : PPC::PRED_NU;
      break;
    case 2:
      PCC = IsCCNE ? PPC::PRED_EQ : PPC::PRED_NE;
      break;
    case 4:
      PCC = IsCCNE ? PPC::PRED_GT : PPC::PRED_LE;
      break;
    case 8:
      PCC = IsCCNE ? PPC::PRED_LT : PPC::PRED_GE;
      break;
    default:
      return false;
    }
  } else if (CmpLHS.getOpcode() == ISD::TRUNCATE &&
             CmpLHS.getValueType() == MVT::i1) {
    PCC = IsCCNE ? PPC::PRED_UN : PPC::PRED_NU;
  }

  if (PCC) {
    SDLoc dl(N);
    // BCC operands: predicate, the test op itself, branch target, chain.
    SDValue Ops[] = {getI32Imm(PCC, dl), CmpLHS.getOperand(0), N->getOperand(4),
                     N->getOperand(0)};
    CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
    return true;
  }
  return false;
}

4705

4706

/// Try to select the AND node \p N, whose second operand is a 32-bit
/// immediate, as a single PPC::RLWINM (or, for a zero mask, by replacing it
/// with the constant operand).
///
/// \returns true if \p N was handled, false otherwise.
bool PPCDAGToDAGISel::tryAsSingleRLWINM(SDNode *N) {
  assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
  unsigned Imm;
  if (!isInt32Immediate(N->getOperand(1), Imm))
    return false;

  SDLoc dl(N);
  SDValue Val = N->getOperand(0);
  unsigned SH, MB, ME;
  // If this is an and of a value rotated between 0 and 31 bits and then and'd
  // with a mask, emit rlwinm
  if (isRotateAndMask(Val.getNode(), Imm, false, SH, MB, ME)) {
    Val = Val.getOperand(0);
    SDValue Ops[] = {Val, getI32Imm(SH, dl), getI32Imm(MB, dl),
                     getI32Imm(ME, dl)};
    CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
    return true;
  }

  // If this is just a masked value where the input is not handled, and
  // is not a rotate-left (handled by a pattern in the .td file), emit rlwinm
  if (isRunOfOnes(Imm, MB, ME) && Val.getOpcode() != ISD::ROTL) {
    SDValue Ops[] = {Val, getI32Imm(0, dl), getI32Imm(MB, dl),
                     getI32Imm(ME, dl)};
    CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
    return true;
  }

  // AND X, 0 -> 0, not "rlwinm 32".
  if (Imm == 0) {
    ReplaceUses(SDValue(N, 0), N->getOperand(1));
    return true;
  }

  return false;
}

4742

4743

/// Try to select the AND node \p N, whose second operand is a 64-bit
/// immediate, as a single PPC::RLWINM8. This is done only when the mask is a
/// non-wrapping run of ones that lies entirely in bits [32, 64).
///
/// \returns true if \p N was selected to RLWINM8, false otherwise.
bool PPCDAGToDAGISel::tryAsSingleRLWINM8(SDNode *N) {
  assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
  uint64_t Imm64;
  if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64))
    return false;

  unsigned MB, ME;
  if (isRunOfOnes64(Imm64, MB, ME) && MB >= 32 && MB <= ME) {
    //                MB  ME
    // +----------------------+
    // |xxxxxxxxxxx00011111000|
    // +----------------------+
    //  0         32         64
    // We can only do it if the MB is at least 32 and MB <= ME
    // as RLWINM will replace the contents of [0 - 32) with [32 - 64) even
    // we didn't rotate it.
    SDLoc dl(N);
    // The instruction numbers mask bits within the low word, hence the -32.
    SDValue Ops[] = {N->getOperand(0), getI64Imm(0, dl), getI64Imm(MB - 32, dl),
                     getI64Imm(ME - 32, dl)};
    CurDAG->SelectNodeTo(N, PPC::RLWINM8, MVT::i64, Ops);
    return true;
  }

  return false;
}

4768

4769

/// Try to select the AND node \p N, whose second operand is a 64-bit
/// immediate, as a pair of PPC::RLDICL instructions: the first rotates and
/// clears, the second rotates back and clears the leading bits.
///
/// \returns true if \p N was selected, false if the immediate is not suitable
/// (not a constant, fits in 16 unsigned bits, or is not a run of ones once its
/// leading zeros are filled in).
bool PPCDAGToDAGISel::tryAsPairOfRLDICL(SDNode *N) {
  assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
  uint64_t Imm64;
  if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64))
    return false;

  // Do nothing if it is 16-bit imm as the pattern in the .td file handle
  // it well with "andi.".
  if (isUInt<16>(Imm64))
    return false;

  SDLoc Loc(N);
  SDValue Val = N->getOperand(0);

  // Optimized with two rldicl's as follows:
  // Add missing bits on left to the mask and check that the mask is a
  // wrapped run of ones, i.e.
  // Change pattern |0001111100000011111111|
  //             to |1111111100000011111111|.
  unsigned NumOfLeadingZeros = countLeadingZeros(Imm64);
  if (NumOfLeadingZeros != 0)
    Imm64 |= maskLeadingOnes<uint64_t>(NumOfLeadingZeros);

  unsigned MB, ME;
  if (!isRunOfOnes64(Imm64, MB, ME))
    return false;

  //         ME     MB                   MB-ME+63
  // +----------------------+     +----------------------+
  // |1111111100000011111111| ->  |0000001111111111111111|
  // +----------------------+     +----------------------+
  //  0                   63       0                   63
  // There are ME + 1 ones on the left and (MB - ME + 63) & 63 zeros in between.
  unsigned OnesOnLeft = ME + 1;
  unsigned ZerosInBetween = (MB - ME + 63) & 63;
  // Rotate left by OnesOnLeft (so leading ones are now trailing ones) and clear
  // on the left the bits that are already zeros in the mask.
  Val = SDValue(CurDAG->getMachineNode(PPC::RLDICL, Loc, MVT::i64, Val,
                                       getI64Imm(OnesOnLeft, Loc),
                                       getI64Imm(ZerosInBetween, Loc)),
                0);
  //        MB-ME+63                      ME     MB
  // +----------------------+     +----------------------+
  // |0000001111111111111111| ->  |0001111100000011111111|
  // +----------------------+     +----------------------+
  //  0                   63       0                   63
  // Rotate back by 64 - OnesOnLeft to undo previous rotate. Then clear on the
  // left the number of ones we previously added.
  SDValue Ops[] = {Val, getI64Imm(64 - OnesOnLeft, Loc),
                   getI64Imm(NumOfLeadingZeros, Loc)};
  CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
  return true;
}

4822

4823

/// Try to select `(and (or x, c1), c2)` with 32-bit immediates c1 and c2 as a
/// single PPC::RLWIMI (a bitfield insert of c1 into x).
///
/// \returns true if \p N was replaced by an RLWIMI, false otherwise.
bool PPCDAGToDAGISel::tryAsSingleRLWIMI(SDNode *N) {
  assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
  unsigned Imm;
  if (!isInt32Immediate(N->getOperand(1), Imm))
    return false;

  SDValue Val = N->getOperand(0);
  unsigned Imm2;
  // ISD::OR doesn't get all the bitfield insertion fun.
  // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) might be a
  // bitfield insert.
  if (Val.getOpcode() != ISD::OR || !isInt32Immediate(Val.getOperand(1), Imm2))
    return false;

  // The idea here is to check whether this is equivalent to:
  //   (c1 & m) | (x & ~m)
  // where m is a run-of-ones mask. The logic here is that, for each bit in
  // c1 and c2:
  //  - if both are 1, then the output will be 1.
  //  - if both are 0, then the output will be 0.
  //  - if the bit in c1 is 0, and the bit in c2 is 1, then the output will
  //    come from x.
  //  - if the bit in c1 is 1, and the bit in c2 is 0, then the output will
  //    be 0.
  //  If that last condition is never the case, then we can form m from the
  //  bits that are the same between c1 and c2.
  // Here c1 is Imm2 (the OR constant) and c2 is Imm (the AND constant).
  unsigned MB, ME;
  if (isRunOfOnes(~(Imm ^ Imm2), MB, ME) && !(~Imm & Imm2)) {
    SDLoc dl(N);
    SDValue Ops[] = {Val.getOperand(0), Val.getOperand(1), getI32Imm(0, dl),
                     getI32Imm(MB, dl), getI32Imm(ME, dl)};
    ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
    return true;
  }

  return false;
}

4860

4861

/// Try to select the AND node \p N, whose second operand is a 64-bit mask of
/// trailing ones, as a single PPC::RLDICL. A constant logical right shift
/// feeding the AND is folded into the rotate amount when that is safe.
///
/// \returns true if \p N was selected to RLDICL, false if the immediate is not
/// a constant low-bit mask.
bool PPCDAGToDAGISel::tryAsSingleRLDICL(SDNode *N) {
  assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
  uint64_t Imm64;
  if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) || !isMask_64(Imm64))
    return false;

  // If this is a 64-bit zero-extension mask, emit rldicl.
  unsigned MB = 64 - countTrailingOnes(Imm64);
  unsigned SH = 0;
  unsigned Imm;
  SDValue Val = N->getOperand(0);
  SDLoc dl(N);

  if (Val.getOpcode() == ISD::ANY_EXTEND) {
    auto Op0 = Val.getOperand(0);
    // Fold (and (any_extend (srl x, Imm)), mask) into one rldicl on x placed
    // in the low word of an undefined 64-bit register. The result keeps the
    // low 64 - MB bits, which after the rotate are source bits
    // [Imm, Imm + 64 - MB). Only the low 32 source bits are defined, while the
    // narrow srl produced zeros above them, so the whole window must stay in
    // the low word: Imm + (64 - MB) <= 32, i.e. Imm + 32 <= MB.
    if (Op0.getOpcode() == ISD::SRL &&
        isInt32Immediate(Op0.getOperand(1).getNode(), Imm) && MB >= 32 &&
        Imm <= MB - 32) {

      auto ResultType = Val.getNode()->getValueType(0);
      auto ImDef = CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, ResultType);
      SDValue IDVal(ImDef, 0);

      Val = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, ResultType,
                                           IDVal, Op0.getOperand(0),
                                           getI32Imm(1, dl)),
                    0);
      SH = 64 - Imm;
    }
  }

  // If the operand is a logical right shift, we can fold it into this
  // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb)
  // for n <= mb. The right shift is really a left rotate followed by a
  // mask, and this mask is a more-restrictive sub-mask of the mask implied
  // by the shift.
  if (Val.getOpcode() == ISD::SRL &&
      isInt32Immediate(Val.getOperand(1).getNode(), Imm) && Imm <= MB) {
    assert(Imm < 64 && "Illegal shift amount");
    Val = Val.getOperand(0);
    SH = 64 - Imm;
  }

  SDValue Ops[] = {Val, getI32Imm(SH, dl), getI32Imm(MB, dl)};
  CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
  return true;
}

4907

4908

/// Try to select the AND node \p N, whose second operand is a 64-bit
/// immediate consisting of leading ones followed by zeros, as a single
/// PPC::RLDICR with a zero rotate.
///
/// \returns true if \p N was selected to RLDICR, false otherwise.
bool PPCDAGToDAGISel::tryAsSingleRLDICR(SDNode *N) {
  assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
  uint64_t Imm64;
  if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) ||
      !isMask_64(~Imm64))
    return false;

  // If this is a negated 64-bit zero-extension mask,
  // i.e. the immediate is a sequence of ones from most significant side
  // and all zero for remainder, we should use rldicr.
  // ME is the index of the last bit kept, counting from the most significant
  // bit: everything after it is one of the trailing zeros of the mask.
  unsigned ME = 63 - countTrailingOnes(~Imm64);
  unsigned SH = 0;
  SDLoc dl(N);
  SDValue Ops[] = {N->getOperand(0), getI32Imm(SH, dl), getI32Imm(ME, dl)};
  CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops);
  return true;
}

4925

4926

/// Try to select the OR node \p N, whose second operand is a 64-bit immediate
/// forming a run of ones, as a single PPC::RLDIMI that inserts bits of an
/// all-ones register into the first operand.
///
/// \returns true if \p N was selected to RLDIMI, false otherwise.
bool PPCDAGToDAGISel::tryAsSingleRLDIMI(SDNode *N) {
  assert(N->getOpcode() == ISD::OR && "ISD::OR SDNode expected");
  uint64_t Imm64;
  unsigned MB, ME;
  SDValue N0 = N->getOperand(0);

  // We won't get fewer instructions if the imm is 32-bit integer.
  // rldimi requires the imm to have consecutive ones with both sides zero.
  // Also, make sure the first Op has only one use, otherwise this may increase
  // register pressure since rldimi is destructive.
  if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) ||
      isUInt<32>(Imm64) || !isRunOfOnes64(Imm64, MB, ME) || !N0.hasOneUse())
    return false;

  unsigned SH = 63 - ME;
  SDLoc Dl(N);
  // Use selectI64Imm for making LI instr instead of directly putting Imm64
  SDValue Ops[] = {
      N0,
      SDValue(selectI64Imm(CurDAG, getI64Imm(-1, Dl).getNode()), 0),
      getI32Imm(SH, Dl), getI32Imm(MB, Dl)};
  CurDAG->SelectNodeTo(N, PPC::RLDIMI, MVT::i64, Ops);
  return true;
}

4950

4951

// Select - Convert the specified operand from a target-independent to a

4952

// target-specific node if it hasn't already been changed.

4953

void PPCDAGToDAGISel::Select(SDNode *N) {

4954

SDLoc dl(N);

4955

if (N->isMachineOpcode()) {

4956

N->setNodeId(-1);

4957

return; // Already selected.

4958

}

4959

4960

// In case any misguided DAG-level optimizations form an ADD with a

4961

// TargetConstant operand, crash here instead of miscompiling (by selecting

4962

// an r+r add instead of some kind of r+i add).

4963

if (N->getOpcode() == ISD::ADD &&

4964

N->getOperand(1).getOpcode() == ISD::TargetConstant)

4965

llvm_unreachable("Invalid ADD with TargetConstant operand")::llvm::llvm_unreachable_internal("Invalid ADD with TargetConstant operand"
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 4965);

4966

4967

// Try matching complex bit permutations before doing anything else.

4968

if (tryBitPermutation(N))

4969

return;

4970

4971

// Try to emit integer compares as GPR-only sequences (i.e. no use of CR).

4972

if (tryIntCompareInGPR(N))

4973

return;

4974

4975

switch (N->getOpcode()) {

4976

default: break;

4977

4978

case ISD::Constant:

4979

if (N->getValueType(0) == MVT::i64) {

4980

ReplaceNode(N, selectI64Imm(CurDAG, N));

4981

return;

4982

}

4983

break;

4984

4985

case ISD::INTRINSIC_WO_CHAIN: {

4986

// We emit the PPC::FSELS instruction here because of type conflicts with

4987

// the comparison operand. The FSELS instruction is defined to use an 8-byte

4988

// comparison like the FSELD version. The fsels intrinsic takes a 4-byte

4989

// value for the comparison. When selecting through a .td file, a type

4990

// error is raised. Must check this first so we never break on the

4991

// !Subtarget->isISA3_1() check.

4992

if (N->getConstantOperandVal(0) == Intrinsic::ppc_fsels) {

4993

SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3)};

4994

CurDAG->SelectNodeTo(N, PPC::FSELS, MVT::f32, Ops);

4995

return;

4996

}

4997

4998

if (!Subtarget->isISA3_1())

4999

break;

5000

unsigned Opcode = 0;

5001

switch (N->getConstantOperandVal(0)) {

5002

default:

5003

break;

5004

case Intrinsic::ppc_altivec_vstribr_p:

5005

Opcode = PPC::VSTRIBR_rec;

5006

break;

5007

case Intrinsic::ppc_altivec_vstribl_p:

5008

Opcode = PPC::VSTRIBL_rec;

5009

break;

5010

case Intrinsic::ppc_altivec_vstrihr_p:

5011

Opcode = PPC::VSTRIHR_rec;

5012

break;

5013

case Intrinsic::ppc_altivec_vstrihl_p:

5014

Opcode = PPC::VSTRIHL_rec;

5015

break;

5016

}

5017

if (!Opcode)

5018

break;

5019

5020

// Generate the appropriate vector string isolate intrinsic to match.

5021

EVT VTs[] = {MVT::v16i8, MVT::Glue};

5022

SDValue VecStrOp =

5023

SDValue(CurDAG->getMachineNode(Opcode, dl, VTs, N->getOperand(2)), 0);

5024

// Vector string isolate instructions update the EQ bit of CR6.

5025

// Generate a SETBC instruction to extract the bit and place it in a GPR.

5026

SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_eq, dl, MVT::i32);

5027

SDValue CR6Reg = CurDAG->getRegister(PPC::CR6, MVT::i32);

5028

SDValue CRBit = SDValue(

5029

CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,

5030

CR6Reg, SubRegIdx, VecStrOp.getValue(1)),

5031

0);

5032

CurDAG->SelectNodeTo(N, PPC::SETBC, MVT::i32, CRBit);

5033

return;

5034

}

5035

5036

case ISD::SETCC:

5037

case ISD::STRICT_FSETCC:

5038

case ISD::STRICT_FSETCCS:

5039

if (trySETCC(N))

5040

return;

5041

break;

5042

// These nodes will be transformed into GETtlsADDR32 node, which

5043

// later becomes BL_TLS __tls_get_addr(sym at tlsgd)@PLT

5044

case PPCISD::ADDI_TLSLD_L_ADDR:

5045

case PPCISD::ADDI_TLSGD_L_ADDR: {

5046

const Module *Mod = MF->getFunction().getParent();

5047

if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 ||

5048

!Subtarget->isSecurePlt() || !Subtarget->isTargetELF() ||

5049

Mod->getPICLevel() == PICLevel::SmallPIC)

5050

break;

5051

// Attach global base pointer on GETtlsADDR32 node in order to

5052

// generate secure plt code for TLS symbols.

5053

getGlobalBaseReg();

5054

} break;

5055

case PPCISD::CALL: {

5056

if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 ||

5057

!TM.isPositionIndependent() || !Subtarget->isSecurePlt() ||

5058

!Subtarget->isTargetELF())

5059

break;

5060

5061

SDValue Op = N->getOperand(1);

5062

5063

if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) {

5064

if (GA->getTargetFlags() == PPCII::MO_PLT)

5065

getGlobalBaseReg();

5066

}

5067

else if (ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) {

5068

if (ES->getTargetFlags() == PPCII::MO_PLT)

5069

getGlobalBaseReg();

5070

}

5071

}

5072

break;

5073

5074

case PPCISD::GlobalBaseReg:

5075

ReplaceNode(N, getGlobalBaseReg());

5076

return;

5077

5078

case ISD::FrameIndex:

5079

selectFrameIndex(N, N);

5080

return;

5081

5082

case PPCISD::MFOCRF: {

5083

SDValue InFlag = N->getOperand(1);

5084

ReplaceNode(N, CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32,

5085

N->getOperand(0), InFlag));

5086

return;

5087

}

5088

5089

case PPCISD::READ_TIME_BASE:

5090

ReplaceNode(N, CurDAG->getMachineNode(PPC::ReadTB, dl, MVT::i32, MVT::i32,

5091

MVT::Other, N->getOperand(0)));

5092

return;

5093

5094

case PPCISD::SRA_ADDZE: {

5095

SDValue N0 = N->getOperand(0);

5096

SDValue ShiftAmt =

5097

CurDAG->getTargetConstant(*cast<ConstantSDNode>(N->getOperand(1))->

5098

getConstantIntValue(), dl,

5099

N->getValueType(0));

5100

if (N->getValueType(0) == MVT::i64) {

5101

SDNode *Op =

5102

CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, MVT::Glue,

5103

N0, ShiftAmt);

5104

CurDAG->SelectNodeTo(N, PPC::ADDZE8, MVT::i64, SDValue(Op, 0),

5105

SDValue(Op, 1));

5106

return;

5107

} else {

5108

assert(N->getValueType(0) == MVT::i32 &&(static_cast <bool> (N->getValueType(0) == MVT::i32 &&
"Expecting i64 or i32 in PPCISD::SRA_ADDZE") ? void (0) : __assert_fail
("N->getValueType(0) == MVT::i32 && \"Expecting i64 or i32 in PPCISD::SRA_ADDZE\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 5109, __extension__ __PRETTY_FUNCTION__))

5109

"Expecting i64 or i32 in PPCISD::SRA_ADDZE")(static_cast <bool> (N->getValueType(0) == MVT::i32 &&
"Expecting i64 or i32 in PPCISD::SRA_ADDZE") ? void (0) : __assert_fail
("N->getValueType(0) == MVT::i32 && \"Expecting i64 or i32 in PPCISD::SRA_ADDZE\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 5109, __extension__ __PRETTY_FUNCTION__));

5110

SDNode *Op =

5111

CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue,

5112

N0, ShiftAmt);

5113

CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, SDValue(Op, 0),

5114

SDValue(Op, 1));

5115

return;

5116

}

5117

}

5118

5119

case ISD::STORE: {

5120

// Change TLS initial-exec D-form stores to X-form stores.

5121

StoreSDNode *ST = cast<StoreSDNode>(N);

5122

if (EnableTLSOpt && Subtarget->isELFv2ABI() &&

5123

ST->getAddressingMode() != ISD::PRE_INC)

5124

if (tryTLSXFormStore(ST))

5125

return;

5126

break;

5127

}

5128

case ISD::LOAD: {

5129

// Handle preincrement loads.

5130

LoadSDNode *LD = cast<LoadSDNode>(N);

5131

EVT LoadedVT = LD->getMemoryVT();

5132

5133

// Normal loads are handled by code generated from the .td file.

5134

if (LD->getAddressingMode() != ISD::PRE_INC) {

5135

// Change TLS initial-exec D-form loads to X-form loads.

5136

if (EnableTLSOpt && Subtarget->isELFv2ABI())

5137

if (tryTLSXFormLoad(LD))

5138

return;

5139

break;

5140

}

5141

5142

SDValue Offset = LD->getOffset();

5143

if (Offset.getOpcode() == ISD::TargetConstant ||

5144

Offset.getOpcode() == ISD::TargetGlobalAddress) {

5145

5146

unsigned Opcode;

5147

bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;

5148

if (LD->getValueType(0) != MVT::i64) {

5149

// Handle PPC32 integer and normal FP loads.

5150

assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load")(static_cast <bool> ((!isSExt || LoadedVT == MVT::i16) &&
"Invalid sext update load") ? void (0) : __assert_fail ("(!isSExt || LoadedVT == MVT::i16) && \"Invalid sext update load\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 5150, __extension__ __PRETTY_FUNCTION__));

5151

switch (LoadedVT.getSimpleVT().SimpleTy) {

5152

default: llvm_unreachable("Invalid PPC load type!")::llvm::llvm_unreachable_internal("Invalid PPC load type!", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 5152);

5153

case MVT::f64: Opcode = PPC::LFDU; break;

5154

case MVT::f32: Opcode = PPC::LFSU; break;

5155

case MVT::i32: Opcode = PPC::LWZU; break;

5156

case MVT::i16: Opcode = isSExt ? PPC::LHAU : PPC::LHZU; break;

5157

case MVT::i1:

5158

case MVT::i8: Opcode = PPC::LBZU; break;

5159

}

5160

} else {

5161

assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!")(static_cast <bool> (LD->getValueType(0) == MVT::i64
&& "Unknown load result type!") ? void (0) : __assert_fail
("LD->getValueType(0) == MVT::i64 && \"Unknown load result type!\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 5161, __extension__ __PRETTY_FUNCTION__));

5162

assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load")(static_cast <bool> ((!isSExt || LoadedVT == MVT::i16) &&
"Invalid sext update load") ? void (0) : __assert_fail ("(!isSExt || LoadedVT == MVT::i16) && \"Invalid sext update load\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 5162, __extension__ __PRETTY_FUNCTION__));

5163

switch (LoadedVT.getSimpleVT().SimpleTy) {

5164

default: llvm_unreachable("Invalid PPC load type!")::llvm::llvm_unreachable_internal("Invalid PPC load type!", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 5164);

5165

case MVT::i64: Opcode = PPC::LDU; break;

5166

case MVT::i32: Opcode = PPC::LWZU8; break;

5167

case MVT::i16: Opcode = isSExt ? PPC::LHAU8 : PPC::LHZU8; break;

5168

case MVT::i1:

5169

case MVT::i8: Opcode = PPC::LBZU8; break;

5170

}

5171

}

5172

5173

SDValue Chain = LD->getChain();

5174

SDValue Base = LD->getBasePtr();

5175

SDValue Ops[] = { Offset, Base, Chain };

5176

SDNode *MN = CurDAG->getMachineNode(

5177

Opcode, dl, LD->getValueType(0),

5178

PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops);

5179

transferMemOperands(N, MN);

5180

ReplaceNode(N, MN);

5181

return;

5182

} else {

5183

unsigned Opcode;

5184

bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;

5185

if (LD->getValueType(0) != MVT::i64) {

5186

// Handle PPC32 integer and normal FP loads.

5187

assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load")(static_cast <bool> ((!isSExt || LoadedVT == MVT::i16) &&
"Invalid sext update load") ? void (0) : __assert_fail ("(!isSExt || LoadedVT == MVT::i16) && \"Invalid sext update load\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 5187, __extension__ __PRETTY_FUNCTION__));

5188

switch (LoadedVT.getSimpleVT().SimpleTy) {

5189

default: llvm_unreachable("Invalid PPC load type!")::llvm::llvm_unreachable_internal("Invalid PPC load type!", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 5189);

5190

case MVT::f64: Opcode = PPC::LFDUX; break;

5191

case MVT::f32: Opcode = PPC::LFSUX; break;

5192

case MVT::i32: Opcode = PPC::LWZUX; break;

5193

case MVT::i16: Opcode = isSExt ? PPC::LHAUX : PPC::LHZUX; break;

5194

case MVT::i1:

5195

case MVT::i8: Opcode = PPC::LBZUX; break;

5196

}

5197

} else {

5198

assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!")(static_cast <bool> (LD->getValueType(0) == MVT::i64
&& "Unknown load result type!") ? void (0) : __assert_fail
("LD->getValueType(0) == MVT::i64 && \"Unknown load result type!\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 5198, __extension__ __PRETTY_FUNCTION__));

5199

assert((!isSExt || LoadedVT == MVT::i16 || LoadedVT == MVT::i32) &&(static_cast <bool> ((!isSExt || LoadedVT == MVT::i16 ||
LoadedVT == MVT::i32) && "Invalid sext update load")
? void (0) : __assert_fail ("(!isSExt || LoadedVT == MVT::i16 || LoadedVT == MVT::i32) && \"Invalid sext update load\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 5200, __extension__ __PRETTY_FUNCTION__))

5200

"Invalid sext update load")(static_cast <bool> ((!isSExt || LoadedVT == MVT::i16 ||
LoadedVT == MVT::i32) && "Invalid sext update load")
? void (0) : __assert_fail ("(!isSExt || LoadedVT == MVT::i16 || LoadedVT == MVT::i32) && \"Invalid sext update load\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 5200, __extension__ __PRETTY_FUNCTION__));

5201

switch (LoadedVT.getSimpleVT().SimpleTy) {

5202

default: llvm_unreachable("Invalid PPC load type!")::llvm::llvm_unreachable_internal("Invalid PPC load type!", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 5202);

5203

case MVT::i64: Opcode = PPC::LDUX; break;

5204

case MVT::i32: Opcode = isSExt ? PPC::LWAUX : PPC::LWZUX8; break;

5205

case MVT::i16: Opcode = isSExt ? PPC::LHAUX8 : PPC::LHZUX8; break;

5206

case MVT::i1:

5207

case MVT::i8: Opcode = PPC::LBZUX8; break;

5208

}

5209

}

5210

5211

SDValue Chain = LD->getChain();

5212

SDValue Base = LD->getBasePtr();

5213

SDValue Ops[] = { Base, Offset, Chain };

5214

SDNode *MN = CurDAG->getMachineNode(

5215

Opcode, dl, LD->getValueType(0),

5216

PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops);

5217

transferMemOperands(N, MN);

5218

ReplaceNode(N, MN);

5219

return;

5220

}

5221

}

5222

5223

case ISD::AND:

5224

// If this is an 'and' with a mask, try to emit rlwinm/rldicl/rldicr

5225

if (tryAsSingleRLWINM(N) || tryAsSingleRLWIMI(N) || tryAsSingleRLDICL(N) ||

5226

tryAsSingleRLDICR(N) || tryAsSingleRLWINM8(N) || tryAsPairOfRLDICL(N))

5227

return;

5228

5229

// Other cases are autogenerated.

5230

break;

5231

case ISD::OR: {

5232

if (N->getValueType(0) == MVT::i32)

5233

if (tryBitfieldInsert(N))

5234

return;

5235

5236

int16_t Imm;

5237

if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&

5238

isIntS16Immediate(N->getOperand(1), Imm)) {

5239

KnownBits LHSKnown = CurDAG->computeKnownBits(N->getOperand(0));

5240

5241

// If this is equivalent to an add, then we can fold it with the

5242

// FrameIndex calculation.

5243

if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)Imm) == ~0ULL) {

5244

selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm);

5245

return;

5246

}

5247

}

5248

5249

// If this is 'or' against an imm with consecutive ones and both sides zero,

5250

// try to emit rldimi

5251

if (tryAsSingleRLDIMI(N))

5252

return;

5253

5254

// OR with a 32-bit immediate can be handled by ori + oris

5255

// without creating an immediate in a GPR.

5256

uint64_t Imm64 = 0;

5257

bool IsPPC64 = Subtarget->isPPC64();

5258

if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) &&

5259

(Imm64 & ~0xFFFFFFFFuLL) == 0) {

5260

// If ImmHi (ImmLo) is zero, only one ori (oris) is generated later.

5261

uint64_t ImmHi = Imm64 >> 16;

5262

uint64_t ImmLo = Imm64 & 0xFFFF;

5263

if (ImmHi != 0 && ImmLo != 0) {

5264

SDNode *Lo = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,

5265

N->getOperand(0),

5266

getI16Imm(ImmLo, dl));

5267

SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)};

5268

CurDAG->SelectNodeTo(N, PPC::ORIS8, MVT::i64, Ops1);

5269

return;

5270

}

5271

}

5272

5273

// Other cases are autogenerated.

5274

break;

5275

}

5276

case ISD::XOR: {

5277

// XOR with a 32-bit immediate can be handled by xori + xoris

5278

// without creating an immediate in a GPR.

5279

uint64_t Imm64 = 0;

5280

bool IsPPC64 = Subtarget->isPPC64();

5281

if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) &&

5282

(Imm64 & ~0xFFFFFFFFuLL) == 0) {

5283

// If ImmHi (ImmLo) is zero, only one xori (xoris) is generated later.

5284

uint64_t ImmHi = Imm64 >> 16;

5285

uint64_t ImmLo = Imm64 & 0xFFFF;

5286

if (ImmHi != 0 && ImmLo != 0) {

5287

SDNode *Lo = CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,

5288

N->getOperand(0),

5289

getI16Imm(ImmLo, dl));

5290

SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)};

5291

CurDAG->SelectNodeTo(N, PPC::XORIS8, MVT::i64, Ops1);

5292

return;

5293

}

5294

}

5295

5296

break;

5297

}

5298

case ISD::ADD: {

5299

int16_t Imm;

5300

if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&

5301

isIntS16Immediate(N->getOperand(1), Imm)) {

5302

selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm);

5303

return;

5304

}

5305

5306

break;

5307

}

5308

case ISD::SHL: {

5309

unsigned Imm, SH, MB, ME;

5310

if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&

5311

isRotateAndMask(N, Imm, true, SH, MB, ME)) {

5312

SDValue Ops[] = { N->getOperand(0).getOperand(0),

5313

getI32Imm(SH, dl), getI32Imm(MB, dl),

5314

getI32Imm(ME, dl) };

5315

CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);

5316

return;

5317

}

5318

5319

// Other cases are autogenerated.

5320

break;

5321

}

5322

case ISD::SRL: {

5323

unsigned Imm, SH, MB, ME;

5324

if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&

5325

isRotateAndMask(N, Imm, true, SH, MB, ME)) {

5326

SDValue Ops[] = { N->getOperand(0).getOperand(0),

5327

getI32Imm(SH, dl), getI32Imm(MB, dl),

5328

getI32Imm(ME, dl) };

5329

CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);

5330

return;

5331

}

5332

5333

// Other cases are autogenerated.

5334

break;

5335

}

5336

case ISD::MUL: {

5337

SDValue Op1 = N->getOperand(1);

5338

if (Op1.getOpcode() != ISD::Constant || Op1.getValueType() != MVT::i64)

5339

break;

5340

5341

// If the multiplier fits int16, we can handle it with mulli.

5342

int64_t Imm = cast<ConstantSDNode>(Op1)->getZExtValue();

5343

unsigned Shift = countTrailingZeros<uint64_t>(Imm);

5344

if (isInt<16>(Imm) || !Shift)

5345

break;

5346

5347

// If the shifted value fits int16, we can do this transformation:

5348

// (mul X, c1 << c2) -> (rldicr (mulli X, c1) c2). We do this in ISEL since

5349

// DAGCombiner prefers (shl (mul X, c1), c2) -> (mul X, c1 << c2).

5350

uint64_t ImmSh = Imm >> Shift;

5351

if (isInt<16>(ImmSh)) {

5352

uint64_t SextImm = SignExtend64(ImmSh & 0xFFFF, 16);

5353

SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64);

5354

SDNode *MulNode = CurDAG->getMachineNode(PPC::MULLI8, dl, MVT::i64,

5355

N->getOperand(0), SDImm);

5356

CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, SDValue(MulNode, 0),

5357

getI32Imm(Shift, dl), getI32Imm(63 - Shift, dl));

5358

return;

5359

}

5360

break;

5361

}

5362

// FIXME: Remove this once the ANDI glue bug is fixed:

5363

case PPCISD::ANDI_rec_1_EQ_BIT:

5364

case PPCISD::ANDI_rec_1_GT_BIT: {

5365

if (!ANDIGlueBug)

5366

break;

5367

5368

EVT InVT = N->getOperand(0).getValueType();

5369

assert((InVT == MVT::i64 || InVT == MVT::i32) &&(static_cast <bool> ((InVT == MVT::i64 || InVT == MVT::
i32) && "Invalid input type for ANDI_rec_1_EQ_BIT") ?
void (0) : __assert_fail ("(InVT == MVT::i64 || InVT == MVT::i32) && \"Invalid input type for ANDI_rec_1_EQ_BIT\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 5370, __extension__ __PRETTY_FUNCTION__))

5370

"Invalid input type for ANDI_rec_1_EQ_BIT")(static_cast <bool> ((InVT == MVT::i64 || InVT == MVT::
i32) && "Invalid input type for ANDI_rec_1_EQ_BIT") ?
void (0) : __assert_fail ("(InVT == MVT::i64 || InVT == MVT::i32) && \"Invalid input type for ANDI_rec_1_EQ_BIT\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 5370, __extension__ __PRETTY_FUNCTION__));

5371

5372

unsigned Opcode = (InVT == MVT::i64) ? PPC::ANDI8_rec : PPC::ANDI_rec;

5373

SDValue AndI(CurDAG->getMachineNode(Opcode, dl, InVT, MVT::Glue,

5374

N->getOperand(0),

5375

CurDAG->getTargetConstant(1, dl, InVT)),

5376

0);

5377

SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);

5378

SDValue SRIdxVal = CurDAG->getTargetConstant(

5379

N->getOpcode() == PPCISD::ANDI_rec_1_EQ_BIT ? PPC::sub_eq : PPC::sub_gt,

5380

dl, MVT::i32);

5381

5382

CurDAG->SelectNodeTo(N, TargetOpcode::EXTRACT_SUBREG, MVT::i1, CR0Reg,

5383

SRIdxVal, SDValue(AndI.getNode(), 1) /* glue */);

5384

return;

5385

}

5386

case ISD::SELECT_CC: {

5387

ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();

5388

EVT PtrVT =

5389

CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout());

5390

bool isPPC64 = (PtrVT == MVT::i64);

5391

5392

// If this is a select of i1 operands, we'll pattern match it.

5393

if (Subtarget->useCRBits() && N->getOperand(0).getValueType() == MVT::i1)

5394

break;

5395

5396

if (Subtarget->isISA3_0() && Subtarget->isPPC64()) {

5397

bool NeedSwapOps = false;

5398

bool IsUnCmp = false;

5399

if (mayUseP9Setb(N, CC, CurDAG, NeedSwapOps, IsUnCmp)) {

5400

SDValue LHS = N->getOperand(0);

5401

SDValue RHS = N->getOperand(1);

5402

if (NeedSwapOps)

5403

std::swap(LHS, RHS);

5404

5405

// Make use of SelectCC to generate the comparison to set CR bits, for

5406

// equality comparisons having one literal operand, SelectCC probably

5407

// doesn't need to materialize the whole literal and just use xoris to

5408

// check it first, it leads the following comparison result can't

5409

// exactly represent GT/LT relationship. So to avoid this we specify

5410

// SETGT/SETUGT here instead of SETEQ.

5411

SDValue GenCC =

5412

SelectCC(LHS, RHS, IsUnCmp ? ISD::SETUGT : ISD::SETGT, dl);

5413

CurDAG->SelectNodeTo(

5414

N, N->getSimpleValueType(0) == MVT::i64 ? PPC::SETB8 : PPC::SETB,

5415

N->getValueType(0), GenCC);

5416

NumP9Setb++;

5417

return;

5418

}

5419

}

5420

5421

// Handle the setcc cases here. select_cc lhs, 0, 1, 0, cc

5422

if (!isPPC64)

5423

if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)))

5424

if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N->getOperand(2)))

5425

if (ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N->getOperand(3)))

5426

if (N1C->isNullValue() && N3C->isNullValue() &&

5427

N2C->getZExtValue() == 1ULL && CC == ISD::SETNE &&

5428

// FIXME: Implement this optzn for PPC64.

5429

N->getValueType(0) == MVT::i32) {

5430

SDNode *Tmp =

5431

CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,

5432

N->getOperand(0), getI32Imm(~0U, dl));

5433

CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(Tmp, 0),

5434

N->getOperand(0), SDValue(Tmp, 1));

5435

return;

5436

}

5437

5438

SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl);

5439

5440

if (N->getValueType(0) == MVT::i1) {

5441

// An i1 select is: (c & t) | (!c & f).

5442

bool Inv;

5443

unsigned Idx = getCRIdxForSetCC(CC, Inv);

5444

5445

unsigned SRI;

5446

switch (Idx) {

5447

default: llvm_unreachable("Invalid CC index")::llvm::llvm_unreachable_internal("Invalid CC index", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 5447);

5448

case 0: SRI = PPC::sub_lt; break;

5449

case 1: SRI = PPC::sub_gt; break;

5450

case 2: SRI = PPC::sub_eq; break;

5451

case 3: SRI = PPC::sub_un; break;

5452

}

5453

5454

SDValue CCBit = CurDAG->getTargetExtractSubreg(SRI, dl, MVT::i1, CCReg);

5455

5456

SDValue NotCCBit(CurDAG->getMachineNode(PPC::CRNOR, dl, MVT::i1,

5457

CCBit, CCBit), 0);

5458

SDValue C = Inv ? NotCCBit : CCBit,

5459

NotC = Inv ? CCBit : NotCCBit;

5460

5461

SDValue CAndT(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,

5462

C, N->getOperand(2)), 0);

5463

SDValue NotCAndF(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,

5464

NotC, N->getOperand(3)), 0);

5465

5466

CurDAG->SelectNodeTo(N, PPC::CROR, MVT::i1, CAndT, NotCAndF);

5467

return;

5468

}

5469

5470

unsigned BROpc =

5471

getPredicateForSetCC(CC, N->getOperand(0).getValueType(), Subtarget);

5472

5473

unsigned SelectCCOp;

5474

if (N->getValueType(0) == MVT::i32)

5475

SelectCCOp = PPC::SELECT_CC_I4;

5476

else if (N->getValueType(0) == MVT::i64)

5477

SelectCCOp = PPC::SELECT_CC_I8;

5478

else if (N->getValueType(0) == MVT::f32) {

5479

if (Subtarget->hasP8Vector())

5480

SelectCCOp = PPC::SELECT_CC_VSSRC;

5481

else if (Subtarget->hasSPE())

5482

SelectCCOp = PPC::SELECT_CC_SPE4;

5483

else

5484

SelectCCOp = PPC::SELECT_CC_F4;

5485

} else if (N->getValueType(0) == MVT::f64) {

5486

if (Subtarget->hasVSX())

5487

SelectCCOp = PPC::SELECT_CC_VSFRC;

5488

else if (Subtarget->hasSPE())

5489

SelectCCOp = PPC::SELECT_CC_SPE;

5490

else

5491

SelectCCOp = PPC::SELECT_CC_F8;

5492

} else if (N->getValueType(0) == MVT::f128)

5493

SelectCCOp = PPC::SELECT_CC_F16;

5494

else if (Subtarget->hasSPE())

5495

SelectCCOp = PPC::SELECT_CC_SPE;

5496

else if (N->getValueType(0) == MVT::v2f64 ||

5497

N->getValueType(0) == MVT::v2i64)

5498

SelectCCOp = PPC::SELECT_CC_VSRC;

5499

else

5500

SelectCCOp = PPC::SELECT_CC_VRRC;

5501

5502

SDValue Ops[] = { CCReg, N->getOperand(2), N->getOperand(3),

5503

getI32Imm(BROpc, dl) };

5504

CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops);

5505

return;

5506

}

5507

case ISD::VECTOR_SHUFFLE:

5508

if (Subtarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 ||

5509

N->getValueType(0) == MVT::v2i64)) {

5510

ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);

5511

5512

SDValue Op1 = N->getOperand(SVN->getMaskElt(0) < 2 ? 0 : 1),

5513

Op2 = N->getOperand(SVN->getMaskElt(1) < 2 ? 0 : 1);

5514

unsigned DM[2];

5515

5516

for (int i = 0; i < 2; ++i)

5517

if (SVN->getMaskElt(i) <= 0 || SVN->getMaskElt(i) == 2)

5518

DM[i] = 0;

5519

else

5520

DM[i] = 1;

5521

5522

if (Op1 == Op2 && DM[0] == 0 && DM[1] == 0 &&

5523

Op1.getOpcode() == ISD::SCALAR_TO_VECTOR &&

5524

isa<LoadSDNode>(Op1.getOperand(0))) {

5525

LoadSDNode *LD = cast<LoadSDNode>(Op1.getOperand(0));

5526

SDValue Base, Offset;

5527

5528

if (LD->isUnindexed() && LD->hasOneUse() && Op1.hasOneUse() &&

5529

(LD->getMemoryVT() == MVT::f64 ||

5530

LD->getMemoryVT() == MVT::i64) &&

5531

SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) {

5532

SDValue Chain = LD->getChain();

5533

SDValue Ops[] = { Base, Offset, Chain };

5534

MachineMemOperand *MemOp = LD->getMemOperand();

5535

SDNode *NewN = CurDAG->SelectNodeTo(N, PPC::LXVDSX,

5536

N->getValueType(0), Ops);

5537

CurDAG->setNodeMemRefs(cast<MachineSDNode>(NewN), {MemOp});

5538

return;

5539

}

5540

}

5541

5542

// For little endian, we must swap the input operands and adjust

5543

// the mask elements (reverse and invert them).

5544

if (Subtarget->isLittleEndian()) {

5545

std::swap(Op1, Op2);

5546

unsigned tmp = DM[0];

5547

DM[0] = 1 - DM[1];

5548

DM[1] = 1 - tmp;

5549

}

5550

5551

SDValue DMV = CurDAG->getTargetConstant(DM[1] | (DM[0] << 1), dl,

5552

MVT::i32);

5553

SDValue Ops[] = { Op1, Op2, DMV };

5554

CurDAG->SelectNodeTo(N, PPC::XXPERMDI, N->getValueType(0), Ops);

5555

return;

5556

}

5557

5558

break;

5559

case PPCISD::BDNZ:

5560

case PPCISD::BDZ: {

5561

bool IsPPC64 = Subtarget->isPPC64();

5562

SDValue Ops[] = { N->getOperand(1), N->getOperand(0) };

5563

CurDAG->SelectNodeTo(N, N->getOpcode() == PPCISD::BDNZ

5564

? (IsPPC64 ? PPC::BDNZ8 : PPC::BDNZ)

5565

: (IsPPC64 ? PPC::BDZ8 : PPC::BDZ),

5566

MVT::Other, Ops);

5567

return;

5568

}

5569

case PPCISD::COND_BRANCH: {

5570

// Op #0 is the Chain.

5571

// Op #1 is the PPC::PRED_* number.

5572

// Op #2 is the CR#

5573

// Op #3 is the Dest MBB

5574

// Op #4 is the Flag.

5575

// Prevent PPC::PRED_* from being selected into LI.

5576

unsigned PCC = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();

5577

if (EnableBranchHint)

5578

PCC |= getBranchHint(PCC, *FuncInfo, N->getOperand(3));

5579

5580

SDValue Pred = getI32Imm(PCC, dl);

5581

SDValue Ops[] = { Pred, N->getOperand(2), N->getOperand(3),

5582

N->getOperand(0), N->getOperand(4) };

5583

CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);

5584

return;

5585

}

5586

case ISD::BR_CC: {

5587

if (tryFoldSWTestBRCC(N))

5588

return;

5589

ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();

5590

unsigned PCC =

5591

getPredicateForSetCC(CC, N->getOperand(2).getValueType(), Subtarget);

5592

5593

if (N->getOperand(2).getValueType() == MVT::i1) {

5594

unsigned Opc;

5595

bool Swap;

5596

switch (PCC) {

5597

default: llvm_unreachable("Unexpected Boolean-operand predicate")::llvm::llvm_unreachable_internal("Unexpected Boolean-operand predicate"
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 5597);

5598

case PPC::PRED_LT: Opc = PPC::CRANDC; Swap = true; break;

5599

case PPC::PRED_LE: Opc = PPC::CRORC; Swap = true; break;

5600

case PPC::PRED_EQ: Opc = PPC::CREQV; Swap = false; break;

5601

case PPC::PRED_GE: Opc = PPC::CRORC; Swap = false; break;

5602

case PPC::PRED_GT: Opc = PPC::CRANDC; Swap = false; break;

5603

case PPC::PRED_NE: Opc = PPC::CRXOR; Swap = false; break;

5604

}

5605

5606

// A signed comparison of i1 values produces the opposite result to an

5607

// unsigned one if the condition code includes less-than or greater-than.

5608

// This is because 1 is the most negative signed i1 number and the most

5609

// positive unsigned i1 number. The CR-logical operations used for such

5610

// comparisons are non-commutative so for signed comparisons vs. unsigned

5611

// ones, the input operands just need to be swapped.

5612

if (ISD::isSignedIntSetCC(CC))

5613

Swap = !Swap;

5614

5615

SDValue BitComp(CurDAG->getMachineNode(Opc, dl, MVT::i1,

5616

N->getOperand(Swap ? 3 : 2),

5617

N->getOperand(Swap ? 2 : 3)), 0);

5618

CurDAG->SelectNodeTo(N, PPC::BC, MVT::Other, BitComp, N->getOperand(4),

5619

N->getOperand(0));

5620

return;

5621

}

5622

5623

if (EnableBranchHint)

5624

PCC |= getBranchHint(PCC, *FuncInfo, N->getOperand(4));

5625

5626

SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl);

5627

SDValue Ops[] = { getI32Imm(PCC, dl), CondCode,

5628

N->getOperand(4), N->getOperand(0) };

5629

CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);

5630

return;

5631

}

5632

case ISD::BRIND: {

5633

// FIXME: Should custom lower this.

5634

SDValue Chain = N->getOperand(0);

5635

SDValue Target = N->getOperand(1);

5636

unsigned Opc = Target.getValueType() == MVT::i32 ? PPC::MTCTR : PPC::MTCTR8;

5637

unsigned Reg = Target.getValueType() == MVT::i32 ? PPC::BCTR : PPC::BCTR8;

5638

Chain = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Target,

5639

Chain), 0);

5640

CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain);

5641

return;

5642

}

5643

case PPCISD::TOC_ENTRY: {

5644

const bool isPPC64 = Subtarget->isPPC64();

5645

const bool isELFABI = Subtarget->isSVR4ABI();

5646

const bool isAIXABI = Subtarget->isAIXABI();

5647

5648

// PowerPC only supports the small, medium and large code models.

5649

const CodeModel::Model CModel = TM.getCodeModel();

5650

assert(!(CModel == CodeModel::Tiny || CModel == CodeModel::Kernel) &&(static_cast <bool> (!(CModel == CodeModel::Tiny || CModel
== CodeModel::Kernel) && "PowerPC doesn't support tiny or kernel code models."
) ? void (0) : __assert_fail ("!(CModel == CodeModel::Tiny || CModel == CodeModel::Kernel) && \"PowerPC doesn't support tiny or kernel code models.\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 5651, __extension__ __PRETTY_FUNCTION__))

5651

"PowerPC doesn't support tiny or kernel code models.")(static_cast <bool> (!(CModel == CodeModel::Tiny || CModel
== CodeModel::Kernel) && "PowerPC doesn't support tiny or kernel code models."
) ? void (0) : __assert_fail ("!(CModel == CodeModel::Tiny || CModel == CodeModel::Kernel) && \"PowerPC doesn't support tiny or kernel code models.\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 5651, __extension__ __PRETTY_FUNCTION__));

5652

5653

if (isAIXABI && CModel == CodeModel::Medium)

5654

report_fatal_error("Medium code model is not supported on AIX.");

5655

5656

// For 64-bit small code model, we allow SelectCodeCommon to handle this,

5657

// selecting one of LDtoc, LDtocJTI, LDtocCPT, and LDtocBA.

5658

if (isPPC64 && CModel == CodeModel::Small)

5659

break;

5660

5661

// Handle 32-bit small code model.

5662

if (!isPPC64) {

5663

// Transforms the PPCISD::TOC_ENTRY node into the passed-in OpCode, either

5664

// PPC::ADDItoc, or PPC::LWZtoc

5665

auto replaceWith = [this, &dl](unsigned OpCode, SDNode *TocEntry) {

5666

SDValue GA = TocEntry->getOperand(0);

5667

SDValue TocBase = TocEntry->getOperand(1);

5668

SDNode *MN = CurDAG->getMachineNode(OpCode, dl, MVT::i32, GA, TocBase);

5669

transferMemOperands(TocEntry, MN);

5670

ReplaceNode(TocEntry, MN);

5671

};

5672

5673

if (isELFABI) {

5674

assert(TM.isPositionIndependent() &&(static_cast <bool> (TM.isPositionIndependent() &&
"32-bit ELF can only have TOC entries in position independent"
" code.") ? void (0) : __assert_fail ("TM.isPositionIndependent() && \"32-bit ELF can only have TOC entries in position independent\" \" code.\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 5676, __extension__ __PRETTY_FUNCTION__))

5675

"32-bit ELF can only have TOC entries in position independent"(static_cast <bool> (TM.isPositionIndependent() &&
"32-bit ELF can only have TOC entries in position independent"
" code.") ? void (0) : __assert_fail ("TM.isPositionIndependent() && \"32-bit ELF can only have TOC entries in position independent\" \" code.\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 5676, __extension__ __PRETTY_FUNCTION__))

5676

" code.")(static_cast <bool> (TM.isPositionIndependent() &&
"32-bit ELF can only have TOC entries in position independent"
" code.") ? void (0) : __assert_fail ("TM.isPositionIndependent() && \"32-bit ELF can only have TOC entries in position independent\" \" code.\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 5676, __extension__ __PRETTY_FUNCTION__));

5677

// 32-bit ELF always uses a small code model toc access.

5678

replaceWith(PPC::LWZtoc, N);

5679

return;

5680

}

5681

5682

if (isAIXABI && CModel == CodeModel::Small) {

5683

if (hasTocDataAttr(N->getOperand(0),

5684

CurDAG->getDataLayout().getPointerSize()))

5685

replaceWith(PPC::ADDItoc, N);

5686

else

5687

replaceWith(PPC::LWZtoc, N);

5688

5689

return;

5690

}

5691

}

5692

5693

assert(CModel != CodeModel::Small && "All small code models handled.")(static_cast <bool> (CModel != CodeModel::Small &&
"All small code models handled.") ? void (0) : __assert_fail
("CModel != CodeModel::Small && \"All small code models handled.\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 5693, __extension__ __PRETTY_FUNCTION__));

5694

5695

assert((isPPC64 || (isAIXABI && !isPPC64)) && "We are dealing with 64-bit"(static_cast <bool> ((isPPC64 || (isAIXABI && !
isPPC64)) && "We are dealing with 64-bit" " ELF/AIX or 32-bit AIX in the following."
) ? void (0) : __assert_fail ("(isPPC64 || (isAIXABI && !isPPC64)) && \"We are dealing with 64-bit\" \" ELF/AIX or 32-bit AIX in the following.\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 5696, __extension__ __PRETTY_FUNCTION__))

5696

" ELF/AIX or 32-bit AIX in the following.")(static_cast <bool> ((isPPC64 || (isAIXABI && !
isPPC64)) && "We are dealing with 64-bit" " ELF/AIX or 32-bit AIX in the following."
) ? void (0) : __assert_fail ("(isPPC64 || (isAIXABI && !isPPC64)) && \"We are dealing with 64-bit\" \" ELF/AIX or 32-bit AIX in the following.\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 5696, __extension__ __PRETTY_FUNCTION__));

5697

5698

// Transforms the PPCISD::TOC_ENTRY node for 32-bit AIX large code model mode

5699

// or 64-bit medium (ELF-only) or large (ELF and AIX) code model code. We

5700

// generate two instructions as described below. The first source operand

5701

// is a symbol reference. If it must be toc-referenced according to

5702

// Subtarget, we generate:

5703

// [32-bit AIX]

5704

// LWZtocL(@sym, ADDIStocHA(%r2, @sym))

5705

// [64-bit ELF/AIX]

5706

// LDtocL(@sym, ADDIStocHA8(%x2, @sym))

5707

// Otherwise we generate:

5708

// ADDItocL(ADDIStocHA8(%x2, @sym), @sym)

5709

SDValue GA = N->getOperand(0);

5710

SDValue TOCbase = N->getOperand(1);

5711

5712

EVT VT = isPPC64 ? MVT::i64 : MVT::i32;

5713

SDNode *Tmp = CurDAG->getMachineNode(

5714

isPPC64 ? PPC::ADDIStocHA8 : PPC::ADDIStocHA, dl, VT, TOCbase, GA);

5715

5716

if (PPCLowering->isAccessedAsGotIndirect(GA)) {

5717

// If it is accessed as got-indirect, we need an extra LWZ/LD to load

5718

// the address.

5719

SDNode *MN = CurDAG->getMachineNode(

5720

isPPC64 ? PPC::LDtocL : PPC::LWZtocL, dl, VT, GA, SDValue(Tmp, 0));

5721

5722

transferMemOperands(N, MN);

5723

ReplaceNode(N, MN);

5724

return;

5725

}

5726

5727

// Build the address relative to the TOC-pointer.

5728

ReplaceNode(N, CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64,

5729

SDValue(Tmp, 0), GA));

5730

return;

5731

}

5732

case PPCISD::PPC32_PICGOT:

5733

// Generate a PIC-safe GOT reference.

5734

assert(Subtarget->is32BitELFABI() &&(static_cast <bool> (Subtarget->is32BitELFABI() &&
"PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4") ? void
(0) : __assert_fail ("Subtarget->is32BitELFABI() && \"PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 5735, __extension__ __PRETTY_FUNCTION__))

5735

"PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4")(static_cast <bool> (Subtarget->is32BitELFABI() &&
"PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4") ? void
(0) : __assert_fail ("Subtarget->is32BitELFABI() && \"PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 5735, __extension__ __PRETTY_FUNCTION__));

5736

CurDAG->SelectNodeTo(N, PPC::PPC32PICGOT,

5737

PPCLowering->getPointerTy(CurDAG->getDataLayout()),

5738

MVT::i32);

5739

return;

5740

5741

case PPCISD::VADD_SPLAT: {

5742

// This expands into one of three sequences, depending on whether

5743

// the first operand is odd or even, positive or negative.

5744

assert(isa<ConstantSDNode>(N->getOperand(0)) &&(static_cast <bool> (isa<ConstantSDNode>(N->getOperand
(0)) && isa<ConstantSDNode>(N->getOperand(1)
) && "Invalid operand on VADD_SPLAT!") ? void (0) : __assert_fail
("isa<ConstantSDNode>(N->getOperand(0)) && isa<ConstantSDNode>(N->getOperand(1)) && \"Invalid operand on VADD_SPLAT!\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 5746, __extension__ __PRETTY_FUNCTION__))

5745

isa<ConstantSDNode>(N->getOperand(1)) &&(static_cast <bool> (isa<ConstantSDNode>(N->getOperand
(0)) && isa<ConstantSDNode>(N->getOperand(1)
) && "Invalid operand on VADD_SPLAT!") ? void (0) : __assert_fail
("isa<ConstantSDNode>(N->getOperand(0)) && isa<ConstantSDNode>(N->getOperand(1)) && \"Invalid operand on VADD_SPLAT!\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 5746, __extension__ __PRETTY_FUNCTION__))

5746

"Invalid operand on VADD_SPLAT!")(static_cast <bool> (isa<ConstantSDNode>(N->getOperand
(0)) && isa<ConstantSDNode>(N->getOperand(1)
) && "Invalid operand on VADD_SPLAT!") ? void (0) : __assert_fail
("isa<ConstantSDNode>(N->getOperand(0)) && isa<ConstantSDNode>(N->getOperand(1)) && \"Invalid operand on VADD_SPLAT!\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 5746, __extension__ __PRETTY_FUNCTION__));

5747

5748

int Elt = N->getConstantOperandVal(0);

5749

int EltSize = N->getConstantOperandVal(1);

5750

unsigned Opc1, Opc2, Opc3;

5751

EVT VT;

5752

5753

if (EltSize == 1) {

5754

Opc1 = PPC::VSPLTISB;

5755

Opc2 = PPC::VADDUBM;

5756

Opc3 = PPC::VSUBUBM;

5757

VT = MVT::v16i8;

5758

} else if (EltSize == 2) {

5759

Opc1 = PPC::VSPLTISH;

5760

Opc2 = PPC::VADDUHM;

5761

Opc3 = PPC::VSUBUHM;

5762

VT = MVT::v8i16;

5763

} else {

5764

assert(EltSize == 4 && "Invalid element size on VADD_SPLAT!")(static_cast <bool> (EltSize == 4 && "Invalid element size on VADD_SPLAT!"
) ? void (0) : __assert_fail ("EltSize == 4 && \"Invalid element size on VADD_SPLAT!\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 5764, __extension__ __PRETTY_FUNCTION__));

5765

Opc1 = PPC::VSPLTISW;

5766

Opc2 = PPC::VADDUWM;

5767

Opc3 = PPC::VSUBUWM;

5768

VT = MVT::v4i32;

5769

}

5770

5771

if ((Elt & 1) == 0) {

5772

// Elt is even, in the range [-32,-18] + [16,30].

5773

//

5774

// Convert: VADD_SPLAT elt, size

5775

// Into: tmp = VSPLTIS[BHW] elt

5776

// VADDU[BHW]M tmp, tmp

5777

// Where: [BHW] = B for size = 1, H for size = 2, W for size = 4

5778

SDValue EltVal = getI32Imm(Elt >> 1, dl);

5779

SDNode *Tmp = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);

5780

SDValue TmpVal = SDValue(Tmp, 0);

5781

ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal));

5782

return;

5783

} else if (Elt > 0) {

5784

// Elt is odd and positive, in the range [17,31].

5785

//

5786

// Convert: VADD_SPLAT elt, size

5787

// Into: tmp1 = VSPLTIS[BHW] elt-16

5788

// tmp2 = VSPLTIS[BHW] -16

5789

// VSUBU[BHW]M tmp1, tmp2

5790

SDValue EltVal = getI32Imm(Elt - 16, dl);

5791

SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);

5792

EltVal = getI32Imm(-16, dl);

5793

SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);

5794

ReplaceNode(N, CurDAG->getMachineNode(Opc3, dl, VT, SDValue(Tmp1, 0),

5795

SDValue(Tmp2, 0)));

5796

return;

5797

} else {

5798

// Elt is odd and negative, in the range [-31,-17].

5799

//

5800

// Convert: VADD_SPLAT elt, size

5801

// Into: tmp1 = VSPLTIS[BHW] elt+16

5802

// tmp2 = VSPLTIS[BHW] -16

5803

// VADDU[BHW]M tmp1, tmp2

5804

SDValue EltVal = getI32Imm(Elt + 16, dl);

5805

SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);

5806

EltVal = getI32Imm(-16, dl);

5807

SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);

5808

ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, SDValue(Tmp1, 0),

5809

SDValue(Tmp2, 0)));

5810

return;

5811

}

5812

}

5813

}

5814

5815

SelectCode(N);

5816

}

5817

5818

// If the target supports the cmpb instruction, do the idiom recognition here.

5819

// We don't do this as a DAG combine because we don't want to do it as nodes

5820

// are being combined (because we might miss part of the eventual idiom). We

5821

// don't want to do it during instruction selection because we want to reuse

5822

// the logic for lowering the masking operations already part of the

5823

// instruction selector.

5824

SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) {

5825

SDLoc dl(N);

5826

5827

assert(N->getOpcode() == ISD::OR &&(static_cast <bool> (N->getOpcode() == ISD::OR &&
"Only OR nodes are supported for CMPB") ? void (0) : __assert_fail
("N->getOpcode() == ISD::OR && \"Only OR nodes are supported for CMPB\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 5828, __extension__ __PRETTY_FUNCTION__))

5828

"Only OR nodes are supported for CMPB")(static_cast <bool> (N->getOpcode() == ISD::OR &&
"Only OR nodes are supported for CMPB") ? void (0) : __assert_fail
("N->getOpcode() == ISD::OR && \"Only OR nodes are supported for CMPB\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 5828, __extension__ __PRETTY_FUNCTION__));

5829

5830

SDValue Res;

5831

if (!Subtarget->hasCMPB())

5832

return Res;

5833

5834

if (N->getValueType(0) != MVT::i32 &&

5835

N->getValueType(0) != MVT::i64)

5836

return Res;

5837

5838

EVT VT = N->getValueType(0);

5839

5840

SDValue RHS, LHS;

5841

bool BytesFound[8] = {false, false, false, false, false, false, false, false};

5842

uint64_t Mask = 0, Alt = 0;

5843

5844

auto IsByteSelectCC = [this](SDValue O, unsigned &b,

5845

uint64_t &Mask, uint64_t &Alt,

5846

SDValue &LHS, SDValue &RHS) {

5847

if (O.getOpcode() != ISD::SELECT_CC)

5848

return false;

5849

ISD::CondCode CC = cast<CondCodeSDNode>(O.getOperand(4))->get();

5850

5851

if (!isa<ConstantSDNode>(O.getOperand(2)) ||

5852

!isa<ConstantSDNode>(O.getOperand(3)))

5853

return false;

5854

5855

uint64_t PM = O.getConstantOperandVal(2);

5856

uint64_t PAlt = O.getConstantOperandVal(3);

5857

for (b = 0; b < 8; ++b) {

5858

uint64_t Mask = UINT64_C(0xFF)0xFFUL << (8*b);

5859

if (PM && (PM & Mask) == PM && (PAlt & Mask) == PAlt)

5860

break;

5861

}

5862

5863

if (b == 8)

5864

return false;

5865

Mask |= PM;

5866

Alt |= PAlt;

5867

5868

if (!isa<ConstantSDNode>(O.getOperand(1)) ||

5869

O.getConstantOperandVal(1) != 0) {

5870

SDValue Op0 = O.getOperand(0), Op1 = O.getOperand(1);

5871

if (Op0.getOpcode() == ISD::TRUNCATE)

5872

Op0 = Op0.getOperand(0);

5873

if (Op1.getOpcode() == ISD::TRUNCATE)

5874

Op1 = Op1.getOperand(0);

5875

5876

if (Op0.getOpcode() == ISD::SRL && Op1.getOpcode() == ISD::SRL &&

5877

Op0.getOperand(1) == Op1.getOperand(1) && CC == ISD::SETEQ &&

5878

isa<ConstantSDNode>(Op0.getOperand(1))) {

5879

5880

unsigned Bits = Op0.getValueSizeInBits();

5881

if (b != Bits/8-1)

5882

return false;

5883

if (Op0.getConstantOperandVal(1) != Bits-8)

5884

return false;

5885

5886

LHS = Op0.getOperand(0);

5887

RHS = Op1.getOperand(0);

5888

return true;

5889

}

5890

5891

// When we have small integers (i16 to be specific), the form present

5892

// post-legalization uses SETULT in the SELECT_CC for the

5893

// higher-order byte, depending on the fact that the

5894

// even-higher-order bytes are known to all be zero, for example:

5895

// select_cc (xor $lhs, $rhs), 256, 65280, 0, setult

5896

// (so when the second byte is the same, because all higher-order

5897

// bits from bytes 3 and 4 are known to be zero, the result of the

5898

// xor can be at most 255)

5899

if (Op0.getOpcode() == ISD::XOR && CC == ISD::SETULT &&

5900

isa<ConstantSDNode>(O.getOperand(1))) {

5901

5902

uint64_t ULim = O.getConstantOperandVal(1);

5903

if (ULim != (UINT64_C(1)1UL << b*8))

5904

return false;

5905

5906

// Now we need to make sure that the upper bytes are known to be

5907

// zero.

5908

unsigned Bits = Op0.getValueSizeInBits();

5909

if (!CurDAG->MaskedValueIsZero(

5910

Op0, APInt::getHighBitsSet(Bits, Bits - (b + 1) * 8)))

5911

return false;

5912

5913

LHS = Op0.getOperand(0);

5914

RHS = Op0.getOperand(1);

5915

return true;

5916

}

5917

5918

return false;

5919

}

5920

5921

if (CC != ISD::SETEQ)

5922

return false;

5923

5924

SDValue Op = O.getOperand(0);

5925

if (Op.getOpcode() == ISD::AND) {

5926

if (!isa<ConstantSDNode>(Op.getOperand(1)))

5927

return false;

5928

if (Op.getConstantOperandVal(1) != (UINT64_C(0xFF)0xFFUL << (8*b)))

5929

return false;

5930

5931

SDValue XOR = Op.getOperand(0);

5932

if (XOR.getOpcode() == ISD::TRUNCATE)

5933

XOR = XOR.getOperand(0);

5934

if (XOR.getOpcode() != ISD::XOR)

5935

return false;

5936

5937

LHS = XOR.getOperand(0);

5938

RHS = XOR.getOperand(1);

5939

return true;

5940

} else if (Op.getOpcode() == ISD::SRL) {

5941

if (!isa<ConstantSDNode>(Op.getOperand(1)))

5942

return false;

5943

unsigned Bits = Op.getValueSizeInBits();

5944

if (b != Bits/8-1)

5945

return false;

5946

if (Op.getConstantOperandVal(1) != Bits-8)

5947

return false;

5948

5949

SDValue XOR = Op.getOperand(0);

5950

if (XOR.getOpcode() == ISD::TRUNCATE)

5951

XOR = XOR.getOperand(0);

5952

if (XOR.getOpcode() != ISD::XOR)

5953

return false;

5954

5955

LHS = XOR.getOperand(0);

5956

RHS = XOR.getOperand(1);

5957

return true;

5958

}

5959

5960

return false;

5961

};

5962

5963

SmallVector<SDValue, 8> Queue(1, SDValue(N, 0));

5964

while (!Queue.empty()) {

5965

SDValue V = Queue.pop_back_val();

5966

5967

for (const SDValue &O : V.getNode()->ops()) {

5968

unsigned b = 0;

5969

uint64_t M = 0, A = 0;

5970

SDValue OLHS, ORHS;

5971

if (O.getOpcode() == ISD::OR) {

5972

Queue.push_back(O);

5973

} else if (IsByteSelectCC(O, b, M, A, OLHS, ORHS)) {

5974

if (!LHS) {

5975

LHS = OLHS;

5976

RHS = ORHS;

5977

BytesFound[b] = true;

5978

Mask |= M;

5979

Alt |= A;

5980

} else if ((LHS == ORHS && RHS == OLHS) ||

5981

(RHS == ORHS && LHS == OLHS)) {

5982

BytesFound[b] = true;

5983

Mask |= M;

5984

Alt |= A;

5985

} else {

5986

return Res;

5987

}

5988

} else {

5989

return Res;

5990

}

5991

}

5992

}

5993

5994

unsigned LastB = 0, BCnt = 0;

5995

for (unsigned i = 0; i < 8; ++i)

5996

if (BytesFound[LastB]) {

5997

++BCnt;

5998

LastB = i;

5999

}

6000

6001

if (!LastB || BCnt < 2)

6002

return Res;

6003

6004

// Because we'll be zero-extending the output anyway if don't have a specific

6005

// value for each input byte (via the Mask), we can 'anyext' the inputs.

6006

if (LHS.getValueType() != VT) {

6007

LHS = CurDAG->getAnyExtOrTrunc(LHS, dl, VT);

6008

RHS = CurDAG->getAnyExtOrTrunc(RHS, dl, VT);

6009

}

6010

6011

Res = CurDAG->getNode(PPCISD::CMPB, dl, VT, LHS, RHS);

6012

6013

bool NonTrivialMask = ((int64_t) Mask) != INT64_C(-1)-1L;

6014

if (NonTrivialMask && !Alt) {

6015

// Res = Mask & CMPB

6016

Res = CurDAG->getNode(ISD::AND, dl, VT, Res,

6017

CurDAG->getConstant(Mask, dl, VT));

6018

} else if (Alt) {

6019

// Res = (CMPB & Mask) | (~CMPB & Alt)

6020

// Which, as suggested here:

6021

// https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge

6022

// can be written as:

6023

// Res = Alt ^ ((Alt ^ Mask) & CMPB)

6024

// useful because the (Alt ^ Mask) can be pre-computed.

6025

Res = CurDAG->getNode(ISD::AND, dl, VT, Res,

6026

CurDAG->getConstant(Mask ^ Alt, dl, VT));

6027

Res = CurDAG->getNode(ISD::XOR, dl, VT, Res,

6028

CurDAG->getConstant(Alt, dl, VT));

6029

}

6030

6031

return Res;

6032

}

6033

6034

// When CR bit registers are enabled, an extension of an i1 variable to a i32

6035

// or i64 value is lowered in terms of a SELECT_I[48] operation, and thus

6036

// involves constant materialization of a 0 or a 1 or both. If the result of

6037

// the extension is then operated upon by some operator that can be constant

6038

// folded with a constant 0 or 1, and that constant can be materialized using

6039

// only one instruction (like a zero or one), then we should fold in those

6040

// operations with the select.

6041

void PPCDAGToDAGISel::foldBoolExts(SDValue &Res, SDNode *&N) {

6042

if (!Subtarget->useCRBits())

6043

return;

6044

6045

if (N->getOpcode() != ISD::ZERO_EXTEND &&

6046

N->getOpcode() != ISD::SIGN_EXTEND &&

6047

N->getOpcode() != ISD::ANY_EXTEND)

6048

return;

6049

6050

if (N->getOperand(0).getValueType() != MVT::i1)

6051

return;

6052

6053

if (!N->hasOneUse())

6054

return;

6055

6056

SDLoc dl(N);

6057

EVT VT = N->getValueType(0);

6058

SDValue Cond = N->getOperand(0);

6059

SDValue ConstTrue =

6060

CurDAG->getConstant(N->getOpcode() == ISD::SIGN_EXTEND ? -1 : 1, dl, VT);

6061

SDValue ConstFalse = CurDAG->getConstant(0, dl, VT);

6062

6063

do {

6064

SDNode *User = *N->use_begin();

6065

if (User->getNumOperands() != 2)

6066

break;

6067

6068

auto TryFold = [this, N, User, dl](SDValue Val) {

6069

SDValue UserO0 = User->getOperand(0), UserO1 = User->getOperand(1);

6070

SDValue O0 = UserO0.getNode() == N ? Val : UserO0;

6071

SDValue O1 = UserO1.getNode() == N ? Val : UserO1;

6072

6073

return CurDAG->FoldConstantArithmetic(User->getOpcode(), dl,

6074

User->getValueType(0), {O0, O1});

6075

};

6076

6077

// FIXME: When the semantics of the interaction between select and undef

6078

// are clearly defined, it may turn out to be unnecessary to break here.

6079

SDValue TrueRes = TryFold(ConstTrue);

6080

if (!TrueRes || TrueRes.isUndef())

6081

break;

6082

SDValue FalseRes = TryFold(ConstFalse);

6083

if (!FalseRes || FalseRes.isUndef())

6084

break;

6085

6086

// For us to materialize these using one instruction, we must be able to

6087

// represent them as signed 16-bit integers.

6088

uint64_t True = cast<ConstantSDNode>(TrueRes)->getZExtValue(),

6089

False = cast<ConstantSDNode>(FalseRes)->getZExtValue();

6090

if (!isInt<16>(True) || !isInt<16>(False))

6091

break;

6092

6093

// We can replace User with a new SELECT node, and try again to see if we

6094

// can fold the select with its user.

6095

Res = CurDAG->getSelect(dl, User->getValueType(0), Cond, TrueRes, FalseRes);

6096

N = User;

6097

ConstTrue = TrueRes;

6098

ConstFalse = FalseRes;

6099

} while (N->hasOneUse());

6100

}

6101

6102

void PPCDAGToDAGISel::PreprocessISelDAG() {

6103

SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

6104

6105

bool MadeChange = false;

6106

while (Position != CurDAG->allnodes_begin()) {

6107

SDNode *N = &*--Position;

6108

if (N->use_empty())

6109

continue;

6110

6111

SDValue Res;

6112

switch (N->getOpcode()) {

6113

default: break;

6114

case ISD::OR:

6115

Res = combineToCMPB(N);

6116

break;

6117

}

6118

6119

if (!Res)

6120

foldBoolExts(Res, N);

6121

6122

if (Res) {

6123

LLVM_DEBUG(dbgs() << "PPC DAG preprocessing replacing:\nOld: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "PPC DAG preprocessing replacing:\nOld: "
; } } while (false);

6124

LLVM_DEBUG(N->dump(CurDAG))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { N->dump(CurDAG); } } while (false);

6125

LLVM_DEBUG(dbgs() << "\nNew: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\nNew: "; } } while (false
);

6126

LLVM_DEBUG(Res.getNode()->dump(CurDAG))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { Res.getNode()->dump(CurDAG); } } while (
false);

6127

LLVM_DEBUG(dbgs() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\n"; } } while (false);

6128

6129

CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);

6130

MadeChange = true;

6131

}

6132

}

6133

6134

if (MadeChange)

6135

CurDAG->RemoveDeadNodes();

6136

}

6137

6138

/// PostprocessISelDAG - Perform some late peephole optimizations

6139

/// on the DAG representation.

6140

void PPCDAGToDAGISel::PostprocessISelDAG() {

6141

// Skip peepholes at -O0.

6142

if (TM.getOptLevel() == CodeGenOpt::None)

6143

return;

6144

6145

PeepholePPC64();

6146

PeepholeCROps();

6147

PeepholePPC64ZExt();

6148

}

6149

6150

// Check if all users of this node will become isel where the second operand

6151

// is the constant zero. If this is so, and if we can negate the condition,

6152

// then we can flip the true and false operands. This will allow the zero to

6153

// be folded with the isel so that we don't need to materialize a register

6154

// containing zero.

6155

bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) {

6156

for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();

6157

UI != UE; ++UI) {

6158

SDNode *User = *UI;

6159

if (!User->isMachineOpcode())

6160

return false;

6161

if (User->getMachineOpcode() != PPC::SELECT_I4 &&

6162

User->getMachineOpcode() != PPC::SELECT_I8)

6163

return false;

6164

6165

SDNode *Op1 = User->getOperand(1).getNode();

6166

SDNode *Op2 = User->getOperand(2).getNode();

6167

// If we have a degenerate select with two equal operands, swapping will

6168

// not do anything, and we may run into an infinite loop.

6169

if (Op1 == Op2)

6170

return false;

6171

6172

if (!Op2->isMachineOpcode())

6173

return false;

6174

6175

if (Op2->getMachineOpcode() != PPC::LI &&

6176

Op2->getMachineOpcode() != PPC::LI8)

6177

return false;

6178

6179

ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op2->getOperand(0));

6180

if (!C)

6181

return false;

6182

6183

if (!C->isNullValue())

6184

return false;

6185

}

6186

6187

return true;

6188

}

6189

6190

// Replace every user of N, each of which must be a SELECT_I4 or SELECT_I8
// machine node, with a copy of that select whose operands 1 and 2 are
// exchanged. The condition operand (operand 0) is carried over unchanged.
void PPCDAGToDAGISel::SwapAllSelectUsers(SDNode *N) {
  // Gather the users before replacing any of them (presumably so the use
  // list is not walked while ReplaceUses edits it).
  SmallVector<SDNode *, 4> Selects;
  for (SDNode *User : N->uses()) {
    assert((User->getMachineOpcode() == PPC::SELECT_I4 ||
            User->getMachineOpcode() == PPC::SELECT_I8) &&
           "Must have all select users");
    Selects.push_back(User);
  }

  for (SDNode *OldSel : Selects) {
    SDNode *NewSel = CurDAG->getMachineNode(
        OldSel->getMachineOpcode(), SDLoc(OldSel), OldSel->getValueType(0),
        OldSel->getOperand(0), OldSel->getOperand(2), OldSel->getOperand(1));

    LLVM_DEBUG(dbgs() << "CR Peephole replacing:\nOld:    ";
               OldSel->dump(CurDAG);
               dbgs() << "\nNew: ";
               NewSel->dump(CurDAG);
               dbgs() << "\n");

    ReplaceUses(OldSel, NewSel);
  }
}

6219

6220

void PPCDAGToDAGISel::PeepholeCROps() {

6221

bool IsModified;

6222

do {

6223

IsModified = false;

6224

for (SDNode &Node : CurDAG->allnodes()) {

6225

MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node);

6226

if (!MachineNode || MachineNode->use_empty())

6227

continue;

6228

SDNode *ResNode = MachineNode;

6229

6230

bool Op1Set = false, Op1Unset = false,

6231

Op1Not = false,

6232

Op2Set = false, Op2Unset = false,

6233

Op2Not = false;

6234

6235

unsigned Opcode = MachineNode->getMachineOpcode();

6236

switch (Opcode) {

6237

default: break;

6238

case PPC::CRAND:

6239

case PPC::CRNAND:

6240

case PPC::CROR:

6241

case PPC::CRXOR:

6242

case PPC::CRNOR:

6243

case PPC::CREQV:

6244

case PPC::CRANDC:

6245

case PPC::CRORC: {

6246

SDValue Op = MachineNode->getOperand(1);

6247

if (Op.isMachineOpcode()) {

6248

if (Op.getMachineOpcode() == PPC::CRSET)

6249

Op2Set = true;

6250

else if (Op.getMachineOpcode() == PPC::CRUNSET)

6251

Op2Unset = true;

6252

else if (Op.getMachineOpcode() == PPC::CRNOR &&

6253

Op.getOperand(0) == Op.getOperand(1))

6254

Op2Not = true;

6255

}

6256

LLVM_FALLTHROUGH[[gnu::fallthrough]];

6257

}

6258

case PPC::BC:

6259

case PPC::BCn:

6260

case PPC::SELECT_I4:

6261

case PPC::SELECT_I8:

6262

case PPC::SELECT_F4:

6263

case PPC::SELECT_F8:

6264

case PPC::SELECT_SPE:

6265

case PPC::SELECT_SPE4:

6266

case PPC::SELECT_VRRC:

6267

case PPC::SELECT_VSFRC:

6268

case PPC::SELECT_VSSRC:

6269

case PPC::SELECT_VSRC: {

6270

SDValue Op = MachineNode->getOperand(0);

6271

if (Op.isMachineOpcode()) {

6272

if (Op.getMachineOpcode() == PPC::CRSET)

6273

Op1Set = true;

6274

else if (Op.getMachineOpcode() == PPC::CRUNSET)

6275

Op1Unset = true;

6276

else if (Op.getMachineOpcode() == PPC::CRNOR &&

6277

Op.getOperand(0) == Op.getOperand(1))

6278

Op1Not = true;

6279

}

6280

}

6281

break;

6282

}

6283

6284

bool SelectSwap = false;

6285

switch (Opcode) {

6286

default: break;

6287

case PPC::CRAND:

6288

if (MachineNode->getOperand(0) == MachineNode->getOperand(1))

6289

// x & x = x

6290

ResNode = MachineNode->getOperand(0).getNode();

6291

else if (Op1Set)

6292

// 1 & y = y

6293

ResNode = MachineNode->getOperand(1).getNode();

6294

else if (Op2Set)

6295

// x & 1 = x

6296

ResNode = MachineNode->getOperand(0).getNode();

6297

else if (Op1Unset || Op2Unset)

6298

// x & 0 = 0 & y = 0

6299

ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),

6300

MVT::i1);

6301

else if (Op1Not)

6302

// ~x & y = andc(y, x)

6303

ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),

6304

MVT::i1, MachineNode->getOperand(1),

6305

MachineNode->getOperand(0).

6306

getOperand(0));

6307

else if (Op2Not)

6308

// x & ~y = andc(x, y)

6309

ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),

6310

MVT::i1, MachineNode->getOperand(0),

6311

MachineNode->getOperand(1).

6312

getOperand(0));

6313

else if (AllUsersSelectZero(MachineNode)) {

6314

ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode),

6315

MVT::i1, MachineNode->getOperand(0),

6316

MachineNode->getOperand(1));

6317

SelectSwap = true;

6318

}

6319

break;

6320

case PPC::CRNAND:

6321

if (MachineNode->getOperand(0) == MachineNode->getOperand(1))

6322

// nand(x, x) -> nor(x, x)

6323

ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),

6324

MVT::i1, MachineNode->getOperand(0),

6325

MachineNode->getOperand(0));

6326

else if (Op1Set)

6327

// nand(1, y) -> nor(y, y)

6328

ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),

6329

MVT::i1, MachineNode->getOperand(1),

6330

MachineNode->getOperand(1));

6331

else if (Op2Set)

6332

// nand(x, 1) -> nor(x, x)

6333

ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),

6334

MVT::i1, MachineNode->getOperand(0),

6335

MachineNode->getOperand(0));

6336

else if (Op1Unset || Op2Unset)

6337

// nand(x, 0) = nand(0, y) = 1

6338

ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),

6339

MVT::i1);

6340

else if (Op1Not)

6341

// nand(~x, y) = ~(~x & y) = x | ~y = orc(x, y)

6342

ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),

6343

MVT::i1, MachineNode->getOperand(0).

6344

getOperand(0),

6345

MachineNode->getOperand(1));

6346

else if (Op2Not)

6347

// nand(x, ~y) = ~x | y = orc(y, x)

6348

ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),

6349

MVT::i1, MachineNode->getOperand(1).

6350

getOperand(0),

6351

MachineNode->getOperand(0));

6352

else if (AllUsersSelectZero(MachineNode)) {

6353

ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode),

6354

MVT::i1, MachineNode->getOperand(0),

6355

MachineNode->getOperand(1));

6356

SelectSwap = true;

6357

}

6358

break;

6359

case PPC::CROR:

6360

if (MachineNode->getOperand(0) == MachineNode->getOperand(1))

6361

// x | x = x

6362

ResNode = MachineNode->getOperand(0).getNode();

6363

else if (Op1Set || Op2Set)

6364

// x | 1 = 1 | y = 1

6365

ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),

6366

MVT::i1);

6367

else if (Op1Unset)

6368

// 0 | y = y

6369

ResNode = MachineNode->getOperand(1).getNode();

6370

else if (Op2Unset)

6371

// x | 0 = x

6372

ResNode = MachineNode->getOperand(0).getNode();

6373

else if (Op1Not)

6374

// ~x | y = orc(y, x)

6375

ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),

6376

MVT::i1, MachineNode->getOperand(1),

6377

MachineNode->getOperand(0).

6378

getOperand(0));

6379

else if (Op2Not)

6380

// x | ~y = orc(x, y)

6381

ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),

6382

MVT::i1, MachineNode->getOperand(0),

6383

MachineNode->getOperand(1).

6384

getOperand(0));

6385

else if (AllUsersSelectZero(MachineNode)) {

6386

ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),

6387

MVT::i1, MachineNode->getOperand(0),

6388

MachineNode->getOperand(1));

6389

SelectSwap = true;

6390

}

6391

break;

6392

case PPC::CRXOR:

6393

if (MachineNode->getOperand(0) == MachineNode->getOperand(1))

6394

// xor(x, x) = 0

6395

ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),

6396

MVT::i1);

6397

else if (Op1Set)

6398

// xor(1, y) -> nor(y, y)

6399

ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),

6400

MVT::i1, MachineNode->getOperand(1),

6401

MachineNode->getOperand(1));

6402

else if (Op2Set)

6403

// xor(x, 1) -> nor(x, x)

6404

ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),

6405

MVT::i1, MachineNode->getOperand(0),

6406

MachineNode->getOperand(0));

6407

else if (Op1Unset)

6408

// xor(0, y) = y

6409

ResNode = MachineNode->getOperand(1).getNode();

6410

else if (Op2Unset)

6411

// xor(x, 0) = x

6412

ResNode = MachineNode->getOperand(0).getNode();

6413

else if (Op1Not)

6414

// xor(~x, y) = eqv(x, y)

6415

ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),

6416

MVT::i1, MachineNode->getOperand(0).

6417

getOperand(0),

6418

MachineNode->getOperand(1));

6419

else if (Op2Not)

6420

// xor(x, ~y) = eqv(x, y)

6421

ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),

6422

MVT::i1, MachineNode->getOperand(0),

6423

MachineNode->getOperand(1).

6424

getOperand(0));

6425

else if (AllUsersSelectZero(MachineNode)) {

6426

ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),

6427

MVT::i1, MachineNode->getOperand(0),

6428

MachineNode->getOperand(1));

6429

SelectSwap = true;

6430

}

6431

break;

6432

case PPC::CRNOR:

6433

if (Op1Set || Op2Set)

6434

// nor(1, y) -> 0

6435

ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),

6436

MVT::i1);

6437

else if (Op1Unset)

6438

// nor(0, y) = ~y -> nor(y, y)

6439

ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),

6440

MVT::i1, MachineNode->getOperand(1),

6441

MachineNode->getOperand(1));

6442

else if (Op2Unset)

6443

// nor(x, 0) = ~x

6444

ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),

6445

MVT::i1, MachineNode->getOperand(0),

6446

MachineNode->getOperand(0));

6447

else if (Op1Not)

6448

// nor(~x, y) = andc(x, y)

6449

ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),

6450

MVT::i1, MachineNode->getOperand(0).

6451

getOperand(0),

6452

MachineNode->getOperand(1));

6453

else if (Op2Not)

6454

// nor(x, ~y) = andc(y, x)

6455

ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),

6456

MVT::i1, MachineNode->getOperand(1).

6457

getOperand(0),

6458

MachineNode->getOperand(0));

6459

else if (AllUsersSelectZero(MachineNode)) {

6460

ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode),

6461

MVT::i1, MachineNode->getOperand(0),

6462

MachineNode->getOperand(1));

6463

SelectSwap = true;

6464

}

6465

break;

6466

case PPC::CREQV:

6467

if (MachineNode->getOperand(0) == MachineNode->getOperand(1))

6468

// eqv(x, x) = 1

6469

ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),

6470

MVT::i1);

6471

else if (Op1Set)

6472

// eqv(1, y) = y

6473

ResNode = MachineNode->getOperand(1).getNode();

6474

else if (Op2Set)

6475

// eqv(x, 1) = x

6476

ResNode = MachineNode->getOperand(0).getNode();

6477

else if (Op1Unset)

6478

// eqv(0, y) = ~y -> nor(y, y)

6479

ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),

6480

MVT::i1, MachineNode->getOperand(1),

6481

MachineNode->getOperand(1));

6482

else if (Op2Unset)

6483

// eqv(x, 0) = ~x

6484

ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),

6485

MVT::i1, MachineNode->getOperand(0),

6486

MachineNode->getOperand(0));

6487

else if (Op1Not)

6488

// eqv(~x, y) = xor(x, y)

6489

ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),

6490

MVT::i1, MachineNode->getOperand(0).

6491

getOperand(0),

6492

MachineNode->getOperand(1));

6493

else if (Op2Not)

6494

// eqv(x, ~y) = xor(x, y)

6495

ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),

6496

MVT::i1, MachineNode->getOperand(0),

6497

MachineNode->getOperand(1).

6498

getOperand(0));

6499

else if (AllUsersSelectZero(MachineNode)) {

6500

ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),

6501

MVT::i1, MachineNode->getOperand(0),

6502

MachineNode->getOperand(1));

6503

SelectSwap = true;

6504

}

6505

break;

6506

case PPC::CRANDC:

6507

if (MachineNode->getOperand(0) == MachineNode->getOperand(1))

6508

// andc(x, x) = 0

6509

ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),

6510

MVT::i1);

6511

else if (Op1Set)

6512

// andc(1, y) = ~y

6513

ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),

6514

MVT::i1, MachineNode->getOperand(1),

6515

MachineNode->getOperand(1));

6516

else if (Op1Unset || Op2Set)

6517

// andc(0, y) = andc(x, 1) = 0

6518

ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),

6519

MVT::i1);

6520

else if (Op2Unset)

6521

// andc(x, 0) = x

6522

ResNode = MachineNode->getOperand(0).getNode();

6523

else if (Op1Not)

6524

// andc(~x, y) = ~(x | y) = nor(x, y)

6525

ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),

6526

MVT::i1, MachineNode->getOperand(0).

6527

getOperand(0),

6528

MachineNode->getOperand(1));

6529

else if (Op2Not)

6530

// andc(x, ~y) = x & y

6531

ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode),

6532

MVT::i1, MachineNode->getOperand(0),

6533

MachineNode->getOperand(1).

6534

getOperand(0));

6535

else if (AllUsersSelectZero(MachineNode)) {

6536

ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),

6537

MVT::i1, MachineNode->getOperand(1),

6538

MachineNode->getOperand(0));

6539

SelectSwap = true;

6540

}

6541

break;

6542

case PPC::CRORC:

6543

if (MachineNode->getOperand(0) == MachineNode->getOperand(1))

6544

// orc(x, x) = 1

6545

ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),

6546

MVT::i1);

6547

else if (Op1Set || Op2Unset)

6548

// orc(1, y) = orc(x, 0) = 1

6549

ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),

6550

MVT::i1);

6551

else if (Op2Set)

6552

// orc(x, 1) = x

6553

ResNode = MachineNode->getOperand(0).getNode();

6554

else if (Op1Unset)

6555

// orc(0, y) = ~y

6556

ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),

6557

MVT::i1, MachineNode->getOperand(1),

6558

MachineNode->getOperand(1));

6559

else if (Op1Not)

6560

// orc(~x, y) = ~(x & y) = nand(x, y)

6561

ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode),

6562

MVT::i1, MachineNode->getOperand(0).

6563

getOperand(0),

6564

MachineNode->getOperand(1));

6565

else if (Op2Not)

6566

// orc(x, ~y) = x | y

6567

ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode),

6568

MVT::i1, MachineNode->getOperand(0),

6569

MachineNode->getOperand(1).

6570

getOperand(0));

6571

else if (AllUsersSelectZero(MachineNode)) {

6572

ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),

6573

MVT::i1, MachineNode->getOperand(1),

6574

MachineNode->getOperand(0));

6575

SelectSwap = true;

6576

}

6577

break;

6578

case PPC::SELECT_I4:

6579

case PPC::SELECT_I8:

6580

case PPC::SELECT_F4:

6581

case PPC::SELECT_F8:

6582

case PPC::SELECT_SPE:

6583

case PPC::SELECT_SPE4:

6584

case PPC::SELECT_VRRC:

6585

case PPC::SELECT_VSFRC:

6586

case PPC::SELECT_VSSRC:

6587

case PPC::SELECT_VSRC:

6588

if (Op1Set)

6589

ResNode = MachineNode->getOperand(1).getNode();

6590

else if (Op1Unset)

6591

ResNode = MachineNode->getOperand(2).getNode();

6592

else if (Op1Not)

6593

ResNode = CurDAG->getMachineNode(MachineNode->getMachineOpcode(),

6594

SDLoc(MachineNode),

6595

MachineNode->getValueType(0),

6596

MachineNode->getOperand(0).

6597

getOperand(0),

6598

MachineNode->getOperand(2),

6599

MachineNode->getOperand(1));

6600

break;

6601

case PPC::BC:

6602

case PPC::BCn:

6603

if (Op1Not)

6604

ResNode = CurDAG->getMachineNode(Opcode == PPC::BC ? PPC::BCn :

6605

PPC::BC,

6606

SDLoc(MachineNode),

6607

MVT::Other,

6608

MachineNode->getOperand(0).

6609

getOperand(0),

6610

MachineNode->getOperand(1),

6611

MachineNode->getOperand(2));

6612

// FIXME: Handle Op1Set, Op1Unset here too.

6613

break;

6614

}

6615

6616

// If we're inverting this node because it is used only by selects that

6617

// we'd like to swap, then swap the selects before the node replacement.

6618

if (SelectSwap)

6619

SwapAllSelectUsers(MachineNode);

6620

6621

if (ResNode != MachineNode) {

6622

LLVM_DEBUG(dbgs() << "CR Peephole replacing:\nOld: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "CR Peephole replacing:\nOld: "
; } } while (false);

6623

LLVM_DEBUG(MachineNode->dump(CurDAG))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { MachineNode->dump(CurDAG); } } while (false
);

6624

LLVM_DEBUG(dbgs() << "\nNew: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\nNew: "; } } while (false
);

6625

LLVM_DEBUG(ResNode->dump(CurDAG))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { ResNode->dump(CurDAG); } } while (false
);

6626

LLVM_DEBUG(dbgs() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\n"; } } while (false);

6627

6628

ReplaceUses(MachineNode, ResNode);

6629

IsModified = true;

6630

}

6631

}

6632

if (IsModified)

6633

CurDAG->RemoveDeadNodes();

6634

} while (IsModified);

6635

}

6636

6637

// Gather the set of 32-bit operations that are known to have their

6638

// higher-order 32 bits zero, where ToPromote contains all such operations.

6639

static bool PeepholePPC64ZExtGather(SDValue Op32,

6640

SmallPtrSetImpl<SDNode *> &ToPromote) {

6641

if (!Op32.isMachineOpcode())

6642

return false;

6643

6644

// First, check for the "frontier" instructions (those that will clear the

6645

// higher-order 32 bits.

6646

6647

// For RLWINM and RLWNM, we need to make sure that the mask does not wrap

6648

// around. If it does not, then these instructions will clear the

6649

// higher-order bits.

6650

if ((Op32.getMachineOpcode() == PPC::RLWINM ||

6651

Op32.getMachineOpcode() == PPC::RLWNM) &&

6652

Op32.getConstantOperandVal(2) <= Op32.getConstantOperandVal(3)) {

6653

ToPromote.insert(Op32.getNode());

6654

return true;

6655

}

6656

6657

// SLW and SRW always clear the higher-order bits.

6658

if (Op32.getMachineOpcode() == PPC::SLW ||

6659

Op32.getMachineOpcode() == PPC::SRW) {

6660

ToPromote.insert(Op32.getNode());

6661

return true;

6662

}

6663

6664

// For LI and LIS, we need the immediate to be positive (so that it is not

6665

// sign extended).

6666

if (Op32.getMachineOpcode() == PPC::LI ||

6667

Op32.getMachineOpcode() == PPC::LIS) {

6668

if (!isUInt<15>(Op32.getConstantOperandVal(0)))

6669

return false;

6670

6671

ToPromote.insert(Op32.getNode());

6672

return true;

6673

}

6674

6675

// LHBRX and LWBRX always clear the higher-order bits.

6676

if (Op32.getMachineOpcode() == PPC::LHBRX ||

6677

Op32.getMachineOpcode() == PPC::LWBRX) {

6678

ToPromote.insert(Op32.getNode());

6679

return true;

6680

}

6681

6682

// CNT[LT]ZW always produce a 64-bit value in [0,32], and so is zero extended.

6683

if (Op32.getMachineOpcode() == PPC::CNTLZW ||

6684

Op32.getMachineOpcode() == PPC::CNTTZW) {

6685

ToPromote.insert(Op32.getNode());

6686

return true;

6687

}

6688

6689

// Next, check for those instructions we can look through.

6690

6691

// Assuming the mask does not wrap around, then the higher-order bits are

6692

// taken directly from the first operand.

6693

if (Op32.getMachineOpcode() == PPC::RLWIMI &&

6694

Op32.getConstantOperandVal(3) <= Op32.getConstantOperandVal(4)) {

6695

SmallPtrSet<SDNode *, 16> ToPromote1;

6696

if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1))

6697

return false;

6698

6699

ToPromote.insert(Op32.getNode());

6700

ToPromote.insert(ToPromote1.begin(), ToPromote1.end());

6701

return true;

6702

}

6703

6704

// For OR, the higher-order bits are zero if that is true for both operands.

6705

// For SELECT_I4, the same is true (but the relevant operand numbers are

6706

// shifted by 1).

6707

if (Op32.getMachineOpcode() == PPC::OR ||

6708

Op32.getMachineOpcode() == PPC::SELECT_I4) {

6709

unsigned B = Op32.getMachineOpcode() == PPC::SELECT_I4 ? 1 : 0;

6710

SmallPtrSet<SDNode *, 16> ToPromote1;

6711

if (!PeepholePPC64ZExtGather(Op32.getOperand(B+0), ToPromote1))

6712

return false;

6713

if (!PeepholePPC64ZExtGather(Op32.getOperand(B+1), ToPromote1))

6714

return false;

6715

6716

ToPromote.insert(Op32.getNode());

6717

ToPromote.insert(ToPromote1.begin(), ToPromote1.end());

6718

return true;

6719

}

6720

6721

// For ORI and ORIS, we need the higher-order bits of the first operand to be

6722

// zero, and also for the constant to be positive (so that it is not sign

6723

// extended).

6724

if (Op32.getMachineOpcode() == PPC::ORI ||

6725

Op32.getMachineOpcode() == PPC::ORIS) {

6726

SmallPtrSet<SDNode *, 16> ToPromote1;

6727

if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1))

6728

return false;

6729

if (!isUInt<15>(Op32.getConstantOperandVal(1)))

6730

return false;

6731

6732

ToPromote.insert(Op32.getNode());

6733

ToPromote.insert(ToPromote1.begin(), ToPromote1.end());

6734

return true;

6735

}

6736

6737

// The higher-order bits of AND are zero if that is true for at least one of

6738

// the operands.

6739

if (Op32.getMachineOpcode() == PPC::AND) {

6740

SmallPtrSet<SDNode *, 16> ToPromote1, ToPromote2;

6741

bool Op0OK =

6742

PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1);

6743

bool Op1OK =

6744

PeepholePPC64ZExtGather(Op32.getOperand(1), ToPromote2);

6745

if (!Op0OK && !Op1OK)

6746

return false;

6747

6748

ToPromote.insert(Op32.getNode());

6749

6750

if (Op0OK)

6751

ToPromote.insert(ToPromote1.begin(), ToPromote1.end());

6752

6753

if (Op1OK)

6754

ToPromote.insert(ToPromote2.begin(), ToPromote2.end());

6755

6756

return true;

6757

}

6758

6759

// For ANDI and ANDIS, the higher-order bits are zero if either that is true

6760

// of the first operand, or if the second operand is positive (so that it is

6761

// not sign extended).

6762

if (Op32.getMachineOpcode() == PPC::ANDI_rec ||

6763

Op32.getMachineOpcode() == PPC::ANDIS_rec) {

6764

SmallPtrSet<SDNode *, 16> ToPromote1;

6765

bool Op0OK =

6766

PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1);

6767

bool Op1OK = isUInt<15>(Op32.getConstantOperandVal(1));

6768

if (!Op0OK && !Op1OK)

6769

return false;

6770

6771

ToPromote.insert(Op32.getNode());

6772

6773

if (Op0OK)

6774

ToPromote.insert(ToPromote1.begin(), ToPromote1.end());

6775

6776

return true;

6777

}

6778

6779

return false;

6780

}

6781

6782

void PPCDAGToDAGISel::PeepholePPC64ZExt() {

6783

if (!Subtarget->isPPC64())

6784

return;

6785

6786

// When we zero-extend from i32 to i64, we use a pattern like this:

6787

// def : Pat<(i64 (zext i32:$in)),

6788

// (RLDICL (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $in, sub_32),

6789

// 0, 32)>;

6790

// There are several 32-bit shift/rotate instructions, however, that will

6791

// clear the higher-order bits of their output, rendering the RLDICL

6792

// unnecessary. When that happens, we remove it here, and redefine the

6793

// relevant 32-bit operation to be a 64-bit operation.

6794

6795

SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

6796

6797

bool MadeChange = false;

6798

while (Position != CurDAG->allnodes_begin()) {

6799

SDNode *N = &*--Position;

6800

// Skip dead nodes and any non-machine opcodes.

6801

if (N->use_empty() || !N->isMachineOpcode())

6802

continue;

6803

6804

if (N->getMachineOpcode() != PPC::RLDICL)

6805

continue;

6806

6807

if (N->getConstantOperandVal(1) != 0 ||

6808

N->getConstantOperandVal(2) != 32)

6809

continue;

6810

6811

SDValue ISR = N->getOperand(0);

6812

if (!ISR.isMachineOpcode() ||

6813

ISR.getMachineOpcode() != TargetOpcode::INSERT_SUBREG)

6814

continue;

6815

6816

if (!ISR.hasOneUse())

6817

continue;

6818

6819

if (ISR.getConstantOperandVal(2) != PPC::sub_32)

6820

continue;

6821

6822

SDValue IDef = ISR.getOperand(0);

6823

if (!IDef.isMachineOpcode() ||

6824

IDef.getMachineOpcode() != TargetOpcode::IMPLICIT_DEF)

6825

continue;

6826

6827

// We now know that we're looking at a canonical i32 -> i64 zext. See if we

6828

// can get rid of it.

6829

6830

SDValue Op32 = ISR->getOperand(1);

6831

if (!Op32.isMachineOpcode())

6832

continue;

6833

6834

// There are some 32-bit instructions that always clear the high-order 32

6835

// bits, there are also some instructions (like AND) that we can look

6836

// through.

6837

SmallPtrSet<SDNode *, 16> ToPromote;

6838

if (!PeepholePPC64ZExtGather(Op32, ToPromote))

6839

continue;

6840

6841

// If the ToPromote set contains nodes that have uses outside of the set

6842

// (except for the original INSERT_SUBREG), then abort the transformation.

6843

bool OutsideUse = false;

6844

for (SDNode *PN : ToPromote) {

6845

for (SDNode *UN : PN->uses()) {

6846

if (!ToPromote.count(UN) && UN != ISR.getNode()) {

6847

OutsideUse = true;

6848

break;

6849

}

6850

}

6851

6852

if (OutsideUse)

6853

break;

6854

}

6855

if (OutsideUse)

6856

continue;

6857

6858

MadeChange = true;

6859

6860

// We now know that this zero extension can be removed by promoting to

6861

// nodes in ToPromote to 64-bit operations, where for operations in the

6862

// frontier of the set, we need to insert INSERT_SUBREGs for their

6863

// operands.

6864

for (SDNode *PN : ToPromote) {

6865

unsigned NewOpcode;

6866

switch (PN->getMachineOpcode()) {

6867

default:

6868

llvm_unreachable("Don't know the 64-bit variant of this instruction")::llvm::llvm_unreachable_internal("Don't know the 64-bit variant of this instruction"
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 6868);

6869

case PPC::RLWINM: NewOpcode = PPC::RLWINM8; break;

6870

case PPC::RLWNM: NewOpcode = PPC::RLWNM8; break;

6871

case PPC::SLW: NewOpcode = PPC::SLW8; break;

6872

case PPC::SRW: NewOpcode = PPC::SRW8; break;

6873

case PPC::LI: NewOpcode = PPC::LI8; break;

6874

case PPC::LIS: NewOpcode = PPC::LIS8; break;

6875

case PPC::LHBRX: NewOpcode = PPC::LHBRX8; break;

6876

case PPC::LWBRX: NewOpcode = PPC::LWBRX8; break;

6877

case PPC::CNTLZW: NewOpcode = PPC::CNTLZW8; break;

6878

case PPC::CNTTZW: NewOpcode = PPC::CNTTZW8; break;

6879

case PPC::RLWIMI: NewOpcode = PPC::RLWIMI8; break;

6880

case PPC::OR: NewOpcode = PPC::OR8; break;

6881

case PPC::SELECT_I4: NewOpcode = PPC::SELECT_I8; break;

6882

case PPC::ORI: NewOpcode = PPC::ORI8; break;

6883

case PPC::ORIS: NewOpcode = PPC::ORIS8; break;

6884

case PPC::AND: NewOpcode = PPC::AND8; break;

6885

case PPC::ANDI_rec:

6886

NewOpcode = PPC::ANDI8_rec;

6887

break;

6888

case PPC::ANDIS_rec:

6889

NewOpcode = PPC::ANDIS8_rec;

6890

break;

6891

}

6892

6893

// Note: During the replacement process, the nodes will be in an

6894

// inconsistent state (some instructions will have operands with values

6895

// of the wrong type). Once done, however, everything should be right

6896

// again.

6897

6898

SmallVector<SDValue, 4> Ops;

6899

for (const SDValue &V : PN->ops()) {

6900

if (!ToPromote.count(V.getNode()) && V.getValueType() == MVT::i32 &&

6901

!isa<ConstantSDNode>(V)) {

6902

SDValue ReplOpOps[] = { ISR.getOperand(0), V, ISR.getOperand(2) };

6903

SDNode *ReplOp =

6904

CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, SDLoc(V),

6905

ISR.getNode()->getVTList(), ReplOpOps);

6906

Ops.push_back(SDValue(ReplOp, 0));

6907

} else {

6908

Ops.push_back(V);

6909

}

6910

}

6911

6912

// Because all to-be-promoted nodes only have users that are other

6913

// promoted nodes (or the original INSERT_SUBREG), we can safely replace

6914

// the i32 result value type with i64.

6915

6916

SmallVector<EVT, 2> NewVTs;

6917

SDVTList VTs = PN->getVTList();

6918

for (unsigned i = 0, ie = VTs.NumVTs; i != ie; ++i)

6919

if (VTs.VTs[i] == MVT::i32)

6920

NewVTs.push_back(MVT::i64);

6921

else

6922

NewVTs.push_back(VTs.VTs[i]);

6923

6924

LLVM_DEBUG(dbgs() << "PPC64 ZExt Peephole morphing:\nOld: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "PPC64 ZExt Peephole morphing:\nOld: "
; } } while (false);

6925

LLVM_DEBUG(PN->dump(CurDAG))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { PN->dump(CurDAG); } } while (false);

6926

6927

CurDAG->SelectNodeTo(PN, NewOpcode, CurDAG->getVTList(NewVTs), Ops);

6928

6929

LLVM_DEBUG(dbgs() << "\nNew: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\nNew: "; } } while (false
);

6930

LLVM_DEBUG(PN->dump(CurDAG))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { PN->dump(CurDAG); } } while (false);

6931

LLVM_DEBUG(dbgs() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\n"; } } while (false);

6932

}

6933

6934

// Now we replace the original zero extend and its associated INSERT_SUBREG

6935

// with the value feeding the INSERT_SUBREG (which has now been promoted to

6936

// return an i64).

6937

6938

LLVM_DEBUG(dbgs() << "PPC64 ZExt Peephole replacing:\nOld: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "PPC64 ZExt Peephole replacing:\nOld: "
; } } while (false);

6939

LLVM_DEBUG(N->dump(CurDAG))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { N->dump(CurDAG); } } while (false);

6940

LLVM_DEBUG(dbgs() << "\nNew: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\nNew: "; } } while (false
);

6941

LLVM_DEBUG(Op32.getNode()->dump(CurDAG))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { Op32.getNode()->dump(CurDAG); } } while
(false);

6942

LLVM_DEBUG(dbgs() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\n"; } } while (false);

6943

6944

ReplaceUses(N, Op32.getNode());

6945

}

6946

6947

if (MadeChange)

6948

CurDAG->RemoveDeadNodes();

6949

}

6950

6951

// Returns true if N is a machine node of one of the recognised swap forms:
// a single-operand XXPERMDIs with immediate 2, or an XXPERMDI/XXSLDWI whose
// two register operands are the same value and whose immediate is 2.
static bool isVSXSwap(SDValue N) {
  if (!N->isMachineOpcode())
    return false;

  // True when operand Idx is a constant node holding the value 2.
  auto IsImmediateTwo = [&N](unsigned Idx) {
    return isa<ConstantSDNode>(N->getOperand(Idx)) &&
           N->getConstantOperandVal(Idx) == 2;
  };

  switch (N->getMachineOpcode()) {
  case PPC::XXPERMDIs:
    return IsImmediateTwo(1);
  case PPC::XXPERMDI:
  case PPC::XXSLDWI:
    return N->getOperand(0) == N->getOperand(1) && IsImmediateTwo(2);
  default:
    return false;
  }
}

6969

6970

// TODO: Make this complete and replace with a table-gen bit.

6971

static bool isLaneInsensitive(SDValue N) {

6972

if (!N->isMachineOpcode())

6973

return false;

6974

unsigned Opc = N->getMachineOpcode();

6975

6976

switch (Opc) {

6977

default:

6978

return false;

6979

case PPC::VAVGSB:

6980

case PPC::VAVGUB:

6981

case PPC::VAVGSH:

6982

case PPC::VAVGUH:

6983

case PPC::VAVGSW:

6984

case PPC::VAVGUW:

6985

case PPC::VMAXFP:

6986

case PPC::VMAXSB:

6987

case PPC::VMAXUB:

6988

case PPC::VMAXSH:

6989

case PPC::VMAXUH:

6990

case PPC::VMAXSW:

6991

case PPC::VMAXUW:

6992

case PPC::VMINFP:

6993

case PPC::VMINSB:

6994

case PPC::VMINUB:

6995

case PPC::VMINSH:

6996

case PPC::VMINUH:

6997

case PPC::VMINSW:

6998

case PPC::VMINUW:

6999

case PPC::VADDFP:

7000

case PPC::VADDUBM:

7001

case PPC::VADDUHM:

7002

case PPC::VADDUWM:

7003

case PPC::VSUBFP:

7004

case PPC::VSUBUBM:

7005

case PPC::VSUBUHM:

7006

case PPC::VSUBUWM:

7007

case PPC::VAND:

7008

case PPC::VANDC:

7009

case PPC::VOR:

7010

case PPC::VORC:

7011

case PPC::VXOR:

7012

case PPC::VNOR:

7013

case PPC::VMULUWM:

7014

return true;

7015

}

7016

}

7017

7018

// Try to simplify (xxswap (vec-op (xxswap) (xxswap))) where vec-op is

7019

// lane-insensitive.

7020

static void reduceVSXSwap(SDNode *N, SelectionDAG *DAG) {

7021

// Our desired xxswap might be source of COPY_TO_REGCLASS.

7022

// TODO: Can we put this a common method for DAG?

7023

auto SkipRCCopy = [](SDValue V) {

7024

while (V->isMachineOpcode() &&

7025

V->getMachineOpcode() == TargetOpcode::COPY_TO_REGCLASS) {

7026

// All values in the chain should have single use.

7027

if (V->use_empty() || !V->use_begin()->isOnlyUserOf(V.getNode()))

7028

return SDValue();

7029

V = V->getOperand(0);

7030

}

7031

return V.hasOneUse() ? V : SDValue();

7032

};

7033

7034

SDValue VecOp = SkipRCCopy(N->getOperand(0));

7035

if (!VecOp || !isLaneInsensitive(VecOp))

7036

return;

7037

7038

SDValue LHS = SkipRCCopy(VecOp.getOperand(0)),

7039

RHS = SkipRCCopy(VecOp.getOperand(1));

7040

if (!LHS || !RHS || !isVSXSwap(LHS) || !isVSXSwap(RHS))

7041

return;

7042

7043

// These swaps may still have chain-uses here, count on dead code elimination

7044

// in following passes to remove them.

7045

DAG->ReplaceAllUsesOfValueWith(LHS, LHS.getOperand(0));

7046

DAG->ReplaceAllUsesOfValueWith(RHS, RHS.getOperand(0));

7047

DAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), N->getOperand(0));

7048

}

7049

7050

void PPCDAGToDAGISel::PeepholePPC64() {

7051

SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

7052

7053

while (Position != CurDAG->allnodes_begin()) {

7054

SDNode *N = &*--Position;

7055

// Skip dead nodes and any non-machine opcodes.

7056

if (N->use_empty() || !N->isMachineOpcode())

7057

continue;

7058

7059

if (isVSXSwap(SDValue(N, 0)))

7060

reduceVSXSwap(N, CurDAG);

7061

7062

unsigned FirstOp;

7063

unsigned StorageOpcode = N->getMachineOpcode();

7064

bool RequiresMod4Offset = false;

7065

7066

switch (StorageOpcode) {

7067

default: continue;

7068

7069

case PPC::LWA:

7070

case PPC::LD:

7071

case PPC::DFLOADf64:

7072

case PPC::DFLOADf32:

7073

RequiresMod4Offset = true;

7074

LLVM_FALLTHROUGH[[gnu::fallthrough]];

7075

case PPC::LBZ:

7076

case PPC::LBZ8:

7077

case PPC::LFD:

7078

case PPC::LFS:

7079

case PPC::LHA:

7080

case PPC::LHA8:

7081

case PPC::LHZ:

7082

case PPC::LHZ8:

7083

case PPC::LWZ:

7084

case PPC::LWZ8:

7085

FirstOp = 0;

7086

break;

7087

7088

case PPC::STD:

7089

case PPC::DFSTOREf64:

7090

case PPC::DFSTOREf32:

7091

RequiresMod4Offset = true;

7092

LLVM_FALLTHROUGH[[gnu::fallthrough]];

7093

case PPC::STB:

7094

case PPC::STB8:

7095

case PPC::STFD:

7096

case PPC::STFS:

7097

case PPC::STH:

7098

case PPC::STH8:

7099

case PPC::STW:

7100

case PPC::STW8:

7101

FirstOp = 1;

7102

break;

7103

}

7104

7105

// If this is a load or store with a zero offset, or within the alignment,

7106

// we may be able to fold an add-immediate into the memory operation.

7107

// The check against alignment is below, as it can't occur until we check

7108

// the arguments to N

7109

if (!isa<ConstantSDNode>(N->getOperand(FirstOp)))

7110

continue;

7111

7112

SDValue Base = N->getOperand(FirstOp + 1);

7113

if (!Base.isMachineOpcode())

7114

continue;

7115

7116

unsigned Flags = 0;

7117

bool ReplaceFlags = true;

7118

7119

// When the feeding operation is an add-immediate of some sort,

7120

// determine whether we need to add relocation information to the

7121

// target flags on the immediate operand when we fold it into the

7122

// load instruction.

7123

//

7124

// For something like ADDItocL, the relocation information is

7125

// inferred from the opcode; when we process it in the AsmPrinter,

7126

// we add the necessary relocation there. A load, though, can receive

7127

// relocation from various flavors of ADDIxxx, so we need to carry

7128

// the relocation information in the target flags.

7129

switch (Base.getMachineOpcode()) {

7130

default: continue;

7131

7132

case PPC::ADDI8:

7133

case PPC::ADDI:

7134

// In some cases (such as TLS) the relocation information

7135

// is already in place on the operand, so copying the operand

7136

// is sufficient.

7137

ReplaceFlags = false;

7138

// For these cases, the immediate may not be divisible by 4, in

7139

// which case the fold is illegal for DS-form instructions. (The

7140

// other cases provide aligned addresses and are always safe.)

7141

if (RequiresMod4Offset &&

7142

(!isa<ConstantSDNode>(Base.getOperand(1)) ||

7143

Base.getConstantOperandVal(1) % 4 != 0))

7144

continue;

7145

break;

7146

case PPC::ADDIdtprelL:

7147

Flags = PPCII::MO_DTPREL_LO;

7148

break;

7149

case PPC::ADDItlsldL:

7150

Flags = PPCII::MO_TLSLD_LO;

7151

break;

7152

case PPC::ADDItocL:

7153

Flags = PPCII::MO_TOC_LO;

7154

break;

7155

}

7156

7157

SDValue ImmOpnd = Base.getOperand(1);

7158

7159

// On PPC64, the TOC base pointer is guaranteed by the ABI only to have

7160

// 8-byte alignment, and so we can only use offsets less than 8 (otherwise,

7161

// we might have needed different @ha relocation values for the offset

7162

// pointers).

7163

int MaxDisplacement = 7;

7164

if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {

7165

const GlobalValue *GV = GA->getGlobal();

7166

Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout());

7167

MaxDisplacement = std::min((int)Alignment.value() - 1, MaxDisplacement);

7168

}

7169

7170

bool UpdateHBase = false;

7171

SDValue HBase = Base.getOperand(0);

7172

7173

int Offset = N->getConstantOperandVal(FirstOp);

7174

if (ReplaceFlags) {

7175

if (Offset < 0 || Offset > MaxDisplacement) {

7176

// If we have a addi(toc@l)/addis(toc@ha) pair, and the addis has only

7177

// one use, then we can do this for any offset, we just need to also

7178

// update the offset (i.e. the symbol addend) on the addis also.

7179

if (Base.getMachineOpcode() != PPC::ADDItocL)

7180

continue;

7181

7182

if (!HBase.isMachineOpcode() ||

7183

HBase.getMachineOpcode() != PPC::ADDIStocHA8)

7184

continue;

7185

7186

if (!Base.hasOneUse() || !HBase.hasOneUse())

7187

continue;

7188

7189

SDValue HImmOpnd = HBase.getOperand(1);

7190

if (HImmOpnd != ImmOpnd)

7191

continue;

7192

7193

UpdateHBase = true;

7194

}

7195

} else {

7196

// If we're directly folding the addend from an addi instruction, then:

7197

// 1. In general, the offset on the memory access must be zero.

7198

// 2. If the addend is a constant, then it can be combined with a

7199

// non-zero offset, but only if the result meets the encoding

7200

// requirements.

7201

if (auto *C = dyn_cast<ConstantSDNode>(ImmOpnd)) {

7202

Offset += C->getSExtValue();

7203

7204

if (RequiresMod4Offset && (Offset % 4) != 0)

7205

continue;

7206

7207

if (!isInt<16>(Offset))

7208

continue;

7209

7210

ImmOpnd = CurDAG->getTargetConstant(Offset, SDLoc(ImmOpnd),

7211

ImmOpnd.getValueType());

7212

} else if (Offset != 0) {

7213

continue;

7214

}

7215

}

7216

7217

// We found an opportunity. Reverse the operands from the add

7218

// immediate and substitute them into the load or store. If

7219

// needed, update the target flags for the immediate operand to

7220

// reflect the necessary relocation information.

7221

LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "Folding add-immediate into mem-op:\nBase: "
; } } while (false);

7222

LLVM_DEBUG(Base->dump(CurDAG))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { Base->dump(CurDAG); } } while (false);

7223

LLVM_DEBUG(dbgs() << "\nN: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\nN: "; } } while (false);

7224

LLVM_DEBUG(N->dump(CurDAG))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { N->dump(CurDAG); } } while (false);

7225

LLVM_DEBUG(dbgs() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\n"; } } while (false);

7226

7227

// If the relocation information isn't already present on the

7228

// immediate operand, add it now.

7229

if (ReplaceFlags) {

7230

if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {

7231

SDLoc dl(GA);

7232

const GlobalValue *GV = GA->getGlobal();

7233

Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout());

7234

// We can't perform this optimization for data whose alignment

7235

// is insufficient for the instruction encoding.

7236

if (Alignment < 4 && (RequiresMod4Offset || (Offset % 4) != 0)) {

7237

LLVM_DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "Rejected this candidate for alignment.\n\n"
; } } while (false);

7238

continue;

7239

}

7240

ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, Offset, Flags);

7241

} else if (ConstantPoolSDNode *CP =

7242

dyn_cast<ConstantPoolSDNode>(ImmOpnd)) {

7243

const Constant *C = CP->getConstVal();

7244

ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64, CP->getAlign(),

7245

Offset, Flags);

7246

}

7247

}

7248

7249

if (FirstOp == 1) // Store

7250

(void)CurDAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd,

7251

Base.getOperand(0), N->getOperand(3));

7252

else // Load

7253

(void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0),

7254

N->getOperand(2));

7255

7256

if (UpdateHBase)

7257

(void)CurDAG->UpdateNodeOperands(HBase.getNode(), HBase.getOperand(0),

7258

ImmOpnd);

7259

7260

// The add-immediate may now be dead, in which case remove it.

7261

if (Base.getNode()->use_empty())

7262

CurDAG->RemoveDeadNode(Base.getNode());

7263

}

7264

}

7265

7266

/// createPPCISelDag - This pass converts a legalized DAG into a

7267

/// PowerPC-specific DAG, ready for instruction scheduling.

7268

///

7269

FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM,

7270

CodeGenOpt::Level OptLevel) {

7271

return new PPCDAGToDAGISel(TM, OptLevel);

7272

}

File:	llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
Warning:	line 1914, column 15 Value stored to 'I' is never read

Bug Summary

Annotated Source Code