/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp

1

//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//

2

//

3

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

4

// See https://llvm.org/LICENSE.txt for license information.

5

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

6

//

7

//===----------------------------------------------------------------------===//

8

//

9

// This file implements the PPCISelLowering class.

10

//

11

//===----------------------------------------------------------------------===//

12

13

#include "PPCISelLowering.h"

14

#include "MCTargetDesc/PPCPredicates.h"

15

#include "PPC.h"

16

#include "PPCCCState.h"

17

#include "PPCCallingConv.h"

18

#include "PPCFrameLowering.h"

19

#include "PPCInstrInfo.h"

20

#include "PPCMachineFunctionInfo.h"

21

#include "PPCPerfectShuffle.h"

22

#include "PPCRegisterInfo.h"

23

#include "PPCSubtarget.h"

24

#include "PPCTargetMachine.h"

25

#include "llvm/ADT/APFloat.h"

26

#include "llvm/ADT/APInt.h"

27

#include "llvm/ADT/ArrayRef.h"

28

#include "llvm/ADT/DenseMap.h"

29

#include "llvm/ADT/None.h"

30

#include "llvm/ADT/STLExtras.h"

31

#include "llvm/ADT/SmallPtrSet.h"

32

#include "llvm/ADT/SmallSet.h"

33

#include "llvm/ADT/SmallVector.h"

34

#include "llvm/ADT/Statistic.h"

35

#include "llvm/ADT/StringRef.h"

36

#include "llvm/ADT/StringSwitch.h"

37

#include "llvm/CodeGen/CallingConvLower.h"

38

#include "llvm/CodeGen/ISDOpcodes.h"

39

#include "llvm/CodeGen/MachineBasicBlock.h"

40

#include "llvm/CodeGen/MachineFrameInfo.h"

41

#include "llvm/CodeGen/MachineFunction.h"

42

#include "llvm/CodeGen/MachineInstr.h"

43

#include "llvm/CodeGen/MachineInstrBuilder.h"

44

#include "llvm/CodeGen/MachineJumpTableInfo.h"

45

#include "llvm/CodeGen/MachineLoopInfo.h"

46

#include "llvm/CodeGen/MachineMemOperand.h"

47

#include "llvm/CodeGen/MachineModuleInfo.h"

48

#include "llvm/CodeGen/MachineOperand.h"

49

#include "llvm/CodeGen/MachineRegisterInfo.h"

50

#include "llvm/CodeGen/RuntimeLibcalls.h"

51

#include "llvm/CodeGen/SelectionDAG.h"

52

#include "llvm/CodeGen/SelectionDAGNodes.h"

53

#include "llvm/CodeGen/TargetInstrInfo.h"

54

#include "llvm/CodeGen/TargetLowering.h"

55

#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"

56

#include "llvm/CodeGen/TargetRegisterInfo.h"

57

#include "llvm/CodeGen/ValueTypes.h"

58

#include "llvm/IR/CallingConv.h"

59

#include "llvm/IR/Constant.h"

60

#include "llvm/IR/Constants.h"

61

#include "llvm/IR/DataLayout.h"

62

#include "llvm/IR/DebugLoc.h"

63

#include "llvm/IR/DerivedTypes.h"

64

#include "llvm/IR/Function.h"

65

#include "llvm/IR/GlobalValue.h"

66

#include "llvm/IR/IRBuilder.h"

67

#include "llvm/IR/Instructions.h"

68

#include "llvm/IR/Intrinsics.h"

69

#include "llvm/IR/IntrinsicsPowerPC.h"

70

#include "llvm/IR/Module.h"

71

#include "llvm/IR/Type.h"

72

#include "llvm/IR/Use.h"

73

#include "llvm/IR/Value.h"

74

#include "llvm/MC/MCContext.h"

75

#include "llvm/MC/MCExpr.h"

76

#include "llvm/MC/MCRegisterInfo.h"

77

#include "llvm/MC/MCSectionXCOFF.h"

78

#include "llvm/MC/MCSymbolXCOFF.h"

79

#include "llvm/Support/AtomicOrdering.h"

80

#include "llvm/Support/BranchProbability.h"

81

#include "llvm/Support/Casting.h"

82

#include "llvm/Support/CodeGen.h"

83

#include "llvm/Support/CommandLine.h"

84

#include "llvm/Support/Compiler.h"

85

#include "llvm/Support/Debug.h"

86

#include "llvm/Support/ErrorHandling.h"

87

#include "llvm/Support/Format.h"

88

#include "llvm/Support/KnownBits.h"

89

#include "llvm/Support/MachineValueType.h"

90

#include "llvm/Support/MathExtras.h"

91

#include "llvm/Support/raw_ostream.h"

92

#include "llvm/Target/TargetMachine.h"

93

#include "llvm/Target/TargetOptions.h"

94

#include <algorithm>

95

#include <cassert>

96

#include <cstdint>

97

#include <iterator>

98

#include <list>

99

#include <utility>

100

#include <vector>

101

102

using namespace llvm;

103

104

// Name under which the STATISTIC counters declared below are registered.
// Must be a single string literal: adjacent literals would be concatenated
// by the compiler and silently change the name.
#define DEBUG_TYPE "ppc-lowering"

105

106

// Hidden boolean command-line options for PPC lowering. Each declaration
// gives the flag name followed by its help text; cl::Hidden marks the option
// as hidden.

// -disable-ppc-preinc
static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
    cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);

// -disable-ppc-ilp-pref
static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
    cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);

// -disable-ppc-unaligned
static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
    cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);

// -disable-ppc-sco
static cl::opt<bool> DisableSCO("disable-ppc-sco",
    cl::desc("disable sibling call optimization on ppc"), cl::Hidden);

// -disable-ppc-innermost-loop-align32
static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
    cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);

// -ppc-use-absolute-jumptables
static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
    cl::desc("use absolute jump tables on ppc"), cl::Hidden);

123

124

// Statistic counters for this file. Each STATISTIC(Name, Desc) invocation
// defines one counter named Name with description Desc; the macro must appear
// exactly once per counter, otherwise the counter is defined twice.
STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumSiblingCalls, "Number of sibling calls");
STATISTIC(ShufflesHandledWithVPERM, "Number of shuffles lowered to a VPERM");
STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocation probed");

128

129

static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);

130

131

static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);

132

133

// FIXME: Remove this once the bug has been fixed!

134

extern cl::opt<bool> ANDIGlueBug;

135

136

PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,

137

const PPCSubtarget &STI)

138

: TargetLowering(TM), Subtarget(STI) {

139

// On PPC32/64, arguments smaller than 4/8 bytes are extended, so all

140

// arguments are at least 4/8 bytes aligned.

141

bool isPPC64 = Subtarget.isPPC64();

142

setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));

143

144

// Set up the register classes.

145

addRegisterClass(MVT::i32, &PPC::GPRCRegClass);

146

if (!useSoftFloat()) {

147

if (hasSPE()) {

148

addRegisterClass(MVT::f32, &PPC::GPRCRegClass);

149

addRegisterClass(MVT::f64, &PPC::SPERCRegClass);

150

} else {

151

addRegisterClass(MVT::f32, &PPC::F4RCRegClass);

152

addRegisterClass(MVT::f64, &PPC::F8RCRegClass);

153

}

154

}

155

156

// Match BITREVERSE to customized fast code sequence in the td file.

157

setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);

158

setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);

159

160

// Sub-word ATOMIC_CMP_SWAP need to ensure that the input is zero-extended.

161

setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);

162

163

// PowerPC has an i16 but no i8 (or i1) SEXTLOAD.

164

for (MVT VT : MVT::integer_valuetypes()) {

165

setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);

166

setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);

167

}

168

169

if (Subtarget.isISA3_0()) {

170

setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);

171

setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal);

172

setTruncStoreAction(MVT::f64, MVT::f16, Legal);

173

setTruncStoreAction(MVT::f32, MVT::f16, Legal);

174

} else {

175

// No extending loads from f16 or HW conversions back and forth.

176

setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);

177

setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);

178

setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);

179

setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);

180

setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);

181

setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);

182

setTruncStoreAction(MVT::f64, MVT::f16, Expand);

183

setTruncStoreAction(MVT::f32, MVT::f16, Expand);

184

}

185

186

setTruncStoreAction(MVT::f64, MVT::f32, Expand);

187

188

// PowerPC has pre-inc load and store's.

189

setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);

190

setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);

191

setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);

192

setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);

193

setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);

194

setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);

195

setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);

196

setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);

197

setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);

198

setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);

199

if (!Subtarget.hasSPE()) {

200

setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);

201

setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal);

202

setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal);

203

setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal);

204

}

205

206

// PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry.

207

const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };

208

for (MVT VT : ScalarIntVTs) {

209

setOperationAction(ISD::ADDC, VT, Legal);

210

setOperationAction(ISD::ADDE, VT, Legal);

211

setOperationAction(ISD::SUBC, VT, Legal);

212

setOperationAction(ISD::SUBE, VT, Legal);

213

}

214

215

if (Subtarget.useCRBits()) {

216

setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

217

218

if (isPPC64 || Subtarget.hasFPCVT()) {

219

setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i1, Promote);

220

AddPromotedToType(ISD::STRICT_SINT_TO_FP, MVT::i1,

221

isPPC64 ? MVT::i64 : MVT::i32);

222

setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i1, Promote);

223

AddPromotedToType(ISD::STRICT_UINT_TO_FP, MVT::i1,

224

isPPC64 ? MVT::i64 : MVT::i32);

225

226

setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);

227

AddPromotedToType (ISD::SINT_TO_FP, MVT::i1,

228

isPPC64 ? MVT::i64 : MVT::i32);

229

setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);

230

AddPromotedToType(ISD::UINT_TO_FP, MVT::i1,

231

isPPC64 ? MVT::i64 : MVT::i32);

232

} else {

233

setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i1, Custom);

234

setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i1, Custom);

235

setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);

236

setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);

237

}

238

239

// PowerPC does not support direct load/store of condition registers.

240

setOperationAction(ISD::LOAD, MVT::i1, Custom);

241

setOperationAction(ISD::STORE, MVT::i1, Custom);

242

243

// FIXME: Remove this once the ANDI glue bug is fixed:

244

if (ANDIGlueBug)

245

setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);

246

247

for (MVT VT : MVT::integer_valuetypes()) {

248

setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);

249

setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);

250

setTruncStoreAction(VT, MVT::i1, Expand);

251

}

252

253

addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);

254

}

255

256

// Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on

257

// PPC (the libcall is not available).

258

setOperationAction(ISD::FP_TO_SINT, MVT::ppcf128, Custom);

259

setOperationAction(ISD::FP_TO_UINT, MVT::ppcf128, Custom);

260

setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::ppcf128, Custom);

261

setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::ppcf128, Custom);

262

263

// We do not currently implement these libm ops for PowerPC.

264

setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);

265

setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);

266

setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);

267

setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);

268

setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);

269

setOperationAction(ISD::FREM, MVT::ppcf128, Expand);

270

271

// PowerPC has no SREM/UREM instructions unless we are on P9

272

// On P9 we may use a hardware instruction to compute the remainder.

273

// When the result of both the remainder and the division is required it is

274

// more efficient to compute the remainder from the result of the division

275

// rather than use the remainder instruction. The instructions are legalized

276

// directly because the DivRemPairsPass performs the transformation at the IR

277

// level.

278

if (Subtarget.isISA3_0()) {

279

setOperationAction(ISD::SREM, MVT::i32, Legal);

280

setOperationAction(ISD::UREM, MVT::i32, Legal);

281

setOperationAction(ISD::SREM, MVT::i64, Legal);

282

setOperationAction(ISD::UREM, MVT::i64, Legal);

283

} else {

284

setOperationAction(ISD::SREM, MVT::i32, Expand);

285

setOperationAction(ISD::UREM, MVT::i32, Expand);

286

setOperationAction(ISD::SREM, MVT::i64, Expand);

287

setOperationAction(ISD::UREM, MVT::i64, Expand);

288

}

289

290

// Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.

291

setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);

292

setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);

293

setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);

294

setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);

295

setOperationAction(ISD::UDIVREM, MVT::i32, Expand);

296

setOperationAction(ISD::SDIVREM, MVT::i32, Expand);

297

setOperationAction(ISD::UDIVREM, MVT::i64, Expand);

298

setOperationAction(ISD::SDIVREM, MVT::i64, Expand);

299

300

// Handle constrained floating-point operations of scalar.

301

// TODO: Handle SPE specific operation.

302

setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal);

303

setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal);

304

setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal);

305

setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);

306

setOperationAction(ISD::STRICT_FMA, MVT::f32, Legal);

307

setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);

308

309

setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal);

310

setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal);

311

setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);

312

setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);

313

setOperationAction(ISD::STRICT_FMA, MVT::f64, Legal);

314

if (Subtarget.hasVSX()) {

315

setOperationAction(ISD::STRICT_FRINT, MVT::f32, Legal);

316

setOperationAction(ISD::STRICT_FRINT, MVT::f64, Legal);

317

}

318

319

if (Subtarget.hasFSQRT()) {

320

setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);

321

setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal);

322

}

323

324

if (Subtarget.hasFPRND()) {

325

setOperationAction(ISD::STRICT_FFLOOR, MVT::f32, Legal);

326

setOperationAction(ISD::STRICT_FCEIL, MVT::f32, Legal);

327

setOperationAction(ISD::STRICT_FTRUNC, MVT::f32, Legal);

328

setOperationAction(ISD::STRICT_FROUND, MVT::f32, Legal);

329

330

setOperationAction(ISD::STRICT_FFLOOR, MVT::f64, Legal);

331

setOperationAction(ISD::STRICT_FCEIL, MVT::f64, Legal);

332

setOperationAction(ISD::STRICT_FTRUNC, MVT::f64, Legal);

333

setOperationAction(ISD::STRICT_FROUND, MVT::f64, Legal);

334

}

335

336

// We don't support sin/cos/sqrt/fmod/pow

337

setOperationAction(ISD::FSIN , MVT::f64, Expand);

338

setOperationAction(ISD::FCOS , MVT::f64, Expand);

339

setOperationAction(ISD::FSINCOS, MVT::f64, Expand);

340

setOperationAction(ISD::FREM , MVT::f64, Expand);

341

setOperationAction(ISD::FPOW , MVT::f64, Expand);

342

setOperationAction(ISD::FSIN , MVT::f32, Expand);

343

setOperationAction(ISD::FCOS , MVT::f32, Expand);

344

setOperationAction(ISD::FSINCOS, MVT::f32, Expand);

345

setOperationAction(ISD::FREM , MVT::f32, Expand);

346

setOperationAction(ISD::FPOW , MVT::f32, Expand);

347

if (Subtarget.hasSPE()) {

348

setOperationAction(ISD::FMA , MVT::f64, Expand);

349

setOperationAction(ISD::FMA , MVT::f32, Expand);

350

} else {

351

setOperationAction(ISD::FMA , MVT::f64, Legal);

352

setOperationAction(ISD::FMA , MVT::f32, Legal);

353

}

354

355

if (Subtarget.hasSPE())

356

setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);

357

358

setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);

359

360

// If we're enabling GP optimizations, use hardware square root

361

if (!Subtarget.hasFSQRT() &&

362

!(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&

363

Subtarget.hasFRE()))

364

setOperationAction(ISD::FSQRT, MVT::f64, Expand);

365

366

if (!Subtarget.hasFSQRT() &&

367

!(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&

368

Subtarget.hasFRES()))

369

setOperationAction(ISD::FSQRT, MVT::f32, Expand);

370

371

if (Subtarget.hasFCPSGN()) {

372

setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);

373

setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);

374

} else {

375

setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);

376

setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

377

}

378

379

if (Subtarget.hasFPRND()) {

380

setOperationAction(ISD::FFLOOR, MVT::f64, Legal);

381

setOperationAction(ISD::FCEIL, MVT::f64, Legal);

382

setOperationAction(ISD::FTRUNC, MVT::f64, Legal);

383

setOperationAction(ISD::FROUND, MVT::f64, Legal);

384

385

setOperationAction(ISD::FFLOOR, MVT::f32, Legal);

386

setOperationAction(ISD::FCEIL, MVT::f32, Legal);

387

setOperationAction(ISD::FTRUNC, MVT::f32, Legal);

388

setOperationAction(ISD::FROUND, MVT::f32, Legal);

389

}

390

391

// PowerPC does not have BSWAP, but we can use vector BSWAP instruction xxbrd

392

// to speed up scalar BSWAP64.

393

// CTPOP or CTTZ were introduced in P8/P9 respectively

394

setOperationAction(ISD::BSWAP, MVT::i32 , Expand);

395

if (Subtarget.hasP9Vector())

396

setOperationAction(ISD::BSWAP, MVT::i64 , Custom);

397

else

398

setOperationAction(ISD::BSWAP, MVT::i64 , Expand);

399

if (Subtarget.isISA3_0()) {

400

setOperationAction(ISD::CTTZ , MVT::i32 , Legal);

401

setOperationAction(ISD::CTTZ , MVT::i64 , Legal);

402

} else {

403

setOperationAction(ISD::CTTZ , MVT::i32 , Expand);

404

setOperationAction(ISD::CTTZ , MVT::i64 , Expand);

405

}

406

407

if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {

408

setOperationAction(ISD::CTPOP, MVT::i32 , Legal);

409

setOperationAction(ISD::CTPOP, MVT::i64 , Legal);

410

} else {

411

setOperationAction(ISD::CTPOP, MVT::i32 , Expand);

412

setOperationAction(ISD::CTPOP, MVT::i64 , Expand);

413

}

414

415

// PowerPC does not have ROTR

416

setOperationAction(ISD::ROTR, MVT::i32 , Expand);

417

setOperationAction(ISD::ROTR, MVT::i64 , Expand);

418

419

if (!Subtarget.useCRBits()) {

420

// PowerPC does not have Select

421

setOperationAction(ISD::SELECT, MVT::i32, Expand);

422

setOperationAction(ISD::SELECT, MVT::i64, Expand);

423

setOperationAction(ISD::SELECT, MVT::f32, Expand);

424

setOperationAction(ISD::SELECT, MVT::f64, Expand);

425

}

426

427

// PowerPC wants to turn select_cc of FP into fsel when possible.

428

setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);

429

setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

430

431

// PowerPC wants to optimize integer setcc a bit

432

if (!Subtarget.useCRBits())

433

setOperationAction(ISD::SETCC, MVT::i32, Custom);

434

435

if (Subtarget.hasFPU()) {

436

setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal);

437

setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal);

438

setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Legal);

439

440

setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);

441

setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);

442

setOperationAction(ISD::STRICT_FSETCCS, MVT::f128, Legal);

443

}

444

445

// PowerPC does not have BRCOND which requires SetCC

446

if (!Subtarget.useCRBits())

447

setOperationAction(ISD::BRCOND, MVT::Other, Expand);

448

449

setOperationAction(ISD::BR_JT, MVT::Other, Expand);

450

451

if (Subtarget.hasSPE()) {

452

// SPE has built-in conversions

453

setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Legal);

454

setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Legal);

455

setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Legal);

456

setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);

457

setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);

458

setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);

459

} else {

460

// PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.

461

setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);

462

setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);

463

464

// PowerPC does not have [U|S]INT_TO_FP

465

setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Expand);

466

setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Expand);

467

setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);

468

setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);

469

}

470

471

if (Subtarget.hasDirectMove() && isPPC64) {

472

setOperationAction(ISD::BITCAST, MVT::f32, Legal);

473

setOperationAction(ISD::BITCAST, MVT::i32, Legal);

474

setOperationAction(ISD::BITCAST, MVT::i64, Legal);

475

setOperationAction(ISD::BITCAST, MVT::f64, Legal);

476

if (TM.Options.UnsafeFPMath) {

477

setOperationAction(ISD::LRINT, MVT::f64, Legal);

478

setOperationAction(ISD::LRINT, MVT::f32, Legal);

479

setOperationAction(ISD::LLRINT, MVT::f64, Legal);

480

setOperationAction(ISD::LLRINT, MVT::f32, Legal);

481

setOperationAction(ISD::LROUND, MVT::f64, Legal);

482

setOperationAction(ISD::LROUND, MVT::f32, Legal);

483

setOperationAction(ISD::LLROUND, MVT::f64, Legal);

484

setOperationAction(ISD::LLROUND, MVT::f32, Legal);

485

}

486

} else {

487

setOperationAction(ISD::BITCAST, MVT::f32, Expand);

488

setOperationAction(ISD::BITCAST, MVT::i32, Expand);

489

setOperationAction(ISD::BITCAST, MVT::i64, Expand);

490

setOperationAction(ISD::BITCAST, MVT::f64, Expand);

491

}

492

493

// We cannot sextinreg(i1). Expand to shifts.

494

setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

495

496

// NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support

497

// SjLj exception handling but a light-weight setjmp/longjmp replacement to

498

// support continuation, user-level threading, and etc.. As a result, no

499

// other SjLj exception interfaces are implemented and please don't build

500

// your own exception handling based on them.

501

// LLVM/Clang supports zero-cost DWARF exception handling.

502

setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);

503

setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);

504

505

// We want to legalize GlobalAddress and ConstantPool nodes into the

506

// appropriate instructions to materialize the address.

507

setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);

508

setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);

509

setOperationAction(ISD::BlockAddress, MVT::i32, Custom);

510

setOperationAction(ISD::ConstantPool, MVT::i32, Custom);

511

setOperationAction(ISD::JumpTable, MVT::i32, Custom);

512

setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);

513

setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);

514

setOperationAction(ISD::BlockAddress, MVT::i64, Custom);

515

setOperationAction(ISD::ConstantPool, MVT::i64, Custom);

516

setOperationAction(ISD::JumpTable, MVT::i64, Custom);

517

518

// TRAP is legal.

519

setOperationAction(ISD::TRAP, MVT::Other, Legal);

520

521

// TRAMPOLINE is custom lowered.

522

setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);

523

setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);

524

525

// VASTART needs to be custom lowered to use the VarArgsFrameIndex

526

setOperationAction(ISD::VASTART , MVT::Other, Custom);

527

528

if (Subtarget.is64BitELFABI()) {

529

// VAARG always uses double-word chunks, so promote anything smaller.

530

setOperationAction(ISD::VAARG, MVT::i1, Promote);

531

AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64);

532

setOperationAction(ISD::VAARG, MVT::i8, Promote);

533

AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64);

534

setOperationAction(ISD::VAARG, MVT::i16, Promote);

535

AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64);

536

setOperationAction(ISD::VAARG, MVT::i32, Promote);

537

AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64);

538

setOperationAction(ISD::VAARG, MVT::Other, Expand);

539

} else if (Subtarget.is32BitELFABI()) {

540

// VAARG is custom lowered with the 32-bit SVR4 ABI.

541

setOperationAction(ISD::VAARG, MVT::Other, Custom);

542

setOperationAction(ISD::VAARG, MVT::i64, Custom);

543

} else

544

setOperationAction(ISD::VAARG, MVT::Other, Expand);

545

546

// VACOPY is custom lowered with the 32-bit SVR4 ABI.

547

if (Subtarget.is32BitELFABI())

548

setOperationAction(ISD::VACOPY , MVT::Other, Custom);

549

else

550

setOperationAction(ISD::VACOPY , MVT::Other, Expand);

551

552

// Use the default implementation.

553

setOperationAction(ISD::VAEND , MVT::Other, Expand);

554

setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);

555

setOperationAction(ISD::STACKRESTORE , MVT::Other, Custom);

556

setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);

557

setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Custom);

558

setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom);

559

setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom);

560

setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);

561

setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);

562

563

// We want to custom lower some of our intrinsics.

564

setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

565

566

// To handle counter-based loop conditions.

567

setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);

568

569

setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom);

570

setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom);

571

setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);

572

setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);

573

574

// Comparisons that require checking two conditions.

575

if (Subtarget.hasSPE()) {

576

setCondCodeAction(ISD::SETO, MVT::f32, Expand);

577

setCondCodeAction(ISD::SETO, MVT::f64, Expand);

578

setCondCodeAction(ISD::SETUO, MVT::f32, Expand);

579

setCondCodeAction(ISD::SETUO, MVT::f64, Expand);

580

}

581

setCondCodeAction(ISD::SETULT, MVT::f32, Expand);

582

setCondCodeAction(ISD::SETULT, MVT::f64, Expand);

583

setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);

584

setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);

585

setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);

586

setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);

587

setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);

588

setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);

589

setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);

590

setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);

591

setCondCodeAction(ISD::SETONE, MVT::f32, Expand);

592

setCondCodeAction(ISD::SETONE, MVT::f64, Expand);

593

594

if (Subtarget.has64BitSupport()) {

595

// They also have instructions for converting between i64 and fp.

596

setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);

597

setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Expand);

598

setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);

599

setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Expand);

600

setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);

601

setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);

602

setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

603

setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);

604

// This is just the low 32 bits of a (signed) fp->i64 conversion.

605

// We cannot do this with Promote because i64 is not a legal type.

606

setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);

607

setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);

608

609

if (Subtarget.hasLFIWAX() || Subtarget.isPPC64()) {

610

setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);

611

setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);

612

}

613

} else {

614

// PowerPC does not have FP_TO_UINT on 32-bit implementations.

615

if (Subtarget.hasSPE()) {

616

setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Legal);

617

setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);

618

} else {

619

setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Expand);

620

setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);

621

}

622

}

623

624

// With the instructions enabled under FPCVT, we can do everything.

625

if (Subtarget.hasFPCVT()) {

626

if (Subtarget.has64BitSupport()) {

627

setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);

628

setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);

629

setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);

630

setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);

631

setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);

632

setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);

633

setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

634

setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);

635

}

636

637

setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);

638

setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);

639

setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);

640

setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);

641

setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);

642

setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);

643

setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);

644

setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);

645

}

646

647

if (Subtarget.use64BitRegs()) {

648

// 64-bit PowerPC implementations can support i64 types directly

649

addRegisterClass(MVT::i64, &PPC::G8RCRegClass);

650

// BUILD_PAIR can't be handled natively, and should be expanded to shl/or

651

setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);

652

// 64-bit PowerPC wants to expand i128 shifts itself.

653

setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);

654

setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);

655

setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);

656

} else {

657

// 32-bit PowerPC wants to expand i64 shifts itself.

658

setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);

659

setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);

660

setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);

661

}

662

663

// PowerPC has better expansions for funnel shifts than the generic

664

// TargetLowering::expandFunnelShift.

665

if (Subtarget.has64BitSupport()) {

666

setOperationAction(ISD::FSHL, MVT::i64, Custom);

667

setOperationAction(ISD::FSHR, MVT::i64, Custom);

668

}

669

setOperationAction(ISD::FSHL, MVT::i32, Custom);

670

setOperationAction(ISD::FSHR, MVT::i32, Custom);

671

672

if (Subtarget.hasVSX()) {

673

setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);

674

setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);

675

setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);

676

setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);

677

}

678

679

if (Subtarget.hasAltivec()) {

680

for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {

681

setOperationAction(ISD::SADDSAT, VT, Legal);

682

setOperationAction(ISD::SSUBSAT, VT, Legal);

683

setOperationAction(ISD::UADDSAT, VT, Legal);

684

setOperationAction(ISD::USUBSAT, VT, Legal);

685

}

686

// First set operation action for all vector types to expand. Then we

687

// will selectively turn on ones that can be effectively codegen'd.

688

for (MVT VT : MVT::fixedlen_vector_valuetypes()) {

689

// add/sub are legal for all supported vector VT's.

690

setOperationAction(ISD::ADD, VT, Legal);

691

setOperationAction(ISD::SUB, VT, Legal);

692

693

// For v2i64, these are only valid with P8Vector. This is corrected after

694

// the loop.

695

if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {

696

setOperationAction(ISD::SMAX, VT, Legal);

697

setOperationAction(ISD::SMIN, VT, Legal);

698

setOperationAction(ISD::UMAX, VT, Legal);

699

setOperationAction(ISD::UMIN, VT, Legal);

700

}

701

else {

702

setOperationAction(ISD::SMAX, VT, Expand);

703

setOperationAction(ISD::SMIN, VT, Expand);

704

setOperationAction(ISD::UMAX, VT, Expand);

705

setOperationAction(ISD::UMIN, VT, Expand);

706

}

707

708

if (Subtarget.hasVSX()) {

709

setOperationAction(ISD::FMAXNUM, VT, Legal);

710

setOperationAction(ISD::FMINNUM, VT, Legal);

711

}

712

713

// Vector instructions introduced in P8

714

if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {

715

setOperationAction(ISD::CTPOP, VT, Legal);

716

setOperationAction(ISD::CTLZ, VT, Legal);

717

}

718

else {

719

setOperationAction(ISD::CTPOP, VT, Expand);

720

setOperationAction(ISD::CTLZ, VT, Expand);

721

}

722

723

// Vector instructions introduced in P9

724

if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))

725

setOperationAction(ISD::CTTZ, VT, Legal);

726

else

727

setOperationAction(ISD::CTTZ, VT, Expand);

728

729

// We promote all shuffles to v16i8.

730

setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);

731

AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);

732

733

// We promote all non-typed operations to v4i32.

734

setOperationAction(ISD::AND , VT, Promote);

735

AddPromotedToType (ISD::AND , VT, MVT::v4i32);

736

setOperationAction(ISD::OR , VT, Promote);

737

AddPromotedToType (ISD::OR , VT, MVT::v4i32);

738

setOperationAction(ISD::XOR , VT, Promote);

739

AddPromotedToType (ISD::XOR , VT, MVT::v4i32);

740

setOperationAction(ISD::LOAD , VT, Promote);

741

AddPromotedToType (ISD::LOAD , VT, MVT::v4i32);

742

setOperationAction(ISD::SELECT, VT, Promote);

743

AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);

744

setOperationAction(ISD::VSELECT, VT, Legal);

745

setOperationAction(ISD::SELECT_CC, VT, Promote);

746

AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);

747

setOperationAction(ISD::STORE, VT, Promote);

748

AddPromotedToType (ISD::STORE, VT, MVT::v4i32);

749

750

// No other operations are legal.

751

setOperationAction(ISD::MUL , VT, Expand);

752

setOperationAction(ISD::SDIV, VT, Expand);

753

setOperationAction(ISD::SREM, VT, Expand);

754

setOperationAction(ISD::UDIV, VT, Expand);

755

setOperationAction(ISD::UREM, VT, Expand);

756

setOperationAction(ISD::FDIV, VT, Expand);

757

setOperationAction(ISD::FREM, VT, Expand);

758

setOperationAction(ISD::FNEG, VT, Expand);

759

setOperationAction(ISD::FSQRT, VT, Expand);

760

setOperationAction(ISD::FLOG, VT, Expand);

761

setOperationAction(ISD::FLOG10, VT, Expand);

762

setOperationAction(ISD::FLOG2, VT, Expand);

763

setOperationAction(ISD::FEXP, VT, Expand);

764

setOperationAction(ISD::FEXP2, VT, Expand);

765

setOperationAction(ISD::FSIN, VT, Expand);

766

setOperationAction(ISD::FCOS, VT, Expand);

767

setOperationAction(ISD::FABS, VT, Expand);

768

setOperationAction(ISD::FFLOOR, VT, Expand);

769

setOperationAction(ISD::FCEIL, VT, Expand);

770

setOperationAction(ISD::FTRUNC, VT, Expand);

771

setOperationAction(ISD::FRINT, VT, Expand);

772

setOperationAction(ISD::FNEARBYINT, VT, Expand);

773

setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);

774

setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);

775

setOperationAction(ISD::BUILD_VECTOR, VT, Expand);

776

setOperationAction(ISD::MULHU, VT, Expand);

777

setOperationAction(ISD::MULHS, VT, Expand);

778

setOperationAction(ISD::UMUL_LOHI, VT, Expand);

779

setOperationAction(ISD::SMUL_LOHI, VT, Expand);

780

setOperationAction(ISD::UDIVREM, VT, Expand);

781

setOperationAction(ISD::SDIVREM, VT, Expand);

782

setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);

783

setOperationAction(ISD::FPOW, VT, Expand);

784

setOperationAction(ISD::BSWAP, VT, Expand);

785

setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);

786

setOperationAction(ISD::ROTL, VT, Expand);

787

setOperationAction(ISD::ROTR, VT, Expand);

788

789

for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {

790

setTruncStoreAction(VT, InnerVT, Expand);

791

setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);

792

setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);

793

setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);

794

}

795

}

796

setOperationAction(ISD::SELECT_CC, MVT::v4i32, Expand);

797

if (!Subtarget.hasP8Vector()) {

798

setOperationAction(ISD::SMAX, MVT::v2i64, Expand);

799

setOperationAction(ISD::SMIN, MVT::v2i64, Expand);

800

setOperationAction(ISD::UMAX, MVT::v2i64, Expand);

801

setOperationAction(ISD::UMIN, MVT::v2i64, Expand);

802

}

803

804

for (auto VT : {MVT::v2i64, MVT::v4i32, MVT::v8i16, MVT::v16i8})

805

setOperationAction(ISD::ABS, VT, Custom);

806

807

// We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle

808

// with merges, splats, etc.

809

setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);

810

811

// Vector truncates to sub-word integer that fit in an Altivec/VSX register

812

// are cheap, so handle them before they get expanded to scalar.

813

setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);

814

setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);

815

setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);

816

setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);

817

setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);

818

819

setOperationAction(ISD::AND , MVT::v4i32, Legal);

820

setOperationAction(ISD::OR , MVT::v4i32, Legal);

821

setOperationAction(ISD::XOR , MVT::v4i32, Legal);

822

setOperationAction(ISD::LOAD , MVT::v4i32, Legal);

823

setOperationAction(ISD::SELECT, MVT::v4i32,

824

Subtarget.useCRBits() ? Legal : Expand);

825

setOperationAction(ISD::STORE , MVT::v4i32, Legal);

826

setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal);

827

setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Legal);

828

setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal);

829

setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Legal);

830

setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);

831

setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);

832

setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);

833

setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);

834

setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);

835

setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);

836

setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);

837

setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);

838

839

// Without hasP8Altivec set, v2i64 SMAX isn't available.

840

// But ABS custom lowering requires SMAX support.

841

if (!Subtarget.hasP8Altivec())

842

setOperationAction(ISD::ABS, MVT::v2i64, Expand);

843

844

// Custom lowering ROTL v1i128 to VECTOR_SHUFFLE v16i8.

845

setOperationAction(ISD::ROTL, MVT::v1i128, Custom);

846

// With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).

847

if (Subtarget.hasAltivec())

848

for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})

849

setOperationAction(ISD::ROTL, VT, Legal);

850

// With hasP8Altivec set, we can lower ISD::ROTL to vrld.

851

if (Subtarget.hasP8Altivec())

852

setOperationAction(ISD::ROTL, MVT::v2i64, Legal);

853

854

addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);

855

addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);

856

addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);

857

addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);

858

859

setOperationAction(ISD::MUL, MVT::v4f32, Legal);

860

setOperationAction(ISD::FMA, MVT::v4f32, Legal);

861

862

if (Subtarget.hasVSX()) {

863

setOperationAction(ISD::FDIV, MVT::v4f32, Legal);

864

setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);

865

}

866

867

if (Subtarget.hasP8Altivec())

868

setOperationAction(ISD::MUL, MVT::v4i32, Legal);

869

else

870

setOperationAction(ISD::MUL, MVT::v4i32, Custom);

871

872

if (Subtarget.isISA3_1()) {

873

setOperationAction(ISD::MUL, MVT::v2i64, Legal);

874

setOperationAction(ISD::MULHS, MVT::v2i64, Legal);

875

setOperationAction(ISD::MULHU, MVT::v2i64, Legal);

876

setOperationAction(ISD::MULHS, MVT::v4i32, Legal);

877

setOperationAction(ISD::MULHU, MVT::v4i32, Legal);

878

setOperationAction(ISD::UDIV, MVT::v2i64, Legal);

879

setOperationAction(ISD::SDIV, MVT::v2i64, Legal);

880

setOperationAction(ISD::UDIV, MVT::v4i32, Legal);

881

setOperationAction(ISD::SDIV, MVT::v4i32, Legal);

882

setOperationAction(ISD::UREM, MVT::v2i64, Legal);

883

setOperationAction(ISD::SREM, MVT::v2i64, Legal);

884

setOperationAction(ISD::UREM, MVT::v4i32, Legal);

885

setOperationAction(ISD::SREM, MVT::v4i32, Legal);

886

setOperationAction(ISD::UREM, MVT::v1i128, Legal);

887

setOperationAction(ISD::SREM, MVT::v1i128, Legal);

888

setOperationAction(ISD::UDIV, MVT::v1i128, Legal);

889

setOperationAction(ISD::SDIV, MVT::v1i128, Legal);

890

setOperationAction(ISD::ROTL, MVT::v1i128, Legal);

891

}

892

893

setOperationAction(ISD::MUL, MVT::v8i16, Legal);

894

setOperationAction(ISD::MUL, MVT::v16i8, Custom);

895

896

setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);

897

setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);

898

899

setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);

900

setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);

901

setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);

902

setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);

903

904

// Altivec does not contain unordered floating-point compare instructions

905

setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);

906

setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);

907

setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);

908

setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);

909

910

if (Subtarget.hasVSX()) {

911

setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);

912

setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);

913

if (Subtarget.hasP8Vector()) {

914

setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);

915

setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);

916

}

917

if (Subtarget.hasDirectMove() && isPPC64) {

918

setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal);

919

setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal);

920

setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal);

921

setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal);

922

setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Legal);

923

setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Legal);

924

setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal);

925

setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);

926

}

927

setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);

928

929

// The nearbyint variants are not allowed to raise the inexact exception

930

// so we can only code-gen them with unsafe math.

931

if (TM.Options.UnsafeFPMath) {

932

setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);

933

setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);

934

}

935

936

setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);

937

setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);

938

setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);

939

setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);

940

setOperationAction(ISD::FRINT, MVT::v2f64, Legal);

941

setOperationAction(ISD::FROUND, MVT::v2f64, Legal);

942

setOperationAction(ISD::FROUND, MVT::f64, Legal);

943

setOperationAction(ISD::FRINT, MVT::f64, Legal);

944

945

setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);

946

setOperationAction(ISD::FRINT, MVT::v4f32, Legal);

947

setOperationAction(ISD::FROUND, MVT::v4f32, Legal);

948

setOperationAction(ISD::FROUND, MVT::f32, Legal);

949

setOperationAction(ISD::FRINT, MVT::f32, Legal);

950

951

setOperationAction(ISD::MUL, MVT::v2f64, Legal);

952

setOperationAction(ISD::FMA, MVT::v2f64, Legal);

953

954

setOperationAction(ISD::FDIV, MVT::v2f64, Legal);

955

setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);

956

957

// Share the Altivec comparison restrictions.

958

setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);

959

setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);

960

setCondCodeAction(ISD::SETO, MVT::v2f64, Expand);

961

setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);

962

963

setOperationAction(ISD::LOAD, MVT::v2f64, Legal);

964

setOperationAction(ISD::STORE, MVT::v2f64, Legal);

965

966

setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal);

967

968

if (Subtarget.hasP8Vector())

969

addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);

970

971

addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);

972

973

addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);

974

addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);

975

addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);

976

977

if (Subtarget.hasP8Altivec()) {

978

setOperationAction(ISD::SHL, MVT::v2i64, Legal);

979

setOperationAction(ISD::SRA, MVT::v2i64, Legal);

980

setOperationAction(ISD::SRL, MVT::v2i64, Legal);

981

982

// 128 bit shifts can be accomplished via 3 instructions for SHL and

983

// SRL, but not for SRA because of the instructions available:

984

// VS{RL} and VS{RL}O. However due to direct move costs, it's not worth

985

// doing

986

setOperationAction(ISD::SHL, MVT::v1i128, Expand);

987

setOperationAction(ISD::SRL, MVT::v1i128, Expand);

988

setOperationAction(ISD::SRA, MVT::v1i128, Expand);

989

990

setOperationAction(ISD::SETCC, MVT::v2i64, Legal);

991

}

992

else {

993

setOperationAction(ISD::SHL, MVT::v2i64, Expand);

994

setOperationAction(ISD::SRA, MVT::v2i64, Expand);

995

setOperationAction(ISD::SRL, MVT::v2i64, Expand);

996

997

setOperationAction(ISD::SETCC, MVT::v2i64, Custom);

998

999

// VSX v2i64 only supports non-arithmetic operations.

1000

setOperationAction(ISD::ADD, MVT::v2i64, Expand);

1001

setOperationAction(ISD::SUB, MVT::v2i64, Expand);

1002

}

1003

1004

if (Subtarget.isISA3_1())

1005

setOperationAction(ISD::SETCC, MVT::v1i128, Legal);

1006

else

1007

setOperationAction(ISD::SETCC, MVT::v1i128, Expand);

1008

1009

setOperationAction(ISD::LOAD, MVT::v2i64, Promote);

1010

AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);

1011

setOperationAction(ISD::STORE, MVT::v2i64, Promote);

1012

AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);

1013

1014

setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal);

1015

1016

setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i64, Legal);

1017

setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i64, Legal);

1018

setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i64, Legal);

1019

setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i64, Legal);

1020

setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);

1021

setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);

1022

setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);

1023

setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);

1024

1025

// Custom handling for partial vectors of integers converted to

1026

// floating point. We already have optimal handling for v2i32 through

1027

// the DAG combine, so those aren't necessary.

1028

setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i8, Custom);

1029

setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i8, Custom);

1030

setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i16, Custom);

1031

setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i16, Custom);

1032

setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i8, Custom);

1033

setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i8, Custom);

1034

setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i16, Custom);

1035

setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i16, Custom);

1036

setOperationAction(ISD::UINT_TO_FP, MVT::v2i8, Custom);

1037

setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom);

1038

setOperationAction(ISD::UINT_TO_FP, MVT::v2i16, Custom);

1039

setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);

1040

setOperationAction(ISD::SINT_TO_FP, MVT::v2i8, Custom);

1041

setOperationAction(ISD::SINT_TO_FP, MVT::v4i8, Custom);

1042

setOperationAction(ISD::SINT_TO_FP, MVT::v2i16, Custom);

1043

setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);

1044

1045

setOperationAction(ISD::FNEG, MVT::v4f32, Legal);

1046

setOperationAction(ISD::FNEG, MVT::v2f64, Legal);

1047

setOperationAction(ISD::FABS, MVT::v4f32, Legal);

1048

setOperationAction(ISD::FABS, MVT::v2f64, Legal);

1049

setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);

1050

setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Legal);

1051

1052

if (Subtarget.hasDirectMove())

1053

setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);

1054

setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);

1055

1056

// Handle constrained floating-point operations of vector.

1057

// The predicate is `hasVSX` because Altivec instructions raise no

1058

// exceptions, whereas VSX vector instructions do.

1059

setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);

1060

setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);

1061

setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);

1062

setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);

1063

setOperationAction(ISD::STRICT_FMA, MVT::v4f32, Legal);

1064

setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);

1065

setOperationAction(ISD::STRICT_FMAXNUM, MVT::v4f32, Legal);

1066

setOperationAction(ISD::STRICT_FMINNUM, MVT::v4f32, Legal);

1067

setOperationAction(ISD::STRICT_FRINT, MVT::v4f32, Legal);

1068

setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal);

1069

setOperationAction(ISD::STRICT_FCEIL, MVT::v4f32, Legal);

1070

setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal);

1071

setOperationAction(ISD::STRICT_FROUND, MVT::v4f32, Legal);

1072

1073

setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);

1074

setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);

1075

setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);

1076

setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);

1077

setOperationAction(ISD::STRICT_FMA, MVT::v2f64, Legal);

1078

setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);

1079

setOperationAction(ISD::STRICT_FMAXNUM, MVT::v2f64, Legal);

1080

setOperationAction(ISD::STRICT_FMINNUM, MVT::v2f64, Legal);

1081

setOperationAction(ISD::STRICT_FRINT, MVT::v2f64, Legal);

1082

setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f64, Legal);

1083

setOperationAction(ISD::STRICT_FCEIL, MVT::v2f64, Legal);

1084

setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal);

1085

setOperationAction(ISD::STRICT_FROUND, MVT::v2f64, Legal);

1086

1087

addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);

1088

}

1089

1090

if (Subtarget.hasP8Altivec()) {

1091

addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);

1092

addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);

1093

}

1094

1095

if (Subtarget.hasP9Vector()) {

1096

setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);

1097

setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);

1098

1099

// 128 bit shifts can be accomplished via 3 instructions for SHL and

1100

// SRL, but not for SRA because of the instructions available:

1101

// VS{RL} and VS{RL}O.

1102

setOperationAction(ISD::SHL, MVT::v1i128, Legal);

1103

setOperationAction(ISD::SRL, MVT::v1i128, Legal);

1104

setOperationAction(ISD::SRA, MVT::v1i128, Expand);

1105

1106

addRegisterClass(MVT::f128, &PPC::VRRCRegClass);

1107

setOperationAction(ISD::FADD, MVT::f128, Legal);

1108

setOperationAction(ISD::FSUB, MVT::f128, Legal);

1109

setOperationAction(ISD::FDIV, MVT::f128, Legal);

1110

setOperationAction(ISD::FMUL, MVT::f128, Legal);

1111

setOperationAction(ISD::FP_EXTEND, MVT::f128, Legal);

1112

// No extending loads to f128 on PPC.

1113

for (MVT FPT : MVT::fp_valuetypes())

1114

setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);

1115

setOperationAction(ISD::FMA, MVT::f128, Legal);

1116

setCondCodeAction(ISD::SETULT, MVT::f128, Expand);

1117

setCondCodeAction(ISD::SETUGT, MVT::f128, Expand);

1118

setCondCodeAction(ISD::SETUEQ, MVT::f128, Expand);

1119

setCondCodeAction(ISD::SETOGE, MVT::f128, Expand);

1120

setCondCodeAction(ISD::SETOLE, MVT::f128, Expand);

1121

setCondCodeAction(ISD::SETONE, MVT::f128, Expand);

1122

1123

setOperationAction(ISD::FTRUNC, MVT::f128, Legal);

1124

setOperationAction(ISD::FRINT, MVT::f128, Legal);

1125

setOperationAction(ISD::FFLOOR, MVT::f128, Legal);

1126

setOperationAction(ISD::FCEIL, MVT::f128, Legal);

1127

setOperationAction(ISD::FNEARBYINT, MVT::f128, Legal);

1128

setOperationAction(ISD::FROUND, MVT::f128, Legal);

1129

1130

setOperationAction(ISD::SELECT, MVT::f128, Expand);

1131

setOperationAction(ISD::FP_ROUND, MVT::f64, Legal);

1132

setOperationAction(ISD::FP_ROUND, MVT::f32, Legal);

1133

setTruncStoreAction(MVT::f128, MVT::f64, Expand);

1134

setTruncStoreAction(MVT::f128, MVT::f32, Expand);

1135

setOperationAction(ISD::BITCAST, MVT::i128, Custom);

1136

// No implementation for these ops for PowerPC.

1137

setOperationAction(ISD::FSIN, MVT::f128, Expand);

1138

setOperationAction(ISD::FCOS, MVT::f128, Expand);

1139

setOperationAction(ISD::FPOW, MVT::f128, Expand);

1140

setOperationAction(ISD::FPOWI, MVT::f128, Expand);

1141

setOperationAction(ISD::FREM, MVT::f128, Expand);

1142

1143

// Handle constrained floating-point operations of fp128

1144

setOperationAction(ISD::STRICT_FADD, MVT::f128, Legal);

1145

setOperationAction(ISD::STRICT_FSUB, MVT::f128, Legal);

1146

setOperationAction(ISD::STRICT_FMUL, MVT::f128, Legal);

1147

setOperationAction(ISD::STRICT_FDIV, MVT::f128, Legal);

1148

setOperationAction(ISD::STRICT_FMA, MVT::f128, Legal);

1149

setOperationAction(ISD::STRICT_FSQRT, MVT::f128, Legal);

1150

setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Legal);

1151

setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Legal);

1152

setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);

1153

setOperationAction(ISD::STRICT_FRINT, MVT::f128, Legal);

1154

setOperationAction(ISD::STRICT_FNEARBYINT, MVT::f128, Legal);

1155

setOperationAction(ISD::STRICT_FFLOOR, MVT::f128, Legal);

1156

setOperationAction(ISD::STRICT_FCEIL, MVT::f128, Legal);

1157

setOperationAction(ISD::STRICT_FTRUNC, MVT::f128, Legal);

1158

setOperationAction(ISD::STRICT_FROUND, MVT::f128, Legal);

1159

setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);

1160

setOperationAction(ISD::BSWAP, MVT::v8i16, Legal);

1161

setOperationAction(ISD::BSWAP, MVT::v4i32, Legal);

1162

setOperationAction(ISD::BSWAP, MVT::v2i64, Legal);

1163

setOperationAction(ISD::BSWAP, MVT::v1i128, Legal);

1164

}

1165

1166

if (Subtarget.hasP9Altivec()) {

1167

setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);

1168

setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);

1169

1170

setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Legal);

1171

setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Legal);

1172

setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Legal);

1173

setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Legal);

1174

setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Legal);

1175

setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);

1176

setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);

1177

}

1178

}

1179

1180

if (Subtarget.pairedVectorMemops()) {

1181

addRegisterClass(MVT::v256i1, &PPC::VSRpRCRegClass);

1182

setOperationAction(ISD::LOAD, MVT::v256i1, Custom);

1183

setOperationAction(ISD::STORE, MVT::v256i1, Custom);

1184

}

1185

if (Subtarget.hasMMA()) {

1186

addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass);

1187

setOperationAction(ISD::LOAD, MVT::v512i1, Custom);

1188

setOperationAction(ISD::STORE, MVT::v512i1, Custom);

1189

setOperationAction(ISD::BUILD_VECTOR, MVT::v512i1, Custom);

1190

}

1191

1192

if (Subtarget.has64BitSupport())

1193

setOperationAction(ISD::PREFETCH, MVT::Other, Legal);

1194

1195

if (Subtarget.isISA3_1())

1196

setOperationAction(ISD::SRA, MVT::v1i128, Legal);

1197

1198

setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);

1199

1200

if (!isPPC64) {

1201

setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand);

1202

setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);

1203

}

1204

1205

setBooleanContents(ZeroOrOneBooleanContent);

1206

1207

if (Subtarget.hasAltivec()) {

1208

// Altivec instructions set fields to all zeros or all ones.

1209

setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);

1210

}

1211

1212

if (!isPPC64) {

1213

// These libcalls are not available in 32-bit.

1214

setLibcallName(RTLIB::SHL_I128, nullptr);

1215

setLibcallName(RTLIB::SRL_I128, nullptr);

1216

setLibcallName(RTLIB::SRA_I128, nullptr);

1217

}

1218

1219

if (!isPPC64)

1220

setMaxAtomicSizeInBitsSupported(32);

1221

1222

setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);

1223

1224

// We have target-specific dag combine patterns for the following nodes:

1225

setTargetDAGCombine(ISD::ADD);

1226

setTargetDAGCombine(ISD::SHL);

1227

setTargetDAGCombine(ISD::SRA);

1228

setTargetDAGCombine(ISD::SRL);

1229

setTargetDAGCombine(ISD::MUL);

1230

setTargetDAGCombine(ISD::FMA);

1231

setTargetDAGCombine(ISD::SINT_TO_FP);

1232

setTargetDAGCombine(ISD::BUILD_VECTOR);

1233

if (Subtarget.hasFPCVT())

1234

setTargetDAGCombine(ISD::UINT_TO_FP);

1235

setTargetDAGCombine(ISD::LOAD);

1236

setTargetDAGCombine(ISD::STORE);

1237

setTargetDAGCombine(ISD::BR_CC);

1238

if (Subtarget.useCRBits())

1239

setTargetDAGCombine(ISD::BRCOND);

1240

setTargetDAGCombine(ISD::BSWAP);

1241

setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);

1242

setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);

1243

setTargetDAGCombine(ISD::INTRINSIC_VOID);

1244

1245

setTargetDAGCombine(ISD::SIGN_EXTEND);

1246

setTargetDAGCombine(ISD::ZERO_EXTEND);

1247

setTargetDAGCombine(ISD::ANY_EXTEND);

1248

1249

setTargetDAGCombine(ISD::TRUNCATE);

1250

setTargetDAGCombine(ISD::VECTOR_SHUFFLE);

1251

1252

1253

if (Subtarget.useCRBits()) {

1254

setTargetDAGCombine(ISD::TRUNCATE);

1255

setTargetDAGCombine(ISD::SETCC);

1256

setTargetDAGCombine(ISD::SELECT_CC);

1257

}

1258

1259

if (Subtarget.hasP9Altivec()) {

1260

setTargetDAGCombine(ISD::ABS);

1261

setTargetDAGCombine(ISD::VSELECT);

1262

}

1263

1264

setLibcallName(RTLIB::LOG_F128, "logf128");

1265

setLibcallName(RTLIB::LOG2_F128, "log2f128");

1266

setLibcallName(RTLIB::LOG10_F128, "log10f128");

1267

setLibcallName(RTLIB::EXP_F128, "expf128");

1268

setLibcallName(RTLIB::EXP2_F128, "exp2f128");

1269

setLibcallName(RTLIB::SIN_F128, "sinf128");

1270

setLibcallName(RTLIB::COS_F128, "cosf128");

1271

setLibcallName(RTLIB::POW_F128, "powf128");

1272

setLibcallName(RTLIB::FMIN_F128, "fminf128");

1273

setLibcallName(RTLIB::FMAX_F128, "fmaxf128");

1274

setLibcallName(RTLIB::POWI_F128, "__powikf2");

1275

setLibcallName(RTLIB::REM_F128, "fmodf128");

1276

1277

// With 32 condition bits, we don't need to sink (and duplicate) compares

1278

// aggressively in CodeGenPrep.

1279

if (Subtarget.useCRBits()) {

1280

setHasMultipleConditionRegisters();

1281

setJumpIsExpensive();

1282

}

1283

1284

setMinFunctionAlignment(Align(4));

1285

1286

switch (Subtarget.getCPUDirective()) {

1287

default: break;

1288

case PPC::DIR_970:

1289

case PPC::DIR_A2:

1290

case PPC::DIR_E500:

1291

case PPC::DIR_E500mc:

1292

case PPC::DIR_E5500:

1293

case PPC::DIR_PWR4:

1294

case PPC::DIR_PWR5:

1295

case PPC::DIR_PWR5X:

1296

case PPC::DIR_PWR6:

1297

case PPC::DIR_PWR6X:

1298

case PPC::DIR_PWR7:

1299

case PPC::DIR_PWR8:

1300

case PPC::DIR_PWR9:

1301

case PPC::DIR_PWR10:

1302

case PPC::DIR_PWR_FUTURE:

1303

setPrefLoopAlignment(Align(16));

1304

setPrefFunctionAlignment(Align(16));

1305

break;

1306

}

1307

1308

if (Subtarget.enableMachineScheduler())

1309

setSchedulingPreference(Sched::Source);

1310

else

1311

setSchedulingPreference(Sched::Hybrid);

1312

1313

computeRegisterProperties(STI.getRegisterInfo());

1314

1315

// The Freescale cores do better with aggressive inlining of memcpy and

1316

// friends. GCC uses same threshold of 128 bytes (= 32 word stores).

1317

if (Subtarget.getCPUDirective() == PPC::DIR_E500mc ||

1318

Subtarget.getCPUDirective() == PPC::DIR_E5500) {

1319

MaxStoresPerMemset = 32;

1320

MaxStoresPerMemsetOptSize = 16;

1321

MaxStoresPerMemcpy = 32;

1322

MaxStoresPerMemcpyOptSize = 8;

1323

MaxStoresPerMemmove = 32;

1324

MaxStoresPerMemmoveOptSize = 8;

1325

} else if (Subtarget.getCPUDirective() == PPC::DIR_A2) {

1326

// The A2 also benefits from (very) aggressive inlining of memcpy and

1327

// friends. The overhead of the function call, even when warm, can be

1328

// over one hundred cycles.

1329

MaxStoresPerMemset = 128;

1330

MaxStoresPerMemcpy = 128;

1331

MaxStoresPerMemmove = 128;

1332

MaxLoadsPerMemcmp = 128;

1333

} else {

1334

MaxLoadsPerMemcmp = 8;

1335

MaxLoadsPerMemcmpOptSize = 4;

1336

}

1337

1338

IsStrictFPEnabled = true;

1339

1340

// Let the subtarget (CPU) decide if a predictable select is more expensive

1341

// than the corresponding branch. This information is used in CGP to decide

1342

// when to convert selects into branches.

1343

PredictableSelectIsExpensive = Subtarget.isPredictableSelectIsExpensive();

1344

}

1345

1346

/// getMaxByValAlign - Helper for getByValTypeAlignment to determine

1347

/// the desired ByVal argument alignment.

1348

static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {

1349

if (MaxAlign == MaxMaxAlign)

1350

return;

1351

if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {

1352

if (MaxMaxAlign >= 32 &&

1353

VTy->getPrimitiveSizeInBits().getFixedSize() >= 256)

1354

MaxAlign = Align(32);

1355

else if (VTy->getPrimitiveSizeInBits().getFixedSize() >= 128 &&

1356

MaxAlign < 16)

1357

MaxAlign = Align(16);

1358

} else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {

1359

Align EltAlign;

1360

getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);

1361

if (EltAlign > MaxAlign)

1362

MaxAlign = EltAlign;

1363

} else if (StructType *STy = dyn_cast<StructType>(Ty)) {

1364

for (auto *EltTy : STy->elements()) {

1365

Align EltAlign;

1366

getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);

1367

if (EltAlign > MaxAlign)

1368

MaxAlign = EltAlign;

1369

if (MaxAlign == MaxMaxAlign)

1370

break;

1371

}

1372

}

1373

}

1374

1375

/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate

1376

/// function arguments in the caller parameter area.

1377

unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty,

1378

const DataLayout &DL) const {

1379

// 16byte and wider vectors are passed on 16byte boundary.

1380

// The rest is 8 on PPC64 and 4 on PPC32 boundary.

1381

Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);

1382

if (Subtarget.hasAltivec())

1383

getMaxByValAlign(Ty, Alignment, Align(16));

1384

return Alignment.value();

1385

}

1386

1387

bool PPCTargetLowering::useSoftFloat() const {

1388

return Subtarget.useSoftFloat();

1389

}

1390

1391

bool PPCTargetLowering::hasSPE() const {

1392

return Subtarget.hasSPE();

1393

}

1394

1395

/// Return true exactly when \p VT is a scalar integer type.
bool PPCTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
  const bool IsScalarInt = VT.isScalarInteger();
  return IsScalarInt;
}

1398

1399

const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {

1400

switch ((PPCISD::NodeType)Opcode) {

1401

case PPCISD::FIRST_NUMBER: break;

1402

case PPCISD::FSEL: return "PPCISD::FSEL";

1403

case PPCISD::XSMAXCDP: return "PPCISD::XSMAXCDP";

1404

case PPCISD::XSMINCDP: return "PPCISD::XSMINCDP";

1405

case PPCISD::FCFID: return "PPCISD::FCFID";

1406

case PPCISD::FCFIDU: return "PPCISD::FCFIDU";

1407

case PPCISD::FCFIDS: return "PPCISD::FCFIDS";

1408

case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS";

1409

case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";

1410

case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";

1411

case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ";

1412

case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ";

1413

case PPCISD::FP_TO_UINT_IN_VSR:

1414

return "PPCISD::FP_TO_UINT_IN_VSR,";

1415

case PPCISD::FP_TO_SINT_IN_VSR:

1416

return "PPCISD::FP_TO_SINT_IN_VSR";

1417

case PPCISD::FRE: return "PPCISD::FRE";

1418

case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";

1419

case PPCISD::STFIWX: return "PPCISD::STFIWX";

1420

case PPCISD::VPERM: return "PPCISD::VPERM";

1421

case PPCISD::XXSPLT: return "PPCISD::XXSPLT";

1422

case PPCISD::XXSPLTI_SP_TO_DP:

1423

return "PPCISD::XXSPLTI_SP_TO_DP";

1424

case PPCISD::XXSPLTI32DX:

1425

return "PPCISD::XXSPLTI32DX";

1426

case PPCISD::VECINSERT: return "PPCISD::VECINSERT";

1427

case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI";

1428

case PPCISD::VECSHL: return "PPCISD::VECSHL";

1429

case PPCISD::CMPB: return "PPCISD::CMPB";

1430

case PPCISD::Hi: return "PPCISD::Hi";

1431

case PPCISD::Lo: return "PPCISD::Lo";

1432

case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";

1433

case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";

1434

case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";

1435

case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";

1436

case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET";

1437

case PPCISD::PROBED_ALLOCA: return "PPCISD::PROBED_ALLOCA";

1438

case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";

1439

case PPCISD::SRL: return "PPCISD::SRL";

1440

case PPCISD::SRA: return "PPCISD::SRA";

1441

case PPCISD::SHL: return "PPCISD::SHL";

1442

case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE";

1443

case PPCISD::CALL: return "PPCISD::CALL";

1444

case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";

1445

case PPCISD::CALL_NOTOC: return "PPCISD::CALL_NOTOC";

1446

case PPCISD::MTCTR: return "PPCISD::MTCTR";

1447

case PPCISD::BCTRL: return "PPCISD::BCTRL";

1448

case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";

1449

case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";

1450

case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE";

1451

case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";

1452

case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";

1453

case PPCISD::MFOCRF: return "PPCISD::MFOCRF";

1454

case PPCISD::MFVSR: return "PPCISD::MFVSR";

1455

case PPCISD::MTVSRA: return "PPCISD::MTVSRA";

1456

case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ";

1457

case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP";

1458

case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP";

1459

case PPCISD::SCALAR_TO_VECTOR_PERMUTED:

1460

return "PPCISD::SCALAR_TO_VECTOR_PERMUTED";

1461

case PPCISD::ANDI_rec_1_EQ_BIT:

1462

return "PPCISD::ANDI_rec_1_EQ_BIT";

1463

case PPCISD::ANDI_rec_1_GT_BIT:

1464

return "PPCISD::ANDI_rec_1_GT_BIT";

1465

case PPCISD::VCMP: return "PPCISD::VCMP";

1466

case PPCISD::VCMPo: return "PPCISD::VCMPo";

1467

case PPCISD::LBRX: return "PPCISD::LBRX";

1468

case PPCISD::STBRX: return "PPCISD::STBRX";

1469

case PPCISD::LFIWAX: return "PPCISD::LFIWAX";

1470

case PPCISD::LFIWZX: return "PPCISD::LFIWZX";

1471

case PPCISD::LXSIZX: return "PPCISD::LXSIZX";

1472

case PPCISD::STXSIX: return "PPCISD::STXSIX";

1473

case PPCISD::VEXTS: return "PPCISD::VEXTS";

1474

case PPCISD::LXVD2X: return "PPCISD::LXVD2X";

1475

case PPCISD::STXVD2X: return "PPCISD::STXVD2X";

1476

case PPCISD::LOAD_VEC_BE: return "PPCISD::LOAD_VEC_BE";

1477

case PPCISD::STORE_VEC_BE: return "PPCISD::STORE_VEC_BE";

1478

case PPCISD::ST_VSR_SCAL_INT:

1479

return "PPCISD::ST_VSR_SCAL_INT";

1480

case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";

1481

case PPCISD::BDNZ: return "PPCISD::BDNZ";

1482

case PPCISD::BDZ: return "PPCISD::BDZ";

1483

case PPCISD::MFFS: return "PPCISD::MFFS";

1484

case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";

1485

case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";

1486

case PPCISD::CR6SET: return "PPCISD::CR6SET";

1487

case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";

1488

case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT";

1489

case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT";

1490

case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";

1491

case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";

1492

case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";

1493

case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";

1494

case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";

1495

case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";

1496

case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";

1497

case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";

1498

case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";

1499

case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";

1500

case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";

1501

case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";

1502

case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";

1503

case PPCISD::PADDI_DTPREL:

1504

return "PPCISD::PADDI_DTPREL";

1505

case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";

1506

case PPCISD::SC: return "PPCISD::SC";

1507

case PPCISD::CLRBHRB: return "PPCISD::CLRBHRB";

1508

case PPCISD::MFBHRBE: return "PPCISD::MFBHRBE";

1509

case PPCISD::RFEBB: return "PPCISD::RFEBB";

1510

case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD";

1511

case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN";

1512

case PPCISD::VABSD: return "PPCISD::VABSD";

1513

case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128";

1514

case PPCISD::BUILD_SPE64: return "PPCISD::BUILD_SPE64";

1515

case PPCISD::EXTRACT_SPE: return "PPCISD::EXTRACT_SPE";

1516

case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI";

1517

case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH";

1518

case PPCISD::FP_EXTEND_HALF: return "PPCISD::FP_EXTEND_HALF";

1519

case PPCISD::MAT_PCREL_ADDR: return "PPCISD::MAT_PCREL_ADDR";

1520

case PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR:

1521

return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR";

1522

case PPCISD::TLS_LOCAL_EXEC_MAT_ADDR:

1523

return "PPCISD::TLS_LOCAL_EXEC_MAT_ADDR";

1524

case PPCISD::ACC_BUILD: return "PPCISD::ACC_BUILD";

1525

case PPCISD::PAIR_BUILD: return "PPCISD::PAIR_BUILD";

1526

case PPCISD::EXTRACT_VSX_REG: return "PPCISD::EXTRACT_VSX_REG";

1527

case PPCISD::XXMFACC: return "PPCISD::XXMFACC";

1528

case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT";

1529

case PPCISD::FNMSUB: return "PPCISD::FNMSUB";

1530

case PPCISD::STRICT_FADDRTZ:

1531

return "PPCISD::STRICT_FADDRTZ";

1532

case PPCISD::STRICT_FCTIDZ:

1533

return "PPCISD::STRICT_FCTIDZ";

1534

case PPCISD::STRICT_FCTIWZ:

1535

return "PPCISD::STRICT_FCTIWZ";

1536

case PPCISD::STRICT_FCTIDUZ:

1537

return "PPCISD::STRICT_FCTIDUZ";

1538

case PPCISD::STRICT_FCTIWUZ:

1539

return "PPCISD::STRICT_FCTIWUZ";

1540

case PPCISD::STRICT_FCFID:

1541

return "PPCISD::STRICT_FCFID";

1542

case PPCISD::STRICT_FCFIDU:

1543

return "PPCISD::STRICT_FCFIDU";

1544

case PPCISD::STRICT_FCFIDS:

1545

return "PPCISD::STRICT_FCFIDS";

1546

case PPCISD::STRICT_FCFIDUS:

1547

return "PPCISD::STRICT_FCFIDUS";

1548

case PPCISD::LXVRZX: return "PPCISD::LXVRZX";

1549

}

1550

return nullptr;

1551

}

1552

1553

/// Return the value type produced by a SETCC on operands of type \p VT.
///
/// Vector compares yield \p VT with its elements changed to integers. Scalar
/// compares yield i1 when the subtarget uses CR bits and i32 otherwise.
/// \p DL and \p C are not consulted.
EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
                                          EVT VT) const {
  if (VT.isVector())
    return VT.changeVectorElementTypeToInteger();

  return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
}

1560

1561

/// Always returns true. \p VT must be a floating-point type; this is checked
/// only by an assertion.
bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const {
  assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
  return true;
}

1565

1566

//===----------------------------------------------------------------------===//

1567

// Node matching predicates, for use by the tblgen matching code.

1568

//===----------------------------------------------------------------------===//

1569

1570

/// isFloatingPointZero - Return true if this is 0.0 or -0.0.

1571

static bool isFloatingPointZero(SDValue Op) {

1572

if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))

1573

return CFP->getValueAPF().isZero();

1574

else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {

1575

// Maybe this has already been legalized into the constant pool?

1576

if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))

1577

if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))

1578

return CFP->getValueAPF().isZero();

1579

}

1580

return false;

1581

}

1582

1583

/// isConstantOrUndef - \p Op is a shuffle-mask element, where a negative
/// value stands for undef. Return true if \p Op is undef or if it equals
/// \p Val.
static bool isConstantOrUndef(int Op, int Val) {
  if (Op < 0)
    return true;
  return Op == Val;
}

1588

1589

/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a

1590

/// VPKUHUM instruction.

1591

/// The ShuffleKind distinguishes between big-endian operations with

1592

/// two different inputs (0), either-endian operations with two identical

1593

/// inputs (1), and little-endian operations with two different inputs (2).

1594

/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).

1595

bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,

1596

SelectionDAG &DAG) {

1597

bool IsLE = DAG.getDataLayout().isLittleEndian();

1598

if (ShuffleKind == 0) {

1599

if (IsLE)

1600

return false;

1601

for (unsigned i = 0; i != 16; ++i)

1602

if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))

1603

return false;

1604

} else if (ShuffleKind == 2) {

1605

if (!IsLE)

1606

return false;

1607

for (unsigned i = 0; i != 16; ++i)

1608

if (!isConstantOrUndef(N->getMaskElt(i), i*2))

1609

return false;

1610

} else if (ShuffleKind == 1) {

1611

unsigned j = IsLE ? 0 : 1;

1612

for (unsigned i = 0; i != 8; ++i)

1613

if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) ||

1614

!isConstantOrUndef(N->getMaskElt(i+8), i*2+j))

1615

return false;

1616

}

1617

return true;

1618

}

1619

1620

/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a

1621

/// VPKUWUM instruction.

1622

/// The ShuffleKind distinguishes between big-endian operations with

1623

/// two different inputs (0), either-endian operations with two identical

1624

/// inputs (1), and little-endian operations with two different inputs (2).

1625

/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).

1626

bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,

1627

SelectionDAG &DAG) {

1628

bool IsLE = DAG.getDataLayout().isLittleEndian();

1629

if (ShuffleKind == 0) {

1630

if (IsLE)

1631

return false;

1632

for (unsigned i = 0; i != 16; i += 2)

1633

if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||

1634

!isConstantOrUndef(N->getMaskElt(i+1), i*2+3))

1635

return false;

1636

} else if (ShuffleKind == 2) {

1637

if (!IsLE)

1638

return false;

1639

for (unsigned i = 0; i != 16; i += 2)

1640

if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||

1641

!isConstantOrUndef(N->getMaskElt(i+1), i*2+1))

1642

return false;

1643

} else if (ShuffleKind == 1) {

1644

unsigned j = IsLE ? 0 : 2;

1645

for (unsigned i = 0; i != 8; i += 2)

1646

if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||

1647

!isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||

1648

!isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||

1649

!isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))

1650

return false;

1651

}

1652

return true;

1653

}

1654

1655

/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a

1656

/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the

1657

/// current subtarget.

1658

///

1659

/// The ShuffleKind distinguishes between big-endian operations with

1660

/// two different inputs (0), either-endian operations with two identical

1661

/// inputs (1), and little-endian operations with two different inputs (2).

1662

/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).

1663

bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,

1664

SelectionDAG &DAG) {

1665

const PPCSubtarget& Subtarget =

1666

static_cast<const PPCSubtarget&>(DAG.getSubtarget());

1667

if (!Subtarget.hasP8Vector())

1668

return false;

1669

1670

bool IsLE = DAG.getDataLayout().isLittleEndian();

1671

if (ShuffleKind == 0) {

1672

if (IsLE)

1673

return false;

1674

for (unsigned i = 0; i != 16; i += 4)

1675

if (!isConstantOrUndef(N->getMaskElt(i ), i*2+4) ||

1676

!isConstantOrUndef(N->getMaskElt(i+1), i*2+5) ||

1677

!isConstantOrUndef(N->getMaskElt(i+2), i*2+6) ||

1678

!isConstantOrUndef(N->getMaskElt(i+3), i*2+7))

1679

return false;

1680

} else if (ShuffleKind == 2) {

1681

if (!IsLE)

1682

return false;

1683

for (unsigned i = 0; i != 16; i += 4)

1684

if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||

1685

!isConstantOrUndef(N->getMaskElt(i+1), i*2+1) ||

1686

!isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||

1687

!isConstantOrUndef(N->getMaskElt(i+3), i*2+3))

1688

return false;

1689

} else if (ShuffleKind == 1) {

1690

unsigned j = IsLE ? 0 : 4;

1691

for (unsigned i = 0; i != 8; i += 4)

1692

if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||

1693

!isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||

1694

!isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) ||

1695

!isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) ||

1696

!isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||

1697

!isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) ||

1698

!isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||

1699

!isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))

1700

return false;

1701

}

1702

return true;

1703

}

1704

1705

/// isVMerge - Common function, used to match vmrg* shuffles.

1706

///

1707

static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,

1708

unsigned LHSStart, unsigned RHSStart) {

1709

if (N->getValueType(0) != MVT::v16i8)

1710

return false;

1711

assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&(((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
"Unsupported merge size!") ? static_cast<void> (0) : __assert_fail
("(UnitSize == 1 || UnitSize == 2 || UnitSize == 4) && \"Unsupported merge size!\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 1712, __PRETTY_FUNCTION__))

1712

"Unsupported merge size!")(((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
"Unsupported merge size!") ? static_cast<void> (0) : __assert_fail
("(UnitSize == 1 || UnitSize == 2 || UnitSize == 4) && \"Unsupported merge size!\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 1712, __PRETTY_FUNCTION__));

1713

1714

for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units

1715

for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit

1716

if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),

1717

LHSStart+j+i*UnitSize) ||

1718

!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),

1719

RHSStart+j+i*UnitSize))

1720

return false;

1721

}

1722

return true;

1723

}

1724

1725

/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for

1726

/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).

1727

/// The ShuffleKind distinguishes between big-endian merges with two

1728

/// different inputs (0), either-endian merges with two identical inputs (1),

1729

/// and little-endian merges with two different inputs (2). For the latter,

1730

/// the input operands are swapped (see PPCInstrAltivec.td).

1731

bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,

1732

unsigned ShuffleKind, SelectionDAG &DAG) {

1733

if (DAG.getDataLayout().isLittleEndian()) {

1734

if (ShuffleKind == 1) // unary

1735

return isVMerge(N, UnitSize, 0, 0);

1736

else if (ShuffleKind == 2) // swapped

1737

return isVMerge(N, UnitSize, 0, 16);

1738

else

1739

return false;

1740

} else {

1741

if (ShuffleKind == 1) // unary

1742

return isVMerge(N, UnitSize, 8, 8);

1743

else if (ShuffleKind == 0) // normal

1744

return isVMerge(N, UnitSize, 8, 24);

1745

else

1746

return false;

1747

}

1748

}

1749

1750

/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for

1751

/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).

1752

/// The ShuffleKind distinguishes between big-endian merges with two

1753

/// different inputs (0), either-endian merges with two identical inputs (1),

1754

/// and little-endian merges with two different inputs (2). For the latter,

1755

/// the input operands are swapped (see PPCInstrAltivec.td).

1756

bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,

1757

unsigned ShuffleKind, SelectionDAG &DAG) {

1758

if (DAG.getDataLayout().isLittleEndian()) {

1759

if (ShuffleKind == 1) // unary

1760

return isVMerge(N, UnitSize, 8, 8);

1761

else if (ShuffleKind == 2) // swapped

1762

return isVMerge(N, UnitSize, 8, 24);

1763

else

1764

return false;

1765

} else {

1766

if (ShuffleKind == 1) // unary

1767

return isVMerge(N, UnitSize, 0, 0);

1768

else if (ShuffleKind == 0) // normal

1769

return isVMerge(N, UnitSize, 0, 16);

1770

else

1771

return false;

1772

}

1773

}

1774

1775

/**

1776

* Common function used to match vmrgew and vmrgow shuffles

1777

*

1778

* The indexOffset determines whether to look for even or odd words in

1779

* the shuffle mask. This is based on the of the endianness of the target

1780

* machine.

1781

* - Little Endian:

1782

* - Use offset of 0 to check for odd elements

1783

* - Use offset of 4 to check for even elements

1784

* - Big Endian:

1785

* - Use offset of 0 to check for even elements

1786

* - Use offset of 4 to check for odd elements

1787

* A detailed description of the vector element ordering for little endian and

1788

* big endian can be found at

1789

* http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html

1790

* Targeting your applications - what little endian and big endian IBM XL C/C++

1791

* compiler differences mean to you

1792

*

1793

* The mask to the shuffle vector instruction specifies the indices of the

1794

* elements from the two input vectors to place in the result. The elements are

1795

* numbered in array-access order, starting with the first vector. These vectors

1796

* are always of type v16i8, thus each vector will contain 16 elements of size

1797

* 8. More info on the shuffle vector can be found in the

1798

* http://llvm.org/docs/LangRef.html#shufflevector-instruction

1799

* Language Reference.

1800

*

1801

* The RHSStartValue indicates whether the same input vectors are used (unary)

1802

* or two different input vectors are used, based on the following:

1803

* - If the instruction uses the same vector for both inputs, the range of the

1804

* indices will be 0 to 15. In this case, the RHSStart value passed should

1805

* be 0.

1806

* - If the instruction has two different vectors then the range of the

1807

* indices will be 0 to 31. In this case, the RHSStart value passed should

1808

* be 16 (indices 0-15 specify elements in the first vector while indices 16

1809

* to 31 specify elements in the second vector).

1810

*

1811

* \param[in] N The shuffle vector SD Node to analyze

1812

* \param[in] IndexOffset Specifies whether to look for even or odd elements

1813

* \param[in] RHSStartValue Specifies the starting index for the righthand input

1814

* vector to the shuffle_vector instruction

1815

* \return true iff this shuffle vector represents an even or odd word merge

1816

*/

1817

static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,

1818

unsigned RHSStartValue) {

1819

if (N->getValueType(0) != MVT::v16i8)

1820

return false;

1821

1822

for (unsigned i = 0; i < 2; ++i)

1823

for (unsigned j = 0; j < 4; ++j)

1824

if (!isConstantOrUndef(N->getMaskElt(i*4+j),

1825

i*RHSStartValue+j+IndexOffset) ||

1826

!isConstantOrUndef(N->getMaskElt(i*4+j+8),

1827

i*RHSStartValue+j+IndexOffset+8))

1828

return false;

1829

return true;

1830

}

1831

1832

/**

1833

* Determine if the specified shuffle mask is suitable for the vmrgew or

1834

* vmrgow instructions.

1835

*

1836

* \param[in] N The shuffle vector SD Node to analyze

1837

* \param[in] CheckEven Check for an even merge (true) or an odd merge (false)

1838

* \param[in] ShuffleKind Identify the type of merge:

1839

* - 0 = big-endian merge with two different inputs;

1840

* - 1 = either-endian merge with two identical inputs;

1841

* - 2 = little-endian merge with two different inputs (inputs are swapped for

1842

* little-endian merges).

1843

* \param[in] DAG The current SelectionDAG

1844

* \return true iff this shuffle mask

1845

*/

1846

bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,

1847

unsigned ShuffleKind, SelectionDAG &DAG) {

1848

if (DAG.getDataLayout().isLittleEndian()) {

1849

unsigned indexOffset = CheckEven ? 4 : 0;

1850

if (ShuffleKind == 1) // Unary

1851

return isVMerge(N, indexOffset, 0);

1852

else if (ShuffleKind == 2) // swapped

1853

return isVMerge(N, indexOffset, 16);

1854

else

1855

return false;

1856

}

1857

else {

1858

unsigned indexOffset = CheckEven ? 0 : 4;

1859

if (ShuffleKind == 1) // Unary

1860

return isVMerge(N, indexOffset, 0);

1861

else if (ShuffleKind == 0) // Normal

1862

return isVMerge(N, indexOffset, 16);

1863

else

1864

return false;

1865

}

1866

return false;

1867

}

1868

1869

/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift

1870

/// amount, otherwise return -1.

1871

/// The ShuffleKind distinguishes between big-endian operations with two

1872

/// different inputs (0), either-endian operations with two identical inputs

1873

/// (1), and little-endian operations with two different inputs (2). For the

1874

/// latter, the input operands are swapped (see PPCInstrAltivec.td).

1875

int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,

1876

SelectionDAG &DAG) {

1877

if (N->getValueType(0) != MVT::v16i8)

1878

return -1;

1879

1880

ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);

1881

1882

// Find the first non-undef value in the shuffle mask.

1883

unsigned i;

1884

for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)

1885

/*search*/;

1886

1887

if (i == 16) return -1; // all undef.

1888

1889

// Otherwise, check to see if the rest of the elements are consecutively

1890

// numbered from this value.

1891

unsigned ShiftAmt = SVOp->getMaskElt(i);

1892

if (ShiftAmt < i) return -1;

1893

1894

ShiftAmt -= i;

1895

bool isLE = DAG.getDataLayout().isLittleEndian();

1896

1897

if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {

1898

// Check the rest of the elements to see if they are consecutive.

1899

for (++i; i != 16; ++i)

1900

if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))

1901

return -1;

1902

} else if (ShuffleKind == 1) {

1903

// Check the rest of the elements to see if they are consecutive.

1904

for (++i; i != 16; ++i)

1905

if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))

1906

return -1;

1907

} else

1908

return -1;

1909

1910

if (isLE)

1911

ShiftAmt = 16 - ShiftAmt;

1912

1913

return ShiftAmt;

1914

}

1915

1916

/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand

1917

/// specifies a splat of a single element that is suitable for input to

1918

/// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).

1919

bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {

1920

assert(N->getValueType(0) == MVT::v16i8 && isPowerOf2_32(EltSize) &&((N->getValueType(0) == MVT::v16i8 && isPowerOf2_32
(EltSize) && EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes"
) ? static_cast<void> (0) : __assert_fail ("N->getValueType(0) == MVT::v16i8 && isPowerOf2_32(EltSize) && EltSize <= 8 && \"Can only handle 1,2,4,8 byte element sizes\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 1921, __PRETTY_FUNCTION__))

1921

EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes")((N->getValueType(0) == MVT::v16i8 && isPowerOf2_32
(EltSize) && EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes"
) ? static_cast<void> (0) : __assert_fail ("N->getValueType(0) == MVT::v16i8 && isPowerOf2_32(EltSize) && EltSize <= 8 && \"Can only handle 1,2,4,8 byte element sizes\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 1921, __PRETTY_FUNCTION__));

1922

1923

// The consecutive indices need to specify an element, not part of two

1924

// different elements. So abandon ship early if this isn't the case.

1925

if (N->getMaskElt(0) % EltSize != 0)

1926

return false;

1927

1928

// This is a splat operation if each element of the permute is the same, and

1929

// if the value doesn't reference the second vector.

1930

unsigned ElementBase = N->getMaskElt(0);

1931

1932

// FIXME: Handle UNDEF elements too!

1933

if (ElementBase >= 16)

1934

return false;

1935

1936

// Check that the indices are consecutive, in the case of a multi-byte element

1937

// splatted with a v16i8 mask.

1938

for (unsigned i = 1; i != EltSize; ++i)

1939

if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))

1940

return false;

1941

1942

for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {

1943

if (N->getMaskElt(i) < 0) continue;

1944

for (unsigned j = 0; j != EltSize; ++j)

1945

if (N->getMaskElt(i+j) != N->getMaskElt(j))

1946

return false;

1947

}

1948

return true;

1949

}

1950

1951

/// Check that the mask is shuffling N byte elements. Within each N byte

1952

/// element of the mask, the indices could be either in increasing or

1953

/// decreasing order as long as they are consecutive.

1954

/// \param[in] N the shuffle vector SD Node to analyze

1955

/// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/

1956

/// Word/DoubleWord/QuadWord).

1957

/// \param[in] StepLen the delta indices number among the N byte element, if

1958

/// the mask is in increasing/decreasing order then it is 1/-1.

1959

/// \return true iff the mask is shuffling N byte elements.

1960

static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,

1961

int StepLen) {

1962

assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&(((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
"Unexpected element width.") ? static_cast<void> (0) :
__assert_fail ("(Width == 2 || Width == 4 || Width == 8 || Width == 16) && \"Unexpected element width.\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 1963, __PRETTY_FUNCTION__))

1963

"Unexpected element width.")(((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
"Unexpected element width.") ? static_cast<void> (0) :
__assert_fail ("(Width == 2 || Width == 4 || Width == 8 || Width == 16) && \"Unexpected element width.\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 1963, __PRETTY_FUNCTION__));

1964

assert((StepLen == 1 || StepLen == -1) && "Unexpected element width.")(((StepLen == 1 || StepLen == -1) && "Unexpected element width."
) ? static_cast<void> (0) : __assert_fail ("(StepLen == 1 || StepLen == -1) && \"Unexpected element width.\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 1964, __PRETTY_FUNCTION__));

1965

1966

unsigned NumOfElem = 16 / Width;

1967

unsigned MaskVal[16]; // Width is never greater than 16

1968

for (unsigned i = 0; i < NumOfElem; ++i) {

1969

MaskVal[0] = N->getMaskElt(i * Width);

1970

if ((StepLen == 1) && (MaskVal[0] % Width)) {

1971

return false;

1972

} else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {

1973

return false;

1974

}

1975

1976

for (unsigned int j = 1; j < Width; ++j) {

1977

MaskVal[j] = N->getMaskElt(i * Width + j);

1978

if (MaskVal[j] != MaskVal[j-1] + StepLen) {

1979

return false;

1980

}

1981

}

1982

}

1983

1984

return true;

1985

}

1986

1987

/// Check whether the shuffle mask describes inserting one word into a vector
/// that otherwise keeps its words in place.
///
/// The mask must consist of four words with increasing byte indices. Word
/// numbers 0-3 refer to the first input and 4-7 to the second.
///
/// \param[in] N the shuffle to analyze.
/// \param[out] ShiftElts shift amount, taken from an endian-specific table
///             indexed by the inserted word's position in its own vector.
/// \param[out] InsertAtByte byte position of the replaced word.
/// \param[out] Swap true when the inserted word comes from the first input.
/// \param[in] IsLE whether the target is little-endian.
/// \return true iff the mask matches. On failure with an undef second
/// operand, \p ShiftElts and \p Swap have still been written.
bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
                          unsigned &InsertAtByte, bool &Swap, bool IsLE) {
  if (!isNByteElemShuffleMask(N, 4, 1))
    return false;

  // Word index selected by each of the four result words (mask elements
  // 0, 4, 8 and 12).
  const unsigned Word[4] = {
      static_cast<unsigned>(N->getMaskElt(0) / 4),
      static_cast<unsigned>(N->getMaskElt(4) / 4),
      static_cast<unsigned>(N->getMaskElt(8) / 4),
      static_cast<unsigned>(N->getMaskElt(12) / 4)};
  const unsigned LittleEndianShifts[] = {2, 1, 0, 3};
  const unsigned BigEndianShifts[] = {3, 0, 1, 2};

  // True when every result word except Slot selects its own position plus
  // Base (Base 0: first input left in place, Base 4: second input).
  auto OthersInPlace = [&Word](unsigned Slot, unsigned Base) {
    for (unsigned Other = 0; Other < 4; ++Other)
      if (Other != Slot && Word[Other] != Other + Base)
        return false;
    return true;
  };
  // Byte position of result word Slot.
  auto ByteOfSlot = [IsLE](unsigned Slot) {
    return IsLE ? 12 - 4 * Slot : 4 * Slot;
  };

  // Two-input forms, tried for Slot = 0..3 in turn. With H in [4,7] and L in
  // [0,3], Slot 0 matches "H, 1, 2, 3" or "L, 5, 6, 7", and so on.
  for (unsigned Slot = 0; Slot < 4; ++Slot) {
    const unsigned Inserted = Word[Slot];
    const bool FromSecond = Inserted > 3 && OthersInPlace(Slot, 0);
    const bool FromFirst = Inserted < 4 && OthersInPlace(Slot, 4);
    if (!FromSecond && !FromFirst)
      continue;

    ShiftElts = IsLE ? LittleEndianShifts[Inserted & 0x3]
                     : BigEndianShifts[Inserted & 0x3];
    InsertAtByte = ByteOfSlot(Slot);
    Swap = Inserted < 4;
    return true;
  }

  // If both vector operands for the shuffle are the same vector, the mask will
  // contain only elements from the first one and the second one will be undef.
  if (!N->getOperand(1).isUndef())
    return false;

  ShiftElts = 0;
  Swap = true;
  const unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
  for (unsigned Slot = 0; Slot < 4; ++Slot) {
    if (Word[Slot] == XXINSERTWSrcElem && OthersInPlace(Slot, 0)) {
      InsertAtByte = ByteOfSlot(Slot);
      return true;
    }
  }

  return false;
}

2061

2062

/// Check whether the v16i8 shuffle mask of \p N selects four consecutive
/// words (with wrap-around) so that it can be described by a word shift
/// amount.
///
/// \param ShiftElts [out] number of word elements to shift by.
/// \param Swap      [out] whether the two inputs must be exchanged.
/// \param IsLE      selects the little-endian computation of the outputs.
/// \return true if the mask has the required shape.
bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
                               bool &Swap, bool IsLE) {
  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");

  // Ensure each byte index of the word is consecutive.
  if (!isNByteElemShuffleMask(N, 4, 1))
    return false;

  // Mask elements 0, 4, 8, 12 are the beginning of words; convert them to
  // word indices.
  const unsigned M0 = N->getMaskElt(0) / 4;
  const unsigned M1 = N->getMaskElt(4) / 4;
  const unsigned M2 = N->getMaskElt(8) / 4;
  const unsigned M3 = N->getMaskElt(12) / 4;

  // True when each word index is the previous one plus one, modulo Modulus.
  auto IsConsecutiveMod = [=](unsigned Modulus) {
    return M1 == (M0 + 1) % Modulus && M2 == (M1 + 1) % Modulus &&
           M3 == (M2 + 1) % Modulus;
  };

  // If both vector operands for the shuffle are the same vector, the mask will
  // contain only elements from the first one and the second one will be undef.
  if (N->getOperand(1).isUndef()) {
    assert(M0 < 4 && "Indexing into an undef vector?");
    if (!IsConsecutiveMod(4))
      return false;

    ShiftElts = IsLE ? (4 - M0) % 4 : M0;
    Swap = false;
    return true;
  }

  // Ensure each word index of the ShuffleVector Mask is consecutive.
  if (!IsConsecutiveMod(8))
    return false;

  if (IsLE) {
    if (M0 == 0 || (M0 >= 5 && M0 <= 7)) {
      // Input vectors don't need to be swapped if the leading element
      // of the result is one of the 3 left elements of the second vector
      // (or if there is no shift to be done at all).
      Swap = false;
      ShiftElts = (8 - M0) % 8;
    } else if (M0 >= 1 && M0 <= 4) {
      // Input vectors need to be swapped if the leading element
      // of the result is one of the 3 left elements of the first vector
      // (or if we're shifting by 4 - thereby simply swapping the vectors).
      Swap = true;
      ShiftElts = (4 - M0) % 4;
    }
    return true;
  }

  // Big endian.
  if (M0 <= 3) {
    // Input vectors don't need to be swapped if the leading element
    // of the result is one of the 4 elements of the first vector.
    Swap = false;
    ShiftElts = M0;
  } else if (M0 >= 4 && M0 <= 7) {
    // Input vectors need to be swapped if the leading element
    // of the result is one of the 4 elements of the right vector.
    Swap = true;
    ShiftElts = M0 - 4;
  }
  return true;
}

2123

2124

/// Shared implementation of the isXXBR*ShuffleMask predicates.
///
/// Returns true when the v16i8 mask of \p N passes
/// isNByteElemShuffleMask(N, Width, -1) and, for every group start
/// i = 0, Width, 2*Width, ... below 16, mask element i equals i + Width - 1.
static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) {
  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");

  if (!isNByteElemShuffleMask(N, Width, -1))
    return false;

  // The first mask entry of each group must name the last byte of that group.
  int GroupStart = 0;
  while (GroupStart < 16) {
    const int ExpectedFirst = GroupStart + Width - 1;
    if (N->getMaskElt(GroupStart) != ExpectedFirst)
      return false;
    GroupStart += Width;
  }

  return true;
}

2136

2137

/// Returns the result of isXXBRShuffleMaskHelper for 2-byte groups.
bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
  const int HalfwordBytes = 2;
  return isXXBRShuffleMaskHelper(N, HalfwordBytes);
}

2140

2141

/// Returns the result of isXXBRShuffleMaskHelper for 4-byte groups.
bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
  const int WordBytes = 4;
  return isXXBRShuffleMaskHelper(N, WordBytes);
}

2144

2145

/// Returns the result of isXXBRShuffleMaskHelper for 8-byte groups.
bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
  const int DoublewordBytes = 8;
  return isXXBRShuffleMaskHelper(N, DoublewordBytes);
}

2148

2149

/// Returns the result of isXXBRShuffleMaskHelper for a single 16-byte group.
bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
  const int QuadwordBytes = 16;
  return isXXBRShuffleMaskHelper(N, QuadwordBytes);
}

2152

2153

/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap

2154

/// if the inputs to the instruction should be swapped and set \p DM to the

2155

/// value for the immediate.

2156

/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI

2157

/// AND element 0 of the result comes from the first input (LE) or second input

2158

/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.

2159

/// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle

2160

/// mask.

2161

bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,

2162

bool &Swap, bool IsLE) {

2163

assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8")((N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8"
) ? static_cast<void> (0) : __assert_fail ("N->getValueType(0) == MVT::v16i8 && \"Shuffle vector expects v16i8\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 2163, __PRETTY_FUNCTION__));

2164

2165

// Ensure each byte index of the double word is consecutive.

2166

if (!isNByteElemShuffleMask(N, 8, 1))

2167

return false;

2168

2169

unsigned M0 = N->getMaskElt(0) / 8;

2170

unsigned M1 = N->getMaskElt(8) / 8;

2171

assert(((M0 | M1) < 4) && "A mask element out of bounds?")((((M0 | M1) < 4) && "A mask element out of bounds?"
) ? static_cast<void> (0) : __assert_fail ("((M0 | M1) < 4) && \"A mask element out of bounds?\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 2171, __PRETTY_FUNCTION__));

2172

2173

// If both vector operands for the shuffle are the same vector, the mask will

2174

// contain only elements from the first one and the second one will be undef.

2175

if (N->getOperand(1).isUndef()) {

2176

if ((M0 | M1) < 2) {

2177

DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);

2178

Swap = false;

2179

return true;

2180

} else

2181

return false;

2182

}

2183

2184

if (IsLE) {

2185

if (M0 > 1 && M1 < 2) {

2186

Swap = false;

2187

} else if (M0 < 2 && M1 > 1) {

2188

M0 = (M0 + 2) % 4;

2189

M1 = (M1 + 2) % 4;

2190

Swap = true;

2191

} else

2192

return false;

2193

2194

// Note: if control flow comes here that means Swap is already set above

2195

DM = (((~M1) & 1) << 1) + ((~M0) & 1);

2196

return true;

2197

} else { // BE

2198

if (M0 < 2 && M1 > 1) {

2199

Swap = false;

2200

} else if (M0 > 1 && M1 < 2) {

2201

M0 = (M0 + 2) % 4;

2202

M1 = (M1 + 2) % 4;

2203

Swap = true;

2204

} else

2205

return false;

2206

2207

// Note: if control flow comes here that means Swap is already set above

2208

DM = (M0 << 1) + (M1 & 1);

2209

return true;

2210

}

2211

}

2212

2213

2214

/// getSplatIdxForPPCMnemonics - Return the splat index as a value that is

2215

/// appropriate for PPC mnemonics (which have a big endian bias - namely

2216

/// elements are counted from the left of the vector register).

2217

unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,

2218

SelectionDAG &DAG) {

2219

ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);

2220

assert(isSplatShuffleMask(SVOp, EltSize))((isSplatShuffleMask(SVOp, EltSize)) ? static_cast<void>
(0) : __assert_fail ("isSplatShuffleMask(SVOp, EltSize)", "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 2220, __PRETTY_FUNCTION__));

2221

if (DAG.getDataLayout().isLittleEndian())

2222

return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);

2223

else

2224

return SVOp->getMaskElt(0) / EltSize;

2225

}

2226

2227

/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed

2228

/// by using a vspltis[bhw] instruction of the specified element size, return

2229

/// the constant being splatted. The ByteSize field indicates the number of

2230

/// bytes of each element [124] -> [bhw].

2231

SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {

2232

SDValue OpVal(nullptr, 0);

2233

2234

// If ByteSize of the splat is bigger than the element size of the

2235

// build_vector, then we have a case where we are checking for a splat where

2236

// multiple elements of the buildvector are folded together into a single

2237

// logical element of the splat (e.g. "vsplish 1" to splat {0,1}*8).

2238

unsigned EltSize = 16/N->getNumOperands();

2239

if (EltSize < ByteSize) {

2240

unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.

2241

SDValue UniquedVals[4];

2242

assert(Multiple > 1 && Multiple <= 4 && "How can this happen?")((Multiple > 1 && Multiple <= 4 && "How can this happen?"
) ? static_cast<void> (0) : __assert_fail ("Multiple > 1 && Multiple <= 4 && \"How can this happen?\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 2242, __PRETTY_FUNCTION__));

2243

2244

// See if all of the elements in the buildvector agree across.

2245

for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {

2246

if (N->getOperand(i).isUndef()) continue;

2247

// If the element isn't a constant, bail fully out.

2248

if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();

2249

2250

if (!UniquedVals[i&(Multiple-1)].getNode())

2251

UniquedVals[i&(Multiple-1)] = N->getOperand(i);

2252

else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))

2253

return SDValue(); // no match.

2254

}

2255

2256

// Okay, if we reached this point, UniquedVals[0..Multiple-1] contains

2257

// either constant or undef values that are identical for each chunk. See

2258

// if these chunks can form into a larger vspltis*.

2259

2260

// Check to see if all of the leading entries are either 0 or -1. If

2261

// neither, then this won't fit into the immediate field.

2262

bool LeadingZero = true;

2263

bool LeadingOnes = true;

2264

for (unsigned i = 0; i != Multiple-1; ++i) {

2265

if (!UniquedVals[i].getNode()) continue; // Must have been undefs.

2266

2267

LeadingZero &= isNullConstant(UniquedVals[i]);

2268

LeadingOnes &= isAllOnesConstant(UniquedVals[i]);

2269

}

2270

// Finally, check the least significant entry.

2271

if (LeadingZero) {

2272

if (!UniquedVals[Multiple-1].getNode())

2273

return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef

2274

int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();

2275

if (Val < 16) // 0,0,0,4 -> vspltisw(4)

2276

return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);

2277

}

2278

if (LeadingOnes) {

2279

if (!UniquedVals[Multiple-1].getNode())

2280

return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef

2281

int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();

2282

if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)

2283

return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);

2284

}

2285

2286

return SDValue();

2287

}

2288

2289

// Check to see if this buildvec has a single non-undef value in its elements.

2290

for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {

2291

if (N->getOperand(i).isUndef()) continue;

2292

if (!OpVal.getNode())

2293

OpVal = N->getOperand(i);

2294

else if (OpVal != N->getOperand(i))

2295

return SDValue();

2296

}

2297

2298

if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def.

2299

2300

unsigned ValSizeInBytes = EltSize;

2301

uint64_t Value = 0;

2302

if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {

2303

Value = CN->getZExtValue();

2304

} else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {

2305

assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!")((CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!"
) ? static_cast<void> (0) : __assert_fail ("CN->getValueType(0) == MVT::f32 && \"Only one legal FP vector type!\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 2305, __PRETTY_FUNCTION__));

2306

Value = FloatToBits(CN->getValueAPF().convertToFloat());

2307

}

2308

2309

// If the splat value is larger than the element value, then we can never do

2310

// this splat. The only case that we could fit the replicated bits into our

2311

// immediate field for would be zero, and we prefer to use vxor for it.

2312

if (ValSizeInBytes < ByteSize) return SDValue();

2313

2314

// If the element value is larger than the splat value, check if it consists

2315

// of a repeated bit pattern of size ByteSize.

2316

if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))

2317

return SDValue();

2318

2319

// Properly sign extend the value.

2320

int MaskVal = SignExtend32(Value, ByteSize * 8);

2321

2322

// If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.

2323

if (MaskVal == 0) return SDValue();

2324

2325

// Finally, if this value fits in a 5 bit sext field, return it

2326

if (SignExtend32<5>(MaskVal) == MaskVal)

2327

return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);

2328

return SDValue();

2329

}

2330

2331

/// isQVALIGNIShuffleMask - If this is a qvaligni shuffle mask, return the shift

2332

/// amount, otherwise return -1.

2333

int PPC::isQVALIGNIShuffleMask(SDNode *N) {

2334

EVT VT = N->getValueType(0);

2335

if (VT != MVT::v4f64 && VT != MVT::v4f32 && VT != MVT::v4i1)

2336

return -1;

2337

2338

ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);

2339

2340

// Find the first non-undef value in the shuffle mask.

2341

unsigned i;

2342

for (i = 0; i != 4 && SVOp->getMaskElt(i) < 0; ++i)

2343

/*search*/;

2344

2345

if (i == 4) return -1; // all undef.

2346

2347

// Otherwise, check to see if the rest of the elements are consecutively

2348

// numbered from this value.

2349

unsigned ShiftAmt = SVOp->getMaskElt(i);

2350

if (ShiftAmt < i) return -1;

2351

ShiftAmt -= i;

2352

2353

// Check the rest of the elements to see if they are consecutive.

2354

for (++i; i != 4; ++i)

2355

if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))

2356

return -1;

2357

2358

return ShiftAmt;

2359

}

2360

2361

//===----------------------------------------------------------------------===//

2362

// Addressing Mode Selection

2363

//===----------------------------------------------------------------------===//

2364

2365

/// isIntS16Immediate - This method tests to see if the node is either a 32-bit

2366

/// or 64-bit immediate, and if the value can be accurately represented as a

2367

/// sign extension from a 16-bit value. If so, this returns true and the

2368

/// immediate.

2369

bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {

2370

if (!isa<ConstantSDNode>(N))

2371

return false;

2372

2373

Imm = (int16_t)cast<ConstantSDNode>(N)->getZExtValue();

2374

if (N->getValueType(0) == MVT::i32)

2375

return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();

2376

else

2377

return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();

2378

}

2379

/// SDValue convenience overload: forwards the node wrapped by \p Op to the
/// SDNode* version of isIntS16Immediate.
bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
  SDNode *Node = Op.getNode();
  return isIntS16Immediate(Node, Imm);
}

2382

2383

2384

/// SelectAddressEVXRegReg - Given the specified address, check to see if it can

2385

/// be represented as an indexed [r+r] operation.

2386

bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,

2387

SDValue &Index,

2388

SelectionDAG &DAG) const {

2389

for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();

2390

UI != E; ++UI) {

2391

if (MemSDNode *Memop = dyn_cast<MemSDNode>(*UI)) {

2392

if (Memop->getMemoryVT() == MVT::f64) {

2393

Base = N.getOperand(0);

2394

Index = N.getOperand(1);

2395

return true;

2396

}

2397

}

2398

}

2399

return false;

2400

}

2401

2402

/// SelectAddressRegReg - Given the specified addressed, check to see if it

2403

/// can be represented as an indexed [r+r] operation. Returns false if it

2404

/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is

2405

/// non-zero and N can be represented by a base register plus a signed 16-bit

2406

/// displacement, make a more precise judgement by checking (displacement % \p

2407

/// EncodingAlignment).

2408

bool PPCTargetLowering::SelectAddressRegReg(

2409

SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG,

2410

MaybeAlign EncodingAlignment) const {

2411

// If we have a PC Relative target flag don't select as [reg+reg]. It will be

2412

// a [pc+imm].

2413

if (SelectAddressPCRel(N, Base))

2414

return false;

2415

2416

int16_t Imm = 0;

2417

if (N.getOpcode() == ISD::ADD) {

2418

// Is there any SPE load/store (f64), which can't handle 16bit offset?

2419

// SPE load/store can only handle 8-bit offsets.

2420

if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))

2421

return true;

2422

if (isIntS16Immediate(N.getOperand(1), Imm) &&

2423

(!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))

2424

return false; // r+i

2425

if (N.getOperand(1).getOpcode() == PPCISD::Lo)

2426

return false; // r+i

2427

2428

Base = N.getOperand(0);

2429

Index = N.getOperand(1);

2430

return true;

2431

} else if (N.getOpcode() == ISD::OR) {

2432

if (isIntS16Immediate(N.getOperand(1), Imm) &&

2433

(!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))

2434

return false; // r+i can fold it if we can.

2435

2436

// If this is an or of disjoint bitfields, we can codegen this as an add

2437

// (for better address arithmetic) if the LHS and RHS of the OR are provably

2438

// disjoint.

2439

KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));

2440

2441

if (LHSKnown.Zero.getBoolValue()) {

2442

KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));

2443

// If all of the bits are known zero on the LHS or RHS, the add won't

2444

// carry.

2445

if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {

2446

Base = N.getOperand(0);

2447

Index = N.getOperand(1);

2448

return true;

2449

}

2450

}

2451

}

2452

2453

return false;

2454

}

2455

2456

// If we happen to be doing an i64 load or store into a stack slot that has

2457

// less than a 4-byte alignment, then the frame-index elimination may need to

2458

// use an indexed load or store instruction (because the offset may not be a

2459

// multiple of 4). The extra register needed to hold the offset comes from the

2460

// register scavenger, and it is possible that the scavenger will need to use

2461

// an emergency spill slot. As a result, we need to make sure that a spill slot

2462

// is allocated when doing an i64 load/store into a less-than-4-byte-aligned

2463

// stack slot.

2464

static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {

2465

// FIXME: This does not handle the LWA case.

2466

if (VT != MVT::i64)

2467

return;

2468

2469

// NOTE: We'll exclude negative FIs here, which come from argument

2470

// lowering, because there are no known test cases triggering this problem

2471

// using packed structures (or similar). We can remove this exclusion if

2472

// we find such a test case. The reason why this is so test-case driven is

2473

// because this entire 'fixup' is only to prevent crashes (from the

2474

// register scavenger) on not-really-valid inputs. For example, if we have:

2475

// %a = alloca i1

2476

// %b = bitcast i1* %a to i64*

2477

// store i64* a, i64 b

2478

// then the store should really be marked as 'align 1', but is not. If it

2479

// were marked as 'align 1' then the indexed form would have been

2480

// instruction-selected initially, and the problem this 'fixup' is preventing

2481

// won't happen regardless.

2482

if (FrameIdx < 0)

2483

return;

2484

2485

MachineFunction &MF = DAG.getMachineFunction();

2486

MachineFrameInfo &MFI = MF.getFrameInfo();

2487

2488

if (MFI.getObjectAlign(FrameIdx) >= Align(4))

2489

return;

2490

2491

PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

2492

FuncInfo->setHasNonRISpills();

2493

}

2494

2495

/// Returns true if the address N can be represented by a base register plus

2496

/// a signed 16-bit displacement [r+imm], and if it is not better

2497

/// represented as reg+reg. If \p EncodingAlignment is non-zero, only accept

2498

/// displacements that are multiples of that value.

2499

bool PPCTargetLowering::SelectAddressRegImm(

2500

SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG,

2501

MaybeAlign EncodingAlignment) const {

2502

// FIXME dl should come from parent load or store, not from address

2503

SDLoc dl(N);

2504

2505

// If we have a PC Relative target flag don't select as [reg+imm]. It will be

2506

// a [pc+imm].

2507

if (SelectAddressPCRel(N, Base))

2508

return false;

2509

2510

// If this can be more profitably realized as r+r, fail.

2511

if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))

2512

return false;

2513

2514

if (N.getOpcode() == ISD::ADD) {

2515

int16_t imm = 0;

2516

if (isIntS16Immediate(N.getOperand(1), imm) &&

2517

(!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {

2518

Disp = DAG.getTargetConstant(imm, dl, N.getValueType());

2519

if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {

2520

Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());

2521

fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());

2522

} else {

2523

Base = N.getOperand(0);

2524

}

2525

return true; // [r+i]

2526

} else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {

2527

// Match LOAD (ADD (X, Lo(G))).

2528

assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()((!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->
getZExtValue() && "Cannot handle constant offsets yet!"
) ? static_cast<void> (0) : __assert_fail ("!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue() && \"Cannot handle constant offsets yet!\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 2529, __PRETTY_FUNCTION__))

2529

&& "Cannot handle constant offsets yet!")((!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->
getZExtValue() && "Cannot handle constant offsets yet!"
) ? static_cast<void> (0) : __assert_fail ("!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue() && \"Cannot handle constant offsets yet!\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 2529, __PRETTY_FUNCTION__));

2530

Disp = N.getOperand(1).getOperand(0); // The global address.

2531

assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||((Disp.getOpcode() == ISD::TargetGlobalAddress || Disp.getOpcode
() == ISD::TargetGlobalTLSAddress || Disp.getOpcode() == ISD::
TargetConstantPool || Disp.getOpcode() == ISD::TargetJumpTable
) ? static_cast<void> (0) : __assert_fail ("Disp.getOpcode() == ISD::TargetGlobalAddress || Disp.getOpcode() == ISD::TargetGlobalTLSAddress || Disp.getOpcode() == ISD::TargetConstantPool || Disp.getOpcode() == ISD::TargetJumpTable"
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 2534, __PRETTY_FUNCTION__))

2532

Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||((Disp.getOpcode() == ISD::TargetGlobalAddress || Disp.getOpcode
() == ISD::TargetGlobalTLSAddress || Disp.getOpcode() == ISD::
TargetConstantPool || Disp.getOpcode() == ISD::TargetJumpTable
) ? static_cast<void> (0) : __assert_fail ("Disp.getOpcode() == ISD::TargetGlobalAddress || Disp.getOpcode() == ISD::TargetGlobalTLSAddress || Disp.getOpcode() == ISD::TargetConstantPool || Disp.getOpcode() == ISD::TargetJumpTable"
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 2534, __PRETTY_FUNCTION__))

2533

Disp.getOpcode() == ISD::TargetConstantPool ||((Disp.getOpcode() == ISD::TargetGlobalAddress || Disp.getOpcode
() == ISD::TargetGlobalTLSAddress || Disp.getOpcode() == ISD::
TargetConstantPool || Disp.getOpcode() == ISD::TargetJumpTable
) ? static_cast<void> (0) : __assert_fail ("Disp.getOpcode() == ISD::TargetGlobalAddress || Disp.getOpcode() == ISD::TargetGlobalTLSAddress || Disp.getOpcode() == ISD::TargetConstantPool || Disp.getOpcode() == ISD::TargetJumpTable"
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 2534, __PRETTY_FUNCTION__))

2534

Disp.getOpcode() == ISD::TargetJumpTable)((Disp.getOpcode() == ISD::TargetGlobalAddress || Disp.getOpcode
() == ISD::TargetGlobalTLSAddress || Disp.getOpcode() == ISD::
TargetConstantPool || Disp.getOpcode() == ISD::TargetJumpTable
) ? static_cast<void> (0) : __assert_fail ("Disp.getOpcode() == ISD::TargetGlobalAddress || Disp.getOpcode() == ISD::TargetGlobalTLSAddress || Disp.getOpcode() == ISD::TargetConstantPool || Disp.getOpcode() == ISD::TargetJumpTable"
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 2534, __PRETTY_FUNCTION__));

2535

Base = N.getOperand(0);

2536

return true; // [&g+r]

2537

}

2538

} else if (N.getOpcode() == ISD::OR) {

2539

int16_t imm = 0;

2540

if (isIntS16Immediate(N.getOperand(1), imm) &&

2541

(!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {

2542

// If this is an or of disjoint bitfields, we can codegen this as an add

2543

// (for better address arithmetic) if the LHS and RHS of the OR are

2544

// provably disjoint.

2545

KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));

2546

2547

if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {

2548

// If all of the bits are known zero on the LHS or RHS, the add won't

2549

// carry.

2550

if (FrameIndexSDNode *FI =

2551

dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {

2552

Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());

2553

fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());

2554

} else {

2555

Base = N.getOperand(0);

2556

}

2557

Disp = DAG.getTargetConstant(imm, dl, N.getValueType());

2558

return true;

2559

}

2560

}

2561

} else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {

2562

// Loading from a constant address.

2563

2564

// If this address fits entirely in a 16-bit sext immediate field, codegen

2565

// this as "d, 0"

2566

int16_t Imm;

2567

if (isIntS16Immediate(CN, Imm) &&

2568

(!EncodingAlignment || isAligned(*EncodingAlignment, Imm))) {

2569

Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));

2570

Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,

2571

CN->getValueType(0));

2572

return true;

2573

}

2574

2575

// Handle 32-bit sext immediates with LIS + addr mode.

2576

if ((CN->getValueType(0) == MVT::i32 ||

2577

(int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&

2578

(!EncodingAlignment ||

2579

isAligned(*EncodingAlignment, CN->getZExtValue()))) {

2580

int Addr = (int)CN->getZExtValue();

2581

2582

// Otherwise, break this down into an LIS + disp.

2583

Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);

2584

2585

Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,

2586

MVT::i32);

2587

unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;

2588

Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);

2589

return true;

2590

}

2591

}

2592

2593

Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));

2594

if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {

2595

Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());

2596

fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());

2597

} else

2598

Base = N;

2599

return true; // [r+0]

2600

}

2601

2602

/// SelectAddressRegRegOnly - Given the specified addressed, force it to be

2603

/// represented as an indexed [r+r] operation.

2604

bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,

2605

SDValue &Index,

2606

SelectionDAG &DAG) const {

2607

// Check to see if we can easily represent this as an [r+r] address. This

2608

// will fail if it thinks that the address is more profitably represented as

2609

// reg+imm, e.g. where imm = 0.

2610

if (SelectAddressRegReg(N, Base, Index, DAG))

2611

return true;

2612

2613

// If the address is the result of an add, we will utilize the fact that the

2614

// address calculation includes an implicit add. However, we can reduce

2615

// register pressure if we do not materialize a constant just for use as the

2616

// index register. We only get rid of the add if it is not an add of a

2617

// value and a 16-bit signed constant and both have a single use.

2618

int16_t imm = 0;

2619

if (N.getOpcode() == ISD::ADD &&

2620

(!isIntS16Immediate(N.getOperand(1), imm) ||

2621

!N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {

2622

Base = N.getOperand(0);

2623

Index = N.getOperand(1);

2624

return true;

2625

}

2626

2627

// Otherwise, do it the hard way, using R0 as the base register.

2628

Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,

2629

N.getValueType());

2630

Index = N;

2631

return true;

2632

}

2633

2634

/// Returns true when \p N is a node of type \p Ty whose target flags have
/// PPCII::MO_PCREL_FLAG set.
template <typename Ty> static bool isValidPCRelNode(SDValue N) {
  if (Ty *Candidate = dyn_cast<Ty>(N))
    return (Candidate->getTargetFlags() & PPCII::MO_PCREL_FLAG) != 0;
  return false;
}

2638

2639

/// Returns true if this address is a PC Relative address.

2640

/// PC Relative addresses are marked with the flag PPCII::MO_PCREL_FLAG

2641

/// or if the node opcode is PPCISD::MAT_PCREL_ADDR.

2642

bool PPCTargetLowering::SelectAddressPCRel(SDValue N, SDValue &Base) const {

2643

// This is a materialize PC Relative node. Always select this as PC Relative.

2644

Base = N;

2645

if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)

2646

return true;

2647

if (isValidPCRelNode<ConstantPoolSDNode>(N) ||

2648

isValidPCRelNode<GlobalAddressSDNode>(N) ||

2649

isValidPCRelNode<JumpTableSDNode>(N) ||

2650

isValidPCRelNode<BlockAddressSDNode>(N))

2651

return true;

2652

return false;

2653

}

2654

2655

/// Returns true if we should use a direct load into vector instruction

2656

/// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.

2657

static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {

2658

2659

// If there are any other uses other than scalar to vector, then we should

2660

// keep it as a scalar load -> direct move pattern to prevent multiple

2661

// loads.

2662

LoadSDNode *LD = dyn_cast<LoadSDNode>(N);

2663

if (!LD)

2664

return false;

2665

2666

EVT MemVT = LD->getMemoryVT();

2667

if (!MemVT.isSimple())

2668

return false;

2669

switch(MemVT.getSimpleVT().SimpleTy) {

2670

case MVT::i64:

2671

break;

2672

case MVT::i32:

2673

if (!ST.hasP8Vector())

2674

return false;

2675

break;

2676

case MVT::i16:

2677

case MVT::i8:

2678

if (!ST.hasP9Vector())

2679

return false;

2680

break;

2681

default:

2682

return false;

2683

}

2684

2685

SDValue LoadedVal(N, 0);

2686

if (!LoadedVal.hasOneUse())

2687

return false;

2688

2689

for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end();

2690

UI != UE; ++UI)

2691

if (UI.getUse().get().getResNo() == 0 &&

2692

UI->getOpcode() != ISD::SCALAR_TO_VECTOR &&

2693

UI->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED)

2694

return false;

2695

2696

return true;

2697

}

2698

2699

/// getPreIndexedAddressParts - returns true by value, base pointer and

2700

/// offset pointer and addressing mode by reference if the node's address

2701

/// can be legally represented as pre-indexed load / store address.

2702

bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,

2703

SDValue &Offset,

2704

ISD::MemIndexedMode &AM,

2705

SelectionDAG &DAG) const {

2706

if (DisablePPCPreinc) return false;

2707

2708

bool isLoad = true;

2709

SDValue Ptr;

2710

EVT VT;

2711

unsigned Alignment;

2712

if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {

2713

Ptr = LD->getBasePtr();

2714

VT = LD->getMemoryVT();

2715

Alignment = LD->getAlignment();

2716

} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {

2717

Ptr = ST->getBasePtr();

2718

VT = ST->getMemoryVT();

2719

Alignment = ST->getAlignment();

2720

isLoad = false;

2721

} else

2722

return false;

2723

2724

// Do not generate pre-inc forms for specific loads that feed scalar_to_vector

2725

// instructions because we can fold these into a more efficient instruction

2726

// instead, (such as LXSD).

2727

if (isLoad && usePartialVectorLoads(N, Subtarget)) {

2728

return false;

2729

}

2730

2731

// PowerPC doesn't have preinc load/store instructions for vectors

2732

if (VT.isVector())

2733

return false;

2734

2735

if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {

2736

// Common code will reject creating a pre-inc form if the base pointer

2737

// is a frame index, or if N is a store and the base pointer is either

2738

// the same as or a predecessor of the value being stored. Check for

2739

// those situations here, and try with swapped Base/Offset instead.

2740

bool Swap = false;

2741

2742

if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))

2743

Swap = true;

2744

else if (!isLoad) {

2745

SDValue Val = cast<StoreSDNode>(N)->getValue();

2746

if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))

2747

Swap = true;

2748

}

2749

2750

if (Swap)

2751

std::swap(Base, Offset);

2752

2753

AM = ISD::PRE_INC;

2754

return true;

2755

}

2756

2757

// LDU/STU can only handle immediates that are a multiple of 4.

2758

if (VT != MVT::i64) {

2759

if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, None))

2760

return false;

2761

} else {

2762

// LDU/STU need an address with at least 4-byte alignment.

2763

if (Alignment < 4)

2764

return false;

2765

2766

if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, Align(4)))

2767

return false;

2768

}

2769

2770

if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {

2771

// PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of

2772

// sext i32 to i64 when addr mode is r+i.

2773

if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&

2774

LD->getExtensionType() == ISD::SEXTLOAD &&

2775

isa<ConstantSDNode>(Offset))

2776

return false;

2777

}

2778

2779

AM = ISD::PRE_INC;

2780

return true;

2781

}

2782

2783

//===----------------------------------------------------------------------===//

2784

// LowerOperation implementation

2785

//===----------------------------------------------------------------------===//

2786

2787

/// Return true if we should reference labels using a PICBase, set the HiOpFlags

2788

/// and LoOpFlags to the target MO flags.

2789

static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,

2790

unsigned &HiOpFlags, unsigned &LoOpFlags,

2791

const GlobalValue *GV = nullptr) {

2792

HiOpFlags = PPCII::MO_HA;

2793

LoOpFlags = PPCII::MO_LO;

2794

2795

// Don't use the pic base if not in PIC relocation model.

2796

if (IsPIC) {

2797

HiOpFlags |= PPCII::MO_PIC_FLAG;

2798

LoOpFlags |= PPCII::MO_PIC_FLAG;

2799

}

2800

}

2801

2802

/// Build the address computation hi(\p HiPart) + lo(\p LoPart) using
/// PPCISD::Hi and PPCISD::Lo nodes. When \p isPIC is set, a
/// PPCISD::GlobalBaseReg node is added to the high part first.
static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
                             SelectionDAG &DAG) {
  SDLoc DL(HiPart);
  EVT PtrVT = HiPart.getValueType();
  SDValue Zero = DAG.getConstant(0, DL, PtrVT);

  SDValue HiNode = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
  SDValue LoNode = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);

  // With PIC, the first instruction is actually "GR+hi(&G)".
  if (isPIC) {
    SDValue GlobalBase = DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT);
    HiNode = DAG.getNode(ISD::ADD, DL, PtrVT, GlobalBase, HiNode);
  }

  // Generate non-pic code that has direct accesses to the constant pool.
  // The address of the global is just (hi(&g)+lo(&g)).
  return DAG.getNode(ISD::ADD, DL, PtrVT, HiNode, LoNode);
}

2820

2821

/// Mark the PPCFunctionInfo of \p MF as using the TOC base pointer.
static void setUsesTOCBasePtr(MachineFunction &MF) {
  MF.getInfo<PPCFunctionInfo>()->setUsesTOCBasePtr();
}

2825

2826

/// Mark the function currently being lowered in \p DAG as using the TOC base
/// pointer.
static void setUsesTOCBasePtr(SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  MF.getInfo<PPCFunctionInfo>()->setUsesTOCBasePtr();
}

2829

2830

/// Build a PPCISD::TOC_ENTRY memory-intrinsic node (flagged as a load from
/// the GOT) for \p GA. The second operand is the base: register X2 on 64-bit
/// targets, register R2 on 32-bit AIX, and a PPCISD::GlobalBaseReg node
/// otherwise.
SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
                                       SDValue GA) const {
  const bool Is64Bit = Subtarget.isPPC64();
  EVT VT = Is64Bit ? MVT::i64 : MVT::i32;

  SDValue BaseReg;
  if (Is64Bit)
    BaseReg = DAG.getRegister(PPC::X2, VT);
  else if (Subtarget.isAIXABI())
    BaseReg = DAG.getRegister(PPC::R2, VT);
  else
    BaseReg = DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);

  SDValue Ops[] = { GA, BaseReg };
  return DAG.getMemIntrinsicNode(
      PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
      MachinePointerInfo::getGOT(DAG.getMachineFunction()), None,
      MachineMemOperand::MOLoad);
}

2844

2845

/// Lower a constant-pool node to a target address computation.
///
/// - 64-bit ELF ABI and AIX ABI: with PC-relative addressing enabled, a
///   PPCISD::MAT_PCREL_ADDR of the pool entry flagged PPCII::MO_PCREL_FLAG;
///   otherwise a TOC entry.
/// - PIC under the SVR4 ABI: a TOC entry for the pool entry flagged
///   PPCII::MO_PIC_FLAG.
/// - Otherwise: a Hi/Lo label reference built by LowerLabelRef.
SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
                                             SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  const Constant *C = CP->getConstVal();

  // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
  // The actual address of the GlobalValue is stored in the TOC.
  if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
    if (Subtarget.isUsingPCRelativeCalls()) {
      SDLoc DL(CP);
      EVT Ty = getPointerTy(DAG.getDataLayout());
      SDValue ConstPool = DAG.getTargetConstantPool(
          C, Ty, CP->getAlign(), CP->getOffset(), PPCII::MO_PCREL_FLAG);
      return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, ConstPool);
    }
    setUsesTOCBasePtr(DAG);
    SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0);
    return getTOCEntry(DAG, SDLoc(CP), GA);
  }

  unsigned MOHiFlag, MOLoFlag;
  bool IsPIC = isPositionIndependent();
  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);

  if (IsPIC && Subtarget.isSVR4ABI()) {
    // The fourth argument of getTargetConstantPool is the offset and the
    // fifth is the target flags (see the other calls in this function), so
    // MO_PIC_FLAG must be passed in the fifth position; previously it was
    // passed as the offset.
    SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0,
                                           PPCII::MO_PIC_FLAG);
    return getTOCEntry(DAG, SDLoc(CP), GA);
  }

  SDValue CPIHi =
      DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOHiFlag);
  SDValue CPILo =
      DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOLoFlag);
  return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
}

2882

2883

// For 64-bit PowerPC, prefer the more compact relative encodings.

2884

// This trades 32 bits per jump table entry for one or two instructions

2885

// on the jump site.

2886

unsigned PPCTargetLowering::getJumpTableEncoding() const {

2887

if (isJumpTableRelative())

2888

return MachineJumpTableInfo::EK_LabelDifference32;

2889

2890

return TargetLowering::getJumpTableEncoding();

2891

}

2892

2893

bool PPCTargetLowering::isJumpTableRelative() const {

2894

if (UseAbsoluteJumpTables)

2895

return false;

2896

if (Subtarget.isPPC64() || Subtarget.isAIXABI())

2897

return true;

2898

return TargetLowering::isJumpTableRelative();

2899

}

2900

2901

/// Returns the node that relative jump-table entries are based on.
/// The generic TargetLowering base is used unless this is PPC64 (non-AIX)
/// with a code model other than Small or Medium, in which case a
/// PPCISD::GlobalBaseReg node is returned.
SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
                                                    SelectionDAG &DAG) const {
  bool UseGenericBase = !Subtarget.isPPC64() || Subtarget.isAIXABI();
  if (!UseGenericBase) {
    const auto CM = getTargetMachine().getCodeModel();
    UseGenericBase = CM == CodeModel::Small || CM == CodeModel::Medium;
  }

  if (UseGenericBase)
    return TargetLowering::getPICJumpTableRelocBase(Table, DAG);

  return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
                     getPointerTy(DAG.getDataLayout()));
}

2915

2916

/// Returns the MC expression that relative jump-table entries are based on.
/// The generic TargetLowering expression is used unless this is PPC64
/// (non-AIX) with a code model other than Small or Medium, in which case a
/// reference to the function's PIC base symbol is returned.
const MCExpr *
PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                                                unsigned JTI,
                                                MCContext &Ctx) const {
  bool UseGenericExpr = !Subtarget.isPPC64() || Subtarget.isAIXABI();
  if (!UseGenericExpr) {
    const auto CM = getTargetMachine().getCodeModel();
    UseGenericExpr = CM == CodeModel::Small || CM == CodeModel::Medium;
  }

  if (UseGenericExpr)
    return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);

  return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
}

2931

2932

/// Lower a jump-table node to a target address computation: a
/// PPCISD::MAT_PCREL_ADDR when PC-relative addressing is enabled, a TOC entry
/// for the 64-bit ELF / AIX ABIs and for PIC under the SVR4 ABI, and a Hi/Lo
/// label reference otherwise.
SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  const int JTIndex = JT->getIndex();

  // isUsingPCRelativeCalls() returns true when PCRelative is enabled
  if (Subtarget.isUsingPCRelativeCalls()) {
    SDLoc DL(JT);
    EVT Ty = getPointerTy(DAG.getDataLayout());
    SDValue PCRelJT = DAG.getTargetJumpTable(JTIndex, Ty, PPCII::MO_PCREL_FLAG);
    return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, PCRelJT);
  }

  // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
  // The actual address of the GlobalValue is stored in the TOC.
  if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
    setUsesTOCBasePtr(DAG);
    SDValue TOCJT = DAG.getTargetJumpTable(JTIndex, PtrVT);
    return getTOCEntry(DAG, SDLoc(JT), TOCJT);
  }

  unsigned MOHiFlag, MOLoFlag;
  bool IsPIC = isPositionIndependent();
  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);

  if (IsPIC && Subtarget.isSVR4ABI()) {
    SDValue PICJT = DAG.getTargetJumpTable(JTIndex, PtrVT, PPCII::MO_PIC_FLAG);
    return getTOCEntry(DAG, SDLoc(PICJT), PICJT);
  }

  SDValue JTIHi = DAG.getTargetJumpTable(JTIndex, PtrVT, MOHiFlag);
  SDValue JTILo = DAG.getTargetJumpTable(JTIndex, PtrVT, MOLoFlag);
  return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
}

2968

2969

/// Lower a block-address node to a target address computation: a
/// PPCISD::MAT_PCREL_ADDR when PC-relative addressing is enabled, a TOC/GOT
/// entry for the 64-bit ELF / AIX ABIs and for 32-bit position-independent
/// ELF, and a Hi/Lo label reference otherwise.
SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
                                             SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
  const BlockAddress *BA = BASDN->getBlockAddress();

  // isUsingPCRelativeCalls() returns true when PCRelative is enabled
  if (Subtarget.isUsingPCRelativeCalls()) {
    SDLoc DL(BASDN);
    EVT Ty = getPointerTy(DAG.getDataLayout());
    SDValue PCRelBA = DAG.getTargetBlockAddress(BA, Ty, BASDN->getOffset(),
                                                PPCII::MO_PCREL_FLAG);
    return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, PCRelBA);
  }

  // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
  // The actual BlockAddress is stored in the TOC.
  if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
    setUsesTOCBasePtr(DAG);
    SDValue TOCBA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
    return getTOCEntry(DAG, SDLoc(BASDN), TOCBA);
  }

  // 32-bit position-independent ELF stores the BlockAddress in the .got.
  if (Subtarget.is32BitELFABI() && isPositionIndependent()) {
    SDValue GOTBA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
    return getTOCEntry(DAG, SDLoc(BASDN), GOTBA);
  }

  // Remaining case: hi/lo halves of the block address (offset 0).
  unsigned MOHiFlag, MOLoFlag;
  bool IsPIC = isPositionIndependent();
  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
  SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
  SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
  return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
}

3006

3007

/// Lower a thread-local global address according to the TLS model the target
/// machine selects for the global (local-exec, initial-exec, general-dynamic
/// or local-dynamic). Emulated TLS is delegated to LowerToTLSEmulatedModel.
SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
                                                 SelectionDAG &DAG) const {
  // FIXME: TLS addresses currently use medium model code sequences,
  // which is the most useful form.  Eventually support for small and
  // large models could be added if users need it, at the cost of
  // additional complexity.
  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
  if (DAG.getTarget().useEmulatedTLS())
    return LowerToTLSEmulatedModel(GA, DAG);

  SDLoc dl(GA);
  const GlobalValue *GV = GA->getGlobal();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  const bool Is64Bit = Subtarget.isPPC64();
  const Module *M = DAG.getMachineFunction().getFunction().getParent();
  PICLevel::Level PICLvl = M->getPICLevel();

  const TargetMachine &TM = getTargetMachine();
  TLSModel::Model Model = TM.getTLSModel(GV);
  const bool IsPCRel = Subtarget.isUsingPCRelativeCalls();

  switch (Model) {
  case TLSModel::LocalExec: {
    if (IsPCRel) {
      SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
      SDValue TGA = DAG.getTargetGlobalAddress(
          GV, dl, PtrVT, 0, (PPCII::MO_PCREL_FLAG | PPCII::MO_TPREL_FLAG));
      SDValue MatAddr =
          DAG.getNode(PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, dl, PtrVT, TGA);
      return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, MatAddr);
    }

    // Thread pointer (X13 / R2) plus the high and low tprel halves.
    SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
                                               PPCII::MO_TPREL_HA);
    SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
                                               PPCII::MO_TPREL_LO);
    SDValue TLSReg = Is64Bit ? DAG.getRegister(PPC::X13, MVT::i64)
                             : DAG.getRegister(PPC::R2, MVT::i32);

    SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
    return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
  }

  case TLSModel::InitialExec: {
    SDValue TGA = DAG.getTargetGlobalAddress(
        GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0);
    SDValue TGATLS = DAG.getTargetGlobalAddress(
        GV, dl, PtrVT, 0,
        IsPCRel ? (PPCII::MO_TLS | PPCII::MO_PCREL_FLAG) : PPCII::MO_TLS);

    SDValue TPOffset;
    if (IsPCRel) {
      SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, dl, PtrVT, TGA);
      TPOffset = DAG.getLoad(MVT::i64, dl, DAG.getEntryNode(), MatPCRel,
                             MachinePointerInfo());
    } else {
      SDValue GOTPtr;
      if (Is64Bit) {
        setUsesTOCBasePtr(DAG);
        SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
        GOTPtr =
            DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, PtrVT, GOTReg, TGA);
      } else if (!TM.isPositionIndependent()) {
        GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
      } else if (PICLvl == PICLevel::SmallPIC) {
        GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
      } else {
        GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
      }
      TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, GOTPtr);
    }
    return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
  }

  case TLSModel::GeneralDynamic: {
    if (IsPCRel) {
      SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
                                               PPCII::MO_GOT_TLSGD_PCREL_FLAG);
      return DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
    }

    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
    SDValue GOTPtr;
    if (Is64Bit) {
      setUsesTOCBasePtr(DAG);
      SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
      GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
                           GOTReg, TGA);
    } else {
      const unsigned GOTOpc = PICLvl == PICLevel::SmallPIC
                                  ? PPCISD::GlobalBaseReg
                                  : PPCISD::PPC32_PICGOT;
      GOTPtr = DAG.getNode(GOTOpc, dl, PtrVT);
    }
    return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
                       GOTPtr, TGA, TGA);
  }

  case TLSModel::LocalDynamic: {
    if (IsPCRel) {
      SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
                                               PPCII::MO_GOT_TLSLD_PCREL_FLAG);
      SDValue MatPCRel =
          DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
      return DAG.getNode(PPCISD::PADDI_DTPREL, dl, PtrVT, MatPCRel, TGA);
    }

    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
    SDValue GOTPtr;
    if (Is64Bit) {
      setUsesTOCBasePtr(DAG);
      SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
      GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
                           GOTReg, TGA);
    } else {
      const unsigned GOTOpc = PICLvl == PICLevel::SmallPIC
                                  ? PPCISD::GlobalBaseReg
                                  : PPCISD::PPC32_PICGOT;
      GOTPtr = DAG.getNode(GOTOpc, dl, PtrVT);
    }
    SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
                                  PtrVT, GOTPtr, TGA, TGA);
    SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
                                      PtrVT, TLSAddr, TGA);
    return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
  }
  }

  llvm_unreachable("Unknown TLS model!");
}

3135

3136

/// Lower a global-address node to a target address computation.
///
/// For the 64-bit ELF / AIX ABIs this is either a PC-relative
/// materialization (followed by a load when the global is accessed
/// GOT-indirect) or a TOC entry. PIC under the SVR4 ABI uses a TOC entry
/// flagged PPCII::MO_PIC_FLAG; everything else uses a Hi/Lo label reference.
SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
                                              SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  SDLoc DL(GSDN);
  const GlobalValue *GV = GSDN->getGlobal();

  // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
  // The actual address of the GlobalValue is stored in the TOC.
  if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
    if (Subtarget.isUsingPCRelativeCalls()) {
      EVT Ty = getPointerTy(DAG.getDataLayout());

      if (!isAccessedAsGotIndirect(Op)) {
        SDValue DirectGA = DAG.getTargetGlobalAddress(
            GV, DL, Ty, GSDN->getOffset(), PPCII::MO_PCREL_FLAG);
        return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, DirectGA);
      }

      // GOT-indirect: materialize the GOT slot address, then load from it.
      SDValue GotGA = DAG.getTargetGlobalAddress(
          GV, DL, Ty, GSDN->getOffset(),
          PPCII::MO_PCREL_FLAG | PPCII::MO_GOT_FLAG);
      SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GotGA);
      return DAG.getLoad(MVT::i64, DL, DAG.getEntryNode(), MatPCRel,
                         MachinePointerInfo());
    }

    setUsesTOCBasePtr(DAG);
    SDValue TOCGA =
        DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
    return getTOCEntry(DAG, DL, TOCGA);
  }

  unsigned MOHiFlag, MOLoFlag;
  bool IsPIC = isPositionIndependent();
  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);

  if (IsPIC && Subtarget.isSVR4ABI()) {
    SDValue PICGA = DAG.getTargetGlobalAddress(
        GV, DL, PtrVT, GSDN->getOffset(), PPCII::MO_PIC_FLAG);
    return getTOCEntry(DAG, DL, PICGA);
  }

  SDValue GAHi =
      DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
  SDValue GALo =
      DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);

  return LowerLabelRef(GAHi, GALo, IsPIC, DAG);
}

3185

3186

/// Custom-lower a SETCC node.
///
/// Returns an empty SDValue when the node is left to the default handling,
/// \p Op itself when it is kept as is, or a replacement node.
SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
  SDLoc dl(Op);
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  const bool IsEqOrNe = CC == ISD::SETEQ || CC == ISD::SETNE;

  if (Op.getValueType() == MVT::v2i64) {
    // We handle most of these in the usual way.
    if (LHS.getValueType() != MVT::v2i64)
      return Op;

    // When the operands themselves are v2i64 values, we need to do something
    // special because VSX has no underlying comparison operations for these.
    // Equality can be handled by casting to the legal type for Altivec
    // comparisons, everything else needs to be expanded.
    if (!IsEqOrNe)
      return SDValue();

    SDValue LHSv4 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, LHS);
    SDValue RHSv4 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, RHS);
    SDValue Cmp = DAG.getSetCC(dl, MVT::v4i32, LHSv4, RHSv4, CC);
    return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Cmp);
  }

  // If we're comparing for equality to zero, expose the fact that this is
  // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
  // fold the new nodes.
  if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
    return V;

  // Leave comparisons against 0 and -1 alone for now, since they're usually
  // optimized.  FIXME: revisit this when we can custom lower all setcc
  // optimizations.
  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS))
    if (C->isAllOnesValue() || C->isNullValue())
      return SDValue();

  // If we have an integer seteq/setne, turn it into a compare against zero
  // by xor'ing the rhs with the lhs, which is faster than setting a
  // condition register, reading it back out, and masking the correct bit.  The
  // normal approach here uses sub to do this instead of xor.  Using xor exposes
  // the result to other bit-twiddling opportunities.
  EVT LHSVT = LHS.getValueType();
  if (!LHSVT.isInteger() || !IsEqOrNe)
    return SDValue();

  EVT VT = Op.getValueType();
  SDValue Xor = DAG.getNode(ISD::XOR, dl, LHSVT, LHS, RHS);
  return DAG.getSetCC(dl, VT, Xor, DAG.getConstant(0, dl, LHSVT), CC);
}

3239

3240

/// Lower a VAARG node (PPC32 only).
///
/// Reads the gpr/fpr index bytes and the overflow-area / register-save-area
/// pointers out of the va_list, picks the address of the next argument from
/// one of the two areas, writes the updated index and overflow pointer back,
/// and returns the load of the argument.
SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
  SDNode *Node = Op.getNode();
  EVT VT = Node->getValueType(0);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue InChain = Node->getOperand(0);
  SDValue VAListPtr = Node->getOperand(1);
  const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
  SDLoc dl(Node);

  assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");

  // Integer arguments are tracked by the gpr index and use 4-byte steps;
  // everything else is tracked by the fpr index and uses 8-byte steps.
  const bool IsIntArg = VT.isInteger();
  const unsigned SlotBytes = IsIntArg ? 4 : 8;

  // gpr_index
  SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
                                    VAListPtr, MachinePointerInfo(SV), MVT::i8);
  InChain = GprIndex.getValue(1);

  if (VT == MVT::i64) {
    // Check if GprIndex is even
    SDValue One = DAG.getConstant(1, dl, MVT::i32);
    SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex, One);
    SDValue IsOdd = DAG.getSetCC(dl, MVT::i32, GprAnd,
                                 DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
    SDValue GprIndexPlusOne =
        DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex, One);
    // Align GprIndex to be even if it isn't
    GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, IsOdd, GprIndexPlusOne,
                           GprIndex);
  }

  // fpr index is 1 byte after gpr
  SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
                               DAG.getConstant(1, dl, MVT::i32));

  // fpr
  SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
                                    FprPtr, MachinePointerInfo(SV), MVT::i8);
  InChain = FprIndex.getValue(1);

  // The register-save-area pointer is at byte 8 of the va_list and the
  // overflow-area pointer at byte 4.
  SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
                                       DAG.getConstant(8, dl, MVT::i32));

  SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
                                        DAG.getConstant(4, dl, MVT::i32));

  // areas
  SDValue OverflowArea =
      DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
  InChain = OverflowArea.getValue(1);

  SDValue RegSaveArea =
      DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
  InChain = RegSaveArea.getValue(1);

  // The index that governs this argument.
  SDValue RegIndex = IsIntArg ? GprIndex : FprIndex;

  // CC is true while the governing index is below 8, i.e. the argument is
  // taken from the register save area rather than the overflow area.
  SDValue CC = DAG.getSetCC(dl, MVT::i32, RegIndex,
                            DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);

  // adjustment constant gpr_index * 4/8
  SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32, RegIndex,
                                    DAG.getConstant(SlotBytes, dl, MVT::i32));

  // OurReg = RegSaveArea + RegConstant
  SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
                               RegConstant);

  // Floating types are 32 bytes into RegSaveArea
  if (VT.isFloatingPoint())
    OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
                         DAG.getConstant(32, dl, MVT::i32));

  // increase {f,g}pr_index by 1 (or 2 if VT is i64)
  SDValue IndexPlus1 =
      DAG.getNode(ISD::ADD, dl, MVT::i32, RegIndex,
                  DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl, MVT::i32));

  InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
                              IsIntArg ? VAListPtr : FprPtr,
                              MachinePointerInfo(SV), MVT::i8);

  // determine if we should load from reg_save_area or overflow_area
  SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);

  // increase overflow_area by 4/8 if gpr/fpr > 8
  SDValue OverflowAreaPlusN =
      DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
                  DAG.getConstant(SlotBytes, dl, MVT::i32));

  OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
                             OverflowAreaPlusN);

  InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
                              MachinePointerInfo(), MVT::i32);

  return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
}

3338

3339

/// Lower a VACOPY node (PPC32 only) to an always-inline, non-volatile memcpy
/// of the whole va_list struct.
SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
  assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");

  SDValue Chain = Op.getOperand(0);
  SDValue Dst = Op.getOperand(1);
  SDValue Src = Op.getOperand(2);

  // We have to copy the entire va_list struct:
  // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte
  SDValue Size = DAG.getConstant(12, SDLoc(Op), MVT::i32);

  return DAG.getMemcpy(Chain, Op, Dst, Src, Size, Align(8),
                       /*isVol=*/false, /*AlwaysInline=*/true,
                       /*isTailCall=*/false, MachinePointerInfo(),
                       MachinePointerInfo());
}

3349

3350

/// Lower ADJUST_TRAMPOLINE by returning its first operand unchanged.
/// Reports a fatal error on the AIX ABI, where the operation is unsupported.
SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
                                                  SelectionDAG &DAG) const {
  if (Subtarget.isAIXABI())
    report_fatal_error("ADJUST_TRAMPOLINE operation is not supported on AIX.");

  SDValue Trampoline = Op.getOperand(0);
  return Trampoline;
}

3357

3358

/// Lower INIT_TRAMPOLINE to a call to
/// __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg) and return the chain of
/// that call. Reports a fatal error on the AIX ABI, where the operation is
/// unsupported.
SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
                                                SelectionDAG &DAG) const {
  if (Subtarget.isAIXABI())
    report_fatal_error("INIT_TRAMPOLINE operation is not supported on AIX.");

  SDValue Chain = Op.getOperand(0);
  SDValue Trmp = Op.getOperand(1); // trampoline
  SDValue FPtr = Op.getOperand(2); // nested function
  SDValue Nest = Op.getOperand(3); // 'nest' parameter value
  SDLoc dl(Op);

  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  const bool Is64BitPtr = (PtrVT == MVT::i64);
  Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());

  // Every argument is passed with the pointer-sized integer type.
  TargetLowering::ArgListTy Args;
  TargetLowering::ArgListEntry Entry;
  Entry.Ty = IntPtrTy;
  auto AppendArg = [&](SDValue Arg) {
    Entry.Node = Arg;
    Args.push_back(Entry);
  };

  AppendArg(Trmp);

  // TrampSize == (isPPC64 ? 48 : 40);
  AppendArg(DAG.getConstant(Is64BitPtr ? 48 : 40, dl,
                            Is64BitPtr ? MVT::i64 : MVT::i32));

  AppendArg(FPtr);
  AppendArg(Nest);

  // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
      CallingConv::C, Type::getVoidTy(*DAG.getContext()),
      DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));

  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
  // Only the output chain (second element) is of interest.
  return CallResult.second;
}

3396

3397

/// Lower a VASTART node.
///
/// On PPC64 and the AIX ABI this is a single store of the VarArgsFrameIndex
/// slot address into the va_list. Otherwise the four fields of the 32-bit
/// SVR4 va_list struct are stored one after another, chained in order.
SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  SDLoc dl(Op);

  if (Subtarget.isPPC64() || Subtarget.isAIXABI()) {
    // vastart just stores the address of the VarArgsFrameIndex slot into the
    // memory location argument.
    SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
    const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
    return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
                        MachinePointerInfo(SV));
  }

  // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
  // We suppose the given va_list is already allocated.
  //
  // typedef struct {
  //  char gpr;     /* index into the array of 8 GPRs
  //                 * stored in the register save area
  //                 * gpr=0 corresponds to r3,
  //                 * gpr=1 to r4, etc.
  //                 */
  //  char fpr;     /* index into the array of 8 FPRs
  //                 * stored in the register save area
  //                 * fpr=0 corresponds to f1,
  //                 * fpr=1 to f2, etc.
  //                 */
  //  char *overflow_arg_area;
  //                /* location on stack that holds
  //                 * the next overflow argument
  //                 */
  //  char *reg_save_area;
  //               /* where r3:r10 and f1:f8 (if saved)
  //                * are stored
  //                */
  // } va_list[1];

  SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
  SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
  SDValue StackOffsetFI =
      DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(), PtrVT);
  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

  // Byte distances between consecutive fields, derived from the pointer size.
  const uint64_t PtrBytes = PtrVT.getSizeInBits() / 8;

  uint64_t FrameOffset = PtrBytes;
  SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);

  uint64_t StackOffset = PtrBytes - 1;
  SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);

  uint64_t FPROffset = 1;
  SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);

  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  SDValue Chain = Op.getOperand(0);
  SDValue FieldPtr = Op.getOperand(1);
  uint64_t FieldOffset = 0;

  // Store first byte : number of int regs
  Chain = DAG.getTruncStore(Chain, dl, ArgGPR, FieldPtr,
                            MachinePointerInfo(SV), MVT::i8);
  FieldOffset = FPROffset;
  FieldPtr = DAG.getNode(ISD::ADD, dl, PtrVT, FieldPtr, ConstFPROffset);

  // Store second byte : number of float regs
  Chain = DAG.getTruncStore(Chain, dl, ArgFPR, FieldPtr,
                            MachinePointerInfo(SV, FieldOffset), MVT::i8);
  FieldOffset += StackOffset;
  FieldPtr = DAG.getNode(ISD::ADD, dl, PtrVT, FieldPtr, ConstStackOffset);

  // Store second word : arguments given on stack
  Chain = DAG.getStore(Chain, dl, StackOffsetFI, FieldPtr,
                       MachinePointerInfo(SV, FieldOffset));
  FieldOffset += FrameOffset;
  FieldPtr = DAG.getNode(ISD::ADD, dl, PtrVT, FieldPtr, ConstFrameOffset);

  // Store third word : arguments given in registers
  return DAG.getStore(Chain, dl, FR, FieldPtr,
                      MachinePointerInfo(SV, FieldOffset));
}

3480

3481

/// FPR - The set of FP registers that should be allocated for arguments

3482

/// on Darwin and AIX.

3483

static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,

3484

PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,

3485

PPC::F11, PPC::F12, PPC::F13};

3486

3487

/// CalculateStackSlotSize - Calculates the size reserved for this argument on

3488

/// the stack.

3489

static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,

3490

unsigned PtrByteSize) {

3491

unsigned ArgSize = ArgVT.getStoreSize();

3492

if (Flags.isByVal())

3493

ArgSize = Flags.getByValSize();

3494

3495

// Round up to multiples of the pointer size, except for array members,

3496

// which are always packed.

3497

if (!Flags.isInConsecutiveRegs())

3498

ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;

3499

3500

return ArgSize;

3501

}

3502

3503

/// CalculateStackSlotAlignment - Calculates the alignment of this argument

3504

/// on the stack.

3505

static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,

3506

ISD::ArgFlagsTy Flags,

3507

unsigned PtrByteSize) {

3508

Align Alignment(PtrByteSize);

3509

3510

// Altivec parameters are padded to a 16 byte boundary.

3511

if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||

3512

ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||

3513

ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||

3514

ArgVT == MVT::v1i128 || ArgVT == MVT::f128)

3515

Alignment = Align(16);

3516

3517

// ByVal parameters are aligned as requested.

3518

if (Flags.isByVal()) {

3519

auto BVAlign = Flags.getNonZeroByValAlign();

3520

if (BVAlign > PtrByteSize) {

3521

if (BVAlign.value() % PtrByteSize != 0)

3522

llvm_unreachable(::llvm::llvm_unreachable_internal("ByVal alignment is not a multiple of the pointer size"
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 3523)

3523

"ByVal alignment is not a multiple of the pointer size")::llvm::llvm_unreachable_internal("ByVal alignment is not a multiple of the pointer size"
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 3523);

3524

3525

Alignment = BVAlign;

3526

}

3527

}

3528

3529

// Array members are always packed to their original alignment.

3530

if (Flags.isInConsecutiveRegs()) {

3531

// If the array member was split into multiple registers, the first

3532

// needs to be aligned to the size of the full type. (Except for

3533

// ppcf128, which is only aligned as its f64 components.)

3534

if (Flags.isSplit() && OrigVT != MVT::ppcf128)

3535

Alignment = Align(OrigVT.getStoreSize());

3536

else

3537

Alignment = Align(ArgVT.getStoreSize());

3538

}

3539

3540

return Alignment;

3541

}

3542

3543

/// CalculateStackSlotUsed - Return whether this argument will use its

3544

/// stack slot (instead of being passed in registers). ArgOffset,

3545

/// AvailableFPRs, and AvailableVRs must hold the current argument

3546

/// position, and will be updated to account for this argument.

3547

static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags,

3548

unsigned PtrByteSize, unsigned LinkageSize,

3549

unsigned ParamAreaSize, unsigned &ArgOffset,

3550

unsigned &AvailableFPRs,

3551

unsigned &AvailableVRs) {

3552

bool UseMemory = false;

3553

3554

// Respect alignment of argument on the stack.

3555

Align Alignment =

3556

CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);

3557

ArgOffset = alignTo(ArgOffset, Alignment);

3558

// If there's no space left in the argument save area, we must

3559

// use memory (this check also catches zero-sized arguments).

3560

if (ArgOffset >= LinkageSize + ParamAreaSize)

3561

UseMemory = true;

3562

3563

// Allocate argument on the stack.

3564

ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);

3565

if (Flags.isInConsecutiveRegsLast())

3566

ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;

3567

// If we overran the argument save area, we must use memory

3568

// (this check catches arguments passed partially in memory)

3569

if (ArgOffset > LinkageSize + ParamAreaSize)

3570

UseMemory = true;

3571

3572

// However, if the argument is actually passed in an FPR or a VR,

3573

// we don't use memory after all.

3574

if (!Flags.isByVal()) {

3575

if (ArgVT == MVT::f32 || ArgVT == MVT::f64)

3576

if (AvailableFPRs > 0) {

3577

--AvailableFPRs;

3578

return false;

3579

}

3580

if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||

3581

ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||

3582

ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||

3583

ArgVT == MVT::v1i128 || ArgVT == MVT::f128)

3584

if (AvailableVRs > 0) {

3585

--AvailableVRs;

3586

return false;

3587

}

3588

}

3589

3590

return UseMemory;

3591

}

3592

3593

/// EnsureStackAlignment - Round stack frame size up from NumBytes to

3594

/// ensure minimum alignment required for target.

3595

static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,

3596

unsigned NumBytes) {

3597

return alignTo(NumBytes, Lowering->getStackAlign());

3598

}

3599

3600

/// Lower the incoming formal arguments by forwarding to the ABI-specific
/// implementation. The subtarget is tested in this order: AIX, 64-bit ELF,
/// 32-bit ELF; anything else is handled by the Darwin lowering. All
/// parameters are passed through unchanged and the callee's chain result is
/// returned.
SDValue PPCTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  if (Subtarget.isAIXABI())
    return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
                                    InVals);
  if (Subtarget.is64BitELFABI())
    return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
                                       InVals);
  if (Subtarget.is32BitELFABI())
    return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
                                       InVals);

  return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins, dl, DAG,
                                     InVals);
}

3617

3618

/// Lower incoming formal arguments for the 32-bit SVR4 ABI.
///
/// One value per assigned argument location is appended to InVals: a copy
/// from a live-in register for register locations, or a load from a fixed
/// stack object for memory locations. For variadic functions the GPR (and,
/// unless soft-float or SPE is in use, FPR) argument registers are also
/// spilled to a freshly created stack object so that va_arg can find them.
///
/// \returns the chain to continue from: a TokenFactor over the vararg
/// register spills if any were emitted, otherwise the incoming Chain.
SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  // 32-bit SVR4 ABI Stack Frame Layout:
  //              +-----------------------------------+
  //        +-->  |            Back chain             |
  //        |     +-----------------------------------+
  //        |     | Floating-point register save area |
  //        |     +-----------------------------------+
  //        |     |    General register save area     |
  //        |     +-----------------------------------+
  //        |     |          CR save word             |
  //        |     +-----------------------------------+
  //        |     |         VRSAVE save word          |
  //        |     +-----------------------------------+
  //        |     |         Alignment padding         |
  //        |     +-----------------------------------+
  //        |     |     Vector register save area     |
  //        |     +-----------------------------------+
  //        |     |       Local variable space        |
  //        |     +-----------------------------------+
  //        |     |        Parameter list area        |
  //        |     +-----------------------------------+
  //        |     |           LR save word            |
  //        |     +-----------------------------------+
  // SP-->  +---  |            Back chain             |
  //              +-----------------------------------+
  //
  // Specifications:
  //   System V Application Binary Interface PowerPC Processor Supplement
  //   AltiVec Technology Programming Interface Manual

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  EVT PtrVT = getPointerTy(MF.getDataLayout());
  // Potential tail calls could cause overwriting of argument stack slots.
  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
                       (CallConv == CallingConv::Fast));
  const Align PtrAlign(4);

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
                    *DAG.getContext());

  // Reserve space for the linkage area on the stack.
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  CCInfo.AllocateStack(LinkageSize, PtrAlign);
  if (useSoftFloat())
    CCInfo.PreAnalyzeFormalArguments(Ins);

  CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
  CCInfo.clearWasPPCF128();

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];

    // Arguments stored in registers.
    if (VA.isRegLoc()) {
      const TargetRegisterClass *RC;
      EVT ValVT = VA.getValVT();

      switch (ValVT.getSimpleVT().SimpleTy) {
        default:
          llvm_unreachable("ValVT not supported by formal arguments Lowering");
        case MVT::i1:
        case MVT::i32:
          RC = &PPC::GPRCRegClass;
          break;
        case MVT::f32:
          if (Subtarget.hasP8Vector())
            RC = &PPC::VSSRCRegClass;
          else if (Subtarget.hasSPE())
            RC = &PPC::GPRCRegClass;
          else
            RC = &PPC::F4RCRegClass;
          break;
        case MVT::f64:
          if (Subtarget.hasVSX())
            RC = &PPC::VSFRCRegClass;
          else if (Subtarget.hasSPE())
            // SPE passes doubles in GPR pairs.
            RC = &PPC::GPRCRegClass;
          else
            RC = &PPC::F8RCRegClass;
          break;
        // Every supported vector type lives in the VR register class.
        case MVT::v16i8:
        case MVT::v8i16:
        case MVT::v4i32:
        case MVT::v4f32:
        case MVT::v2f64:
        case MVT::v2i64:
          RC = &PPC::VRRCRegClass;
          break;
      }

      SDValue ArgValue;
      // Transform the arguments stored in physical registers into
      // virtual ones.
      if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
        // The double occupies this location and the next one; consume both.
        assert(i + 1 < e && "No second half of double precision argument");
        unsigned RegLo = MF.addLiveIn(VA.getLocReg(), RC);
        unsigned RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
        SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
        SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
        if (!Subtarget.isLittleEndian())
          std::swap (ArgValueLo, ArgValueHi);
        ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
                               ArgValueHi);
      } else {
        unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
        // i1 arguments are copied out as i32 and then truncated.
        ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
                                      ValVT == MVT::i1 ? MVT::i32 : ValVT);
        if (ValVT == MVT::i1)
          ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
      }

      InVals.push_back(ArgValue);
    } else {
      // Argument stored in memory.
      assert(VA.isMemLoc());

      // Get the extended size of the argument type in stack
      unsigned ArgSize = VA.getLocVT().getStoreSize();
      // Get the actual size of the argument type
      unsigned ObjSize = VA.getValVT().getStoreSize();
      unsigned ArgOffset = VA.getLocMemOffset();
      // Stack objects in PPC32 are right justified.
      ArgOffset += ArgSize - ObjSize;
      int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);

      // Create load nodes to retrieve arguments from the stack.
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      InVals.push_back(
          DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
    }
  }

  // Assign locations to all of the incoming aggregate by value arguments.
  // Aggregates passed by value are stored in the local variable space of the
  // caller's stack frame, right above the parameter list area.
  SmallVector<CCValAssign, 16> ByValArgLocs;
  CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                      ByValArgLocs, *DAG.getContext());

  // Reserve stack space for the allocations in CCInfo.
  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrAlign);

  CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);

  // Area that is at least reserved in the caller of this function.
  unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
  MinReservedArea = std::max(MinReservedArea, LinkageSize);

  // Set the size that is at least reserved in caller of this function.  Tail
  // call optimized function's reserved stack space needs to be aligned so that
  // taking the difference between two stack areas will result in an aligned
  // stack.
  MinReservedArea =
      EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
  FuncInfo->setMinReservedArea(MinReservedArea);

  SmallVector<SDValue, 8> MemOps;

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    static const MCPhysReg GPArgRegs[] = {
      PPC::R3, PPC::R4, PPC::R5, PPC::R6,
      PPC::R7, PPC::R8, PPC::R9, PPC::R10,
    };
    const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);

    static const MCPhysReg FPArgRegs[] = {
      PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
      PPC::F8
    };
    unsigned NumFPArgRegs = array_lengthof(FPArgRegs);

    // With soft-float or SPE no FPR argument registers are spilled.
    if (useSoftFloat() || hasSPE())
       NumFPArgRegs = 0;

    FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
    FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));

    // Make room for NumGPArgRegs and NumFPArgRegs.
    int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
                NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;

    FuncInfo->setVarArgsStackOffset(
      MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
                            CCInfo.getNextStackOffset(), true));

    FuncInfo->setVarArgsFrameIndex(
        MFI.CreateStackObject(Depth, Align(8), false));
    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

    // The fixed integer arguments of a variadic function are stored to the
    // VarArgsFrameIndex on the stack so that they may be loaded by
    // dereferencing the result of va_next.
    for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
      // Get an existing live-in vreg, or add a new one.
      unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
      if (!VReg)
        VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
      SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
      MemOps.push_back(Store);
      // Increment the address by four for the next argument to store
      SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }

    // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
    // is set.
    // The double arguments are stored to the VarArgsFrameIndex
    // on the stack.
    for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
      // Get an existing live-in vreg, or add a new one.
      unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
      if (!VReg)
        VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
      SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
      MemOps.push_back(Store);
      // Increment the address by eight for the next argument to store
      SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
                                         PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }
  }

  // Tie all vararg register spills into the returned chain.
  if (!MemOps.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);

  return Chain;
}

3867

3868

// PPC64 passes i8, i16, and i32 values in i64 registers. Promote

3869

// value to MVT::i64 and then truncate to the correct register size.

3870

SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,

3871

EVT ObjectVT, SelectionDAG &DAG,

3872

SDValue ArgVal,

3873

const SDLoc &dl) const {

3874

if (Flags.isSExt())

3875

ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,

3876

DAG.getValueType(ObjectVT));

3877

else if (Flags.isZExt())

3878

ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,

3879

DAG.getValueType(ObjectVT));

3880

3881

return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);

3882

}

3883

3884

SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(

3885

SDValue Chain, CallingConv::ID CallConv, bool isVarArg,

3886

const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,

3887

SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

3888

// TODO: add description of PPC stack frame format, or at least some docs.

3889

//

3890

bool isELFv2ABI = Subtarget.isELFv2ABI();

3891

bool isLittleEndian = Subtarget.isLittleEndian();

3892

MachineFunction &MF = DAG.getMachineFunction();

3893

MachineFrameInfo &MFI = MF.getFrameInfo();

3894

PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

3895

3896

assert(!(CallConv == CallingConv::Fast && isVarArg) &&((!(CallConv == CallingConv::Fast && isVarArg) &&
"fastcc not supported on varargs functions") ? static_cast<
void> (0) : __assert_fail ("!(CallConv == CallingConv::Fast && isVarArg) && \"fastcc not supported on varargs functions\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 3897, __PRETTY_FUNCTION__))

3897

"fastcc not supported on varargs functions")((!(CallConv == CallingConv::Fast && isVarArg) &&
"fastcc not supported on varargs functions") ? static_cast<
void> (0) : __assert_fail ("!(CallConv == CallingConv::Fast && isVarArg) && \"fastcc not supported on varargs functions\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 3897, __PRETTY_FUNCTION__));

3898

3899

EVT PtrVT = getPointerTy(MF.getDataLayout());

3900

// Potential tail calls could cause overwriting of argument stack slots.

3901

bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&

3902

(CallConv == CallingConv::Fast));

3903

unsigned PtrByteSize = 8;

3904

unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();

3905

3906

static const MCPhysReg GPR[] = {

3907

PPC::X3, PPC::X4, PPC::X5, PPC::X6,

3908

PPC::X7, PPC::X8, PPC::X9, PPC::X10,

3909

};

3910

static const MCPhysReg VR[] = {

3911

PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,

3912

PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13

3913

};

3914

3915

const unsigned Num_GPR_Regs = array_lengthof(GPR);

3916

const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;

3917

const unsigned Num_VR_Regs = array_lengthof(VR);

3918

3919

// Do a first pass over the arguments to determine whether the ABI

3920

// guarantees that our caller has allocated the parameter save area

3921

// on its stack frame. In the ELFv1 ABI, this is always the case;

3922

// in the ELFv2 ABI, it is true if this is a vararg function or if

3923

// any parameter is located in a stack slot.

3924

3925

bool HasParameterArea = !isELFv2ABI || isVarArg;

3926

unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;

3927

unsigned NumBytes = LinkageSize;

3928

unsigned AvailableFPRs = Num_FPR_Regs;

3929

unsigned AvailableVRs = Num_VR_Regs;

3930

for (unsigned i = 0, e = Ins.size(); i != e; ++i) {

3931

if (Ins[i].Flags.isNest())

3932

continue;

3933

3934

if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,

3935

PtrByteSize, LinkageSize, ParamAreaSize,

3936

NumBytes, AvailableFPRs, AvailableVRs))

3937

HasParameterArea = true;

3938

}

3939

3940

// Add DAG nodes to load the arguments or copy them out of registers. On

3941

// entry to a function on PPC, the arguments start after the linkage area,

3942

// although the first ones are often in registers.

3943

3944

unsigned ArgOffset = LinkageSize;

3945

unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;

3946

SmallVector<SDValue, 8> MemOps;

3947

Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();

3948

unsigned CurArgIdx = 0;

3949

for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {

3950

SDValue ArgVal;

3951

bool needsLoad = false;

3952

EVT ObjectVT = Ins[ArgNo].VT;

3953

EVT OrigVT = Ins[ArgNo].ArgVT;

3954

unsigned ObjSize = ObjectVT.getStoreSize();

3955

unsigned ArgSize = ObjSize;

3956

ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;

3957

if (Ins[ArgNo].isOrigArg()) {

3958

std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);

3959

CurArgIdx = Ins[ArgNo].getOrigArgIndex();

3960

}

3961

// We re-align the argument offset for each argument, except when using the

3962

// fast calling convention, when we need to make sure we do that only when

3963

// we'll actually use a stack slot.

3964

unsigned CurArgOffset;

3965

Align Alignment;

3966

auto ComputeArgOffset = [&]() {

3967

/* Respect alignment of argument on the stack. */

3968

Alignment =

3969

CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);

3970

ArgOffset = alignTo(ArgOffset, Alignment);

3971

CurArgOffset = ArgOffset;

3972

};

3973

3974

if (CallConv != CallingConv::Fast) {

3975

ComputeArgOffset();

3976

3977

/* Compute GPR index associated with argument offset. */

3978

GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;

3979

GPR_idx = std::min(GPR_idx, Num_GPR_Regs);

3980

}

3981

3982

// FIXME the codegen can be much improved in some cases.

3983

// We do not have to keep everything in memory.

3984

if (Flags.isByVal()) {

3985

assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit")((Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit"
) ? static_cast<void> (0) : __assert_fail ("Ins[ArgNo].isOrigArg() && \"Byval arguments cannot be implicit\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 3985, __PRETTY_FUNCTION__));

3986

3987

if (CallConv == CallingConv::Fast)

3988

ComputeArgOffset();

3989

3990

// ObjSize is the true size, ArgSize rounded up to multiple of registers.

3991

ObjSize = Flags.getByValSize();

3992

ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;

3993

// Empty aggregate parameters do not take up registers. Examples:

3994

// struct { } a;

3995

// union { } b;

3996

// int c[0];

3997

// etc. However, we have to provide a place-holder in InVals, so

3998

// pretend we have an 8-byte item at the current address for that

3999

// purpose.

4000

if (!ObjSize) {

4001

int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);

4002

SDValue FIN = DAG.getFrameIndex(FI, PtrVT);

4003

InVals.push_back(FIN);

4004

continue;

4005

}

4006

4007

// Create a stack object covering all stack doublewords occupied

4008

// by the argument. If the argument is (fully or partially) on

4009

// the stack, or if the argument is fully in registers but the

4010

// caller has allocated the parameter save anyway, we can refer

4011

// directly to the caller's stack frame. Otherwise, create a

4012

// local copy in our own frame.

4013

int FI;

4014

if (HasParameterArea ||

4015

ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)

4016

FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);

4017

else

4018

FI = MFI.CreateStackObject(ArgSize, Alignment, false);

4019

SDValue FIN = DAG.getFrameIndex(FI, PtrVT);

4020

4021

// Handle aggregates smaller than 8 bytes.

4022

if (ObjSize < PtrByteSize) {

4023

// The value of the object is its address, which differs from the

4024

// address of the enclosing doubleword on big-endian systems.

4025

SDValue Arg = FIN;

4026

if (!isLittleEndian) {

4027

SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);

4028

Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);

4029

}

4030

InVals.push_back(Arg);

4031

4032

if (GPR_idx != Num_GPR_Regs) {

4033

unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);

4034

FuncInfo->addLiveInAttr(VReg, Flags);

4035

SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);

4036

SDValue Store;

4037

4038

if (ObjSize==1 || ObjSize==2 || ObjSize==4) {

4039

EVT ObjType = (ObjSize == 1 ? MVT::i8 :

4040

(ObjSize == 2 ? MVT::i16 : MVT::i32));

4041

Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,

4042

MachinePointerInfo(&*FuncArg), ObjType);

4043

} else {

4044

// For sizes that don't fit a truncating store (3, 5, 6, 7),

4045

// store the whole register as-is to the parameter save area

4046

// slot.

4047

Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,

4048

MachinePointerInfo(&*FuncArg));

4049

}

4050

4051

MemOps.push_back(Store);

4052

}

4053

// Whether we copied from a register or not, advance the offset

4054

// into the parameter save area by a full doubleword.

4055

ArgOffset += PtrByteSize;

4056

continue;

4057

}

4058

4059

// The value of the object is its address, which is the address of

4060

// its first stack doubleword.

4061

InVals.push_back(FIN);

4062

4063

// Store whatever pieces of the object are in registers to memory.

4064

for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {

4065

if (GPR_idx == Num_GPR_Regs)

4066

break;

4067

4068

unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);

4069

FuncInfo->addLiveInAttr(VReg, Flags);

4070

SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);

4071

SDValue Addr = FIN;

4072

if (j) {

4073

SDValue Off = DAG.getConstant(j, dl, PtrVT);

4074

Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);

4075

}

4076

SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr,

4077

MachinePointerInfo(&*FuncArg, j));

4078

MemOps.push_back(Store);

4079

++GPR_idx;

4080

}

4081

ArgOffset += ArgSize;

4082

continue;

4083

}

4084

4085

switch (ObjectVT.getSimpleVT().SimpleTy) {

4086

default: llvm_unreachable("Unhandled argument type!")::llvm::llvm_unreachable_internal("Unhandled argument type!",
"/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 4086);

4087

case MVT::i1:

4088

case MVT::i32:

4089

case MVT::i64:

4090

if (Flags.isNest()) {

4091

// The 'nest' parameter, if any, is passed in R11.

4092

unsigned VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);

4093

ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

4094

4095

if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)

4096

ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);

4097

4098

break;

4099

}

4100

4101

// These can be scalar arguments or elements of an integer array type

4102

// passed directly. Clang may use those instead of "byval" aggregate

4103

// types to avoid forcing arguments to memory unnecessarily.

4104

if (GPR_idx != Num_GPR_Regs) {

4105

unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);

4106

FuncInfo->addLiveInAttr(VReg, Flags);

4107

ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

4108

4109

if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)

4110

// PPC64 passes i8, i16, and i32 values in i64 registers. Promote

4111

// value to MVT::i64 and then truncate to the correct register size.

4112

ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);

4113

} else {

4114

if (CallConv == CallingConv::Fast)

4115

ComputeArgOffset();

4116

4117

needsLoad = true;

4118

ArgSize = PtrByteSize;

4119

}

4120

if (CallConv != CallingConv::Fast || needsLoad)

4121

ArgOffset += 8;

4122

break;

4123

4124

case MVT::f32:

4125

case MVT::f64:

4126

// These can be scalar arguments or elements of a float array type

4127

// passed directly. The latter are used to implement ELFv2 homogenous

4128

// float aggregates.

4129

if (FPR_idx != Num_FPR_Regs) {

4130

unsigned VReg;

4131

4132

if (ObjectVT == MVT::f32)

4133

VReg = MF.addLiveIn(FPR[FPR_idx],

4134

Subtarget.hasP8Vector()

4135

? &PPC::VSSRCRegClass

4136

: &PPC::F4RCRegClass);

4137

else

4138

VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()

4139

? &PPC::VSFRCRegClass

4140

: &PPC::F8RCRegClass);

4141

4142

ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);

4143

++FPR_idx;

4144

} else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {

4145

// FIXME: We may want to re-enable this for CallingConv::Fast on the P8

4146

// once we support fp <-> gpr moves.

4147

4148

// This can only ever happen in the presence of f32 array types,

4149

// since otherwise we never run out of FPRs before running out

4150

// of GPRs.

4151

unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);

4152

FuncInfo->addLiveInAttr(VReg, Flags);

4153

ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

4154

4155

if (ObjectVT == MVT::f32) {

4156

if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))

4157

ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,

4158

DAG.getConstant(32, dl, MVT::i32));

4159

ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);

4160

}

4161

4162

ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);

4163

} else {

4164

if (CallConv == CallingConv::Fast)

4165

ComputeArgOffset();

4166

4167

needsLoad = true;

4168

}

4169

4170

// When passing an array of floats, the array occupies consecutive

4171

// space in the argument area; only round up to the next doubleword

4172

// at the end of the array. Otherwise, each float takes 8 bytes.

4173

if (CallConv != CallingConv::Fast || needsLoad) {

4174

ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;

4175

ArgOffset += ArgSize;

4176

if (Flags.isInConsecutiveRegsLast())

4177

ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;

4178

}

4179

break;

4180

case MVT::v4f32:

4181

case MVT::v4i32:

4182

case MVT::v8i16:

4183

case MVT::v16i8:

4184

case MVT::v2f64:

4185

case MVT::v2i64:

4186

case MVT::v1i128:

4187

case MVT::f128:

4188

// These can be scalar arguments or elements of a vector array type

4189

// passed directly. The latter are used to implement ELFv2 homogenous

4190

// vector aggregates.

4191

if (VR_idx != Num_VR_Regs) {

4192

unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);

4193

ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);

4194

++VR_idx;

4195

} else {

4196

if (CallConv == CallingConv::Fast)

4197

ComputeArgOffset();

4198

needsLoad = true;

4199

}

4200

if (CallConv != CallingConv::Fast || needsLoad)

4201

ArgOffset += 16;

4202

break;

4203

}

4204

4205

// We need to load the argument to a virtual register if we determined

4206

// above that we ran out of physical registers of the appropriate type.

4207

if (needsLoad) {

4208

if (ObjSize < ArgSize && !isLittleEndian)

4209

CurArgOffset += ArgSize - ObjSize;

4210

int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);

4211

SDValue FIN = DAG.getFrameIndex(FI, PtrVT);

4212

ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());

4213

}

4214

4215

InVals.push_back(ArgVal);

4216

}

4217

4218

// Area that is at least reserved in the caller of this function.

4219

unsigned MinReservedArea;

4220

if (HasParameterArea)

4221

MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);

4222

else

4223

MinReservedArea = LinkageSize;

4224

4225

// Set the size that is at least reserved in caller of this function. Tail

4226

// call optimized functions' reserved stack space needs to be aligned so that

4227

// taking the difference between two stack areas will result in an aligned

4228

// stack.

4229

MinReservedArea =

4230

EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);

4231

FuncInfo->setMinReservedArea(MinReservedArea);

4232

4233

// If the function takes variable number of arguments, make a frame index for

4234

// the start of the first vararg value... for expansion of llvm.va_start.

4235

// On ELFv2ABI spec, it writes:

4236

// C programs that are intended to be *portable* across different compilers

4237

// and architectures must use the header file <stdarg.h> to deal with variable

4238

// argument lists.

4239

if (isVarArg && MFI.hasVAStart()) {

4240

int Depth = ArgOffset;

4241

4242

FuncInfo->setVarArgsFrameIndex(

4243

MFI.CreateFixedObject(PtrByteSize, Depth, true));

4244

SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

4245

4246

// If this function is vararg, store any remaining integer argument regs

4247

// to their spots on the stack so that they may be loaded by dereferencing

4248

// the result of va_next.

4249

for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;

4250

GPR_idx < Num_GPR_Regs; ++GPR_idx) {

4251

unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);

4252

SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);

4253

SDValue Store =

4254

DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());

4255

MemOps.push_back(Store);

4256

// Increment the address by four for the next argument to store

4257

SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);

4258

FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);

4259

}

4260

}

4261

4262

if (!MemOps.empty())

4263

Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);

4264

4265

return Chain;

4266

}

4267

4268

/// Lower the incoming formal arguments described by \p Ins for the Darwin
/// ABI path.
///
/// For every entry of \p Ins exactly one value is appended to \p InVals:
/// a copy from the argument register, a load from a fixed stack object, or
/// (for byval arguments) the frame index of the object itself. Register
/// pieces of byval aggregates, and the unused GPRs of a vararg function, are
/// stored back to their stack slots. The minimum reserved area and, for
/// vararg functions, the varargs frame index are recorded in the
/// PPCFunctionInfo of the current function.
///
/// \returns \p Chain, or a TokenFactor of the emitted stores when there are
/// any.
SDValue PPCTargetLowering::LowerFormalArguments_Darwin(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  // TODO: add description of PPC stack frame format, or at least some docs.
  //
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  EVT PtrVT = getPointerTy(MF.getDataLayout());
  bool isPPC64 = PtrVT == MVT::i64;
  // Potential tail calls could cause overwriting of argument stack slots.
  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
                       (CallConv == CallingConv::Fast));
  unsigned PtrByteSize = isPPC64 ? 8 : 4;
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  unsigned ArgOffset = LinkageSize;
  // Area that is at least reserved in caller of this function.
  unsigned MinReservedArea = ArgOffset;

  static const MCPhysReg GPR_32[] = {           // 32-bit registers.
    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
  };
  static const MCPhysReg GPR_64[] = {           // 64-bit registers.
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };

  const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
  const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
  const unsigned Num_VR_Regs  = array_lengthof( VR);

  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;

  const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;

  // In 32-bit non-varargs functions, the stack space for vectors is after the
  // stack space for non-vectors.  We do not use this space unless we have
  // too many vectors to fit in registers, something that only occurs in
  // constructed examples:), but we have to walk the arglist to figure
  // that out...for the pathological case, compute VecArgOffset as the
  // start of the vector parameter area.  Computing VecArgOffset is the
  // entire point of the following loop.
  unsigned VecArgOffset = ArgOffset;
  if (!isVarArg && !isPPC64) {
    for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e;
         ++ArgNo) {
      EVT ObjectVT = Ins[ArgNo].VT;
      ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;

      if (Flags.isByVal()) {
        // ObjSize is the true size, ArgSize rounded up to multiple of regs.
        unsigned ObjSize = Flags.getByValSize();
        unsigned ArgSize =
                ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
        VecArgOffset += ArgSize;
        continue;
      }

      switch(ObjectVT.getSimpleVT().SimpleTy) {
      default: llvm_unreachable("Unhandled argument type!");
      case MVT::i1:
      case MVT::i32:
      case MVT::f32:
        VecArgOffset += 4;
        break;
      case MVT::i64:  // PPC64
      case MVT::f64:
        // FIXME: We are guaranteed to be !isPPC64 at this point.
        // Does MVT::i64 apply?
        VecArgOffset += 8;
        break;
      case MVT::v4f32:
      case MVT::v4i32:
      case MVT::v8i16:
      case MVT::v16i8:
        // Nothing to do, we're only looking at Nonvector args here.
        break;
      }
    }
  }
  // We've found where the vector parameter area in memory is.  Skip the
  // first 12 parameters; these don't use that memory.
  VecArgOffset = ((VecArgOffset+15)/16)*16;
  VecArgOffset += 12*16;

  // Add DAG nodes to load the arguments or copy them out of registers.  On
  // entry to a function on PPC, the arguments start after the linkage area,
  // although the first ones are often in registers.

  SmallVector<SDValue, 8> MemOps;
  unsigned nAltivecParamsAtEnd = 0;
  Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
  unsigned CurArgIdx = 0;
  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
    SDValue ArgVal;
    bool needsLoad = false;
    EVT ObjectVT = Ins[ArgNo].VT;
    unsigned ObjSize = ObjectVT.getSizeInBits()/8;
    unsigned ArgSize = ObjSize;
    ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
    // Keep FuncArg pointing at the IR argument this piece originates from.
    if (Ins[ArgNo].isOrigArg()) {
      std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
      CurArgIdx = Ins[ArgNo].getOrigArgIndex();
    }
    unsigned CurArgOffset = ArgOffset;

    // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
    if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
        ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
      if (isVarArg || isPPC64) {
        MinReservedArea = ((MinReservedArea+15)/16)*16;
        MinReservedArea += CalculateStackSlotSize(ObjectVT,
                                                  Flags,
                                                  PtrByteSize);
      } else  nAltivecParamsAtEnd++;
    } else
      // Calculate min reserved area.
      MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
                                                Flags,
                                                PtrByteSize);

    // FIXME the codegen can be much improved in some cases.
    // We do not have to keep everything in memory.
    if (Flags.isByVal()) {
      assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");

      // ObjSize is the true size, ArgSize rounded up to multiple of registers.
      ObjSize = Flags.getByValSize();
      ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
      // Objects of size 1 and 2 are right justified, everything else is
      // left justified.  This means the memory address is adjusted forwards.
      if (ObjSize==1 || ObjSize==2) {
        CurArgOffset = CurArgOffset + (4 - ObjSize);
      }
      // The value of the object is its address.
      int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, false, true);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      InVals.push_back(FIN);
      if (ObjSize==1 || ObjSize==2) {
        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg;
          if (isPPC64)
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
          else
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
          EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;
          SDValue Store =
              DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
                                MachinePointerInfo(&*FuncArg), ObjType);
          MemOps.push_back(Store);
          ++GPR_idx;
        }

        ArgOffset += PtrByteSize;

        continue;
      }
      for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
        // Store whatever pieces of the object are in registers
        // to memory.  ArgOffset will be the address of the beginning
        // of the object.
        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg;
          if (isPPC64)
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
          else
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
          int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
          SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
          SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
                                       MachinePointerInfo(&*FuncArg, j));
          MemOps.push_back(Store);
          ++GPR_idx;
          ArgOffset += PtrByteSize;
        } else {
          // Out of GPRs: skip over the part of the object not yet counted.
          ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
          break;
        }
      }
      continue;
    }

    switch (ObjectVT.getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unhandled argument type!");
    case MVT::i1:
    case MVT::i32:
      if (!isPPC64) {
        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
          ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);

          if (ObjectVT == MVT::i1)
            ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal);

          ++GPR_idx;
        } else {
          needsLoad = true;
          ArgSize = PtrByteSize;
        }
        // All int arguments reserve stack space in the Darwin ABI.
        ArgOffset += PtrByteSize;
        break;
      }
      LLVM_FALLTHROUGH;
    case MVT::i64:  // PPC64
      if (GPR_idx != Num_GPR_Regs) {
        unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
          // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
          // value to MVT::i64 and then truncate to the correct register size.
          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);

        ++GPR_idx;
      } else {
        needsLoad = true;
        ArgSize = PtrByteSize;
      }
      // All int arguments reserve stack space in the Darwin ABI.
      ArgOffset += 8;
      break;

    case MVT::f32:
    case MVT::f64:
      // Every 4 bytes of argument space consumes one of the GPRs available for
      // argument passing.
      if (GPR_idx != Num_GPR_Regs) {
        ++GPR_idx;
        if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
          ++GPR_idx;
      }
      if (FPR_idx != Num_FPR_Regs) {
        unsigned VReg;

        if (ObjectVT == MVT::f32)
          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
        else
          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);

        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        ++FPR_idx;
      } else {
        needsLoad = true;
      }

      // All FP arguments reserve stack space in the Darwin ABI.
      ArgOffset += isPPC64 ? 8 : ObjSize;
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      // Note that vector arguments in registers don't reserve stack space,
      // except in varargs functions.
      if (VR_idx != Num_VR_Regs) {
        unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        if (isVarArg) {
          while ((ArgOffset % 16) != 0) {
            ArgOffset += PtrByteSize;
            if (GPR_idx != Num_GPR_Regs)
              GPR_idx++;
          }
          ArgOffset += 16;
          GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
        }
        ++VR_idx;
      } else {
        if (!isVarArg && !isPPC64) {
          // Vectors go after all the nonvectors.
          CurArgOffset = VecArgOffset;
          VecArgOffset += 16;
        } else {
          // Vectors are aligned.
          ArgOffset = ((ArgOffset+15)/16)*16;
          CurArgOffset = ArgOffset;
          ArgOffset += 16;
        }
        needsLoad = true;
      }
      break;
    }

    // We need to load the argument to a virtual register if we determined above
    // that we ran out of physical registers of the appropriate type.
    if (needsLoad) {
      int FI = MFI.CreateFixedObject(ObjSize,
                                     CurArgOffset + (ArgSize - ObjSize),
                                     isImmutable);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
    }

    InVals.push_back(ArgVal);
  }

  // Allow for Altivec parameters at the end, if needed.
  if (nAltivecParamsAtEnd) {
    MinReservedArea = ((MinReservedArea+15)/16)*16;
    MinReservedArea += 16*nAltivecParamsAtEnd;
  }

  // Area that is at least reserved in the caller of this function.
  MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize);

  // Set the size that is at least reserved in caller of this function.  Tail
  // call optimized functions' reserved stack space needs to be aligned so that
  // taking the difference between two stack areas will result in an aligned
  // stack.
  MinReservedArea =
      EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
  FuncInfo->setMinReservedArea(MinReservedArea);

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    int Depth = ArgOffset;

    FuncInfo->setVarArgsFrameIndex(
      MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
                            Depth, true));
    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

    // If this function is vararg, store any remaining integer argument regs
    // to their spots on the stack so that they may be loaded by dereferencing
    // the result of va_next.
    for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
      unsigned VReg;

      if (isPPC64)
        VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
      else
        VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
      SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
      MemOps.push_back(Store);
      // Increment the address by the pointer size for the next argument to
      // store.
      SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }
  }

  if (!MemOps.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);

  return Chain;
}

4627

4628

/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be

4629

/// adjusted to accommodate the arguments for the tailcall.

4630

static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,

4631

unsigned ParamSize) {

4632

4633

if (!isTailCall) return 0;

4634

4635

PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();

4636

unsigned CallerMinReservedArea = FI->getMinReservedArea();

4637

int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;

4638

// Remember only if the new adjustment is bigger.

4639

if (SPDiff < FI->getTailCallSPDelta())

4640

FI->setTailCallSPDelta(SPDiff);

4641

4642

return SPDiff;

4643

}

4644

4645

// Forward declaration; the definition is not in this part of the file.
static bool isFunctionGlobalAddress(SDValue Callee);

4646

4647

static bool callsShareTOCBase(const Function *Caller, SDValue Callee,

4648

const TargetMachine &TM) {

4649

// It does not make sense to call callsShareTOCBase() with a caller that

4650

// is PC Relative since PC Relative callers do not have a TOC.

4651

#ifndef NDEBUG

4652

const PPCSubtarget *STICaller = &TM.getSubtarget<PPCSubtarget>(*Caller);

4653

assert(!STICaller->isUsingPCRelativeCalls() &&((!STICaller->isUsingPCRelativeCalls() && "PC Relative callers do not have a TOC and cannot share a TOC Base"
) ? static_cast<void> (0) : __assert_fail ("!STICaller->isUsingPCRelativeCalls() && \"PC Relative callers do not have a TOC and cannot share a TOC Base\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 4654, __PRETTY_FUNCTION__))

4654

"PC Relative callers do not have a TOC and cannot share a TOC Base")((!STICaller->isUsingPCRelativeCalls() && "PC Relative callers do not have a TOC and cannot share a TOC Base"
) ? static_cast<void> (0) : __assert_fail ("!STICaller->isUsingPCRelativeCalls() && \"PC Relative callers do not have a TOC and cannot share a TOC Base\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 4654, __PRETTY_FUNCTION__));

4655

#endif

4656

4657

// Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols

4658

// don't have enough information to determine if the caller and callee share

4659

// the same TOC base, so we have to pessimistically assume they don't for

4660

// correctness.

4661

GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);

4662

if (!G)

4663

return false;

4664

4665

const GlobalValue *GV = G->getGlobal();

4666

4667

// If the callee is preemptable, then the static linker will use a plt-stub

4668

// which saves the toc to the stack, and needs a nop after the call

4669

// instruction to convert to a toc-restore.

4670

if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), GV))

4671

return false;

4672

4673

// Functions with PC Relative enabled may clobber the TOC in the same DSO.

4674

// We may need a TOC restore in the situation where the caller requires a

4675

// valid TOC but the callee is PC Relative and does not.

4676

const Function *F = dyn_cast<Function>(GV);

4677

const GlobalAlias *Alias = dyn_cast<GlobalAlias>(GV);

4678

4679

// If we have an Alias we can try to get the function from there.

4680

if (Alias) {

4681

const GlobalObject *GlobalObj = Alias->getBaseObject();

4682

F = dyn_cast<Function>(GlobalObj);

4683

}

4684

4685

// If we still have no valid function pointer we do not have enough

4686

// information to determine if the callee uses PC Relative calls so we must

4687

// assume that it does.

4688

if (!F)

4689

return false;

4690

4691

// If the callee uses PC Relative we cannot guarantee that the callee won't

4692

// clobber the TOC of the caller and so we must assume that the two

4693

// functions do not share a TOC base.

4694

const PPCSubtarget *STICallee = &TM.getSubtarget<PPCSubtarget>(*F);

4695

if (STICallee->isUsingPCRelativeCalls())

4696

return false;

4697

4698

// The medium and large code models are expected to provide a sufficiently

4699

// large TOC to provide all data addressing needs of a module with a

4700

// single TOC.

4701

if (CodeModel::Medium == TM.getCodeModel() ||

4702

CodeModel::Large == TM.getCodeModel())

4703

return true;

4704

4705

// Otherwise we need to ensure callee and caller are in the same section,

4706

// since the linker may allocate multiple TOCs, and we don't know which

4707

// sections will belong to the same TOC base.

4708

if (!GV->isStrongDefinitionForLinker())

4709

return false;

4710

4711

// Any explicitly-specified sections and section prefixes must also match.

4712

// Also, if we're using -ffunction-sections, then each function is always in

4713

// a different section (the same is true for COMDAT functions).

4714

if (TM.getFunctionSections() || GV->hasComdat() || Caller->hasComdat() ||

4715

GV->getSection() != Caller->getSection())

4716

return false;

4717

if (const auto *F = dyn_cast<Function>(GV)) {

4718

if (F->getSectionPrefix() != Caller->getSectionPrefix())

4719

return false;

4720

}

4721

4722

return true;

4723

}

4724

4725

static bool

4726

needStackSlotPassParameters(const PPCSubtarget &Subtarget,

4727

const SmallVectorImpl<ISD::OutputArg> &Outs) {

4728

assert(Subtarget.is64BitELFABI())((Subtarget.is64BitELFABI()) ? static_cast<void> (0) : __assert_fail
("Subtarget.is64BitELFABI()", "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 4728, __PRETTY_FUNCTION__));

4729

4730

const unsigned PtrByteSize = 8;

4731

const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();

4732

4733

static const MCPhysReg GPR[] = {

4734

PPC::X3, PPC::X4, PPC::X5, PPC::X6,

4735

PPC::X7, PPC::X8, PPC::X9, PPC::X10,

4736

};

4737

static const MCPhysReg VR[] = {

4738

PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,

4739

PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13

4740

};

4741

4742

const unsigned NumGPRs = array_lengthof(GPR);

4743

const unsigned NumFPRs = 13;

4744

const unsigned NumVRs = array_lengthof(VR);

4745

const unsigned ParamAreaSize = NumGPRs * PtrByteSize;

4746

4747

unsigned NumBytes = LinkageSize;

4748

unsigned AvailableFPRs = NumFPRs;

4749

unsigned AvailableVRs = NumVRs;

4750

4751

for (const ISD::OutputArg& Param : Outs) {

4752

if (Param.Flags.isNest()) continue;

4753

4754

if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, PtrByteSize,

4755

LinkageSize, ParamAreaSize, NumBytes,

4756

AvailableFPRs, AvailableVRs))

4757

return true;

4758

}

4759

return false;

4760

}

4761

4762

static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) {

4763

if (CB.arg_size() != CallerFn->arg_size())

4764

return false;

4765

4766

auto CalleeArgIter = CB.arg_begin();

4767

auto CalleeArgEnd = CB.arg_end();

4768

Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();

4769

4770

for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {

4771

const Value* CalleeArg = *CalleeArgIter;

4772

const Value* CallerArg = &(*CallerArgIter);

4773

if (CalleeArg == CallerArg)

4774

continue;

4775

4776

// e.g. @caller([4 x i64] %a, [4 x i64] %b) {

4777

// tail call @callee([4 x i64] undef, [4 x i64] %b)

4778

// }

4779

// 1st argument of callee is undef and has the same type as caller.

4780

if (CalleeArg->getType() == CallerArg->getType() &&

4781

isa<UndefValue>(CalleeArg))

4782

continue;

4783

4784

return false;

4785

}

4786

4787

return true;

4788

}

4789

4790

// Returns true if TCO is possible between the callers and callees

4791

// calling conventions.

4792

static bool

4793

areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,

4794

CallingConv::ID CalleeCC) {

4795

// Tail calls are possible with fastcc and ccc.

4796

auto isTailCallableCC = [] (CallingConv::ID CC){

4797

return CC == CallingConv::C || CC == CallingConv::Fast;

4798

};

4799

if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))

4800

return false;

4801

4802

// We can safely tail call both fastcc and ccc callees from a c calling

4803

// convention caller. If the caller is fastcc, we may have less stack space

4804

// than a non-fastcc caller with the same signature so disable tail-calls in

4805

// that case.

4806

return CallerCC == CallingConv::C || CallerCC == CalleeCC;

4807

}

4808

4809

bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(

4810

SDValue Callee, CallingConv::ID CalleeCC, const CallBase *CB, bool isVarArg,

4811

const SmallVectorImpl<ISD::OutputArg> &Outs,

4812

const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {

4813

bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;

4814

4815

if (DisableSCO && !TailCallOpt) return false;

4816

4817

// Variadic argument functions are not supported.

4818

if (isVarArg) return false;

4819

4820

auto &Caller = DAG.getMachineFunction().getFunction();

4821

// Check that the calling conventions are compatible for tco.

4822

if (!areCallingConvEligibleForTCO_64SVR4(Caller.getCallingConv(), CalleeCC))

4823

return false;

4824

4825

// Caller contains any byval parameter is not supported.

4826

if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))

4827

return false;

4828

4829

// Callee contains any byval parameter is not supported, too.

4830

// Note: This is a quick work around, because in some cases, e.g.

4831

// caller's stack size > callee's stack size, we are still able to apply

4832

// sibling call optimization. For example, gcc is able to do SCO for caller1

4833

// in the following example, but not for caller2.

4834

// struct test {

4835

// long int a;

4836

// char ary[56];

4837

// } gTest;

4838

// __attribute__((noinline)) int callee(struct test v, struct test *b) {

4839

// b->a = v.a;

4840

// return 0;

4841

// }

4842

// void caller1(struct test a, struct test c, struct test *b) {

4843

// callee(gTest, b); }

4844

// void caller2(struct test *b) { callee(gTest, b); }

4845

if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))

4846

return false;

4847

4848

// If callee and caller use different calling conventions, we cannot pass

4849

// parameters on stack since offsets for the parameter area may be different.

4850

if (Caller.getCallingConv() != CalleeCC &&

4851

needStackSlotPassParameters(Subtarget, Outs))

4852

return false;

4853

4854

// All variants of 64-bit ELF ABIs without PC-Relative addressing require that

4855

// the caller and callee share the same TOC for TCO/SCO. If the caller and

4856

// callee potentially have different TOC bases then we cannot tail call since

4857

// we need to restore the TOC pointer after the call.

4858

// ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977

4859

// We cannot guarantee this for indirect calls or calls to external functions.

4860

// When PC-Relative addressing is used, the concept of the TOC is no longer

4861

// applicable so this check is not required.

4862

// Check first for indirect calls.

4863

if (!Subtarget.isUsingPCRelativeCalls() &&

4864

!isFunctionGlobalAddress(Callee) && !isa<ExternalSymbolSDNode>(Callee))

4865

return false;

4866

4867

// Check if we share the TOC base.

4868

if (!Subtarget.isUsingPCRelativeCalls() &&

4869

!callsShareTOCBase(&Caller, Callee, getTargetMachine()))

4870

return false;

4871

4872

// TCO allows altering callee ABI, so we don't have to check further.

4873

if (CalleeCC == CallingConv::Fast && TailCallOpt)

4874

return true;

4875

4876

if (DisableSCO) return false;

4877

4878

// If callee use the same argument list that caller is using, then we can

4879

// apply SCO on this case. If it is not, then we need to check if callee needs

4880

// stack for passing arguments.

4881

// PC Relative tail calls may not have a CallBase.

4882

// If there is no CallBase we cannot verify if we have the same argument

4883

// list so assume that we don't have the same argument list.

4884

if (CB && !hasSameArgumentList(&Caller, *CB) &&

4885

needStackSlotPassParameters(Subtarget, Outs))

4886

return false;

4887

else if (!CB && needStackSlotPassParameters(Subtarget, Outs))

4888

return false;

4889

4890

return true;

4891

}

4892

4893

/// IsEligibleForTailCallOptimization - Check whether the call is eligible

4894

/// for tail call optimization. Targets which want to do tail call

4895

/// optimization should implement this function.

4896

bool

4897

PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,

4898

CallingConv::ID CalleeCC,

4899

bool isVarArg,

4900

const SmallVectorImpl<ISD::InputArg> &Ins,

4901

SelectionDAG& DAG) const {

4902

if (!getTargetMachine().Options.GuaranteedTailCallOpt)

4903

return false;

4904

4905

// Variable argument functions are not supported.

4906

if (isVarArg)

4907

return false;

4908

4909

MachineFunction &MF = DAG.getMachineFunction();

4910

CallingConv::ID CallerCC = MF.getFunction().getCallingConv();

4911

if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {

4912

// Functions containing by val parameters are not supported.

4913

for (unsigned i = 0; i != Ins.size(); i++) {

4914

ISD::ArgFlagsTy Flags = Ins[i].Flags;

4915

if (Flags.isByVal()) return false;

4916

}

4917

4918

// Non-PIC/GOT tail calls are supported.

4919

if (getTargetMachine().getRelocationModel() != Reloc::PIC_)

4920

return true;

4921

4922

// At the moment we can only do local tail calls (in same module, hidden

4923

// or protected) if we are generating PIC.

4924

if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))

4925

return G->getGlobal()->hasHiddenVisibility()

4926

|| G->getGlobal()->hasProtectedVisibility();

4927

}

4928

4929

return false;

4930

}

4931

4932

/// isCallCompatibleAddress - Return the immediate to use if the specified

4933

/// 32-bit value is representable in the immediate field of a BxA instruction.

4934

static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {

4935

ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);

4936

if (!C) return nullptr;

4937

4938

int Addr = C->getZExtValue();

4939

if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.

4940

SignExtend32<26>(Addr) != Addr)

4941

return nullptr; // Top 6 bits have to be sext of immediate.

4942

4943

return DAG

4944

.getConstant(

4945

(int)C->getZExtValue() >> 2, SDLoc(Op),

4946

DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))

4947

.getNode();

4948

}

4949

4950

namespace {

4951

4952

struct TailCallArgumentInfo {

4953

SDValue Arg;

4954

SDValue FrameIdxOp;

4955

int FrameIdx = 0;

4956

4957

TailCallArgumentInfo() = default;

4958

};

4959

4960

} // end anonymous namespace

4961

4962

/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.

4963

static void StoreTailCallArgumentsToStackSlot(

4964

SelectionDAG &DAG, SDValue Chain,

4965

const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,

4966

SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {

4967

for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {

4968

SDValue Arg = TailCallArgs[i].Arg;

4969

SDValue FIN = TailCallArgs[i].FrameIdxOp;

4970

int FI = TailCallArgs[i].FrameIdx;

4971

// Store relative to framepointer.

4972

MemOpChains.push_back(DAG.getStore(

4973

Chain, dl, Arg, FIN,

4974

MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));

4975

}

4976

}

4977

4978

/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to

4979

/// the appropriate stack slot for the tail call optimized function call.

4980

static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,

4981

SDValue OldRetAddr, SDValue OldFP,

4982

int SPDiff, const SDLoc &dl) {

4983

if (SPDiff) {

4984

// Calculate the new stack slot for the return address.

4985

MachineFunction &MF = DAG.getMachineFunction();

4986

const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();

4987

const PPCFrameLowering *FL = Subtarget.getFrameLowering();

4988

bool isPPC64 = Subtarget.isPPC64();

4989

int SlotSize = isPPC64 ? 8 : 4;

4990

int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();

4991

int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,

4992

NewRetAddrLoc, true);

4993

EVT VT = isPPC64 ? MVT::i64 : MVT::i32;

4994

SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);

4995

Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,

4996

MachinePointerInfo::getFixedStack(MF, NewRetAddr));

4997

}

4998

return Chain;

4999

}

5000

5001

/// CalculateTailCallArgDest - Remember Argument for later processing. Calculate

5002

/// the position of the argument.

5003

static void

5004

CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,

5005

SDValue Arg, int SPDiff, unsigned ArgOffset,

5006

SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {

5007

int Offset = ArgOffset + SPDiff;

5008

uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;

5009

int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);

5010

EVT VT = isPPC64 ? MVT::i64 : MVT::i32;

5011

SDValue FIN = DAG.getFrameIndex(FI, VT);

5012

TailCallArgumentInfo Info;

5013

Info.Arg = Arg;

5014

Info.FrameIdxOp = FIN;

5015

Info.FrameIdx = FI;

5016

TailCallArguments.push_back(Info);

5017

}

5018

5019

/// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address

5020

/// stack slot. Returns the chain as result and the loaded frame pointers in

5021

/// LROpOut/FPOpout. Used when tail calling.

5022

SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(

5023

SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,

5024

SDValue &FPOpOut, const SDLoc &dl) const {

5025

if (SPDiff) {

5026

// Load the LR and FP stack slot for later adjusting.

5027

EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;

5028

LROpOut = getReturnAddrFrameIndex(DAG);

5029

LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo());

5030

Chain = SDValue(LROpOut.getNode(), 1);

5031

}

5032

return Chain;

5033

}

5034

5035

/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified

5036

/// by "Src" to address "Dst" of size "Size". Alignment information is

5037

/// specified by the specific parameter attribute. The copy will be passed as

5038

/// a byval function parameter.

5039

/// Sometimes what we are copying is the end of a larger object, the part that

5040

/// does not fit in registers.

5041

static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,

5042

SDValue Chain, ISD::ArgFlagsTy Flags,

5043

SelectionDAG &DAG, const SDLoc &dl) {

5044

SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);

5045

return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode,

5046

Flags.getNonZeroByValAlign(), false, false, false,

5047

MachinePointerInfo(), MachinePointerInfo());

5048

}

5049

5050

/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of

5051

/// tail calls.

5052

static void LowerMemOpCallTo(

5053

SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,

5054

SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,

5055

bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,

5056

SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {

5057

EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());

5058

if (!isTailCall) {

5059

if (isVector) {

5060

SDValue StackPtr;

5061

if (isPPC64)

5062

StackPtr = DAG.getRegister(PPC::X1, MVT::i64);

5063

else

5064

StackPtr = DAG.getRegister(PPC::R1, MVT::i32);

5065

PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,

5066

DAG.getConstant(ArgOffset, dl, PtrVT));

5067

}

5068

MemOpChains.push_back(

5069

DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));

5070

// Calculate and remember argument location.

5071

} else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,

5072

TailCallArguments);

5073

}

5074

5075

static void

5076

PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,

5077

const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,

5078

SDValue FPOp,

5079

SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {

5080

// Emit a sequence of copyto/copyfrom virtual registers for arguments that

5081

// might overwrite each other in case of tail call optimization.

5082

SmallVector<SDValue, 8> MemOpChains2;

5083

// Do not flag preceding copytoreg stuff together with the following stuff.

5084

InFlag = SDValue();

5085

StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,

5086

MemOpChains2, dl);

5087

if (!MemOpChains2.empty())

5088

Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);

5089

5090

// Store the return address to the appropriate stack slot.

5091

Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);

5092

5093

// Emit callseq_end just before tailcall node.

5094

Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),

5095

DAG.getIntPtrConstant(0, dl, true), InFlag, dl);

5096

InFlag = Chain.getValue(1);

5097

}

5098

5099

// Is this global address that of a function that can be called by name? (as

5100

// opposed to something that must hold a descriptor for an indirect call).

5101

static bool isFunctionGlobalAddress(SDValue Callee) {

5102

if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {

5103

if (Callee.getOpcode() == ISD::GlobalTLSAddress ||

5104

Callee.getOpcode() == ISD::TargetGlobalTLSAddress)

5105

return false;

5106

5107

return G->getGlobal()->getValueType()->isFunctionTy();

5108

}

5109

5110

return false;

5111

}

5112

5113

/// Copy the values returned by a call out of their physical registers.
///
/// The return locations are assigned with RetCC_PPC, or RetCC_PPC_Cold for
/// the cold calling convention on SVR4. One CopyFromReg is emitted per
/// location, threaded through \p Chain and \p InFlag, and the resulting value
/// (truncated to its value type when it was extended) is appended to
/// \p InVals. With SPE, an f64 result occupies two consecutive locations that
/// are read as i32 halves and recombined with BUILD_SPE64.
///
/// \returns the chain of the last register copy, or \p Chain if there are no
/// return values.
SDValue PPCTargetLowering::LowerCallResult(
    SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
                    *DAG.getContext());

  CCRetInfo.AnalyzeCallResult(
      Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
               ? RetCC_PPC_Cold
               : RetCC_PPC);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
    // A pointer (not a reference) so that moving on to the next location
    // re-seats it instead of copy-assigning over RVLocs[i].
    const CCValAssign *VA = &RVLocs[i];
    assert(VA->isRegLoc() && "Can only return in registers!");

    SDValue Val;

    if (Subtarget.hasSPE() && VA->getLocVT() == MVT::f64) {
      SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA->getLocReg(), MVT::i32,
                                      InFlag);
      Chain = Lo.getValue(1);
      InFlag = Lo.getValue(2);

      // The second half lives in the next location; skip ahead to it.
      assert(i + 1 != e && "Missing second location for SPE f64 result!");
      VA = &RVLocs[++i];
      SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA->getLocReg(), MVT::i32,
                                      InFlag);
      Chain = Hi.getValue(1);
      InFlag = Hi.getValue(2);
      if (!Subtarget.isLittleEndian())
        std::swap(Lo, Hi);
      Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
    } else {
      Val = DAG.getCopyFromReg(Chain, dl, VA->getLocReg(), VA->getLocVT(),
                               InFlag);
      Chain = Val.getValue(1);
      InFlag = Val.getValue(2);
    }

    // Undo any extension that was applied to fit the value in its register.
    switch (VA->getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::AExt:
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA->getValVT(), Val);
      break;
    case CCValAssign::ZExt:
      Val = DAG.getNode(ISD::AssertZext, dl, VA->getLocVT(), Val,
                        DAG.getValueType(VA->getValVT()));
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA->getValVT(), Val);
      break;
    case CCValAssign::SExt:
      Val = DAG.getNode(ISD::AssertSext, dl, VA->getLocVT(), Val,
                        DAG.getValueType(VA->getValVT()));
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA->getValVT(), Val);
      break;
    }

    InVals.push_back(Val);
  }

  return Chain;
}

5176

5177

static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,

5178

const PPCSubtarget &Subtarget, bool isPatchPoint) {

5179

// PatchPoint calls are not indirect.

5180

if (isPatchPoint)

5181

return false;

5182

5183

if (isFunctionGlobalAddress(Callee) || dyn_cast<ExternalSymbolSDNode>(Callee))

5184

return false;

5185

5186

// Darwin, and 32-bit ELF can use a BLA. The descriptor based ABIs can not

5187

// becuase the immediate function pointer points to a descriptor instead of

5188

// a function entry point. The ELFv2 ABI cannot use a BLA because the function

5189

// pointer immediate points to the global entry point, while the BLA would

5190

// need to jump to the local entry point (see rL211174).

5191

if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&

5192

isBLACompatibleAddress(Callee, DAG))

5193

return false;

5194

5195

return true;

5196

}

5197

5198

// AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.

5199

static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {

5200

return Subtarget.isAIXABI() ||

5201

(Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());

5202

}

5203

5204

static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,

5205

const Function &Caller,

5206

const SDValue &Callee,

5207

const PPCSubtarget &Subtarget,

5208

const TargetMachine &TM) {

5209

if (CFlags.IsTailCall)

5210

return PPCISD::TC_RETURN;

5211

5212

// This is a call through a function pointer.

5213

if (CFlags.IsIndirect) {

5214

// AIX and the 64-bit ELF ABIs need to maintain the TOC pointer accross

5215

// indirect calls. The save of the caller's TOC pointer to the stack will be

5216

// inserted into the DAG as part of call lowering. The restore of the TOC

5217

// pointer is modeled by using a pseudo instruction for the call opcode that

5218

// represents the 2 instruction sequence of an indirect branch and link,

5219

// immediately followed by a load of the TOC pointer from the the stack save

5220

// slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC

5221

// as it is not saved or used.

5222

return isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC

5223

: PPCISD::BCTRL;

5224

}

5225

5226

if (Subtarget.isUsingPCRelativeCalls()) {

5227

assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.")((Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI."
) ? static_cast<void> (0) : __assert_fail ("Subtarget.is64BitELFABI() && \"PC Relative is only on ELF ABI.\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 5227, __PRETTY_FUNCTION__));

5228

return PPCISD::CALL_NOTOC;

5229

}

5230

5231

// The ABIs that maintain a TOC pointer accross calls need to have a nop

5232

// immediately following the call instruction if the caller and callee may

5233

// have different TOC bases. At link time if the linker determines the calls

5234

// may not share a TOC base, the call is redirected to a trampoline inserted

5235

// by the linker. The trampoline will (among other things) save the callers

5236

// TOC pointer at an ABI designated offset in the linkage area and the linker

5237

// will rewrite the nop to be a load of the TOC pointer from the linkage area

5238

// into gpr2.

5239

if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI())

5240

return callsShareTOCBase(&Caller, Callee, TM) ? PPCISD::CALL

5241

: PPCISD::CALL_NOP;

5242

5243

return PPCISD::CALL;

5244

}

5245

5246

/// Turn the callee of a direct call into the node the call instruction uses.
///
/// - An absolute address that fits a BLA (on ABIs that allow it) becomes the
///   corresponding constant node.
/// - A function global address becomes a target global address, or on AIX the
///   MCSymbol of the function entry point.
/// - An external symbol becomes a target external symbol; on AIX the entry
///   point symbol is used instead.
/// - Anything else is returned unchanged.
///
/// Global addresses and external symbols are flagged MO_PLT when the PLT is
/// used (32-bit ELF, PIC, non-local callee).
static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
                               const SDLoc &dl, const PPCSubtarget &Subtarget) {
  if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
    if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
      return SDValue(Dest, 0);

  // Returns true if the callee is local, and false otherwise.
  auto isLocalCallee = [&]() {
    const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
    const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
    const GlobalValue *GV = G ? G->getGlobal() : nullptr;

    // An ifunc is never treated as local. GV may be null here.
    return DAG.getTarget().shouldAssumeDSOLocal(*Mod, GV) &&
           !isa_and_nonnull<GlobalIFunc>(GV);
  };

  // The PLT is only used in 32-bit ELF PIC mode. Attempting to use the PLT in
  // a static relocation model causes some versions of GNU LD (2.17.50, at
  // least) to force BSS-PLT, instead of secure-PLT, even if all objects are
  // built with secure-PLT.
  bool UsePlt =
      Subtarget.is32BitELFABI() && !isLocalCallee() &&
      Subtarget.getTargetMachine().getRelocationModel() == Reloc::PIC_;

  // Builds the MCSymbol node for the entry point of GV on AIX.
  const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) {
    const TargetMachine &TM = Subtarget.getTargetMachine();
    const TargetLoweringObjectFile *TLOF = TM.getObjFileLowering();
    MCSymbolXCOFF *S =
        cast<MCSymbolXCOFF>(TLOF->getFunctionEntryPointSymbol(GV, TM));

    MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
    return DAG.getMCSymbol(S, PtrVT);
  };

  if (isFunctionGlobalAddress(Callee)) {
    const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();

    if (Subtarget.isAIXABI()) {
      assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX.");
      return getAIXFuncEntryPointSymbolSDNode(GV);
    }
    return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0,
                                      UsePlt ? PPCII::MO_PLT : 0);
  }

  if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    const char *SymName = S->getSymbol();
    if (Subtarget.isAIXABI()) {
      // If there exists a user-declared function whose name is the same as the
      // ExternalSymbol's, then we pick up the user-declared version.
      const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
      if (const Function *F =
              dyn_cast_or_null<Function>(Mod->getNamedValue(SymName)))
        return getAIXFuncEntryPointSymbolSDNode(F);

      // On AIX, direct function calls reference the symbol for the function's
      // entry point, which is named by prepending a "." before the function's
      // C-linkage name. A Qualname is returned here because an external
      // function entry point is a csect with XTY_ER property.
      const auto getExternalFunctionEntryPointSymbol = [&](StringRef Name) {
        auto &Context = DAG.getMachineFunction().getMMI().getContext();
        MCSectionXCOFF *Sec = Context.getXCOFFSection(
            (Twine(".") + Twine(Name)).str(), XCOFF::XMC_PR, XCOFF::XTY_ER,
            SectionKind::getMetadata());
        return Sec->getQualNameSymbol();
      };

      SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data();
    }
    return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(),
                                       UsePlt ? PPCII::MO_PLT : 0);
  }

  // No transformation needed.
  assert(Callee.getNode() && "What no callee?");
  return Callee;
}

5323

5324

/// Return the output chain of a CALLSEQ_START node.
static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart) {
  assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
         "Expected a CALLSEQ_STARTSDNode.");

  // The last result value is the chain, except when the node produces glue.
  // In that case the glue comes last and the chain is the second last value.
  const unsigned NumValues = CallSeqStart->getNumValues();
  SDValue Last = CallSeqStart.getValue(NumValues - 1);
  const bool LastIsGlue = Last.getValueType() == MVT::Glue;

  return LastIsGlue ? CallSeqStart.getValue(NumValues - 2) : Last;
}

5337

5338

// Creates the node that moves a functions address into the count register

5339

// to prepare for an indirect call instruction.

5340

static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,

5341

SDValue &Glue, SDValue &Chain,

5342

const SDLoc &dl) {

5343

SDValue MTCTROps[] = {Chain, Callee, Glue};

5344

EVT ReturnTypes[] = {MVT::Other, MVT::Glue};

5345

Chain = DAG.getNode(PPCISD::MTCTR, dl, makeArrayRef(ReturnTypes, 2),

5346

makeArrayRef(MTCTROps, Glue.getNode() ? 3 : 2));

5347

// The glue is the second value produced.

5348

Glue = Chain.getValue(1);

5349

}

5350

5351

static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee,

5352

SDValue &Glue, SDValue &Chain,

5353

SDValue CallSeqStart,

5354

const CallBase *CB, const SDLoc &dl,

5355

bool hasNest,

5356

const PPCSubtarget &Subtarget) {

5357

// Function pointers in the 64-bit SVR4 ABI do not point to the function

5358

// entry point, but to the function descriptor (the function entry point

5359

// address is part of the function descriptor though).

5360

// The function descriptor is a three doubleword structure with the

5361

// following fields: function entry point, TOC base address and

5362

// environment pointer.

5363

// Thus for a call through a function pointer, the following actions need

5364

// to be performed:

5365

// 1. Save the TOC of the caller in the TOC save area of its stack

5366

// frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).

5367

// 2. Load the address of the function entry point from the function

5368

// descriptor.

5369

// 3. Load the TOC of the callee from the function descriptor into r2.

5370

// 4. Load the environment pointer from the function descriptor into

5371

// r11.

5372

// 5. Branch to the function entry point address.

5373

// 6. On return of the callee, the TOC of the caller needs to be

5374

// restored (this is done in FinishCall()).

5375

//

5376

// The loads are scheduled at the beginning of the call sequence, and the

5377

// register copies are flagged together to ensure that no other

5378

// operations can be scheduled in between. E.g. without flagging the

5379

// copies together, a TOC access in the caller could be scheduled between

5380

// the assignment of the callee TOC and the branch to the callee, which leads

5381

// to incorrect code.

5382

5383

// Start by loading the function address from the descriptor.

5384

SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);

5385

auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()

5386

? (MachineMemOperand::MODereferenceable |

5387

MachineMemOperand::MOInvariant)

5388

: MachineMemOperand::MONone;

5389

5390

MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr);

5391

5392

// Registers used in building the DAG.

5393

const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();

5394

const MCRegister TOCReg = Subtarget.getTOCPointerRegister();

5395

5396

// Offsets of descriptor members.

5397

const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();

5398

const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();

5399

5400

const MVT RegVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;

5401

const unsigned Alignment = Subtarget.isPPC64() ? 8 : 4;

5402

5403

// One load for the functions entry point address.

5404

SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI,

5405

Alignment, MMOFlags);

5406

5407

// One for loading the TOC anchor for the module that contains the called

5408

// function.

5409

SDValue TOCOff = DAG.getIntPtrConstant(TOCAnchorOffset, dl);

5410

SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff);

5411

SDValue TOCPtr =

5412

DAG.getLoad(RegVT, dl, LDChain, AddTOC,

5413

MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags);

5414

5415

// One for loading the environment pointer.

5416

SDValue PtrOff = DAG.getIntPtrConstant(EnvPtrOffset, dl);

5417

SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff);

5418

SDValue LoadEnvPtr =

5419

DAG.getLoad(RegVT, dl, LDChain, AddPtr,

5420

MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags);

5421

5422

5423

// Then copy the newly loaded TOC anchor to the TOC pointer.

5424

SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue);

5425

Chain = TOCVal.getValue(0);

5426

Glue = TOCVal.getValue(1);

5427

5428

// If the function call has an explicit 'nest' parameter, it takes the

5429

// place of the environment pointer.

5430

assert((!hasNest || !Subtarget.isAIXABI()) &&(((!hasNest || !Subtarget.isAIXABI()) && "Nest parameter is not supported on AIX."
) ? static_cast<void> (0) : __assert_fail ("(!hasNest || !Subtarget.isAIXABI()) && \"Nest parameter is not supported on AIX.\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 5431, __PRETTY_FUNCTION__))

5431

"Nest parameter is not supported on AIX.")(((!hasNest || !Subtarget.isAIXABI()) && "Nest parameter is not supported on AIX."
) ? static_cast<void> (0) : __assert_fail ("(!hasNest || !Subtarget.isAIXABI()) && \"Nest parameter is not supported on AIX.\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 5431, __PRETTY_FUNCTION__));

5432

if (!hasNest) {

5433

SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue);

5434

Chain = EnvVal.getValue(0);

5435

Glue = EnvVal.getValue(1);

5436

}

5437

5438

// The rest of the indirect call sequence is the same as the non-descriptor

5439

// DAG.

5440

prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl);

5441

}

5442

5443

static void

5444

buildCallOperands(SmallVectorImpl<SDValue> &Ops,

5445

PPCTargetLowering::CallFlags CFlags, const SDLoc &dl,

5446

SelectionDAG &DAG,

5447

SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,

5448

SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,

5449

const PPCSubtarget &Subtarget) {

5450

const bool IsPPC64 = Subtarget.isPPC64();

5451

// MVT for a general purpose register.

5452

const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;

5453

5454

// First operand is always the chain.

5455

Ops.push_back(Chain);

5456

5457

// If it's a direct call pass the callee as the second operand.

5458

if (!CFlags.IsIndirect)

5459

Ops.push_back(Callee);

5460

else {

5461

assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect.")((!CFlags.IsPatchPoint && "Patch point calls are not indirect."
) ? static_cast<void> (0) : __assert_fail ("!CFlags.IsPatchPoint && \"Patch point calls are not indirect.\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 5461, __PRETTY_FUNCTION__));

5462

5463

// For the TOC based ABIs, we have saved the TOC pointer to the linkage area

5464

// on the stack (this would have been done in `LowerCall_64SVR4` or

5465

// `LowerCall_AIX`). The call instruction is a pseudo instruction that

5466

// represents both the indirect branch and a load that restores the TOC

5467

// pointer from the linkage area. The operand for the TOC restore is an add

5468

// of the TOC save offset to the stack pointer. This must be the second

5469

// operand: after the chain input but before any other variadic arguments.

5470

// For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not

5471

// saved or used.

5472

if (isTOCSaveRestoreRequired(Subtarget)) {

5473

const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();

5474

5475

SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);

5476

unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();

5477

SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);

5478

SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff);

5479

Ops.push_back(AddTOC);

5480

}

5481

5482

// Add the register used for the environment pointer.

5483

if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest)

5484

Ops.push_back(DAG.getRegister(Subtarget.getEnvironmentPointerRegister(),

5485

RegVT));

5486

5487

5488

// Add CTR register as callee so a bctr can be emitted later.

5489

if (CFlags.IsTailCall)

5490

Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));

5491

}

5492

5493

// If this is a tail call add stack pointer delta.

5494

if (CFlags.IsTailCall)

5495

Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));

5496

5497

// Add argument registers to the end of the list so that they are known live

5498

// into the call.

5499

for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)

5500

Ops.push_back(DAG.getRegister(RegsToPass[i].first,

5501

RegsToPass[i].second.getValueType()));

5502

5503

// We cannot add R2/X2 as an operand here for PATCHPOINT, because there is

5504

// no way to mark dependencies as implicit here.

5505

// We will add the R2/X2 dependency in EmitInstrWithCustomInserter.

5506

if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&

5507

!CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())

5508

Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));

5509

5510

// Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls

5511

if (CFlags.IsVarArg && Subtarget.is32BitELFABI())

5512

Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));

5513

5514

// Add a register mask operand representing the call-preserved registers.

5515

const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();

5516

const uint32_t *Mask =

5517

TRI->getCallPreservedMask(DAG.getMachineFunction(), CFlags.CallConv);

5518

assert(Mask && "Missing call preserved mask for calling convention")((Mask && "Missing call preserved mask for calling convention"
) ? static_cast<void> (0) : __assert_fail ("Mask && \"Missing call preserved mask for calling convention\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 5518, __PRETTY_FUNCTION__));

5519

Ops.push_back(DAG.getRegisterMask(Mask));

5520

5521

// If the glue is valid, it is the last operand.

5522

if (Glue.getNode())

5523

Ops.push_back(Glue);

5524

}

5525

5526

/// Emit the call node for an already-marshalled call and, for non-tail calls,
/// close the call sequence and lower the returned values.
///
/// \p Callee is taken by reference and may be rewritten here (by
/// transformCallee or by one of the indirect-call preparation helpers).
/// \p RegsToPass, \p Glue, \p Chain and \p SPDiff are forwarded to
/// buildCallOperands to form the operand list. \p NumBytes is the size used
/// for CALLSEQ_END. \p Ins and \p InVals are forwarded to LowerCallResult.
///
/// \returns the TC_RETURN node for tail calls, otherwise the result of
/// LowerCallResult.
SDValue PPCTargetLowering::FinishCall(
    CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
    SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
    SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
    unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
    SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const {
  MachineFunction &MF = DAG.getMachineFunction();

  // AIX always records a TOC base pointer use; 64-bit ELF does so unless the
  // call is PC-relative.
  const bool NeedsTOCBase =
      Subtarget.isAIXABI() ||
      (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());
  if (NeedsTOCBase)
    setUsesTOCBasePtr(DAG);

  // The opcode is selected before the callee is rewritten below.
  unsigned CallOpc = getCallOpcode(CFlags, MF.getFunction(), Callee, Subtarget,
                                   DAG.getTarget());

  if (CFlags.IsIndirect) {
    if (Subtarget.usesFunctionDescriptors())
      prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
                                    dl, CFlags.HasNest, Subtarget);
    else
      prepareIndirectCall(DAG, Callee, Glue, Chain, dl);
  } else {
    Callee = transformCallee(Callee, DAG, dl, Subtarget);
  }

  // Collect the operands of the call instruction.
  SmallVector<SDValue, 8> Ops;
  buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass, Glue, Chain, Callee,
                    SPDiff, Subtarget);

  // Tail calls produce only a chain and return immediately.
  if (CFlags.IsTailCall) {
    // Indirect tail call when using PC Relative calls do not have the same
    // constraints.
    assert(((Callee.getOpcode() == ISD::Register &&
             cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
            Callee.getOpcode() == ISD::TargetExternalSymbol ||
            Callee.getOpcode() == ISD::TargetGlobalAddress ||
            isa<ConstantSDNode>(Callee) ||
            (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
           "Expecting a global address, external symbol, absolute value, "
           "register or an indirect tail call when PC Relative calls are "
           "used.");
    // PC Relative calls also use TC_RETURN as the way to mark tail calls.
    assert(CallOpc == PPCISD::TC_RETURN &&
           "Unexpected call opcode for a tail call.");
    MF.getFrameInfo().setHasTailCall();
    return DAG.getNode(CallOpc, dl, MVT::Other, Ops);
  }

  // Ordinary call: the node yields a chain and a glue value.
  SDVTList CallVTs = DAG.getVTList(MVT::Other, MVT::Glue);
  Chain = DAG.getNode(CallOpc, dl, CallVTs, Ops);
  DAG.addNoMergeSiteInfo(Chain.getNode(), CFlags.NoMerge);
  Glue = Chain.getValue(1);

  // When performing tail call optimization the callee pops its arguments off
  // the stack. Account for this here so these bytes can be pushed back on in
  // PPCFrameLowering::eliminateCallFramePseudoInstr.
  int BytesCalleePops = 0;
  if (CFlags.CallConv == CallingConv::Fast &&
      getTargetMachine().Options.GuaranteedTailCallOpt)
    BytesCalleePops = NumBytes;

  SDValue FrameBytes = DAG.getIntPtrConstant(NumBytes, dl, true);
  SDValue PoppedBytes = DAG.getIntPtrConstant(BytesCalleePops, dl, true);
  Chain = DAG.getCALLSEQ_END(Chain, FrameBytes, PoppedBytes, Glue, dl);
  Glue = Chain.getValue(1);

  return LowerCallResult(Chain, Glue, CFlags.CallConv, CFlags.IsVarArg, Ins, dl,
                         DAG, InVals);
}

5595

5596

/// Entry point for lowering an outgoing call. Decides whether the call is
/// still a tail call, builds the CallFlags for it, and hands off to the
/// ABI-specific lowering routine (64-bit SVR4, 32-bit SVR4, AIX or Darwin).
///
/// CLI.IsTailCall is updated in place with the final tail-call decision.
/// A call site marked musttail that cannot be tail called is a fatal error.
SDValue
PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                             SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &dl = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &TailCall = CLI.IsTailCall; // written back to the caller's CLI
  const CallingConv::ID CC = CLI.CallConv;
  const bool VarArg = CLI.IsVarArg;
  const bool PatchPoint = CLI.IsPatchPoint;
  const CallBase *CB = CLI.CB;

  const bool MustTail = CB && CB->isMustTailCall();
  const bool LongCalls = Subtarget.useLongCalls();
  const bool Is64BitSVR4 = Subtarget.isSVR4ABI() && Subtarget.isPPC64();

  if (TailCall) {
    // Long calls disable tail calls unless the call site is musttail;
    // otherwise ask the ABI-specific eligibility check.
    if (LongCalls && !MustTail)
      TailCall = false;
    else if (Is64BitSVR4)
      TailCall = IsEligibleForTailCallOptimization_64SVR4(Callee, CC, CB,
                                                          VarArg, Outs, Ins,
                                                          DAG);
    else
      TailCall = IsEligibleForTailCallOptimization(Callee, CC, VarArg, Ins,
                                                   DAG);

    if (TailCall) {
      ++NumTailCalls;
      if (!getTargetMachine().Options.GuaranteedTailCallOpt)
        ++NumSiblingCalls;

      // PC Relative calls no longer guarantee that the callee is a Global
      // Address Node. The callee could be an indirect tail call in which
      // case the SDValue for the callee could be a load (to load the address
      // of a function pointer) or it may be a register copy (to move the
      // address of the callee from a function parameter into a virtual
      // register). It may also be an ExternalSymbolSDNode (ex memcopy).
      assert((Subtarget.isUsingPCRelativeCalls() ||
              isa<GlobalAddressSDNode>(Callee)) &&
             "Callee should be an llvm::Function object.");

      LLVM_DEBUG({
        dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
               << "\nTCO callee: ";
        Callee.dump();
      });
    }
  }

  if (MustTail && !TailCall)
    report_fatal_error("failed to perform tail call elimination on a call "
                       "site marked musttail");

  // When long calls (i.e. indirect calls) are always used, calls are always
  // made via function pointer. If we have a function name, first translate it
  // into a pointer.
  if (LongCalls && !TailCall && isa<GlobalAddressSDNode>(Callee))
    Callee = LowerGlobalAddress(Callee, DAG);

  // Computed after the callee may have been rewritten above.
  const bool Indirect = isIndirectCall(Callee, DAG, Subtarget, PatchPoint);
  const bool HasNest =
      Subtarget.is64BitELFABI() &&
      any_of(Outs, [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); });

  CallFlags CFlags(CC, TailCall, VarArg, PatchPoint, Indirect, HasNest,
                   CLI.NoMerge);

  // Dispatch on the ABI in use.
  if (Subtarget.isSVR4ABI()) {
    if (Subtarget.isPPC64())
      return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl,
                              DAG, InVals, CB);
    return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
                            InVals, CB);
  }

  if (Subtarget.isAIXABI())
    return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
                         InVals, CB);

  return LowerCall_Darwin(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
                          InVals, CB);
}

5676

5677

/// Lower an outgoing call for the 32-bit SVR4 ABI.
///
/// Assigns a register or stack location to every outgoing argument, copies
/// by-value aggregates into the caller's frame (outside the call sequence),
/// emits the stores / register copies for the arguments and finally defers to
/// FinishCall to emit the call node.
///
/// \param Chain   Incoming chain; the CALLSEQ_START is built on it.
/// \param Callee  The call target, forwarded to FinishCall.
/// \param CFlags  Calling convention, vararg and tail-call flags of the call.
/// \param Outs    Per-argument type and flag information.
/// \param OutVals The argument values, parallel to \p Outs.
/// \param Ins, InVals, CB  Forwarded unchanged to FinishCall.
/// \returns whatever FinishCall returns for this call.
SDValue PPCTargetLowering::LowerCall_32SVR4(
    SDValue Chain, SDValue Callee, CallFlags CFlags,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
    const CallBase *CB) const {
  // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
  // of the 32-bit SVR4 ABI stack frame layout.

  const CallingConv::ID CallConv = CFlags.CallConv;
  const bool IsVarArg = CFlags.IsVarArg;
  const bool IsTailCall = CFlags.IsTailCall;

  assert((CallConv == CallingConv::C ||
          CallConv == CallingConv::Cold ||
          CallConv == CallingConv::Fast) && "Unknown calling convention!");

  const Align PtrAlign(4);

  MachineFunction &MF = DAG.getMachineFunction();

  // Mark this function as potentially containing a function that contains a
  // tail call. As a consequence the frame pointer will be used for dynamicalloc
  // and restoring the callers stack pointer in this functions epilog. This is
  // done because by tail calling the called function might overwrite the value
  // in this function's (MF) stack pointer stack slot 0(SP).
  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
      CallConv == CallingConv::Fast)
    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();

  // Count how many bytes are to be pushed on the stack, including the linkage
  // area, parameter list area and the part of the local variable space which
  // contains copies of aggregates which are passed by value.

  // Assign locations to all of the outgoing arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  PPCCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  // Reserve space for the linkage area on the stack.
  CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
                       PtrAlign);
  if (useSoftFloat())
    CCInfo.PreAnalyzeCallOperands(Outs);

  if (!IsVarArg) {
    // All arguments are treated the same.
    CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
  } else {
    // Handle fixed and variable vector arguments differently.
    // Fixed vector arguments go into registers as long as registers are
    // available. Variable vector arguments always go into memory.
    for (unsigned i = 0, NumArgs = Outs.size(); i != NumArgs; ++i) {
      MVT ArgVT = Outs[i].VT;
      ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;

      // Both assignment functions return true when they could not place the
      // argument.
      const bool Result =
          Outs[i].IsFixed
              ? CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
                              CCInfo)
              : CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
                                     ArgFlags, CCInfo);
      if (!Result)
        continue;

#ifndef NDEBUG
      errs() << "Call operand #" << i << " has unhandled type "
             << EVT(ArgVT).getEVTString() << "\n";
#endif
      llvm_unreachable(nullptr);
    }
  }
  CCInfo.clearWasPPCF128();

  // Assign locations to all of the outgoing aggregate by value arguments.
  SmallVector<CCValAssign, 16> ByValArgLocs;
  CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext());

  // Reserve stack space for the allocations in CCInfo.
  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrAlign);

  CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);

  // Size of the linkage area, parameter list area and the part of the local
  // space variable where copies of aggregates which are passed by value are
  // stored.
  unsigned NumBytes = CCByValInfo.getNextStackOffset();

  // Calculate by how many bytes the stack has to be adjusted in case of tail
  // call optimization.
  int SPDiff = CalculateTailCallSPDiff(DAG, IsTailCall, NumBytes);

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass
  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
  SDValue CallSeqStart = Chain;

  // Load the return address and frame pointer so it can be moved somewhere else
  // later.
  SDValue LROp, FPOp;
  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
  const EVT PtrVT = getPointerTy(MF.getDataLayout());

  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
  SmallVector<SDValue, 8> MemOpChains;

  bool seenFloatArg = false;
  // Walk the register/memloc assignments, inserting copies/loads.
  // i - Tracks the index into the list of registers allocated for the call
  // RealArgIdx - Tracks the index into the list of actual function arguments
  // j - Tracks the index into the list of byval arguments
  for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
       i != e;
       ++i, ++RealArgIdx) {
    CCValAssign &VA = ArgLocs[i];
    SDValue Arg = OutVals[RealArgIdx];
    ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;

    if (Flags.isByVal()) {
      // Argument is an aggregate which is passed by value, thus we need to
      // create a copy of it in the local variable space of the current stack
      // frame (which is the stack frame of the caller) and pass the address of
      // this copy to the callee.
      assert((j < ByValArgLocs.size()) && "Index out of bounds!");
      CCValAssign &ByValVA = ByValArgLocs[j++];
      assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");

      // Memory reserved in the local variable space of the callers stack frame.
      unsigned LocMemOffset = ByValVA.getLocMemOffset();

      SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
      PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);

      // Create a copy of the argument in the local area of the current stack
      // frame. The copy must go outside the CALLSEQ_START..END, so the helper
      // rebuilds the CALLSEQ_START on top of the memcpy (with the frame size
      // read back from the old node, i.e. NumBytes) and redirects all uses.
      Chain = CallSeqStart =
          createMemcpyOutsideCallSeq(Arg, PtrOff, CallSeqStart, Flags, DAG, dl);

      // Pass the address of the aggregate copy on the stack either in a
      // physical register or in the parameter list area of the current stack
      // frame to the callee.
      Arg = PtrOff;
    }

    // When useCRBits() is true, there can be i1 arguments.
    // It is because getRegisterType(MVT::i1) => MVT::i1,
    // and for other integer types getRegisterType() => MVT::i32.
    // Extend i1 and ensure callee will get i32.
    if (Arg.getValueType() == MVT::i1)
      Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
                        dl, MVT::i32, Arg);

    if (VA.isRegLoc()) {
      seenFloatArg |= VA.getLocVT().isFloatingPoint();
      // Put argument in a physical register.
      if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
        // An SPE f64 is split into two i32 halves; the second half consumes
        // the next entry of ArgLocs (hence the extra ++i below).
        bool IsLE = Subtarget.isLittleEndian();
        SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
                                   DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
        RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
        SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
                           DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
        RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
                                            SVal.getValue(0)));
      } else {
        RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
      }
      continue;
    }

    // Put argument in the parameter list area of the current stack frame.
    assert(VA.isMemLoc());
    unsigned LocMemOffset = VA.getLocMemOffset();

    if (IsTailCall) {
      // Calculate and remember argument location.
      CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
                               TailCallArguments);
      continue;
    }

    SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
    PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);

    MemOpChains.push_back(
        DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (const std::pair<unsigned, SDValue> &RegAndVal : RegsToPass) {
    Chain = DAG.getCopyToReg(Chain, dl, RegAndVal.first, RegAndVal.second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  // Set CR bit 6 to true if this is a vararg call with floating args passed in
  // registers.
  if (IsVarArg) {
    SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
    SDValue Ops[] = { Chain, InFlag };

    // Only pass the glue operand when there is one.
    Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
                        dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1));

    InFlag = Chain.getValue(1);
  }

  if (IsTailCall)
    PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
                    TailCallArguments);

  return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
                    Callee, SPDiff, NumBytes, Ins, InVals, CB);
}

5913

5914

// Copy an argument into memory, being careful to do this outside the

5915

// call sequence for the call to which the argument belongs.

5916

SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(

5917

SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,

5918

SelectionDAG &DAG, const SDLoc &dl) const {

5919

SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,

5920

CallSeqStart.getNode()->getOperand(0),

5921

Flags, DAG, dl);

5922

// The MEMCPY must go outside the CALLSEQ_START..END.

5923

int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);

5924

SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,

5925

SDLoc(MemcpyCall));

5926

DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),

5927

NewCallSeqStart.getNode());

5928

return NewCallSeqStart;

5929

}

5930

5931

/// Lower an outgoing call for the 64-bit SVR4 ABI (ELFv1 and ELFv2).
///
/// The lowering proceeds in the following phases:
///   1. Decide whether a parameter save area is needed and compute the number
///      of stack bytes (linkage area + parameter area) for the call.
///   2. Emit CALLSEQ_START (skipped for sibling calls) and, for tail calls,
///      load the return address / frame pointer.
///   3. Walk the outgoing arguments, assigning each to GPRs, FPRs, VRs and/or
///      a stack slot; by-value aggregates are copied with
///      createMemcpyOutsideCallSeq.
///   4. For indirect calls, save the TOC pointer when required and, on ELFv2,
///      pass the callee address in X12.
///   5. Emit the copy-to-reg nodes and hand over to FinishCall.
///
/// \param Chain   Incoming token chain.
/// \param Callee  The call target.
/// \param CFlags  Calling convention and tail-call/vararg/indirect/patchpoint
///                flags of the call.
/// \param Outs    Flags and value types of the outgoing arguments.
/// \param OutVals Values of the outgoing arguments (parallel to \p Outs).
/// \param Ins     Forwarded unchanged to FinishCall.
/// \param InVals  Forwarded unchanged to FinishCall.
/// \param CB      Forwarded unchanged to FinishCall.
/// \returns the value produced by FinishCall.
SDValue PPCTargetLowering::LowerCall_64SVR4(
    SDValue Chain, SDValue Callee, CallFlags CFlags,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
    const CallBase *CB) const {
  bool isELFv2ABI = Subtarget.isELFv2ABI();
  bool isLittleEndian = Subtarget.isLittleEndian();
  unsigned NumOps = Outs.size();
  bool IsSibCall = false;
  bool IsFastCall = CFlags.CallConv == CallingConv::Fast;

  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  unsigned PtrByteSize = 8;

  MachineFunction &MF = DAG.getMachineFunction();

  if (CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
    IsSibCall = true;

  // Mark this function as potentially containing a function that contains a
  // tail call. As a consequence the frame pointer will be used for dynamicalloc
  // and restoring the callers stack pointer in this functions epilog. This is
  // done because by tail calling the called function might overwrite the value
  // in this function's (MF) stack pointer stack slot 0(SP).
  if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();

  assert(!(IsFastCall && CFlags.IsVarArg) &&
         "fastcc not supported on varargs functions");

  // Count how many bytes are to be pushed on the stack, including the linkage
  // area, and parameter passing area.  On ELFv1, the linkage area is 48 bytes
  // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
  // area is 32 bytes reserved space for [SP][CR][LR][TOC].
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  unsigned NumBytes = LinkageSize;
  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;

  static const MCPhysReg GPR[] = {
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };

  const unsigned NumGPRs = array_lengthof(GPR);
  const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
  const unsigned NumVRs = array_lengthof(VR);

  // On ELFv2, we can avoid allocating the parameter area if all the arguments
  // can be passed to the callee in registers.
  // For the fast calling convention, there is another check below.
  // Note: We should keep consistent with LowerFormalArguments_64SVR4()
  bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall;
  if (!HasParameterArea) {
    unsigned ParamAreaSize = NumGPRs * PtrByteSize;
    unsigned AvailableFPRs = NumFPRs;
    unsigned AvailableVRs = NumVRs;
    unsigned NumBytesTmp = NumBytes;
    for (unsigned i = 0; i != NumOps; ++i) {
      if (Outs[i].Flags.isNest()) continue;
      if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
                                 PtrByteSize, LinkageSize, ParamAreaSize,
                                 NumBytesTmp, AvailableFPRs, AvailableVRs))
        HasParameterArea = true;
    }
  }

  // When using the fast calling convention, we don't provide backing for
  // arguments that will be in registers.
  unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;

  // Avoid allocating parameter area for fastcc functions if all the arguments
  // can be passed in the registers.
  if (IsFastCall)
    HasParameterArea = false;

  // Add up all the space actually used.
  for (unsigned i = 0; i != NumOps; ++i) {
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
    EVT ArgVT = Outs[i].VT;
    EVT OrigVT = Outs[i].ArgVT;

    if (Flags.isNest())
      continue;

    if (IsFastCall) {
      if (Flags.isByVal()) {
        NumGPRsUsed += (Flags.getByValSize()+7)/8;
        if (NumGPRsUsed > NumGPRs)
          HasParameterArea = true;
      } else {
        // Arguments that still fit in a register of their class need no
        // stack space ('continue'); anything else falls out of the switch
        // and forces a parameter area.
        switch (ArgVT.getSimpleVT().SimpleTy) {
        default: llvm_unreachable("Unexpected ValueType for argument!");
        case MVT::i1:
        case MVT::i32:
        case MVT::i64:
          if (++NumGPRsUsed <= NumGPRs)
            continue;
          break;
        case MVT::v4f32:
        case MVT::v4i32:
        case MVT::v8i16:
        case MVT::v16i8:
        case MVT::v2f64:
        case MVT::v2i64:
        case MVT::v1i128:
        case MVT::f128:
          if (++NumVRsUsed <= NumVRs)
            continue;
          break;
        case MVT::f32:
        case MVT::f64:
          if (++NumFPRsUsed <= NumFPRs)
            continue;
          break;
        }
        HasParameterArea = true;
      }
    }

    /* Respect alignment of argument on the stack.  */
    auto Alignment =
        CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
    NumBytes = alignTo(NumBytes, Alignment);

    NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
    if (Flags.isInConsecutiveRegsLast())
      NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
  }

  unsigned NumBytesActuallyUsed = NumBytes;

  // In the old ELFv1 ABI,
  // the prolog code of the callee may store up to 8 GPR argument registers to
  // the stack, allowing va_start to index over them in memory if its varargs.
  // Because we cannot tell if this is needed on the caller side, we have to
  // conservatively assume that it is needed.  As such, make sure we have at
  // least enough stack space for the caller to store the 8 GPRs.
  // In the ELFv2 ABI, we allocate the parameter area iff a callee
  // really requires memory operands, e.g. a vararg function.
  if (HasParameterArea)
    NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
  else
    NumBytes = LinkageSize;

  // Tail call needs the stack to be aligned.
  if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
    NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);

  int SPDiff = 0;

  // Calculate by how many bytes the stack has to be adjusted in case of tail
  // call optimization.
  if (!IsSibCall)
    SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);

  // To protect arguments on the stack from being clobbered in a tail call,
  // force all the loads to happen before doing any other lowering.
  if (CFlags.IsTailCall)
    Chain = DAG.getStackArgumentTokenFactor(Chain);

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass
  if (!IsSibCall)
    Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
  SDValue CallSeqStart = Chain;

  // Load the return address and frame pointer so it can be moved somewhere
  // else later.
  SDValue LROp, FPOp;
  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);

  // Figure out which arguments are going to go in registers, and which in
  // memory.  Also, if this is a vararg function, floating point operations
  // must be stored to our stack, and loaded into integer regs as well, if
  // any integer regs are available for argument passing.
  unsigned ArgOffset = LinkageSize;

  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;

  SmallVector<SDValue, 8> MemOpChains;
  for (unsigned i = 0; i != NumOps; ++i) {
    SDValue Arg = OutVals[i];
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
    EVT ArgVT = Outs[i].VT;
    EVT OrigVT = Outs[i].ArgVT;

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDValue PtrOff;

    // We re-align the argument offset for each argument, except when using the
    // fast calling convention, when we need to make sure we do that only when
    // we'll actually use a stack slot.
    auto ComputePtrOff = [&]() {
      /* Respect alignment of argument on the stack.  */
      auto Alignment =
          CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
      ArgOffset = alignTo(ArgOffset, Alignment);

      PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());

      PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
    };

    if (!IsFastCall) {
      ComputePtrOff();

      /* Compute GPR index associated with argument offset.  */
      GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
      GPR_idx = std::min(GPR_idx, NumGPRs);
    }

    // Promote integers to 64-bit values.
    if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
      // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
      unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
      Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
    }

    // FIXME memcpy is used way more than necessary.  Correctness first.
    // Note: "by value" is code for passing a structure by value, not
    // basic types.
    if (Flags.isByVal()) {
      // Note: Size includes alignment padding, so
      //   struct x { short a; char b; }
      // will have Size = 4.  With #pragma pack(1), it will have Size = 3.
      // These are the proper values we need for right-justifying the
      // aggregate in a parameter register.
      unsigned Size = Flags.getByValSize();

      // An empty aggregate parameter takes up no storage and no
      // registers.
      if (Size == 0)
        continue;

      if (IsFastCall)
        ComputePtrOff();

      // All aggregates smaller than 8 bytes must be passed right-justified.
      if (Size==1 || Size==2 || Size==4) {
        EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
        if (GPR_idx != NumGPRs) {
          SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
                                        MachinePointerInfo(), VT);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));

          ArgOffset += PtrByteSize;
          continue;
        }
      }

      if (GPR_idx == NumGPRs && Size < 8) {
        SDValue AddPtr = PtrOff;
        if (!isLittleEndian) {
          SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
                                          PtrOff.getValueType());
          AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
        }
        Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
                                                          CallSeqStart,
                                                          Flags, DAG, dl);
        ArgOffset += PtrByteSize;
        continue;
      }
      // Copy entire object into memory.  There are cases where gcc-generated
      // code assumes it is there, even if it could be put entirely into
      // registers.  (This is not what the doc says.)

      // FIXME: The above statement is likely due to a misunderstanding of the
      // documents.  All arguments must be copied into the parameter area BY
      // THE CALLEE in the event that the callee takes the address of any
      // formal argument.  That has not yet been implemented.  However, it is
      // reasonable to use the stack area as a staging area for the register
      // load.

      // Skip this for small aggregates, as we will use the same slot for a
      // right-justified copy, below.
      if (Size >= 8)
        Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
                                                          CallSeqStart,
                                                          Flags, DAG, dl);

      // When a register is available, pass a small aggregate right-justified.
      if (Size < 8 && GPR_idx != NumGPRs) {
        // The easiest way to get this right-justified in a register
        // is to copy the structure into the rightmost portion of a
        // local variable slot, then load the whole slot into the
        // register.
        // FIXME: The memcpy seems to produce pretty awful code for
        // small aggregates, particularly for packed ones.
        // FIXME: It would be preferable to use the slot in the
        // parameter save area instead of a new local variable.
        SDValue AddPtr = PtrOff;
        if (!isLittleEndian) {
          SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
          AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
        }
        Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
                                                          CallSeqStart,
                                                          Flags, DAG, dl);

        // Load the slot into the register.
        SDValue Load =
            DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
        MemOpChains.push_back(Load.getValue(1));
        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));

        // Done with this argument.
        ArgOffset += PtrByteSize;
        continue;
      }

      // For aggregates larger than PtrByteSize, copy the pieces of the
      // object that fit into registers from the parameter save area.
      for (unsigned j=0; j<Size; j+=PtrByteSize) {
        SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
        SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
        if (GPR_idx != NumGPRs) {
          SDValue Load =
              DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          ArgOffset += PtrByteSize;
        } else {
          ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
          break;
        }
      }
      continue;
    }

    switch (Arg.getSimpleValueType().SimpleTy) {
    default: llvm_unreachable("Unexpected ValueType for argument!");
    case MVT::i1:
    case MVT::i32:
    case MVT::i64:
      if (Flags.isNest()) {
        // The 'nest' parameter, if any, is passed in R11.
        RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
        break;
      }

      // These can be scalar arguments or elements of an integer array type
      // passed directly.  Clang may use those instead of "byval" aggregate
      // types to avoid forcing arguments to memory unnecessarily.
      if (GPR_idx != NumGPRs) {
        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
      } else {
        if (IsFastCall)
          ComputePtrOff();

        assert(HasParameterArea &&
               "Parameter area must exist to pass an argument in memory.");
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         true, CFlags.IsTailCall, false, MemOpChains,
                         TailCallArguments, dl);
        if (IsFastCall)
          ArgOffset += PtrByteSize;
      }
      if (!IsFastCall)
        ArgOffset += PtrByteSize;
      break;
    case MVT::f32:
    case MVT::f64: {
      // These can be scalar arguments or elements of a float array type
      // passed directly.  The latter are used to implement ELFv2 homogenous
      // float aggregates.

      // Named arguments go into FPRs first, and once they overflow, the
      // remaining arguments go into GPRs and then the parameter save area.
      // Unnamed arguments for vararg functions always go to GPRs and
      // then the parameter save area.  For now, put all arguments to vararg
      // routines always in both locations (FPR *and* GPR or stack slot).
      bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs;
      bool NeededLoad = false;

      // First load the argument into the next available FPR.
      if (FPR_idx != NumFPRs)
        RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));

      // Next, load the argument into GPR or stack slot if needed.
      if (!NeedGPROrStack)
        ;
      else if (GPR_idx != NumGPRs && !IsFastCall) {
        // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
        // once we support fp <-> gpr moves.

        // In the non-vararg case, this can only ever happen in the
        // presence of f32 array types, since otherwise we never run
        // out of FPRs before running out of GPRs.
        SDValue ArgVal;

        // Double values are always passed in a single GPR.
        if (Arg.getValueType() != MVT::f32) {
          ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);

        // Non-array float values are extended and passed in a GPR.
        } else if (!Flags.isInConsecutiveRegs()) {
          ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
          ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);

        // If we have an array of floats, we collect every odd element
        // together with its predecessor into one GPR.
        } else if (ArgOffset % PtrByteSize != 0) {
          SDValue Lo, Hi;
          Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
          Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
          if (!isLittleEndian)
            std::swap(Lo, Hi);
          ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);

        // The final element, if even, goes into the first half of a GPR.
        } else if (Flags.isInConsecutiveRegsLast()) {
          ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
          ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
          if (!isLittleEndian)
            ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
                                 DAG.getConstant(32, dl, MVT::i32));

        // Non-final even elements are skipped; they will be handled
        // together with the subsequent argument on the next go-around.
        } else
          ArgVal = SDValue();

        if (ArgVal.getNode())
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
      } else {
        if (IsFastCall)
          ComputePtrOff();

        // Single-precision floating-point values are mapped to the
        // second (rightmost) word of the stack doubleword.
        if (Arg.getValueType() == MVT::f32 &&
            !isLittleEndian && !Flags.isInConsecutiveRegs()) {
          SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
          PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
        }

        assert(HasParameterArea &&
               "Parameter area must exist to pass an argument in memory.");
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         true, CFlags.IsTailCall, false, MemOpChains,
                         TailCallArguments, dl);

        NeededLoad = true;
      }
      // When passing an array of floats, the array occupies consecutive
      // space in the argument area; only round up to the next doubleword
      // at the end of the array.  Otherwise, each float takes 8 bytes.
      if (!IsFastCall || NeededLoad) {
        ArgOffset += (Arg.getValueType() == MVT::f32 &&
                      Flags.isInConsecutiveRegs()) ? 4 : 8;
        if (Flags.isInConsecutiveRegsLast())
          ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
      }
      break;
    }
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
    case MVT::v2f64:
    case MVT::v2i64:
    case MVT::v1i128:
    case MVT::f128:
      // These can be scalar arguments or elements of a vector array type
      // passed directly.  The latter are used to implement ELFv2 homogenous
      // vector aggregates.

      // For a varargs call, named arguments go into VRs or on the stack as
      // usual; unnamed arguments always go to the stack or the corresponding
      // GPRs when within range.  For now, we always put the value in both
      // locations (or even all three).
      if (CFlags.IsVarArg) {
        assert(HasParameterArea &&
               "Parameter area must exist if we have a varargs call.");
        // We could elide this store in the case where the object fits
        // entirely in R registers.  Maybe later.
        SDValue Store =
            DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
        MemOpChains.push_back(Store);
        if (VR_idx != NumVRs) {
          SDValue Load =
              DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
        }
        ArgOffset += 16;
        // Reload the 16-byte slot one doubleword at a time into any GPRs
        // that are still free. 'Off' is the byte offset within the slot
        // (named distinctly so it does not shadow the argument index 'i').
        for (unsigned Off = 0; Off < 16; Off += PtrByteSize) {
          if (GPR_idx == NumGPRs)
            break;
          SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
                                   DAG.getConstant(Off, dl, PtrVT));
          SDValue Load =
              DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
        }
        break;
      }

      // Non-varargs Altivec params go into VRs or on the stack.
      if (VR_idx != NumVRs) {
        RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
      } else {
        if (IsFastCall)
          ComputePtrOff();

        assert(HasParameterArea &&
               "Parameter area must exist to pass an argument in memory.");
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         true, CFlags.IsTailCall, true, MemOpChains,
                         TailCallArguments, dl);
        if (IsFastCall)
          ArgOffset += 16;
      }

      if (!IsFastCall)
        ArgOffset += 16;
      break;
    }
  }

  assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
         "mismatch in size of parameter area");
  (void)NumBytesActuallyUsed;

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);

  // Check if this is an indirect call (MTCTR/BCTRL).
  // See prepareDescriptorIndirectCall and buildCallOperands for more
  // information about calls through function pointers in the 64-bit SVR4 ABI.
  if (CFlags.IsIndirect) {
    // For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the
    // caller in the TOC save area.
    if (isTOCSaveRestoreRequired(Subtarget)) {
      assert(!CFlags.IsTailCall && "Indirect tails calls not supported");
      // Load r2 into a virtual register and store it to the TOC save area.
      setUsesTOCBasePtr(DAG);
      SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
      // TOC save area offset.
      unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
      SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
      SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
      Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
                           MachinePointerInfo::getStack(
                               DAG.getMachineFunction(), TOCSaveOffset));
    }
    // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
    // This does not mean the MTCTR instruction must use R12; it's easier
    // to model this as an extra parameter, so do that.
    if (isELFv2ABI && !CFlags.IsPatchPoint)
      RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  if (CFlags.IsTailCall && !IsSibCall)
    PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
                    TailCallArguments);

  return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
                    Callee, SPDiff, NumBytes, Ins, InVals, CB);
}
if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,

5997

PtrByteSize, LinkageSize, ParamAreaSize,

5998

NumBytesTmp, AvailableFPRs, AvailableVRs))

5999

HasParameterArea = true;

6000

}

6001

}

6002

6003

// When using the fast calling convention, we don't provide backing for

6004

// arguments that will be in registers.

6005

unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;

6006

6007

// Avoid allocating parameter area for fastcc functions if all the arguments

6008

// can be passed in the registers.

6009

if (IsFastCall)

6010

HasParameterArea = false;

6011

6012

// Add up all the space actually used.

6013

for (unsigned i = 0; i != NumOps; ++i) {

6014

ISD::ArgFlagsTy Flags = Outs[i].Flags;

6015

EVT ArgVT = Outs[i].VT;

6016

EVT OrigVT = Outs[i].ArgVT;

6017

6018

if (Flags.isNest())

6019

continue;

6020

6021

if (IsFastCall) {

6022

if (Flags.isByVal()) {

6023

NumGPRsUsed += (Flags.getByValSize()+7)/8;

6024

if (NumGPRsUsed > NumGPRs)

6025

HasParameterArea = true;

6026

} else {

6027

switch (ArgVT.getSimpleVT().SimpleTy) {

6028

default: llvm_unreachable("Unexpected ValueType for argument!")::llvm::llvm_unreachable_internal("Unexpected ValueType for argument!"
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 6028);

6029

case MVT::i1:

6030

case MVT::i32:

6031

case MVT::i64:

6032

if (++NumGPRsUsed <= NumGPRs)

6033

continue;

6034

break;

6035

case MVT::v4i32:

6036

case MVT::v8i16:

6037

case MVT::v16i8:

6038

case MVT::v2f64:

6039

case MVT::v2i64:

6040

case MVT::v1i128:

6041

case MVT::f128:

6042

if (++NumVRsUsed <= NumVRs)

6043

continue;

6044

break;

6045

case MVT::v4f32:

6046

if (++NumVRsUsed <= NumVRs)

6047

continue;

6048

break;

6049

case MVT::f32:

6050

case MVT::f64:

6051

if (++NumFPRsUsed <= NumFPRs)

6052

continue;

6053

break;

6054

}

6055

HasParameterArea = true;

6056

}

6057

}

6058

6059

/* Respect alignment of argument on the stack. */

6060

auto Alignement =

6061

CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);

6062

NumBytes = alignTo(NumBytes, Alignement);

6063

6064

NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);

6065

if (Flags.isInConsecutiveRegsLast())

6066

NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;

6067

}

6068

6069

unsigned NumBytesActuallyUsed = NumBytes;

6070

6071

// In the old ELFv1 ABI,

6072

// the prolog code of the callee may store up to 8 GPR argument registers to

6073

// the stack, allowing va_start to index over them in memory if its varargs.

6074

// Because we cannot tell if this is needed on the caller side, we have to

6075

// conservatively assume that it is needed. As such, make sure we have at

6076

// least enough stack space for the caller to store the 8 GPRs.

6077

// In the ELFv2 ABI, we allocate the parameter area iff a callee

6078

// really requires memory operands, e.g. a vararg function.

6079

if (HasParameterArea)

6080

NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);

6081

else

6082

NumBytes = LinkageSize;

6083

6084

// Tail call needs the stack to be aligned.

6085

if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)

6086

NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);

6087

6088

int SPDiff = 0;

6089

6090

// Calculate by how many bytes the stack has to be adjusted in case of tail

6091

// call optimization.

6092

if (!IsSibCall)

6093

SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);

6094

6095

// To protect arguments on the stack from being clobbered in a tail call,

6096

// force all the loads to happen before doing any other lowering.

6097

if (CFlags.IsTailCall)

6098

Chain = DAG.getStackArgumentTokenFactor(Chain);

6099

6100

// Adjust the stack pointer for the new arguments...

6101

// These operations are automatically eliminated by the prolog/epilog pass

6102

if (!IsSibCall)

6103

Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);

6104

SDValue CallSeqStart = Chain;

6105

6106

// Load the return address and frame pointer so it can be move somewhere else

6107

// later.

6108

SDValue LROp, FPOp;

6109

Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);

6110

6111

// Set up a copy of the stack pointer for use loading and storing any

6112

// arguments that may not fit in the registers available for argument

6113

// passing.

6114

SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);

6115

6116

// Figure out which arguments are going to go in registers, and which in

6117

// memory. Also, if this is a vararg function, floating point operations

6118

// must be stored to our stack, and loaded into integer regs as well, if

6119

// any integer regs are available for argument passing.

6120

unsigned ArgOffset = LinkageSize;

6121

6122

SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;

6123

SmallVector<TailCallArgumentInfo, 8> TailCallArguments;

6124

6125

SmallVector<SDValue, 8> MemOpChains;

6126

for (unsigned i = 0; i != NumOps; ++i) {

6127

SDValue Arg = OutVals[i];

6128

ISD::ArgFlagsTy Flags = Outs[i].Flags;

6129

EVT ArgVT = Outs[i].VT;

6130

EVT OrigVT = Outs[i].ArgVT;

6131

6132

// PtrOff will be used to store the current argument to the stack if a

6133

// register cannot be found for it.

6134

SDValue PtrOff;

6135

6136

// We re-align the argument offset for each argument, except when using the

6137

// fast calling convention, when we need to make sure we do that only when

6138

// we'll actually use a stack slot.

6139

auto ComputePtrOff = [&]() {

6140

/* Respect alignment of argument on the stack. */

6141

auto Alignment =

6142

CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);

6143

ArgOffset = alignTo(ArgOffset, Alignment);

6144

6145

PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());

6146

6147

PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);

6148

};

6149

6150

if (!IsFastCall) {

6151

ComputePtrOff();

6152

6153

/* Compute GPR index associated with argument offset. */

6154

GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;

6155

GPR_idx = std::min(GPR_idx, NumGPRs);

6156

}

6157

6158

// Promote integers to 64-bit values.

6159

if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {

6160

// FIXME: Should this use ANY_EXTEND if neither sext nor zext?

6161

unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;

6162

Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);

6163

}

6164

6165

// FIXME memcpy is used way more than necessary. Correctness first.

6166

// Note: "by value" is code for passing a structure by value, not

6167

// basic types.

6168

if (Flags.isByVal()) {

6169

// Note: Size includes alignment padding, so

6170

// struct x { short a; char b; }

6171

// will have Size = 4. With #pragma pack(1), it will have Size = 3.

6172

// These are the proper values we need for right-justifying the

6173

// aggregate in a parameter register.

6174

unsigned Size = Flags.getByValSize();

6175

6176

// An empty aggregate parameter takes up no storage and no

6177

// registers.

6178

if (Size == 0)

6179

continue;

6180

6181

if (IsFastCall)

6182

ComputePtrOff();

6183

6184

// All aggregates smaller than 8 bytes must be passed right-justified.

6185

if (Size==1 || Size==2 || Size==4) {

6186

EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);

6187

if (GPR_idx != NumGPRs) {

6188

SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,

6189

MachinePointerInfo(), VT);

6190

MemOpChains.push_back(Load.getValue(1));

6191

RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));

6192

6193

ArgOffset += PtrByteSize;

6194

continue;

6195

}

6196

}

6197

6198

if (GPR_idx == NumGPRs && Size < 8) {

6199

SDValue AddPtr = PtrOff;

6200

if (!isLittleEndian) {

6201

SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,

6202

PtrOff.getValueType());

6203

AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);

6204

}

6205

Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,

6206

CallSeqStart,

6207

Flags, DAG, dl);

6208

ArgOffset += PtrByteSize;

6209

continue;

6210

}

6211

// Copy entire object into memory. There are cases where gcc-generated

6212

// code assumes it is there, even if it could be put entirely into

6213

// registers. (This is not what the doc says.)

6214

6215

// FIXME: The above statement is likely due to a misunderstanding of the

6216

// documents. All arguments must be copied into the parameter area BY

6217

// THE CALLEE in the event that the callee takes the address of any

6218

// formal argument. That has not yet been implemented. However, it is

6219

// reasonable to use the stack area as a staging area for the register

6220

// load.

6221

6222

// Skip this for small aggregates, as we will use the same slot for a

6223

// right-justified copy, below.

6224

if (Size >= 8)

6225

Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,

6226

CallSeqStart,

6227

Flags, DAG, dl);

6228

6229

// When a register is available, pass a small aggregate right-justified.

6230

if (Size < 8 && GPR_idx != NumGPRs) {

6231

// The easiest way to get this right-justified in a register

6232

// is to copy the structure into the rightmost portion of a

6233

// local variable slot, then load the whole slot into the

6234

// register.

6235

// FIXME: The memcpy seems to produce pretty awful code for

6236

// small aggregates, particularly for packed ones.

6237

// FIXME: It would be preferable to use the slot in the

6238

// parameter save area instead of a new local variable.

6239

SDValue AddPtr = PtrOff;

6240

if (!isLittleEndian) {

6241

SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());

6242

AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);

6243

}

6244

Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,

6245

CallSeqStart,

6246

Flags, DAG, dl);

6247

6248

// Load the slot into the register.

6249

SDValue Load =

6250

DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());

6251

MemOpChains.push_back(Load.getValue(1));

6252

RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));

6253

6254

// Done with this argument.

6255

ArgOffset += PtrByteSize;

6256

continue;

6257

}

6258

6259

// For aggregates larger than PtrByteSize, copy the pieces of the

6260

// object that fit into registers from the parameter save area.

6261

for (unsigned j=0; j<Size; j+=PtrByteSize) {

6262

SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());

6263

SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);

6264

if (GPR_idx != NumGPRs) {

6265

SDValue Load =

6266

DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());

6267

MemOpChains.push_back(Load.getValue(1));

6268

RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));

6269

ArgOffset += PtrByteSize;

6270

} else {

6271

ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;

6272

break;

6273

}

6274

}

6275

continue;

6276

}

6277

6278

switch (Arg.getSimpleValueType().SimpleTy) {

6279

default: llvm_unreachable("Unexpected ValueType for argument!")::llvm::llvm_unreachable_internal("Unexpected ValueType for argument!"
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 6279);

6280

case MVT::i1:

6281

case MVT::i32:

6282

case MVT::i64:

6283

if (Flags.isNest()) {

6284

// The 'nest' parameter, if any, is passed in R11.

6285

RegsToPass.push_back(std::make_pair(PPC::X11, Arg));

6286

break;

6287

}

6288

6289

// These can be scalar arguments or elements of an integer array type

6290

// passed directly. Clang may use those instead of "byval" aggregate

6291

// types to avoid forcing arguments to memory unnecessarily.

6292

if (GPR_idx != NumGPRs) {

6293

RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));

6294

} else {

6295

if (IsFastCall)

6296

ComputePtrOff();

6297

6298

assert(HasParameterArea &&((HasParameterArea && "Parameter area must exist to pass an argument in memory."
) ? static_cast<void> (0) : __assert_fail ("HasParameterArea && \"Parameter area must exist to pass an argument in memory.\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 6299, __PRETTY_FUNCTION__))

6299

"Parameter area must exist to pass an argument in memory.")((HasParameterArea && "Parameter area must exist to pass an argument in memory."
) ? static_cast<void> (0) : __assert_fail ("HasParameterArea && \"Parameter area must exist to pass an argument in memory.\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 6299, __PRETTY_FUNCTION__));

6300

LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,

6301

true, CFlags.IsTailCall, false, MemOpChains,

6302

TailCallArguments, dl);

6303

if (IsFastCall)

6304

ArgOffset += PtrByteSize;

6305

}

6306

if (!IsFastCall)

6307

ArgOffset += PtrByteSize;

6308

break;

6309

case MVT::f32:

6310

case MVT::f64: {

6311

// These can be scalar arguments or elements of a float array type

6312

// passed directly. The latter are used to implement ELFv2 homogenous

6313

// float aggregates.

6314

6315

// Named arguments go into FPRs first, and once they overflow, the

6316

// remaining arguments go into GPRs and then the parameter save area.

6317

// Unnamed arguments for vararg functions always go to GPRs and

6318

// then the parameter save area. For now, put all arguments to vararg

6319

// routines always in both locations (FPR *and* GPR or stack slot).

6320

bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs;

6321

bool NeededLoad = false;

6322

6323

// First load the argument into the next available FPR.

6324

if (FPR_idx != NumFPRs)

6325

RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));

6326

6327

// Next, load the argument into GPR or stack slot if needed.

6328

if (!NeedGPROrStack)

6329

;

6330

else if (GPR_idx != NumGPRs && !IsFastCall) {

6331

// FIXME: We may want to re-enable this for CallingConv::Fast on the P8

6332

// once we support fp <-> gpr moves.

6333

6334

// In the non-vararg case, this can only ever happen in the

6335

// presence of f32 array types, since otherwise we never run

6336

// out of FPRs before running out of GPRs.

6337

SDValue ArgVal;

6338

6339

// Double values are always passed in a single GPR.

6340

if (Arg.getValueType() != MVT::f32) {

6341

ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);

6342

6343

// Non-array float values are extended and passed in a GPR.

6344

} else if (!Flags.isInConsecutiveRegs()) {

6345

ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);

6346

ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);

6347

6348

// If we have an array of floats, we collect every odd element

6349

// together with its predecessor into one GPR.

6350

} else if (ArgOffset % PtrByteSize != 0) {

6351

SDValue Lo, Hi;

6352

Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);

6353

Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);

6354

if (!isLittleEndian)

6355

std::swap(Lo, Hi);

6356

ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);

6357

6358

// The final element, if even, goes into the first half of a GPR.

6359

} else if (Flags.isInConsecutiveRegsLast()) {

6360

ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);

6361

ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);

6362

if (!isLittleEndian)

6363

ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,

6364

DAG.getConstant(32, dl, MVT::i32));

6365

6366

// Non-final even elements are skipped; they will be handled

6367

// together the with subsequent argument on the next go-around.

6368

} else

6369

ArgVal = SDValue();

6370

6371

if (ArgVal.getNode())

6372

RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));

6373

} else {

6374

if (IsFastCall)

6375

ComputePtrOff();

6376

6377

// Single-precision floating-point values are mapped to the

6378

// second (rightmost) word of the stack doubleword.

6379

if (Arg.getValueType() == MVT::f32 &&

6380

!isLittleEndian && !Flags.isInConsecutiveRegs()) {

6381

SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());

6382

PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);

6383

}

6384

6385

assert(HasParameterArea &&((HasParameterArea && "Parameter area must exist to pass an argument in memory."
) ? static_cast<void> (0) : __assert_fail ("HasParameterArea && \"Parameter area must exist to pass an argument in memory.\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 6386, __PRETTY_FUNCTION__))

6386

"Parameter area must exist to pass an argument in memory.")((HasParameterArea && "Parameter area must exist to pass an argument in memory."
) ? static_cast<void> (0) : __assert_fail ("HasParameterArea && \"Parameter area must exist to pass an argument in memory.\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 6386, __PRETTY_FUNCTION__));

6387

LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,

6388

true, CFlags.IsTailCall, false, MemOpChains,

6389

TailCallArguments, dl);

6390

6391

NeededLoad = true;

6392

}

6393

// When passing an array of floats, the array occupies consecutive

6394

// space in the argument area; only round up to the next doubleword

6395

// at the end of the array. Otherwise, each float takes 8 bytes.

6396

if (!IsFastCall || NeededLoad) {

6397

ArgOffset += (Arg.getValueType() == MVT::f32 &&

6398

Flags.isInConsecutiveRegs()) ? 4 : 8;

6399

if (Flags.isInConsecutiveRegsLast())

6400

ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;

6401

}

6402

break;

6403

}

6404

case MVT::v4f32:

6405

case MVT::v4i32:

6406

case MVT::v8i16:

6407

case MVT::v16i8:

6408

case MVT::v2f64:

6409

case MVT::v2i64:

6410

case MVT::v1i128:

6411

case MVT::f128:

6412

// These can be scalar arguments or elements of a vector array type

6413

// passed directly. The latter are used to implement ELFv2 homogenous

6414

// vector aggregates.

6415

6416

// For a varargs call, named arguments go into VRs or on the stack as

6417

// usual; unnamed arguments always go to the stack or the corresponding

6418

// GPRs when within range. For now, we always put the value in both

6419

// locations (or even all three).

6420

if (CFlags.IsVarArg) {

6421

assert(HasParameterArea &&((HasParameterArea && "Parameter area must exist if we have a varargs call."
) ? static_cast<void> (0) : __assert_fail ("HasParameterArea && \"Parameter area must exist if we have a varargs call.\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 6422, __PRETTY_FUNCTION__))

6422

"Parameter area must exist if we have a varargs call.")((HasParameterArea && "Parameter area must exist if we have a varargs call."
) ? static_cast<void> (0) : __assert_fail ("HasParameterArea && \"Parameter area must exist if we have a varargs call.\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 6422, __PRETTY_FUNCTION__));

6423

// We could elide this store in the case where the object fits

6424

// entirely in R registers. Maybe later.

6425

SDValue Store =

6426

DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());

6427

MemOpChains.push_back(Store);

6428

if (VR_idx != NumVRs) {

6429

SDValue Load =

6430

DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());

6431

MemOpChains.push_back(Load.getValue(1));

6432

RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));

6433

}

6434

ArgOffset += 16;

6435

for (unsigned i=0; i<16; i+=PtrByteSize) {

6436

if (GPR_idx == NumGPRs)

6437

break;

6438

SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,

6439

DAG.getConstant(i, dl, PtrVT));

6440

SDValue Load =

6441

DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());

6442

MemOpChains.push_back(Load.getValue(1));

6443

RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));

6444

}

6445

break;

6446

}

6447

6448

// Non-varargs Altivec params go into VRs or on the stack.

6449

if (VR_idx != NumVRs) {

6450

RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));

6451

} else {

6452

if (IsFastCall)

6453

ComputePtrOff();

6454

6455

assert(HasParameterArea &&((HasParameterArea && "Parameter area must exist to pass an argument in memory."
) ? static_cast<void> (0) : __assert_fail ("HasParameterArea && \"Parameter area must exist to pass an argument in memory.\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 6456, __PRETTY_FUNCTION__))

6456

"Parameter area must exist to pass an argument in memory.")((HasParameterArea && "Parameter area must exist to pass an argument in memory."
) ? static_cast<void> (0) : __assert_fail ("HasParameterArea && \"Parameter area must exist to pass an argument in memory.\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 6456, __PRETTY_FUNCTION__));

6457

LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,

6458

true, CFlags.IsTailCall, true, MemOpChains,

6459

TailCallArguments, dl);

6460

if (IsFastCall)

6461

ArgOffset += 16;

6462

}

6463

6464

if (!IsFastCall)

6465

ArgOffset += 16;

6466

break;

6467

}

6468

}

6469

6470

assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&(((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
"mismatch in size of parameter area") ? static_cast<void>
(0) : __assert_fail ("(!HasParameterArea || NumBytesActuallyUsed == ArgOffset) && \"mismatch in size of parameter area\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 6471, __PRETTY_FUNCTION__))

6471

"mismatch in size of parameter area")(((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
"mismatch in size of parameter area") ? static_cast<void>
(0) : __assert_fail ("(!HasParameterArea || NumBytesActuallyUsed == ArgOffset) && \"mismatch in size of parameter area\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 6471, __PRETTY_FUNCTION__));

6472

(void)NumBytesActuallyUsed;

6473

6474

if (!MemOpChains.empty())

6475

Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);

6476

6477

// Check if this is an indirect call (MTCTR/BCTRL).

6478

// See prepareDescriptorIndirectCall and buildCallOperands for more

6479

// information about calls through function pointers in the 64-bit SVR4 ABI.

6480

if (CFlags.IsIndirect) {

6481

// For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the

6482

// caller in the TOC save area.

6483

if (isTOCSaveRestoreRequired(Subtarget)) {

6484

assert(!CFlags.IsTailCall && "Indirect tails calls not supported")((!CFlags.IsTailCall && "Indirect tails calls not supported"
) ? static_cast<void> (0) : __assert_fail ("!CFlags.IsTailCall && \"Indirect tails calls not supported\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 6484, __PRETTY_FUNCTION__));

6485

// Load r2 into a virtual register and store it to the TOC save area.

6486

setUsesTOCBasePtr(DAG);

6487

SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);

6488

// TOC save area offset.

6489

unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();

6490

SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);

6491

SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);

6492

Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,

6493

MachinePointerInfo::getStack(

6494

DAG.getMachineFunction(), TOCSaveOffset));

6495

}

6496

// In the ELFv2 ABI, R12 must contain the address of an indirect callee.

6497

// This does not mean the MTCTR instruction must use R12; it's easier

6498

// to model this as an extra parameter, so do that.

6499

if (isELFv2ABI && !CFlags.IsPatchPoint)

6500

RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));

6501

}

6502

6503

// Build a sequence of copy-to-reg nodes chained together with token chain

6504

// and flag operands which copy the outgoing args into the appropriate regs.

6505

SDValue InFlag;

6506

for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {

6507

Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,

6508

RegsToPass[i].second, InFlag);

6509

InFlag = Chain.getValue(1);

6510

}

6511

6512

if (CFlags.IsTailCall && !IsSibCall)

6513

PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,

6514

TailCallArguments);

6515

6516

return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,

6517

Callee, SPDiff, NumBytes, Ins, InVals, CB);

6518

}

6519

6520

SDValue PPCTargetLowering::LowerCall_Darwin(

6521

SDValue Chain, SDValue Callee, CallFlags CFlags,

6522

const SmallVectorImpl<ISD::OutputArg> &Outs,

6523

const SmallVectorImpl<SDValue> &OutVals,

6524

const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,

6525

SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,

6526

const CallBase *CB) const {

6527

unsigned NumOps = Outs.size();

6528

6529

EVT PtrVT = getPointerTy(DAG.getDataLayout());

6530

bool isPPC64 = PtrVT == MVT::i64;

6531

unsigned PtrByteSize = isPPC64 ? 8 : 4;

6532

6533

MachineFunction &MF = DAG.getMachineFunction();

6534

6535

// Mark this function as potentially containing a function that contains a

6536

// tail call. As a consequence the frame pointer will be used for dynamicalloc

6537

// and restoring the callers stack pointer in this functions epilog. This is

6538

// done because by tail calling the called function might overwrite the value

6539

// in this function's (MF) stack pointer stack slot 0(SP).

6540

if (getTargetMachine().Options.GuaranteedTailCallOpt &&

6541

CFlags.CallConv == CallingConv::Fast)

6542

MF.getInfo<PPCFunctionInfo>()->setHasFastCall();

6543

6544

// Count how many bytes are to be pushed on the stack, including the linkage

6545

// area, and parameter passing area. We start with 24/48 bytes, which is

6546

// prereserved space for [SP][CR][LR][3 x unused].

6547

unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();

6548

unsigned NumBytes = LinkageSize;

6549

6550

// Add up all the space actually used.

6551

// In 32-bit non-varargs calls, Altivec parameters all go at the end; usually

6552

// they all go in registers, but we must reserve stack space for them for

6553

// possible use by the caller. In varargs or 64-bit calls, parameters are

6554

// assigned stack space in order, with padding so Altivec parameters are

6555

// 16-byte aligned.

6556

unsigned nAltivecParamsAtEnd = 0;

6557

for (unsigned i = 0; i != NumOps; ++i) {

6558

ISD::ArgFlagsTy Flags = Outs[i].Flags;

6559

EVT ArgVT = Outs[i].VT;

6560

// Varargs Altivec parameters are padded to a 16 byte boundary.

6561

if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||

6562

ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||

6563

ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) {

6564

if (!CFlags.IsVarArg && !isPPC64) {

6565

// Non-varargs Altivec parameters go after all the non-Altivec

6566

// parameters; handle those later so we know how much padding we need.

6567

nAltivecParamsAtEnd++;

6568

continue;

6569

}

6570

// Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.

6571

NumBytes = ((NumBytes+15)/16)*16;

6572

}

6573

NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);

6574

}

6575

6576

// Allow for Altivec parameters at the end, if needed.

6577

if (nAltivecParamsAtEnd) {

6578

NumBytes = ((NumBytes+15)/16)*16;

6579

NumBytes += 16*nAltivecParamsAtEnd;

6580

}

6581

6582

// The prolog code of the callee may store up to 8 GPR argument registers to

6583

// the stack, allowing va_start to index over them in memory if it is varargs.

6584

// Because we cannot tell if this is needed on the caller side, we have to

6585

// conservatively assume that it is needed. As such, make sure we have at

6586

// least enough stack space for the caller to store the 8 GPRs.

6587

NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);

6588

6589

// Tail call needs the stack to be aligned.

6590

if (getTargetMachine().Options.GuaranteedTailCallOpt &&

6591

CFlags.CallConv == CallingConv::Fast)

6592

NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);

6593

6594

// Calculate by how many bytes the stack has to be adjusted in case of tail

6595

// call optimization.

6596

int SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);

6597

6598

// To protect arguments on the stack from being clobbered in a tail call,

6599

// force all the loads to happen before doing any other lowering.

6600

if (CFlags.IsTailCall)

6601

Chain = DAG.getStackArgumentTokenFactor(Chain);

6602

6603

// Adjust the stack pointer for the new arguments...

6604

// These operations are automatically eliminated by the prolog/epilog pass

6605

Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);

6606

SDValue CallSeqStart = Chain;

6607

6608

// Load the return address and frame pointer so it can be moved somewhere else

6609

// later.

6610

SDValue LROp, FPOp;

6611

Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);

6612

6613

// Set up a copy of the stack pointer for use loading and storing any

6614

// arguments that may not fit in the registers available for argument

6615

// passing.

6616

SDValue StackPtr;

6617

if (isPPC64)

6618

StackPtr = DAG.getRegister(PPC::X1, MVT::i64);

6619

else

6620

StackPtr = DAG.getRegister(PPC::R1, MVT::i32);

6621

6622

// Figure out which arguments are going to go in registers, and which in

6623

// memory. Also, if this is a vararg function, floating point operations

6624

// must be stored to our stack, and loaded into integer regs as well, if

6625

// any integer regs are available for argument passing.

6626

unsigned ArgOffset = LinkageSize;

6627

unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;

6628

6629

static const MCPhysReg GPR_32[] = { // 32-bit registers.

6630

PPC::R3, PPC::R4, PPC::R5, PPC::R6,

6631

PPC::R7, PPC::R8, PPC::R9, PPC::R10,

6632

};

6633

static const MCPhysReg GPR_64[] = { // 64-bit registers.

6634

PPC::X3, PPC::X4, PPC::X5, PPC::X6,

6635

PPC::X7, PPC::X8, PPC::X9, PPC::X10,

6636

};

6637

static const MCPhysReg VR[] = {

6638

PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,

6639

PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13

6640

};

6641

const unsigned NumGPRs = array_lengthof(GPR_32);

6642

const unsigned NumFPRs = 13;

6643

const unsigned NumVRs = array_lengthof(VR);

6644

6645

const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;

6646

6647

SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;

6648

SmallVector<TailCallArgumentInfo, 8> TailCallArguments;

6649

6650

SmallVector<SDValue, 8> MemOpChains;

6651

for (unsigned i = 0; i != NumOps; ++i) {

6652

SDValue Arg = OutVals[i];

6653

ISD::ArgFlagsTy Flags = Outs[i].Flags;

6654

6655

// PtrOff will be used to store the current argument to the stack if a

6656

// register cannot be found for it.

6657

SDValue PtrOff;

6658

6659

PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());

6660

6661

PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);

6662

6663

// On PPC64, promote integers to 64-bit values.

6664

if (isPPC64 && Arg.getValueType() == MVT::i32) {

6665

// FIXME: Should this use ANY_EXTEND if neither sext nor zext?

6666

unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;

6667

Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);

6668

}

6669

6670

// FIXME memcpy is used way more than necessary. Correctness first.

6671

// Note: "by value" is code for passing a structure by value, not

6672

// basic types.

6673

if (Flags.isByVal()) {

6674

unsigned Size = Flags.getByValSize();

6675

// Very small objects are passed right-justified. Everything else is

6676

// passed left-justified.

6677

if (Size==1 || Size==2) {

6678

EVT VT = (Size==1) ? MVT::i8 : MVT::i16;

6679

if (GPR_idx != NumGPRs) {

6680

SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,

6681

MachinePointerInfo(), VT);

6682

MemOpChains.push_back(Load.getValue(1));

6683

RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));

6684

6685

ArgOffset += PtrByteSize;

6686

} else {

6687

SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,

6688

PtrOff.getValueType());

6689

SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);

6690

Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,

6691

CallSeqStart,

6692

Flags, DAG, dl);

6693

ArgOffset += PtrByteSize;

6694

}

6695

continue;

6696

}

6697

// Copy entire object into memory. There are cases where gcc-generated

6698

// code assumes it is there, even if it could be put entirely into

6699

// registers. (This is not what the doc says.)

6700

Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,

6701

CallSeqStart,

6702

Flags, DAG, dl);

6703

6704

// For small aggregates (Darwin only) and aggregates >= PtrByteSize,

6705

// copy the pieces of the object that fit into registers from the

6706

// parameter save area.

6707

for (unsigned j=0; j<Size; j+=PtrByteSize) {

6708

SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());

6709

SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);

6710

if (GPR_idx != NumGPRs) {

6711

SDValue Load =

6712

DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());

6713

MemOpChains.push_back(Load.getValue(1));

6714

RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));

6715

ArgOffset += PtrByteSize;

6716

} else {

6717

ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;

6718

break;

6719

}

6720

}

6721

continue;

6722

}

6723

6724

switch (Arg.getSimpleValueType().SimpleTy) {

6725

default: llvm_unreachable("Unexpected ValueType for argument!")::llvm::llvm_unreachable_internal("Unexpected ValueType for argument!"
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 6725);

6726

case MVT::i1:

6727

case MVT::i32:

6728

case MVT::i64:

6729

if (GPR_idx != NumGPRs) {

6730

if (Arg.getValueType() == MVT::i1)

6731

Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, PtrVT, Arg);

6732

6733

RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));

6734

} else {

6735

LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,

6736

isPPC64, CFlags.IsTailCall, false, MemOpChains,

6737

TailCallArguments, dl);

6738

}

6739

ArgOffset += PtrByteSize;

6740

break;

6741

case MVT::f32:

6742

case MVT::f64:

6743

if (FPR_idx != NumFPRs) {

6744

RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));

6745

6746

if (CFlags.IsVarArg) {

6747

SDValue Store =

6748

DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());

6749

MemOpChains.push_back(Store);

6750

6751

// Float varargs are always shadowed in available integer registers

6752

if (GPR_idx != NumGPRs) {

6753

SDValue Load =

6754

DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo());

6755

MemOpChains.push_back(Load.getValue(1));

6756

RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));

6757

}

6758

if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){

6759

SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());

6760

PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);

6761

SDValue Load =

6762

DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo());

6763

MemOpChains.push_back(Load.getValue(1));

6764

RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));

6765

}

6766

} else {

6767

// If we have any FPRs remaining, we may also have GPRs remaining.

6768

// Args passed in FPRs consume either 1 (f32) or 2 (f64) available

6769

// GPRs.

6770

if (GPR_idx != NumGPRs)

6771

++GPR_idx;

6772

if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&

6773

!isPPC64) // PPC64 has 64-bit GPR's obviously :)

6774

++GPR_idx;

6775

}

6776

} else

6777

LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,

6778

isPPC64, CFlags.IsTailCall, false, MemOpChains,

6779

TailCallArguments, dl);

6780

if (isPPC64)

6781

ArgOffset += 8;

6782

else

6783

ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;

6784

break;

6785

case MVT::v4f32:

6786

case MVT::v4i32:

6787

case MVT::v8i16:

6788

case MVT::v16i8:

6789

if (CFlags.IsVarArg) {

6790

// These go aligned on the stack, or in the corresponding R registers

6791

// when within range. The Darwin PPC ABI doc claims they also go in

6792

// V registers; in fact gcc does this only for arguments that are

6793

// prototyped, not for those that match the ... We do it for all

6794

// arguments, seems to work.

6795

while (ArgOffset % 16 !=0) {

6796

ArgOffset += PtrByteSize;

6797

if (GPR_idx != NumGPRs)

6798

GPR_idx++;

6799

}

6800

// We could elide this store in the case where the object fits

6801

// entirely in R registers. Maybe later.

6802

PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,

6803

DAG.getConstant(ArgOffset, dl, PtrVT));

6804

SDValue Store =

6805

DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());

6806

MemOpChains.push_back(Store);

6807

if (VR_idx != NumVRs) {

6808

SDValue Load =

6809

DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());

6810

MemOpChains.push_back(Load.getValue(1));

6811

RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));

6812

}

6813

ArgOffset += 16;

6814

for (unsigned i=0; i<16; i+=PtrByteSize) {

6815

if (GPR_idx == NumGPRs)

6816

break;

6817

SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,

6818

DAG.getConstant(i, dl, PtrVT));

6819

SDValue Load =

6820

DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());

6821

MemOpChains.push_back(Load.getValue(1));

6822

RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));

6823

}

6824

break;

6825

}

6826

6827

// Non-varargs Altivec params generally go in registers, but have

6828

// stack space allocated at the end.

6829

if (VR_idx != NumVRs) {

6830

// Doesn't have GPR space allocated.

6831

RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));

6832

} else if (nAltivecParamsAtEnd==0) {

6833

// We are emitting Altivec params in order.

6834

LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,

6835

isPPC64, CFlags.IsTailCall, true, MemOpChains,

6836

TailCallArguments, dl);

6837

ArgOffset += 16;

6838

}

6839

break;

6840

}

6841

}

6842

// If all Altivec parameters fit in registers, as they usually do,

6843

// they get stack space following the non-Altivec parameters. We

6844

// don't track this here because nobody below needs it.

6845

// If there are more Altivec parameters than fit in registers emit

6846

// the stores here.

6847

if (!CFlags.IsVarArg && nAltivecParamsAtEnd > NumVRs) {

6848

unsigned j = 0;

6849

// Offset is aligned; skip 1st 12 params which go in V registers.

6850

ArgOffset = ((ArgOffset+15)/16)*16;

6851

ArgOffset += 12*16;

6852

for (unsigned i = 0; i != NumOps; ++i) {

6853

SDValue Arg = OutVals[i];

6854

EVT ArgType = Outs[i].VT;

6855

if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||

6856

ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {

6857

if (++j > NumVRs) {

6858

SDValue PtrOff;

6859

// We are emitting Altivec params in order.

6860

LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,

6861

isPPC64, CFlags.IsTailCall, true, MemOpChains,

6862

TailCallArguments, dl);

6863

ArgOffset += 16;

6864

}

6865

}

6866

}

6867

}

6868

6869

if (!MemOpChains.empty())

6870

Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);

6871

6872

// On Darwin, R12 must contain the address of an indirect callee. This does

6873

// not mean the MTCTR instruction must use R12; it's easier to model this as

6874

// an extra parameter, so do that.

6875

if (CFlags.IsIndirect) {

6876

assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.")((!CFlags.IsTailCall && "Indirect tail-calls not supported."
) ? static_cast<void> (0) : __assert_fail ("!CFlags.IsTailCall && \"Indirect tail-calls not supported.\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 6876, __PRETTY_FUNCTION__));

6877

RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 :

6878

PPC::R12), Callee));

6879

}

6880

6881

// Build a sequence of copy-to-reg nodes chained together with token chain

6882

// and flag operands which copy the outgoing args into the appropriate regs.

6883

SDValue InFlag;

6884

for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {

6885

Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,

6886

RegsToPass[i].second, InFlag);

6887

InFlag = Chain.getValue(1);

6888

}

6889

6890

if (CFlags.IsTailCall)

6891

PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,

6892

TailCallArguments);

6893

6894

return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,

6895

Callee, SPDiff, NumBytes, Ins, InVals, CB);

6896

}

6897

6898

static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,

6899

CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,

6900

CCState &State) {

6901

6902

const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(

6903

State.getMachineFunction().getSubtarget());

6904

const bool IsPPC64 = Subtarget.isPPC64();

6905

const Align PtrAlign = IsPPC64 ? Align(8) : Align(4);

6906

const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;

6907

6908

assert((!ValVT.isInteger() ||(((!ValVT.isInteger() || (ValVT.getFixedSizeInBits() <= RegVT
.getFixedSizeInBits())) && "Integer argument exceeds register size: should have been legalized"
) ? static_cast<void> (0) : __assert_fail ("(!ValVT.isInteger() || (ValVT.getFixedSizeInBits() <= RegVT.getFixedSizeInBits())) && \"Integer argument exceeds register size: should have been legalized\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 6910, __PRETTY_FUNCTION__))

6909

(ValVT.getFixedSizeInBits() <= RegVT.getFixedSizeInBits())) &&(((!ValVT.isInteger() || (ValVT.getFixedSizeInBits() <= RegVT
.getFixedSizeInBits())) && "Integer argument exceeds register size: should have been legalized"
) ? static_cast<void> (0) : __assert_fail ("(!ValVT.isInteger() || (ValVT.getFixedSizeInBits() <= RegVT.getFixedSizeInBits())) && \"Integer argument exceeds register size: should have been legalized\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 6910, __PRETTY_FUNCTION__))

6910

"Integer argument exceeds register size: should have been legalized")(((!ValVT.isInteger() || (ValVT.getFixedSizeInBits() <= RegVT
.getFixedSizeInBits())) && "Integer argument exceeds register size: should have been legalized"
) ? static_cast<void> (0) : __assert_fail ("(!ValVT.isInteger() || (ValVT.getFixedSizeInBits() <= RegVT.getFixedSizeInBits())) && \"Integer argument exceeds register size: should have been legalized\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 6910, __PRETTY_FUNCTION__));

6911

6912

if (ValVT == MVT::f128)

6913

report_fatal_error("f128 is unimplemented on AIX.");

6914

6915

if (ArgFlags.isNest())

6916

report_fatal_error("Nest arguments are unimplemented.");

6917

6918

if (ValVT.isVector() || LocVT.isVector())

6919

report_fatal_error("Vector arguments are unimplemented on AIX.");

6920

6921

static const MCPhysReg GPR_32[] = {// 32-bit registers.

6922

PPC::R3, PPC::R4, PPC::R5, PPC::R6,

6923

PPC::R7, PPC::R8, PPC::R9, PPC::R10};

6924

static const MCPhysReg GPR_64[] = {// 64-bit registers.

6925

PPC::X3, PPC::X4, PPC::X5, PPC::X6,

6926

PPC::X7, PPC::X8, PPC::X9, PPC::X10};

6927

6928

if (ArgFlags.isByVal()) {

6929

if (ArgFlags.getNonZeroByValAlign() > PtrAlign)

6930

report_fatal_error("Pass-by-value arguments with alignment greater than "

6931

"register width are not supported.");

6932

6933

const unsigned ByValSize = ArgFlags.getByValSize();

6934

6935

// An empty aggregate parameter takes up no storage and no registers,

6936

// but needs a MemLoc for a stack slot for the formal arguments side.

6937

if (ByValSize == 0) {

6938

State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,

6939

State.getNextStackOffset(), RegVT,

6940

LocInfo));

6941

return false;

6942

}

6943

6944

const unsigned StackSize = alignTo(ByValSize, PtrAlign);

6945

unsigned Offset = State.AllocateStack(StackSize, PtrAlign);

6946

for (const unsigned E = Offset + StackSize; Offset < E;

6947

Offset += PtrAlign.value()) {

6948

if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))

6949

State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));

6950

else {

6951

State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,

6952

Offset, MVT::INVALID_SIMPLE_VALUE_TYPE,

6953

LocInfo));

6954

break;

6955

}

6956

}

6957

return false;

6958

}

6959

6960

// Arguments always reserve parameter save area.

6961

switch (ValVT.SimpleTy) {

6962

default:

6963

report_fatal_error("Unhandled value type for argument.");

6964

case MVT::i64:

6965

// i64 arguments should have been split to i32 for PPC32.

6966

assert(IsPPC64 && "PPC32 should have split i64 values.")((IsPPC64 && "PPC32 should have split i64 values.") ?
static_cast<void> (0) : __assert_fail ("IsPPC64 && \"PPC32 should have split i64 values.\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 6966, __PRETTY_FUNCTION__));

6967

LLVM_FALLTHROUGH[[gnu::fallthrough]];

6968

case MVT::i1:

6969

case MVT::i32: {

6970

const unsigned Offset = State.AllocateStack(PtrAlign.value(), PtrAlign);

6971

// AIX integer arguments are always passed in register width.

6972

if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits())

6973

LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt

6974

: CCValAssign::LocInfo::ZExt;

6975

if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))

6976

State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));

6977

else

6978

State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo));

6979

6980

return false;

6981

}

6982

case MVT::f32:

6983

case MVT::f64: {

6984

// Parameter save area (PSA) is reserved even if the float passes in fpr.

6985

const unsigned StoreSize = LocVT.getStoreSize();

6986

// Floats are always 4-byte aligned in the PSA on AIX.

6987

// This includes f64 in 64-bit mode for ABI compatibility.

6988

const unsigned Offset =

6989

State.AllocateStack(IsPPC64 ? 8 : StoreSize, Align(4));

6990

unsigned FReg = State.AllocateReg(FPR);

6991

if (FReg)

6992

State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo));

6993

6994

// Reserve and initialize GPRs or initialize the PSA as required.

6995

for (unsigned I = 0; I < StoreSize; I += PtrAlign.value()) {

6996

if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) {

6997

assert(FReg && "An FPR should be available when a GPR is reserved.")((FReg && "An FPR should be available when a GPR is reserved."
) ? static_cast<void> (0) : __assert_fail ("FReg && \"An FPR should be available when a GPR is reserved.\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 6997, __PRETTY_FUNCTION__));

6998

if (State.isVarArg()) {

6999

// Successfully reserved GPRs are only initialized for vararg calls.

7000

// Custom handling is required for:

7001

// f64 in PPC32 needs to be split into 2 GPRs.

7002

// f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR.

7003

State.addLoc(

7004

CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));

7005

}

7006

} else {

7007

// If there are insufficient GPRs, the PSA needs to be initialized.

7008

// Initialization occurs even if an FPR was initialized for

7009

// compatibility with the AIX XL compiler. The full memory for the

7010

// argument will be initialized even if a prior word is saved in GPR.

7011

// A custom memLoc is used when the argument also passes in FPR so

7012

// that the callee handling can skip over it easily.

7013

State.addLoc(

7014

FReg ? CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT,

7015

LocInfo)

7016

: CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));

7017

break;

7018

}

7019

}

7020

7021

return false;

7022

}

7023

}

7024

return true;

7025

}

7026

7027

/// Returns the register class used for a formal argument of simple value type
/// \p SVT.
///
/// i1, i32 and i64 map to the general purpose class matching the pointer
/// width (G8RC when \p IsPPC64, GPRC otherwise); f32 maps to F4RC and f64 to
/// F8RC. Any other type aborts through report_fatal_error.
///
/// \param SVT     Simple value type of the argument.
/// \param IsPPC64 True when generating 64-bit code; i64 is only accepted then.
static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT,
                                                    bool IsPPC64) {
  assert((IsPPC64 || SVT != MVT::i64) &&
         "i64 should have been split for 32-bit codegen.");

  switch (SVT) {
  case MVT::f32:
    return &PPC::F4RCRegClass;
  case MVT::f64:
    return &PPC::F8RCRegClass;
  case MVT::i1:
  case MVT::i32:
  case MVT::i64:
    if (IsPPC64)
      return &PPC::G8RCRegClass;
    return &PPC::GPRCRegClass;
  default:
    report_fatal_error("Unexpected value type for formal argument");
  }
}

7045

7046

/// Narrows an incoming scalar integer argument from its location type
/// \p LocVT down to its value type \p ValVT.
///
/// When \p Flags marks the argument as sign- or zero-extended, the value is
/// first wrapped in the matching AssertSext/AssertZext node (sign extension
/// takes precedence if both are set); it is then truncated to \p ValVT.
///
/// \param Flags    Argument attributes consulted for the extension kind.
/// \param ValVT    Destination type; must be a scalar integer narrower than
///                 \p LocVT.
/// \param DAG      SelectionDAG in which the nodes are created.
/// \param ArgValue Incoming value of type \p LocVT.
/// \param LocVT    Scalar integer type the value currently has.
/// \param dl       Debug location attached to the created nodes.
/// \returns The TRUNCATE node producing a value of type \p ValVT.
static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT,
                                        SelectionDAG &DAG, SDValue ArgValue,
                                        MVT LocVT, const SDLoc &dl) {
  assert(ValVT.isScalarInteger() && LocVT.isScalarInteger());
  assert(ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits());

  const bool IsSigned = Flags.isSExt();
  if (IsSigned || Flags.isZExt()) {
    const unsigned AssertOpc = IsSigned ? ISD::AssertSext : ISD::AssertZext;
    ArgValue = DAG.getNode(AssertOpc, dl, LocVT, ArgValue,
                           DAG.getValueType(ValVT));
  }

  return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
}

7061

7062

static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL) {

7063

const unsigned LASize = FL->getLinkageSize();

7064

7065

if (PPC::GPRCRegClass.contains(Reg)) {

7066

assert(Reg >= PPC::R3 && Reg <= PPC::R10 &&((Reg >= PPC::R3 && Reg <= PPC::R10 && "Reg must be a valid argument register!"
) ? static_cast<void> (0) : __assert_fail ("Reg >= PPC::R3 && Reg <= PPC::R10 && \"Reg must be a valid argument register!\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 7067, __PRETTY_FUNCTION__))

7067

"Reg must be a valid argument register!")((Reg >= PPC::R3 && Reg <= PPC::R10 && "Reg must be a valid argument register!"
) ? static_cast<void> (0) : __assert_fail ("Reg >= PPC::R3 && Reg <= PPC::R10 && \"Reg must be a valid argument register!\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 7067, __PRETTY_FUNCTION__));

7068

return LASize + 4 * (Reg - PPC::R3);

7069

}

7070

7071

if (PPC::G8RCRegClass.contains(Reg)) {

7072

assert(Reg >= PPC::X3 && Reg <= PPC::X10 &&((Reg >= PPC::X3 && Reg <= PPC::X10 && "Reg must be a valid argument register!"
) ? static_cast<void> (0) : __assert_fail ("Reg >= PPC::X3 && Reg <= PPC::X10 && \"Reg must be a valid argument register!\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 7073, __PRETTY_FUNCTION__))

7073

"Reg must be a valid argument register!")((Reg >= PPC::X3 && Reg <= PPC::X10 && "Reg must be a valid argument register!"
) ? static_cast<void> (0) : __assert_fail ("Reg >= PPC::X3 && Reg <= PPC::X10 && \"Reg must be a valid argument register!\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 7073, __PRETTY_FUNCTION__));

7074

return LASize + 8 * (Reg - PPC::X3);

7075

}

7076

7077

llvm_unreachable("Only general purpose registers expected.")::llvm::llvm_unreachable_internal("Only general purpose registers expected."
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 7077);

7078

}

7079

7080

// AIX ABI Stack Frame Layout:
//
// Low Memory +--------------------------------------------+
// SP   +---> | Back chain                                 | ---+
//      |     +--------------------------------------------+    |
//      |     | Saved Condition Register                   |    |
//      |     +--------------------------------------------+    |
//      |     | Saved Linkage Register                     |    |
//      |     +--------------------------------------------+    | Linkage Area
//      |     | Reserved for compilers                     |    |
//      |     +--------------------------------------------+    |
//      |     | Reserved for binders                       |    |
//      |     +--------------------------------------------+    |
//      |     | Saved TOC pointer                          | ---+
//      |     +--------------------------------------------+
//      |     | Parameter save area                        |
//      |     +--------------------------------------------+
//      |     | Alloca space                               |
//      |     +--------------------------------------------+
//      |     | Local variable space                       |
//      |     +--------------------------------------------+
//      |     | Float/int conversion temporary             |
//      |     +--------------------------------------------+
//      |     | Save area for AltiVec registers            |
//      |     +--------------------------------------------+
//      |     | AltiVec alignment padding                  |
//      |     +--------------------------------------------+
//      |     | Save area for VRSAVE register              |
//      |     +--------------------------------------------+
//      |     | Save area for General Purpose registers    |
//      |     +--------------------------------------------+
//      |     | Save area for Floating Point registers     |
//      |     +--------------------------------------------+
//      +---- | Back chain                                 |
// High Memory+--------------------------------------------+
//
//  Specifications:
//  AIX 7.2 Assembler Language Reference
//  Subroutine linkage convention

7119

7120

SDValue PPCTargetLowering::LowerFormalArguments_AIX(

7121

SDValue Chain, CallingConv::ID CallConv, bool isVarArg,

7122

const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,

7123

SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

7124

7125

assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold ||(((CallConv == CallingConv::C || CallConv == CallingConv::Cold
|| CallConv == CallingConv::Fast) && "Unexpected calling convention!"
) ? static_cast<void> (0) : __assert_fail ("(CallConv == CallingConv::C || CallConv == CallingConv::Cold || CallConv == CallingConv::Fast) && \"Unexpected calling convention!\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 7127, __PRETTY_FUNCTION__))

7126

CallConv == CallingConv::Fast) &&(((CallConv == CallingConv::C || CallConv == CallingConv::Cold
|| CallConv == CallingConv::Fast) && "Unexpected calling convention!"
) ? static_cast<void> (0) : __assert_fail ("(CallConv == CallingConv::C || CallConv == CallingConv::Cold || CallConv == CallingConv::Fast) && \"Unexpected calling convention!\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 7127, __PRETTY_FUNCTION__))

7127

"Unexpected calling convention!")(((CallConv == CallingConv::C || CallConv == CallingConv::Cold
|| CallConv == CallingConv::Fast) && "Unexpected calling convention!"
) ? static_cast<void> (0) : __assert_fail ("(CallConv == CallingConv::C || CallConv == CallingConv::Cold || CallConv == CallingConv::Fast) && \"Unexpected calling convention!\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 7127, __PRETTY_FUNCTION__));

7128

7129

if (getTargetMachine().Options.GuaranteedTailCallOpt)

7130

report_fatal_error("Tail call support is unimplemented on AIX.");

7131

7132

if (useSoftFloat())

7133

report_fatal_error("Soft float support is unimplemented on AIX.");

7134

7135

const PPCSubtarget &Subtarget =

7136

static_cast<const PPCSubtarget &>(DAG.getSubtarget());

7137

7138

const bool IsPPC64 = Subtarget.isPPC64();

7139

const unsigned PtrByteSize = IsPPC64 ? 8 : 4;

7140

7141

// Assign locations to all of the incoming arguments.

7142

SmallVector<CCValAssign, 16> ArgLocs;

7143

MachineFunction &MF = DAG.getMachineFunction();

7144

MachineFrameInfo &MFI = MF.getFrameInfo();

7145

CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());

7146

7147

const EVT PtrVT = getPointerTy(MF.getDataLayout());

7148

// Reserve space for the linkage area on the stack.

7149

const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();

7150

CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));

7151

CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);

7152

7153

SmallVector<SDValue, 8> MemOps;

7154

7155

for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {

7156

CCValAssign &VA = ArgLocs[I++];

7157

MVT LocVT = VA.getLocVT();

7158

ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;

7159

7160

// For compatibility with the AIX XL compiler, the float args in the

7161

// parameter save area are initialized even if the argument is available

7162

// in register. The caller is required to initialize both the register

7163

// and memory, however, the callee can choose to expect it in either.

7164

// The memloc is dismissed here because the argument is retrieved from

7165

// the register.

7166

if (VA.isMemLoc() && VA.needsCustom())

7167

continue;

7168

7169

if (Flags.isByVal() && VA.isMemLoc()) {

7170

const unsigned Size =

7171

alignTo(Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize,

7172

PtrByteSize);

7173

const int FI = MF.getFrameInfo().CreateFixedObject(

7174

Size, VA.getLocMemOffset(), /* IsImmutable */ false,

7175

/* IsAliased */ true);

7176

SDValue FIN = DAG.getFrameIndex(FI, PtrVT);

7177

InVals.push_back(FIN);

7178

7179

continue;

7180

}

7181

7182

if (Flags.isByVal()) {

7183

assert(VA.isRegLoc() && "MemLocs should already be handled.")((VA.isRegLoc() && "MemLocs should already be handled."
) ? static_cast<void> (0) : __assert_fail ("VA.isRegLoc() && \"MemLocs should already be handled.\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 7183, __PRETTY_FUNCTION__));

7184

7185

const MCPhysReg ArgReg = VA.getLocReg();

7186

const PPCFrameLowering *FL = Subtarget.getFrameLowering();

7187

7188

if (Flags.getNonZeroByValAlign() > PtrByteSize)

7189

report_fatal_error("Over aligned byvals not supported yet.");

7190

7191

const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);

7192

const int FI = MF.getFrameInfo().CreateFixedObject(

7193

StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false,

7194

/* IsAliased */ true);

7195

SDValue FIN = DAG.getFrameIndex(FI, PtrVT);

7196

InVals.push_back(FIN);

7197

7198

// Add live ins for all the RegLocs for the same ByVal.

7199

const TargetRegisterClass *RegClass =

7200

IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;

7201

7202

auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,

7203

unsigned Offset) {

7204

const unsigned VReg = MF.addLiveIn(PhysReg, RegClass);

7205

// Since the caller's side has left-justified the aggregate in the

7206

// register, we can simply store the entire register into the stack

7207

// slot.

7208

SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);

7209

// The store to the fixedstack object is needed because accessing a

7210

// field of the ByVal will use a gep and load. Ideally we will optimize

7211

// to extracting the value from the register directly, and elide the

7212

// stores when the arguments address is not taken, but that will need to

7213

// be future work.

7214

SDValue Store = DAG.getStore(

7215

CopyFrom.getValue(1), dl, CopyFrom,

7216

DAG.getObjectPtrOffset(dl, FIN, TypeSize::Fixed(Offset)),

7217

MachinePointerInfo::getFixedStack(MF, FI, Offset));

7218

7219

MemOps.push_back(Store);

7220

};

7221

7222

unsigned Offset = 0;

7223

HandleRegLoc(VA.getLocReg(), Offset);

7224

Offset += PtrByteSize;

7225

for (; Offset != StackSize && ArgLocs[I].isRegLoc();

7226

Offset += PtrByteSize) {

7227

assert(ArgLocs[I].getValNo() == VA.getValNo() &&((ArgLocs[I].getValNo() == VA.getValNo() && "RegLocs should be for ByVal argument."
) ? static_cast<void> (0) : __assert_fail ("ArgLocs[I].getValNo() == VA.getValNo() && \"RegLocs should be for ByVal argument.\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 7228, __PRETTY_FUNCTION__))

7228

"RegLocs should be for ByVal argument.")((ArgLocs[I].getValNo() == VA.getValNo() && "RegLocs should be for ByVal argument."
) ? static_cast<void> (0) : __assert_fail ("ArgLocs[I].getValNo() == VA.getValNo() && \"RegLocs should be for ByVal argument.\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 7228, __PRETTY_FUNCTION__));

7229

7230

const CCValAssign RL = ArgLocs[I++];

7231

HandleRegLoc(RL.getLocReg(), Offset);

7232

}

7233

7234

if (Offset != StackSize) {

7235

assert(ArgLocs[I].getValNo() == VA.getValNo() &&((ArgLocs[I].getValNo() == VA.getValNo() && "Expected MemLoc for remaining bytes."
) ? static_cast<void> (0) : __assert_fail ("ArgLocs[I].getValNo() == VA.getValNo() && \"Expected MemLoc for remaining bytes.\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 7236, __PRETTY_FUNCTION__))

7236

"Expected MemLoc for remaining bytes.")((ArgLocs[I].getValNo() == VA.getValNo() && "Expected MemLoc for remaining bytes."
) ? static_cast<void> (0) : __assert_fail ("ArgLocs[I].getValNo() == VA.getValNo() && \"Expected MemLoc for remaining bytes.\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 7236, __PRETTY_FUNCTION__));

7237

assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes.")((ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes."
) ? static_cast<void> (0) : __assert_fail ("ArgLocs[I].isMemLoc() && \"Expected MemLoc for remaining bytes.\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 7237, __PRETTY_FUNCTION__));

7238

// Consume the MemLoc. The InVal has already been emitted, so nothing

7239

// more needs to be done.

7240

++I;

7241

}

7242

7243

continue;

7244

}

7245

7246

EVT ValVT = VA.getValVT();

7247

if (VA.isRegLoc() && !VA.needsCustom()) {

7248

MVT::SimpleValueType SVT = ValVT.getSimpleVT().SimpleTy;

7249

unsigned VReg =

7250

MF.addLiveIn(VA.getLocReg(), getRegClassForSVT(SVT, IsPPC64));

7251

SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);

7252

if (ValVT.isScalarInteger() &&

7253

(ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits())) {

7254

ArgValue =

7255

truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);

7256

}

7257

InVals.push_back(ArgValue);

7258

continue;

7259

}

7260

if (VA.isMemLoc()) {

7261

const unsigned LocSize = LocVT.getStoreSize();

7262

const unsigned ValSize = ValVT.getStoreSize();

7263

assert((ValSize <= LocSize) &&(((ValSize <= LocSize) && "Object size is larger than size of MemLoc"
) ? static_cast<void> (0) : __assert_fail ("(ValSize <= LocSize) && \"Object size is larger than size of MemLoc\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 7264, __PRETTY_FUNCTION__))

7264

"Object size is larger than size of MemLoc")(((ValSize <= LocSize) && "Object size is larger than size of MemLoc"
) ? static_cast<void> (0) : __assert_fail ("(ValSize <= LocSize) && \"Object size is larger than size of MemLoc\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 7264, __PRETTY_FUNCTION__));

7265

int CurArgOffset = VA.getLocMemOffset();

7266

// Objects are right-justified because AIX is big-endian.

7267

if (LocSize > ValSize)

7268

CurArgOffset += LocSize - ValSize;

7269

// Potential tail calls could cause overwriting of argument stack slots.

7270

const bool IsImmutable =

7271

!(getTargetMachine().Options.GuaranteedTailCallOpt &&

7272

(CallConv == CallingConv::Fast));

7273

int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);

7274

SDValue FIN = DAG.getFrameIndex(FI, PtrVT);

7275

SDValue ArgValue =

7276

DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());

7277

InVals.push_back(ArgValue);

7278

continue;

7279

}

7280

}

7281

7282

// On AIX a minimum of 8 words is saved to the parameter save area.

7283

const unsigned MinParameterSaveArea = 8 * PtrByteSize;

7284

// Area that is at least reserved in the caller of this function.

7285

unsigned CallerReservedArea =

7286

std::max(CCInfo.getNextStackOffset(), LinkageSize + MinParameterSaveArea);

7287

7288

// Set the size that is at least reserved in caller of this function. Tail

7289

// call optimized function's reserved stack space needs to be aligned so

7290

// that taking the difference between two stack areas will result in an

7291

// aligned stack.

7292

CallerReservedArea =

7293

EnsureStackAlignment(Subtarget.getFrameLowering(), CallerReservedArea);

7294

PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

7295

FuncInfo->setMinReservedArea(CallerReservedArea);

7296

7297

if (isVarArg) {

7298

FuncInfo->setVarArgsFrameIndex(

7299

MFI.CreateFixedObject(PtrByteSize, CCInfo.getNextStackOffset(), true));

7300

SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

7301

7302

static const MCPhysReg GPR_32[] = {PPC::R3, PPC::R4, PPC::R5, PPC::R6,

7303

PPC::R7, PPC::R8, PPC::R9, PPC::R10};

7304

7305

static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,

7306

PPC::X7, PPC::X8, PPC::X9, PPC::X10};

7307

const unsigned NumGPArgRegs = array_lengthof(IsPPC64 ? GPR_64 : GPR_32);

7308

7309

// The fixed integer arguments of a variadic function are stored to the

7310

// VarArgsFrameIndex on the stack so that they may be loaded by

7311

// dereferencing the result of va_next.

7312

for (unsigned GPRIndex =

7313

(CCInfo.getNextStackOffset() - LinkageSize) / PtrByteSize;

7314

GPRIndex < NumGPArgRegs; ++GPRIndex) {

7315

7316

const unsigned VReg =

7317

IsPPC64 ? MF.addLiveIn(GPR_64[GPRIndex], &PPC::G8RCRegClass)

7318

: MF.addLiveIn(GPR_32[GPRIndex], &PPC::GPRCRegClass);

7319

7320

SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);

7321

SDValue Store =

7322

DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());

7323

MemOps.push_back(Store);

7324

// Increment the address for the next argument to store.

7325

SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);

7326

FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);

7327

}

7328

}

7329

7330

if (!MemOps.empty())

7331

Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);

7332

7333

return Chain;

7334

}

7335

7336

/// Lower an outgoing call for the AIX ABI.
///
/// Runs the CC_AIX calling-convention analysis over \p Outs, materializes
/// each outgoing value from \p OutVals either as a register copy or as a
/// store relative to the stack pointer, saves the TOC base for indirect
/// calls, and hands the assembled pieces to FinishCall().
///
/// \param Chain   Incoming token chain.
/// \param Callee  Call target; forwarded unchanged to FinishCall().
/// \param CFlags  Call properties. This function reads CallConv,
///                IsPatchPoint, IsVarArg, IsIndirect and IsTailCall.
/// \param Outs    Per-argument flags for the outgoing arguments.
/// \param OutVals Outgoing argument values, indexed by value number.
/// \param Ins     Forwarded to FinishCall().
/// \param dl      Debug location used for every node created here.
/// \param DAG     SelectionDAG in which the nodes are built.
/// \param InVals  Forwarded to FinishCall().
/// \param CB      Forwarded to FinishCall().
/// \returns the value produced by FinishCall().
///
/// Patch-point calls and subtargets with Altivec are rejected with
/// report_fatal_error().
SDValue PPCTargetLowering::LowerCall_AIX(
    SDValue Chain, SDValue Callee, CallFlags CFlags,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
    const CallBase *CB) const {
  // See PPCTargetLowering::LowerFormalArguments_AIX() for a description of the
  // AIX ABI stack frame layout.

  assert((CFlags.CallConv == CallingConv::C ||
          CFlags.CallConv == CallingConv::Cold ||
          CFlags.CallConv == CallingConv::Fast) &&
         "Unexpected calling convention!");

  if (CFlags.IsPatchPoint)
    report_fatal_error("This call type is unimplemented on AIX.");

  const PPCSubtarget& Subtarget =
      static_cast<const PPCSubtarget&>(DAG.getSubtarget());
  if (Subtarget.hasAltivec())
    report_fatal_error("Altivec support is unimplemented on AIX.");

  MachineFunction &MF = DAG.getMachineFunction();
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
                 *DAG.getContext());

  // Reserve space for the linkage save area (LSA) on the stack.
  // In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
  //   [SP][CR][LR][2 x reserved][TOC].
  // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
  const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  const bool IsPPC64 = Subtarget.isPPC64();
  const EVT PtrVT = getPointerTy(DAG.getDataLayout());
  const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
  CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
  CCInfo.AnalyzeCallOperands(Outs, CC_AIX);

  // The prolog code of the callee may store up to 8 GPR argument registers to
  // the stack, allowing va_start to index over them in memory if the callee
  // is variadic.
  // Because we cannot tell if this is needed on the caller side, we have to
  // conservatively assume that it is needed.  As such, make sure we have at
  // least enough stack space for the caller to store the 8 GPRs.
  const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
  const unsigned NumBytes = std::max(LinkageSize + MinParameterSaveAreaSize,
                                     CCInfo.getNextStackOffset());

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass.
  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
  SDValue CallSeqStart = Chain;

  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;

  // Set up a copy of the stack pointer for loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  const SDValue StackPtr = IsPPC64 ? DAG.getRegister(PPC::X1, MVT::i64)
                                   : DAG.getRegister(PPC::R1, MVT::i32);

  // Walk the assigned locations. A single argument may own several
  // consecutive entries in ArgLocs (by-val aggregates, vararg floats), so
  // the index is advanced explicitly inside the loop body.
  for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
    const unsigned ValNo = ArgLocs[I].getValNo();
    SDValue Arg = OutVals[ValNo];
    ISD::ArgFlagsTy Flags = Outs[ValNo].Flags;

    if (Flags.isByVal()) {
      const unsigned ByValSize = Flags.getByValSize();

      // Nothing to do for zero-sized ByVals on the caller side.
      if (!ByValSize) {
        ++I;
        continue;
      }

      // Zero-extending load of type VT from the by-val object at LoadOffset,
      // producing a pointer-width value.
      auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
        return DAG.getExtLoad(
            ISD::ZEXTLOAD, dl, PtrVT, Chain,
            (LoadOffset != 0)
                ? DAG.getObjectPtrOffset(dl, Arg, TypeSize::Fixed(LoadOffset))
                : Arg,
            MachinePointerInfo(), VT);
      };

      unsigned LoadOffset = 0;

      // Initialize registers, which are fully occupied by the by-val argument.
      while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs[I].isRegLoc()) {
        SDValue Load = GetLoad(PtrVT, LoadOffset);
        MemOpChains.push_back(Load.getValue(1));
        LoadOffset += PtrByteSize;
        const CCValAssign &ByValVA = ArgLocs[I++];
        assert(ByValVA.getValNo() == ValNo &&
               "Unexpected location for pass-by-value argument.");
        RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), Load));
      }

      if (LoadOffset == ByValSize)
        continue;

      // There must be one more loc to handle the remainder.
      assert(ArgLocs[I].getValNo() == ValNo &&
             "Expected additional location for by-value argument.");

      if (ArgLocs[I].isMemLoc()) {
        assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg.");
        const CCValAssign &ByValVA = ArgLocs[I++];
        ISD::ArgFlagsTy MemcpyFlags = Flags;
        // Only memcpy the bytes that don't pass in register.
        MemcpyFlags.setByValSize(ByValSize - LoadOffset);
        Chain = CallSeqStart = createMemcpyOutsideCallSeq(
            (LoadOffset != 0)
                ? DAG.getObjectPtrOffset(dl, Arg, TypeSize::Fixed(LoadOffset))
                : Arg,
            DAG.getObjectPtrOffset(dl, StackPtr,
                                   TypeSize::Fixed(ByValVA.getLocMemOffset())),
            CallSeqStart, MemcpyFlags, DAG, dl);
        continue;
      }

      // Initialize the final register residue.
      // Any residue that occupies the final by-val arg register must be
      // left-justified on AIX. Loads must be a power-of-2 size and cannot be
      // larger than the ByValSize. For example: a 7 byte by-val arg requires 4,
      // 2 and 1 byte loads.
      const unsigned ResidueBytes = ByValSize % PtrByteSize;
      assert(ResidueBytes != 0 && LoadOffset + PtrByteSize > ByValSize &&
             "Unexpected register residue for by-value argument.");
      SDValue ResidueVal;
      for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
        const unsigned N = PowerOf2Floor(ResidueBytes - Bytes);
        const MVT VT =
            N == 1 ? MVT::i8
                   : ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
        SDValue Load = GetLoad(VT, LoadOffset);
        MemOpChains.push_back(Load.getValue(1));
        LoadOffset += N;
        Bytes += N;

        // By-val arguments are passed left-justified in register.
        // Every load here needs to be shifted, otherwise a full register load
        // should have been used.
        assert(PtrVT.getSimpleVT().getSizeInBits() > (Bytes * 8) &&
               "Unexpected load emitted during handling of pass-by-value "
               "argument.");
        unsigned NumSHLBits = PtrVT.getSimpleVT().getSizeInBits() - (Bytes * 8);
        EVT ShiftAmountTy =
            getShiftAmountTy(Load->getValueType(0), DAG.getDataLayout());
        SDValue SHLAmt = DAG.getConstant(NumSHLBits, dl, ShiftAmountTy);
        SDValue ShiftedLoad =
            DAG.getNode(ISD::SHL, dl, Load.getValueType(), Load, SHLAmt);
        // Accumulate the shifted pieces into a single register value.
        ResidueVal = ResidueVal ? DAG.getNode(ISD::OR, dl, PtrVT, ResidueVal,
                                              ShiftedLoad)
                                : ShiftedLoad;
      }

      const CCValAssign &ByValVA = ArgLocs[I++];
      RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), ResidueVal));
      continue;
    }

    CCValAssign &VA = ArgLocs[I++];
    const MVT LocVT = VA.getLocVT();
    const MVT ValVT = VA.getValVT();

    switch (VA.getLocInfo()) {
    default:
      report_fatal_error("Unexpected argument extension type.");
    case CCValAssign::Full:
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    }

    if (VA.isRegLoc() && !VA.needsCustom()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
      continue;
    }

    if (VA.isMemLoc()) {
      SDValue PtrOff =
          DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
      PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
      MemOpChains.push_back(
          DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));

      continue;
    }

    // Custom handling is used for GPR initializations for vararg float
    // arguments.
    assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
           ValVT.isFloatingPoint() && LocVT.isInteger() &&
           "Unexpected register handling for calling convention.");

    SDValue ArgAsInt =
        DAG.getBitcast(MVT::getIntegerVT(ValVT.getSizeInBits()), Arg);

    if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize())
      // f32 in 32-bit GPR
      // f64 in 64-bit GPR
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
    else if (Arg.getValueType().getFixedSizeInBits() <
             LocVT.getFixedSizeInBits())
      // f32 in 64-bit GPR.
      RegsToPass.push_back(std::make_pair(
          VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, LocVT)));
    else {
      // f64 in two 32-bit GPRs
      // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
      assert(Arg.getValueType() == MVT::f64 && CFlags.IsVarArg && !IsPPC64 &&
             "Unexpected custom register for argument!");
      CCValAssign &GPR1 = VA;
      // The first GPR receives the most significant word.
      SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt,
                                     DAG.getConstant(32, dl, MVT::i8));
      RegsToPass.push_back(std::make_pair(
          GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32)));

      if (I != E) {
        // If only 1 GPR was available, there will only be one custom GPR and
        // the argument will also pass in memory.
        CCValAssign &PeekArg = ArgLocs[I];
        // Only consume the next location as the second half when it belongs
        // to this same argument; a register location owned by a different
        // argument must be left for the next loop iteration.
        if (PeekArg.isRegLoc() && PeekArg.getValNo() == ValNo) {
          assert(PeekArg.needsCustom() && "A second custom GPR is expected.");
          CCValAssign &GPR2 = ArgLocs[I++];
          RegsToPass.push_back(std::make_pair(
              GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
        }
      }
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);

  // For indirect calls, we need to save the TOC base to the stack for
  // restoration after the call.
  if (CFlags.IsIndirect) {
    assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
    const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
    const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
    const MVT PtrVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
    const unsigned TOCSaveOffset =
        Subtarget.getFrameLowering()->getTOCSaveOffset();

    setUsesTOCBasePtr(DAG);
    SDValue Val = DAG.getCopyFromReg(Chain, dl, TOCBaseReg, PtrVT);
    SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
    SDValue StackPtr = DAG.getRegister(StackPtrReg, PtrVT);
    SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
    Chain = DAG.getStore(
        Val.getValue(1), dl, Val, AddPtr,
        MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (auto Reg : RegsToPass) {
    Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InFlag);
    InFlag = Chain.getValue(1);
  }

  const int SPDiff = 0;
  return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
                    Callee, SPDiff, NumBytes, Ins, InVals, CB);
}

7609

7610

bool

7611

PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,

7612

MachineFunction &MF, bool isVarArg,

7613

const SmallVectorImpl<ISD::OutputArg> &Outs,

7614

LLVMContext &Context) const {

7615

SmallVector<CCValAssign, 16> RVLocs;

7616

CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);

7617

return CCInfo.CheckReturn(

7618

Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)

7619

? RetCC_PPC_Cold

7620

: RetCC_PPC);

7621

}

7622

7623

/// Lower a function return into a PPCISD::RET_FLAG node.
///
/// Each entry of \p OutVals is extended as its assigned location requires
/// and copied into that location's register; the copies are linked through
/// the chain and glue values. On SPE subtargets an f64 location is split
/// into two i32 halves that occupy two consecutive return locations.
///
/// \param Chain    Incoming token chain.
/// \param CallConv Calling convention; selects RetCC_PPC_Cold when it is
///                 CallingConv::Cold and the subtarget reports the SVR4 ABI,
///                 RetCC_PPC otherwise.
/// \param isVarArg Passed through to the CCState.
/// \param Outs     Per-value flags for the values being returned.
/// \param OutVals  The values being returned.
/// \param dl       Debug location used for every node created here.
/// \param DAG      SelectionDAG in which the nodes are built.
/// \returns the PPCISD::RET_FLAG node.
///
/// Returning a vector type under the AIX ABI is rejected with
/// report_fatal_error().
SDValue
PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                               bool isVarArg,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
                               const SmallVectorImpl<SDValue> &OutVals,
                               const SDLoc &dl, SelectionDAG &DAG) const {
  SmallVector<CCValAssign, 16> ReturnLocs;
  CCState State(CallConv, isVarArg, DAG.getMachineFunction(), ReturnLocs,
                *DAG.getContext());
  if (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
    State.AnalyzeReturn(Outs, RetCC_PPC_Cold);
  else
    State.AnalyzeReturn(Outs, RetCC_PPC);

  SDValue Glue;
  // Operand 0 is a placeholder for the chain; it is refreshed after the loop.
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  for (unsigned LocIdx = 0, ValIdx = 0; LocIdx != ReturnLocs.size();
       ++LocIdx, ++ValIdx) {
    const CCValAssign *Loc = &ReturnLocs[LocIdx];
    assert(Loc->isRegLoc() && "Can only return in registers!");

    SDValue Val = OutVals[ValIdx];

    if (Subtarget.isAIXABI() &&
        (Loc->getLocVT().isVector() || Loc->getValVT().isVector()))
      report_fatal_error("Returning vector types not yet supported on AIX.");

    // Widen the value to the location type when the assignment asks for it.
    const CCValAssign::LocInfo Info = Loc->getLocInfo();
    if (Info != CCValAssign::Full) {
      unsigned ExtOpc;
      switch (Info) {
      default:
        llvm_unreachable("Unknown loc info!");
      case CCValAssign::AExt:
        ExtOpc = ISD::ANY_EXTEND;
        break;
      case CCValAssign::ZExt:
        ExtOpc = ISD::ZERO_EXTEND;
        break;
      case CCValAssign::SExt:
        ExtOpc = ISD::SIGN_EXTEND;
        break;
      }
      Val = DAG.getNode(ExtOpc, dl, Loc->getLocVT(), Val);
    }

    if (Subtarget.hasSPE() && Loc->getLocVT() == MVT::f64) {
      // Legalize ret f64 -> ret 2 x i32.
      const unsigned FirstHalf = Subtarget.isLittleEndian() ? 0 : 1;
      const unsigned SecondHalf = Subtarget.isLittleEndian() ? 1 : 0;

      SDValue Half = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Val,
                                 DAG.getIntPtrConstant(FirstHalf, dl));
      Chain = DAG.getCopyToReg(Chain, dl, Loc->getLocReg(), Half, Glue);
      RetOps.push_back(DAG.getRegister(Loc->getLocReg(), Loc->getLocVT()));

      Half = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Val,
                         DAG.getIntPtrConstant(SecondHalf, dl));
      Glue = Chain.getValue(1);
      // The second half lives in the next return location; step onto it so
      // the shared tail below records that register.
      Loc = &ReturnLocs[++LocIdx];
      Chain = DAG.getCopyToReg(Chain, dl, Loc->getLocReg(), Half, Glue);
    } else {
      Chain = DAG.getCopyToReg(Chain, dl, Loc->getLocReg(), Val, Glue);
    }

    Glue = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(Loc->getLocReg(), Loc->getLocVT()));
  }

  RetOps[0] = Chain; // Update chain.

  // Add the flag if we have it.
  if (Glue.getNode())
    RetOps.push_back(Glue);

  return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps);
}

7691

7692

/// Lower a GET_DYNAMIC_AREA_OFFSET node into a PPCISD::DYNAREAOFFSET node
/// whose operands are the incoming chain and the frame-pointer save slot.
SDValue
PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc Loc(Op);

  // The result keeps the integer type of the node being lowered.
  EVT ResultVT = Op.getValueType();

  // Operands of the target node: chain first, then the frame index of the
  // frame-pointer save area (created on demand by the helper).
  SDValue InChain = Op.getOperand(0);
  SDValue FrameSlot = getFramePointerFrameIndex(DAG);
  SDValue NodeOps[] = {InChain, FrameSlot};

  return DAG.getNode(PPCISD::DYNAREAOFFSET, Loc, DAG.getVTList(ResultVT),
                     NodeOps);
}

7708

7709

/// Lower STACKRESTORE. When a dynamic allocation is popped, the link word
/// at the top of the stack has to be carried over to the restored stack
/// pointer: load it, write the saved SP back into the stack pointer
/// register, then store the link word through the stack pointer again.
SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDLoc Loc(Op);
  const EVT PtrVT = getPointerTy(DAG.getDataLayout());

  // Pick the stack pointer register matching the pointer width.
  const unsigned SPReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
  SDValue SPValue = DAG.getRegister(SPReg, PtrVT);

  // Operand 0 is the chain, operand 1 the stack pointer value to restore.
  SDValue InChain = Op.getOperand(0);
  SDValue SavedSP = Op.getOperand(1);

  // Read the link word currently stored at the stack pointer.
  SDValue LinkWord =
      DAG.getLoad(PtrVT, Loc, InChain, SPValue, MachinePointerInfo());

  // Switch the stack pointer to the saved value, ordered after the load.
  SDValue AfterCopy =
      DAG.getCopyToReg(LinkWord.getValue(1), Loc, SPReg, SavedSP);

  // Re-establish the link word at the (now restored) stack pointer.
  return DAG.getStore(AfterCopy, Loc, LinkWord, SPValue,
                      MachinePointerInfo());
}

7736

7737

/// Return a frame-index node for the return-address save slot, creating the
/// fixed stack object (and recording it in PPCFunctionInfo) the first time
/// it is requested.
SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  const unsigned SlotBytes = Subtarget.isPPC64() ? 8 : 4;
  const EVT PtrVT = getPointerTy(MF.getDataLayout());

  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  int SaveIdx = FuncInfo->getReturnAddrSaveIndex();

  // An index of zero means the slot has not been allocated yet.
  if (SaveIdx == 0) {
    // Fixed offset of the return-address save area, per the frame lowering.
    const int SaveOffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
    SaveIdx = MF.getFrameInfo().CreateFixedObject(SlotBytes, SaveOffset,
                                                  /*IsImmutable=*/false);
    // Remember it so later requests reuse the same object.
    FuncInfo->setReturnAddrSaveIndex(SaveIdx);
  }

  return DAG.getFrameIndex(SaveIdx, PtrVT);
}

7758

7759

/// Return a frame-index node for the frame-pointer save slot, creating the
/// fixed stack object (and recording it in PPCFunctionInfo) the first time
/// it is requested. Within this file the index is consumed by the dynamic
/// allocation lowerings.
SDValue
PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  const unsigned SlotBytes = Subtarget.isPPC64() ? 8 : 4;
  const EVT PtrVT = getPointerTy(MF.getDataLayout());

  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  int SaveIdx = FuncInfo->getFramePointerSaveIndex();

  // An index of zero means the slot has not been allocated yet.
  if (SaveIdx == 0) {
    // Fixed offset of the frame-pointer save area, per the frame lowering.
    const int SaveOffset =
        Subtarget.getFrameLowering()->getFramePointerSaveOffset();
    SaveIdx = MF.getFrameInfo().CreateFixedObject(SlotBytes, SaveOffset,
                                                  /*IsImmutable=*/true);
    // Remember it so later requests reuse the same object.
    FuncInfo->setFramePointerSaveIndex(SaveIdx);
  }

  return DAG.getFrameIndex(SaveIdx, PtrVT);
}

7781

7782

/// Lower DYNAMIC_STACKALLOC to PPCISD::DYNALLOC, or to
/// PPCISD::PROBED_ALLOCA when inline stack probing is enabled for the
/// function. The target node receives the chain, the negated size and the
/// frame-pointer save slot, and produces a pointer plus a chain.
SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
                                                   SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  SDValue InChain = Op.getOperand(0);
  SDValue AllocSize = Op.getOperand(1);
  SDLoc Loc(Op);

  const EVT PtrVT = getPointerTy(DAG.getDataLayout());

  // The target node takes the size negated: 0 - Size.
  SDValue Zero = DAG.getConstant(0, Loc, PtrVT);
  SDValue NegatedSize = DAG.getNode(ISD::SUB, Loc, PtrVT, Zero, AllocSize);

  // Frame index of the frame-pointer save area.
  SDValue FrameSlot = getFramePointerFrameIndex(DAG);

  SDValue NodeOps[3] = {InChain, NegatedSize, FrameSlot};
  SDVTList ResultVTs = DAG.getVTList(PtrVT, MVT::Other);

  const unsigned AllocOpc =
      hasInlineStackProbe(MF) ? PPCISD::PROBED_ALLOCA : PPCISD::DYNALLOC;
  return DAG.getNode(AllocOpc, Loc, ResultVTs, NodeOps);
}

7803

7804

/// Lower EH_DWARF_CFA to the address of a freshly created pointer-sized
/// fixed stack object at offset 0.
SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
                                             SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();

  const unsigned SlotBytes = Subtarget.isPPC64() ? 8 : 4;
  const EVT PtrVT = getPointerTy(DAG.getDataLayout());

  const int CFAIndex = MF.getFrameInfo().CreateFixedObject(
      SlotBytes, /*SPOffset=*/0, /*IsImmutable=*/false);
  return DAG.getFrameIndex(CFAIndex, PtrVT);
}

7814

7815

/// Lower EH_SJLJ_SETJMP to the PPC-specific node, forwarding the first two
/// operands unchanged. The node yields an i32 value and a chain.
SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc Loc(Op);
  SDVTList ResultVTs = DAG.getVTList(MVT::i32, MVT::Other);
  SDValue First = Op.getOperand(0);
  SDValue Second = Op.getOperand(1);
  return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, Loc, ResultVTs, First, Second);
}

7822

7823

/// Lower EH_SJLJ_LONGJMP to the PPC-specific node, forwarding the first two
/// operands unchanged. The node produces only a chain.
SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc Loc(Op);
  SDValue First = Op.getOperand(0);
  SDValue Second = Op.getOperand(1);
  return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, Loc, MVT::Other, First, Second);
}

7829

7830

/// Custom-lower a load. Vector loads are handed to LowerVectorLoad; the only
/// other case handled here is an i1 load, which is done as an 8-bit
/// extending load into a pointer-sized integer followed by a truncate.
SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  if (Op.getValueType().isVector())
    return LowerVectorLoad(Op, DAG);

  assert(Op.getValueType() == MVT::i1 &&
         "Custom lowering only for i1 loads");

  SDLoc Loc(Op);
  LoadSDNode *Load = cast<LoadSDNode>(Op);
  const EVT WideVT = getPointerTy(DAG.getDataLayout());

  // Load a byte (any-extended to the wide type), reusing the original
  // chain, address and memory operand.
  SDValue WideLoad =
      DAG.getExtLoad(ISD::EXTLOAD, Loc, WideVT, Load->getChain(),
                     Load->getBasePtr(), MVT::i8, Load->getMemOperand());

  // Narrow the loaded value down to the single requested bit.
  SDValue Bit = DAG.getNode(ISD::TRUNCATE, Loc, MVT::i1, WideLoad);

  // Return both the value and the new load's chain result.
  SDValue Results[] = {Bit, WideLoad.getValue(1)};
  return DAG.getMergeValues(Results, Loc);
}

7854

7855

/// Custom-lower a store. Vector stores are handed to LowerVectorStore; the
/// only other case handled here is an i1 store, which is done by
/// zero-extending the value to a pointer-sized integer and emitting a
/// truncating store of 8 bits.
SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  if (Op.getOperand(1).getValueType().isVector())
    return LowerVectorStore(Op, DAG);

  assert(Op.getOperand(1).getValueType() == MVT::i1 &&
         "Custom lowering only for i1 stores");

  SDLoc Loc(Op);
  StoreSDNode *Store = cast<StoreSDNode>(Op);

  SDValue InChain = Store->getChain();
  SDValue Addr = Store->getBasePtr();
  SDValue Bit = Store->getValue();
  MachineMemOperand *MemOp = Store->getMemOperand();

  // Widen the bit, then store just the low byte.
  const EVT WideVT = getPointerTy(DAG.getDataLayout());
  SDValue Widened = DAG.getNode(ISD::ZERO_EXTEND, Loc, WideVT, Bit);
  return DAG.getTruncStore(InChain, Loc, Widened, Addr, MVT::i8, MemOp);
}

7876

7877

// FIXME: Remove this once the ANDI glue bug is fixed:

7878

SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {

7879

assert(Op.getValueType() == MVT::i1 &&((Op.getValueType() == MVT::i1 && "Custom lowering only for i1 results"
) ? static_cast<void> (0) : __assert_fail ("Op.getValueType() == MVT::i1 && \"Custom lowering only for i1 results\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 7880, __PRETTY_FUNCTION__))

7880

"Custom lowering only for i1 results")((Op.getValueType() == MVT::i1 && "Custom lowering only for i1 results"
) ? static_cast<void> (0) : __assert_fail ("Op.getValueType() == MVT::i1 && \"Custom lowering only for i1 results\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 7880, __PRETTY_FUNCTION__));

7881

7882

SDLoc DL(Op);

7883

return DAG.getNode(PPCISD::ANDI_rec_1_GT_BIT, DL, MVT::i1, Op.getOperand(0));

7884

}

7885

7886

/// Lower a vector truncate whose result fits in a vector register into a
/// single vector shuffle. Returns an empty SDValue when the types do not
/// meet the constraints checked below.
SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
                                               SelectionDAG &DAG) const {
  // We want to legalize vector truncates down to where the source fits in a
  // vector register (and the target is therefore smaller than the vector
  // register size). At that point legalization will try to custom lower the
  // sub-legal result and get here - where we can express the truncate as a
  // single target operation.
  //
  // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
  //   <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
  //
  // For big-endian ordering this is implemented as (u denotes undefined):
  //   < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
  //   < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
  //
  // The same operation in little-endian ordering is:
  //   <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
  //   <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>

  EVT TrgVT = Op.getValueType();
  assert(TrgVT.isVector() && "Vector type expected.");
  const unsigned NumResultElts = TrgVT.getVectorNumElements();
  const EVT EltVT = TrgVT.getVectorElementType();

  // Result constraints: custom lowering requested, at most 128 bits, and
  // power-of-two element count and element width.
  if (!isOperationCustom(Op.getOpcode(), TrgVT))
    return SDValue();
  if (TrgVT.getSizeInBits() > 128 || !isPowerOf2_32(NumResultElts) ||
      !isPowerOf2_32(EltVT.getSizeInBits()))
    return SDValue();

  // Source constraints: at most 256 bits with power-of-two shape; a 256-bit
  // source must have at least two elements so it can be split in half.
  SDValue Source = Op.getOperand(0);
  const EVT SrcVT = Source.getValueType();
  const unsigned SrcSize = SrcVT.getSizeInBits();
  if (SrcSize > 256 ||
      !isPowerOf2_32(SrcVT.getVectorNumElements()) ||
      !isPowerOf2_32(SrcVT.getVectorElementType().getSizeInBits()))
    return SDValue();
  if (SrcSize == 256 && SrcVT.getVectorNumElements() < 2)
    return SDValue();

  // The shuffle is performed on 128-bit vectors of the result element type.
  const unsigned WideNumElts = 128 / EltVT.getSizeInBits();
  const EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);

  SDLoc DL(Op);
  SDValue FirstInput, SecondInput;
  if (SrcSize != 256) {
    // Single-register source (widened if needed); second input is undef.
    FirstInput = SrcSize == 128 ? Source : widenVec(DAG, Source, DL);
    SecondInput = DAG.getUNDEF(WideVT);
  } else {
    // Split a 256-bit source into its low and high halves.
    const EVT IdxVT = getVectorIdxTy(DAG.getDataLayout());
    const EVT HalfVT =
        Source.getValueType().getHalfNumVectorElementsVT(*DAG.getContext());
    const unsigned HalfNumElts = HalfVT.getVectorNumElements();
    FirstInput = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, Source,
                             DAG.getConstant(0, DL, IdxVT));
    SecondInput = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, Source,
                              DAG.getConstant(HalfNumElts, DL, IdxVT));
  }

  // Each source element covers Stride result-sized lanes; keep the lane
  // holding its least significant part (first lane on little-endian, last
  // lane on big-endian).
  const unsigned Stride = SrcSize / TrgVT.getSizeInBits();
  const bool IsLE = Subtarget.isLittleEndian();
  SmallVector<int, 16> Mask;
  for (unsigned Elt = 0; Elt != NumResultElts; ++Elt)
    Mask.push_back(IsLE ? Elt * Stride : (Elt + 1) * Stride - 1);

  // The remaining lanes are don't-care; they all select lane 1 of the
  // second shuffle input.
  for (unsigned Lane = NumResultElts; Lane < WideNumElts; ++Lane)
    Mask.push_back(WideNumElts + 1);

  FirstInput = DAG.getNode(ISD::BITCAST, DL, WideVT, FirstInput);
  SecondInput = DAG.getNode(ISD::BITCAST, DL, WideVT, SecondInput);
  return DAG.getVectorShuffle(WideVT, DL, FirstInput, SecondInput, Mask);
}

7966

7967

/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when

7968

/// possible.

7969

SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {

7970

// Not FP, or using SPE? Not a fsel.

7971

if (!Op.getOperand(0).getValueType().isFloatingPoint() ||

7972

!Op.getOperand(2).getValueType().isFloatingPoint() || Subtarget.hasSPE())

7973

return Op;

7974

7975

ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();

7976

7977

EVT ResVT = Op.getValueType();

7978

EVT CmpVT = Op.getOperand(0).getValueType();

7979

SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);

7980

SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);

7981

SDLoc dl(Op);

7982

SDNodeFlags Flags = Op.getNode()->getFlags();

7983

7984

// We have xsmaxcdp/xsmincdp which are OK to emit even in the

7985

// presence of infinities.

7986

if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {

7987

switch (CC) {

7988

default:

7989

break;

7990

case ISD::SETOGT:

7991

case ISD::SETGT:

7992

return DAG.getNode(PPCISD::XSMAXCDP, dl, Op.getValueType(), LHS, RHS);

7993

case ISD::SETOLT:

7994

case ISD::SETLT:

7995

return DAG.getNode(PPCISD::XSMINCDP, dl, Op.getValueType(), LHS, RHS);

7996

}

7997

}

7998

7999

// We might be able to do better than this under some circumstances, but in

8000

// general, fsel-based lowering of select is a finite-math-only optimization.

8001

// For more information, see section F.3 of the 2.06 ISA specification.

8002

// With ISA 3.0

8003

if ((!DAG.getTarget().Options.NoInfsFPMath && !Flags.hasNoInfs()) ||

8004

(!DAG.getTarget().Options.NoNaNsFPMath && !Flags.hasNoNaNs()))

8005

return Op;

8006

8007

// If the RHS of the comparison is a 0.0, we don't need to do the

8008

// subtraction at all.

8009

SDValue Sel1;

8010

if (isFloatingPointZero(RHS))

8011

switch (CC) {

8012

default: break; // SETUO etc aren't handled by fsel.

8013

case ISD::SETNE:

8014

std::swap(TV, FV);

8015

LLVM_FALLTHROUGH[[gnu::fallthrough]];

8016

case ISD::SETEQ:

8017

if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits

8018

LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);

8019

Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);

8020

if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits

8021

Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);

8022

return DAG.getNode(PPCISD::FSEL, dl, ResVT,

8023

DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);

8024

case ISD::SETULT:

8025

case ISD::SETLT:

8026

std::swap(TV, FV); // fsel is natively setge, swap operands for setlt

8027

LLVM_FALLTHROUGH[[gnu::fallthrough]];

8028

case ISD::SETOGE:

8029

case ISD::SETGE:

8030

if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits

8031

LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);

8032

return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);

8033

case ISD::SETUGT:

8034

case ISD::SETGT:

8035

std::swap(TV, FV); // fsel is natively setge, swap operands for setlt

8036

LLVM_FALLTHROUGH[[gnu::fallthrough]];

8037

case ISD::SETOLE:

8038

case ISD::SETLE:

8039

if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits

8040

LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);

8041

return DAG.getNode(PPCISD::FSEL, dl, ResVT,

8042

DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);

8043

}

8044

8045

SDValue Cmp;

8046

switch (CC) {

8047

default: break; // SETUO etc aren't handled by fsel.

8048

case ISD::SETNE:

8049

std::swap(TV, FV);

8050

LLVM_FALLTHROUGH[[gnu::fallthrough]];

8051

case ISD::SETEQ:

8052

Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);

8053

if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits

8054

Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);

8055

Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);

8056

if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits

8057

Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);

8058

return DAG.getNode(PPCISD::FSEL, dl, ResVT,

8059

DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);

8060

case ISD::SETULT:

8061

case ISD::SETLT:

8062

Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);

8063

if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits

8064

Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);

8065

return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);

8066

case ISD::SETOGE:

8067

case ISD::SETGE:

8068

Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);

8069

if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits

8070

Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);

8071

return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);

8072

case ISD::SETUGT:

8073

case ISD::SETGT:

8074

Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);

8075

if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits

8076

Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);

8077

return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);

8078

case ISD::SETOLE:

8079

case ISD::SETLE:

8080

Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);

8081

if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits

8082

Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);

8083

return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);

8084

}

8085

return Op;

8086

}

8087

8088

static unsigned getPPCStrictOpcode(unsigned Opc) {

8089

switch (Opc) {

8090

default:

8091

llvm_unreachable("No strict version of this opcode!")::llvm::llvm_unreachable_internal("No strict version of this opcode!"
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 8091);

8092

case PPCISD::FCTIDZ:

8093

return PPCISD::STRICT_FCTIDZ;

8094

case PPCISD::FCTIWZ:

8095

return PPCISD::STRICT_FCTIWZ;

8096

case PPCISD::FCTIDUZ:

8097

return PPCISD::STRICT_FCTIDUZ;

8098

case PPCISD::FCTIWUZ:

8099

return PPCISD::STRICT_FCTIWUZ;

8100

case PPCISD::FCFID:

8101

return PPCISD::STRICT_FCFID;

8102

case PPCISD::FCFIDU:

8103

return PPCISD::STRICT_FCFIDU;

8104

case PPCISD::FCFIDS:

8105

return PPCISD::STRICT_FCFIDS;

8106

case PPCISD::FCFIDUS:

8107

return PPCISD::STRICT_FCFIDUS;

8108

}

8109

}

8110

8111

/// Build the PPC conversion node (FCTI*Z family) for an FP_TO_SINT/FP_TO_UINT
/// node or its strict variant. The returned node has type f64; for strict
/// input it additionally carries a chain as result 1.
static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG,
                              const PPCSubtarget &Subtarget) {
  SDLoc Loc(Op);
  const bool Strict = Op->isStrictFPOpcode();
  const unsigned InOpc = Op.getOpcode();
  const bool IsSigned =
      InOpc == ISD::FP_TO_SINT || InOpc == ISD::STRICT_FP_TO_SINT;

  // TODO: Any other flags to propagate?
  SDNodeFlags NodeFlags;
  NodeFlags.setNoFPExcept(Op->getFlags().hasNoFPExcept());

  // Strict nodes carry the chain as operand 0 and the source as operand 1.
  SDValue Src = Op.getOperand(Strict ? 1 : 0);
  SDValue InChain;
  if (Strict)
    InChain = Op.getOperand(0);
  assert(Src.getValueType().isFloatingPoint());

  // The conversion nodes are built on f64 inputs: extend f32 sources first.
  if (Src.getValueType() == MVT::f32) {
    if (!Strict) {
      Src = DAG.getNode(ISD::FP_EXTEND, Loc, MVT::f64, Src);
    } else {
      Src = DAG.getNode(ISD::STRICT_FP_EXTEND, Loc,
                        DAG.getVTList(MVT::f64, MVT::Other), {InChain, Src},
                        NodeFlags);
      InChain = Src.getValue(1);
    }
  }

  // Choose the conversion by result width and signedness.
  unsigned ConvOpc = ISD::DELETED_NODE;
  switch (Op.getSimpleValueType().SimpleTy) {
  case MVT::i32:
    if (IsSigned)
      ConvOpc = PPCISD::FCTIWZ;
    else if (Subtarget.hasFPCVT())
      ConvOpc = PPCISD::FCTIWUZ;
    else
      ConvOpc = PPCISD::FCTIDZ;
    break;
  case MVT::i64:
    assert((IsSigned || Subtarget.hasFPCVT()) &&
           "i64 FP_TO_UINT is supported only with FPCVT");
    ConvOpc = IsSigned ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;
    break;
  default:
    llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
  }

  if (!Strict)
    return DAG.getNode(ConvOpc, Loc, MVT::f64, Src);

  return DAG.getNode(getPPCStrictOpcode(ConvOpc), Loc,
                     DAG.getVTList(MVT::f64, MVT::Other), {InChain, Src},
                     NodeFlags);
}

8157

8158

void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,

8159

SelectionDAG &DAG,

8160

const SDLoc &dl) const {

8161

SDValue Tmp = convertFPToInt(Op, DAG, Subtarget);

8162

bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||

8163

Op.getOpcode() == ISD::STRICT_FP_TO_SINT;

8164

bool IsStrict = Op->isStrictFPOpcode();

8165

8166

// Convert the FP value to an int value through memory.

8167

bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&

8168

(IsSigned || Subtarget.hasFPCVT());

8169

SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);

8170

int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();

8171

MachinePointerInfo MPI =

8172

MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);

8173

8174

// Emit a store to the stack slot.

8175

SDValue Chain = IsStrict ? Tmp.getValue(1) : DAG.getEntryNode();

8176

Align Alignment(DAG.getEVTAlign(Tmp.getValueType()));

8177

if (i32Stack) {

8178

MachineFunction &MF = DAG.getMachineFunction();

8179

Alignment = Align(4);

8180

MachineMemOperand *MMO =

8181

MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Alignment);

8182

SDValue Ops[] = { Chain, Tmp, FIPtr };

8183

Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,

8184

DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);

8185

} else

8186

Chain = DAG.getStore(Chain, dl, Tmp, FIPtr, MPI, Alignment);

8187

8188

// Result is a load from the stack slot. If loading 4 bytes, make sure to

8189

// add in a bias on big endian.

8190

if (Op.getValueType() == MVT::i32 && !i32Stack) {

8191

FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,

8192

DAG.getConstant(4, dl, FIPtr.getValueType()));

8193

MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4);

8194

}

8195

8196

RLI.Chain = Chain;

8197

RLI.Ptr = FIPtr;

8198

RLI.MPI = MPI;

8199

RLI.Alignment = Alignment;

8200

}

8201

8202

/// Custom lowers floating point to integer conversions to use

8203

/// the direct move instructions available in ISA 2.07 to avoid the

8204

/// need for load/store combinations.

8205

SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,

8206

SelectionDAG &DAG,

8207

const SDLoc &dl) const {

8208

SDValue Conv = convertFPToInt(Op, DAG, Subtarget);

8209

SDValue Mov = DAG.getNode(PPCISD::MFVSR, dl, Op.getValueType(), Conv);

8210

if (Op->isStrictFPOpcode())

8211

return DAG.getMergeValues({Mov, Conv.getValue(1)}, dl);

8212

else

8213

return Mov;

8214

}

8215

8216

/// Custom-lower FP_TO_SINT/FP_TO_UINT and their strict variants.
///
///  * f128 sources are returned unchanged.
///  * ppcf128 -> i32 is expanded by hand; any other ppcf128 conversion yields
///    an empty SDValue.
///  * Everything else goes through a direct move when available on PPC64,
///    otherwise through a stack temporary followed by a load.
SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
                                          const SDLoc &dl) const {
  const bool Strict = Op->isStrictFPOpcode();
  const unsigned Opcode = Op.getOpcode();
  const bool Signed =
      Opcode == ISD::FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_SINT;
  SDValue Src = Op.getOperand(Strict ? 1 : 0);
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Op.getValueType();

  // FP to INT conversions are legal for f128.
  if (SrcVT == MVT::f128)
    return Op;

  // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
  // PPC (the libcall is not available).
  if (SrcVT == MVT::ppcf128) {
    if (DstVT != MVT::i32)
      return SDValue();

    // TODO: Conservatively pass only nofpexcept flag here. Need to check and
    // set other fast-math flags to FP operations in both strict and
    // non-strict cases. (FP_TO_SINT, FSUB)
    SDNodeFlags Flags;
    Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());

    if (Signed) {
      SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Src,
                               DAG.getIntPtrConstant(0, dl));
      SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Src,
                               DAG.getIntPtrConstant(1, dl));

      // Add the two halves of the long double in round-to-zero mode, and use
      // a smaller FP_TO_SINT.
      if (!Strict) {
        SDValue Sum = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
        return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Sum);
      }

      SDValue Sum = DAG.getNode(PPCISD::STRICT_FADDRTZ, dl,
                                DAG.getVTList(MVT::f64, MVT::Other),
                                {Op.getOperand(0), Lo, Hi}, Flags);
      return DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
                         DAG.getVTList(MVT::i32, MVT::Other),
                         {Sum.getValue(1), Sum}, Flags);
    }

    // Unsigned: go through a signed conversion, offsetting by 2^31 when the
    // input does not fit the signed range.
    const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
    APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
    SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
    SDValue SignMask = DAG.getConstant(0x80000000, dl, DstVT);

    if (!Strict) {
      // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
      // FIXME: generated code sucks.
      SDValue Big = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Cst);
      Big = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Big);
      Big = DAG.getNode(ISD::ADD, dl, MVT::i32, Big, SignMask);
      SDValue Small = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
      return DAG.getSelectCC(dl, Src, Cst, Big, Small, ISD::SETGE);
    }

    // Sel = Src < 0x80000000
    // FltOfs = select Sel, 0.0, 0x80000000
    // IntOfs = select Sel, 0, 0x80000000
    // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
    SDValue Chain = Op.getOperand(0);
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
    EVT DstSetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
    SDValue Sel =
        DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT, Chain, true);
    Chain = Sel.getValue(1);

    SDValue FltOfs = DAG.getSelect(dl, SrcVT, Sel,
                                   DAG.getConstantFP(0.0, dl, SrcVT), Cst);
    Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);

    SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl,
                              DAG.getVTList(SrcVT, MVT::Other),
                              {Chain, Src, FltOfs}, Flags);
    Chain = Val.getValue(1);
    SDValue SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
                               DAG.getVTList(DstVT, MVT::Other),
                               {Chain, Val}, Flags);
    Chain = SInt.getValue(1);
    SDValue IntOfs = DAG.getSelect(dl, DstVT, Sel,
                                   DAG.getConstant(0, dl, DstVT), SignMask);
    SDValue Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
    return DAG.getMergeValues({Result, Chain}, dl);
  }

  if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
    return LowerFP_TO_INTDirectMove(Op, DAG, dl);

  // Otherwise store the converted value to a stack slot and load it back as
  // an integer.
  ReuseLoadInfo RLI;
  LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);

  return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
                     RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
}

8317

8318

// We're trying to insert a regular store, S, and then a load, L. If the

8319

// incoming value, O, is a load, we might just be able to have our load use the

8320

// address used by O. However, we don't know if anything else will store to

8321

// that address before we can load from it. To prevent this situation, we need

8322

// to insert our load, L, into the chain as a peer of O. To do this, we give L

8323

// the same chain operand as O, we create a token factor from the chain results

8324

// of O and L, and we replace all uses of O's chain result with that token

8325

// factor (see spliceIntoChain below for this last part).

8326

bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,

8327

ReuseLoadInfo &RLI,

8328

SelectionDAG &DAG,

8329

ISD::LoadExtType ET) const {

8330

// Conservatively skip reusing for constrained FP nodes.

8331

if (Op->isStrictFPOpcode())

8332

return false;

8333

8334

SDLoc dl(Op);

8335

bool ValidFPToUint = Op.getOpcode() == ISD::FP_TO_UINT &&

8336

(Subtarget.hasFPCVT() || Op.getValueType() == MVT::i32);

8337

if (ET == ISD::NON_EXTLOAD &&

8338

(ValidFPToUint || Op.getOpcode() == ISD::FP_TO_SINT) &&

8339

isOperationLegalOrCustom(Op.getOpcode(),

8340

Op.getOperand(0).getValueType())) {

8341

8342

LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);

8343

return true;

8344

}

8345

8346

LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);

8347

if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||

8348

LD->isNonTemporal())

8349

return false;

8350

if (LD->getMemoryVT() != MemVT)

8351

return false;

8352

8353

RLI.Ptr = LD->getBasePtr();

8354

if (LD->isIndexed() && !LD->getOffset().isUndef()) {

8355

assert(LD->getAddressingMode() == ISD::PRE_INC &&((LD->getAddressingMode() == ISD::PRE_INC && "Non-pre-inc AM on PPC?"
) ? static_cast<void> (0) : __assert_fail ("LD->getAddressingMode() == ISD::PRE_INC && \"Non-pre-inc AM on PPC?\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 8356, __PRETTY_FUNCTION__))

8356

"Non-pre-inc AM on PPC?")((LD->getAddressingMode() == ISD::PRE_INC && "Non-pre-inc AM on PPC?"
) ? static_cast<void> (0) : __assert_fail ("LD->getAddressingMode() == ISD::PRE_INC && \"Non-pre-inc AM on PPC?\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 8356, __PRETTY_FUNCTION__));

8357

RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,

8358

LD->getOffset());

8359

}

8360

8361

RLI.Chain = LD->getChain();

8362

RLI.MPI = LD->getPointerInfo();

8363

RLI.IsDereferenceable = LD->isDereferenceable();

8364

RLI.IsInvariant = LD->isInvariant();

8365

RLI.Alignment = LD->getAlign();

8366

RLI.AAInfo = LD->getAAInfo();

8367

RLI.Ranges = LD->getRanges();

8368

8369

RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);

8370

return true;

8371

}

8372

8373

// Given the head of the old chain, ResChain, insert a token factor containing

8374

// it and NewResChain, and make users of ResChain now be users of that token

8375

// factor.

8376

// TODO: Remove and use DAG::makeEquivalentMemoryOrdering() instead.

8377

void PPCTargetLowering::spliceIntoChain(SDValue ResChain,

8378

SDValue NewResChain,

8379

SelectionDAG &DAG) const {

8380

if (!ResChain)

8381

return;

8382

8383

SDLoc dl(NewResChain);

8384

8385

SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,

8386

NewResChain, DAG.getUNDEF(MVT::Other));

8387

assert(TF.getNode() != NewResChain.getNode() &&((TF.getNode() != NewResChain.getNode() && "A new TF really is required here"
) ? static_cast<void> (0) : __assert_fail ("TF.getNode() != NewResChain.getNode() && \"A new TF really is required here\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 8388, __PRETTY_FUNCTION__))

8388

"A new TF really is required here")((TF.getNode() != NewResChain.getNode() && "A new TF really is required here"
) ? static_cast<void> (0) : __assert_fail ("TF.getNode() != NewResChain.getNode() && \"A new TF really is required here\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 8388, __PRETTY_FUNCTION__));

8389

8390

DAG.ReplaceAllUsesOfValueWith(ResChain, TF);

8391

DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);

8392

}

8393

8394

/// Analyze profitability of direct move

8395

/// prefer float load to int load plus direct move

8396

/// when there is no integer use of int load

8397

bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {

8398

SDNode *Origin = Op.getOperand(0).getNode();

8399

if (Origin->getOpcode() != ISD::LOAD)

8400

return true;

8401

8402

// If there is no LXSIBZX/LXSIHZX, like Power8,

8403

// prefer direct move if the memory size is 1 or 2 bytes.

8404

MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();

8405

if (!Subtarget.hasP9Vector() && MMO->getSize() <= 2)

8406

return true;

8407

8408

for (SDNode::use_iterator UI = Origin->use_begin(),

8409

UE = Origin->use_end();

8410

UI != UE; ++UI) {

8411

8412

// Only look at the users of the loaded value.

8413

if (UI.getUse().get().getResNo() != 0)

8414

continue;

8415

8416

if (UI->getOpcode() != ISD::SINT_TO_FP &&

8417

UI->getOpcode() != ISD::UINT_TO_FP &&

8418

UI->getOpcode() != ISD::STRICT_SINT_TO_FP &&

8419

UI->getOpcode() != ISD::STRICT_UINT_TO_FP)

8420

return true;

8421

}

8422

8423

return false;

8424

}

8425

8426

/// Build the PPC conversion node (FCFID / FCFIDU / FCFIDS / FCFIDUS, or the
/// strict counterpart) for the int-to-FP node \p Op, applied to \p Src.
///
/// For strict opcodes the node is chained on \p Chain; when \p Chain is empty,
/// operand 0 of \p Op is used instead. The result type is f32 only when \p Op
/// produces f32 and the subtarget has FPCVT; otherwise it is f64.
static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG,
                              const PPCSubtarget &Subtarget,
                              SDValue Chain = SDValue()) {
  SDLoc dl(Op);
  const unsigned OrigOpc = Op.getOpcode();
  const bool SignedSrc =
      OrigOpc == ISD::SINT_TO_FP || OrigOpc == ISD::STRICT_SINT_TO_FP;

  // If we have FCFIDS, then use it when converting to single-precision.
  // Otherwise, convert to double-precision and then round.
  const bool ToSingle = Op.getValueType() == MVT::f32 && Subtarget.hasFPCVT();

  unsigned ConvOpc;
  if (ToSingle)
    ConvOpc = SignedSrc ? PPCISD::FCFIDS : PPCISD::FCFIDUS;
  else
    ConvOpc = SignedSrc ? PPCISD::FCFID : PPCISD::FCFIDU;
  EVT ResultTy = ToSingle ? MVT::f32 : MVT::f64;

  // Non-strict conversions are plain single-result nodes.
  if (!Op->isStrictFPOpcode())
    return DAG.getNode(ConvOpc, dl, ResultTy, Src);

  // TODO: Any other flags to propagate?
  SDNodeFlags Flags;
  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());

  // Fall back to the incoming chain of Op when the caller gave none.
  SDValue InChain = Chain ? Chain : Op.getOperand(0);
  return DAG.getNode(getPPCStrictOpcode(ConvOpc), dl,
                     DAG.getVTList(ResultTy, MVT::Other), {InChain, Src},
                     Flags);
}

8451

8452

/// Custom lowers integer to floating point conversions to use

8453

/// the direct move instructions available in ISA 2.07 to avoid the

8454

/// need for load/store combinations.

8455

SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,

8456

SelectionDAG &DAG,

8457

const SDLoc &dl) const {

8458

assert((Op.getValueType() == MVT::f32 ||(((Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::
f64) && "Invalid floating point type as target of conversion"
) ? static_cast<void> (0) : __assert_fail ("(Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::f64) && \"Invalid floating point type as target of conversion\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 8460, __PRETTY_FUNCTION__))

8459

Op.getValueType() == MVT::f64) &&(((Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::
f64) && "Invalid floating point type as target of conversion"
) ? static_cast<void> (0) : __assert_fail ("(Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::f64) && \"Invalid floating point type as target of conversion\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 8460, __PRETTY_FUNCTION__))

8460

"Invalid floating point type as target of conversion")(((Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::
f64) && "Invalid floating point type as target of conversion"
) ? static_cast<void> (0) : __assert_fail ("(Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::f64) && \"Invalid floating point type as target of conversion\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 8460, __PRETTY_FUNCTION__));

8461

assert(Subtarget.hasFPCVT() &&((Subtarget.hasFPCVT() && "Int to FP conversions with direct moves require FPCVT"
) ? static_cast<void> (0) : __assert_fail ("Subtarget.hasFPCVT() && \"Int to FP conversions with direct moves require FPCVT\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 8462, __PRETTY_FUNCTION__))

8462

"Int to FP conversions with direct moves require FPCVT")((Subtarget.hasFPCVT() && "Int to FP conversions with direct moves require FPCVT"
) ? static_cast<void> (0) : __assert_fail ("Subtarget.hasFPCVT() && \"Int to FP conversions with direct moves require FPCVT\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 8462, __PRETTY_FUNCTION__));

8463

SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);

8464

bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;

8465

bool Signed = Op.getOpcode() == ISD::SINT_TO_FP ||

8466

Op.getOpcode() == ISD::STRICT_SINT_TO_FP;

8467

unsigned MovOpc = (WordInt && !Signed) ? PPCISD::MTVSRZ : PPCISD::MTVSRA;

8468

SDValue Mov = DAG.getNode(MovOpc, dl, MVT::f64, Src);

8469

return convertIntToFP(Op, Mov, DAG, Subtarget);

8470

}

8471

8472

/// Widen \p Vec to a 128-bit vector with the same element type by
/// concatenating it with undef vectors of its own type. \p Vec ends up as the
/// first operand of the CONCAT_VECTORS node.
static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
  EVT NarrowVT = Vec.getValueType();
  assert(NarrowVT.isVector() && "Expected a vector type.");
  assert(NarrowVT.getSizeInBits() < 128 && "Vector is already full width.");

  // The wide type keeps the element type and fills 128 bits.
  EVT EltVT = NarrowVT.getVectorElementType();
  unsigned WideNumElts = 128 / EltVT.getSizeInBits();
  EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);

  // Start with all-undef parts, then put the real vector in the first slot.
  unsigned NumParts = WideNumElts / NarrowVT.getVectorNumElements();
  SmallVector<SDValue, 16> Parts(NumParts, DAG.getUNDEF(NarrowVT));
  Parts[0] = Vec;

  return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Parts);
}

8491

8492

/// Lower a vector int-to-FP conversion whose result is v2f64 or v4f32.
///
/// The source is widened to 128 bits, its elements are shuffled into the
/// lanes of a v2i64 / v4i32 vector, the lanes are sign-extended in register
/// (signed) or filled from a zero vector (unsigned), and the original
/// conversion opcode is re-emitted on that intermediate vector.
SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
                                                const SDLoc &dl) const {
  const bool Strict = Op->isStrictFPOpcode();
  const unsigned Opc = Op.getOpcode();
  SDValue Src = Op.getOperand(Strict ? 1 : 0);
  assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP ||
          Opc == ISD::STRICT_UINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP) &&
         "Unexpected conversion type");
  assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
         "Supports conversions to v2f64/v4f32 only.");

  // TODO: Any other flags to propagate?
  SDNodeFlags Flags;
  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());

  const bool IsSignedConv =
      Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
  const bool IsV4Result = Op.getValueType() == MVT::v4f32;

  SDValue Wide = widenVec(DAG, Src, dl);
  EVT WideVT = Wide.getValueType();
  unsigned WideNumElts = WideVT.getVectorNumElements();
  MVT IntermediateVT = IsV4Result ? MVT::v4i32 : MVT::v2i64;

  // By default every lane selects from the second shuffle operand.
  SmallVector<int, 16> Mask;
  for (unsigned Lane = 0; Lane != WideNumElts; ++Lane)
    Mask.push_back(Lane + WideNumElts);

  // One slot in each group of Stride narrow lanes receives a source element:
  // the first slot of the group on little-endian, the last on big-endian.
  int KeptElts = IsV4Result ? 4 : 2;
  int Stride = IsV4Result ? WideNumElts / 4 : WideNumElts / 2;
  int SlotInGroup = Subtarget.isLittleEndian() ? 0 : Stride - 1;
  for (int Elt = 0; Elt < KeptElts; ++Elt)
    Mask[Elt * Stride + SlotInGroup] = Elt;

  SDValue Filler =
      IsSignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
  SDValue Arranged = DAG.getVectorShuffle(WideVT, dl, Wide, Filler, Mask);

  SDValue Extended;
  if (!IsSignedConv) {
    // The zero filler already provides the high bits.
    Extended = DAG.getNode(ISD::BITCAST, dl, IntermediateVT, Arranged);
  } else {
    Arranged = DAG.getBitcast(IntermediateVT, Arranged);
    EVT ExtVT = Src.getValueType();
    if (Subtarget.hasP9Altivec())
      ExtVT = EVT::getVectorVT(*DAG.getContext(), WideVT.getVectorElementType(),
                               IntermediateVT.getVectorNumElements());

    Extended = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT,
                           Arranged, DAG.getValueType(ExtVT));
  }

  if (!Strict)
    return DAG.getNode(Opc, dl, Op.getValueType(), Extended);

  return DAG.getNode(Opc, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
                     {Op.getOperand(0), Extended}, Flags);
}

8551

8552

/// Custom lowering for SINT_TO_FP / UINT_TO_FP and their strict variants.
///
/// Dispatch order:
///  - custom vector conversions go to LowerINT_TO_FPVector;
///  - f128 results are returned unchanged (legal);
///  - results other than f32/f64 return an empty SDValue;
///  - an i1 source becomes a select between 1.0 and 0.0;
///  - with direct moves, PPC64 and FPCVT, LowerINT_TO_FPDirectMove is used
///    when directMoveIsProfitable() agrees;
///  - otherwise the integer is brought into an f64-typed value through memory
///    (reusing an existing load address when possible) and converted with
///    convertIntToFP, followed by a round to f32 when FPCVT is unavailable.
///
/// For strict opcodes the returned node carries the outgoing chain as its
/// second result.
SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
                                          SelectionDAG &DAG) const {
  SDLoc dl(Op);
  bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
                  Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
  bool IsStrict = Op->isStrictFPOpcode();
  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
  SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();

  // TODO: Any other flags to propagate?
  SDNodeFlags Flags;
  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());

  EVT InVT = Src.getValueType();
  EVT OutVT = Op.getValueType();
  if (OutVT.isVector() && OutVT.isFloatingPoint() &&
      isOperationCustom(Op.getOpcode(), InVT))
    return LowerINT_TO_FPVector(Op, DAG, dl);

  // Conversions to f128 are legal.
  if (Op.getValueType() == MVT::f128)
    return Op;

  // Don't handle ppc_fp128 here; let it be lowered to a libcall.
  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
    return SDValue();

  if (Src.getValueType() == MVT::i1) {
    // NOTE(review): true maps to 1.0 for signed and unsigned conversions
    // alike; confirm that a signed i1 (where true is -1) never reaches here.
    SDValue Sel = DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Src,
                              DAG.getConstantFP(1.0, dl, Op.getValueType()),
                              DAG.getConstantFP(0.0, dl, Op.getValueType()));
    // A strict node has two results (value, chain); the replacement must
    // provide both, so pair the select with the incoming chain.
    if (IsStrict)
      return DAG.getMergeValues({Sel, Chain}, dl);
    return Sel;
  }

  // If we have direct moves, we can do all the conversion, skip the store/load
  // however, without FPCVT we can't do most conversions.
  if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
      Subtarget.isPPC64() && Subtarget.hasFPCVT())
    return LowerINT_TO_FPDirectMove(Op, DAG, dl);

  assert((IsSigned || Subtarget.hasFPCVT()) &&
         "UINT_TO_FP is supported only with FPCVT");

  if (Src.getValueType() == MVT::i64) {
    SDValue SINT = Src;
    // When converting to single-precision, we actually need to convert
    // to double-precision first and then round to single-precision.
    // To avoid double-rounding effects during that operation, we have
    // to prepare the input operand.  Bits that might be truncated when
    // converting to double-precision are replaced by a bit that won't
    // be lost at this stage, but is below the single-precision rounding
    // position.
    //
    // However, if -enable-unsafe-fp-math is in effect, accept double
    // rounding to avoid the extra overhead.
    if (Op.getValueType() == MVT::f32 &&
        !Subtarget.hasFPCVT() &&
        !DAG.getTarget().Options.UnsafeFPMath) {

      // Twiddle input to make sure the low 11 bits are zero.  (If this
      // is the case, we are guaranteed the value will fit into the 53 bit
      // mantissa of an IEEE double-precision value without rounding.)
      // If any of those low 11 bits were not zero originally, make sure
      // bit 12 (value 2048) is set instead, so that the final rounding
      // to single-precision gets the correct result.
      SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
                                  SINT, DAG.getConstant(2047, dl, MVT::i64));
      Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
                          Round, DAG.getConstant(2047, dl, MVT::i64));
      Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
      Round = DAG.getNode(ISD::AND, dl, MVT::i64,
                          Round, DAG.getConstant(-2048, dl, MVT::i64));

      // However, we cannot use that value unconditionally: if the magnitude
      // of the input value is small, the bit-twiddling we did above might
      // end up visibly changing the output.  Fortunately, in that case, we
      // don't need to twiddle bits since the original input will convert
      // exactly to double-precision floating-point already.  Therefore,
      // construct a conditional to use the original value if the top 11
      // bits are all sign-bit copies, and use the rounded value computed
      // above otherwise.
      SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
                                 SINT, DAG.getConstant(53, dl, MVT::i32));
      Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
                         Cond, DAG.getConstant(1, dl, MVT::i64));
      Cond = DAG.getSetCC(
          dl,
          getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
          Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);

      SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
    }

    ReuseLoadInfo RLI;
    SDValue Bits;

    MachineFunction &MF = DAG.getMachineFunction();
    if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
      // The i64 comes straight from memory: reload the same bytes as f64.
      Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
                         RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
      spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
    } else if (Subtarget.hasLFIWAX() &&
               canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
      // Sign-extending i32 load: use LFIWAX on the same address.
      MachineMemOperand *MMO =
        MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                                RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
                                     DAG.getVTList(MVT::f64, MVT::Other),
                                     Ops, MVT::i32, MMO);
      spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
    } else if (Subtarget.hasFPCVT() &&
               canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
      // Zero-extending i32 load: use LFIWZX on the same address.
      MachineMemOperand *MMO =
        MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                                RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
                                     DAG.getVTList(MVT::f64, MVT::Other),
                                     Ops, MVT::i32, MMO);
      spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
    } else if (((Subtarget.hasLFIWAX() &&
                 SINT.getOpcode() == ISD::SIGN_EXTEND) ||
                (Subtarget.hasFPCVT() &&
                 SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
               SINT.getOperand(0).getValueType() == MVT::i32) {
      // The i64 is an extended i32: spill the i32 to a 4-byte stack slot and
      // let LFIWAX / LFIWZX perform the extension while loading.
      MachineFrameInfo &MFI = MF.getFrameInfo();
      EVT PtrVT = getPointerTy(DAG.getDataLayout());

      int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
      SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

      SDValue Store = DAG.getStore(Chain, dl, SINT.getOperand(0), FIdx,
                                   MachinePointerInfo::getFixedStack(
                                       DAG.getMachineFunction(), FrameIdx));
      Chain = Store;

      assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
             "Expected an i32 store");

      RLI.Ptr = FIdx;
      RLI.Chain = Chain;
      RLI.MPI =
          MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
      RLI.Alignment = Align(4);

      MachineMemOperand *MMO =
        MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                                RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
                                     PPCISD::LFIWZX : PPCISD::LFIWAX,
                                     dl, DAG.getVTList(MVT::f64, MVT::Other),
                                     Ops, MVT::i32, MMO);
      Chain = Bits.getValue(1);
    } else
      Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);

    SDValue FP = convertIntToFP(Op, Bits, DAG, Subtarget, Chain);
    if (IsStrict)
      Chain = FP.getValue(1);

    // Without FPCVT the conversion produced f64; round it to f32 if needed.
    if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
      if (IsStrict)
        FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
                         DAG.getVTList(MVT::f32, MVT::Other),
                         {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
      else
        FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
                         DAG.getIntPtrConstant(0, dl));
    }
    return FP;
  }

  assert(Src.getValueType() == MVT::i32 &&
         "Unhandled INT_TO_FP type in custom expander!");
  // Since we only generate this in 64-bit mode, we can take advantage of
  // 64-bit registers.  In particular, sign extend the input value into the
  // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
  // then lfd it and fcfid it.
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  SDValue Ld;
  if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
    ReuseLoadInfo RLI;
    bool ReusingLoad = canReuseLoadAddress(Src, MVT::i32, RLI, DAG);
    if (!ReusingLoad) {
      // No reusable address: spill the i32 to a fresh 4-byte stack slot.
      int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
      SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

      SDValue Store = DAG.getStore(Chain, dl, Src, FIdx,
                                   MachinePointerInfo::getFixedStack(
                                       DAG.getMachineFunction(), FrameIdx));
      Chain = Store;

      assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
             "Expected an i32 store");

      RLI.Ptr = FIdx;
      RLI.Chain = Chain;
      RLI.MPI =
          MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
      RLI.Alignment = Align(4);
    }

    MachineMemOperand *MMO =
      MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                              RLI.Alignment, RLI.AAInfo, RLI.Ranges);
    SDValue Ops[] = { RLI.Chain, RLI.Ptr };
    Ld = DAG.getMemIntrinsicNode(IsSigned ? PPCISD::LFIWAX : PPCISD::LFIWZX, dl,
                                 DAG.getVTList(MVT::f64, MVT::Other), Ops,
                                 MVT::i32, MMO);
    Chain = Ld.getValue(1);
    if (ReusingLoad)
      spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
  } else {
    assert(Subtarget.isPPC64() &&
           "i32->FP without LFIWAX supported only on PPC64");

    int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
    SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

    SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Src);

    // STD the extended value into the stack slot.
    SDValue Store = DAG.getStore(
        Chain, dl, Ext64, FIdx,
        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
    Chain = Store;

    // Load the value as a double.
    Ld = DAG.getLoad(
        MVT::f64, dl, Chain, FIdx,
        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
    Chain = Ld.getValue(1);
  }

  // FCFID it and return it.
  SDValue FP = convertIntToFP(Op, Ld, DAG, Subtarget, Chain);
  if (IsStrict)
    Chain = FP.getValue(1);
  if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
    if (IsStrict)
      FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
                       DAG.getVTList(MVT::f32, MVT::Other),
                       {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
    else
      FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
                       DAG.getIntPtrConstant(0, dl));
  }
  return FP;
}

8804

8805

/// Lower FLT_ROUNDS_: read the FP status/control register with MFFS, extract
/// its two rounding-mode bits and translate them to the FLT_ROUNDS encoding.
/// Returns the merged pair {rounding mode as Op's value type, output chain}.
SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc dl(Op);
  /*
   The rounding mode is in bits 30:31 of FPSCR, and has the following
   settings:
     00 Round to nearest
     01 Round to 0
     10 Round to +inf
     11 Round to -inf

  FLT_ROUNDS, on the other hand, expects the following:
    -1 Undefined
     0 Round to 0
     1 Round to nearest
     2 Round to +inf
     3 Round to -inf

  To perform the conversion, we do:
    ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
  */

  MachineFunction &MF = DAG.getMachineFunction();
  EVT VT = Op.getValueType();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  // Save FP Control Word to register
  SDValue Chain = Op.getOperand(0);
  SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain);
  Chain = MFFS.getValue(1);

  // Save FP register to stack slot
  int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
  SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
  Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo());

  // Load FP Control Word from low 32 bits of stack slot. The low word of the
  // 8-byte slot sits at byte offset 4 on big-endian subtargets but at offset
  // 0 on little-endian ones; always using 4 would read the high word there.
  const unsigned LowWordOffset = Subtarget.isLittleEndian() ? 0 : 4;
  SDValue Offset = DAG.getConstant(LowWordOffset, dl, PtrVT);
  SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Offset);
  SDValue CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo());
  Chain = CWD.getValue(1);

  // Transform as necessary
  SDValue CWD1 =
    DAG.getNode(ISD::AND, dl, MVT::i32,
                CWD, DAG.getConstant(3, dl, MVT::i32));
  SDValue CWD2 =
    DAG.getNode(ISD::SRL, dl, MVT::i32,
                DAG.getNode(ISD::AND, dl, MVT::i32,
                            DAG.getNode(ISD::XOR, dl, MVT::i32,
                                        CWD, DAG.getConstant(3, dl, MVT::i32)),
                            DAG.getConstant(3, dl, MVT::i32)),
                DAG.getConstant(1, dl, MVT::i32));

  SDValue RetVal =
    DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);

  // Resize the i32 result to the requested type. getZExtOrTrunc picks
  // TRUNCATE or ZERO_EXTEND from the actual widths, so result types between
  // 16 and 31 bits are truncated instead of being handed to ZERO_EXTEND.
  RetVal = DAG.getZExtOrTrunc(RetVal, dl, VT);

  return DAG.getMergeValues({RetVal, Chain}, dl);
}

8868

8869

/// Lower a SHL_PARTS node: a left shift of the double-width value {Hi, Lo}
/// by Amt, producing the pair {OutLo, OutHi} as a merged value.
///
/// Operands of Op are (Lo, Hi, Amt). The expansion uses no control flow; as
/// the in-line note says, it depends on how the PPC shift nodes treat
/// oversized shift amounts.
SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  const EVT VT = Op.getValueType();
  const unsigned BitWidth = VT.getSizeInBits();
  assert(Op.getNumOperands() == 3 &&
         VT == Op.getOperand(1).getValueType() &&
         "Unexpected SHL!");

  // Expand into a bunch of logical ops.  Note that these ops
  // depend on the PPC behavior for oversized shift amounts.
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Amt = Op.getOperand(2);
  const EVT AmtVT = Amt.getValueType();

  // BitWidth - Amt: how far Lo is shifted right to reach its place in Hi.
  SDValue WidthC = DAG.getConstant(BitWidth, dl, AmtVT);
  SDValue InvAmt = DAG.getNode(ISD::SUB, dl, AmtVT, WidthC, Amt);

  // (Hi << Amt) | (Lo >> (BitWidth - Amt))
  SDValue HiShifted = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
  SDValue LoCarry = DAG.getNode(PPCISD::SRL, dl, VT, Lo, InvAmt);
  SDValue HiNarrow = DAG.getNode(ISD::OR, dl, VT, HiShifted, LoCarry);

  // Lo << (Amt - BitWidth)
  SDValue NegWidthC = DAG.getConstant(-BitWidth, dl, AmtVT);
  SDValue ExcessAmt = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, NegWidthC);
  SDValue HiWide = DAG.getNode(PPCISD::SHL, dl, VT, Lo, ExcessAmt);

  SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, HiNarrow, HiWide);
  SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
  return DAG.getMergeValues({OutLo, OutHi}, dl);
}

8897

8898

/// Lower a SRL_PARTS node: a logical right shift of the double-width value
/// {Hi, Lo} by Amt, producing the pair {OutLo, OutHi} as a merged value.
///
/// Operands of Op are (Lo, Hi, Amt). The expansion uses no control flow; as
/// the in-line note says, it depends on how the PPC shift nodes treat
/// oversized shift amounts.
SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  const EVT VT = Op.getValueType();
  const unsigned BitWidth = VT.getSizeInBits();
  assert(Op.getNumOperands() == 3 &&
         VT == Op.getOperand(1).getValueType() &&
         "Unexpected SRL!");

  // Expand into a bunch of logical ops.  Note that these ops
  // depend on the PPC behavior for oversized shift amounts.
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Amt = Op.getOperand(2);
  const EVT AmtVT = Amt.getValueType();

  // BitWidth - Amt: how far Hi is shifted left to reach its place in Lo.
  SDValue WidthC = DAG.getConstant(BitWidth, dl, AmtVT);
  SDValue InvAmt = DAG.getNode(ISD::SUB, dl, AmtVT, WidthC, Amt);

  // (Lo >> Amt) | (Hi << (BitWidth - Amt))
  SDValue LoShifted = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
  SDValue HiCarry = DAG.getNode(PPCISD::SHL, dl, VT, Hi, InvAmt);
  SDValue LoNarrow = DAG.getNode(ISD::OR, dl, VT, LoShifted, HiCarry);

  // Hi >> (Amt - BitWidth)
  SDValue NegWidthC = DAG.getConstant(-BitWidth, dl, AmtVT);
  SDValue ExcessAmt = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, NegWidthC);
  SDValue LoWide = DAG.getNode(PPCISD::SRL, dl, VT, Hi, ExcessAmt);

  SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, LoNarrow, LoWide);
  SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
  return DAG.getMergeValues({OutLo, OutHi}, dl);
}

8926

8927

/// Lower a SRA_PARTS node: an arithmetic right shift of the double-width
/// value {Hi, Lo} by Amt, producing the pair {OutLo, OutHi} as a merged value.
///
/// Operands of Op are (Lo, Hi, Amt). Unlike the logical variants, the low
/// half is chosen with a select_cc on (Amt - BitWidth) rather than by OR-ing
/// the two candidate values together.
SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  const EVT VT = Op.getValueType();
  const unsigned BitWidth = VT.getSizeInBits();
  assert(Op.getNumOperands() == 3 &&
         VT == Op.getOperand(1).getValueType() &&
         "Unexpected SRA!");

  // Expand into a bunch of logical ops, followed by a select_cc.
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Amt = Op.getOperand(2);
  const EVT AmtVT = Amt.getValueType();

  // BitWidth - Amt: how far Hi is shifted left to reach its place in Lo.
  SDValue WidthC = DAG.getConstant(BitWidth, dl, AmtVT);
  SDValue InvAmt = DAG.getNode(ISD::SUB, dl, AmtVT, WidthC, Amt);

  // Candidate for the low half when (Amt - BitWidth) <= 0:
  //   (Lo >> Amt) | (Hi << (BitWidth - Amt))
  SDValue LoShifted = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
  SDValue HiCarry = DAG.getNode(PPCISD::SHL, dl, VT, Hi, InvAmt);
  SDValue LoNarrow = DAG.getNode(ISD::OR, dl, VT, LoShifted, HiCarry);

  // Candidate for the low half otherwise: Hi >>s (Amt - BitWidth)
  SDValue NegWidthC = DAG.getConstant(-BitWidth, dl, AmtVT);
  SDValue ExcessAmt = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, NegWidthC);
  SDValue LoWide = DAG.getNode(PPCISD::SRA, dl, VT, Hi, ExcessAmt);

  SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);

  SDValue ZeroC = DAG.getConstant(0, dl, AmtVT);
  SDValue OutLo =
      DAG.getSelectCC(dl, ExcessAmt, ZeroC, LoNarrow, LoWide, ISD::SETLE);
  return DAG.getMergeValues({OutLo, OutHi}, dl);
}

8955

8956

/// Lower FSHL / FSHR (funnel shifts) into a pair of PPC shifts and an OR.
///
/// Operands of Op are (X, Y, Z), where Z is the shift amount:
///   fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
///   fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc dl(Op);
  const EVT VT = Op.getValueType();
  const unsigned BitWidth = VT.getSizeInBits();
  const bool IsFSHL = Op.getOpcode() == ISD::FSHL;

  SDValue X = Op.getOperand(0);
  SDValue Y = Op.getOperand(1);
  SDValue Z = Op.getOperand(2);
  const EVT AmtVT = Z.getValueType();

  // This is simpler than TargetLowering::expandFunnelShift because we can rely
  // on PowerPC shift by BW being well defined.

  // Amount = Z % BW, computed as a mask with BW - 1.
  SDValue MaskC = DAG.getConstant(BitWidth - 1, dl, AmtVT);
  SDValue Amount = DAG.getNode(ISD::AND, dl, AmtVT, Z, MaskC);

  // Complement = BW - Amount.
  SDValue WidthC = DAG.getConstant(BitWidth, dl, AmtVT);
  SDValue Complement = DAG.getNode(ISD::SUB, dl, AmtVT, WidthC, Amount);

  // fshl shifts X by Amount and Y by Complement; fshr swaps the two.
  SDValue ShlAmt = IsFSHL ? Amount : Complement;
  SDValue SrlAmt = IsFSHL ? Complement : Amount;

  SDValue HighPart = DAG.getNode(PPCISD::SHL, dl, VT, X, ShlAmt);
  SDValue LowPart = DAG.getNode(PPCISD::SRL, dl, VT, Y, SrlAmt);
  return DAG.getNode(ISD::OR, dl, VT, HighPart, LowPart);
}

8980

8981

//===----------------------------------------------------------------------===//

8982

// Vector related lowering.

8983

//

8984

8985

/// getCanonicalConstSplat - Build a canonical splat immediate of Val with an

8986

/// element size of SplatSize. Cast the result to VT.

8987

static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,

8988

SelectionDAG &DAG, const SDLoc &dl) {

8989

static const MVT VTys[] = { // canonical VT to use for each size.

8990

MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32

8991

};

8992

8993

EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];

8994

8995

// For a splat with all ones, turn it to vspltisb 0xFF to canonicalize.

8996

if (Val == ((1LU << (SplatSize * 8)) - 1)) {

8997

SplatSize = 1;

8998

Val = 0xFF;

8999

}

9000

9001

EVT CanonicalVT = VTys[SplatSize-1];

9002

9003

// Build a canonical splat for this value.

9004

return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT));

9005

}

9006

9007

/// BuildIntrinsicOp - Return a unary operator intrinsic node with the

9008

/// specified intrinsic ID.

9009

static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,

9010

const SDLoc &dl, EVT DestVT = MVT::Other) {

9011

if (DestVT == MVT::Other) DestVT = Op.getValueType();

9012

return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,

9013

DAG.getConstant(IID, dl, MVT::i32), Op);

9014

}

9015

9016

/// BuildIntrinsicOp - Return a binary operator intrinsic node with the

9017

/// specified intrinsic ID.

9018

static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,

9019

SelectionDAG &DAG, const SDLoc &dl,

9020

EVT DestVT = MVT::Other) {

9021

if (DestVT == MVT::Other) DestVT = LHS.getValueType();

9022

return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,

9023

DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);

9024

}

9025

9026

/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the

9027

/// specified intrinsic ID.

9028

static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,

9029

SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,

9030

EVT DestVT = MVT::Other) {

9031

if (DestVT == MVT::Other) DestVT = Op0.getValueType();

9032

return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,

9033

DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);

9034

}

9035

9036

/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified

9037

/// amount. The result has the specified value type.

9038

static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,

9039

SelectionDAG &DAG, const SDLoc &dl) {

9040

// Force LHS/RHS to be the right type.

9041

LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);

9042

RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);

9043

9044

int Ops[16];

9045

for (unsigned i = 0; i != 16; ++i)

9046

Ops[i] = i + Amt;

9047

SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);

9048

return DAG.getNode(ISD::BITCAST, dl, VT, T);

9049

}

9050

9051

/// Do we have an efficient pattern in a .td file for this node?

9052

///

9053

/// \param V - pointer to the BuildVectorSDNode being matched

9054

/// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?

9055

///

9056

/// There are some patterns where it is beneficial to keep a BUILD_VECTOR

9057

/// node as a BUILD_VECTOR node rather than expanding it. The patterns where

9058

/// the opposite is true (expansion is beneficial) are:

9059

/// - The node builds a vector out of integers that are not 32 or 64-bits

9060

/// - The node builds a vector out of constants

9061

/// - The node is a "load-and-splat"

9062

/// In all other cases, we will choose to keep the BUILD_VECTOR.

9063

static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,

9064

bool HasDirectMove,

9065

bool HasP8Vector) {

9066

EVT VecVT = V->getValueType(0);

9067

bool RightType = VecVT == MVT::v2f64 ||

9068

(HasP8Vector && VecVT == MVT::v4f32) ||

9069

(HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));

9070

if (!RightType)

9071

return false;

9072

9073

bool IsSplat = true;

9074

bool IsLoad = false;

9075

SDValue Op0 = V->getOperand(0);

9076

9077

// This function is called in a block that confirms the node is not a constant

9078

// splat. So a constant BUILD_VECTOR here means the vector is built out of

9079

// different constants.

9080

if (V->isConstant())

9081

return false;

9082

for (int i = 0, e = V->getNumOperands(); i < e; ++i) {

9083

if (V->getOperand(i).isUndef())

9084

return false;

9085

// We want to expand nodes that represent load-and-splat even if the

9086

// loaded value is a floating point truncation or conversion to int.

9087

if (V->getOperand(i).getOpcode() == ISD::LOAD ||

9088

(V->getOperand(i).getOpcode() == ISD::FP_ROUND &&

9089

V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||

9090

(V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&

9091

V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||

9092

(V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&

9093

V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))

9094

IsLoad = true;

9095

// If the operands are different or the input is not a load and has more

9096

// uses than just this BV node, then it isn't a splat.

9097

if (V->getOperand(i) != Op0 ||

9098

(!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))

9099

IsSplat = false;

9100

}

9101

return !(IsSplat && IsLoad);

9102

}

9103

9104

// Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.

9105

SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {

9106

9107

SDLoc dl(Op);

9108

SDValue Op0 = Op->getOperand(0);

9109

9110

if ((Op.getValueType() != MVT::f128) ||

9111

(Op0.getOpcode() != ISD::BUILD_PAIR) ||

9112

(Op0.getOperand(0).getValueType() != MVT::i64) ||

9113

(Op0.getOperand(1).getValueType() != MVT::i64))

9114

return SDValue();

9115

9116

return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Op0.getOperand(0),

9117

Op0.getOperand(1));

9118

}

9119

9120

/// Look through an optional BITCAST and an optional SCALAR_TO_VECTOR /
/// SCALAR_TO_VECTOR_PERMUTED wrapper on Op and return a pointer to the
/// underlying value if it is a normal load, or nullptr otherwise.
///
/// \param IsPermuted - written only when a scalar-to-vector wrapper is found:
///                     true for the PPC permuted form, false for the generic
///                     one. It is left untouched otherwise.
static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) {
  const SDValue *Candidate = &Op;

  if (Candidate->getOpcode() == ISD::BITCAST)
    Candidate = &Candidate->getOperand(0);

  const unsigned Opc = Candidate->getOpcode();
  if (Opc == ISD::SCALAR_TO_VECTOR ||
      Opc == PPCISD::SCALAR_TO_VECTOR_PERMUTED) {
    IsPermuted = Opc == PPCISD::SCALAR_TO_VECTOR_PERMUTED;
    Candidate = &Candidate->getOperand(0);
  }

  if (Candidate->getOpcode() != ISD::LOAD)
    return nullptr;

  LoadSDNode *Load = cast<LoadSDNode>(*Candidate);
  if (!ISD::isNormalLoad(Load))
    return nullptr;
  return Candidate;
}

9134

9135

// Convert the argument APFloat to a single precision APFloat if there is no

9136

// loss in information during the conversion to single precision APFloat and the

9137

// resulting number is not a denormal number. Return true if successful.

9138

bool llvm::convertToNonDenormSingle(APFloat &ArgAPFloat) {

9139

APFloat APFloatToConvert = ArgAPFloat;

9140

bool LosesInfo = true;

9141

APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,

9142

&LosesInfo);

9143

bool Success = (!LosesInfo && !APFloatToConvert.isDenormal());

9144

if (Success)

9145

ArgAPFloat = APFloatToConvert;

9146

return Success;

9147

}

9148

9149

// Bitcast the argument APInt to a double and convert it to a single precision

9150

// APFloat, bitcast the APFloat to an APInt and assign it to the original

9151

// argument if there is no loss in information during the conversion from

9152

// double to single precision APFloat and the resulting number is not a denormal

9153

// number. Return true if successful.

9154

bool llvm::convertToNonDenormSingle(APInt &ArgAPInt) {

9155

double DpValue = ArgAPInt.bitsToDouble();

9156

APFloat APFloatDp(DpValue);

9157

bool Success = convertToNonDenormSingle(APFloatDp);

9158

if (Success)

9159

ArgAPInt = APFloatDp.bitcastToAPInt();

9160

return Success;

9161

}

9162

9163

// If this is a case we can't handle, return null and let the default

9164

// expansion code take care of it. If we CAN select this case, and if it

9165

// selects to a single instruction, return Op. Otherwise, if we can codegen

9166

// this case more efficiently than a constant pool load, lower it to the

9167

// sequence of ops that should be used.

9168

SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,

9169

SelectionDAG &DAG) const {

9170

SDLoc dl(Op);

9171

BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());

9172

assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR")((BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR"
) ? static_cast<void> (0) : __assert_fail ("BVN && \"Expected a BuildVectorSDNode in LowerBUILD_VECTOR\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 9172, __PRETTY_FUNCTION__));

9173

9174

// Check if this is a splat of a constant value.

9175

APInt APSplatBits, APSplatUndef;

9176

unsigned SplatBitSize;

9177

bool HasAnyUndefs;

9178

bool BVNIsConstantSplat =

9179

BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,

9180

HasAnyUndefs, 0, !Subtarget.isLittleEndian());

9181

9182

// If it is a splat of a double, check if we can shrink it to a 32 bit

9183

// non-denormal float which when converted back to double gives us the same

9184

// double. This is to exploit the XXSPLTIDP instruction.

9185

if (BVNIsConstantSplat && Subtarget.hasPrefixInstrs() &&

9186

(SplatBitSize == 64) && (Op->getValueType(0) == MVT::v2f64) &&

9187

convertToNonDenormSingle(APSplatBits)) {

9188

SDValue SplatNode = DAG.getNode(

9189

PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,

9190

DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));

9191

return DAG.getBitcast(Op.getValueType(), SplatNode);

9192

}

9193

9194

if (!BVNIsConstantSplat || SplatBitSize > 32) {

9195

9196

bool IsPermutedLoad = false;

9197

const SDValue *InputLoad =

9198

getNormalLoadInput(Op.getOperand(0), IsPermutedLoad);

9199

// Handle load-and-splat patterns as we have instructions that will do this

9200

// in one go.

9201

if (InputLoad && DAG.isSplatValue(Op, true)) {

9202

LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);

9203

9204

// We have handling for 4 and 8 byte elements.

9205

unsigned ElementSize = LD->getMemoryVT().getScalarSizeInBits();

9206

9207

// Checking for a single use of this load, we have to check for vector

9208

// width (128 bits) / ElementSize uses (since each operand of the

9209

// BUILD_VECTOR is a separate use of the value.

9210

unsigned NumUsesOfInputLD = 128 / ElementSize;

9211

for (SDValue BVInOp : Op->ops())

9212

if (BVInOp.isUndef())

9213

NumUsesOfInputLD--;

9214

assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?")((NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?"
) ? static_cast<void> (0) : __assert_fail ("NumUsesOfInputLD > 0 && \"No uses of input LD of a build_vector?\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 9214, __PRETTY_FUNCTION__));

9215

if (InputLoad->getNode()->hasNUsesOfValue(NumUsesOfInputLD, 0) &&

9216

((Subtarget.hasVSX() && ElementSize == 64) ||

9217

(Subtarget.hasP9Vector() && ElementSize == 32))) {

9218

SDValue Ops[] = {

9219

LD->getChain(), // Chain

9220

LD->getBasePtr(), // Ptr

9221

DAG.getValueType(Op.getValueType()) // VT

9222

};

9223

SDValue LdSplt = DAG.getMemIntrinsicNode(

9224

PPCISD::LD_SPLAT, dl, DAG.getVTList(Op.getValueType(), MVT::Other),

9225

Ops, LD->getMemoryVT(), LD->getMemOperand());

9226

// Replace all uses of the output chain of the original load with the

9227

// output chain of the new load.

9228

DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1),

9229

LdSplt.getValue(1));

9230

return LdSplt;

9231

}

9232

}

9233

9234

// BUILD_VECTOR nodes that are not constant splats of up to 32-bits can be

9235

// lowered to VSX instructions under certain conditions.

9236

// Without VSX, there is no pattern more efficient than expanding the node.

9237

if (Subtarget.hasVSX() &&

9238

haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),

9239

Subtarget.hasP8Vector()))

9240

return Op;

9241

return SDValue();

9242

}

9243

9244

uint64_t SplatBits = APSplatBits.getZExtValue();

9245

uint64_t SplatUndef = APSplatUndef.getZExtValue();

9246

unsigned SplatSize = SplatBitSize / 8;

9247

9248

// First, handle single instruction cases.

9249

9250

// All zeros?

9251

if (SplatBits == 0) {

9252

// Canonicalize all zero vectors to be v4i32.

9253

if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {

9254

SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);

9255

Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);

9256

}

9257

return Op;

9258

}

9259

9260

// We have XXSPLTIW for constant splats four bytes wide.

9261

// Given vector length is a multiple of 4, 2-byte splats can be replaced

9262

// with 4-byte splats. We replicate the SplatBits in case of 2-byte splat to

9263

// make a 4-byte splat element. For example: 2-byte splat of 0xABAB can be

9264

// turned into a 4-byte splat of 0xABABABAB.

9265

if (Subtarget.hasPrefixInstrs() && SplatSize == 2)

9266

return getCanonicalConstSplat((SplatBits |= SplatBits << 16), SplatSize * 2,

9267

Op.getValueType(), DAG, dl);

9268

9269

if (Subtarget.hasPrefixInstrs() && SplatSize == 4)

9270

return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,

9271

dl);

9272

9273

// We have XXSPLTIB for constant splats one byte wide.

9274

if (Subtarget.hasP9Vector() && SplatSize == 1)

9275

return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,

9276

dl);

9277

9278

// If the sign extended value is in the range [-16,15], use VSPLTI[bhw].

9279

int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>

9280

(32-SplatBitSize));

9281

if (SextVal >= -16 && SextVal <= 15)

9282

return getCanonicalConstSplat(SextVal, SplatSize, Op.getValueType(), DAG,

9283

dl);

9284

9285

// Two instruction sequences.

9286

9287

// If this value is in the range [-32,30] and is even, use:

9288

// VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)

9289

// If this value is in the range [17,31] and is odd, use:

9290

// VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)

9291

// If this value is in the range [-31,-17] and is odd, use:

9292

// VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)

9293

// Note the last two are three-instruction sequences.

9294

if (SextVal >= -32 && SextVal <= 31) {

9295

// To avoid having these optimizations undone by constant folding,

9296

// we convert to a pseudo that will be expanded later into one of

9297

// the above forms.

9298

SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32);

9299

EVT VT = (SplatSize == 1 ? MVT::v16i8 :

9300

(SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));

9301

SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);

9302

SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);

9303

if (VT == Op.getValueType())

9304

return RetVal;

9305

else

9306

return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);

9307

}

9308

9309

// If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is

9310

// 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important

9311

// for fneg/fabs.

9312

if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {

9313

// Make -1 and vspltisw -1:

9314

SDValue OnesV = getCanonicalConstSplat(-1, 4, MVT::v4i32, DAG, dl);

9315

9316

// Make the VSLW intrinsic, computing 0x8000_0000.

9317

SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,

9318

OnesV, DAG, dl);

9319

9320

// xor by OnesV to invert it.

9321

Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);

9322

return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);

9323

}

9324

9325

// Check to see if this is a wide variety of vsplti*, binop self cases.

9326

static const signed char SplatCsts[] = {

9327

-1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,

9328

-8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16

9329

};

9330

9331

for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {

9332

// Indirect through the SplatCsts array so that we favor 'vsplti -1' for

9333

// cases which are ambiguous (e.g. formation of 0x8000_0000). 'vsplti -1'

9334

int i = SplatCsts[idx];

9335

9336

// Figure out what shift amount will be used by altivec if shifted by i in

9337

// this splat size.

9338

unsigned TypeShiftAmt = i & (SplatBitSize-1);

9339

9340

// vsplti + shl self.

9341

if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {

9342

SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);

9343

static const unsigned IIDs[] = { // Intrinsic to use for each size.

9344

Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,

9345

Intrinsic::ppc_altivec_vslw

9346

};

9347

Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);

9348

return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);

9349

}

9350

9351

// vsplti + srl self.

9352

if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {

9353

SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);

9354

static const unsigned IIDs[] = { // Intrinsic to use for each size.

9355

Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,

9356

Intrinsic::ppc_altivec_vsrw

9357

};

9358

Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);

9359

return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);

9360

}

9361

9362

// vsplti + sra self.

9363

if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {

9364

SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);

9365

static const unsigned IIDs[] = { // Intrinsic to use for each size.

9366

Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,

9367

Intrinsic::ppc_altivec_vsraw

9368

};

9369

Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);

9370

return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);

9371

}

9372

9373

// vsplti + rol self.

9374

if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |

9375

((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {

9376

SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);

9377

static const unsigned IIDs[] = { // Intrinsic to use for each size.

9378

Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,

9379

Intrinsic::ppc_altivec_vrlw

9380

};

9381

Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);

9382

return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);

9383

}

9384

9385

// t = vsplti c, result = vsldoi t, t, 1

9386

if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {

9387

SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);

9388

unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;

9389

return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);

9390

}

9391

// t = vsplti c, result = vsldoi t, t, 2

9392

if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {

9393

SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);

9394

unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;

9395

return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);

9396

}

9397

// t = vsplti c, result = vsldoi t, t, 3

9398

if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {

9399

SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);

9400

unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;

9401

return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);

9402

}

9403

}

9404

9405

return SDValue();

9406

}

9407

9408

/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit

9409

/// the specified operations to build the shuffle.

9410

static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,

9411

SDValue RHS, SelectionDAG &DAG,

9412

const SDLoc &dl) {

9413

unsigned OpNum = (PFEntry >> 26) & 0x0F;

9414

unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);

9415

unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);

9416

9417

enum {

9418

OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>

9419

OP_VMRGHW,

9420

OP_VMRGLW,

9421

OP_VSPLTISW0,

9422

OP_VSPLTISW1,

9423

OP_VSPLTISW2,

9424

OP_VSPLTISW3,

9425

OP_VSLDOI4,

9426

OP_VSLDOI8,

9427

OP_VSLDOI12

9428

};

9429

9430

if (OpNum == OP_COPY) {

9431

if (LHSID == (1*9+2)*9+3) return LHS;

9432

assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!")((LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!") ?
static_cast<void> (0) : __assert_fail ("LHSID == ((4*9+5)*9+6)*9+7 && \"Illegal OP_COPY!\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 9432, __PRETTY_FUNCTION__));

9433

return RHS;

9434

}

9435

9436

SDValue OpLHS, OpRHS;

9437

OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);

9438

OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);

9439

9440

int ShufIdxs[16];

9441

switch (OpNum) {

9442

default: llvm_unreachable("Unknown i32 permute!")::llvm::llvm_unreachable_internal("Unknown i32 permute!", "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 9442);

9443

case OP_VMRGHW:

9444

ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;

9445

ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;

9446

ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;

9447

ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;

9448

break;

9449

case OP_VMRGLW:

9450

ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;

9451

ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;

9452

ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;

9453

ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;

9454

break;

9455

case OP_VSPLTISW0:

9456

for (unsigned i = 0; i != 16; ++i)

9457

ShufIdxs[i] = (i&3)+0;

9458

break;

9459

case OP_VSPLTISW1:

9460

for (unsigned i = 0; i != 16; ++i)

9461

ShufIdxs[i] = (i&3)+4;

9462

break;

9463

case OP_VSPLTISW2:

9464

for (unsigned i = 0; i != 16; ++i)

9465

ShufIdxs[i] = (i&3)+8;

9466

break;

9467

case OP_VSPLTISW3:

9468

for (unsigned i = 0; i != 16; ++i)

9469

ShufIdxs[i] = (i&3)+12;

9470

break;

9471

case OP_VSLDOI4:

9472

return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);

9473

case OP_VSLDOI8:

9474

return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);

9475

case OP_VSLDOI12:

9476

return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);

9477

}

9478

EVT VT = OpLHS.getValueType();

9479

OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);

9480

OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);

9481

SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);

9482

return DAG.getNode(ISD::BITCAST, dl, VT, T);

9483

}

9484

9485

/// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled

9486

/// by the VINSERTB instruction introduced in ISA 3.0, else just return default

9487

/// SDValue.

9488

SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,

9489

SelectionDAG &DAG) const {

9490

const unsigned BytesInVector = 16;

9491

bool IsLE = Subtarget.isLittleEndian();

9492

SDLoc dl(N);

9493

SDValue V1 = N->getOperand(0);

9494

SDValue V2 = N->getOperand(1);

9495

unsigned ShiftElts = 0, InsertAtByte = 0;

9496

bool Swap = false;

9497

9498

// Shifts required to get the byte we want at element 7.

9499

unsigned LittleEndianShifts[] = {8, 7, 6, 5, 4, 3, 2, 1,

9500

0, 15, 14, 13, 12, 11, 10, 9};

9501

unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,

9502

1, 2, 3, 4, 5, 6, 7, 8};

9503

9504

ArrayRef<int> Mask = N->getMask();

9505

int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};

9506

9507

// For each mask element, find out if we're just inserting something

9508

// from V2 into V1 or vice versa.

9509

// Possible permutations inserting an element from V2 into V1:

9510

// X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15

9511

// 0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15

9512

// ...

9513

// 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X

9514

// Inserting from V1 into V2 will be similar, except mask range will be

9515

// [16,31].

9516

9517

bool FoundCandidate = false;

9518

// If both vector operands for the shuffle are the same vector, the mask

9519

// will contain only elements from the first one and the second one will be

9520

// undef.

9521

unsigned VINSERTBSrcElem = IsLE ? 8 : 7;

9522

// Go through the mask of half-words to find an element that's being moved

9523

// from one vector to the other.

9524

for (unsigned i = 0; i < BytesInVector; ++i) {

9525

unsigned CurrentElement = Mask[i];

9526

// If 2nd operand is undefined, we should only look for element 7 in the

9527

// Mask.

9528

if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)

9529

continue;

9530

9531

bool OtherElementsInOrder = true;

9532

// Examine the other elements in the Mask to see if they're in original

9533

// order.

9534

for (unsigned j = 0; j < BytesInVector; ++j) {

9535

if (j == i)

9536

continue;

9537

// If CurrentElement is from V1 [0,15], then we the rest of the Mask to be

9538

// from V2 [16,31] and vice versa. Unless the 2nd operand is undefined,

9539

// in which we always assume we're always picking from the 1st operand.

9540

int MaskOffset =

9541

(!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;

9542

if (Mask[j] != OriginalOrder[j] + MaskOffset) {

9543

OtherElementsInOrder = false;

9544

break;

9545

}

9546

}

9547

// If other elements are in original order, we record the number of shifts

9548

// we need to get the element we want into element 7. Also record which byte

9549

// in the vector we should insert into.

9550

if (OtherElementsInOrder) {

9551

// If 2nd operand is undefined, we assume no shifts and no swapping.

9552

if (V2.isUndef()) {

9553

ShiftElts = 0;

9554

Swap = false;

9555

} else {

9556

// Only need the last 4-bits for shifts because operands will be swapped if CurrentElement is >= 2^4.

9557

ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]

9558

: BigEndianShifts[CurrentElement & 0xF];

9559

Swap = CurrentElement < BytesInVector;

9560

}

9561

InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;

9562

FoundCandidate = true;

9563

break;

9564

}

9565

}

9566

9567

if (!FoundCandidate)

9568

return SDValue();

9569

9570

// Candidate found, construct the proper SDAG sequence with VINSERTB,

9571

// optionally with VECSHL if shift is required.

9572

if (Swap)

9573

std::swap(V1, V2);

9574

if (V2.isUndef())

9575

V2 = V1;

9576

if (ShiftElts) {

9577

SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,

9578

DAG.getConstant(ShiftElts, dl, MVT::i32));

9579

return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,

9580

DAG.getConstant(InsertAtByte, dl, MVT::i32));

9581

}

9582

return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,

9583

DAG.getConstant(InsertAtByte, dl, MVT::i32));

9584

}

9585

9586

/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled

9587

/// by the VINSERTH instruction introduced in ISA 3.0, else just return default

9588

/// SDValue.

9589

SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,

9590

SelectionDAG &DAG) const {

9591

const unsigned NumHalfWords = 8;

9592

const unsigned BytesInVector = NumHalfWords * 2;

9593

// Check that the shuffle is on half-words.

9594

if (!isNByteElemShuffleMask(N, 2, 1))

9595

return SDValue();

9596

9597

bool IsLE = Subtarget.isLittleEndian();

9598

SDLoc dl(N);

9599

SDValue V1 = N->getOperand(0);

9600

SDValue V2 = N->getOperand(1);

9601

unsigned ShiftElts = 0, InsertAtByte = 0;

9602

bool Swap = false;

9603

9604

// Shifts required to get the half-word we want at element 3.

9605

unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};

9606

unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};

9607

9608

uint32_t Mask = 0;

9609

uint32_t OriginalOrderLow = 0x1234567;

9610

uint32_t OriginalOrderHigh = 0x89ABCDEF;

9611

// Now we look at mask elements 0,2,4,6,8,10,12,14. Pack the mask into a

9612

// 32-bit space, only need 4-bit nibbles per element.

9613

for (unsigned i = 0; i < NumHalfWords; ++i) {

9614

unsigned MaskShift = (NumHalfWords - 1 - i) * 4;

9615

Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);

9616

}

9617

9618

// For each mask element, find out if we're just inserting something

9619

// from V2 into V1 or vice versa. Possible permutations inserting an element

9620

// from V2 into V1:

9621

// X, 1, 2, 3, 4, 5, 6, 7

9622

// 0, X, 2, 3, 4, 5, 6, 7

9623

// 0, 1, X, 3, 4, 5, 6, 7

9624

// 0, 1, 2, X, 4, 5, 6, 7

9625

// 0, 1, 2, 3, X, 5, 6, 7

9626

// 0, 1, 2, 3, 4, X, 6, 7

9627

// 0, 1, 2, 3, 4, 5, X, 7

9628

// 0, 1, 2, 3, 4, 5, 6, X

9629

// Inserting from V1 into V2 will be similar, except mask range will be [8,15].

9630

9631

bool FoundCandidate = false;

9632

// Go through the mask of half-words to find an element that's being moved

9633

// from one vector to the other.

9634

for (unsigned i = 0; i < NumHalfWords; ++i) {

9635

unsigned MaskShift = (NumHalfWords - 1 - i) * 4;

9636

uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;

9637

uint32_t MaskOtherElts = ~(0xF << MaskShift);

9638

uint32_t TargetOrder = 0x0;

9639

9640

// If both vector operands for the shuffle are the same vector, the mask

9641

// will contain only elements from the first one and the second one will be

9642

// undef.

9643

if (V2.isUndef()) {

9644

ShiftElts = 0;

9645

unsigned VINSERTHSrcElem = IsLE ? 4 : 3;

9646

TargetOrder = OriginalOrderLow;

9647

Swap = false;

9648

// Skip if not the correct element or mask of other elements don't equal

9649

// to our expected order.

9650

if (MaskOneElt == VINSERTHSrcElem &&

9651

(Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {

9652

InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;

9653

FoundCandidate = true;

9654

break;

9655

}

9656

} else { // If both operands are defined.

9657

// Target order is [8,15] if the current mask is between [0,7].

9658

TargetOrder =

9659

(MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;

9660

// Skip if mask of other elements don't equal our expected order.

9661

if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {

9662

// We only need the last 3 bits for the number of shifts.

9663

ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]

9664

: BigEndianShifts[MaskOneElt & 0x7];

9665

InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;

9666

Swap = MaskOneElt < NumHalfWords;

9667

FoundCandidate = true;

9668

break;

9669

}

9670

}

9671

}

9672

9673

if (!FoundCandidate)

9674

return SDValue();

9675

9676

// Candidate found, construct the proper SDAG sequence with VINSERTH,

9677

// optionally with VECSHL if shift is required.

9678

if (Swap)

9679

std::swap(V1, V2);

9680

if (V2.isUndef())

9681

V2 = V1;

9682

SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);

9683

if (ShiftElts) {

9684

// Double ShiftElts because we're left shifting on v16i8 type.

9685

SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,

9686

DAG.getConstant(2 * ShiftElts, dl, MVT::i32));

9687

SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl);

9688

SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,

9689

DAG.getConstant(InsertAtByte, dl, MVT::i32));

9690

return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);

9691

}

9692

SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);

9693

SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,

9694

DAG.getConstant(InsertAtByte, dl, MVT::i32));

9695

return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);

9696

}

9697

9698

/// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be

9699

/// handled by the XXSPLTI32DX instruction introduced in ISA 3.1, otherwise

9700

/// return the default SDValue.

9701

SDValue PPCTargetLowering::lowerToXXSPLTI32DX(ShuffleVectorSDNode *SVN,

9702

SelectionDAG &DAG) const {

9703

// The LHS and RHS may be bitcasts to v16i8 as we canonicalize shuffles

9704

// to v16i8. Peek through the bitcasts to get the actual operands.

9705

SDValue LHS = peekThroughBitcasts(SVN->getOperand(0));

9706

SDValue RHS = peekThroughBitcasts(SVN->getOperand(1));

9707

9708

auto ShuffleMask = SVN->getMask();

9709

SDValue VecShuffle(SVN, 0);

9710

SDLoc DL(SVN);

9711

9712

// Check that we have a four byte shuffle.

9713

if (!isNByteElemShuffleMask(SVN, 4, 1))

9714

return SDValue();

9715

9716

// Canonicalize the RHS being a BUILD_VECTOR when lowering to xxsplti32dx.

9717

if (RHS->getOpcode() != ISD::BUILD_VECTOR) {

9718

std::swap(LHS, RHS);

9719

VecShuffle = DAG.getCommutedVectorShuffle(*SVN);

9720

ShuffleMask = cast<ShuffleVectorSDNode>(VecShuffle)->getMask();

9721

}

9722

9723

// Ensure that the RHS is a vector of constants.

9724

BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());

9725

if (!BVN)

9726

return SDValue();

9727

9728

// Check if RHS is a splat of 4-bytes (or smaller).

9729

APInt APSplatValue, APSplatUndef;

9730

unsigned SplatBitSize;

9731

bool HasAnyUndefs;

9732

if (!BVN->isConstantSplat(APSplatValue, APSplatUndef, SplatBitSize,

9733

HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||

9734

SplatBitSize > 32)

9735

return SDValue();

9736

9737

// Check that the shuffle mask matches the semantics of XXSPLTI32DX.

9738

// The instruction splats a constant C into two words of the source vector

9739

// producing { C, Unchanged, C, Unchanged } or { Unchanged, C, Unchanged, C }.

9740

// Thus we check that the shuffle mask is the equivalent of

9741

// <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively.

9742

// Note: the check above of isNByteElemShuffleMask() ensures that the bytes

9743

// within each word are consecutive, so we only need to check the first byte.

9744

SDValue Index;

9745

bool IsLE = Subtarget.isLittleEndian();

9746

if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) &&

9747

(ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 &&

9748

ShuffleMask[4] > 15 && ShuffleMask[12] > 15))

9749

Index = DAG.getTargetConstant(IsLE ? 0 : 1, DL, MVT::i32);

9750

else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) &&

9751

(ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 &&

9752

ShuffleMask[0] > 15 && ShuffleMask[8] > 15))

9753

Index = DAG.getTargetConstant(IsLE ? 1 : 0, DL, MVT::i32);

9754

else

9755

return SDValue();

9756

9757

// If the splat is narrower than 32-bits, we need to get the 32-bit value

9758

// for XXSPLTI32DX.

9759

unsigned SplatVal = APSplatValue.getZExtValue();

9760

for (; SplatBitSize < 32; SplatBitSize <<= 1)

9761

SplatVal |= (SplatVal << SplatBitSize);

9762

9763

SDValue SplatNode = DAG.getNode(

9764

PPCISD::XXSPLTI32DX, DL, MVT::v2i64, DAG.getBitcast(MVT::v2i64, LHS),

9765

Index, DAG.getTargetConstant(SplatVal, DL, MVT::i32));

9766

return DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, SplatNode);

9767

}

9768

9769

/// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8).

9770

/// We lower ROTL(v1i128) to vector_shuffle(v16i8) only if shift amount is

9771

/// a multiple of 8. Otherwise convert it to a scalar rotation(i128)

9772

/// i.e (or (shl x, C1), (srl x, 128-C1)).

9773

SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {

9774

assert(Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL")((Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL"
) ? static_cast<void> (0) : __assert_fail ("Op.getOpcode() == ISD::ROTL && \"Should only be called for ISD::ROTL\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 9774, __PRETTY_FUNCTION__));

9775

assert(Op.getValueType() == MVT::v1i128 &&((Op.getValueType() == MVT::v1i128 && "Only set v1i128 as custom, other type shouldn't reach here!"
) ? static_cast<void> (0) : __assert_fail ("Op.getValueType() == MVT::v1i128 && \"Only set v1i128 as custom, other type shouldn't reach here!\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 9776, __PRETTY_FUNCTION__))

9776

"Only set v1i128 as custom, other type shouldn't reach here!")((Op.getValueType() == MVT::v1i128 && "Only set v1i128 as custom, other type shouldn't reach here!"
) ? static_cast<void> (0) : __assert_fail ("Op.getValueType() == MVT::v1i128 && \"Only set v1i128 as custom, other type shouldn't reach here!\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 9776, __PRETTY_FUNCTION__));

9777

SDLoc dl(Op);

9778

SDValue N0 = peekThroughBitcasts(Op.getOperand(0));

9779

SDValue N1 = peekThroughBitcasts(Op.getOperand(1));

9780

unsigned SHLAmt = N1.getConstantOperandVal(0);

9781

if (SHLAmt % 8 == 0) {

9782

SmallVector<int, 16> Mask(16, 0);

9783

std::iota(Mask.begin(), Mask.end(), 0);

9784

std::rotate(Mask.begin(), Mask.begin() + SHLAmt / 8, Mask.end());

9785

if (SDValue Shuffle =

9786

DAG.getVectorShuffle(MVT::v16i8, dl, DAG.getBitcast(MVT::v16i8, N0),

9787

DAG.getUNDEF(MVT::v16i8), Mask))

9788

return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, Shuffle);

9789

}

9790

SDValue ArgVal = DAG.getBitcast(MVT::i128, N0);

9791

SDValue SHLOp = DAG.getNode(ISD::SHL, dl, MVT::i128, ArgVal,

9792

DAG.getConstant(SHLAmt, dl, MVT::i32));

9793

SDValue SRLOp = DAG.getNode(ISD::SRL, dl, MVT::i128, ArgVal,

9794

DAG.getConstant(128 - SHLAmt, dl, MVT::i32));

9795

SDValue OROp = DAG.getNode(ISD::OR, dl, MVT::i128, SHLOp, SRLOp);

9796

return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, OROp);

9797

}

9798

9799

/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this

9800

/// is a shuffle we can handle in a single instruction, return it. Otherwise,

9801

/// return the code it can be lowered into. Worst case, it can always be

9802

/// lowered into a vperm.

9803

SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,

9804

SelectionDAG &DAG) const {

9805

SDLoc dl(Op);

9806

SDValue V1 = Op.getOperand(0);

9807

SDValue V2 = Op.getOperand(1);

9808

ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);

9809

9810

// Any nodes that were combined in the target-independent combiner prior

9811

// to vector legalization will not be sent to the target combine. Try to

9812

// combine it here.

9813

if (SDValue NewShuffle = combineVectorShuffle(SVOp, DAG)) {

9814

if (!isa<ShuffleVectorSDNode>(NewShuffle))

9815

return NewShuffle;

9816

Op = NewShuffle;

9817

SVOp = cast<ShuffleVectorSDNode>(Op);

9818

V1 = Op.getOperand(0);

9819

V2 = Op.getOperand(1);

9820

}

9821

EVT VT = Op.getValueType();

9822

bool isLittleEndian = Subtarget.isLittleEndian();

9823

9824

unsigned ShiftElts, InsertAtByte;

9825

bool Swap = false;

9826

9827

// If this is a load-and-splat, we can do that with a single instruction

9828

// in some cases. However if the load has multiple uses, we don't want to

9829

// combine it because that will just produce multiple loads.

9830

bool IsPermutedLoad = false;

9831

const SDValue *InputLoad = getNormalLoadInput(V1, IsPermutedLoad);

9832

if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&

9833

(PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) &&

9834

InputLoad->hasOneUse()) {

9835

bool IsFourByte = PPC::isSplatShuffleMask(SVOp, 4);

9836

int SplatIdx =

9837

PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG);

9838

9839

// The splat index for permuted loads will be in the left half of the vector

9840

// which is strictly wider than the loaded value by 8 bytes. So we need to

9841

// adjust the splat index to point to the correct address in memory.

9842

if (IsPermutedLoad) {

9843

assert(isLittleEndian && "Unexpected permuted load on big endian target")((isLittleEndian && "Unexpected permuted load on big endian target"
) ? static_cast<void> (0) : __assert_fail ("isLittleEndian && \"Unexpected permuted load on big endian target\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 9843, __PRETTY_FUNCTION__));

9844

SplatIdx += IsFourByte ? 2 : 1;

9845

assert((SplatIdx < (IsFourByte ? 4 : 2)) &&(((SplatIdx < (IsFourByte ? 4 : 2)) && "Splat of a value outside of the loaded memory"
) ? static_cast<void> (0) : __assert_fail ("(SplatIdx < (IsFourByte ? 4 : 2)) && \"Splat of a value outside of the loaded memory\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 9846, __PRETTY_FUNCTION__))

9846

"Splat of a value outside of the loaded memory")(((SplatIdx < (IsFourByte ? 4 : 2)) && "Splat of a value outside of the loaded memory"
) ? static_cast<void> (0) : __assert_fail ("(SplatIdx < (IsFourByte ? 4 : 2)) && \"Splat of a value outside of the loaded memory\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 9846, __PRETTY_FUNCTION__));

9847

}

9848

9849

LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);

9850

// For 4-byte load-and-splat, we need Power9.

9851

if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {

9852

uint64_t Offset = 0;

9853

if (IsFourByte)

9854

Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;

9855

else

9856

Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;

9857

9858

SDValue BasePtr = LD->getBasePtr();

9859

if (Offset != 0)

9860

BasePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),

9861

BasePtr, DAG.getIntPtrConstant(Offset, dl));

9862

SDValue Ops[] = {

9863

LD->getChain(), // Chain

9864

BasePtr, // BasePtr

9865

DAG.getValueType(Op.getValueType()) // VT

9866

};

9867

SDVTList VTL =

9868

DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other);

9869

SDValue LdSplt =

9870

DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, VTL,

9871

Ops, LD->getMemoryVT(), LD->getMemOperand());

9872

DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1), LdSplt.getValue(1));

9873

if (LdSplt.getValueType() != SVOp->getValueType(0))

9874

LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt);

9875

return LdSplt;

9876

}

9877

}

9878

if (Subtarget.hasP9Vector() &&

9879

PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,

9880

isLittleEndian)) {

9881

if (Swap)

9882

std::swap(V1, V2);

9883

SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);

9884

SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);

9885

if (ShiftElts) {

9886

SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,

9887

DAG.getConstant(ShiftElts, dl, MVT::i32));

9888

SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,

9889

DAG.getConstant(InsertAtByte, dl, MVT::i32));

9890

return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);

9891

}

9892

SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,

9893

DAG.getConstant(InsertAtByte, dl, MVT::i32));

9894

return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);

9895

}

9896

9897

if (Subtarget.hasPrefixInstrs()) {

9898

SDValue SplatInsertNode;

9899

if ((SplatInsertNode = lowerToXXSPLTI32DX(SVOp, DAG)))

9900

return SplatInsertNode;

9901

}

9902

9903

if (Subtarget.hasP9Altivec()) {

9904

SDValue NewISDNode;

9905

if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))

9906

return NewISDNode;

9907

9908

if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))

9909

return NewISDNode;

9910

}

9911

9912

if (Subtarget.hasVSX() &&

9913

PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {

9914

if (Swap)

9915

std::swap(V1, V2);

9916

SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);

9917

SDValue Conv2 =

9918

DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);

9919

9920

SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,

9921

DAG.getConstant(ShiftElts, dl, MVT::i32));

9922

return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);

9923

}

9924

9925

if (Subtarget.hasVSX() &&

9926

PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {

9927

if (Swap)

9928

std::swap(V1, V2);

9929

SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);

9930

SDValue Conv2 =

9931

DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);

9932

9933

SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,

9934

DAG.getConstant(ShiftElts, dl, MVT::i32));

9935

return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);

9936

}

9937

9938

if (Subtarget.hasP9Vector()) {

9939

if (PPC::isXXBRHShuffleMask(SVOp)) {

9940

SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);

9941

SDValue ReveHWord = DAG.getNode(ISD::BSWAP, dl, MVT::v8i16, Conv);

9942

return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);

9943

} else if (PPC::isXXBRWShuffleMask(SVOp)) {

9944

SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);

9945

SDValue ReveWord = DAG.getNode(ISD::BSWAP, dl, MVT::v4i32, Conv);

9946

return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);

9947

} else if (PPC::isXXBRDShuffleMask(SVOp)) {

9948

SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);

9949

SDValue ReveDWord = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Conv);

9950

return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);

9951

} else if (PPC::isXXBRQShuffleMask(SVOp)) {

9952

SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);

9953

SDValue ReveQWord = DAG.getNode(ISD::BSWAP, dl, MVT::v1i128, Conv);

9954

return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);

9955

}

9956

}

9957

9958

if (Subtarget.hasVSX()) {

9959

if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {

9960

int SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG);

9961

9962

SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);

9963

SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,

9964

DAG.getConstant(SplatIdx, dl, MVT::i32));

9965

return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);

9966

}

9967

9968

// Left shifts of 8 bytes are actually swaps. Convert accordingly.

9969

if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {

9970

SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);

9971

SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);

9972

return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);

9973

}

9974

}

9975

9976

// Cases that are handled by instructions that take permute immediates

9977

// (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be

9978

// selected by the instruction selector.

9979

if (V2.isUndef()) {

9980

if (PPC::isSplatShuffleMask(SVOp, 1) ||

9981

PPC::isSplatShuffleMask(SVOp, 2) ||

9982

PPC::isSplatShuffleMask(SVOp, 4) ||

9983

PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||

9984

PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||

9985

PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||

9986

PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||

9987

PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||

9988

PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||

9989

PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||

9990

PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||

9991

PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||

9992

(Subtarget.hasP8Altivec() && (

9993

PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||

9994

PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||

9995

PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {

9996

return Op;

9997

}

9998

}

9999

10000

// Altivec has a variety of "shuffle immediates" that take two vector inputs

10001

// and produce a fixed permutation. If any of these match, do not lower to

10002

// VPERM.

10003

unsigned int ShuffleKind = isLittleEndian ? 2 : 0;

10004

if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||

10005

PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||

10006

PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||

10007

PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||

10008

PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||

10009

PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||

10010

PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||

10011

PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||

10012

PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||

10013

(Subtarget.hasP8Altivec() && (

10014

PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||

10015

PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||

10016

PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))

10017

return Op;

10018

10019

// Check to see if this is a shuffle of 4-byte values. If so, we can use our

10020

// perfect shuffle table to emit an optimal matching sequence.

10021

ArrayRef<int> PermMask = SVOp->getMask();

10022

10023

unsigned PFIndexes[4];

10024

bool isFourElementShuffle = true;

10025

for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number

10026

unsigned EltNo = 8; // Start out undef.

10027

for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.

10028

if (PermMask[i*4+j] < 0)

10029

continue; // Undef, ignore it.

10030

10031

unsigned ByteSource = PermMask[i*4+j];

10032

if ((ByteSource & 3) != j) {

10033

isFourElementShuffle = false;

10034

break;

10035

}

10036

10037

if (EltNo == 8) {

10038

EltNo = ByteSource/4;

10039

} else if (EltNo != ByteSource/4) {

10040

isFourElementShuffle = false;

10041

break;

10042

}

10043

}

10044

PFIndexes[i] = EltNo;

10045

}

10046

10047

// If this shuffle can be expressed as a shuffle of 4-byte elements, use the

10048

// perfect shuffle vector to determine if it is cost effective to do this as

10049

// discrete instructions, or whether we should use a vperm.

10050

// For now, we skip this for little endian until such time as we have a

10051

// little-endian perfect shuffle table.

10052

if (isFourElementShuffle && !isLittleEndian) {

10053

// Compute the index in the perfect shuffle table.

10054

unsigned PFTableIndex =

10055

PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];

10056

10057

unsigned PFEntry = PerfectShuffleTable[PFTableIndex];

10058

unsigned Cost = (PFEntry >> 30);

10059

10060

// Determining when to avoid vperm is tricky. Many things affect the cost

10061

// of vperm, particularly how many times the perm mask needs to be computed.

10062

// For example, if the perm mask can be hoisted out of a loop or is already

10063

// used (perhaps because there are multiple permutes with the same shuffle

10064

// mask?) the vperm has a cost of 1. OTOH, hoisting the permute mask out of

10065

// the loop requires an extra register.

10066

//

10067

// As a compromise, we only emit discrete instructions if the shuffle can be

10068

// generated in 3 or fewer operations. When we have loop information

10069

// available, if this block is within a loop, we should avoid using vperm

10070

// for 3-operation perms and use a constant pool load instead.

10071

if (Cost < 3)

10072

return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);

10073

}

10074

10075

// Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant

10076

// vector that will get spilled to the constant pool.

10077

if (V2.isUndef()) V2 = V1;

10078

10079

// The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except

10080

// that it is in input element units, not in bytes. Convert now.

10081

10082

// For little endian, the order of the input vectors is reversed, and

10083

// the permutation mask is complemented with respect to 31. This is

10084

// necessary to produce proper semantics with the big-endian-biased vperm

10085

// instruction.

10086

EVT EltVT = V1.getValueType().getVectorElementType();

10087

unsigned BytesPerElement = EltVT.getSizeInBits()/8;

10088

10089

SmallVector<SDValue, 16> ResultMask;

10090

for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {

10091

unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];

10092

10093

for (unsigned j = 0; j != BytesPerElement; ++j)

10094

if (isLittleEndian)

10095

ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement + j),

10096

dl, MVT::i32));

10097

else

10098

ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement + j, dl,

10099

MVT::i32));

10100

}

10101

10102

ShufflesHandledWithVPERM++;

10103

SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);

10104

LLVM_DEBUG(dbgs() << "Emitting a VPERM for the following shuffle:\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-lowering")) { dbgs() << "Emitting a VPERM for the following shuffle:\n"
; } } while (false);

10105

LLVM_DEBUG(SVOp->dump())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-lowering")) { SVOp->dump(); } } while (false);

10106

LLVM_DEBUG(dbgs() << "With the following permute control vector:\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-lowering")) { dbgs() << "With the following permute control vector:\n"
; } } while (false);

10107

LLVM_DEBUG(VPermMask.dump())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-lowering")) { VPermMask.dump(); } } while (false);

10108

10109

if (isLittleEndian)

10110

return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),

10111

V2, V1, VPermMask);

10112

else

10113

return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),

10114

V1, V2, VPermMask);

10115

}

10116

10117

/// getVectorCompareInfo - Given an intrinsic, return false if it is not a

10118

/// vector comparison. If it is, return true and fill in Opc/isDot with

10119

/// information about the intrinsic.

10120

static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,

10121

bool &isDot, const PPCSubtarget &Subtarget) {

10122

unsigned IntrinsicID =

10123

cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();

10124

CompareOpc = -1;

10125

isDot = false;

10126

switch (IntrinsicID) {

10127

default:

10128

return false;

10129

// Comparison predicates.

10130

case Intrinsic::ppc_altivec_vcmpbfp_p:

10131

CompareOpc = 966;

10132

isDot = true;

10133

break;

10134

case Intrinsic::ppc_altivec_vcmpeqfp_p:

10135

CompareOpc = 198;

10136

isDot = true;

10137

break;

10138

case Intrinsic::ppc_altivec_vcmpequb_p:

10139

CompareOpc = 6;

10140

isDot = true;

10141

break;

10142

case Intrinsic::ppc_altivec_vcmpequh_p:

10143

CompareOpc = 70;

10144

isDot = true;

10145

break;

10146

case Intrinsic::ppc_altivec_vcmpequw_p:

10147

CompareOpc = 134;

10148

isDot = true;

10149

break;

10150

case Intrinsic::ppc_altivec_vcmpequd_p:

10151

if (Subtarget.hasP8Altivec()) {

10152

CompareOpc = 199;

10153

isDot = true;

10154

} else

10155

return false;

10156

break;

10157

case Intrinsic::ppc_altivec_vcmpneb_p:

10158

case Intrinsic::ppc_altivec_vcmpneh_p:

10159

case Intrinsic::ppc_altivec_vcmpnew_p:

10160

case Intrinsic::ppc_altivec_vcmpnezb_p:

10161

case Intrinsic::ppc_altivec_vcmpnezh_p:

10162

case Intrinsic::ppc_altivec_vcmpnezw_p:

10163

if (Subtarget.hasP9Altivec()) {

10164

switch (IntrinsicID) {

10165

default:

10166

llvm_unreachable("Unknown comparison intrinsic.")::llvm::llvm_unreachable_internal("Unknown comparison intrinsic."
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 10166);

10167

case Intrinsic::ppc_altivec_vcmpneb_p:

10168

CompareOpc = 7;

10169

break;

10170

case Intrinsic::ppc_altivec_vcmpneh_p:

10171

CompareOpc = 71;

10172

break;

10173

case Intrinsic::ppc_altivec_vcmpnew_p:

10174

CompareOpc = 135;

10175

break;

10176

case Intrinsic::ppc_altivec_vcmpnezb_p:

10177

CompareOpc = 263;

10178

break;

10179

case Intrinsic::ppc_altivec_vcmpnezh_p:

10180

CompareOpc = 327;

10181

break;

10182

case Intrinsic::ppc_altivec_vcmpnezw_p:

10183

CompareOpc = 391;

10184

break;

10185

}

10186

isDot = true;

10187

} else

10188

return false;

10189

break;

10190

case Intrinsic::ppc_altivec_vcmpgefp_p:

10191

CompareOpc = 454;

10192

isDot = true;

10193

break;

10194

case Intrinsic::ppc_altivec_vcmpgtfp_p:

10195

CompareOpc = 710;

10196

isDot = true;

10197

break;

10198

case Intrinsic::ppc_altivec_vcmpgtsb_p:

10199

CompareOpc = 774;

10200

isDot = true;

10201

break;

10202

case Intrinsic::ppc_altivec_vcmpgtsh_p:

10203

CompareOpc = 838;

10204

isDot = true;

10205

break;

10206

case Intrinsic::ppc_altivec_vcmpgtsw_p:

10207

CompareOpc = 902;

10208

isDot = true;

10209

break;

10210

case Intrinsic::ppc_altivec_vcmpgtsd_p:

10211

if (Subtarget.hasP8Altivec()) {

10212

CompareOpc = 967;

10213

isDot = true;

10214

} else

10215

return false;

10216

break;

10217

case Intrinsic::ppc_altivec_vcmpgtub_p:

10218

CompareOpc = 518;

10219

isDot = true;

10220

break;

10221

case Intrinsic::ppc_altivec_vcmpgtuh_p:

10222

CompareOpc = 582;

10223

isDot = true;

10224

break;

10225

case Intrinsic::ppc_altivec_vcmpgtuw_p:

10226

CompareOpc = 646;

10227

isDot = true;

10228

break;

10229

case Intrinsic::ppc_altivec_vcmpgtud_p:

10230

if (Subtarget.hasP8Altivec()) {

10231

CompareOpc = 711;

10232

isDot = true;

10233

} else

10234

return false;

10235

break;

10236

10237

case Intrinsic::ppc_altivec_vcmpequq:

10238

case Intrinsic::ppc_altivec_vcmpgtsq:

10239

case Intrinsic::ppc_altivec_vcmpgtuq:

10240

if (!Subtarget.isISA3_1())

10241

return false;

10242

switch (IntrinsicID) {

10243

default:

10244

llvm_unreachable("Unknown comparison intrinsic.")::llvm::llvm_unreachable_internal("Unknown comparison intrinsic."
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 10244);

10245

case Intrinsic::ppc_altivec_vcmpequq:

10246

CompareOpc = 455;

10247

break;

10248

case Intrinsic::ppc_altivec_vcmpgtsq:

10249

CompareOpc = 903;

10250

break;

10251

case Intrinsic::ppc_altivec_vcmpgtuq:

10252

CompareOpc = 647;

10253

break;

10254

}

10255

break;

10256

10257

// VSX predicate comparisons use the same infrastructure

10258

case Intrinsic::ppc_vsx_xvcmpeqdp_p:

10259

case Intrinsic::ppc_vsx_xvcmpgedp_p:

10260

case Intrinsic::ppc_vsx_xvcmpgtdp_p:

10261

case Intrinsic::ppc_vsx_xvcmpeqsp_p:

10262

case Intrinsic::ppc_vsx_xvcmpgesp_p:

10263

case Intrinsic::ppc_vsx_xvcmpgtsp_p:

10264

if (Subtarget.hasVSX()) {

10265

switch (IntrinsicID) {

10266

case Intrinsic::ppc_vsx_xvcmpeqdp_p:

10267

CompareOpc = 99;

10268

break;

10269

case Intrinsic::ppc_vsx_xvcmpgedp_p:

10270

CompareOpc = 115;

10271

break;

10272

case Intrinsic::ppc_vsx_xvcmpgtdp_p:

10273

CompareOpc = 107;

10274

break;

10275

case Intrinsic::ppc_vsx_xvcmpeqsp_p:

10276

CompareOpc = 67;

10277

break;

10278

case Intrinsic::ppc_vsx_xvcmpgesp_p:

10279

CompareOpc = 83;

10280

break;

10281

case Intrinsic::ppc_vsx_xvcmpgtsp_p:

10282

CompareOpc = 75;

10283

break;

10284

}

10285

isDot = true;

10286

} else

10287

return false;

10288

break;

10289

10290

// Normal Comparisons.

10291

case Intrinsic::ppc_altivec_vcmpbfp:

10292

CompareOpc = 966;

10293

break;

10294

case Intrinsic::ppc_altivec_vcmpeqfp:

10295

CompareOpc = 198;

10296

break;

10297

case Intrinsic::ppc_altivec_vcmpequb:

10298

CompareOpc = 6;

10299

break;

10300

case Intrinsic::ppc_altivec_vcmpequh:

10301

CompareOpc = 70;

10302

break;

10303

case Intrinsic::ppc_altivec_vcmpequw:

10304

CompareOpc = 134;

10305

break;

10306

case Intrinsic::ppc_altivec_vcmpequd:

10307

if (Subtarget.hasP8Altivec())

10308

CompareOpc = 199;

10309

else

10310

return false;

10311

break;

10312

case Intrinsic::ppc_altivec_vcmpneb:

10313

case Intrinsic::ppc_altivec_vcmpneh:

10314

case Intrinsic::ppc_altivec_vcmpnew:

10315

case Intrinsic::ppc_altivec_vcmpnezb:

10316

case Intrinsic::ppc_altivec_vcmpnezh:

10317

case Intrinsic::ppc_altivec_vcmpnezw:

10318

if (Subtarget.hasP9Altivec())

10319

switch (IntrinsicID) {

10320

default:

10321

llvm_unreachable("Unknown comparison intrinsic.")::llvm::llvm_unreachable_internal("Unknown comparison intrinsic."
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 10321);

10322

case Intrinsic::ppc_altivec_vcmpneb:

10323

CompareOpc = 7;

10324

break;

10325

case Intrinsic::ppc_altivec_vcmpneh:

10326

CompareOpc = 71;

10327

break;

10328

case Intrinsic::ppc_altivec_vcmpnew:

10329

CompareOpc = 135;

10330

break;

10331

case Intrinsic::ppc_altivec_vcmpnezb:

10332

CompareOpc = 263;

10333

break;

10334

case Intrinsic::ppc_altivec_vcmpnezh:

10335

CompareOpc = 327;

10336

break;

10337

case Intrinsic::ppc_altivec_vcmpnezw:

10338

CompareOpc = 391;

10339

break;

10340

}

10341

else

10342

return false;

10343

break;

10344

case Intrinsic::ppc_altivec_vcmpgefp:

10345

CompareOpc = 454;

10346

break;

10347

case Intrinsic::ppc_altivec_vcmpgtfp:

10348

CompareOpc = 710;

10349

break;

10350

case Intrinsic::ppc_altivec_vcmpgtsb:

10351

CompareOpc = 774;

10352

break;

10353

case Intrinsic::ppc_altivec_vcmpgtsh:

10354

CompareOpc = 838;

10355

break;

10356

case Intrinsic::ppc_altivec_vcmpgtsw:

10357

CompareOpc = 902;

10358

break;

10359

case Intrinsic::ppc_altivec_vcmpgtsd:

10360

if (Subtarget.hasP8Altivec())

10361

CompareOpc = 967;

10362

else

10363

return false;

10364

break;

10365

case Intrinsic::ppc_altivec_vcmpgtub:

10366

CompareOpc = 518;

10367

break;

10368

case Intrinsic::ppc_altivec_vcmpgtuh:

10369

CompareOpc = 582;

10370

break;

10371

case Intrinsic::ppc_altivec_vcmpgtuw:

10372

CompareOpc = 646;

10373

break;

10374

case Intrinsic::ppc_altivec_vcmpgtud:

10375

if (Subtarget.hasP8Altivec())

10376

CompareOpc = 711;

10377

else

10378

return false;

10379

break;

10380

case Intrinsic::ppc_altivec_vcmpequq_p:

10381

case Intrinsic::ppc_altivec_vcmpgtsq_p:

10382

case Intrinsic::ppc_altivec_vcmpgtuq_p:

10383

if (!Subtarget.isISA3_1())

10384

return false;

10385

switch (IntrinsicID) {

10386

default:

10387

llvm_unreachable("Unknown comparison intrinsic.")::llvm::llvm_unreachable_internal("Unknown comparison intrinsic."
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 10387);

10388

case Intrinsic::ppc_altivec_vcmpequq_p:

10389

CompareOpc = 455;

10390

break;

10391

case Intrinsic::ppc_altivec_vcmpgtsq_p:

10392

CompareOpc = 903;

10393

break;

10394

case Intrinsic::ppc_altivec_vcmpgtuq_p:

10395

CompareOpc = 647;

10396

break;

10397

}

10398

isDot = true;

10399

break;

10400

}

10401

return true;

10402

}

10403

10404

/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom

10405

/// lower, do it, otherwise return null.

10406

SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,

10407

SelectionDAG &DAG) const {

10408

unsigned IntrinsicID =

10409

cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();

10410

10411

SDLoc dl(Op);

10412

10413

switch (IntrinsicID) {

10414

case Intrinsic::thread_pointer:

10415

// Reads the thread pointer register, used for __builtin_thread_pointer.

10416

if (Subtarget.isPPC64())

10417

return DAG.getRegister(PPC::X13, MVT::i64);

10418

return DAG.getRegister(PPC::R2, MVT::i32);

10419

10420

case Intrinsic::ppc_mma_disassemble_acc:

10421

case Intrinsic::ppc_mma_disassemble_pair: {

10422

int NumVecs = 2;

10423

SDValue WideVec = Op.getOperand(1);

10424

if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {

10425

NumVecs = 4;

10426

WideVec = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, WideVec);

10427

}

10428

SmallVector<SDValue, 4> RetOps;

10429

for (int VecNo = 0; VecNo < NumVecs; VecNo++) {

10430

SDValue Extract = DAG.getNode(

10431

PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, WideVec,

10432

DAG.getConstant(Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo

10433

: VecNo,

10434

dl, MVT::i64));

10435

RetOps.push_back(Extract);

10436

}

10437

return DAG.getMergeValues(RetOps, dl);

10438

}

10439

}

10440

10441

// If this is a lowered altivec predicate compare, CompareOpc is set to the

10442

// opcode number of the comparison.

10443

int CompareOpc;

10444

bool isDot;

10445

if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))

10446

return SDValue(); // Don't custom lower most intrinsics.

10447

10448

// If this is a non-dot comparison, make the VCMP node and we are done.

10449

if (!isDot) {

10450

SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),

10451

Op.getOperand(1), Op.getOperand(2),

10452

DAG.getConstant(CompareOpc, dl, MVT::i32));

10453

return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);

10454

}

10455

10456

// Create the PPCISD altivec 'dot' comparison node.

10457

SDValue Ops[] = {

10458

Op.getOperand(2), // LHS

10459

Op.getOperand(3), // RHS

10460

DAG.getConstant(CompareOpc, dl, MVT::i32)

10461

};

10462

EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };

10463

SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);

10464

10465

// Now that we have the comparison, emit a copy from the CR to a GPR.

10466

// This is flagged to the above dot comparison.

10467

SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,

10468

DAG.getRegister(PPC::CR6, MVT::i32),

10469

CompNode.getValue(1));

10470

10471

// Unpack the result based on how the target uses it.

10472

unsigned BitNo; // Bit # of CR6.

10473

bool InvertBit; // Invert result?

10474

switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {

10475

default: // Can't happen, don't crash on invalid number though.

10476

case 0: // Return the value of the EQ bit of CR6.

10477

BitNo = 0; InvertBit = false;

10478

break;

10479

case 1: // Return the inverted value of the EQ bit of CR6.

10480

BitNo = 0; InvertBit = true;

10481

break;

10482

case 2: // Return the value of the LT bit of CR6.

10483

BitNo = 2; InvertBit = false;

10484

break;

10485

case 3: // Return the inverted value of the LT bit of CR6.

10486

BitNo = 2; InvertBit = true;

10487

break;

10488

}

10489

10490

// Shift the bit into the low position.

10491

Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,

10492

DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));

10493

// Isolate the bit.

10494

Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,

10495

DAG.getConstant(1, dl, MVT::i32));

10496

10497

// If we are supposed to, toggle the bit.

10498

if (InvertBit)

10499

Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,

10500

DAG.getConstant(1, dl, MVT::i32));

10501

return Flags;

10502

}

10503

10504

/// Custom-lower a void intrinsic node. Only llvm.ppc.cfence is handled, by
/// emitting a CFENCE8 machine node; every other intrinsic returns a null
/// SDValue.
SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
                                               SelectionDAG &DAG) const {
  // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
  // the beginning of the argument list.
  const bool HasLeadingChain = !isa<ConstantSDNode>(Op.getOperand(0));
  const int ArgStart = HasLeadingChain ? 1 : 0;
  SDLoc DL(Op);

  const uint64_t IntrinsicID =
      cast<ConstantSDNode>(Op.getOperand(ArgStart))->getZExtValue();
  if (IntrinsicID != Intrinsic::ppc_cfence)
    return SDValue();

  assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
  assert(Subtarget.isPPC64() && "Only 64-bit is supported for now.");

  // The fence operand is any-extended to i64 and the node is chained on
  // operand 0.
  SDValue Widened = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
                                Op.getOperand(ArgStart + 1));
  SDNode *Fence = DAG.getMachineNode(PPC::CFENCE8, DL, MVT::Other, Widened,
                                     Op.getOperand(0));
  return SDValue(Fence, 0);
}

10525

10526

// Lower scalar BSWAP64 to xxbrd.

10527

SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {

10528

SDLoc dl(Op);

10529

// MTVSRDD

10530

Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),

10531

Op.getOperand(0));

10532

// XXBRD

10533

Op = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Op);

10534

// MFVSRD

10535

int VectorIndex = 0;

10536

if (Subtarget.isLittleEndian())

10537

VectorIndex = 1;

10538

Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,

10539

DAG.getTargetConstant(VectorIndex, dl, MVT::i32));

10540

return Op;

10541

}

10542

10543

// ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be

10544

// compared to a value that is atomically loaded (atomic loads zero-extend).

10545

SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,

10546

SelectionDAG &DAG) const {

10547

assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&((Op.getOpcode() == ISD::ATOMIC_CMP_SWAP && "Expecting an atomic compare-and-swap here."
) ? static_cast<void> (0) : __assert_fail ("Op.getOpcode() == ISD::ATOMIC_CMP_SWAP && \"Expecting an atomic compare-and-swap here.\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 10548, __PRETTY_FUNCTION__))

10548

"Expecting an atomic compare-and-swap here.")((Op.getOpcode() == ISD::ATOMIC_CMP_SWAP && "Expecting an atomic compare-and-swap here."
) ? static_cast<void> (0) : __assert_fail ("Op.getOpcode() == ISD::ATOMIC_CMP_SWAP && \"Expecting an atomic compare-and-swap here.\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 10548, __PRETTY_FUNCTION__));

10549

SDLoc dl(Op);

10550

auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());

10551

EVT MemVT = AtomicNode->getMemoryVT();

10552

if (MemVT.getSizeInBits() >= 32)

10553

return Op;

10554

10555

SDValue CmpOp = Op.getOperand(2);

10556

// If this is already correctly zero-extended, leave it alone.

10557

auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());

10558

if (DAG.MaskedValueIsZero(CmpOp, HighBits))

10559

return Op;

10560

10561

// Clear the high bits of the compare operand.

10562

unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;

10563

SDValue NewCmpOp =

10564

DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,

10565

DAG.getConstant(MaskVal, dl, MVT::i32));

10566

10567

// Replace the existing compare operand with the properly zero-extended one.

10568

SmallVector<SDValue, 4> Ops;

10569

for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)

10570

Ops.push_back(AtomicNode->getOperand(i));

10571

Ops[2] = NewCmpOp;

10572

MachineMemOperand *MMO = AtomicNode->getMemOperand();

10573

SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);

10574

auto NodeTy =

10575

(MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;

10576

return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);

10577

}

10578

10579

/// Lower SCALAR_TO_VECTOR through memory: the scalar operand is stored to a
/// fresh 16-byte stack slot and the slot is reloaded with the result type.
SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc dl(Op);

  // Create a stack slot that is 16-byte aligned.
  MachineFrameInfo &FrameInfo = DAG.getMachineFunction().getFrameInfo();
  const int SlotIdx = FrameInfo.CreateStackObject(16, Align(16), false);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue SlotAddr = DAG.getFrameIndex(SlotIdx, PtrVT);

  // Store the input value into Value#0 of the stack slot.
  SDValue Spill = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
                               SlotAddr, MachinePointerInfo());
  // Load it out.
  return DAG.getLoad(Op.getValueType(), dl, Spill, SlotAddr,
                     MachinePointerInfo());
}

10594

10595

/// Lower INSERT_VECTOR_ELT. A variable index yields a null SDValue; v8i16 and
/// v16i8 with a constant index become MTVSRZ + VECINSERT; any other type is
/// returned unchanged.
SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
                                                  SelectionDAG &DAG) const {
  assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
         "Should only be called for ISD::INSERT_VECTOR_ELT");

  // We have legal lowering for constant indices but not for variable ones.
  auto *IndexNode = dyn_cast<ConstantSDNode>(Op.getOperand(2));
  if (!IndexNode)
    return SDValue();

  EVT VT = Op.getValueType();
  SDLoc dl(Op);
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);

  // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
  if (VT != MVT::v8i16 && VT != MVT::v16i8)
    return Op;

  SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2);
  const unsigned BytesInEachElement =
      VT.getVectorElementType().getSizeInBits() / 8;
  const unsigned InsertAtElement = IndexNode->getZExtValue();
  unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
  // On little-endian the byte offset is mirrored within the 16-byte vector.
  if (Subtarget.isLittleEndian())
    InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;

  return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz,
                     DAG.getConstant(InsertAtByte, dl, MVT::i32));
}

10623

10624

/// Lower a load of v256i1 or v512i1 into consecutive v16i8 loads combined by
/// PAIR_BUILD or ACC_BUILD. Loads of any other type are returned unchanged.
SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
                                           SelectionDAG &DAG) const {
  SDLoc dl(Op);
  LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
  SDValue LoadChain = LN->getChain();
  SDValue BasePtr = LN->getBasePtr();
  EVT VT = Op.getValueType();

  const bool IsAccumulator = VT == MVT::v512i1;
  if (VT != MVT::v256i1 && !IsAccumulator)
    return Op;

  // Type v256i1 is used for pairs and v512i1 is used for accumulators.
  // Here we create 2 or 4 v16i8 loads to load the pair or accumulator value in
  // 2 or 4 vsx registers.
  assert((VT != MVT::v512i1 || Subtarget.hasMMA()) &&
         "Type unsupported without MMA");
  assert((VT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
         "Type unsupported without paired vector support");

  Align Alignment = LN->getAlign();
  SmallVector<SDValue, 4> Loads;
  SmallVector<SDValue, 4> LoadChains;
  const unsigned NumVecs = VT.getSizeInBits() / 128;
  for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
    const unsigned ByteOffset = Idx * 16;
    SDValue Piece =
        DAG.getLoad(MVT::v16i8, dl, LoadChain, BasePtr,
                    LN->getPointerInfo().getWithOffset(ByteOffset),
                    commonAlignment(Alignment, ByteOffset),
                    LN->getMemOperand()->getFlags(), LN->getAAInfo());
    // Step the address forward to the next 16-byte piece.
    BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
                          DAG.getConstant(16, dl, BasePtr.getValueType()));
    Loads.push_back(Piece);
    LoadChains.push_back(Piece.getValue(1));
  }

  // Little-endian targets hand the pieces to the build node in reverse order.
  if (Subtarget.isLittleEndian()) {
    std::reverse(Loads.begin(), Loads.end());
    std::reverse(LoadChains.begin(), LoadChains.end());
  }

  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
  const unsigned BuildOpc =
      IsAccumulator ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD;
  SDValue Value = DAG.getNode(BuildOpc, dl, VT, Loads);
  SDValue RetOps[] = {Value, TF};
  return DAG.getMergeValues(RetOps, dl);
}

10668

10669

/// Lower a store of v256i1 or v512i1 into consecutive v16i8 stores joined by
/// a token factor. Stores of any other type are returned unchanged.
SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc dl(Op);
  StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
  SDValue StoreChain = SN->getChain();
  SDValue BasePtr = SN->getBasePtr();
  SDValue Value = SN->getValue();
  EVT StoreVT = Value.getValueType();

  const bool IsAccumulator = StoreVT == MVT::v512i1;
  if (StoreVT != MVT::v256i1 && !IsAccumulator)
    return Op;

  // Type v256i1 is used for pairs and v512i1 is used for accumulators.
  // Here we create 2 or 4 v16i8 stores to store the pair or accumulator
  // underlying registers individually.
  assert((StoreVT != MVT::v512i1 || Subtarget.hasMMA()) &&
         "Type unsupported without MMA");
  assert((StoreVT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
         "Type unsupported without paired vector support");

  Align Alignment = SN->getAlign();
  SmallVector<SDValue, 4> Stores;
  // An accumulator is first passed through XXMFACC and covers four vectors;
  // a pair covers two.
  if (IsAccumulator)
    Value = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, Value);
  const unsigned NumVecs = IsAccumulator ? 4 : 2;

  for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
    // Little-endian targets extract the registers in reverse order.
    const unsigned VecNum =
        Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx;
    const unsigned ByteOffset = Idx * 16;
    SDValue Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, Value,
                              DAG.getConstant(VecNum, dl, MVT::i64));
    SDValue Piece =
        DAG.getStore(StoreChain, dl, Elt, BasePtr,
                     SN->getPointerInfo().getWithOffset(ByteOffset),
                     commonAlignment(Alignment, ByteOffset),
                     SN->getMemOperand()->getFlags(), SN->getAAInfo());
    // Step the address forward to the next 16-byte piece.
    BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
                          DAG.getConstant(16, dl, BasePtr.getValueType()));
    Stores.push_back(Piece);
  }

  return DAG.getTokenFactor(dl, Stores);
}

10711

10712

/// Lower a vector MUL of type v4i32 or v16i8 using Altivec multiply
/// intrinsics. Any other type hits llvm_unreachable.
SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);

  if (Op.getValueType() == MVT::v4i32) {
    SDValue LHS = Op.getOperand(0);
    SDValue RHS = Op.getOperand(1);

    SDValue Zero = getCanonicalConstSplat(0, 1, MVT::v4i32, DAG, dl);
    // +16 as shift amt.
    SDValue Neg16 = getCanonicalConstSplat(-16, 4, MVT::v4i32, DAG, dl);
    // = vrlw RHS, 16
    SDValue RHSSwap =
        BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);

    // Shrinkify inputs to v8i16.
    LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
    RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
    RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);

    // Low parts multiplied together, generating 32-bit results (we ignore the
    // top parts).
    SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh, LHS, RHS,
                                      DAG, dl, MVT::v4i32);

    SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm, LHS,
                                      RHSSwap, Zero, DAG, dl, MVT::v4i32);
    // Shift the high parts up 16 bits.
    HiProd =
        BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd, Neg16, DAG, dl);
    return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
  }

  if (Op.getValueType() == MVT::v16i8) {
    SDValue LHS = Op.getOperand(0);
    SDValue RHS = Op.getOperand(1);
    const bool isLittleEndian = Subtarget.isLittleEndian();

    // Multiply the even 8-bit parts, producing 16-bit sums.
    SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub, LHS,
                                         RHS, DAG, dl, MVT::v8i16);
    EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);

    // Multiply the odd 8-bit parts, producing 16-bit sums.
    SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub, LHS,
                                        RHS, DAG, dl, MVT::v8i16);
    OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);

    // Merge the results together.  Because vmuleub and vmuloub are
    // instructions with a big-endian bias, we must reverse the
    // element numbering and reverse the meaning of "odd" and "even"
    // when generating little endian code.
    //
    // Each result byte pair takes one byte from each shuffle input: byte 2*i
    // on little-endian, byte 2*i+1 on big-endian.
    const unsigned ByteInPair = isLittleEndian ? 0 : 1;
    int Ops[16];
    for (unsigned i = 0; i != 8; ++i) {
      Ops[i * 2] = 2 * i + ByteInPair;
      Ops[i * 2 + 1] = 2 * i + ByteInPair + 16;
    }

    SDValue First = isLittleEndian ? OddParts : EvenParts;
    SDValue Second = isLittleEndian ? EvenParts : OddParts;
    return DAG.getVectorShuffle(MVT::v16i8, dl, First, Second, Ops);
  }

  llvm_unreachable("Unknown mul to lower!");
}

10775

10776

/// Lower a vector ABS as a signed maximum of the operand and its negation,
/// using the Altivec vmaxs{b,h,w,d} intrinsic chosen by the vector type.
SDValue PPCTargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {

  assert(Op.getOpcode() == ISD::ABS && "Should only be called for ISD::ABS");

  EVT VT = Op.getValueType();
  assert(VT.isVector() &&
         "Only set vector abs as custom, scalar abs shouldn't reach here!");
  assert((VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
          VT == MVT::v16i8) &&
         "Unexpected vector element type!");
  assert((VT != MVT::v2i64 || Subtarget.hasP8Altivec()) &&
         "Current subtarget doesn't support smax v2i64!");

  // For vector abs, it can be lowered to:
  // abs x
  // ==>
  // y = -x
  // smax(x, y)

  SDLoc dl(Op);
  SDValue X = Op.getOperand(0);
  SDValue Zero = DAG.getConstant(0, dl, VT);
  SDValue Y = DAG.getNode(ISD::SUB, dl, VT, Zero, X);

  // SMAX patch https://reviews.llvm.org/D47332
  // hasn't landed yet, so use intrinsic first here.
  // TODO: Should use SMAX directly once SMAX patch landed
  //
  // vmaxsw is the fallback when none of the other types match.
  const Intrinsic::ID BifID =
      VT == MVT::v2i64   ? Intrinsic::ppc_altivec_vmaxsd
      : VT == MVT::v8i16 ? Intrinsic::ppc_altivec_vmaxsh
      : VT == MVT::v16i8 ? Intrinsic::ppc_altivec_vmaxsb
                         : Intrinsic::ppc_altivec_vmaxsw;

  return BuildIntrinsicOp(BifID, X, Y, DAG, dl, VT);
}

10813

10814

// Custom lowering for fpext vf32 to v2f64

10815

SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {

10816

10817

assert(Op.getOpcode() == ISD::FP_EXTEND &&((Op.getOpcode() == ISD::FP_EXTEND && "Should only be called for ISD::FP_EXTEND"
) ? static_cast<void> (0) : __assert_fail ("Op.getOpcode() == ISD::FP_EXTEND && \"Should only be called for ISD::FP_EXTEND\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 10818, __PRETTY_FUNCTION__))

10818

"Should only be called for ISD::FP_EXTEND")((Op.getOpcode() == ISD::FP_EXTEND && "Should only be called for ISD::FP_EXTEND"
) ? static_cast<void> (0) : __assert_fail ("Op.getOpcode() == ISD::FP_EXTEND && \"Should only be called for ISD::FP_EXTEND\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 10818, __PRETTY_FUNCTION__));

10819

10820

// FIXME: handle extends from half precision float vectors on P9.

10821

// We only want to custom lower an extend from v2f32 to v2f64.

10822

if (Op.getValueType() != MVT::v2f64 ||

10823

Op.getOperand(0).getValueType() != MVT::v2f32)

10824

return SDValue();

10825

10826

SDLoc dl(Op);

10827

SDValue Op0 = Op.getOperand(0);

10828

10829

switch (Op0.getOpcode()) {

10830

default:

10831

return SDValue();

10832

case ISD::EXTRACT_SUBVECTOR: {

10833

assert(Op0.getNumOperands() == 2 &&((Op0.getNumOperands() == 2 && isa<ConstantSDNode>
(Op0->getOperand(1)) && "Node should have 2 operands with second one being a constant!"
) ? static_cast<void> (0) : __assert_fail ("Op0.getNumOperands() == 2 && isa<ConstantSDNode>(Op0->getOperand(1)) && \"Node should have 2 operands with second one being a constant!\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 10835, __PRETTY_FUNCTION__))

10834

isa<ConstantSDNode>(Op0->getOperand(1)) &&((Op0.getNumOperands() == 2 && isa<ConstantSDNode>
(Op0->getOperand(1)) && "Node should have 2 operands with second one being a constant!"
) ? static_cast<void> (0) : __assert_fail ("Op0.getNumOperands() == 2 && isa<ConstantSDNode>(Op0->getOperand(1)) && \"Node should have 2 operands with second one being a constant!\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 10835, __PRETTY_FUNCTION__))

10835

"Node should have 2 operands with second one being a constant!")((Op0.getNumOperands() == 2 && isa<ConstantSDNode>
(Op0->getOperand(1)) && "Node should have 2 operands with second one being a constant!"
) ? static_cast<void> (0) : __assert_fail ("Op0.getNumOperands() == 2 && isa<ConstantSDNode>(Op0->getOperand(1)) && \"Node should have 2 operands with second one being a constant!\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 10835, __PRETTY_FUNCTION__));

10836

10837

if (Op0.getOperand(0).getValueType() != MVT::v4f32)

10838

return SDValue();

10839

10840

// Custom lower is only done for high or low doubleword.

10841

int Idx = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();

10842

if (Idx % 2 != 0)

10843

return SDValue();

10844

10845

// Since input is v4f32, at this point Idx is either 0 or 2.

10846

// Shift to get the doubleword position we want.

10847

int DWord = Idx >> 1;

10848

10849

// High and low word positions are different on little endian.

10850

if (Subtarget.isLittleEndian())

10851

DWord ^= 0x1;

10852

10853

return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64,

10854

Op0.getOperand(0), DAG.getConstant(DWord, dl, MVT::i32));

10855

}

10856

case ISD::FADD:

10857

case ISD::FMUL:

10858

case ISD::FSUB: {

10859

SDValue NewLoad[2];

10860

for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {

10861

// Ensure both input are loads.

10862

SDValue LdOp = Op0.getOperand(i);

10863

if (LdOp.getOpcode() != ISD::LOAD)

10864

return SDValue();

10865

// Generate new load node.

10866

LoadSDNode *LD = cast<LoadSDNode>(LdOp);

10867

SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};

10868

NewLoad[i] = DAG.getMemIntrinsicNode(

10869

PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,

10870

LD->getMemoryVT(), LD->getMemOperand());

10871

}

10872

SDValue NewOp =

10873

DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, NewLoad[0],

10874

NewLoad[1], Op0.getNode()->getFlags());

10875

return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewOp,

10876

DAG.getConstant(0, dl, MVT::i32));

10877

}

10878

case ISD::LOAD: {

10879

LoadSDNode *LD = cast<LoadSDNode>(Op0);

10880

SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};

10881

SDValue NewLd = DAG.getMemIntrinsicNode(

10882

PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,

10883

LD->getMemoryVT(), LD->getMemOperand());

10884

return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewLd,

10885

DAG.getConstant(0, dl, MVT::i32));

10886

}

10887

}

10888

llvm_unreachable("ERROR:Should return for all cases within swtich.")::llvm::llvm_unreachable_internal("ERROR:Should return for all cases within swtich."
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 10888);

10889

}

10890

10891

/// LowerOperation - Provide custom lowering hooks for some operations.

10892

///

10893

SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {

10894

switch (Op.getOpcode()) {

10895

default: llvm_unreachable("Wasn't expecting to be able to lower this!")::llvm::llvm_unreachable_internal("Wasn't expecting to be able to lower this!"
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 10895);

10896

case ISD::ConstantPool: return LowerConstantPool(Op, DAG);

10897

case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);

10898

case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);

10899

case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);

10900

case ISD::JumpTable: return LowerJumpTable(Op, DAG);

10901

case ISD::SETCC: return LowerSETCC(Op, DAG);

10902

case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);

10903

case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);

10904

10905

// Variable argument lowering.

10906

case ISD::VASTART: return LowerVASTART(Op, DAG);

10907

case ISD::VAARG: return LowerVAARG(Op, DAG);

10908

case ISD::VACOPY: return LowerVACOPY(Op, DAG);

10909

10910

case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG);

10911

case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);

10912

case ISD::GET_DYNAMIC_AREA_OFFSET:

10913

return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);

10914

10915

// Exception handling lowering.

10916

case ISD::EH_DWARF_CFA: return LowerEH_DWARF_CFA(Op, DAG);

10917

case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);

10918

case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);

10919

10920

case ISD::LOAD: return LowerLOAD(Op, DAG);

10921

case ISD::STORE: return LowerSTORE(Op, DAG);

10922

case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);

10923

case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);

10924

case ISD::STRICT_FP_TO_UINT:

10925

case ISD::STRICT_FP_TO_SINT:

10926

case ISD::FP_TO_UINT:

10927

case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, SDLoc(Op));

10928

case ISD::STRICT_UINT_TO_FP:

10929

case ISD::STRICT_SINT_TO_FP:

10930

case ISD::UINT_TO_FP:

10931

case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);

10932

case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);

10933

10934

// Lower 64-bit shifts.

10935

case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);

10936

case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);

10937

case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);

10938

10939

case ISD::FSHL: return LowerFunnelShift(Op, DAG);

10940

case ISD::FSHR: return LowerFunnelShift(Op, DAG);

10941

10942

// Vector-related lowering.

10943

case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);

10944

case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);

10945

case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);

10946

case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);

10947

case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);

10948

case ISD::MUL: return LowerMUL(Op, DAG);

10949

case ISD::ABS: return LowerABS(Op, DAG);

10950

case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);

10951

case ISD::ROTL: return LowerROTL(Op, DAG);

10952

10953

// For counter-based loop handling.

10954

case ISD::INTRINSIC_W_CHAIN: return SDValue();

10955

10956

case ISD::BITCAST: return LowerBITCAST(Op, DAG);

10957

10958

// Frame & Return address.

10959

case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);

10960

case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);

10961

10962

case ISD::INTRINSIC_VOID:

10963

return LowerINTRINSIC_VOID(Op, DAG);

10964

case ISD::BSWAP:

10965

return LowerBSWAP(Op, DAG);

10966

case ISD::ATOMIC_CMP_SWAP:

10967

return LowerATOMIC_CMP_SWAP(Op, DAG);

10968

}

10969

}

10970

10971

void PPCTargetLowering::LowerOperationWrapper(SDNode *N,

10972

SmallVectorImpl<SDValue> &Results,

10973

SelectionDAG &DAG) const {

10974

SDValue Res = LowerOperation(SDValue(N, 0), DAG);

10975

10976

if (!Res.getNode())

10977

return;

10978

10979

// Take the return value as-is if original node has only one result.

10980

if (N->getNumValues() == 1) {

10981

Results.push_back(Res);

10982

return;

10983

}

10984

10985

// New node should have the same number of results.

10986

assert((N->getNumValues() == Res->getNumValues()) &&(((N->getNumValues() == Res->getNumValues()) &&
"Lowering returned the wrong number of results!") ? static_cast
<void> (0) : __assert_fail ("(N->getNumValues() == Res->getNumValues()) && \"Lowering returned the wrong number of results!\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 10987, __PRETTY_FUNCTION__))

10987

"Lowering returned the wrong number of results!")(((N->getNumValues() == Res->getNumValues()) &&
"Lowering returned the wrong number of results!") ? static_cast
<void> (0) : __assert_fail ("(N->getNumValues() == Res->getNumValues()) && \"Lowering returned the wrong number of results!\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 10987, __PRETTY_FUNCTION__));

10988

10989

for (unsigned i = 0; i < N->getNumValues(); ++i)

10990

Results.push_back(Res.getValue(i));

10991

}

10992

10993

void PPCTargetLowering::ReplaceNodeResults(SDNode *N,

10994

SmallVectorImpl<SDValue>&Results,

10995

SelectionDAG &DAG) const {

10996

SDLoc dl(N);

10997

switch (N->getOpcode()) {

10998

default:

10999

llvm_unreachable("Do not know how to custom type legalize this operation!")::llvm::llvm_unreachable_internal("Do not know how to custom type legalize this operation!"
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 10999);

11000

case ISD::READCYCLECOUNTER: {

11001

SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);

11002

SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));

11003

11004

Results.push_back(

11005

DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, RTB, RTB.getValue(1)));

11006

Results.push_back(RTB.getValue(2));

11007

break;

11008

}

11009

case ISD::INTRINSIC_W_CHAIN: {

11010

if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() !=

11011

Intrinsic::loop_decrement)

11012

break;

11013

11014

assert(N->getValueType(0) == MVT::i1 &&((N->getValueType(0) == MVT::i1 && "Unexpected result type for CTR decrement intrinsic"
) ? static_cast<void> (0) : __assert_fail ("N->getValueType(0) == MVT::i1 && \"Unexpected result type for CTR decrement intrinsic\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 11015, __PRETTY_FUNCTION__))

11015

"Unexpected result type for CTR decrement intrinsic")((N->getValueType(0) == MVT::i1 && "Unexpected result type for CTR decrement intrinsic"
) ? static_cast<void> (0) : __assert_fail ("N->getValueType(0) == MVT::i1 && \"Unexpected result type for CTR decrement intrinsic\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 11015, __PRETTY_FUNCTION__));

11016

EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),

11017

N->getValueType(0));

11018

SDVTList VTs = DAG.getVTList(SVT, MVT::Other);

11019

SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),

11020

N->getOperand(1));

11021

11022

Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt));

11023

Results.push_back(NewInt.getValue(1));

11024

break;

11025

}

11026

case ISD::VAARG: {

11027

if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())

11028

return;

11029

11030

EVT VT = N->getValueType(0);

11031

11032

if (VT == MVT::i64) {

11033

SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);

11034

11035

Results.push_back(NewNode);

11036

Results.push_back(NewNode.getValue(1));

11037

}

11038

return;

11039

}

11040

case ISD::STRICT_FP_TO_SINT:

11041

case ISD::STRICT_FP_TO_UINT:

11042

case ISD::FP_TO_SINT:

11043

case ISD::FP_TO_UINT:

11044

// LowerFP_TO_INT() can only handle f32 and f64.

11045

if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() ==

11046

MVT::ppcf128)

11047

return;

11048

Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));

11049

return;

11050

case ISD::TRUNCATE: {

11051

if (!N->getValueType(0).isVector())

11052

return;

11053

SDValue Lowered = LowerTRUNCATEVector(SDValue(N, 0), DAG);

11054

if (Lowered)

11055

Results.push_back(Lowered);

11056

return;

11057

}

11058

case ISD::FSHL:

11059

case ISD::FSHR:

11060

// Don't handle funnel shifts here.

11061

return;

11062

case ISD::BITCAST:

11063

// Don't handle bitcast here.

11064

return;

11065

case ISD::FP_EXTEND:

11066

SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);

11067

if (Lowered)

11068

Results.push_back(Lowered);

11069

return;

11070

}

11071

}

11072

11073

//===----------------------------------------------------------------------===//

11074

// Other Lowering Code

11075

//===----------------------------------------------------------------------===//

11076

11077

// Emit, through Builder, a call with no arguments to the intrinsic Id. The
// declaration is looked up in the module that contains the builder's current
// insertion block.
static Instruction* callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) {
  Function *Caller = Builder.GetInsertBlock()->getParent();
  Module *Mod = Caller->getParent();
  Function *Callee = Intrinsic::getDeclaration(Mod, Id);
  return Builder.CreateCall(Callee, {});
}

11082

11083

// The mappings for emitLeading/TrailingFence is taken from

11084

// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html

11085

Instruction *PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder,

11086

Instruction *Inst,

11087

AtomicOrdering Ord) const {

11088

if (Ord == AtomicOrdering::SequentiallyConsistent)

11089

return callIntrinsic(Builder, Intrinsic::ppc_sync);

11090

if (isReleaseOrStronger(Ord))

11091

return callIntrinsic(Builder, Intrinsic::ppc_lwsync);

11092

return nullptr;

11093

}

11094

11095

// Trailing fence: emitted only when Inst has an atomic load and Ord is
// acquire or stronger. A LoadInst on PPC64 gets a ppc_cfence call that takes
// the loaded value as its argument; every other qualifying instruction gets
// ppc_lwsync. Returns nullptr when no fence is emitted.
Instruction *PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
                                                  Instruction *Inst,
                                                  AtomicOrdering Ord) const {
  if (!Inst->hasAtomicLoad() || !isAcquireOrStronger(Ord))
    return nullptr;

  // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
  // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
  // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
  if (isa<LoadInst>(Inst) && Subtarget.isPPC64()) {
    Module *Mod = Builder.GetInsertBlock()->getParent()->getParent();
    Function *CFence =
        Intrinsic::getDeclaration(Mod, Intrinsic::ppc_cfence, {Inst->getType()});
    return Builder.CreateCall(CFence, {Inst});
  }

  // FIXME: Can use isync for rmw operation.
  return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
}

11113

11114

// Expand the atomic pseudo instruction MI, located in BB, into a
// load-reserve / store-conditional retry loop.
//
//   AtomicSize  operand width in bytes (1, 2, 4 or 8); selects the l?arx and
//               st?cx. opcodes. Sizes 1 and 2 assert that the subtarget has
//               part-word atomics.
//   BinOpcode   opcode applied to (incr, loaded value); 0 stores incr as-is.
//   CmpOpcode   when non-zero, a compare of incr against the loaded value is
//               emitted and the store is skipped if CmpPred holds.
//   CmpPred     predicate for the branch to the exit block.
//
// MI's operands are read as: 0 = dest, 1 = ptrA, 2 = ptrB, 3 = incr.
// Everything after MI in BB is moved to a new exit block, which is returned.
MachineBasicBlock *
PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
                                    unsigned AtomicSize,
                                    unsigned BinOpcode,
                                    unsigned CmpOpcode,
                                    unsigned CmpPred) const {
  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();

  // Choose the load-reserve / store-conditional pair for the access width.
  unsigned LoadOpc = PPC::LDARX;
  unsigned StoreOpc = PPC::STDCX;
  switch (AtomicSize) {
  default:
    llvm_unreachable("Unexpected size of atomic entity");
  case 1:
    LoadOpc = PPC::LBARX;
    StoreOpc = PPC::STBCX;
    assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
    break;
  case 2:
    LoadOpc = PPC::LHARX;
    StoreOpc = PPC::STHCX;
    assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
    break;
  case 4:
    LoadOpc = PPC::LWARX;
    StoreOpc = PPC::STWCX;
    break;
  case 8:
    LoadOpc = PPC::LDARX;
    StoreOpc = PPC::STDCX;
    break;
  }

  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction *MF = BB->getParent();
  MachineFunction::iterator InsertPos = ++BB->getIterator();

  Register DestReg = MI.getOperand(0).getReg();
  Register PtrAReg = MI.getOperand(1).getReg();
  Register PtrBReg = MI.getOperand(2).getReg();
  Register IncrReg = MI.getOperand(3).getReg();
  DebugLoc dl = MI.getDebugLoc();

  // Create the new blocks (loop2MBB only when a compare is requested) and
  // place them right after BB, in the order loop, loop2, exit.
  MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *loop2MBB = nullptr;
  if (CmpOpcode)
    loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MF->insert(InsertPos, loopMBB);
  if (CmpOpcode)
    MF->insert(InsertPos, loop2MBB);
  MF->insert(InsertPos, exitMBB);

  // Everything after MI, together with BB's successor edges, moves to exitMBB.
  exitMBB->splice(exitMBB->begin(), BB,
                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
  exitMBB->transferSuccessorsAndUpdatePHIs(BB);

  // The value to store: incr itself when there is no binary op, otherwise a
  // fresh register (64-bit class for size 8, 32-bit class otherwise).
  MachineRegisterInfo &RegInfo = MF->getRegInfo();
  Register StoreValReg = IncrReg;
  if (BinOpcode)
    StoreValReg = RegInfo.createVirtualRegister(
        AtomicSize == 8 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass);

  //  thisMBB:
  //   ...
  //   fallthrough --> loopMBB
  BB->addSuccessor(loopMBB);

  //  loopMBB:
  //   l[bhwd]arx dest, ptr
  //   <binop> tmp, incr, dest        (only when BinOpcode != 0)
  //   st[bhwd]cx. tmp, ptr
  //   bne- loopMBB
  //   fallthrough --> exitMBB

  // With a compare (used for max/min)...
  //  loopMBB:
  //   l[bhwd]arx dest, ptr
  //   cmpl?[wd] incr, dest
  //   b<CmpPred> exitMBB
  //  loop2MBB:
  //   st[bhwd]cx. tmp, ptr           (tmp is incr when BinOpcode == 0)
  //   bne- loopMBB
  //   fallthrough --> exitMBB

  BB = loopMBB;
  BuildMI(BB, dl, TII->get(LoadOpc), DestReg)
      .addReg(PtrAReg)
      .addReg(PtrBReg);
  if (BinOpcode)
    BuildMI(BB, dl, TII->get(BinOpcode), StoreValReg)
        .addReg(IncrReg)
        .addReg(DestReg);
  if (CmpOpcode) {
    // Signed comparisons of byte or halfword values must be sign-extended.
    Register CmpRHSReg = DestReg;
    if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
      Register ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
      BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
              ExtReg)
          .addReg(DestReg);
      CmpRHSReg = ExtReg;
    }
    BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
        .addReg(IncrReg)
        .addReg(CmpRHSReg);

    // Skip the store when the predicate holds.
    BuildMI(BB, dl, TII->get(PPC::BCC))
        .addImm(CmpPred)
        .addReg(PPC::CR0)
        .addMBB(exitMBB);
    BB->addSuccessor(loop2MBB);
    BB->addSuccessor(exitMBB);
    BB = loop2MBB;
  }

  // Conditional store; retry from loopMBB if CR0 reports "not equal".
  BuildMI(BB, dl, TII->get(StoreOpc))
      .addReg(StoreValReg)
      .addReg(PtrAReg)
      .addReg(PtrBReg);
  BuildMI(BB, dl, TII->get(PPC::BCC))
      .addImm(PPC::PRED_NE)
      .addReg(PPC::CR0)
      .addMBB(loopMBB);
  BB->addSuccessor(loopMBB);
  BB->addSuccessor(exitMBB);

  //  exitMBB:
  //   ...
  BB = exitMBB;
  return BB;
}

11232

11233

// Expand an 8-bit (is8bit) or 16-bit atomic pseudo instruction MI.
//
// When the subtarget has part-word atomics this simply forwards to
// EmitAtomicBinary with size 1 or 2. Otherwise the operation is carried out
// with lwarx/stwcx. on the aligned word containing the byte/halfword, using a
// shift amount and mask computed from the low address bits.
//
// BinOpcode, CmpOpcode and CmpPred have the same meaning as in
// EmitAtomicBinary. MI's operands are read as: 0 = dest, 1 = ptrA, 2 = ptrB,
// 3 = incr. Everything after MI in BB is moved to a new exit block, which is
// returned.
MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
    MachineInstr &MI, MachineBasicBlock *BB,
    bool is8bit, // operation
    unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
  // If we support part-word atomic mnemonics, just use them
  if (Subtarget.hasPartwordAtomics())
    return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
                            CmpPred);

  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  // In 64 bit mode we have to use 64 bits for addresses, even though the
  // lwarx/stwcx are 32 bits.  With the 32-bit atomics we can use address
  // registers without caring whether they're 32 or 64, but here we're
  // doing actual arithmetic on the addresses.
  bool is64bit = Subtarget.isPPC64();
  bool isLittleEndian = Subtarget.isLittleEndian();
  Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;

  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction *MF = BB->getParent();
  MachineFunction::iterator InsertPos = ++BB->getIterator();

  Register DestReg = MI.getOperand(0).getReg();
  Register PtrAReg = MI.getOperand(1).getReg();
  Register PtrBReg = MI.getOperand(2).getReg();
  Register IncrReg = MI.getOperand(3).getReg();
  DebugLoc dl = MI.getDebugLoc();

  // Create the new blocks (loop2MBB only when a compare is requested) and
  // place them right after BB, in the order loop, loop2, exit.
  MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *loop2MBB = nullptr;
  if (CmpOpcode)
    loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MF->insert(InsertPos, loopMBB);
  if (CmpOpcode)
    MF->insert(InsertPos, loop2MBB);
  MF->insert(InsertPos, exitMBB);

  // Everything after MI, together with BB's successor edges, moves to exitMBB.
  exitMBB->splice(exitMBB->begin(), BB,
                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
  exitMBB->transferSuccessorsAndUpdatePHIs(BB);

  MachineRegisterInfo &RegInfo = MF->getRegInfo();
  // Pointer-width class for address arithmetic, 32-bit class for the rest.
  const TargetRegisterClass *PtrRC =
      is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
  const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;

  // Scratch registers. On little endian the raw shift amount is used
  // directly, so no separate shift register is created.
  Register AlignedPtrReg = RegInfo.createVirtualRegister(PtrRC);
  Register RawShiftReg = RegInfo.createVirtualRegister(GPRC);
  Register ShiftReg =
      isLittleEndian ? RawShiftReg : RegInfo.createVirtualRegister(GPRC);
  Register ShiftedIncrReg = RegInfo.createVirtualRegister(GPRC);
  Register MaskReg = RegInfo.createVirtualRegister(GPRC);
  Register LowMaskReg = RegInfo.createVirtualRegister(GPRC);
  Register ZeroTmpReg = RegInfo.createVirtualRegister(GPRC);
  Register OtherBitsReg = RegInfo.createVirtualRegister(GPRC);
  Register NewBitsReg = RegInfo.createVirtualRegister(GPRC);
  Register MergedReg = RegInfo.createVirtualRegister(GPRC);
  Register LoadedWordReg = RegInfo.createVirtualRegister(GPRC);
  Register FullPtrReg;
  // Result of the binary op; the shifted increment itself when there is none.
  Register OpResultReg = ShiftedIncrReg;
  if (BinOpcode)
    OpResultReg = RegInfo.createVirtualRegister(GPRC);

  //  thisMBB:
  //   ...
  //   fallthrough --> loopMBB
  BB->addSuccessor(loopMBB);

  // The 4-byte load must be aligned, while a char or short may be
  // anywhere in the word.  Hence all this nasty bookkeeping code.
  //   add fullPtr, ptrA, ptrB [copy if ptrA==0]
  //   rlwinm rawShift, fullPtr, 3, 27, 28 [3, 27, 27]
  //   xori shift, rawShift, 24 [16]           (big endian only)
  //   rlwinm alignedPtr, fullPtr, 0, 0, 29    (rldicr ..., 0, 61 in 64-bit)
  //   slw shiftedIncr, incr, shift
  //   li lowMask, 255 [li zeroTmp, 0; ori lowMask, zeroTmp, 65535]
  //   slw mask, lowMask, shift
  //  loopMBB:
  //   lwarx loadedWord, alignedPtr
  //   <binop> opResult, shiftedIncr, loadedWord
  //   andc otherBits, loadedWord, mask
  //   and newBits, opResult, mask
  //   or merged, newBits, otherBits
  //   stwcx. merged, alignedPtr
  //   bne- loopMBB
  //   fallthrough --> exitMBB
  //   srw dest, loadedWord, shift
  if (PtrAReg != ZeroReg) {
    FullPtrReg = RegInfo.createVirtualRegister(PtrRC);
    BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), FullPtrReg)
        .addReg(PtrAReg)
        .addReg(PtrBReg);
  } else {
    FullPtrReg = PtrBReg;
  }
  // We need use 32-bit subregister to avoid mismatch register class in 64-bit
  // mode.
  BuildMI(BB, dl, TII->get(PPC::RLWINM), RawShiftReg)
      .addReg(FullPtrReg, 0, is64bit ? PPC::sub_32 : 0)
      .addImm(3)
      .addImm(27)
      .addImm(is8bit ? 28 : 27);
  if (!isLittleEndian)
    BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
        .addReg(RawShiftReg)
        .addImm(is8bit ? 24 : 16);
  // Clear the two low address bits to get the word-aligned pointer.
  if (is64bit)
    BuildMI(BB, dl, TII->get(PPC::RLDICR), AlignedPtrReg)
        .addReg(FullPtrReg)
        .addImm(0)
        .addImm(61);
  else
    BuildMI(BB, dl, TII->get(PPC::RLWINM), AlignedPtrReg)
        .addReg(FullPtrReg)
        .addImm(0)
        .addImm(0)
        .addImm(29);
  BuildMI(BB, dl, TII->get(PPC::SLW), ShiftedIncrReg)
      .addReg(IncrReg)
      .addReg(ShiftReg);
  // Build the field mask: 0xFF for bytes, 0xFFFF for halfwords, then shift it
  // into position.
  if (is8bit)
    BuildMI(BB, dl, TII->get(PPC::LI), LowMaskReg).addImm(255);
  else {
    BuildMI(BB, dl, TII->get(PPC::LI), ZeroTmpReg).addImm(0);
    BuildMI(BB, dl, TII->get(PPC::ORI), LowMaskReg)
        .addReg(ZeroTmpReg)
        .addImm(65535);
  }
  BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
      .addReg(LowMaskReg)
      .addReg(ShiftReg);

  BB = loopMBB;
  BuildMI(BB, dl, TII->get(PPC::LWARX), LoadedWordReg)
      .addReg(ZeroReg)
      .addReg(AlignedPtrReg);
  if (BinOpcode)
    BuildMI(BB, dl, TII->get(BinOpcode), OpResultReg)
        .addReg(ShiftedIncrReg)
        .addReg(LoadedWordReg);
  // Bits of the loaded word outside the field, and new bits inside it.
  BuildMI(BB, dl, TII->get(PPC::ANDC), OtherBitsReg)
      .addReg(LoadedWordReg)
      .addReg(MaskReg);
  BuildMI(BB, dl, TII->get(PPC::AND), NewBitsReg)
      .addReg(OpResultReg)
      .addReg(MaskReg);
  if (CmpOpcode) {
    // For unsigned comparisons, we can directly compare the shifted values.
    // For signed comparisons we shift and sign extend.
    Register FieldReg = RegInfo.createVirtualRegister(GPRC);
    BuildMI(BB, dl, TII->get(PPC::AND), FieldReg)
        .addReg(LoadedWordReg)
        .addReg(MaskReg);
    Register CmpRHSReg = FieldReg;
    Register CmpLHSReg = ShiftedIncrReg;
    if (CmpOpcode == PPC::CMPW) {
      Register UnshiftedReg = RegInfo.createVirtualRegister(GPRC);
      BuildMI(BB, dl, TII->get(PPC::SRW), UnshiftedReg)
          .addReg(FieldReg)
          .addReg(ShiftReg);
      Register SignExtReg = RegInfo.createVirtualRegister(GPRC);
      BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), SignExtReg)
          .addReg(UnshiftedReg);
      CmpRHSReg = SignExtReg;
      CmpLHSReg = IncrReg;
    }
    BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
        .addReg(CmpLHSReg)
        .addReg(CmpRHSReg);
    // Skip the store when the predicate holds.
    BuildMI(BB, dl, TII->get(PPC::BCC))
        .addImm(CmpPred)
        .addReg(PPC::CR0)
        .addMBB(exitMBB);
    BB->addSuccessor(loop2MBB);
    BB->addSuccessor(exitMBB);
    BB = loop2MBB;
  }
  // Merge the field back into the word and attempt the conditional store;
  // retry from loopMBB if CR0 reports "not equal".
  BuildMI(BB, dl, TII->get(PPC::OR), MergedReg)
      .addReg(NewBitsReg)
      .addReg(OtherBitsReg);
  BuildMI(BB, dl, TII->get(PPC::STWCX))
      .addReg(MergedReg)
      .addReg(ZeroReg)
      .addReg(AlignedPtrReg);
  BuildMI(BB, dl, TII->get(PPC::BCC))
      .addImm(PPC::PRED_NE)
      .addReg(PPC::CR0)
      .addMBB(loopMBB);
  BB->addSuccessor(loopMBB);
  BB->addSuccessor(exitMBB);

  //  exitMBB:
  //   ...
  // dest is the loaded word shifted right by the field's shift amount; the
  // instruction goes at the very start of exitMBB.
  BB = exitMBB;
  BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), DestReg)
      .addReg(LoadedWordReg)
      .addReg(ShiftReg);
  return BB;
}

11425

11426

/// Expand the EH_SjLj_SetJmp pseudo instruction \p MI, which lives in
/// \p MBB, into explicit machine code and control flow.
///
/// For `v = setjmp(buf)` the generated shape is:
///
///   thisMBB:
///     (store TOC / base pointer into buf)
///     bl mainMBB
///     v_restore = 1
///     SjLjSetup mainMBB
///     b sinkMBB
///
///   mainMBB:
///     buf[LabelOffset] = LR
///     v_main = 0
///
///   sinkMBB:
///     v = phi(main, restore)
///
/// \returns the block (sinkMBB) that now holds the instructions that
/// originally followed \p MI.
llvm::MachineBasicBlock *
PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
                                    MachineBasicBlock *MBB) const {
  const DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const bool IsPPC64 = Subtarget.isPPC64();

  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  const BasicBlock *IRBlock = MBB->getBasicBlock();
  MachineFunction::iterator InsertPos = ++MBB->getIterator();

  // The pseudo's result must live in a class that can hold an i32. Two fresh
  // registers of that class carry the "direct return" (0) and the
  // "returned via longjmp" (1) values into the final PHI.
  Register DstReg = MI.getOperand(0).getReg();
  const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
  assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
  Register ZeroResultReg = MRI.createVirtualRegister(RC);
  Register OneResultReg = MRI.createVirtualRegister(RC);

  MVT PVT = getPointerTy(MF->getDataLayout());
  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
         "Invalid Pointer Size!");

  // Create the two new blocks and place them right after the current one.
  MachineBasicBlock *EntryMBB = MBB;
  MachineBasicBlock *MainMBB = MF->CreateMachineBasicBlock(IRBlock);
  MachineBasicBlock *SinkMBB = MF->CreateMachineBasicBlock(IRBlock);
  MF->insert(InsertPos, MainMBB);
  MF->insert(InsertPos, SinkMBB);

  // Everything after MI, together with the outgoing CFG edges, now belongs
  // to the sink block.
  SinkMBB->splice(SinkMBB->begin(), MBB,
                  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
  SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);

  // The jmp_buf layout used here is deliberately NOT the libc layout: it only
  // holds the 'reserved' registers that LLVM cannot otherwise spill. By
  // convention Clang has already written the frame address to the first slot
  // and the stack address to the third slot before this pseudo is reached.
  // Following the X86 target, the jump address goes into the second slot.
  // The TOC pointer (R2) is saved as well so that jumps between shared
  // libraries work; the thread identifier (R13) is not affected.
  const int64_t LabelOffset = 1 * PVT.getStoreSize();
  const int64_t TOCOffset = 3 * PVT.getStoreSize();
  const int64_t BPOffset = 4 * PVT.getStoreSize();

  // Register that will receive the resume address (read from LR in MainMBB).
  const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
  Register LabelReg = MRI.createVirtualRegister(PtrRC);
  Register BufReg = MI.getOperand(1).getReg();

  // --- EntryMBB ------------------------------------------------------------

  // Save the TOC pointer on the 64-bit ELF ABI.
  if (Subtarget.is64BitELFABI()) {
    setUsesTOCBasePtr(*MF);
    BuildMI(*EntryMBB, MI, DL, TII->get(PPC::STD))
        .addReg(PPC::X2)
        .addImm(TOCOffset)
        .addReg(BufReg)
        .cloneMemRefs(MI);
  }

  // Naked functions never have a base pointer, so r1 is used for them. For
  // every other function the choice is postponed until PEI by storing the
  // BP/BP8 placeholder register.
  const bool IsNaked = MF->getFunction().hasFnAttribute(Attribute::Naked);
  const unsigned BaseReg = IsNaked ? (IsPPC64 ? PPC::X1 : PPC::R1)
                                   : (IsPPC64 ? PPC::BP8 : PPC::BP);

  BuildMI(*EntryMBB, MI, DL, TII->get(IsPPC64 ? PPC::STD : PPC::STW))
      .addReg(BaseReg)
      .addImm(BPOffset)
      .addReg(BufReg)
      .cloneMemRefs(MI);

  // Branch-and-link to MainMBB; the instruction carries the "no preserved
  // registers" mask.
  BuildMI(*EntryMBB, MI, DL, TII->get(PPC::BCLalways))
      .addMBB(MainMBB)
      .addRegMask(TRI->getNoPreservedMask());

  // Value produced on the restore path.
  BuildMI(*EntryMBB, MI, DL, TII->get(PPC::LI), OneResultReg).addImm(1);

  BuildMI(*EntryMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup)).addMBB(MainMBB);
  BuildMI(*EntryMBB, MI, DL, TII->get(PPC::B)).addMBB(SinkMBB);

  EntryMBB->addSuccessor(MainMBB, BranchProbability::getZero());
  EntryMBB->addSuccessor(SinkMBB, BranchProbability::getOne());

  // --- MainMBB -------------------------------------------------------------

  // Read the link register into LabelReg ...
  BuildMI(MainMBB, DL, TII->get(IsPPC64 ? PPC::MFLR8 : PPC::MFLR), LabelReg);

  // ... and store it into the label slot of the buffer.
  BuildMI(MainMBB, DL, TII->get(IsPPC64 ? PPC::STD : PPC::STW))
      .addReg(LabelReg)
      .addImm(LabelOffset)
      .addReg(BufReg)
      .cloneMemRefs(MI);

  // Value produced on the direct path.
  BuildMI(MainMBB, DL, TII->get(PPC::LI), ZeroResultReg).addImm(0);
  MainMBB->addSuccessor(SinkMBB);

  // --- SinkMBB -------------------------------------------------------------

  // Merge the two candidate results into the pseudo's destination register.
  BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII->get(PPC::PHI), DstReg)
      .addReg(ZeroResultReg)
      .addMBB(MainMBB)
      .addReg(OneResultReg)
      .addMBB(EntryMBB);

  MI.eraseFromParent();
  return SinkMBB;
}

11567

11568

/// Expand the EH_SjLj_LongJmp pseudo instruction \p MI in place inside
/// \p MBB.
///
/// The expansion reloads, from the buffer whose address is operand 0 of
/// \p MI: the frame pointer (slot 0), the jump target (slot 1), the stack
/// pointer (slot 2), the base pointer (slot 4) and, for 64-bit SVR4, the TOC
/// pointer (slot 3). It then moves the jump target to CTR and branches
/// through it.
///
/// \returns \p MBB; no new blocks are created.
MachineBasicBlock *
PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
                                     MachineBasicBlock *MBB) const {
  const DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();

  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  MVT PVT = getPointerTy(MF->getDataLayout());
  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
         "Invalid Pointer Size!");
  const bool Is64Bit = PVT == MVT::i64;

  // Scratch register that carries the jump target.
  const TargetRegisterClass *TargetRC =
      Is64Bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
  Register Tmp = MRI.createVirtualRegister(TargetRC);

  // Since FP is only updated here but NOT referenced, it's treated as GPR.
  const unsigned FP = Is64Bit ? PPC::X31 : PPC::R31;
  const unsigned SP = Is64Bit ? PPC::X1 : PPC::R1;
  unsigned BP;
  if (Is64Bit)
    BP = PPC::X30;
  else if (Subtarget.isSVR4ABI() && isPositionIndependent())
    BP = PPC::R29;
  else
    BP = PPC::R30;

  const int64_t LabelOffset = 1 * PVT.getStoreSize();
  const int64_t SPOffset = 2 * PVT.getStoreSize();
  const int64_t TOCOffset = 3 * PVT.getStoreSize();
  const int64_t BPOffset = 4 * PVT.getStoreSize();

  Register BufReg = MI.getOperand(0).getReg();

  // Pointer-sized load opcode, plus a helper that emits
  // "Dest = load Offset(BufReg)" in front of MI with MI's memory operands.
  const unsigned LoadOpc = Is64Bit ? PPC::LD : PPC::LWZ;
  auto ReloadFromBuf = [&](Register Dest, int64_t Offset) {
    BuildMI(*MBB, MI, DL, TII->get(LoadOpc), Dest)
        .addImm(Offset)
        .addReg(BufReg)
        .cloneMemRefs(MI);
  };

  // Reload FP (the jumped-to function may not have had a frame pointer, and
  // if so, then its r31 will be restored as necessary).
  ReloadFromBuf(FP, 0);

  // Reload IP.
  ReloadFromBuf(Tmp, LabelOffset);

  // Reload SP.
  ReloadFromBuf(SP, SPOffset);

  // Reload BP.
  ReloadFromBuf(BP, BPOffset);

  // Reload TOC.
  if (Is64Bit && Subtarget.isSVR4ABI()) {
    setUsesTOCBasePtr(*MF);
    BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
        .addImm(TOCOffset)
        .addReg(BufReg)
        .cloneMemRefs(MI);
  }

  // Jump: move the target into CTR and branch through it.
  BuildMI(*MBB, MI, DL, TII->get(Is64Bit ? PPC::MTCTR8 : PPC::MTCTR))
      .addReg(Tmp);
  BuildMI(*MBB, MI, DL, TII->get(Is64Bit ? PPC::BCTR8 : PPC::BCTR));

  MI.eraseFromParent();
  return MBB;
}

11669

11670

/// Report whether \p MF asks for inline stack probes, i.e. whether its
/// function carries a "probe-stack" attribute whose string value is exactly
/// "inline-asm". A function without the attribute yields false.
bool PPCTargetLowering::hasInlineStackProbe(MachineFunction &MF) const {
  const Function &Fn = MF.getFunction();
  if (!Fn.hasFnAttribute("probe-stack"))
    return false;
  return Fn.getFnAttribute("probe-stack").getValueAsString() == "inline-asm";
}

11677

11678

/// Compute the stack probe interval for \p MF.
///
/// The starting value is 4096, optionally overridden by the function's
/// "stack-probe-size" attribute. The value is then rounded down to a
/// multiple of the stack alignment; if that rounding yields zero, the stack
/// alignment itself is returned instead.
unsigned PPCTargetLowering::getStackProbeSize(MachineFunction &MF) const {
  const TargetFrameLowering *FrameLowering = Subtarget.getFrameLowering();
  const unsigned StackAlign = FrameLowering->getStackAlignment();
  assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
         "Unexpected stack alignment");

  // 4096 is the default when there is no stack-probe-size attribute.
  unsigned Requested = 4096;
  const Function &Fn = MF.getFunction();
  if (Fn.hasFnAttribute("stack-probe-size")) {
    StringRef Text = Fn.getFnAttribute("stack-probe-size").getValueAsString();
    // The result of the parse call is not inspected here.
    Text.getAsInteger(0, Requested);
  }

  // Round down to the stack alignment (a power of two, see the assert).
  const unsigned Rounded = Requested & ~(StackAlign - 1);
  return Rounded != 0 ? Rounded : StackAlign;
}

11695

11696

// Lower dynamic stack allocation with probing. `emitProbedAlloca` is splitted

11697

// into three phases. In the first phase, it uses pseudo instruction

11698

// PREPARE_PROBED_ALLOCA to get the future result of actual FramePointer and

11699

// FinalStackPtr. In the second phase, it generates a loop for probing blocks.

11700

// At last, it uses pseudo instruction DYNAREAOFFSET to get the future result of

11701

// MaxCallFrameSize so that it can calculate correct data area pointer.

11702

MachineBasicBlock *

11703

PPCTargetLowering::emitProbedAlloca(MachineInstr &MI,

11704

MachineBasicBlock *MBB) const {

11705

const bool isPPC64 = Subtarget.isPPC64();

11706

MachineFunction *MF = MBB->getParent();

11707

const TargetInstrInfo *TII = Subtarget.getInstrInfo();

11708

DebugLoc DL = MI.getDebugLoc();

11709

const unsigned ProbeSize = getStackProbeSize(*MF);

11710

const BasicBlock *ProbedBB = MBB->getBasicBlock();

11711

MachineRegisterInfo &MRI = MF->getRegInfo();

11712

// The CFG of probing stack looks as

11713

// +-----+

11714

// | MBB |

11715

// +--+--+

11716

// |

11717

// +----v----+

11718

// +--->+ TestMBB +---+

11719

// | +----+----+ |

11720

// | | |

11721

// | +-----v----+ |

11722

// +---+ BlockMBB | |

11723

// +----------+ |

11724

// |

11725

// +---------+ |

11726

// | TailMBB +<--+

11727

// +---------+

11728

// In MBB, calculate previous frame pointer and final stack pointer.

11729

// In TestMBB, test if sp is equal to final stack pointer, if so, jump to

11730

// TailMBB. In BlockMBB, update the sp atomically and jump back to TestMBB.

11731

// TailMBB is spliced via \p MI.

11732

MachineBasicBlock *TestMBB = MF->CreateMachineBasicBlock(ProbedBB);

11733

MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(ProbedBB);

11734

MachineBasicBlock *BlockMBB = MF->CreateMachineBasicBlock(ProbedBB);

11735

11736

MachineFunction::iterator MBBIter = ++MBB->getIterator();

11737

MF->insert(MBBIter, TestMBB);

11738

MF->insert(MBBIter, BlockMBB);

11739

MF->insert(MBBIter, TailMBB);

11740

11741

const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;

11742

const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;

11743

11744

Register DstReg = MI.getOperand(0).getReg();

11745

Register NegSizeReg = MI.getOperand(1).getReg();

11746

Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;

11747

Register FinalStackPtr = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);

11748

Register FramePointer = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);

11749

Register ActualNegSizeReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);

11750

11751

// Since value of NegSizeReg might be realigned in prologepilog, insert a

11752

// PREPARE_PROBED_ALLOCA pseudo instruction to get actual FramePointer and

11753

// NegSize.

11754

unsigned ProbeOpc;

11755

if (!MRI.hasOneNonDBGUse(NegSizeReg))

11756

ProbeOpc =

11757

isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;

11758

else

11759

// By introducing PREPARE_PROBED_ALLOCA_NEGSIZE_OPT, ActualNegSizeReg

11760

// and NegSizeReg will be allocated in the same phyreg to avoid

11761

// redundant copy when NegSizeReg has only one use which is current MI and

11762

// will be replaced by PREPARE_PROBED_ALLOCA then.

11763

ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64

11764

: PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;

11765

BuildMI(*MBB, {MI}, DL, TII->get(ProbeOpc), FramePointer)

11766

.addDef(ActualNegSizeReg)

11767

.addReg(NegSizeReg)

11768

.add(MI.getOperand(2))

11769

.add(MI.getOperand(3));

11770

11771

// Calculate final stack pointer, which equals to SP + ActualNegSize.

11772

BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4),

11773

FinalStackPtr)

11774

.addReg(SPReg)

11775

.addReg(ActualNegSizeReg);

11776

11777

// Materialize a scratch register for update.

11778

int64_t NegProbeSize = -(int64_t)ProbeSize;

11779

assert(isInt<32>(NegProbeSize) && "Unhandled probe size!")((isInt<32>(NegProbeSize) && "Unhandled probe size!"
) ? static_cast<void> (0) : __assert_fail ("isInt<32>(NegProbeSize) && \"Unhandled probe size!\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 11779, __PRETTY_FUNCTION__));

11780

Register ScratchReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);

11781

if (!isInt<16>(NegProbeSize)) {

11782

Register TempReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);

11783

BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)

11784

.addImm(NegProbeSize >> 16);

11785

BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),

11786

ScratchReg)

11787

.addReg(TempReg)

11788

.addImm(NegProbeSize & 0xFFFF);

11789

} else

11790

BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LI8 : PPC::LI), ScratchReg)

11791

.addImm(NegProbeSize);

11792

11793

{

11794

// Probing leading residual part.

11795

Register Div = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);

11796

BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div)

11797

.addReg(ActualNegSizeReg)

11798

.addReg(ScratchReg);

11799

Register Mul = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);

11800

BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::MULLD : PPC::MULLW), Mul)

11801

.addReg(Div)

11802

.addReg(ScratchReg);

11803

Register NegMod = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);

11804

BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod)

11805

.addReg(Mul)

11806

.addReg(ActualNegSizeReg);

11807

BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)

11808

.addReg(FramePointer)

11809

.addReg(SPReg)

11810

.addReg(NegMod);

11811

}

11812

11813

{

11814

// Remaining part should be multiple of ProbeSize.

11815

Register CmpResult = MRI.createVirtualRegister(&PPC::CRRCRegClass);

11816

BuildMI(TestMBB, DL, TII->get(isPPC64 ? PPC::CMPD : PPC::CMPW), CmpResult)

11817

.addReg(SPReg)

11818

.addReg(FinalStackPtr);

11819

BuildMI(TestMBB, DL, TII->get(PPC::BCC))

11820

.addImm(PPC::PRED_EQ)

11821

.addReg(CmpResult)

11822

.addMBB(TailMBB);

11823

TestMBB->addSuccessor(BlockMBB);

11824

TestMBB->addSuccessor(TailMBB);

11825

}

11826

11827

{

11828

// Touch the block.

11829

// |P...|P...|P...

11830

BuildMI(BlockMBB, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)

11831

.addReg(FramePointer)

11832

.addReg(SPReg)

11833

.addReg(ScratchReg);

11834

BuildMI(BlockMBB, DL, TII->get(PPC::B)).addMBB(TestMBB);

11835

BlockMBB->addSuccessor(TestMBB);

11836

}

11837

11838

// Calculation of MaxCallFrameSize is deferred to prologepilog, use

11839

// DYNAREAOFFSET pseudo instruction to get the future result.

11840

Register MaxCallFrameSizeReg =

11841

MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);

11842

BuildMI(TailMBB, DL,

11843

TII->get(isPPC64 ? PPC::DYNAREAOFFSET8 : PPC::DYNAREAOFFSET),

11844

MaxCallFrameSizeReg)

11845

.add(MI.getOperand(2))

11846

.add(MI.getOperand(3));

11847

BuildMI(TailMBB, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), DstReg)

11848

.addReg(SPReg)

11849

.addReg(MaxCallFrameSizeReg);

11850

11851

// Splice instructions after MI to TailMBB.

11852

TailMBB->splice(TailMBB->end(), MBB,

11853

std::next(MachineBasicBlock::iterator(MI)), MBB->end());

11854

TailMBB->transferSuccessorsAndUpdatePHIs(MBB);

11855

MBB->addSuccessor(TestMBB);

11856

11857

// Delete the pseudo instruction.

11858

MI.eraseFromParent();

11859

11860

++NumDynamicAllocaProbed;

11861

return TailMBB;

11862

}

11863

11864

MachineBasicBlock *

11865

PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,

11866

MachineBasicBlock *BB) const {

11867

if (MI.getOpcode() == TargetOpcode::STACKMAP ||

11868

MI.getOpcode() == TargetOpcode::PATCHPOINT) {

11869

if (Subtarget.is64BitELFABI() &&

11870

MI.getOpcode() == TargetOpcode::PATCHPOINT &&

11871

!Subtarget.isUsingPCRelativeCalls()) {

11872

// Call lowering should have added an r2 operand to indicate a dependence

11873

// on the TOC base pointer value. It can't however, because there is no

11874

// way to mark the dependence as implicit there, and so the stackmap code

11875

// will confuse it with a regular operand. Instead, add the dependence

11876

// here.

11877

MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));

11878

}

11879

11880

return emitPatchPoint(MI, BB);

11881

}

11882

11883

if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||

11884

MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {

11885

return emitEHSjLjSetJmp(MI, BB);

11886

} else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||

11887

MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {

11888

return emitEHSjLjLongJmp(MI, BB);

11889

}

11890

11891

const TargetInstrInfo *TII = Subtarget.getInstrInfo();

11892

11893

// To "insert" these instructions we actually have to insert their

11894

// control-flow patterns.

11895

const BasicBlock *LLVM_BB = BB->getBasicBlock();

11896

MachineFunction::iterator It = ++BB->getIterator();

11897

11898

MachineFunction *F = BB->getParent();

11899

11900

if (MI.getOpcode() == PPC::SELECT_CC_I4 ||

11901

MI.getOpcode() == PPC::SELECT_CC_I8 || MI.getOpcode() == PPC::SELECT_I4 ||

11902

MI.getOpcode() == PPC::SELECT_I8) {

11903

SmallVector<MachineOperand, 2> Cond;

11904

if (MI.getOpcode() == PPC::SELECT_CC_I4 ||

11905

MI.getOpcode() == PPC::SELECT_CC_I8)

11906

Cond.push_back(MI.getOperand(4));

11907

else

11908

Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));

11909

Cond.push_back(MI.getOperand(1));

11910

11911

DebugLoc dl = MI.getDebugLoc();

11912

TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,

11913

MI.getOperand(2).getReg(), MI.getOperand(3).getReg());

11914

} else if (MI.getOpcode() == PPC::SELECT_CC_F4 ||

11915

MI.getOpcode() == PPC::SELECT_CC_F8 ||

11916

MI.getOpcode() == PPC::SELECT_CC_F16 ||

11917

MI.getOpcode() == PPC::SELECT_CC_VRRC ||

11918

MI.getOpcode() == PPC::SELECT_CC_VSFRC ||

11919

MI.getOpcode() == PPC::SELECT_CC_VSSRC ||

11920

MI.getOpcode() == PPC::SELECT_CC_VSRC ||

11921

MI.getOpcode() == PPC::SELECT_CC_SPE4 ||

11922

MI.getOpcode() == PPC::SELECT_CC_SPE ||

11923

MI.getOpcode() == PPC::SELECT_F4 ||

11924

MI.getOpcode() == PPC::SELECT_F8 ||

11925

MI.getOpcode() == PPC::SELECT_F16 ||

11926

MI.getOpcode() == PPC::SELECT_SPE ||

11927

MI.getOpcode() == PPC::SELECT_SPE4 ||

11928

MI.getOpcode() == PPC::SELECT_VRRC ||

11929

MI.getOpcode() == PPC::SELECT_VSFRC ||

11930

MI.getOpcode() == PPC::SELECT_VSSRC ||

11931

MI.getOpcode() == PPC::SELECT_VSRC) {

11932

// The incoming instruction knows the destination vreg to set, the

11933

// condition code register to branch on, the true/false values to

11934

// select between, and a branch opcode to use.

11935

11936

// thisMBB:

11937

// ...

11938

// TrueVal = ...

11939

// cmpTY ccX, r1, r2

11940

// bCC copy1MBB

11941

// fallthrough --> copy0MBB

11942

MachineBasicBlock *thisMBB = BB;

11943

MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);

11944

MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);

11945

DebugLoc dl = MI.getDebugLoc();

11946

F->insert(It, copy0MBB);

11947

F->insert(It, sinkMBB);

11948

11949

// Transfer the remainder of BB and its successor edges to sinkMBB.

11950

sinkMBB->splice(sinkMBB->begin(), BB,

11951

std::next(MachineBasicBlock::iterator(MI)), BB->end());

11952

sinkMBB->transferSuccessorsAndUpdatePHIs(BB);

11953

11954

// Next, add the true and fallthrough blocks as its successors.

11955

BB->addSuccessor(copy0MBB);

11956

BB->addSuccessor(sinkMBB);

11957

11958

if (MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8 ||

11959

MI.getOpcode() == PPC::SELECT_F4 || MI.getOpcode() == PPC::SELECT_F8 ||

11960

MI.getOpcode() == PPC::SELECT_F16 ||

11961

MI.getOpcode() == PPC::SELECT_SPE4 ||

11962

MI.getOpcode() == PPC::SELECT_SPE ||

11963

MI.getOpcode() == PPC::SELECT_VRRC ||

11964

MI.getOpcode() == PPC::SELECT_VSFRC ||

11965

MI.getOpcode() == PPC::SELECT_VSSRC ||

11966

MI.getOpcode() == PPC::SELECT_VSRC) {

11967

BuildMI(BB, dl, TII->get(PPC::BC))

11968

.addReg(MI.getOperand(1).getReg())

11969

.addMBB(sinkMBB);

11970

} else {

11971

unsigned SelectPred = MI.getOperand(4).getImm();

11972

BuildMI(BB, dl, TII->get(PPC::BCC))

11973

.addImm(SelectPred)

11974

.addReg(MI.getOperand(1).getReg())

11975

.addMBB(sinkMBB);

11976

}

11977

11978

// copy0MBB:

11979

// %FalseValue = ...

11980

// # fallthrough to sinkMBB

11981

BB = copy0MBB;

11982

11983

// Update machine-CFG edges

11984

BB->addSuccessor(sinkMBB);

11985

11986

// sinkMBB:

11987

// %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]

11988

// ...

11989

BB = sinkMBB;

11990

BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())

11991

.addReg(MI.getOperand(3).getReg())

11992

.addMBB(copy0MBB)

11993

.addReg(MI.getOperand(2).getReg())

11994

.addMBB(thisMBB);

11995

} else if (MI.getOpcode() == PPC::ReadTB) {

11996

// To read the 64-bit time-base register on a 32-bit target, we read the

11997

// two halves. Should the counter have wrapped while it was being read, we

11998

// need to try again.

11999

// ...

12000

// readLoop:

12001

// mfspr Rx,TBU # load from TBU

12002

// mfspr Ry,TB # load from TB

12003

// mfspr Rz,TBU # load from TBU

12004

// cmpw crX,Rx,Rz # check if 'old'='new'

12005

// bne readLoop # branch if they're not equal

12006

// ...

12007

12008

MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);

12009

MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);

12010

DebugLoc dl = MI.getDebugLoc();

12011

F->insert(It, readMBB);

12012

F->insert(It, sinkMBB);

12013

12014

// Transfer the remainder of BB and its successor edges to sinkMBB.

12015

sinkMBB->splice(sinkMBB->begin(), BB,

12016

std::next(MachineBasicBlock::iterator(MI)), BB->end());

12017

sinkMBB->transferSuccessorsAndUpdatePHIs(BB);

12018

12019

BB->addSuccessor(readMBB);

12020

BB = readMBB;

12021

12022

MachineRegisterInfo &RegInfo = F->getRegInfo();

12023

Register ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);

12024

Register LoReg = MI.getOperand(0).getReg();

12025

Register HiReg = MI.getOperand(1).getReg();

12026

12027

BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);

12028

BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);

12029

BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);

12030

12031

Register CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);

12032

12033

BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)

12034

.addReg(HiReg)

12035

.addReg(ReadAgainReg);

12036

BuildMI(BB, dl, TII->get(PPC::BCC))

12037

.addImm(PPC::PRED_NE)

12038

.addReg(CmpReg)

12039

.addMBB(readMBB);

12040

12041

BB->addSuccessor(readMBB);

12042

BB->addSuccessor(sinkMBB);

12043

} else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)

12044

BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);

12045

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)

12046

BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);

12047

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)

12048

BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);

12049

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)

12050

BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);

12051

12052

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)

12053

BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);

12054

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)

12055

BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);

12056

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)

12057

BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);

12058

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)

12059

BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);

12060

12061

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)

12062

BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);

12063

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)

12064

BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);

12065

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)

12066

BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);

12067

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)

12068

BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);

12069

12070

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)

12071

BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);

12072

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)

12073

BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);

12074

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)

12075

BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);

12076

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)

12077

BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);

12078

12079

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)

12080

BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);

12081

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)

12082

BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);

12083

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)

12084

BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);

12085

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)

12086

BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);

12087

12088

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)

12089

BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);

12090

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)

12091

BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);

12092

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)

12093

BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);

12094

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)

12095

BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);

12096

12097

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)

12098

BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GE);

12099

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)

12100

BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GE);

12101

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)

12102

BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GE);

12103

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)

12104

BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GE);

12105

12106

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)

12107

BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LE);

12108

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)

12109

BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LE);

12110

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)

12111

BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LE);

12112

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)

12113

BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LE);

12114

12115

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)

12116

BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GE);

12117

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)

12118

BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GE);

12119

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)

12120

BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GE);

12121

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)

12122

BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GE);

12123

12124

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)

12125

BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LE);

12126

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)

12127

BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LE);

12128

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)

12129

BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LE);

12130

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)

12131

BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LE);

12132

12133

else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)

12134

BB = EmitPartwordAtomicBinary(MI, BB, true, 0);

12135

else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)

12136

BB = EmitPartwordAtomicBinary(MI, BB, false, 0);

12137

else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)

12138

BB = EmitAtomicBinary(MI, BB, 4, 0);

12139

else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)

12140

BB = EmitAtomicBinary(MI, BB, 8, 0);

12141

else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||

12142

MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||

12143

(Subtarget.hasPartwordAtomics() &&

12144

MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||

12145

(Subtarget.hasPartwordAtomics() &&

12146

MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {

12147

bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;

12148

12149

auto LoadMnemonic = PPC::LDARX;

12150

auto StoreMnemonic = PPC::STDCX;

12151

switch (MI.getOpcode()) {

12152

default:

12153

llvm_unreachable("Compare and swap of unknown size")::llvm::llvm_unreachable_internal("Compare and swap of unknown size"
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 12153);

12154

case PPC::ATOMIC_CMP_SWAP_I8:

12155

LoadMnemonic = PPC::LBARX;

12156

StoreMnemonic = PPC::STBCX;

12157

assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.")((Subtarget.hasPartwordAtomics() && "No support partword atomics."
) ? static_cast<void> (0) : __assert_fail ("Subtarget.hasPartwordAtomics() && \"No support partword atomics.\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 12157, __PRETTY_FUNCTION__));

12158

break;

12159

case PPC::ATOMIC_CMP_SWAP_I16:

12160

LoadMnemonic = PPC::LHARX;

12161

StoreMnemonic = PPC::STHCX;

12162

assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.")((Subtarget.hasPartwordAtomics() && "No support partword atomics."
) ? static_cast<void> (0) : __assert_fail ("Subtarget.hasPartwordAtomics() && \"No support partword atomics.\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 12162, __PRETTY_FUNCTION__));

12163

break;

12164

case PPC::ATOMIC_CMP_SWAP_I32:

12165

LoadMnemonic = PPC::LWARX;

12166

StoreMnemonic = PPC::STWCX;

12167

break;

12168

case PPC::ATOMIC_CMP_SWAP_I64:

12169

LoadMnemonic = PPC::LDARX;

12170

StoreMnemonic = PPC::STDCX;

12171

break;

12172

}

12173

Register dest = MI.getOperand(0).getReg();

12174

Register ptrA = MI.getOperand(1).getReg();

12175

Register ptrB = MI.getOperand(2).getReg();

12176

Register oldval = MI.getOperand(3).getReg();

12177

Register newval = MI.getOperand(4).getReg();

12178

DebugLoc dl = MI.getDebugLoc();

12179

12180

MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);

12181

MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);

12182

MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);

12183

MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);

12184

F->insert(It, loop1MBB);

12185

F->insert(It, loop2MBB);

12186

F->insert(It, midMBB);

12187

F->insert(It, exitMBB);

12188

exitMBB->splice(exitMBB->begin(), BB,

12189

std::next(MachineBasicBlock::iterator(MI)), BB->end());

12190

exitMBB->transferSuccessorsAndUpdatePHIs(BB);

12191

12192

// thisMBB:

12193

// ...

12194

// fallthrough --> loopMBB

12195

BB->addSuccessor(loop1MBB);

12196

12197

// loop1MBB:

12198

// l[bhwd]arx dest, ptr

12199

// cmp[wd] dest, oldval

12200

// bne- midMBB

12201

// loop2MBB:

12202

// st[bhwd]cx. newval, ptr

12203

// bne- loopMBB

12204

// b exitBB

12205

// midMBB:

12206

// st[bhwd]cx. dest, ptr

12207

// exitBB:

12208

BB = loop1MBB;

12209

BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);

12210

BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)

12211

.addReg(oldval)

12212

.addReg(dest);

12213

BuildMI(BB, dl, TII->get(PPC::BCC))

12214

.addImm(PPC::PRED_NE)

12215

.addReg(PPC::CR0)

12216

.addMBB(midMBB);

12217

BB->addSuccessor(loop2MBB);

12218

BB->addSuccessor(midMBB);

12219

12220

BB = loop2MBB;

12221

BuildMI(BB, dl, TII->get(StoreMnemonic))

12222

.addReg(newval)

12223

.addReg(ptrA)

12224

.addReg(ptrB);

12225

BuildMI(BB, dl, TII->get(PPC::BCC))

12226

.addImm(PPC::PRED_NE)

12227

.addReg(PPC::CR0)

12228

.addMBB(loop1MBB);

12229

BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);

12230

BB->addSuccessor(loop1MBB);

12231

BB->addSuccessor(exitMBB);

12232

12233

BB = midMBB;

12234

BuildMI(BB, dl, TII->get(StoreMnemonic))

12235

.addReg(dest)

12236

.addReg(ptrA)

12237

.addReg(ptrB);

12238

BB->addSuccessor(exitMBB);

12239

12240

// exitMBB:

12241

// ...

12242

BB = exitMBB;

12243

} else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||

12244

MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {

12245

// We must use 64-bit registers for addresses when targeting 64-bit,

12246

// since we're actually doing arithmetic on them. Other registers

12247

// can be 32-bit.

12248

bool is64bit = Subtarget.isPPC64();

12249

bool isLittleEndian = Subtarget.isLittleEndian();

12250

bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;

12251

12252

Register dest = MI.getOperand(0).getReg();

12253

Register ptrA = MI.getOperand(1).getReg();

12254

Register ptrB = MI.getOperand(2).getReg();

12255

Register oldval = MI.getOperand(3).getReg();

12256

Register newval = MI.getOperand(4).getReg();

12257

DebugLoc dl = MI.getDebugLoc();

12258

12259

MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);

12260

MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);

12261

MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);

12262

MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);

12263

F->insert(It, loop1MBB);

12264

F->insert(It, loop2MBB);

12265

F->insert(It, midMBB);

12266

F->insert(It, exitMBB);

12267

exitMBB->splice(exitMBB->begin(), BB,

12268

std::next(MachineBasicBlock::iterator(MI)), BB->end());

12269

exitMBB->transferSuccessorsAndUpdatePHIs(BB);

12270

12271

MachineRegisterInfo &RegInfo = F->getRegInfo();

12272

const TargetRegisterClass *RC =

12273

is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;

12274

const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;

12275

12276

Register PtrReg = RegInfo.createVirtualRegister(RC);

12277

Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);

12278

Register ShiftReg =

12279

isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);

12280

Register NewVal2Reg = RegInfo.createVirtualRegister(GPRC);

12281

Register NewVal3Reg = RegInfo.createVirtualRegister(GPRC);

12282

Register OldVal2Reg = RegInfo.createVirtualRegister(GPRC);

12283

Register OldVal3Reg = RegInfo.createVirtualRegister(GPRC);

12284

Register MaskReg = RegInfo.createVirtualRegister(GPRC);

12285

Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);

12286

Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);

12287

Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);

12288

Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);

12289

Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);

12290

Register Ptr1Reg;

12291

Register TmpReg = RegInfo.createVirtualRegister(GPRC);

12292

Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;

12293

// thisMBB:

12294

// ...

12295

// fallthrough --> loopMBB

12296

BB->addSuccessor(loop1MBB);

12297

12298

// The 4-byte load must be aligned, while a char or short may be

12299

// anywhere in the word. Hence all this nasty bookkeeping code.

12300

// add ptr1, ptrA, ptrB [copy if ptrA==0]

12301

// rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]

12302

// xori shift, shift1, 24 [16]

12303

// rlwinm ptr, ptr1, 0, 0, 29

12304

// slw newval2, newval, shift

12305

// slw oldval2, oldval,shift

12306

// li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]

12307

// slw mask, mask2, shift

12308

// and newval3, newval2, mask

12309

// and oldval3, oldval2, mask

12310

// loop1MBB:

12311

// lwarx tmpDest, ptr

12312

// and tmp, tmpDest, mask

12313

// cmpw tmp, oldval3

12314

// bne- midMBB

12315

// loop2MBB:

12316

// andc tmp2, tmpDest, mask

12317

// or tmp4, tmp2, newval3

12318

// stwcx. tmp4, ptr

12319

// bne- loop1MBB

12320

// b exitBB

12321

// midMBB:

12322

// stwcx. tmpDest, ptr

12323

// exitBB:

12324

// srw dest, tmpDest, shift

12325

if (ptrA != ZeroReg) {

12326

Ptr1Reg = RegInfo.createVirtualRegister(RC);

12327

BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)

12328

.addReg(ptrA)

12329

.addReg(ptrB);

12330

} else {

12331

Ptr1Reg = ptrB;

12332

}

12333

12334

// We need use 32-bit subregister to avoid mismatch register class in 64-bit

12335

// mode.

12336

BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)

12337

.addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)

12338

.addImm(3)

12339

.addImm(27)

12340

.addImm(is8bit ? 28 : 27);

12341

if (!isLittleEndian)

12342

BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)

12343

.addReg(Shift1Reg)

12344

.addImm(is8bit ? 24 : 16);

12345

if (is64bit)

12346

BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)

12347

.addReg(Ptr1Reg)

12348

.addImm(0)

12349

.addImm(61);

12350

else

12351

BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)

12352

.addReg(Ptr1Reg)

12353

.addImm(0)

12354

.addImm(0)

12355

.addImm(29);

12356

BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)

12357

.addReg(newval)

12358

.addReg(ShiftReg);

12359

BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)

12360

.addReg(oldval)

12361

.addReg(ShiftReg);

12362

if (is8bit)

12363

BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);

12364

else {

12365

BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);

12366

BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)

12367

.addReg(Mask3Reg)

12368

.addImm(65535);

12369

}

12370

BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)

12371

.addReg(Mask2Reg)

12372

.addReg(ShiftReg);

12373

BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)

12374

.addReg(NewVal2Reg)

12375

.addReg(MaskReg);

12376

BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)

12377

.addReg(OldVal2Reg)

12378

.addReg(MaskReg);

12379

12380

BB = loop1MBB;

12381

BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)

12382

.addReg(ZeroReg)

12383

.addReg(PtrReg);

12384

BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)

12385

.addReg(TmpDestReg)

12386

.addReg(MaskReg);

12387

BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)

12388

.addReg(TmpReg)

12389

.addReg(OldVal3Reg);

12390

BuildMI(BB, dl, TII->get(PPC::BCC))

12391

.addImm(PPC::PRED_NE)

12392

.addReg(PPC::CR0)

12393

.addMBB(midMBB);

12394

BB->addSuccessor(loop2MBB);

12395

BB->addSuccessor(midMBB);

12396

12397

BB = loop2MBB;

12398

BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)

12399

.addReg(TmpDestReg)

12400

.addReg(MaskReg);

12401

BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)

12402

.addReg(Tmp2Reg)

12403

.addReg(NewVal3Reg);

12404

BuildMI(BB, dl, TII->get(PPC::STWCX))

12405

.addReg(Tmp4Reg)

12406

.addReg(ZeroReg)

12407

.addReg(PtrReg);

12408

BuildMI(BB, dl, TII->get(PPC::BCC))

12409

.addImm(PPC::PRED_NE)

12410

.addReg(PPC::CR0)

12411

.addMBB(loop1MBB);

12412

BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);

12413

BB->addSuccessor(loop1MBB);

12414

BB->addSuccessor(exitMBB);

12415

12416

BB = midMBB;

12417

BuildMI(BB, dl, TII->get(PPC::STWCX))

12418

.addReg(TmpDestReg)

12419

.addReg(ZeroReg)

12420

.addReg(PtrReg);

12421

BB->addSuccessor(exitMBB);

12422

12423

// exitMBB:

12424

// ...

12425

BB = exitMBB;

12426

BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)

12427

.addReg(TmpReg)

12428

.addReg(ShiftReg);

12429

} else if (MI.getOpcode() == PPC::FADDrtz) {

12430

// This pseudo performs an FADD with rounding mode temporarily forced

12431

// to round-to-zero. We emit this via custom inserter since the FPSCR

12432

// is not modeled at the SelectionDAG level.

12433

Register Dest = MI.getOperand(0).getReg();

12434

Register Src1 = MI.getOperand(1).getReg();

12435

Register Src2 = MI.getOperand(2).getReg();

12436

DebugLoc dl = MI.getDebugLoc();

12437

12438

MachineRegisterInfo &RegInfo = F->getRegInfo();

12439

Register MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);

12440

12441

// Save FPSCR value.

12442

BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);

12443

12444

// Set rounding mode to round-to-zero.

12445

BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1))

12446

.addImm(31)

12447

.addReg(PPC::RM, RegState::ImplicitDefine);

12448

12449

BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0))

12450

.addImm(30)

12451

.addReg(PPC::RM, RegState::ImplicitDefine);

12452

12453

// Perform addition.

12454

auto MIB = BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest)

12455

.addReg(Src1)

12456

.addReg(Src2);

12457

if (MI.getFlag(MachineInstr::NoFPExcept))

12458

MIB.setMIFlag(MachineInstr::NoFPExcept);

12459

12460

// Restore FPSCR value.

12461

BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);

12462

} else if (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||

12463

MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT ||

12464

MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||

12465

MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) {

12466

unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||

12467

MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)

12468

? PPC::ANDI8_rec

12469

: PPC::ANDI_rec;

12470

bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||

12471

MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);

12472

12473

MachineRegisterInfo &RegInfo = F->getRegInfo();

12474

Register Dest = RegInfo.createVirtualRegister(

12475

Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);

12476

12477

DebugLoc Dl = MI.getDebugLoc();

12478

BuildMI(*BB, MI, Dl, TII->get(Opcode), Dest)

12479

.addReg(MI.getOperand(1).getReg())

12480

.addImm(1);

12481

BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),

12482

MI.getOperand(0).getReg())

12483

.addReg(IsEQ ? PPC::CR0EQ : PPC::CR0GT);

12484

} else if (MI.getOpcode() == PPC::TCHECK_RET) {

12485

DebugLoc Dl = MI.getDebugLoc();

12486

MachineRegisterInfo &RegInfo = F->getRegInfo();

12487

Register CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);

12488

BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);

12489

BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),

12490

MI.getOperand(0).getReg())

12491

.addReg(CRReg);

12492

} else if (MI.getOpcode() == PPC::TBEGIN_RET) {

12493

DebugLoc Dl = MI.getDebugLoc();

12494

unsigned Imm = MI.getOperand(1).getImm();

12495

BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);

12496

BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),

12497

MI.getOperand(0).getReg())

12498

.addReg(PPC::CR0EQ);

12499

} else if (MI.getOpcode() == PPC::SETRNDi) {

12500

DebugLoc dl = MI.getDebugLoc();

12501

Register OldFPSCRReg = MI.getOperand(0).getReg();

12502

12503

// Save FPSCR value.

12504

BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);

12505

12506

// The floating point rounding mode is in the bits 62:63 of FPCSR, and has

12507

// the following settings:

12508

// 00 Round to nearest

12509

// 01 Round to 0

12510

// 10 Round to +inf

12511

// 11 Round to -inf

12512

12513

// When the operand is immediate, using the two least significant bits of

12514

// the immediate to set the bits 62:63 of FPSCR.

12515

unsigned Mode = MI.getOperand(1).getImm();

12516

BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))

12517

.addImm(31)

12518

.addReg(PPC::RM, RegState::ImplicitDefine);

12519

12520

BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))

12521

.addImm(30)

12522

.addReg(PPC::RM, RegState::ImplicitDefine);

12523

} else if (MI.getOpcode() == PPC::SETRND) {

12524

DebugLoc dl = MI.getDebugLoc();

12525

12526

// Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg

12527

// or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.

12528

// If the target doesn't have DirectMove, we should use stack to do the

12529

// conversion, because the target doesn't have the instructions like mtvsrd

12530

// or mfvsrd to do this conversion directly.

12531

auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {

12532

if (Subtarget.hasDirectMove()) {

12533

BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)

12534

.addReg(SrcReg);

12535

} else {

12536

// Use stack to do the register copy.

12537

unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;

12538

MachineRegisterInfo &RegInfo = F->getRegInfo();

12539

const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);

12540

if (RC == &PPC::F8RCRegClass) {

12541

// Copy register from F8RCRegClass to G8RCRegclass.

12542

assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&(((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
"Unsupported RegClass.") ? static_cast<void> (0) : __assert_fail
("(RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) && \"Unsupported RegClass.\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 12543, __PRETTY_FUNCTION__))

12543

"Unsupported RegClass.")(((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
"Unsupported RegClass.") ? static_cast<void> (0) : __assert_fail
("(RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) && \"Unsupported RegClass.\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 12543, __PRETTY_FUNCTION__));

12544

12545

StoreOp = PPC::STFD;

12546

LoadOp = PPC::LD;

12547

} else {

12548

// Copy register from G8RCRegClass to F8RCRegclass.

12549

assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&(((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
(RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
"Unsupported RegClass.") ? static_cast<void> (0) : __assert_fail
("(RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) && (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) && \"Unsupported RegClass.\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 12551, __PRETTY_FUNCTION__))

12550

(RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&(((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
(RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
"Unsupported RegClass.") ? static_cast<void> (0) : __assert_fail
("(RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) && (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) && \"Unsupported RegClass.\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 12551, __PRETTY_FUNCTION__))

12551

"Unsupported RegClass.")(((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
(RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
"Unsupported RegClass.") ? static_cast<void> (0) : __assert_fail
("(RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) && (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) && \"Unsupported RegClass.\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 12551, __PRETTY_FUNCTION__));

12552

}

12553

12554

MachineFrameInfo &MFI = F->getFrameInfo();

12555

int FrameIdx = MFI.CreateStackObject(8, Align(8), false);

12556

12557

MachineMemOperand *MMOStore = F->getMachineMemOperand(

12558

MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),

12559

MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),

12560

MFI.getObjectAlign(FrameIdx));

12561

12562

// Store the SrcReg into the stack.

12563

BuildMI(*BB, MI, dl, TII->get(StoreOp))

12564

.addReg(SrcReg)

12565

.addImm(0)

12566

.addFrameIndex(FrameIdx)

12567

.addMemOperand(MMOStore);

12568

12569

MachineMemOperand *MMOLoad = F->getMachineMemOperand(

12570

MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),

12571

MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),

12572

MFI.getObjectAlign(FrameIdx));

12573

12574

// Load from the stack where SrcReg is stored, and save to DestReg,

12575

// so we have done the RegClass conversion from RegClass::SrcReg to

12576

// RegClass::DestReg.

12577

BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)

12578

.addImm(0)

12579

.addFrameIndex(FrameIdx)

12580

.addMemOperand(MMOLoad);

12581

}

12582

};

12583

12584

Register OldFPSCRReg = MI.getOperand(0).getReg();

12585

12586

// Save FPSCR value.

12587

BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);

12588

12589

// When the operand is gprc register, use two least significant bits of the

12590

// register and mtfsf instruction to set the bits 62:63 of FPSCR.

12591

//

12592

// copy OldFPSCRTmpReg, OldFPSCRReg

12593

// (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)

12594

// rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62

12595

// copy NewFPSCRReg, NewFPSCRTmpReg

12596

// mtfsf 255, NewFPSCRReg

12597

MachineOperand SrcOp = MI.getOperand(1);

12598

MachineRegisterInfo &RegInfo = F->getRegInfo();

12599

Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);

12600

12601

copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);

12602

12603

Register ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);

12604

Register ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);

12605

12606

// The first operand of INSERT_SUBREG should be a register which has

12607

// subregisters, we only care about its RegClass, so we should use an

12608

// IMPLICIT_DEF register.

12609

BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);

12610

BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)

12611

.addReg(ImDefReg)

12612

.add(SrcOp)

12613

.addImm(1);

12614

12615

Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);

12616

BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)

12617

.addReg(OldFPSCRTmpReg)

12618

.addReg(ExtSrcReg)

12619

.addImm(0)

12620

.addImm(62);

12621

12622

Register NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);

12623

copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);

12624

12625

// The mask 255 means that put the 32:63 bits of NewFPSCRReg to the 32:63

12626

// bits of FPSCR.

12627

BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))

12628

.addImm(255)

12629

.addReg(NewFPSCRReg)

12630

.addImm(0)

12631

.addImm(0);

12632

} else if (MI.getOpcode() == PPC::SETFLM) {

12633

DebugLoc Dl = MI.getDebugLoc();

12634

12635

// Result of setflm is previous FPSCR content, so we need to save it first.

12636

Register OldFPSCRReg = MI.getOperand(0).getReg();

12637

BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);

12638

12639

// Put bits in 32:63 to FPSCR.

12640

Register NewFPSCRReg = MI.getOperand(1).getReg();

12641

BuildMI(*BB, MI, Dl, TII->get(PPC::MTFSF))

12642

.addImm(255)

12643

.addReg(NewFPSCRReg)

12644

.addImm(0)

12645

.addImm(0);

12646

} else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||

12647

MI.getOpcode() == PPC::PROBED_ALLOCA_64) {

12648

return emitProbedAlloca(MI, BB);

12649

} else {

12650

llvm_unreachable("Unexpected instr type to insert")::llvm::llvm_unreachable_internal("Unexpected instr type to insert"
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 12650);

12651

}

12652

12653

MI.eraseFromParent(); // The pseudo instruction is gone now.

12654

return BB;

12655

}

12656

12657

//===----------------------------------------------------------------------===//

12658

// Target Optimization Hooks

12659

//===----------------------------------------------------------------------===//

12660

12661

/// Returns the default number of Newton-Raphson refinement steps to apply to
/// a hardware reciprocal / reciprocal-square-root estimate of type \p VT.
///
/// Convergence is quadratic, i.e. each iteration roughly doubles the number
/// of correct digits. For both FRE and FRSQRTE the minimum architected
/// relative accuracy is 2^-5; when the subtarget reports hasRecipPrec() it is
/// 2^-14. IEEE float has 23 digits and double has 52 digits, so f64 (scalar
/// or vector element) needs one step more than f32.
static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
  // Steps needed for single precision given the estimate's starting accuracy.
  const int SinglePrecisionSteps = Subtarget.hasRecipPrec() ? 1 : 3;

  // Double precision has more digits, so it gets one additional iteration.
  const bool IsDoublePrecision = VT.getScalarType() == MVT::f64;
  return IsDoublePrecision ? SinglePrecisionSteps + 1 : SinglePrecisionSteps;
}

12671

12672

/// Builds a PPCISD::FRSQRTE estimate node for \p Operand when the subtarget
/// supports one for the operand's value type; otherwise returns an empty
/// SDValue.
///
/// \param RefinementSteps  In/out: if it is ReciprocalEstimate::Unspecified
///                         on entry, it is replaced by the PPC default from
///                         getEstimateRefinementSteps().
/// \param UseOneConstNR    Out: set to the negation of
///                         Subtarget.needsTwoConstNR() when an estimate is
///                         produced; left untouched otherwise.
/// \p Enabled and \p Reciprocal are not consulted by this implementation.
SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
                                           int Enabled, int &RefinementSteps,
                                           bool &UseOneConstNR,
                                           bool Reciprocal) const {
  EVT OpVT = Operand.getValueType();

  // Each supported type is gated on the subtarget feature that is checked
  // alongside it.
  const bool HasEstimate = (OpVT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
                           (OpVT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
                           (OpVT == MVT::v4f32 && Subtarget.hasAltivec()) ||
                           (OpVT == MVT::v2f64 && Subtarget.hasVSX());
  if (!HasEstimate)
    return SDValue();

  if (RefinementSteps == ReciprocalEstimate::Unspecified)
    RefinementSteps = getEstimateRefinementSteps(OpVT, Subtarget);

  // The Newton-Raphson computation with a single constant does not provide
  // enough accuracy on some CPUs.
  UseOneConstNR = !Subtarget.needsTwoConstNR();
  return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), OpVT, Operand);
}

12691

12692

/// Builds a PPCISD::FRE estimate node for \p Operand when the subtarget
/// supports one for the operand's value type; otherwise returns an empty
/// SDValue.
///
/// \param RefinementSteps  In/out: if it is ReciprocalEstimate::Unspecified
///                         on entry, it is replaced by the PPC default from
///                         getEstimateRefinementSteps().
/// \p Enabled is not consulted by this implementation.
SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
                                            int Enabled,
                                            int &RefinementSteps) const {
  EVT OpVT = Operand.getValueType();

  // Each supported type is gated on the subtarget feature that is checked
  // alongside it.
  const bool HasEstimate = (OpVT == MVT::f32 && Subtarget.hasFRES()) ||
                           (OpVT == MVT::f64 && Subtarget.hasFRE()) ||
                           (OpVT == MVT::v4f32 && Subtarget.hasAltivec()) ||
                           (OpVT == MVT::v2f64 && Subtarget.hasVSX());
  if (!HasEstimate)
    return SDValue();

  if (RefinementSteps == ReciprocalEstimate::Unspecified)
    RefinementSteps = getEstimateRefinementSteps(OpVT, Subtarget);

  return DAG.getNode(PPCISD::FRE, SDLoc(Operand), OpVT, Operand);
}

12706

12707

unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {

12708

// Note: This functionality is used only when unsafe-fp-math is enabled, and

12709

// on cores with reciprocal estimates (which are used when unsafe-fp-math is

12710

// enabled for division), this functionality is redundant with the default

12711

// combiner logic (once the division -> reciprocal/multiply transformation

12712

// has taken place). As a result, this matters more for older cores than for

12713

// newer ones.

12714

12715

// Combine multiple FDIVs with the same divisor into multiple FMULs by the

12716

// reciprocal if there are two or more FDIVs (for embedded cores with only

12717

// one FP pipeline) for three or more FDIVs (for generic OOO cores).

12718

switch (Subtarget.getCPUDirective()) {

12719

default:

12720

return 3;

12721

case PPC::DIR_440:

12722

case PPC::DIR_A2:

12723

case PPC::DIR_E500:

12724

case PPC::DIR_E500mc:

12725

case PPC::DIR_E5500:

12726

return 2;

12727

}

12728

}

12729

12730

// isConsecutiveLSLoc needs to work even if all adds have not yet been

12731

// collapsed, and so we need to look through chains of them.

12732

static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,

12733

int64_t& Offset, SelectionDAG &DAG) {

12734

if (DAG.isBaseWithConstantOffset(Loc)) {

12735

Base = Loc.getOperand(0);

12736

Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();

12737

12738

// The base might itself be a base plus an offset, and if so, accumulate

12739

// that as well.

12740

getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);

12741

}

12742

}

12743

12744

static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,

12745

unsigned Bytes, int Dist,

12746

SelectionDAG &DAG) {

12747

if (VT.getSizeInBits() / 8 != Bytes)

12748

return false;

12749

12750

SDValue BaseLoc = Base->getBasePtr();

12751

if (Loc.getOpcode() == ISD::FrameIndex) {

12752

if (BaseLoc.getOpcode() != ISD::FrameIndex)

12753

return false;

12754

const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();

12755

int FI = cast<FrameIndexSDNode>(Loc)->getIndex();

12756

int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();

12757

int FS = MFI.getObjectSize(FI);

12758

int BFS = MFI.getObjectSize(BFI);

12759

if (FS != BFS || FS != (int)Bytes) return false;

12760

return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);

12761

}

12762

12763

SDValue Base1 = Loc, Base2 = BaseLoc;

12764

int64_t Offset1 = 0, Offset2 = 0;

12765

getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);

12766

getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);

12767

if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))

12768

return true;

12769

12770

const TargetLowering &TLI = DAG.getTargetLoweringInfo();

12771

const GlobalValue *GV1 = nullptr;

12772

const GlobalValue *GV2 = nullptr;

12773

Offset1 = 0;

12774

Offset2 = 0;

12775

bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);

12776

bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);

12777

if (isGA1 && isGA2 && GV1 == GV2)

12778

return Offset1 == (Offset2 + Dist*Bytes);

12779

return false;

12780

}

12781

12782

// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does

12783

// not enforce equality of the chain operands.

12784

static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,

12785

unsigned Bytes, int Dist,

12786

SelectionDAG &DAG) {

12787

if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {

12788

EVT VT = LS->getMemoryVT();

12789

SDValue Loc = LS->getBasePtr();

12790

return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);

12791

}

12792

12793

if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {

12794

EVT VT;

12795

switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {

12796

default: return false;

12797

case Intrinsic::ppc_altivec_lvx:

12798

case Intrinsic::ppc_altivec_lvxl:

12799

case Intrinsic::ppc_vsx_lxvw4x:

12800

case Intrinsic::ppc_vsx_lxvw4x_be:

12801

VT = MVT::v4i32;

12802

break;

12803

case Intrinsic::ppc_vsx_lxvd2x:

12804

case Intrinsic::ppc_vsx_lxvd2x_be:

12805

VT = MVT::v2f64;

12806

break;

12807

case Intrinsic::ppc_altivec_lvebx:

12808

VT = MVT::i8;

12809

break;

12810

case Intrinsic::ppc_altivec_lvehx:

12811

VT = MVT::i16;

12812

break;

12813

case Intrinsic::ppc_altivec_lvewx:

12814

VT = MVT::i32;

12815

break;

12816

}

12817

12818

return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);

12819

}

12820

12821

if (N->getOpcode() == ISD::INTRINSIC_VOID) {

12822

EVT VT;

12823

switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {

12824

default: return false;

12825

case Intrinsic::ppc_altivec_stvx:

12826

case Intrinsic::ppc_altivec_stvxl:

12827

case Intrinsic::ppc_vsx_stxvw4x:

12828

VT = MVT::v4i32;

12829

break;

12830

case Intrinsic::ppc_vsx_stxvd2x:

12831

VT = MVT::v2f64;

12832

break;

12833

case Intrinsic::ppc_vsx_stxvw4x_be:

12834

VT = MVT::v4i32;

12835

break;

12836

case Intrinsic::ppc_vsx_stxvd2x_be:

12837

VT = MVT::v2f64;

12838

break;

12839

case Intrinsic::ppc_altivec_stvebx:

12840

VT = MVT::i8;

12841

break;

12842

case Intrinsic::ppc_altivec_stvehx:

12843

VT = MVT::i16;

12844

break;

12845

case Intrinsic::ppc_altivec_stvewx:

12846

VT = MVT::i32;

12847

break;

12848

}

12849

12850

return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);

12851

}

12852

12853

return false;

12854

}

12855

12856

// Return true is there is a nearyby consecutive load to the one provided

12857

// (regardless of alignment). We search up and down the chain, looking though

12858

// token factors and other loads (but nothing else). As a result, a true result

12859

// indicates that it is safe to create a new consecutive load adjacent to the

12860

// load provided.

12861

static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {

12862

SDValue Chain = LD->getChain();

12863

EVT VT = LD->getMemoryVT();

12864

12865

SmallSet<SDNode *, 16> LoadRoots;

12866

SmallVector<SDNode *, 8> Queue(1, Chain.getNode());

12867

SmallSet<SDNode *, 16> Visited;

12868

12869

// First, search up the chain, branching to follow all token-factor operands.

12870

// If we find a consecutive load, then we're done, otherwise, record all

12871

// nodes just above the top-level loads and token factors.

12872

while (!Queue.empty()) {

12873

SDNode *ChainNext = Queue.pop_back_val();

12874

if (!Visited.insert(ChainNext).second)

12875

continue;

12876

12877

if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {

12878

if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))

12879

return true;

12880

12881

if (!Visited.count(ChainLD->getChain().getNode()))

12882

Queue.push_back(ChainLD->getChain().getNode());

12883

} else if (ChainNext->getOpcode() == ISD::TokenFactor) {

12884

for (const SDUse &O : ChainNext->ops())

12885

if (!Visited.count(O.getNode()))

12886

Queue.push_back(O.getNode());

12887

} else

12888

LoadRoots.insert(ChainNext);

12889

}

12890

12891

// Second, search down the chain, starting from the top-level nodes recorded

12892

// in the first phase. These top-level nodes are the nodes just above all

12893

// loads and token factors. Starting with their uses, recursively look though

12894

// all loads (just the chain uses) and token factors to find a consecutive

12895

// load.

12896

Visited.clear();

12897

Queue.clear();

12898

12899

for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(),

12900

IE = LoadRoots.end(); I != IE; ++I) {

12901

Queue.push_back(*I);

12902

12903

while (!Queue.empty()) {

12904

SDNode *LoadRoot = Queue.pop_back_val();

12905

if (!Visited.insert(LoadRoot).second)

12906

continue;

12907

12908

if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))

12909

if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))

12910

return true;

12911

12912

for (SDNode::use_iterator UI = LoadRoot->use_begin(),

12913

UE = LoadRoot->use_end(); UI != UE; ++UI)

12914

if (((isa<MemSDNode>(*UI) &&

12915

cast<MemSDNode>(*UI)->getChain().getNode() == LoadRoot) ||

12916

UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI))

12917

Queue.push_back(*UI);

12918

}

12919

}

12920

12921

return false;

12922

}

12923

12924

/// This function is called when we have proved that a SETCC node can be replaced

12925

/// by subtraction (and other supporting instructions) so that the result of

12926

/// comparison is kept in a GPR instead of CR. This function is purely for

12927

/// codegen purposes and has some flags to guide the codegen process.

12928

static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,

12929

bool Swap, SDLoc &DL, SelectionDAG &DAG) {

12930

assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.")((N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected."
) ? static_cast<void> (0) : __assert_fail ("N->getOpcode() == ISD::SETCC && \"ISD::SETCC Expected.\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 12930, __PRETTY_FUNCTION__));

12931

12932

// Zero extend the operands to the largest legal integer. Originally, they

12933

// must be of a strictly smaller size.

12934

auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),

12935

DAG.getConstant(Size, DL, MVT::i32));

12936

auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),

12937

DAG.getConstant(Size, DL, MVT::i32));

12938

12939

// Swap if needed. Depends on the condition code.

12940

if (Swap)

12941

std::swap(Op0, Op1);

12942

12943

// Subtract extended integers.

12944

auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);

12945

12946

// Move the sign bit to the least significant position and zero out the rest.

12947

// Now the least significant bit carries the result of original comparison.

12948

auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,

12949

DAG.getConstant(Size - 1, DL, MVT::i32));

12950

auto Final = Shifted;

12951

12952

// Complement the result if needed. Based on the condition code.

12953

if (Complement)

12954

Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,

12955

DAG.getConstant(1, DL, MVT::i64));

12956

12957

return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);

12958

}

12959

12960

/// Try to replace an unsigned-ordering SETCC with a subtraction sequence.
///
/// The rewrite is attempted only after DAG legalization, only when every user
/// of the SETCC is a ZERO_EXTEND, only when the compared operands are narrower
/// than the largest legal integer type, and only for SETULT / SETULE /
/// SETUGT / SETUGE. In every other case an empty SDValue is returned.
SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
  assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");

  SelectionDAG &DAG = DCI.DAG;
  SDLoc DL(N);

  // The analysis below depends on operand sizes, so wait until all types
  // are legal.
  if (!DCI.isAfterLegalizeDAG())
    return SDValue();

  // Every consumer of the comparison must widen it with a zero extension.
  for (SDNode *User : N->uses())
    if (User->getOpcode() != ISD::ZERO_EXTEND)
      return SDValue();

  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  auto OpSize = N->getOperand(0).getValueSizeInBits();

  unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();

  // Operands must be strictly narrower than the widest legal integer.
  if (!(OpSize < Size))
    return SDValue();

  // Pick the complement/swap combination for the condition code.
  bool Complement = false;
  bool Swap = false;
  switch (CC) {
  default:
    return SDValue();
  case ISD::SETULT:
    break;
  case ISD::SETULE:
    Complement = true;
    Swap = true;
    break;
  case ISD::SETUGT:
    Swap = true;
    break;
  case ISD::SETUGE:
    Complement = true;
    break;
  }

  return generateEquivalentSub(N, Size, Complement, Swap, DL, DAG);
}

13001

13002

/// Combine for TRUNCATE-to-i1, SETCC and SELECT_CC nodes when CR bits are
/// being tracked.
///
/// Looks for a cluster of and/or/xor/select/select_cc/truncate/extend nodes
/// feeding N whose leaves are all constants or extensions from i1, e.g.
///   trunc(binary-ops(zext(x), zext(y)))
///   trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
/// and, if the cluster has no users outside itself, rebuilds it directly on
/// i1 values so that the values need not be moved into GPRs.
///
/// \returns an empty SDValue when nothing was changed; N's first operand when
/// N is a TRUNCATE that has become redundant; otherwise SDValue(N, 0) for a
/// comparison whose operands were rewritten in place. An unsigned SETCC whose
/// high bits are not known zero is forwarded to ConvertSETCCToSubtract.
SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);

  assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");

  // Opcode classification shared by the checks below.
  const auto IsExtensionOpc = [](unsigned Opc) {
    return Opc == ISD::SIGN_EXTEND || Opc == ISD::ZERO_EXTEND ||
           Opc == ISD::ANY_EXTEND;
  };
  const auto IsClusterOpc = [&](unsigned Opc) {
    return Opc == ISD::AND || Opc == ISD::OR || Opc == ISD::XOR ||
           Opc == ISD::SELECT || Opc == ISD::SELECT_CC ||
           Opc == ISD::TRUNCATE || IsExtensionOpc(Opc);
  };
  // A leaf of the cluster: a constant, or an extension whose source is i1.
  const auto IsLeafInput = [&](SDValue V) {
    return (IsExtensionOpc(V.getOpcode()) &&
            V.getOperand(0).getValueType() == MVT::i1) ||
           isa<ConstantSDNode>(V);
  };
  // True while a value still has to be promoted to i1.
  const auto NotYetI1 = [](SDValue V) {
    return !isa<ConstantSDNode>(V) && V.getValueType() != MVT::i1;
  };

  const unsigned RootOpc = N->getOpcode();
  const bool RootIsCompare =
      RootOpc == ISD::SETCC || RootOpc == ISD::SELECT_CC;

  // The "truncation" handled here is either a real truncate to i1 or the
  // implicit one performed by a setcc / select_cc.
  if (RootOpc == ISD::TRUNCATE && N->getValueType(0) != MVT::i1)
    return SDValue();

  const EVT SrcVT = N->getOperand(0).getValueType();
  if (SrcVT != MVT::i32 && SrcVT != MVT::i64)
    return SDValue();

  if (RootIsCompare) {
    // For a comparison, every bit other than bit 0 must be irrelevant to
    // the outcome.
    ISD::CondCode CC =
        cast<CondCodeSDNode>(N->getOperand(RootOpc == ISD::SETCC ? 2 : 4))
            ->get();
    unsigned OpBits = N->getOperand(0).getValueSizeInBits();
    const SDValue CmpLHS = N->getOperand(0);
    const SDValue CmpRHS = N->getOperand(1);

    if (ISD::isSignedIntSetCC(CC)) {
      // Signed: both operands must consist solely of sign bits.
      if (DAG.ComputeNumSignBits(CmpLHS) != OpBits ||
          DAG.ComputeNumSignBits(CmpRHS) != OpBits)
        return SDValue();
    } else if (ISD::isUnsignedIntSetCC(CC)) {
      // Unsigned: everything above bit 0 must be known zero. If not, a
      // SETCC still gets a chance at the subtraction-based rewrite.
      const APInt UpperBits = APInt::getHighBitsSet(OpBits, OpBits - 1);
      if (!DAG.MaskedValueIsZero(CmpLHS, UpperBits) ||
          !DAG.MaskedValueIsZero(CmpRHS, UpperBits)) {
        if (RootOpc == ISD::SETCC)
          return ConvertSETCCToSubtract(N, DCI);
        return SDValue();
      }
    } else {
      // Neither signed nor unsigned: require identical knowledge about all
      // bits other than bit 0 on both sides.
      KnownBits LHSKnown = DAG.computeKnownBits(CmpLHS);
      KnownBits RHSKnown = DAG.computeKnownBits(CmpRHS);

      // Bit 0 is the payload, so drop whatever is known about it.
      LHSKnown.Zero.clearBit(0);
      LHSKnown.One.clearBit(0);
      RHSKnown.Zero.clearBit(0);
      RHSKnown.One.clearBit(0);

      if (LHSKnown.Zero != RHSKnown.Zero || LHSKnown.One != RHSKnown.One)
        return SDValue();
    }
  }

  // The high bits are irrelevant from here on. The root operand(s) must be
  // operations this combine knows how to rebuild on i1.
  if (!IsClusterOpc(N->getOperand(0).getOpcode()))
    return SDValue();

  if (RootIsCompare && !IsClusterOpc(N->getOperand(1).getOpcode()))
    return SDValue();

  SmallVector<SDValue, 4> Inputs;
  SmallVector<SDValue, 8> BinOps, PromOps;
  SmallPtrSet<SDNode *, 16> Visited;

  // Seed the traversal with operand 0 and, unless N is a TRUNCATE, with
  // operand 1 as well.
  for (unsigned i = 0; i < 2; ++i) {
    const SDValue Seed = N->getOperand(i);
    if (IsLeafInput(Seed))
      Inputs.push_back(Seed);
    else
      BinOps.push_back(Seed);

    if (RootOpc == ISD::TRUNCATE)
      break;
  }

  // Walk the cluster: each operation goes to PromOps, each leaf to Inputs.
  while (!BinOps.empty()) {
    const SDValue Cur = BinOps.pop_back_val();

    if (!Visited.insert(Cur.getNode()).second)
      continue;

    PromOps.push_back(Cur);

    const unsigned CurOpc = Cur.getOpcode();
    for (unsigned i = 0, ie = Cur.getNumOperands(); i != ie; ++i) {
      // Only the value operands are promoted: not the condition of a
      // select, and for select_cc only operands 2 and 3.
      if (CurOpc == ISD::SELECT && i == 0)
        continue;
      if (CurOpc == ISD::SELECT_CC && i != 2 && i != 3)
        continue;

      const SDValue Operand = Cur.getOperand(i);
      if (IsLeafInput(Operand)) {
        Inputs.push_back(Operand);
      } else if (IsClusterOpc(Operand.getOpcode())) {
        BinOps.push_back(Operand);
      } else {
        // Neither a leaf nor a supported operation: give up.
        return SDValue();
      }
    }
  }

  // A value may be promoted only if each of its users is N or a visited
  // cluster member, and it is never used as operand 0 of a SELECT or as
  // operand 0/1 of a SELECT_CC (those positions are not promoted).
  // FIXME: a select condition input that is also one of the promoted values
  // does occur in practice and could sometimes be handled.
  const auto StaysInsideCluster = [&](SDValue V) {
    for (SDNode *User : V.getNode()->uses()) {
      if (User != N && !Visited.count(User))
        return false;

      const unsigned UserOpc = User->getOpcode();
      if (UserOpc == ISD::SELECT) {
        if (User->getOperand(0) == V)
          return false;
      } else if (UserOpc == ISD::SELECT_CC) {
        if (User->getOperand(0) == V || User->getOperand(1) == V)
          return false;
      }
    }
    return true;
  };

  // Constants are exempt: they may legitimately have unrelated users.
  for (const SDValue &In : Inputs)
    if (!isa<ConstantSDNode>(In) && !StaysInsideCluster(In))
      return SDValue();

  for (const SDValue &Op : PromOps)
    if (!StaysInsideCluster(Op))
      return SDValue();

  // Point every non-constant leaf at the i1 value it was extended from.
  // Constants are rewritten per user further down instead.
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    const SDValue In = Inputs[i];
    if (!isa<ConstantSDNode>(In))
      DAG.ReplaceAllUsesOfValueWith(In, In.getOperand(0));
  }

  // Hold the operations through handles while the DAG is being rewritten.
  std::list<HandleSDNode> PromOpHandles;
  for (auto &PromOp : PromOps)
    PromOpHandles.emplace_back(PromOp);

  // Rebuild each operation with an i1 result. The list is consumed from the
  // back so that operands have usually been promoted first; an operation
  // whose operands are not ready is moved to the front for a later attempt.
  // Intermediate truncations and extensions simply disappear.
  while (!PromOpHandles.empty()) {
    SDValue PromOp = PromOpHandles.back().getValue();
    PromOpHandles.pop_back();

    const unsigned PromOpc = PromOp.getOpcode();
    if (PromOpc == ISD::TRUNCATE || IsExtensionOpc(PromOpc)) {
      if (NotYetI1(PromOp.getOperand(0))) {
        // Source not promoted yet; retry later.
        PromOpHandles.emplace_front(PromOp);
        continue;
      }

      SDValue RepValue = PromOp.getOperand(0);
      if (isa<ConstantSDNode>(RepValue))
        RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);

      DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
      continue;
    }

    // Index of the first of the two promoted value operands.
    unsigned C = 0;
    if (PromOpc == ISD::SELECT)
      C = 1;
    else if (PromOpc == ISD::SELECT_CC)
      C = 2;

    if (NotYetI1(PromOp.getOperand(C)) ||
        NotYetI1(PromOp.getOperand(C + 1))) {
      // Possible when an operand has several users inside the cluster;
      // retry once those have been handled.
      PromOpHandles.emplace_front(PromOp);
      continue;
    }

    SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
                                PromOp.getNode()->op_end());

    // Constant value operands are narrowed here, at the point of use.
    for (unsigned i = 0; i < 2; ++i)
      if (isa<ConstantSDNode>(Ops[C + i]))
        Ops[C + i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C + i]);

    DAG.ReplaceAllUsesOfValueWith(
        PromOp, DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
  }

  // A TRUNCATE root is now redundant: hand back its operand.
  if (N->getOpcode() == ISD::TRUNCATE)
    return N->getOperand(0);

  // A comparison root stays as it is; only its operand types changed (to i1).
  return SDValue(N, 0);
}

13282

13283

/// Combine for extension nodes whose operand is a cluster of
/// and/or/xor/select/select_cc operations fed only by truncations and
/// constants, e.g.
///   zext(binary-ops(trunc(x), trunc(y)))
///   zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
///
/// The cluster is rebuilt in the result type of N so the values can remain in
/// GPRs. It applies to i1 sources when CR bits are in use and to i32 sources
/// on PPC64 (32-to-64-bit extensions). If the truncation sources are not
/// known to be suitably extended already, a mask (zero extension) or a
/// shl/sra pair (sign extension) is applied to the rebuilt value.
///
/// \returns an empty SDValue when nothing was changed, otherwise the value
/// that replaces N.
SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);

  const EVT OutVT = N->getValueType(0);
  if (OutVT != MVT::i32 && OutVT != MVT::i64)
    return SDValue();

  const EVT InVT = N->getOperand(0).getValueType();
  const bool FromCRBit = InVT == MVT::i1 && Subtarget.useCRBits();
  const bool FromI32On64 = InVT == MVT::i32 && Subtarget.isPPC64();
  if (!(FromCRBit || FromI32On64))
    return SDValue();

  // Operations that can be rebuilt in the wider type.
  const auto IsClusterOpc = [](unsigned Opc) {
    return Opc == ISD::AND || Opc == ISD::OR || Opc == ISD::XOR ||
           Opc == ISD::SELECT || Opc == ISD::SELECT_CC;
  };

  if (!IsClusterOpc(N->getOperand(0).getOpcode()))
    return SDValue();

  SmallVector<SDValue, 4> Inputs;
  SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
  SmallPtrSet<SDNode *, 16> Visited;

  // Walk the cluster: operations are collected in PromOps, truncations and
  // constants in Inputs.
  while (!BinOps.empty()) {
    const SDValue Cur = BinOps.pop_back_val();

    if (!Visited.insert(Cur.getNode()).second)
      continue;

    PromOps.push_back(Cur);

    const unsigned CurOpc = Cur.getOpcode();
    for (unsigned i = 0, ie = Cur.getNumOperands(); i != ie; ++i) {
      // Only the value operands are promoted: not the condition of a
      // select, and for select_cc only operands 2 and 3.
      if (CurOpc == ISD::SELECT && i == 0)
        continue;
      if (CurOpc == ISD::SELECT_CC && i != 2 && i != 3)
        continue;

      const SDValue Operand = Cur.getOperand(i);
      if (Operand.getOpcode() == ISD::TRUNCATE ||
          isa<ConstantSDNode>(Operand)) {
        Inputs.push_back(Operand);
      } else if (IsClusterOpc(Operand.getOpcode())) {
        BinOps.push_back(Operand);
      } else {
        // Neither a truncation/constant nor a supported operation.
        return SDValue();
      }
    }
  }

  // SELECT / SELECT_CC nodes whose non-value operand 0 (index 0) or operand 1
  // (index 1) is itself part of the promoted set, mapped to that operand's
  // original type so it can be truncated back when the node is rebuilt.
  DenseMap<SDNode *, EVT> SelectTruncOp[2];

  // Check that every user of V is N or a visited cluster member, and record
  // any SELECT / SELECT_CC that consumes V in a comparison position.
  const auto UsersStayInCluster = [&](SDValue V) {
    for (SDNode *User : V.getNode()->uses()) {
      if (User != N && !Visited.count(User))
        return false;

      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == V)
          SelectTruncOp[0].insert(
              std::make_pair(User, User->getOperand(0).getValueType()));
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == V)
          SelectTruncOp[0].insert(
              std::make_pair(User, User->getOperand(0).getValueType()));
        if (User->getOperand(1) == V)
          SelectTruncOp[1].insert(
              std::make_pair(User, User->getOperand(1).getValueType()));
      }
    }
    return true;
  };

  // The cluster must be self-contained. Constants are exempt because they
  // may have unrelated users.
  for (const SDValue &In : Inputs)
    if (!isa<ConstantSDNode>(In) && !UsersStayInCluster(In))
      return SDValue();

  for (const SDValue &Op : PromOps)
    if (!UsersStayInCluster(Op))
      return SDValue();

  unsigned PromBits = N->getOperand(0).getValueSizeInBits();
  bool ReallyNeedsExt = false;
  if (N->getOpcode() != ISD::ANY_EXTEND) {
    // If any truncation source is not already sign/zero extended as the
    // final extension requires, the extension must still be done at the end.
    for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
      if (isa<ConstantSDNode>(Inputs[i]))
        continue;

      const SDValue TruncSrc = Inputs[i].getOperand(0);
      unsigned OpBits = TruncSrc.getValueSizeInBits();
      assert(PromBits < OpBits && "Truncation not to a smaller bit count?");

      bool Insufficient = false;
      if (N->getOpcode() == ISD::ZERO_EXTEND)
        Insufficient = !DAG.MaskedValueIsZero(
            TruncSrc, APInt::getHighBitsSet(OpBits, OpBits - PromBits));
      else if (N->getOpcode() == ISD::SIGN_EXTEND)
        Insufficient =
            DAG.ComputeNumSignBits(TruncSrc) < (OpBits - (PromBits - 1));

      if (Insufficient) {
        ReallyNeedsExt = true;
        break;
      }
    }
  }

  // Convert V to the output type using the same kind of extension as N. The
  // opcode is read from N at each call rather than cached.
  const auto ToOutVT = [&](SDValue V) {
    if (N->getOpcode() == ISD::SIGN_EXTEND)
      return DAG.getSExtOrTrunc(V, dl, N->getValueType(0));
    if (N->getOpcode() == ISD::ZERO_EXTEND)
      return DAG.getZExtOrTrunc(V, dl, N->getValueType(0));
    return DAG.getAnyExtOrTrunc(V, dl, N->getValueType(0));
  };

  // Replace each truncation with its source when that already has the output
  // type, otherwise with the source converted to the output type. Constants
  // are promoted per user below because they may have users outside the
  // cluster.
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    const SDValue In = Inputs[i];
    if (isa<ConstantSDNode>(In))
      continue;

    SDValue InSrc = In.getOperand(0);
    if (In.getValueType() == N->getValueType(0))
      DAG.ReplaceAllUsesOfValueWith(In, InSrc);
    else
      DAG.ReplaceAllUsesOfValueWith(In, ToOutVT(InSrc));
  }

  // Hold the operations through handles while the DAG is being rewritten.
  std::list<HandleSDNode> PromOpHandles;
  for (auto &PromOp : PromOps)
    PromOpHandles.emplace_back(PromOp);

  // True while a value operand still has to be promoted to the output type.
  const auto NotYetPromoted = [&](SDValue V) {
    return !isa<ConstantSDNode>(V) && V.getValueType() != N->getValueType(0);
  };

  // Rebuild each operation in the output type. The list is consumed from the
  // back so that operands have usually been promoted first; an operation
  // that is not ready is moved to the front for a later attempt.
  while (!PromOpHandles.empty()) {
    SDValue PromOp = PromOpHandles.back().getValue();
    PromOpHandles.pop_back();

    const unsigned PromOpc = PromOp.getOpcode();
    const bool IsSelectLike =
        PromOpc == ISD::SELECT || PromOpc == ISD::SELECT_CC;

    // Index of the first of the two promoted value operands.
    unsigned C = 0;
    if (PromOpc == ISD::SELECT)
      C = 1;
    else if (PromOpc == ISD::SELECT_CC)
      C = 2;

    if (NotYetPromoted(PromOp.getOperand(C)) ||
        NotYetPromoted(PromOp.getOperand(C + 1))) {
      // Possible when an operand has several users inside the cluster.
      PromOpHandles.emplace_front(PromOp);
      continue;
    }

    // Recorded comparison inputs of a SELECT / SELECT_CC must have been
    // promoted as well before the node is rebuilt.
    if (IsSelectLike) {
      if ((SelectTruncOp[0].count(PromOp.getNode()) &&
           PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
          (SelectTruncOp[1].count(PromOp.getNode()) &&
           PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
        PromOpHandles.emplace_front(PromOp);
        continue;
      }
    }

    SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
                                PromOp.getNode()->op_end());

    // Constant value operands are promoted here, at the point of use.
    for (unsigned i = 0; i < 2; ++i) {
      if (!isa<ConstantSDNode>(Ops[C + i]))
        continue;
      if (Ops[C + i].getValueType() == N->getValueType(0))
        continue;

      Ops[C + i] = ToOutVT(Ops[C + i]);
    }

    // Comparison inputs that were promoted as part of the cluster are
    // truncated back to the type recorded for them.
    if (IsSelectLike) {
      auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
      if (SI0 != SelectTruncOp[0].end())
        Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
      auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
      if (SI1 != SelectTruncOp[1].end())
        Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
    }

    DAG.ReplaceAllUsesOfValueWith(
        PromOp, DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
  }

  // Only the original extension is left. If the inputs were already
  // extended appropriately, its operand is the answer.
  if (!ReallyNeedsExt)
    return N->getOperand(0);

  // Zero extension: keep only the low PromBits bits (one bit for i1).
  if (N->getOpcode() == ISD::ZERO_EXTEND)
    return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
                       DAG.getConstant(APInt::getLowBitsSet(
                                           N->getValueSizeInBits(0), PromBits),
                                       dl, N->getValueType(0)));

  assert(N->getOpcode() == ISD::SIGN_EXTEND &&
         "Invalid extension type");

  // Sign extension: shift the promoted bits to the top, then shift back
  // arithmetically by the same amount.
  EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
  SDValue ShiftCst =
      DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
  SDValue ShiftedUp =
      DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst);
  return DAG.getNode(ISD::SRA, dl, N->getValueType(0), ShiftedUp, ShiftCst);
}

13565

13566

/// DAG combine for SETCC nodes.
///
/// For equality and inequality comparisons where one side is a single-use
/// `0 - y`, the comparison is rewritten as `x + y ==/!= 0`. Every other SETCC
/// is handed to DAGCombineTruncBoolExt.
SDValue PPCTargetLowering::combineSetCC(SDNode *N,
                                        DAGCombinerInfo &DCI) const {
  assert(N->getOpcode() == ISD::SETCC &&
         "Should be called with a SETCC node");

  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  if (CC != ISD::SETNE && CC != ISD::SETEQ)
    return DAGCombineTruncBoolExt(N, DCI);

  // Matches a negation written as a subtraction from zero, with one use.
  const auto IsSingleUseNegation = [](SDValue V) {
    return V.getOpcode() == ISD::SUB && isNullConstant(V.getOperand(0)) &&
           V.hasOneUse();
  };

  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  // Canonicalize a '0 - y' found on the left over to the right-hand side.
  if (IsSingleUseNegation(LHS))
    std::swap(LHS, RHS);

  // x == 0-y --> x+y == 0
  // x != 0-y --> x+y != 0
  if (!IsSingleUseNegation(RHS))
    return DAGCombineTruncBoolExt(N, DCI);

  SDLoc DL(N);
  SelectionDAG &DAG = DCI.DAG;
  EVT VT = N->getValueType(0);
  EVT OpVT = LHS.getValueType();
  SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
  return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
}

13596

13597

// Is this an extending load from an f32 to an f64?

13598

static bool isFPExtLoad(SDValue Op) {

13599

if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))

13600

return LD->getExtensionType() == ISD::EXTLOAD &&

13601

Op.getValueType() == MVT::f64;

13602

return false;

13603

}

13604

13605

/// Reduces the number of fp-to-int conversion when building a vector.

13606

///

13607

/// If this vector is built out of floating to integer conversions,

13608

/// transform it to a vector built out of floating point values followed by a

13609

/// single floating to integer conversion of the vector.

13610

/// Namely (build_vector (fptosi $A), (fptosi $B), ...)

13611

/// becomes (fptosi (build_vector ($A, $B, ...)))

13612

SDValue PPCTargetLowering::

13613

combineElementTruncationToVectorTruncation(SDNode *N,

13614

DAGCombinerInfo &DCI) const {

13615

assert(N->getOpcode() == ISD::BUILD_VECTOR &&((N->getOpcode() == ISD::BUILD_VECTOR && "Should be called with a BUILD_VECTOR node"
) ? static_cast<void> (0) : __assert_fail ("N->getOpcode() == ISD::BUILD_VECTOR && \"Should be called with a BUILD_VECTOR node\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 13616, __PRETTY_FUNCTION__))

13616

"Should be called with a BUILD_VECTOR node")((N->getOpcode() == ISD::BUILD_VECTOR && "Should be called with a BUILD_VECTOR node"
) ? static_cast<void> (0) : __assert_fail ("N->getOpcode() == ISD::BUILD_VECTOR && \"Should be called with a BUILD_VECTOR node\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 13616, __PRETTY_FUNCTION__));

13617

13618

SelectionDAG &DAG = DCI.DAG;

13619

SDLoc dl(N);

13620

13621

SDValue FirstInput = N->getOperand(0);

13622

assert(FirstInput.getOpcode() == PPCISD::MFVSR &&((FirstInput.getOpcode() == PPCISD::MFVSR && "The input operand must be an fp-to-int conversion."
) ? static_cast<void> (0) : __assert_fail ("FirstInput.getOpcode() == PPCISD::MFVSR && \"The input operand must be an fp-to-int conversion.\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 13623, __PRETTY_FUNCTION__))

13623

"The input operand must be an fp-to-int conversion.")((FirstInput.getOpcode() == PPCISD::MFVSR && "The input operand must be an fp-to-int conversion."
) ? static_cast<void> (0) : __assert_fail ("FirstInput.getOpcode() == PPCISD::MFVSR && \"The input operand must be an fp-to-int conversion.\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 13623, __PRETTY_FUNCTION__));

13624

13625

// This combine happens after legalization so the fp_to_[su]i nodes are

13626

// already converted to PPCSISD nodes.

13627

unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();

13628

if (FirstConversion == PPCISD::FCTIDZ ||

13629

FirstConversion == PPCISD::FCTIDUZ ||

13630

FirstConversion == PPCISD::FCTIWZ ||

13631

FirstConversion == PPCISD::FCTIWUZ) {

13632

bool IsSplat = true;

13633

bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||

13634

FirstConversion == PPCISD::FCTIWUZ;

13635

EVT SrcVT = FirstInput.getOperand(0).getValueType();

13636

SmallVector<SDValue, 4> Ops;

13637

EVT TargetVT = N->getValueType(0);

13638

for (int i = 0, e = N->getNumOperands(); i < e; ++i) {

13639

SDValue NextOp = N->getOperand(i);

13640

if (NextOp.getOpcode() != PPCISD::MFVSR)

13641

return SDValue();

13642

unsigned NextConversion = NextOp.getOperand(0).getOpcode();

13643

if (NextConversion != FirstConversion)

13644

return SDValue();

13645

// If we are converting to 32-bit integers, we need to add an FP_ROUND.

13646

// This is not valid if the input was originally double precision. It is

13647

// also not profitable to do unless this is an extending load in which

13648

// case doing this combine will allow us to combine consecutive loads.

13649

if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))

13650

return SDValue();

13651

if (N->getOperand(i) != FirstInput)

13652

IsSplat = false;

13653

}

13654

13655

// If this is a splat, we leave it as-is since there will be only a single

13656

// fp-to-int conversion followed by a splat of the integer. This is better

13657

// for 32-bit and smaller ints and neutral for 64-bit ints.

13658

if (IsSplat)

13659

return SDValue();

13660

13661

// Now that we know we have the right type of node, get its operands

13662

for (int i = 0, e = N->getNumOperands(); i < e; ++i) {

13663

SDValue In = N->getOperand(i).getOperand(0);

13664

if (Is32Bit) {

13665

// For 32-bit values, we need to add an FP_ROUND node (if we made it

13666

// here, we know that all inputs are extending loads so this is safe).

13667

if (In.isUndef())

13668

Ops.push_back(DAG.getUNDEF(SrcVT));

13669

else {

13670

SDValue Trunc = DAG.getNode(ISD::FP_ROUND, dl,

13671

MVT::f32, In.getOperand(0),

13672

DAG.getIntPtrConstant(1, dl));

13673

Ops.push_back(Trunc);

13674

}

13675

} else

13676

Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));

13677

}

13678

13679

unsigned Opcode;

13680

if (FirstConversion == PPCISD::FCTIDZ ||

13681

FirstConversion == PPCISD::FCTIWZ)

13682

Opcode = ISD::FP_TO_SINT;

13683

else

13684

Opcode = ISD::FP_TO_UINT;

13685

13686

EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;

13687

SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);

13688

return DAG.getNode(Opcode, dl, TargetVT, BV);

13689

}

13690

return SDValue();

13691

}

13692

13693

/// Reduce the number of loads when building a vector.

13694

///

13695

/// Building a vector out of multiple loads can be converted to a load

13696

/// of the vector type if the loads are consecutive. If the loads are

13697

/// consecutive but in descending order, a shuffle is added at the end

13698

/// to reorder the vector.

13699

static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {

13700

assert(N->getOpcode() == ISD::BUILD_VECTOR &&((N->getOpcode() == ISD::BUILD_VECTOR && "Should be called with a BUILD_VECTOR node"
) ? static_cast<void> (0) : __assert_fail ("N->getOpcode() == ISD::BUILD_VECTOR && \"Should be called with a BUILD_VECTOR node\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 13701, __PRETTY_FUNCTION__))

13701

"Should be called with a BUILD_VECTOR node")((N->getOpcode() == ISD::BUILD_VECTOR && "Should be called with a BUILD_VECTOR node"
) ? static_cast<void> (0) : __assert_fail ("N->getOpcode() == ISD::BUILD_VECTOR && \"Should be called with a BUILD_VECTOR node\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 13701, __PRETTY_FUNCTION__));

13702

13703

SDLoc dl(N);

13704

13705

// Return early for non byte-sized type, as they can't be consecutive.

13706

if (!N->getValueType(0).getVectorElementType().isByteSized())

13707

return SDValue();

13708

13709

bool InputsAreConsecutiveLoads = true;

13710

bool InputsAreReverseConsecutive = true;

13711

unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();

13712

SDValue FirstInput = N->getOperand(0);

13713

bool IsRoundOfExtLoad = false;

13714

13715

if (FirstInput.getOpcode() == ISD::FP_ROUND &&

13716

FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {

13717

LoadSDNode *LD = dyn_cast<LoadSDNode>(FirstInput.getOperand(0));

13718

IsRoundOfExtLoad = LD->getExtensionType() == ISD::EXTLOAD;

13719

}

13720

// Not a build vector of (possibly fp_rounded) loads.

13721

if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||

13722

N->getNumOperands() == 1)

13723

return SDValue();

13724

13725

for (int i = 1, e = N->getNumOperands(); i < e; ++i) {

13726

// If any inputs are fp_round(extload), they all must be.

13727

if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)

13728

return SDValue();

13729

13730

SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :

13731

N->getOperand(i);

13732

if (NextInput.getOpcode() != ISD::LOAD)

13733

return SDValue();

13734

13735

SDValue PreviousInput =

13736

IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);

13737

LoadSDNode *LD1 = dyn_cast<LoadSDNode>(PreviousInput);

13738

LoadSDNode *LD2 = dyn_cast<LoadSDNode>(NextInput);

13739

13740

// If any inputs are fp_round(extload), they all must be.

13741

if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)

13742

return SDValue();

13743

13744

if (!isConsecutiveLS(LD2, LD1, ElemSize, 1, DAG))

13745

InputsAreConsecutiveLoads = false;

13746

if (!isConsecutiveLS(LD1, LD2, ElemSize, 1, DAG))

13747

InputsAreReverseConsecutive = false;

13748

13749

// Exit early if the loads are neither consecutive nor reverse consecutive.

13750

if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)

13751

return SDValue();

13752

}

13753

13754

assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&((!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive
) && "The loads cannot be both consecutive and reverse consecutive."
) ? static_cast<void> (0) : __assert_fail ("!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) && \"The loads cannot be both consecutive and reverse consecutive.\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 13755, __PRETTY_FUNCTION__))

13755

"The loads cannot be both consecutive and reverse consecutive.")((!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive
) && "The loads cannot be both consecutive and reverse consecutive."
) ? static_cast<void> (0) : __assert_fail ("!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) && \"The loads cannot be both consecutive and reverse consecutive.\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 13755, __PRETTY_FUNCTION__));

13756

13757

SDValue FirstLoadOp =

13758

IsRoundOfExtLoad ? FirstInput.getOperand(0) : FirstInput;

13759

SDValue LastLoadOp =

13760

IsRoundOfExtLoad ? N->getOperand(N->getNumOperands()-1).getOperand(0) :

13761

N->getOperand(N->getNumOperands()-1);

13762

13763

LoadSDNode *LD1 = dyn_cast<LoadSDNode>(FirstLoadOp);

13764

LoadSDNode *LDL = dyn_cast<LoadSDNode>(LastLoadOp);

13765

if (InputsAreConsecutiveLoads) {

13766

assert(LD1 && "Input needs to be a LoadSDNode.")((LD1 && "Input needs to be a LoadSDNode.") ? static_cast
<void> (0) : __assert_fail ("LD1 && \"Input needs to be a LoadSDNode.\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 13766, __PRETTY_FUNCTION__));

13767

return DAG.getLoad(N->getValueType(0), dl, LD1->getChain(),

13768

LD1->getBasePtr(), LD1->getPointerInfo(),

13769

LD1->getAlignment());

13770

}

13771

if (InputsAreReverseConsecutive) {

13772

assert(LDL && "Input needs to be a LoadSDNode.")((LDL && "Input needs to be a LoadSDNode.") ? static_cast
<void> (0) : __assert_fail ("LDL && \"Input needs to be a LoadSDNode.\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 13772, __PRETTY_FUNCTION__));

13773

SDValue Load = DAG.getLoad(N->getValueType(0), dl, LDL->getChain(),

13774

LDL->getBasePtr(), LDL->getPointerInfo(),

13775

LDL->getAlignment());

13776

SmallVector<int, 16> Ops;

13777

for (int i = N->getNumOperands() - 1; i >= 0; i--)

13778

Ops.push_back(i);

13779

13780

return DAG.getVectorShuffle(N->getValueType(0), dl, Load,

13781

DAG.getUNDEF(N->getValueType(0)), Ops);

13782

}

13783

return SDValue();

13784

}

13785

13786

// This function adds the required vector_shuffle needed to get

13787

// the elements of the vector extract in the correct position

13788

// as specified by the CorrectElems encoding.

13789

static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,

13790

SDValue Input, uint64_t Elems,

13791

uint64_t CorrectElems) {

13792

SDLoc dl(N);

13793

13794

unsigned NumElems = Input.getValueType().getVectorNumElements();

13795

SmallVector<int, 16> ShuffleMask(NumElems, -1);

13796

13797

// Knowing the element indices being extracted from the original

13798

// vector and the order in which they're being inserted, just put

13799

// them at element indices required for the instruction.

13800

for (unsigned i = 0; i < N->getNumOperands(); i++) {

13801

if (DAG.getDataLayout().isLittleEndian())

13802

ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;

13803

else

13804

ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;

13805

CorrectElems = CorrectElems >> 8;

13806

Elems = Elems >> 8;

13807

}

13808

13809

SDValue Shuffle =

13810

DAG.getVectorShuffle(Input.getValueType(), dl, Input,

13811

DAG.getUNDEF(Input.getValueType()), ShuffleMask);

13812

13813

EVT VT = N->getValueType(0);

13814

SDValue Conv = DAG.getBitcast(VT, Shuffle);

13815

13816

EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),

13817

Input.getValueType().getVectorElementType(),

13818

VT.getVectorNumElements());

13819

return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Conv,

13820

DAG.getValueType(ExtVT));

13821

}

13822

13823

// Look for build vector patterns where input operands come from sign

13824

// extended vector_extract elements of specific indices. If the correct indices

13825

// aren't used, add a vector shuffle to fix up the indices and create

13826

// SIGN_EXTEND_INREG node which selects the vector sign extend instructions

13827

// during instruction selection.

13828

static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {

13829

// This array encodes the indices that the vector sign extend instructions

13830

// extract from when extending from one type to another for both BE and LE.

13831

// The right nibble of each byte corresponds to the LE incides.

13832

// and the left nibble of each byte corresponds to the BE incides.

13833

// For example: 0x3074B8FC byte->word

13834

// For LE: the allowed indices are: 0x0,0x4,0x8,0xC

13835

// For BE: the allowed indices are: 0x3,0x7,0xB,0xF

13836

// For example: 0x000070F8 byte->double word

13837

// For LE: the allowed indices are: 0x0,0x8

13838

// For BE: the allowed indices are: 0x7,0xF

13839

uint64_t TargetElems[] = {

13840

0x3074B8FC, // b->w

13841

0x000070F8, // b->d

13842

0x10325476, // h->w

13843

0x00003074, // h->d

13844

0x00001032, // w->d

13845

};

13846

13847

uint64_t Elems = 0;

13848

int Index;

13849

SDValue Input;

13850

13851

auto isSExtOfVecExtract = [&](SDValue Op) -> bool {

13852

if (!Op)

13853

return false;

13854

if (Op.getOpcode() != ISD::SIGN_EXTEND &&

13855

Op.getOpcode() != ISD::SIGN_EXTEND_INREG)

13856

return false;

13857

13858

// A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value

13859

// of the right width.

13860

SDValue Extract = Op.getOperand(0);

13861

if (Extract.getOpcode() == ISD::ANY_EXTEND)

13862

Extract = Extract.getOperand(0);

13863

if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)

13864

return false;

13865

13866

ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1));

13867

if (!ExtOp)

13868

return false;

13869

13870

Index = ExtOp->getZExtValue();

13871

if (Input && Input != Extract.getOperand(0))

13872

return false;

13873

13874

if (!Input)

13875

Input = Extract.getOperand(0);

13876

13877

Elems = Elems << 8;

13878

Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;

13879

Elems |= Index;

13880

13881

return true;

13882

};

13883

13884

// If the build vector operands aren't sign extended vector extracts,

13885

// of the same input vector, then return.

13886

for (unsigned i = 0; i < N->getNumOperands(); i++) {

13887

if (!isSExtOfVecExtract(N->getOperand(i))) {

13888

return SDValue();

13889

}

13890

}

13891

13892

// If the vector extract indicies are not correct, add the appropriate

13893

// vector_shuffle.

13894

int TgtElemArrayIdx;

13895

int InputSize = Input.getValueType().getScalarSizeInBits();

13896

int OutputSize = N->getValueType(0).getScalarSizeInBits();

13897

if (InputSize + OutputSize == 40)

13898

TgtElemArrayIdx = 0;

13899

else if (InputSize + OutputSize == 72)

13900

TgtElemArrayIdx = 1;

13901

else if (InputSize + OutputSize == 48)

13902

TgtElemArrayIdx = 2;

13903

else if (InputSize + OutputSize == 80)

13904

TgtElemArrayIdx = 3;

13905

else if (InputSize + OutputSize == 96)

13906

TgtElemArrayIdx = 4;

13907

else

13908

return SDValue();

13909

13910

uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];

13911

CorrectElems = DAG.getDataLayout().isLittleEndian()

13912

? CorrectElems & 0x0F0F0F0F0F0F0F0F

13913

: CorrectElems & 0xF0F0F0F0F0F0F0F0;

13914

if (Elems != CorrectElems) {

13915

return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);

13916

}

13917

13918

// Regular lowering will catch cases where a shuffle is not needed.

13919

return SDValue();

13920

}

13921

13922

// Look for the pattern of a load from a narrow width to i128, feeding

13923

// into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node

13924

// (LXVRZX). This node represents a zero extending load that will be matched

13925

// to the Load VSX Vector Rightmost instructions.

13926

static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG) {

13927

SDLoc DL(N);

13928

13929

// This combine is only eligible for a BUILD_VECTOR of v1i128.

13930

if (N->getValueType(0) != MVT::v1i128)

13931

return SDValue();

13932

13933

SDValue Operand = N->getOperand(0);

13934

// Proceed with the transformation if the operand to the BUILD_VECTOR

13935

// is a load instruction.

13936

if (Operand.getOpcode() != ISD::LOAD)

13937

return SDValue();

13938

13939

LoadSDNode *LD = dyn_cast<LoadSDNode>(Operand);

13940

EVT MemoryType = LD->getMemoryVT();

13941

13942

// This transformation is only valid if the we are loading either a byte,

13943

// halfword, word, or doubleword.

13944

bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 ||

13945

MemoryType == MVT::i32 || MemoryType == MVT::i64;

13946

13947

// Ensure that the load from the narrow width is being zero extended to i128.

13948

if (!ValidLDType ||

13949

(LD->getExtensionType() != ISD::ZEXTLOAD &&

13950

LD->getExtensionType() != ISD::EXTLOAD))

13951

return SDValue();

13952

13953

SDValue LoadOps[] = {

13954

LD->getChain(), LD->getBasePtr(),

13955

DAG.getIntPtrConstant(MemoryType.getScalarSizeInBits(), DL)};

13956

13957

return DAG.getMemIntrinsicNode(PPCISD::LXVRZX, DL,

13958

DAG.getVTList(MVT::v1i128, MVT::Other),

13959

LoadOps, MemoryType, LD->getMemOperand());

13960

}

13961

13962

/// Target combine for BUILD_VECTOR nodes (VSX subtargets only).
///
/// Tries, in order: folding per-element fp-to-int conversions into one vector
/// conversion, merging consecutive loads into a vector load, the P9 vector
/// sign-extend pattern, the ISA 3.1 v1i128 zero-extending load pattern, and
/// finally turning a v2f64 built from int-to-fp conversions of two adjacent
/// v4i32 elements into a single [SU]INT_VEC_TO_FP node.
SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
                                                 DAGCombinerInfo &DCI) const {
  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
         "Should be called with a BUILD_VECTOR node");

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);

  if (!Subtarget.hasVSX())
    return SDValue();

  // The target independent DAG combiner will leave a build_vector of
  // float-to-int conversions intact. We can generate MUCH better code for
  // a float-to-int conversion of a vector of floats.
  SDValue FirstInput = N->getOperand(0);
  if (FirstInput.getOpcode() == PPCISD::MFVSR)
    if (SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI))
      return Reduced;

  // If we're building a vector out of consecutive loads, just load that
  // vector type.
  if (SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG))
    return Reduced;

  // If we're building a vector out of extended elements from another vector
  // we have P9 vector integer extend instructions. The code assumes legal
  // input types (i.e. it can't handle things like v4i16) so do not run before
  // legalization.
  if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize())
    if (SDValue Reduced = combineBVOfVecSExt(N, DAG))
      return Reduced;

  // On Power10, the Load VSX Vector Rightmost instructions can be utilized
  // if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR
  // is a load from <valid narrow width> to i128.
  if (Subtarget.isISA3_1())
    if (SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG))
      return BVOfZLoad;

  if (N->getValueType(0) != MVT::v2f64)
    return SDValue();

  // Looking for:
  // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
  const unsigned FirstOpc = FirstInput.getOpcode();
  if (FirstOpc != ISD::SINT_TO_FP && FirstOpc != ISD::UINT_TO_FP)
    return SDValue();
  // Both conversions must have the same signedness.
  SDValue SecondInput = N->getOperand(1);
  if (SecondInput.getOpcode() != FirstOpc)
    return SDValue();

  SDValue Ext1 = FirstInput.getOperand(0);
  SDValue Ext2 = SecondInput.getOperand(0);
  if (Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
      Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
    return SDValue();

  // Both extracts need constant indices into the same v4i32 vector.
  ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
  ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
  if (!Ext1Op || !Ext2Op)
    return SDValue();
  SDValue SrcVec = Ext1.getOperand(0);
  if (SrcVec.getValueType() != MVT::v4i32 || SrcVec != Ext2.getOperand(0))
    return SDValue();

  // Only the element pairs (0, 1) and (2, 3) are handled; which subvector
  // index they map to depends on endianness.
  int FirstElem = Ext1Op->getZExtValue();
  int SecondElem = Ext2Op->getZExtValue();
  const bool IsLE = Subtarget.isLittleEndian();
  int SubvecIdx;
  if (FirstElem == 0 && SecondElem == 1)
    SubvecIdx = IsLE ? 1 : 0;
  else if (FirstElem == 2 && SecondElem == 3)
    SubvecIdx = IsLE ? 0 : 1;
  else
    return SDValue();

  auto NodeType = (FirstOpc == ISD::SINT_TO_FP) ? PPCISD::SINT_VEC_TO_FP
                                                : PPCISD::UINT_VEC_TO_FP;
  return DAG.getNode(NodeType, dl, MVT::v2f64, SrcVec,
                     DAG.getIntPtrConstant(SubvecIdx, dl));
}

14052

14053

/// Target combine for SINT_TO_FP / UINT_TO_FP nodes.
///
/// Handles two patterns:
///  * an i8/i16 load feeding the conversion (with P9 vector and P9 Altivec
///    support): the value is loaded with LXSIZX, sign-extended with VEXTS
///    for signed conversions, and then converted;
///  * fp -> int -> fp round trips: the pair is replaced by FCTID[U]Z followed
///    by FCFID[U][S].
/// Returns an empty SDValue when neither applies.
SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  assert((N->getOpcode() == ISD::SINT_TO_FP ||
          N->getOpcode() == ISD::UINT_TO_FP) &&
         "Need an int -> FP conversion node here");

  if (useSoftFloat() || !Subtarget.has64BitSupport())
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  SDValue Op(N, 0);

  // Don't handle ppc_fp128 here or conversions that are out-of-range capable
  // from the hardware.
  EVT ResultVT = Op.getValueType();
  if (ResultVT != MVT::f32 && ResultVT != MVT::f64)
    return SDValue();

  SDValue FirstOperand(Op.getOperand(0));
  EVT IntVT = FirstOperand.getValueType();
  if (!IntVT.isSimple())
    return SDValue();
  // The integer source must be wider than i1 and no wider than i64.
  if (IntVT.getSimpleVT() <= MVT(MVT::i1) ||
      IntVT.getSimpleVT() > MVT(MVT::i64))
    return SDValue();

  const bool IsSignedConv = N->getOpcode() == ISD::SINT_TO_FP;
  const bool IsUnsignedConv = Op.getOpcode() == ISD::UINT_TO_FP;
  const bool DstDouble = ResultVT == MVT::f64;

  const bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
                           (IntVT == MVT::i8 || IntVT == MVT::i16);
  if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
    unsigned ConvOp;
    if (IsSignedConv)
      ConvOp = DstDouble ? PPCISD::FCFID : PPCISD::FCFIDS;
    else
      ConvOp = DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS;

    // Width of the loaded integer in bytes.
    SDValue WidthConst =
        DAG.getIntPtrConstant(IntVT == MVT::i8 ? 1 : 2, dl, false);
    LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
    SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
    SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
                                         DAG.getVTList(MVT::f64, MVT::Other),
                                         Ops, MVT::i8, LDN->getMemOperand());

    // For signed conversion, we need to sign-extend the value in the VSR
    SDValue ConvInput = Ld;
    if (IsSignedConv) {
      SDValue ExtOps[] = { Ld, WidthConst };
      ConvInput = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
    }
    return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, ConvInput);
  }

  // For i32 intermediate values, unfortunately, the conversion functions
  // leave the upper 32 bits of the value undefined. Within the set of
  // scalar instructions, we have no method for zero- or sign-extending the
  // value. Thus, we cannot handle i32 intermediate values here.
  if (IntVT == MVT::i32)
    return SDValue();

  assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
         "UINT_TO_FP is supported only with FPCVT");

  // If we have FCFIDS, then use it when converting to single-precision.
  // Otherwise, convert to double-precision and then round.
  const bool DirectToSingle = Subtarget.hasFPCVT() && ResultVT == MVT::f32;
  unsigned FCFOp;
  if (DirectToSingle)
    FCFOp = IsUnsignedConv ? PPCISD::FCFIDUS : PPCISD::FCFIDS;
  else
    FCFOp = IsUnsignedConv ? PPCISD::FCFIDU : PPCISD::FCFID;
  MVT FCFTy = DirectToSingle ? MVT::f32 : MVT::f64;

  // If we're converting from a float, to an int, and back to a float again,
  // then we don't need the store/load pair at all.
  const unsigned InnerOpc = FirstOperand.getOpcode();
  const bool FromFPToSInt = InnerOpc == ISD::FP_TO_SINT;
  const bool FromFPToUInt =
      InnerOpc == ISD::FP_TO_UINT && Subtarget.hasFPCVT();
  if (!FromFPToUInt && !FromFPToSInt)
    return SDValue();

  SDValue Src = FirstOperand.getOperand(0);
  if (Src.getValueType() == MVT::f32) {
    Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
    DCI.AddToWorklist(Src.getNode());
  } else if (Src.getValueType() != MVT::f64) {
    // Make sure that we don't pick up a ppc_fp128 source value.
    return SDValue();
  }

  unsigned FCTOp = FromFPToSInt ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;

  SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
  SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);

  // Without FPCVT the conversion produced an f64; round it to the requested
  // f32 result.
  if (ResultVT == MVT::f32 && !Subtarget.hasFPCVT()) {
    FP = DAG.getNode(ISD::FP_ROUND, dl,
                     MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
    DCI.AddToWorklist(FP.getNode());
  }

  return FP;
}

14158

14159

// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for

14160

// builtins) into loads with swaps.

14161

SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,

14162

DAGCombinerInfo &DCI) const {

14163

SelectionDAG &DAG = DCI.DAG;

14164

SDLoc dl(N);

14165

SDValue Chain;

14166

SDValue Base;

14167

MachineMemOperand *MMO;

14168

14169

switch (N->getOpcode()) {

14170

default:

14171

llvm_unreachable("Unexpected opcode for little endian VSX load")::llvm::llvm_unreachable_internal("Unexpected opcode for little endian VSX load"
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 14171);

14172

case ISD::LOAD: {

14173

LoadSDNode *LD = cast<LoadSDNode>(N);

14174

Chain = LD->getChain();

14175

Base = LD->getBasePtr();

14176

MMO = LD->getMemOperand();

14177

// If the MMO suggests this isn't a load of a full vector, leave

14178

// things alone. For a built-in, we have to make the change for

14179

// correctness, so if there is a size problem that will be a bug.

14180

if (MMO->getSize() < 16)

14181

return SDValue();

14182

break;

14183

}

14184

case ISD::INTRINSIC_W_CHAIN: {

14185

MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);

14186

Chain = Intrin->getChain();

14187

// Similarly to the store case below, Intrin->getBasePtr() doesn't get

14188

// us what we want. Get operand 2 instead.

14189

Base = Intrin->getOperand(2);

14190

MMO = Intrin->getMemOperand();

14191

break;

14192

}

14193

}

14194

14195

MVT VecTy = N->getValueType(0).getSimpleVT();

14196

14197

// Do not expand to PPCISD::LXVD2X + PPCISD::XXSWAPD when the load is

14198

// aligned and the type is a vector with elements up to 4 bytes

14199

if (Subtarget.needsSwapsForVSXMemOps() && MMO->getAlign() >= Align(16) &&

14200

VecTy.getScalarSizeInBits() <= 32) {

14201

return SDValue();

14202

}

14203

14204

SDValue LoadOps[] = { Chain, Base };

14205

SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,

14206

DAG.getVTList(MVT::v2f64, MVT::Other),

14207

LoadOps, MVT::v2f64, MMO);

14208

14209

DCI.AddToWorklist(Load.getNode());

14210

Chain = Load.getValue(1);

14211

SDValue Swap = DAG.getNode(

14212

PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);

14213

DCI.AddToWorklist(Swap.getNode());

14214

14215

// Add a bitcast if the resulting load type doesn't match v2f64.

14216

if (VecTy != MVT::v2f64) {

14217

SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);

14218

DCI.AddToWorklist(N.getNode());

14219

// Package {bitcast value, swap's chain} to match Load's shape.

14220

return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),

14221

N, Swap.getValue(1));

14222

}

14223

14224

return Swap;

14225

}

14226

14227

// expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for

14228

// builtins) into stores with swaps.

14229

SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,

14230

DAGCombinerInfo &DCI) const {

14231

SelectionDAG &DAG = DCI.DAG;

14232

SDLoc dl(N);

14233

SDValue Chain;

14234

SDValue Base;

14235

unsigned SrcOpnd;

14236

MachineMemOperand *MMO;

14237

14238

switch (N->getOpcode()) {

14239

default:

14240

llvm_unreachable("Unexpected opcode for little endian VSX store")::llvm::llvm_unreachable_internal("Unexpected opcode for little endian VSX store"
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 14240);

14241

case ISD::STORE: {

14242

StoreSDNode *ST = cast<StoreSDNode>(N);

14243

Chain = ST->getChain();

14244

Base = ST->getBasePtr();

14245

MMO = ST->getMemOperand();

14246

SrcOpnd = 1;

14247

// If the MMO suggests this isn't a store of a full vector, leave

14248

// things alone. For a built-in, we have to make the change for

14249

// correctness, so if there is a size problem that will be a bug.

14250

if (MMO->getSize() < 16)

14251

return SDValue();

14252

break;

14253

}

14254

case ISD::INTRINSIC_VOID: {

14255

MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);

14256

Chain = Intrin->getChain();

14257

// Intrin->getBasePtr() oddly does not get what we want.

14258

Base = Intrin->getOperand(3);

14259

MMO = Intrin->getMemOperand();

14260

SrcOpnd = 2;

14261

break;

14262

}

14263

}

14264

14265

SDValue Src = N->getOperand(SrcOpnd);

14266

MVT VecTy = Src.getValueType().getSimpleVT();

14267

14268

// Do not expand to PPCISD::XXSWAPD and PPCISD::STXVD2X when the load is

14269

// aligned and the type is a vector with elements up to 4 bytes

14270

if (Subtarget.needsSwapsForVSXMemOps() && MMO->getAlign() >= Align(16) &&

14271

VecTy.getScalarSizeInBits() <= 32) {

14272

return SDValue();

14273

}

14274

14275

// All stores are done as v2f64 and possible bit cast.

14276

if (VecTy != MVT::v2f64) {

14277

Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);

14278

DCI.AddToWorklist(Src.getNode());

14279

}

14280

14281

SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,

14282

DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);

14283

DCI.AddToWorklist(Swap.getNode());

14284

Chain = Swap.getValue(1);

14285

SDValue StoreOps[] = { Chain, Swap, Base };

14286

SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,

14287

DAG.getVTList(MVT::Other),

14288

StoreOps, VecTy, MMO);

14289

DCI.AddToWorklist(Store.getNode());

14290

return Store;

14291

}

14292

14293

// Handle DAG combine for STORE (FP_TO_INT F).

14294

SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,

14295

DAGCombinerInfo &DCI) const {

14296

14297

SelectionDAG &DAG = DCI.DAG;

14298

SDLoc dl(N);

14299

unsigned Opcode = N->getOperand(1).getOpcode();

14300

14301

assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT)(((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT) &&
"Not a FP_TO_INT Instruction!") ? static_cast<void> (0
) : __assert_fail ("(Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT) && \"Not a FP_TO_INT Instruction!\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 14302, __PRETTY_FUNCTION__))

14302

&& "Not a FP_TO_INT Instruction!")(((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT) &&
"Not a FP_TO_INT Instruction!") ? static_cast<void> (0
) : __assert_fail ("(Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT) && \"Not a FP_TO_INT Instruction!\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 14302, __PRETTY_FUNCTION__));

14303

14304

SDValue Val = N->getOperand(1).getOperand(0);

14305

EVT Op1VT = N->getOperand(1).getValueType();

14306

EVT ResVT = Val.getValueType();

14307

14308

if (!isTypeLegal(ResVT))

14309

return SDValue();

14310

14311

// Only perform combine for conversion to i64/i32 or power9 i16/i8.

14312

bool ValidTypeForStoreFltAsInt =

14313

(Op1VT == MVT::i32 || Op1VT == MVT::i64 ||

14314

(Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));

14315

14316

if (ResVT == MVT::ppcf128 || !Subtarget.hasP8Vector() ||

14317

cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)

14318

return SDValue();

14319

14320

// Extend f32 values to f64

14321

if (ResVT.getScalarSizeInBits() == 32) {

14322

Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);

14323

DCI.AddToWorklist(Val.getNode());

14324

}

14325

14326

// Set signed or unsigned conversion opcode.

14327

unsigned ConvOpcode = (Opcode == ISD::FP_TO_SINT) ?

14328

PPCISD::FP_TO_SINT_IN_VSR :

14329

PPCISD::FP_TO_UINT_IN_VSR;

14330

14331

Val = DAG.getNode(ConvOpcode,

14332

dl, ResVT == MVT::f128 ? MVT::f128 : MVT::f64, Val);

14333

DCI.AddToWorklist(Val.getNode());

14334

14335

// Set number of bytes being converted.

14336

unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;

14337

SDValue Ops[] = { N->getOperand(0), Val, N->getOperand(2),

14338

DAG.getIntPtrConstant(ByteSize, dl, false),

14339

DAG.getValueType(Op1VT) };

14340

14341

Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl,

14342

DAG.getVTList(MVT::Other), Ops,

14343

cast<StoreSDNode>(N)->getMemoryVT(),

14344

cast<StoreSDNode>(N)->getMemOperand());

14345

14346

DCI.AddToWorklist(Val.getNode());

14347

return Val;

14348

}

14349

14350

static bool isAlternatingShuffMask(const ArrayRef<int> &Mask, int NumElts) {

14351

// Check that the source of the element keeps flipping

14352

// (i.e. Mask[i] < NumElts -> Mask[i+i] >= NumElts).

14353

bool PrevElemFromFirstVec = Mask[0] < NumElts;

14354

for (int i = 1, e = Mask.size(); i < e; i++) {

14355

if (PrevElemFromFirstVec && Mask[i] < NumElts)

14356

return false;

14357

if (!PrevElemFromFirstVec && Mask[i] >= NumElts)

14358

return false;

14359

PrevElemFromFirstVec = !PrevElemFromFirstVec;

14360

}

14361

return true;

14362

}

14363

14364

// Return true if Op is a BUILD_VECTOR in which every operand is either undef
// or identical to the first non-undef operand.
static bool isSplatBV(SDValue Op) {
  if (Op.getOpcode() != ISD::BUILD_VECTOR)
    return false;

  const int NumOps = Op.getNumOperands();

  // Locate the first non-undef operand; if every operand is undef this ends
  // up holding the last one.
  SDValue Reference;
  for (int Idx = 0; Idx < NumOps; ++Idx) {
    Reference = Op.getOperand(Idx);
    if (!Reference.isUndef())
      break;
  }

  // Every remaining operand has to be undef or match the reference operand.
  for (int Idx = 1; Idx < NumOps; ++Idx) {
    SDValue Current = Op.getOperand(Idx);
    if (Current.isUndef())
      continue;
    if (Current != Reference)
      return false;
  }
  return true;
}

14382

14383

// Return the SCALAR_TO_VECTOR node that Op is, or that Op is a direct
// BITCAST of. Returns an empty SDValue otherwise.
static SDValue isScalarToVec(SDValue Op) {
  // Look through at most one bitcast.
  if (Op.getOpcode() == ISD::BITCAST)
    Op = Op.getOperand(0);
  return Op.getOpcode() == ISD::SCALAR_TO_VECTOR ? Op : SDValue();
}

14393

14394

// Add HalfVec to every entry of ShuffV that lies in [0, LHSMaxIdx) or in
// [RHSMinIdx, RHSMaxIdx). All other entries are left untouched.
static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl<int> &ShuffV,
                                            int LHSMaxIdx, int RHSMinIdx,
                                            int RHSMaxIdx, int HalfVec) {
  for (int &Entry : ShuffV) {
    const bool InLHSRange = Entry >= 0 && Entry < LHSMaxIdx;
    const bool InRHSRange = Entry >= RHSMinIdx && Entry < RHSMaxIdx;
    if (InLHSRange || InRHSRange)
      Entry += HalfVec;
  }
}

14404

14405

// Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if

14406

// the original is:

14407

// (<n x Ty> (scalar_to_vector (Ty (extract_elt <n x Ty> %a, C))))

14408

// In such a case, just change the shuffle mask to extract the element

14409

// from the permuted index.

14410

static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG) {

14411

SDLoc dl(OrigSToV);

14412

EVT VT = OrigSToV.getValueType();

14413

assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR &&((OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR && "Expecting a SCALAR_TO_VECTOR here"
) ? static_cast<void> (0) : __assert_fail ("OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR && \"Expecting a SCALAR_TO_VECTOR here\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 14414, __PRETTY_FUNCTION__))

14414

"Expecting a SCALAR_TO_VECTOR here")((OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR && "Expecting a SCALAR_TO_VECTOR here"
) ? static_cast<void> (0) : __assert_fail ("OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR && \"Expecting a SCALAR_TO_VECTOR here\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 14414, __PRETTY_FUNCTION__));

14415

SDValue Input = OrigSToV.getOperand(0);

14416

14417

if (Input.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {

14418

ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(Input.getOperand(1));

14419

SDValue OrigVector = Input.getOperand(0);

14420

14421

// Can't handle non-const element indices or different vector types

14422

// for the input to the extract and the output of the scalar_to_vector.

14423

if (Idx && VT == OrigVector.getValueType()) {

14424

SmallVector<int, 16> NewMask(VT.getVectorNumElements(), -1);

14425

NewMask[VT.getVectorNumElements() / 2] = Idx->getZExtValue();

14426

return DAG.getVectorShuffle(VT, dl, OrigVector, OrigVector, NewMask);

14427

}

14428

}

14429

return DAG.getNode(PPCISD::SCALAR_TO_VECTOR_PERMUTED, dl, VT,

14430

OrigSToV.getOperand(0));

14431

}

14432

14433

// On little endian subtargets, combine shuffles such as:

14434

// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b

14435

// into:

14436

// vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7>, <zero>, %b

14437

// because the latter can be matched to a single instruction merge.

14438

// Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute

14439

// to put the value into element zero. Adjust the shuffle mask so that the

14440

// vector can remain in permuted form (to prevent a swap prior to a shuffle).

14441

SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,

14442

SelectionDAG &DAG) const {

14443

SDValue LHS = SVN->getOperand(0);

14444

SDValue RHS = SVN->getOperand(1);

14445

auto Mask = SVN->getMask();

14446

int NumElts = LHS.getValueType().getVectorNumElements();

14447

SDValue Res(SVN, 0);

14448

SDLoc dl(SVN);

14449

14450

// None of these combines are useful on big endian systems since the ISA

14451

// already has a big endian bias.

14452

if (!Subtarget.isLittleEndian() || !Subtarget.hasVSX())

14453

return Res;

14454

14455

// If this is not a shuffle of a shuffle and the first element comes from

14456

// the second vector, canonicalize to the commuted form. This will make it

14457

// more likely to match one of the single instruction patterns.

14458

if (Mask[0] >= NumElts && LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&

14459

RHS.getOpcode() != ISD::VECTOR_SHUFFLE) {

14460

std::swap(LHS, RHS);

14461

Res = DAG.getCommutedVectorShuffle(*SVN);

14462

Mask = cast<ShuffleVectorSDNode>(Res)->getMask();

14463

}

14464

14465

// Adjust the shuffle mask if either input vector comes from a

14466

// SCALAR_TO_VECTOR and keep the respective input vector in permuted

14467

// form (to prevent the need for a swap).

14468

SmallVector<int, 16> ShuffV(Mask.begin(), Mask.end());

14469

SDValue SToVLHS = isScalarToVec(LHS);

14470

SDValue SToVRHS = isScalarToVec(RHS);

14471

if (SToVLHS || SToVRHS) {

14472

int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements()

14473

: SToVRHS.getValueType().getVectorNumElements();

14474

int NumEltsOut = ShuffV.size();

14475

14476

// Initially assume that neither input is permuted. These will be adjusted

14477

// accordingly if either input is.

14478

int LHSMaxIdx = -1;

14479

int RHSMinIdx = -1;

14480

int RHSMaxIdx = -1;

14481

int HalfVec = LHS.getValueType().getVectorNumElements() / 2;

14482

14483

// Get the permuted scalar to vector nodes for the source(s) that come from

14484

// ISD::SCALAR_TO_VECTOR.

14485

if (SToVLHS) {

14486

// Set up the values for the shuffle vector fixup.

14487

LHSMaxIdx = NumEltsOut / NumEltsIn;

14488

SToVLHS = getSToVPermuted(SToVLHS, DAG);

14489

if (SToVLHS.getValueType() != LHS.getValueType())

14490

SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS);

14491

LHS = SToVLHS;

14492

}

14493

if (SToVRHS) {

14494

RHSMinIdx = NumEltsOut;

14495

RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx;

14496

SToVRHS = getSToVPermuted(SToVRHS, DAG);

14497

if (SToVRHS.getValueType() != RHS.getValueType())

14498

SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS);

14499

RHS = SToVRHS;

14500

}

14501

14502

// Fix up the shuffle mask to reflect where the desired element actually is.

14503

// The minimum and maximum indices that correspond to element zero for both

14504

// the LHS and RHS are computed and will control which shuffle mask entries

14505

// are to be changed. For example, if the RHS is permuted, any shuffle mask

14506

// entries in the range [RHSMinIdx,RHSMaxIdx) will be incremented by

14507

// HalfVec to refer to the corresponding element in the permuted vector.

14508

fixupShuffleMaskForPermutedSToV(ShuffV, LHSMaxIdx, RHSMinIdx, RHSMaxIdx,

14509

HalfVec);

14510

Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);

14511

14512

// We may have simplified away the shuffle. We won't be able to do anything

14513

// further with it here.

14514

if (!isa<ShuffleVectorSDNode>(Res))

14515

return Res;

14516

Mask = cast<ShuffleVectorSDNode>(Res)->getMask();

14517

}

14518

14519

// The common case after we commuted the shuffle is that the RHS is a splat

14520

// and we have elements coming in from the splat at indices that are not

14521

// conducive to using a merge.

14522

// Example:

14523

// vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, <zero>

14524

if (!isSplatBV(RHS))

14525

return Res;

14526

14527

// We are looking for a mask such that all even elements are from

14528

// one vector and all odd elements from the other.

14529

if (!isAlternatingShuffMask(Mask, NumElts))

14530

return Res;

14531

14532

// Adjust the mask so we are pulling in the same index from the splat

14533

// as the index from the interesting vector in consecutive elements.

14534

// Example (even elements from first vector):

14535

// vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>

14536

if (Mask[0] < NumElts)

14537

for (int i = 1, e = Mask.size(); i < e; i += 2)

14538

ShuffV[i] = (ShuffV[i - 1] + NumElts);

14539

// Example (odd elements from first vector):

14540

// vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero>

14541

else

14542

for (int i = 0, e = Mask.size(); i < e; i += 2)

14543

ShuffV[i] = (ShuffV[i + 1] + NumElts);

14544

14545

// If the RHS has undefs, we need to remove them since we may have created

14546

// a shuffle that adds those instead of the splat value.

14547

SDValue SplatVal = cast<BuildVectorSDNode>(RHS.getNode())->getSplatValue();

14548

RHS = DAG.getSplatBuildVector(RHS.getValueType(), dl, SplatVal);

14549

14550

Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);

14551

return Res;

14552

}

14553

14554

// Fold an element-reversing shuffle into the adjacent normal load or store:
// a reversed load becomes PPCISD::LOAD_VEC_BE and a store of a reversed
// value becomes PPCISD::STORE_VEC_BE. Returns an empty SDValue when the
// combine does not apply.
SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
                                                LSBaseSDNode *LSBase,
                                                DAGCombinerInfo &DCI) const {
  assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) &&
         "Not a reverse memop pattern!");

  // True when the mask reads <Size-1, ..., 1, 0>, i.e. the last entry is 0,
  // the one before it is 1, and so on.
  auto IsElementReverse = [](const ShuffleVectorSDNode *Shuf) -> bool {
    auto ShufMask = Shuf->getMask();
    const int Size = ShufMask.size();
    for (int FromEnd = 0; FromEnd < Size; ++FromEnd)
      if (ShufMask[Size - 1 - FromEnd] != FromEnd)
        return false;
    return true;
  };

  SelectionDAG &DAG = DCI.DAG;
  EVT VT = SVN->getValueType(0);

  if (!isTypeLegal(VT))
    return SDValue();
  if (!Subtarget.isLittleEndian() || !Subtarget.hasVSX())
    return SDValue();

  // Before P9, the PPCVSXSwapRemoval pass hacks the element order (see the
  // comment in PPCVSXSwapRemoval.cpp). This combine conflicts with that
  // optimization, so don't do it there.
  if (!Subtarget.hasP9Vector())
    return SDValue();

  if (!IsElementReverse(SVN))
    return SDValue();

  switch (LSBase->getOpcode()) {
  case ISD::LOAD: {
    SDLoc dl(SVN);
    SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
    return DAG.getMemIntrinsicNode(
        PPCISD::LOAD_VEC_BE, dl, DAG.getVTList(VT, MVT::Other), LoadOps,
        LSBase->getMemoryVT(), LSBase->getMemOperand());
  }
  case ISD::STORE: {
    SDLoc dl(LSBase);
    // Store the shuffle's (un-reversed) first input directly.
    SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0),
                          LSBase->getBasePtr()};
    return DAG.getMemIntrinsicNode(
        PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps,
        LSBase->getMemoryVT(), LSBase->getMemOperand());
  }
  default:
    break;
  }

  llvm_unreachable("Expected a load or store node here");
}

14608

14609

SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,

14610

DAGCombinerInfo &DCI) const {

14611

SelectionDAG &DAG = DCI.DAG;

14612

SDLoc dl(N);

14613

switch (N->getOpcode()) {

14614

default: break;

14615

case ISD::ADD:

14616

return combineADD(N, DCI);

14617

case ISD::SHL:

14618

return combineSHL(N, DCI);

14619

case ISD::SRA:

14620

return combineSRA(N, DCI);

14621

case ISD::SRL:

14622

return combineSRL(N, DCI);

14623

case ISD::MUL:

14624

return combineMUL(N, DCI);

14625

case ISD::FMA:

14626

case PPCISD::FNMSUB:

14627

return combineFMALike(N, DCI);

14628

case PPCISD::SHL:

14629

if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.

14630

return N->getOperand(0);

14631

break;

14632

case PPCISD::SRL:

14633

if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.

14634

return N->getOperand(0);

14635

break;

14636

case PPCISD::SRA:

14637

if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {

14638

if (C->isNullValue() || // 0 >>s V -> 0.

14639

C->isAllOnesValue()) // -1 >>s V -> -1.

14640

return N->getOperand(0);

14641

}

14642

break;

14643

case ISD::SIGN_EXTEND:

14644

case ISD::ZERO_EXTEND:

14645

case ISD::ANY_EXTEND:

14646

return DAGCombineExtBoolTrunc(N, DCI);

14647

case ISD::TRUNCATE:

14648

return combineTRUNCATE(N, DCI);

14649

case ISD::SETCC:

14650

if (SDValue CSCC = combineSetCC(N, DCI))

14651

return CSCC;

14652

LLVM_FALLTHROUGH[[gnu::fallthrough]];

14653

case ISD::SELECT_CC:

14654

return DAGCombineTruncBoolExt(N, DCI);

14655

case ISD::SINT_TO_FP:

14656

case ISD::UINT_TO_FP:

14657

return combineFPToIntToFP(N, DCI);

14658

case ISD::VECTOR_SHUFFLE:

14659

if (ISD::isNormalLoad(N->getOperand(0).getNode())) {

14660

LSBaseSDNode* LSBase = cast<LSBaseSDNode>(N->getOperand(0));

14661

return combineVReverseMemOP(cast<ShuffleVectorSDNode>(N), LSBase, DCI);

14662

}

14663

return combineVectorShuffle(cast<ShuffleVectorSDNode>(N), DCI.DAG);

14664

case ISD::STORE: {

14665

14666

EVT Op1VT = N->getOperand(1).getValueType();

14667

unsigned Opcode = N->getOperand(1).getOpcode();

14668

14669

if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT) {

14670

SDValue Val= combineStoreFPToInt(N, DCI);

14671

if (Val)

14672

return Val;

14673

}

14674

14675

if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) {

14676

ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N->getOperand(1));

14677

SDValue Val= combineVReverseMemOP(SVN, cast<LSBaseSDNode>(N), DCI);

14678

if (Val)

14679

return Val;

14680

}

14681

14682

// Turn STORE (BSWAP) -> sthbrx/stwbrx.

14683

if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&

14684

N->getOperand(1).getNode()->hasOneUse() &&

14685

(Op1VT == MVT::i32 || Op1VT == MVT::i16 ||

14686

(Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {

14687

14688

// STBRX can only handle simple types and it makes no sense to store less

14689

// two bytes in byte-reversed order.

14690

EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();

14691

if (mVT.isExtended() || mVT.getSizeInBits() < 16)

14692

break;

14693

14694

SDValue BSwapOp = N->getOperand(1).getOperand(0);

14695

// Do an any-extend to 32-bits if this is a half-word input.

14696

if (BSwapOp.getValueType() == MVT::i16)

14697

BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);

14698

14699

// If the type of BSWAP operand is wider than stored memory width

14700

// it need to be shifted to the right side before STBRX.

14701

if (Op1VT.bitsGT(mVT)) {

14702

int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();

14703

BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,

14704

DAG.getConstant(Shift, dl, MVT::i32));

14705

// Need to truncate if this is a bswap of i64 stored as i32/i16.

14706

if (Op1VT == MVT::i64)

14707

BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);

14708

}

14709

14710

SDValue Ops[] = {

14711

N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)

14712

};

14713

return

14714

DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),

14715

Ops, cast<StoreSDNode>(N)->getMemoryVT(),

14716

cast<StoreSDNode>(N)->getMemOperand());

14717

}

14718

14719

// STORE Constant:i32<0> -> STORE<trunc to i32> Constant:i64<0>

14720

// So it can increase the chance of CSE constant construction.

14721

if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&

14722

isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) {

14723

// Need to sign-extended to 64-bits to handle negative values.

14724

EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT();

14725

uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1),

14726

MemVT.getSizeInBits());

14727

SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64);

14728

14729

// DAG.getTruncStore() can't be used here because it doesn't accept

14730

// the general (base + offset) addressing mode.

14731

// So we use UpdateNodeOperands and setTruncatingStore instead.

14732

DAG.UpdateNodeOperands(N, N->getOperand(0), Const64, N->getOperand(2),

14733

N->getOperand(3));

14734

cast<StoreSDNode>(N)->setTruncatingStore(true);

14735

return SDValue(N, 0);

14736

}

14737

14738

// For little endian, VSX stores require generating xxswapd/lxvd2x.

14739

// Not needed on ISA 3.0 based CPUs since we have a non-permuting store.

14740

if (Op1VT.isSimple()) {

14741

MVT StoreVT = Op1VT.getSimpleVT();

14742

if (Subtarget.needsSwapsForVSXMemOps() &&

14743

(StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||

14744

StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))

14745

return expandVSXStoreForLE(N, DCI);

14746

}

14747

break;

14748

}

14749

case ISD::LOAD: {

14750

LoadSDNode *LD = cast<LoadSDNode>(N);

14751

EVT VT = LD->getValueType(0);

14752

14753

// For little endian, VSX loads require generating lxvd2x/xxswapd.

14754

// Not needed on ISA 3.0 based CPUs since we have a non-permuting load.

14755

if (VT.isSimple()) {

14756

MVT LoadVT = VT.getSimpleVT();

14757

if (Subtarget.needsSwapsForVSXMemOps() &&

14758

(LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||

14759

LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))

14760

return expandVSXLoadForLE(N, DCI);

14761

}

14762

14763

// We sometimes end up with a 64-bit integer load, from which we extract

14764

// two single-precision floating-point numbers. This happens with

14765

// std::complex<float>, and other similar structures, because of the way we

14766

// canonicalize structure copies. However, if we lack direct moves,

14767

// then the final bitcasts from the extracted integer values to the

14768

// floating-point numbers turn into store/load pairs. Even with direct moves,

14769

// just loading the two floating-point numbers is likely better.

14770

auto ReplaceTwoFloatLoad = [&]() {

14771

if (VT != MVT::i64)

14772

return false;

14773

14774

if (LD->getExtensionType() != ISD::NON_EXTLOAD ||

14775

LD->isVolatile())

14776

return false;

14777

14778

// We're looking for a sequence like this:

14779

// t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64

14780

// t16: i64 = srl t13, Constant:i32<32>

14781

// t17: i32 = truncate t16

14782

// t18: f32 = bitcast t17

14783

// t19: i32 = truncate t13

14784

// t20: f32 = bitcast t19

14785

14786

if (!LD->hasNUsesOfValue(2, 0))

14787

return false;

14788

14789

auto UI = LD->use_begin();

14790

while (UI.getUse().getResNo() != 0) ++UI;

14791

SDNode *Trunc = *UI++;

14792

while (UI.getUse().getResNo() != 0) ++UI;

14793

SDNode *RightShift = *UI;

14794

if (Trunc->getOpcode() != ISD::TRUNCATE)

14795

std::swap(Trunc, RightShift);

14796

14797

if (Trunc->getOpcode() != ISD::TRUNCATE ||

14798

Trunc->getValueType(0) != MVT::i32 ||

14799

!Trunc->hasOneUse())

14800

return false;

14801

if (RightShift->getOpcode() != ISD::SRL ||

14802

!isa<ConstantSDNode>(RightShift->getOperand(1)) ||

14803

RightShift->getConstantOperandVal(1) != 32 ||

14804

!RightShift->hasOneUse())

14805

return false;

14806

14807

SDNode *Trunc2 = *RightShift->use_begin();

14808

if (Trunc2->getOpcode() != ISD::TRUNCATE ||

14809

Trunc2->getValueType(0) != MVT::i32 ||

14810

!Trunc2->hasOneUse())

14811

return false;

14812

14813

SDNode *Bitcast = *Trunc->use_begin();

14814

SDNode *Bitcast2 = *Trunc2->use_begin();

14815

14816

if (Bitcast->getOpcode() != ISD::BITCAST ||

14817

Bitcast->getValueType(0) != MVT::f32)

14818

return false;

14819

if (Bitcast2->getOpcode() != ISD::BITCAST ||

14820

Bitcast2->getValueType(0) != MVT::f32)

14821

return false;

14822

14823

if (Subtarget.isLittleEndian())

14824

std::swap(Bitcast, Bitcast2);

14825

14826

// Bitcast has the second float (in memory-layout order) and Bitcast2

14827

// has the first one.

14828

14829

SDValue BasePtr = LD->getBasePtr();

14830

if (LD->isIndexed()) {

14831

assert(LD->getAddressingMode() == ISD::PRE_INC &&((LD->getAddressingMode() == ISD::PRE_INC && "Non-pre-inc AM on PPC?"
) ? static_cast<void> (0) : __assert_fail ("LD->getAddressingMode() == ISD::PRE_INC && \"Non-pre-inc AM on PPC?\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 14832, __PRETTY_FUNCTION__))

14832

"Non-pre-inc AM on PPC?")((LD->getAddressingMode() == ISD::PRE_INC && "Non-pre-inc AM on PPC?"
) ? static_cast<void> (0) : __assert_fail ("LD->getAddressingMode() == ISD::PRE_INC && \"Non-pre-inc AM on PPC?\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 14832, __PRETTY_FUNCTION__));

14833

BasePtr =

14834

DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,

14835

LD->getOffset());

14836

}

14837

14838

auto MMOFlags =

14839

LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;

14840

SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,

14841

LD->getPointerInfo(), LD->getAlignment(),

14842

MMOFlags, LD->getAAInfo());

14843

SDValue AddPtr =

14844

DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),

14845

BasePtr, DAG.getIntPtrConstant(4, dl));

14846

SDValue FloatLoad2 = DAG.getLoad(

14847

MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,

14848

LD->getPointerInfo().getWithOffset(4),

14849

MinAlign(LD->getAlignment(), 4), MMOFlags, LD->getAAInfo());

14850

14851

if (LD->isIndexed()) {

14852

// Note that DAGCombine should re-form any pre-increment load(s) from

14853

// what is produced here if that makes sense.

14854

DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);

14855

}

14856

14857

DCI.CombineTo(Bitcast2, FloatLoad);

14858

DCI.CombineTo(Bitcast, FloatLoad2);

14859

14860

DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),

14861

SDValue(FloatLoad2.getNode(), 1));

14862

return true;

14863

};

14864

14865

if (ReplaceTwoFloatLoad())

14866

return SDValue(N, 0);

14867

14868

EVT MemVT = LD->getMemoryVT();

14869

Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());

14870

Align ABIAlignment = DAG.getDataLayout().getABITypeAlign(Ty);

14871

if (LD->isUnindexed() && VT.isVector() &&

14872

((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&

14873

// P8 and later hardware should just use LOAD.

14874

!Subtarget.hasP8Vector() &&

14875

(VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||

14876

VT == MVT::v4f32))) &&

14877

LD->getAlign() < ABIAlignment) {

14878

// This is a type-legal unaligned Altivec load.

14879

SDValue Chain = LD->getChain();

14880

SDValue Ptr = LD->getBasePtr();

14881

bool isLittleEndian = Subtarget.isLittleEndian();

14882

14883

// This implements the loading of unaligned vectors as described in

14884

// the venerable Apple Velocity Engine overview. Specifically:

14885

// https://developer.apple.com/hardwaredrivers/ve/alignment.html

14886

// https://developer.apple.com/hardwaredrivers/ve/code_optimization.html

14887

//

14888

// The general idea is to expand a sequence of one or more unaligned

14889

// loads into an alignment-based permutation-control instruction (lvsl

14890

// or lvsr), a series of regular vector loads (which always truncate

14891

// their input address to an aligned address), and a series of

14892

// permutations. The results of these permutations are the requested

14893

// loaded values. The trick is that the last "extra" load is not taken

14894

// from the address you might suspect (sizeof(vector) bytes after the

14895

// last requested load), but rather sizeof(vector) - 1 bytes after the

14896

// last requested vector. The point of this is to avoid a page fault if

14897

// the base address happened to be aligned. This works because if the

14898

// base address is aligned, then adding less than a full vector length

14899

// will cause the last vector in the sequence to be (re)loaded.

14900

// Otherwise, the next vector will be fetched as you might suspect was

14901

// necessary.

14902

14903

// We might be able to reuse the permutation generation from

14904

// a different base address offset from this one by an aligned amount.

14905

// The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this

14906

// optimization later.

14907

Intrinsic::ID Intr, IntrLD, IntrPerm;

14908

MVT PermCntlTy, PermTy, LDTy;

14909

Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr

14910

: Intrinsic::ppc_altivec_lvsl;

14911

IntrLD = Intrinsic::ppc_altivec_lvx;

14912

IntrPerm = Intrinsic::ppc_altivec_vperm;

14913

PermCntlTy = MVT::v16i8;

14914

PermTy = MVT::v4i32;

14915

LDTy = MVT::v4i32;

14916

14917

SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);

14918

14919

// Create the new MMO for the new base load. It is like the original MMO,

14920

// but represents an area in memory almost twice the vector size centered

14921

// on the original address. If the address is unaligned, we might start

14922

// reading up to (sizeof(vector)-1) bytes below the address of the

14923

// original unaligned load.

14924

MachineFunction &MF = DAG.getMachineFunction();

14925

MachineMemOperand *BaseMMO =

14926

MF.getMachineMemOperand(LD->getMemOperand(),

14927

-(long)MemVT.getStoreSize()+1,

14928

2*MemVT.getStoreSize()-1);

14929

14930

// Create the new base load.

14931

SDValue LDXIntID =

14932

DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));

14933

SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };

14934

SDValue BaseLoad =

14935

DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,

14936

DAG.getVTList(PermTy, MVT::Other),

14937

BaseLoadOps, LDTy, BaseMMO);

14938

14939

// Note that the value of IncOffset (which is provided to the next

14940

// load's pointer info offset value, and thus used to calculate the

14941

// alignment), and the value of IncValue (which is actually used to

14942

// increment the pointer value) are different! This is because we

14943

// require the next load to appear to be aligned, even though it

14944

// is actually offset from the base pointer by a lesser amount.

14945

int IncOffset = VT.getSizeInBits() / 8;

14946

int IncValue = IncOffset;

14947

14948

// Walk (both up and down) the chain looking for another load at the real

14949

// (aligned) offset (the alignment of the other load does not matter in

14950

// this case). If found, then do not use the offset reduction trick, as

14951

// that will prevent the loads from being later combined (as they would

14952

// otherwise be duplicates).

14953

if (!findConsecutiveLoad(LD, DAG))

14954

--IncValue;

14955

14956

SDValue Increment =

14957

DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));

14958

Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);

14959

14960

MachineMemOperand *ExtraMMO =

14961

MF.getMachineMemOperand(LD->getMemOperand(),

14962

1, 2*MemVT.getStoreSize()-1);

14963

SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };

14964

SDValue ExtraLoad =

14965

DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,

14966

DAG.getVTList(PermTy, MVT::Other),

14967

ExtraLoadOps, LDTy, ExtraMMO);

14968

14969

SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,

14970

BaseLoad.getValue(1), ExtraLoad.getValue(1));

14971

14972

// Because vperm has a big-endian bias, we must reverse the order

14973

// of the input vectors and complement the permute control vector

14974

// when generating little endian code. We have already handled the

14975

// latter by using lvsr instead of lvsl, so just reverse BaseLoad

14976

// and ExtraLoad here.

14977

SDValue Perm;

14978

if (isLittleEndian)

14979

Perm = BuildIntrinsicOp(IntrPerm,

14980

ExtraLoad, BaseLoad, PermCntl, DAG, dl);

14981

else

14982

Perm = BuildIntrinsicOp(IntrPerm,

14983

BaseLoad, ExtraLoad, PermCntl, DAG, dl);

14984

14985

if (VT != PermTy)

14986

Perm = Subtarget.hasAltivec()

14987

? DAG.getNode(ISD::BITCAST, dl, VT, Perm)

14988

: DAG.getNode(ISD::FP_ROUND, dl, VT, Perm,

14989

DAG.getTargetConstant(1, dl, MVT::i64));

14990

// second argument is 1 because this rounding

14991

// is always exact.

14992

14993

// The output of the permutation is our loaded result, the TokenFactor is

14994

// our new chain.

14995

DCI.CombineTo(N, Perm, TF);

14996

return SDValue(N, 0);

14997

}

14998

}

14999

break;

15000

case ISD::INTRINSIC_WO_CHAIN: {

15001

bool isLittleEndian = Subtarget.isLittleEndian();

15002

unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();

15003

Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr

15004

: Intrinsic::ppc_altivec_lvsl);

15005

if (IID == Intr && N->getOperand(1)->getOpcode() == ISD::ADD) {

15006

SDValue Add = N->getOperand(1);

15007

15008

int Bits = 4 /* 16 byte alignment */;

15009

15010

if (DAG.MaskedValueIsZero(Add->getOperand(1),

15011

APInt::getAllOnesValue(Bits /* alignment */)

15012

.zext(Add.getScalarValueSizeInBits()))) {

15013

SDNode *BasePtr = Add->getOperand(0).getNode();

15014

for (SDNode::use_iterator UI = BasePtr->use_begin(),

15015

UE = BasePtr->use_end();

15016

UI != UE; ++UI) {

15017

if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&

15018

cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() ==

15019

IID) {

15020

// We've found another LVSL/LVSR, and this address is an aligned

15021

// multiple of that one. The results will be the same, so use the

15022

// one we've just found instead.

15023

15024

return SDValue(*UI, 0);

15025

}

15026

}

15027

}

15028

15029

if (isa<ConstantSDNode>(Add->getOperand(1))) {

15030

SDNode *BasePtr = Add->getOperand(0).getNode();

15031

for (SDNode::use_iterator UI = BasePtr->use_begin(),

15032

UE = BasePtr->use_end(); UI != UE; ++UI) {

15033

if (UI->getOpcode() == ISD::ADD &&

15034

isa<ConstantSDNode>(UI->getOperand(1)) &&

15035

(cast<ConstantSDNode>(Add->getOperand(1))->getZExtValue() -

15036

cast<ConstantSDNode>(UI->getOperand(1))->getZExtValue()) %

15037

(1ULL << Bits) == 0) {

15038

SDNode *OtherAdd = *UI;

15039

for (SDNode::use_iterator VI = OtherAdd->use_begin(),

15040

VE = OtherAdd->use_end(); VI != VE; ++VI) {

15041

if (VI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&

15042

cast<ConstantSDNode>(VI->getOperand(0))->getZExtValue() == IID) {

15043

return SDValue(*VI, 0);

15044

}

15045

}

15046

}

15047

}

15048

}

15049

}

15050

15051

// Combine vmaxsw/h/b(a, a's negation) to abs(a)

15052

// Expose the vabsduw/h/b opportunity for down stream

15053

if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&

15054

(IID == Intrinsic::ppc_altivec_vmaxsw ||

15055

IID == Intrinsic::ppc_altivec_vmaxsh ||

15056

IID == Intrinsic::ppc_altivec_vmaxsb)) {

15057

SDValue V1 = N->getOperand(1);

15058

SDValue V2 = N->getOperand(2);

15059

if ((V1.getSimpleValueType() == MVT::v4i32 ||

15060

V1.getSimpleValueType() == MVT::v8i16 ||

15061

V1.getSimpleValueType() == MVT::v16i8) &&

15062

V1.getSimpleValueType() == V2.getSimpleValueType()) {

15063

// (0-a, a)

15064

if (V1.getOpcode() == ISD::SUB &&

15065

ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&

15066

V1.getOperand(1) == V2) {

15067

return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);

15068

}

15069

// (a, 0-a)

15070

if (V2.getOpcode() == ISD::SUB &&

15071

ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&

15072

V2.getOperand(1) == V1) {

15073

return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);

15074

}

15075

// (x-y, y-x)

15076

if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&

15077

V1.getOperand(0) == V2.getOperand(1) &&

15078

V1.getOperand(1) == V2.getOperand(0)) {

15079

return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);

15080

}

15081

}

15082

}

15083

}

15084

15085

break;

15086

case ISD::INTRINSIC_W_CHAIN:

15087

// For little endian, VSX loads require generating lxvd2x/xxswapd.

15088

// Not needed on ISA 3.0 based CPUs since we have a non-permuting load.

15089

if (Subtarget.needsSwapsForVSXMemOps()) {

15090

switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {

15091

default:

15092

break;

15093

case Intrinsic::ppc_vsx_lxvw4x:

15094

case Intrinsic::ppc_vsx_lxvd2x:

15095

return expandVSXLoadForLE(N, DCI);

15096

}

15097

}

15098

break;

15099

case ISD::INTRINSIC_VOID:

15100

// For little endian, VSX stores require generating xxswapd/stxvd2x.

15101

// Not needed on ISA 3.0 based CPUs since we have a non-permuting store.

15102

if (Subtarget.needsSwapsForVSXMemOps()) {

15103

switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {

15104

default:

15105

break;

15106

case Intrinsic::ppc_vsx_stxvw4x:

15107

case Intrinsic::ppc_vsx_stxvd2x:

15108

return expandVSXStoreForLE(N, DCI);

15109

}

15110

}

15111

break;

15112

case ISD::BSWAP:

15113

// Turn BSWAP (LOAD) -> lhbrx/lwbrx.

15114

if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&

15115

N->getOperand(0).hasOneUse() &&

15116

(N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||

15117

(Subtarget.hasLDBRX() && Subtarget.isPPC64() &&

15118

N->getValueType(0) == MVT::i64))) {

15119

SDValue Load = N->getOperand(0);

15120

LoadSDNode *LD = cast<LoadSDNode>(Load);

15121

// Create the byte-swapping load.

15122

SDValue Ops[] = {

15123

LD->getChain(), // Chain

15124

LD->getBasePtr(), // Ptr

15125

DAG.getValueType(N->getValueType(0)) // VT

15126

};

15127

SDValue BSLoad =

15128

DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,

15129

DAG.getVTList(N->getValueType(0) == MVT::i64 ?

15130

MVT::i64 : MVT::i32, MVT::Other),

15131

Ops, LD->getMemoryVT(), LD->getMemOperand());

15132

15133

// If this is an i16 load, insert the truncate.

15134

SDValue ResVal = BSLoad;

15135

if (N->getValueType(0) == MVT::i16)

15136

ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);

15137

15138

// First, combine the bswap away. This makes the value produced by the

15139

// load dead.

15140

DCI.CombineTo(N, ResVal);

15141

15142

// Next, combine the load away, we give it a bogus result value but a real

15143

// chain result. The result value is dead because the bswap is dead.

15144

DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));

15145

15146

// Return N so it doesn't get rechecked!

15147

return SDValue(N, 0);

15148

}

15149

break;

15150

case PPCISD::VCMP:

15151

// If a VCMPo node already exists with exactly the same operands as this

15152

// node, use its result instead of this node (VCMPo computes both a CR6 and

15153

// a normal output).

15154

//

15155

if (!N->getOperand(0).hasOneUse() &&

15156

!N->getOperand(1).hasOneUse() &&

15157

!N->getOperand(2).hasOneUse()) {

15158

15159

// Scan all of the users of the LHS, looking for VCMPo's that match.

15160

SDNode *VCMPoNode = nullptr;

15161

15162

SDNode *LHSN = N->getOperand(0).getNode();

15163

for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();

15164

UI != E; ++UI)

15165

if (UI->getOpcode() == PPCISD::VCMPo &&

15166

UI->getOperand(1) == N->getOperand(1) &&

15167

UI->getOperand(2) == N->getOperand(2) &&

15168

UI->getOperand(0) == N->getOperand(0)) {

15169

VCMPoNode = *UI;

15170

break;

15171

}

15172

15173

// If there is no VCMPo node, or if the flag value has no uses, don't

15174

// transform this.

15175

if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1))

15176

break;

15177

15178

// Look at the (necessarily single) use of the flag value. If it has a

15179

// chain, this transformation is more complex. Note that multiple things

15180

// could use the value result, which we should ignore.

15181

SDNode *FlagUser = nullptr;

15182

for (SDNode::use_iterator UI = VCMPoNode->use_begin();

15183

FlagUser == nullptr; ++UI) {

15184

assert(UI != VCMPoNode->use_end() && "Didn't find user!")((UI != VCMPoNode->use_end() && "Didn't find user!"
) ? static_cast<void> (0) : __assert_fail ("UI != VCMPoNode->use_end() && \"Didn't find user!\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 15184, __PRETTY_FUNCTION__));

15185

SDNode *User = *UI;

15186

for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {

15187

if (User->getOperand(i) == SDValue(VCMPoNode, 1)) {

15188

FlagUser = User;

15189

break;

15190

}

15191

}

15192

}

15193

15194

// If the user is a MFOCRF instruction, we know this is safe.

15195

// Otherwise we give up for right now.

15196

if (FlagUser->getOpcode() == PPCISD::MFOCRF)

15197

return SDValue(VCMPoNode, 0);

15198

}

15199

break;

15200

case ISD::BRCOND: {

15201

SDValue Cond = N->getOperand(1);

15202

SDValue Target = N->getOperand(2);

15203

15204

if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN &&

15205

cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() ==

15206

Intrinsic::loop_decrement) {

15207

15208

// We now need to make the intrinsic dead (it cannot be instruction

15209

// selected).

15210

DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0));

15211

assert(Cond.getNode()->hasOneUse() &&((Cond.getNode()->hasOneUse() && "Counter decrement has more than one use"
) ? static_cast<void> (0) : __assert_fail ("Cond.getNode()->hasOneUse() && \"Counter decrement has more than one use\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 15212, __PRETTY_FUNCTION__))

15212

"Counter decrement has more than one use")((Cond.getNode()->hasOneUse() && "Counter decrement has more than one use"
) ? static_cast<void> (0) : __assert_fail ("Cond.getNode()->hasOneUse() && \"Counter decrement has more than one use\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 15212, __PRETTY_FUNCTION__));

15213

15214

return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other,

15215

N->getOperand(0), Target);

15216

}

15217

}

15218

break;

15219

case ISD::BR_CC: {

15220

// If this is a branch on an altivec predicate comparison, lower this so

15221

// that we don't have to do a MFOCRF: instead, branch directly on CR6. This

15222

// lowering is done pre-legalize, because the legalizer lowers the predicate

15223

// compare down to code that is difficult to reassemble.

15224

ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();

15225

SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);

15226

15227

// Sometimes the promoted value of the intrinsic is ANDed by some non-zero

15228

// value. If so, pass-through the AND to get to the intrinsic.

15229

if (LHS.getOpcode() == ISD::AND &&

15230

LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN &&

15231

cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() ==

15232

Intrinsic::loop_decrement &&

15233

isa<ConstantSDNode>(LHS.getOperand(1)) &&

15234

!isNullConstant(LHS.getOperand(1)))

15235

LHS = LHS.getOperand(0);

15236

15237

if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&

15238

cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() ==

15239

Intrinsic::loop_decrement &&

15240

isa<ConstantSDNode>(RHS)) {

15241

assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&(((CC == ISD::SETEQ || CC == ISD::SETNE) && "Counter decrement comparison is not EQ or NE"
) ? static_cast<void> (0) : __assert_fail ("(CC == ISD::SETEQ || CC == ISD::SETNE) && \"Counter decrement comparison is not EQ or NE\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 15242, __PRETTY_FUNCTION__))

15242

"Counter decrement comparison is not EQ or NE")(((CC == ISD::SETEQ || CC == ISD::SETNE) && "Counter decrement comparison is not EQ or NE"
) ? static_cast<void> (0) : __assert_fail ("(CC == ISD::SETEQ || CC == ISD::SETNE) && \"Counter decrement comparison is not EQ or NE\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 15242, __PRETTY_FUNCTION__));

15243

15244

unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();

15245

bool isBDNZ = (CC == ISD::SETEQ && Val) ||

15246

(CC == ISD::SETNE && !Val);

15247

15248

// We now need to make the intrinsic dead (it cannot be instruction

15249

// selected).

15250

DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0));

15251

assert(LHS.getNode()->hasOneUse() &&((LHS.getNode()->hasOneUse() && "Counter decrement has more than one use"
) ? static_cast<void> (0) : __assert_fail ("LHS.getNode()->hasOneUse() && \"Counter decrement has more than one use\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 15252, __PRETTY_FUNCTION__))

15252

"Counter decrement has more than one use")((LHS.getNode()->hasOneUse() && "Counter decrement has more than one use"
) ? static_cast<void> (0) : __assert_fail ("LHS.getNode()->hasOneUse() && \"Counter decrement has more than one use\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 15252, __PRETTY_FUNCTION__));

15253

15254

return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other,

15255

N->getOperand(0), N->getOperand(4));

15256

}

15257

15258

int CompareOpc;

15259

bool isDot;

15260

15261

if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&

15262

isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&

15263

getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {

15264

assert(isDot && "Can't compare against a vector result!")((isDot && "Can't compare against a vector result!") ?
static_cast<void> (0) : __assert_fail ("isDot && \"Can't compare against a vector result!\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 15264, __PRETTY_FUNCTION__));

15265

15266

// If this is a comparison against something other than 0/1, then we know

15267

// that the condition is never/always true.

15268

unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();

15269

if (Val != 0 && Val != 1) {

15270

if (CC == ISD::SETEQ) // Cond never true, remove branch.

15271

return N->getOperand(0);

15272

// Always !=, turn it into an unconditional branch.

15273

return DAG.getNode(ISD::BR, dl, MVT::Other,

15274

N->getOperand(0), N->getOperand(4));

15275

}

15276

15277

bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);

15278

15279

// Create the PPCISD altivec 'dot' comparison node.

15280

SDValue Ops[] = {

15281

LHS.getOperand(2), // LHS of compare

15282

LHS.getOperand(3), // RHS of compare

15283

DAG.getConstant(CompareOpc, dl, MVT::i32)

15284

};

15285

EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };

15286

SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);

15287

15288

// Unpack the result based on how the target uses it.

15289

PPC::Predicate CompOpc;

15290

switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {

15291

default: // Can't happen, don't crash on invalid number though.

15292

case 0: // Branch on the value of the EQ bit of CR6.

15293

CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;

15294

break;

15295

case 1: // Branch on the inverted value of the EQ bit of CR6.

15296

CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;

15297

break;

15298

case 2: // Branch on the value of the LT bit of CR6.

15299

CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;

15300

break;

15301

case 3: // Branch on the inverted value of the LT bit of CR6.

15302

CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;

15303

break;

15304

}

15305

15306

return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),

15307

DAG.getConstant(CompOpc, dl, MVT::i32),

15308

DAG.getRegister(PPC::CR6, MVT::i32),

15309

N->getOperand(4), CompNode.getValue(1));

15310

}

15311

break;

15312

}

15313

case ISD::BUILD_VECTOR:

15314

return DAGCombineBuildVector(N, DCI);

15315

case ISD::ABS:

15316

return combineABS(N, DCI);

15317

case ISD::VSELECT:

15318

return combineVSelect(N, DCI);

15319

}

15320

15321

return SDValue();

15322

}

15323

15324

/// Fold (sdiv X, pow2) into a PPCISD::SRA_ADDZE node.
///
/// Handles i32 results, and i64 results when the subtarget is PPC64. The
/// divisor must be a power of two or the negation of one; in the negated case
/// the SRA_ADDZE result is additionally subtracted from zero.
///
/// \param N        the division node being folded; operand 0 is the dividend.
/// \param Divisor  the constant divisor.
/// \param DAG      the DAG in which new nodes are created.
/// \param Created  receives the SRA_ADDZE node and, if emitted, the SUB node.
/// \returns the replacement value, or an empty SDValue when the fold does not
///          apply.
SDValue
PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
                                 SelectionDAG &DAG,
                                 SmallVectorImpl<SDNode *> &Created) const {
  const EVT ResultVT = N->getValueType(0);
  const bool IsI32 = ResultVT == MVT::i32;
  const bool IsI64 = ResultVT == MVT::i64;

  // 64-bit division is only folded when targeting PPC64.
  if (IsI64 && !Subtarget.isPPC64())
    return SDValue();
  if (!IsI32 && !IsI64)
    return SDValue();

  // Accept both +2^k and -2^k divisors.
  const bool IsNegPow2 = (-Divisor).isPowerOf2();
  if (!IsNegPow2 && !Divisor.isPowerOf2())
    return SDValue();

  SDLoc DL(N);
  SDValue Dividend = N->getOperand(0);

  // The shift amount is log2 of the divisor's magnitude.
  unsigned Log2Magnitude =
      (IsNegPow2 ? -Divisor : Divisor).countTrailingZeros();
  SDValue ShiftAmt = DAG.getConstant(Log2Magnitude, DL, ResultVT);

  SDValue Result =
      DAG.getNode(PPCISD::SRA_ADDZE, DL, ResultVT, Dividend, ShiftAmt);
  Created.push_back(Result.getNode());

  if (!IsNegPow2)
    return Result;

  // Dividing by -2^k: negate the quotient computed for +2^k.
  Result = DAG.getNode(ISD::SUB, DL, ResultVT,
                       DAG.getConstant(0, DL, ResultVT), Result);
  Created.push_back(Result.getNode());
  return Result;
}

15353

15354

//===----------------------------------------------------------------------===//

15355

// Inline Assembly Support

15356

//===----------------------------------------------------------------------===//

15357

15358

void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,

15359

KnownBits &Known,

15360

const APInt &DemandedElts,

15361

const SelectionDAG &DAG,

15362

unsigned Depth) const {

15363

Known.resetAll();

15364

switch (Op.getOpcode()) {

15365

default: break;

15366

case PPCISD::LBRX: {

15367

// lhbrx is known to have the top bits cleared out.

15368

if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)

15369

Known.Zero = 0xFFFF0000;

15370

break;

15371

}

15372

case ISD::INTRINSIC_WO_CHAIN: {

15373

switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {

15374

default: break;

15375

case Intrinsic::ppc_altivec_vcmpbfp_p:

15376

case Intrinsic::ppc_altivec_vcmpeqfp_p:

15377

case Intrinsic::ppc_altivec_vcmpequb_p:

15378

case Intrinsic::ppc_altivec_vcmpequh_p:

15379

case Intrinsic::ppc_altivec_vcmpequw_p:

15380

case Intrinsic::ppc_altivec_vcmpequd_p:

15381

case Intrinsic::ppc_altivec_vcmpequq_p:

15382

case Intrinsic::ppc_altivec_vcmpgefp_p:

15383

case Intrinsic::ppc_altivec_vcmpgtfp_p:

15384

case Intrinsic::ppc_altivec_vcmpgtsb_p:

15385

case Intrinsic::ppc_altivec_vcmpgtsh_p:

15386

case Intrinsic::ppc_altivec_vcmpgtsw_p:

15387

case Intrinsic::ppc_altivec_vcmpgtsd_p:

15388

case Intrinsic::ppc_altivec_vcmpgtsq_p:

15389

case Intrinsic::ppc_altivec_vcmpgtub_p:

15390

case Intrinsic::ppc_altivec_vcmpgtuh_p:

15391

case Intrinsic::ppc_altivec_vcmpgtuw_p:

15392

case Intrinsic::ppc_altivec_vcmpgtud_p:

15393

case Intrinsic::ppc_altivec_vcmpgtuq_p:

15394

Known.Zero = ~1U; // All bits but the low one are known to be zero.

15395

break;

15396

}

15397

}

15398

}

15399

}

15400

15401

/// Return the preferred alignment for the loop \p ML.
///
/// For the CPU directives listed below (970 and PWR4 through PWR_FUTURE) and a
/// non-null \p ML, a 32-byte alignment is requested when either
///  - innermost-loop alignment is not disabled and \p ML is a nested loop with
///    no sub-loops, or
///  - the summed instruction size of the loop is in the range (16, 32] bytes.
/// In every other case the decision is delegated to
/// TargetLowering::getPrefLoopAlignment.
Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
  switch (Subtarget.getCPUDirective()) {
  default: break;
  case PPC::DIR_970:
  case PPC::DIR_PWR4:
  case PPC::DIR_PWR5:
  case PPC::DIR_PWR5X:
  case PPC::DIR_PWR6:
  case PPC::DIR_PWR6X:
  case PPC::DIR_PWR7:
  case PPC::DIR_PWR8:
  case PPC::DIR_PWR9:
  case PPC::DIR_PWR10:
  case PPC::DIR_PWR_FUTURE: {
    if (!ML)
      break;

    if (!DisableInnermostLoopAlign32) {
      // If the nested loop is an innermost loop, prefer to a 32-byte alignment,
      // so that we can decrease cache misses and branch-prediction misses.
      // Actual alignment of the loop will depend on the hotness check and other
      // logic in alignBlocks.
      if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
        return Align(32);
    }

    const PPCInstrInfo *TII = Subtarget.getInstrInfo();

    // For small loops (between 5 and 8 instructions), align to a 32-byte
    // boundary so that the entire loop fits in one instruction-cache line.
    //
    // Only the question "is the loop at most 32 bytes?" matters, so stop
    // scanning as soon as the running size exceeds 32. The outer loop
    // condition checks the size as well; otherwise the inner `break` would
    // merely move on to the next block and every remaining block of a large
    // loop would still be visited.
    uint64_t LoopSize = 0;
    for (auto I = ML->block_begin(), IE = ML->block_end();
         I != IE && LoopSize <= 32; ++I)
      for (const MachineInstr &MI : **I) {
        LoopSize += TII->getInstSizeInBytes(MI);
        if (LoopSize > 32)
          break;
      }

    if (LoopSize > 16 && LoopSize <= 32)
      return Align(32);

    break;
  }
  }

  return TargetLowering::getPrefLoopAlignment(ML);
}

15448

15449

/// getConstraintType - Given a constraint, return the type of

15450

/// constraint it is for this target.

15451

PPCTargetLowering::ConstraintType

15452

PPCTargetLowering::getConstraintType(StringRef Constraint) const {

15453

if (Constraint.size() == 1) {

15454

switch (Constraint[0]) {

15455

default: break;

15456

case 'b':

15457

case 'r':

15458

case 'f':

15459

case 'd':

15460

case 'v':

15461

case 'y':

15462

return C_RegisterClass;

15463

case 'Z':

15464

// FIXME: While Z does indicate a memory constraint, it specifically

15465

// indicates an r+r address (used in conjunction with the 'y' modifier

15466

// in the replacement string). Currently, we're forcing the base

15467

// register to be r0 in the asm printer (which is interpreted as zero)

15468

// and forming the complete address in the second register. This is

15469

// suboptimal.

15470

return C_Memory;

15471

}

15472

} else if (Constraint == "wc") { // individual CR bits.

15473

return C_RegisterClass;

15474

} else if (Constraint == "wa" || Constraint == "wd" ||

15475

Constraint == "wf" || Constraint == "ws" ||

15476

Constraint == "wi" || Constraint == "ww") {

15477

return C_RegisterClass; // VSX registers.

15478

}

15479

return TargetLowering::getConstraintType(Constraint);

15480

}

15481

15482

/// Examine constraint type and operand type and determine a weight value.

15483

/// This object must already have been set up with the operand type

15484

/// and the current alternative constraint selected.

15485

TargetLowering::ConstraintWeight

15486

PPCTargetLowering::getSingleConstraintMatchWeight(

15487

AsmOperandInfo &info, const char *constraint) const {

15488

ConstraintWeight weight = CW_Invalid;

15489

Value *CallOperandVal = info.CallOperandVal;

15490

// If we don't have a value, we can't do a match,

15491

// but allow it at the lowest weight.

15492

if (!CallOperandVal)

15493

return CW_Default;

15494

Type *type = CallOperandVal->getType();

15495

15496

// Look at the constraint type.

15497

if (StringRef(constraint) == "wc" && type->isIntegerTy(1))

15498

return CW_Register; // an individual CR bit.

15499

else if ((StringRef(constraint) == "wa" ||

15500

StringRef(constraint) == "wd" ||

15501

StringRef(constraint) == "wf") &&

15502

type->isVectorTy())

15503

return CW_Register;

15504

else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))

15505

return CW_Register; // just hold 64-bit integers data.

15506

else if (StringRef(constraint) == "ws" && type->isDoubleTy())

15507

return CW_Register;

15508

else if (StringRef(constraint) == "ww" && type->isFloatTy())

15509

return CW_Register;

15510

15511

switch (*constraint) {

15512

default:

15513

weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);

15514

break;

15515

case 'b':

15516

if (type->isIntegerTy())

15517

weight = CW_Register;

15518

break;

15519

case 'f':

15520

if (type->isFloatTy())

15521

weight = CW_Register;

15522

break;

15523

case 'd':

15524

if (type->isDoubleTy())

15525

weight = CW_Register;

15526

break;

15527

case 'v':

15528

if (type->isVectorTy())

15529

weight = CW_Register;

15530

break;

15531

case 'y':

15532

weight = CW_Register;

15533

break;

15534

case 'Z':

15535

weight = CW_Memory;

15536

break;

15537

}

15538

return weight;

15539

}

15540

15541

std::pair<unsigned, const TargetRegisterClass *>

15542

PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,

15543

StringRef Constraint,

15544

MVT VT) const {

15545

if (Constraint.size() == 1) {

15546

// GCC RS6000 Constraint Letters

15547

switch (Constraint[0]) {

15548

case 'b': // R1-R31

15549

if (VT == MVT::i64 && Subtarget.isPPC64())

15550

return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);

15551

return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);

15552

case 'r': // R0-R31

15553

if (VT == MVT::i64 && Subtarget.isPPC64())

15554

return std::make_pair(0U, &PPC::G8RCRegClass);

15555

return std::make_pair(0U, &PPC::GPRCRegClass);

15556

// 'd' and 'f' constraints are both defined to be "the floating point

15557

// registers", where one is for 32-bit and the other for 64-bit. We don't

15558

// really care overly much here so just give them all the same reg classes.

15559

case 'd':

15560

case 'f':

15561

if (Subtarget.hasSPE()) {

15562

if (VT == MVT::f32 || VT == MVT::i32)

15563

return std::make_pair(0U, &PPC::GPRCRegClass);

15564

if (VT == MVT::f64 || VT == MVT::i64)

15565

return std::make_pair(0U, &PPC::SPERCRegClass);

15566

} else {

15567

if (VT == MVT::f32 || VT == MVT::i32)

15568

return std::make_pair(0U, &PPC::F4RCRegClass);

15569

if (VT == MVT::f64 || VT == MVT::i64)

15570

return std::make_pair(0U, &PPC::F8RCRegClass);

15571

}

15572

break;

15573

case 'v':

15574

if (Subtarget.hasAltivec())

15575

return std::make_pair(0U, &PPC::VRRCRegClass);

15576

break;

15577

case 'y': // crrc

15578

return std::make_pair(0U, &PPC::CRRCRegClass);

15579

}

15580

} else if (Constraint == "wc" && Subtarget.useCRBits()) {

15581

// An individual CR bit.

15582

return std::make_pair(0U, &PPC::CRBITRCRegClass);

15583

} else if ((Constraint == "wa" || Constraint == "wd" ||

15584

Constraint == "wf" || Constraint == "wi") &&

15585

Subtarget.hasVSX()) {

15586

return std::make_pair(0U, &PPC::VSRCRegClass);

15587

} else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {

15588

if (VT == MVT::f32 && Subtarget.hasP8Vector())

15589

return std::make_pair(0U, &PPC::VSSRCRegClass);

15590

else

15591

return std::make_pair(0U, &PPC::VSFRCRegClass);

15592

}

15593

15594

// If we name a VSX register, we can't defer to the base class because it

15595

// will not recognize the correct register (their names will be VSL{0-31}

15596

// and V{0-31} so they won't match). So we match them here.

15597

if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {

15598

int VSNum = atoi(Constraint.data() + 3);

15599

assert(VSNum >= 0 && VSNum <= 63 &&((VSNum >= 0 && VSNum <= 63 && "Attempted to access a vsr out of range"
) ? static_cast<void> (0) : __assert_fail ("VSNum >= 0 && VSNum <= 63 && \"Attempted to access a vsr out of range\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 15600, __PRETTY_FUNCTION__))

15600

"Attempted to access a vsr out of range")((VSNum >= 0 && VSNum <= 63 && "Attempted to access a vsr out of range"
) ? static_cast<void> (0) : __assert_fail ("VSNum >= 0 && VSNum <= 63 && \"Attempted to access a vsr out of range\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 15600, __PRETTY_FUNCTION__));

15601

if (VSNum < 32)

15602

return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass);

15603

return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass);

15604

}

15605

std::pair<unsigned, const TargetRegisterClass *> R =

15606

TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);

15607

15608

// r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers

15609

// (which we call X[0-9]+). If a 64-bit value has been requested, and a

15610

// 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent

15611

// register.

15612

// FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use

15613

// the AsmName field from *RegisterInfo.td, then this would not be necessary.

15614

if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&

15615

PPC::GPRCRegClass.contains(R.first))

15616

return std::make_pair(TRI->getMatchingSuperReg(R.first,

15617

PPC::sub_32, &PPC::G8RCRegClass),

15618

&PPC::G8RCRegClass);

15619

15620

// GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.

15621

if (!R.second && StringRef("{cc}").equals_lower(Constraint)) {

15622

R.first = PPC::CR0;

15623

R.second = &PPC::CRRCRegClass;

15624

}

15625

15626

return R;

15627

}

15628

15629

/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops

15630

/// vector. If it is invalid, don't add anything to Ops.

15631

void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,

15632

std::string &Constraint,

15633

std::vector<SDValue>&Ops,

15634

SelectionDAG &DAG) const {

15635

SDValue Result;

15636

15637

// Only support length 1 constraints.

15638

if (Constraint.length() > 1) return;

15639

15640

char Letter = Constraint[0];

15641

switch (Letter) {

15642

default: break;

15643

case 'I':

15644

case 'J':

15645

case 'K':

15646

case 'L':

15647

case 'M':

15648

case 'N':

15649

case 'O':

15650

case 'P': {

15651

ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);

15652

if (!CST) return; // Must be an immediate to match.

15653

SDLoc dl(Op);

15654

int64_t Value = CST->getSExtValue();

15655

EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative

15656

// numbers are printed as such.

15657

switch (Letter) {

15658

default: llvm_unreachable("Unknown constraint letter!")::llvm::llvm_unreachable_internal("Unknown constraint letter!"
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 15658);

15659

case 'I': // "I" is a signed 16-bit constant.

15660

if (isInt<16>(Value))

15661

Result = DAG.getTargetConstant(Value, dl, TCVT);

15662

break;

15663

case 'J': // "J" is a constant with only the high-order 16 bits nonzero.

15664

if (isShiftedUInt<16, 16>(Value))

15665

Result = DAG.getTargetConstant(Value, dl, TCVT);

15666

break;

15667

case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.

15668

if (isShiftedInt<16, 16>(Value))

15669

Result = DAG.getTargetConstant(Value, dl, TCVT);

15670

break;

15671

case 'K': // "K" is a constant with only the low-order 16 bits nonzero.

15672

if (isUInt<16>(Value))

15673

Result = DAG.getTargetConstant(Value, dl, TCVT);

15674

break;

15675

case 'M': // "M" is a constant that is greater than 31.

15676

if (Value > 31)

15677

Result = DAG.getTargetConstant(Value, dl, TCVT);

15678

break;

15679

case 'N': // "N" is a positive constant that is an exact power of two.

15680

if (Value > 0 && isPowerOf2_64(Value))

15681

Result = DAG.getTargetConstant(Value, dl, TCVT);

15682

break;

15683

case 'O': // "O" is the constant zero.

15684

if (Value == 0)

15685

Result = DAG.getTargetConstant(Value, dl, TCVT);

15686

break;

15687

case 'P': // "P" is a constant whose negation is a signed 16-bit constant.

15688

if (isInt<16>(-Value))

15689

Result = DAG.getTargetConstant(Value, dl, TCVT);

15690

break;

15691

}

15692

break;

15693

}

15694

}

15695

15696

if (Result.getNode()) {

15697

Ops.push_back(Result);

15698

return;

15699

}

15700

15701

// Handle standard constraint letters.

15702

TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);

15703

}

15704

15705

// isLegalAddressingMode - Return true if the addressing mode represented

15706

// by AM is legal for this target, for a load/store of the specified type.

15707

bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,

15708

const AddrMode &AM, Type *Ty,

15709

unsigned AS,

15710

Instruction *I) const {

15711

// Vector type r+i form is supported since power9 as DQ form. We don't check

15712

// the offset matching DQ form requirement(off % 16 == 0), because on PowerPC,

15713

// imm form is preferred and the offset can be adjusted to use imm form later

15714

// in pass PPCLoopInstrFormPrep. Also in LSR, for one LSRUse, it uses min and

15715

// max offset to check legal addressing mode, we should be a little aggressive

15716

// to contain other offsets for that LSRUse.

15717

if (Ty->isVectorTy() && AM.BaseOffs != 0 && !Subtarget.hasP9Vector())

15718

return false;

15719

15720

// PPC allows a sign-extended 16-bit immediate field.

15721

if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)

15722

return false;

15723

15724

// No global is ever allowed as a base.

15725

if (AM.BaseGV)

15726

return false;

15727

15728

// PPC only support r+r,

15729

switch (AM.Scale) {

15730

case 0: // "r+i" or just "i", depending on HasBaseReg.

15731

break;

15732

case 1:

15733

if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.

15734

return false;

15735

// Otherwise we have r+r or r+i.

15736

break;

15737

case 2:

15738

if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.

15739

return false;

15740

// Allow 2*r as r+r.

15741

break;

15742

default:

15743

// No other scales are supported.

15744

return false;

15745

}

15746

15747

return true;

15748

}

15749

15750

/// Lower a RETURNADDR node. Marks the return address as taken and the LR
/// store as required, then loads the return address either from this
/// function's return-address frame index (depth 0) or from the return save
/// slot of the frame found by LowerFRAMEADDR (depth > 0).
SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
                                           SelectionDAG &DAG) const {
  MachineFunction &Fn = DAG.getMachineFunction();
  Fn.getFrameInfo().setReturnAddressIsTaken(true);

  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  SDLoc dl(Op);
  const unsigned FrameDepth =
      cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();

  // Make sure the function does not optimize away the store of the RA to
  // the stack.
  Fn.getInfo<PPCFunctionInfo>()->setLRStoreRequired();

  const bool Is64 = Subtarget.isPPC64();
  auto PtrVT = getPointerTy(Fn.getDataLayout());

  if (FrameDepth == 0) {
    // Just load the return address off the stack.
    SDValue RetAddrSlot = getReturnAddrFrameIndex(DAG);
    return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrSlot,
                       MachinePointerInfo());
  }

  // Walk to the requested frame and read its return save slot.
  SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
  SDValue SaveOffset =
      DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
                      Is64 ? MVT::i64 : MVT::i32);
  SDValue SlotAddr = DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, SaveOffset);
  return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), SlotAddr,
                     MachinePointerInfo());
}

15784

15785

/// Lower a FRAMEADDR node. Marks the frame address as taken, copies the
/// frame register, and then follows one load per requested depth level.
SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
                                          SelectionDAG &DAG) const {
  SDLoc dl(Op);
  const unsigned Levels =
      cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();

  MachineFunction &Fn = DAG.getMachineFunction();
  Fn.getFrameInfo().setFrameAddressIsTaken(true);

  EVT PtrVT = getPointerTy(Fn.getDataLayout());
  const bool Is64 = PtrVT == MVT::i64;

  // Naked functions never have a frame pointer, and so we use r1. For all
  // other functions, this decision must be delayed until during PEI.
  const bool IsNaked = Fn.getFunction().hasFnAttribute(Attribute::Naked);
  unsigned FrameReg;
  if (Is64)
    FrameReg = IsNaked ? PPC::X1 : PPC::FP8;
  else
    FrameReg = IsNaked ? PPC::R1 : PPC::FP;

  SDValue Addr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, PtrVT);
  for (unsigned Level = 0; Level != Levels; ++Level)
    Addr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(), Addr,
                       MachinePointerInfo());
  return Addr;
}

15812

15813

// FIXME? Maybe this could be a TableGen attribute on some registers and

15814

// this table could be generated automatically from RegInfo.

15815

Register PPCTargetLowering::getRegisterByName(const char* RegName, LLT VT,

15816

const MachineFunction &MF) const {

15817

bool isPPC64 = Subtarget.isPPC64();

15818

15819

bool is64Bit = isPPC64 && VT == LLT::scalar(64);

15820

if (!is64Bit && VT != LLT::scalar(32))

15821

report_fatal_error("Invalid register global variable type");

15822

15823

Register Reg = StringSwitch<Register>(RegName)

15824

.Case("r1", is64Bit ? PPC::X1 : PPC::R1)

15825

.Case("r2", isPPC64 ? Register() : PPC::R2)

15826

.Case("r13", (is64Bit ? PPC::X13 : PPC::R13))

15827

.Default(Register());

15828

15829

if (Reg)

15830

return Reg;

15831

report_fatal_error("Invalid register name global variable");

15832

}

15833

15834

/// Return true if the address node GA is accessed indirectly (through the
/// GOT/TOC) on the current subtarget and code model.
bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
  // 32-bit SVR4 ABI access everything as got-indirect. AIX accesses
  // everything indirectly through the TOC, which is similar to the GOT.
  if (Subtarget.is32BitELFABI() || Subtarget.isAIXABI())
    return true;

  // If it is small or large code model, module locals are accessed
  // indirectly by loading their address from .toc/.got.
  switch (getTargetMachine().getCodeModel()) {
  case CodeModel::Small:
  case CodeModel::Large:
    return true;
  default:
    break;
  }

  // JumpTable and BlockAddress are accessed as got-indirect.
  if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
    return true;

  // For a global, defer to the subtarget; anything else is direct.
  GlobalAddressSDNode *Global = dyn_cast<GlobalAddressSDNode>(GA);
  return Global ? Subtarget.isGVIndirectSymbol(Global->getGlobal()) : false;
}

15859

15860

bool

15861

PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {

15862

// The PowerPC target isn't yet aware of offsets.

15863

return false;

15864

}

15865

15866

bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,

15867

const CallInst &I,

15868

MachineFunction &MF,

15869

unsigned Intrinsic) const {

15870

switch (Intrinsic) {

15871

case Intrinsic::ppc_altivec_lvx:

15872

case Intrinsic::ppc_altivec_lvxl:

15873

case Intrinsic::ppc_altivec_lvebx:

15874

case Intrinsic::ppc_altivec_lvehx:

15875

case Intrinsic::ppc_altivec_lvewx:

15876

case Intrinsic::ppc_vsx_lxvd2x:

15877

case Intrinsic::ppc_vsx_lxvw4x:

15878

case Intrinsic::ppc_vsx_lxvd2x_be:

15879

case Intrinsic::ppc_vsx_lxvw4x_be:

15880

case Intrinsic::ppc_vsx_lxvl:

15881

case Intrinsic::ppc_vsx_lxvll: {

15882

EVT VT;

15883

switch (Intrinsic) {

15884

case Intrinsic::ppc_altivec_lvebx:

15885

VT = MVT::i8;

15886

break;

15887

case Intrinsic::ppc_altivec_lvehx:

15888

VT = MVT::i16;

15889

break;

15890

case Intrinsic::ppc_altivec_lvewx:

15891

VT = MVT::i32;

15892

break;

15893

case Intrinsic::ppc_vsx_lxvd2x:

15894

case Intrinsic::ppc_vsx_lxvd2x_be:

15895

VT = MVT::v2f64;

15896

break;

15897

default:

15898

VT = MVT::v4i32;

15899

break;

15900

}

15901

15902

Info.opc = ISD::INTRINSIC_W_CHAIN;

15903

Info.memVT = VT;

15904

Info.ptrVal = I.getArgOperand(0);

15905

Info.offset = -VT.getStoreSize()+1;

15906

Info.size = 2*VT.getStoreSize()-1;

15907

Info.align = Align(1);

15908

Info.flags = MachineMemOperand::MOLoad;

15909

return true;

15910

}

15911

case Intrinsic::ppc_altivec_stvx:

15912

case Intrinsic::ppc_altivec_stvxl:

15913

case Intrinsic::ppc_altivec_stvebx:

15914

case Intrinsic::ppc_altivec_stvehx:

15915

case Intrinsic::ppc_altivec_stvewx:

15916

case Intrinsic::ppc_vsx_stxvd2x:

15917

case Intrinsic::ppc_vsx_stxvw4x:

15918

case Intrinsic::ppc_vsx_stxvd2x_be:

15919

case Intrinsic::ppc_vsx_stxvw4x_be:

15920

case Intrinsic::ppc_vsx_stxvl:

15921

case Intrinsic::ppc_vsx_stxvll: {

15922

EVT VT;

15923

switch (Intrinsic) {

15924

case Intrinsic::ppc_altivec_stvebx:

15925

VT = MVT::i8;

15926

break;

15927

case Intrinsic::ppc_altivec_stvehx:

15928

VT = MVT::i16;

15929

break;

15930

case Intrinsic::ppc_altivec_stvewx:

15931

VT = MVT::i32;

15932

break;

15933

case Intrinsic::ppc_vsx_stxvd2x:

15934

case Intrinsic::ppc_vsx_stxvd2x_be:

15935

VT = MVT::v2f64;

15936

break;

15937

default:

15938

VT = MVT::v4i32;

15939

break;

15940

}

15941

15942

Info.opc = ISD::INTRINSIC_VOID;

15943

Info.memVT = VT;

15944

Info.ptrVal = I.getArgOperand(1);

15945

Info.offset = -VT.getStoreSize()+1;

15946

Info.size = 2*VT.getStoreSize()-1;

15947

Info.align = Align(1);

15948

Info.flags = MachineMemOperand::MOStore;

15949

return true;

15950

}

15951

default:

15952

break;

15953

}

15954

15955

return false;

15956

}

15957

15958

/// It returns EVT::Other if the type should be determined using generic

15959

/// target-independent logic.

15960

EVT PPCTargetLowering::getOptimalMemOpType(

15961

const MemOp &Op, const AttributeList &FuncAttributes) const {

15962

if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {

15963

// We should use Altivec/VSX loads and stores when available. For unaligned

15964

// addresses, unaligned VSX loads are only fast starting with the P8.

15965

if (Subtarget.hasAltivec() && Op.size() >= 16 &&

15966

(Op.isAligned(Align(16)) ||

15967

((Op.isMemset() && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))

15968

return MVT::v4i32;

15969

}

15970

15971

if (Subtarget.isPPC64()) {

15972

return MVT::i64;

15973

}

15974

15975

return MVT::i32;

15976

}

15977

15978

/// Returns true if it is beneficial to convert a load of a constant

15979

/// to just the constant itself.

15980

bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,

15981

Type *Ty) const {

15982

assert(Ty->isIntegerTy())((Ty->isIntegerTy()) ? static_cast<void> (0) : __assert_fail
("Ty->isIntegerTy()", "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 15982, __PRETTY_FUNCTION__));

15983

15984

unsigned BitSize = Ty->getPrimitiveSizeInBits();

15985

return !(BitSize == 0 || BitSize > 64);

15986

}

15987

15988

/// Truncation is reported as free only from a 64-bit integer type to a
/// 32-bit integer type.
bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
  if (Ty1->isIntegerTy() && Ty2->isIntegerTy()) {
    const unsigned SrcBits = Ty1->getPrimitiveSizeInBits();
    const unsigned DstBits = Ty2->getPrimitiveSizeInBits();
    return SrcBits == 64 && DstBits == 32;
  }
  return false;
}

15995

15996

/// Truncation is reported as free only from a 64-bit integer value type to
/// a 32-bit integer value type.
bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
  if (VT1.isInteger() && VT2.isInteger()) {
    const unsigned SrcBits = VT1.getSizeInBits();
    const unsigned DstBits = VT2.getSizeInBits();
    return SrcBits == 64 && DstBits == 32;
  }
  return false;
}

16003

16004

/// Return true if zero-extending Val to VT2 is free. A non-extending or
/// zero-extending load of i1/i8/i16 (and of i32 on PPC64) qualifies; every
/// other case is delegated to the generic TargetLowering query.
bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
  // Generally speaking, zexts are not free, but they are free when they can be
  // folded with other operations.
  if (LoadSDNode *Load = dyn_cast<LoadSDNode>(Val)) {
    EVT LoadedVT = Load->getMemoryVT();
    const bool NarrowLoad =
        LoadedVT == MVT::i1 || LoadedVT == MVT::i8 || LoadedVT == MVT::i16 ||
        (Subtarget.isPPC64() && LoadedVT == MVT::i32);
    if (NarrowLoad) {
      const ISD::LoadExtType Ext = Load->getExtensionType();
      if (Ext == ISD::NON_EXTLOAD || Ext == ISD::ZEXTLOAD)
        return true;
    }
  }

  // FIXME: Add other cases...
  //  - 32-bit shifts with a zext to i64
  //  - zext after ctlz, bswap, etc.
  //  - zext after and by a constant mask

  return TargetLowering::isZExtFree(Val, VT2);
}

16023

16024

/// Floating-point extension is reported as free for every destination type
/// except f128.
bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
  assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
         "invalid fpext types");
  // Extending to float128 is not free.
  return DestVT != MVT::f128;
}

16032

16033

/// A compare immediate is legal when it fits in 16 bits, either as a signed
/// or as an unsigned value.
bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  if (isInt<16>(Imm))
    return true;
  return isUInt<16>(Imm);
}

16036

16037

/// An add immediate is legal when it fits in 16 bits, either as a signed or
/// as an unsigned value.
bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
  if (isInt<16>(Imm))
    return true;
  return isUInt<16>(Imm);
}

16040

16041

bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,

16042

unsigned,

16043

unsigned,

16044

MachineMemOperand::Flags,

16045

bool *Fast) const {

16046

if (DisablePPCUnaligned)

16047

return false;

16048

16049

// PowerPC supports unaligned memory access for simple non-vector types.

16050

// Although accessing unaligned addresses is not as efficient as accessing

16051

// aligned addresses, it is generally more efficient than manual expansion,

16052

// and generally only traps for software emulation when crossing page

16053

// boundaries.

16054

16055

if (!VT.isSimple())

16056

return false;

16057

16058

if (VT.isFloatingPoint() && !VT.isVector() &&

16059

!Subtarget.allowsUnalignedFPAccess())

16060

return false;

16061

16062

if (VT.getSimpleVT().isVector()) {

16063

if (Subtarget.hasVSX()) {

16064

if (VT != MVT::v2f64 && VT != MVT::v2i64 &&

16065

VT != MVT::v4f32 && VT != MVT::v4i32)

16066

return false;

16067

} else {

16068

return false;

16069

}

16070

}

16071

16072

if (VT == MVT::ppcf128)

16073

return false;

16074

16075

if (Fast)

16076

*Fast = true;

16077

16078

return true;

16079

}

16080

16081

bool PPCTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,

16082

SDValue C) const {

16083

// Check integral scalar types.

16084

if (!VT.isScalarInteger())

16085

return false;

16086

if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {

16087

if (!ConstNode->getAPIntValue().isSignedIntN(64))

16088

return false;

16089

// This transformation will generate >= 2 operations. But the following

16090

// cases will generate <= 2 instructions during ISEL. So exclude them.

16091

// 1. If the constant multiplier fits 16 bits, it can be handled by one

16092

// HW instruction, ie. MULLI

16093

// 2. If the multiplier after shifted fits 16 bits, an extra shift

16094

// instruction is needed than case 1, ie. MULLI and RLDICR

16095

int64_t Imm = ConstNode->getSExtValue();

16096

unsigned Shift = countTrailingZeros<uint64_t>(Imm);

16097

Imm >>= Shift;

16098

if (isInt<16>(Imm))

16099

return false;

16100

uint64_t UImm = static_cast<uint64_t>(Imm);

16101

if (isPowerOf2_64(UImm + 1) || isPowerOf2_64(UImm - 1) ||

16102

isPowerOf2_64(1 - UImm) || isPowerOf2_64(-1 - UImm))

16103

return true;

16104

}

16105

return false;

16106

}

16107

16108

bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,

16109

EVT VT) const {

16110

return isFMAFasterThanFMulAndFAdd(

16111

MF.getFunction(), VT.getTypeForEVT(MF.getFunction().getContext()));

16112

}

16113

16114

bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,

16115

Type *Ty) const {

16116

switch (Ty->getScalarType()->getTypeID()) {

16117

case Type::FloatTyID:

16118

case Type::DoubleTyID:

16119

return true;

16120

case Type::FP128TyID:

16121

return Subtarget.hasP9Vector();

16122

default:

16123

return false;

16124

}

16125

}

16126

16127

// FIXME: add more patterns which are not profitable to hoist.

16128

bool PPCTargetLowering::isProfitableToHoist(Instruction *I) const {

16129

if (!I->hasOneUse())

16130

return true;

16131

16132

Instruction *User = I->user_back();

16133

assert(User && "A single use instruction with no uses.")((User && "A single use instruction with no uses.") ?
static_cast<void> (0) : __assert_fail ("User && \"A single use instruction with no uses.\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 16133, __PRETTY_FUNCTION__));

16134

16135

switch (I->getOpcode()) {

16136

case Instruction::FMul: {

16137

// Don't break FMA, PowerPC prefers FMA.

16138

if (User->getOpcode() != Instruction::FSub &&

16139

User->getOpcode() != Instruction::FAdd)

16140

return true;

16141

16142

const TargetOptions &Options = getTargetMachine().Options;

16143

const Function *F = I->getFunction();

16144

const DataLayout &DL = F->getParent()->getDataLayout();

16145

Type *Ty = User->getOperand(0)->getType();

16146

16147

return !(

16148

isFMAFasterThanFMulAndFAdd(*F, Ty) &&

16149

isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&

16150

(Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));

16151

}

16152

case Instruction::Load: {

16153

// Don't break "store (load float*)" pattern, this pattern will be combined

16154

// to "store (load int32)" in later InstCombine pass. See function

16155

// combineLoadToOperationType. On PowerPC, loading a float point takes more

16156

// cycles than loading a 32 bit integer.

16157

LoadInst *LI = cast<LoadInst>(I);

16158

// For the loads that combineLoadToOperationType does nothing, like

16159

// ordered load, it should be profitable to hoist them.

16160

// For swifterror load, it can only be used for pointer to pointer type, so

16161

// later type check should get rid of this case.

16162

if (!LI->isUnordered())

16163

return true;

16164

16165

if (User->getOpcode() != Instruction::Store)

16166

return true;

16167

16168

if (I->getType()->getTypeID() != Type::FloatTyID)

16169

return true;

16170

16171

return false;

16172

}

16173

default:

16174

return true;

16175

}

16176

return true;

16177

}

16178

16179

/// Return the zero-terminated list of scratch registers: X12, LR8 and CTR8.
const MCPhysReg *
PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
  // LR is a callee-save register, but we must treat it as clobbered by any call
  // site. Hence we include LR in the scratch registers, which are in turn added
  // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
  // to CTR, which is used by any indirect call.
  static const MCPhysReg ScratchRegs[] = {PPC::X12, PPC::LR8, PPC::CTR8, 0};
  return ScratchRegs;
}

16191

16192

/// The exception pointer register is X3 on PPC64 and R3 otherwise.
Register PPCTargetLowering::getExceptionPointerRegister(
    const Constant *PersonalityFn) const {
  if (Subtarget.isPPC64())
    return PPC::X3;
  return PPC::R3;
}

16196

16197

/// The exception selector register is X4 on PPC64 and R4 otherwise.
Register PPCTargetLowering::getExceptionSelectorRegister(
    const Constant *PersonalityFn) const {
  if (Subtarget.isPPC64())
    return PPC::X4;
  return PPC::R4;
}

16201

16202

bool

16203

PPCTargetLowering::shouldExpandBuildVectorWithShuffles(

16204

EVT VT , unsigned DefinedValues) const {

16205

if (VT == MVT::v2i64)

16206

return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves

16207

16208

if (Subtarget.hasVSX())

16209

return true;

16210

16211

return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);

16212

}

16213

16214

/// Prefer ILP scheduling unless the ILP preference is disabled or the
/// machine scheduler is enabled, in which case the generic preference is
/// used.
Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
  const bool UseGenericPref =
      DisableILPPref || Subtarget.enableMachineScheduler();
  if (!UseGenericPref)
    return Sched::ILP;

  return TargetLowering::getSchedulingPreference(N);
}

16220

16221

// Create a fast isel object.

16222

FastISel *

16223

PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,

16224

const TargetLibraryInfo *LibInfo) const {

16225

return PPC::createFastISel(FuncInfo, LibInfo);

16226

}

16227

16228

// 'Inverted' means the FMA opcode after negating one multiplicand.

16229

// For example, (fma -a b c) = (fnmsub a b c)

16230

static unsigned invertFMAOpcode(unsigned Opc) {

16231

switch (Opc) {

16232

default:

16233

llvm_unreachable("Invalid FMA opcode for PowerPC!")::llvm::llvm_unreachable_internal("Invalid FMA opcode for PowerPC!"
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 16233);

16234

case ISD::FMA:

16235

return PPCISD::FNMSUB;

16236

case PPCISD::FNMSUB:

16237

return ISD::FMA;

16238

}

16239

}

16240

16241

/// Try to build the negation of Op. A single-use PPCISD::FNMSUB of a legal
/// type is handled here by negating its addend and, when signed zeros may be
/// ignored, one multiplicand; otherwise it is rewritten to an FMA if that is
/// legal. Everything else goes to the generic TargetLowering implementation.
/// Returns a null SDValue when the recursion limit is exceeded or the addend
/// cannot be negated.
SDValue PPCTargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                                bool LegalOps, bool OptForSize,
                                                NegatibleCost &Cost,
                                                unsigned Depth) const {
  if (Depth > SelectionDAG::MaxRecursionDepth)
    return SDValue();

  const unsigned Opcode = Op.getOpcode();
  EVT ResTy = Op.getValueType();
  SDNodeFlags NodeFlags = Op.getNode()->getFlags();

  if (Opcode == PPCISD::FNMSUB && Op.hasOneUse() && isTypeLegal(ResTy)) {
    const TargetOptions &TMOptions = getTargetMachine().Options;
    SDValue MulLHS = Op.getOperand(0);
    SDValue MulRHS = Op.getOperand(1);
    SDValue Addend = Op.getOperand(2);
    SDLoc Loc(Op);

    NegatibleCost AddendCost = NegatibleCost::Expensive;
    SDValue NegAddend = getNegatedExpression(Addend, DAG, LegalOps,
                                             OptForSize, AddendCost, Depth + 1);

    if (!NegAddend)
      return SDValue();

    // (fneg (fnmsub a b c)) => (fnmsub (fneg a) b (fneg c))
    // (fneg (fnmsub a b c)) => (fnmsub a (fneg b) (fneg c))
    // These transformations may change sign of zeroes. For example,
    // -(-ab-(-c))=-0 while -(-(ab-c))=+0 when a=b=c=1.
    if (NodeFlags.hasNoSignedZeros() || TMOptions.NoSignedZerosFPMath) {
      // Try and choose the cheaper one to negate.
      NegatibleCost LHSCost = NegatibleCost::Expensive;
      SDValue NegLHS = getNegatedExpression(MulLHS, DAG, LegalOps, OptForSize,
                                            LHSCost, Depth + 1);

      NegatibleCost RHSCost = NegatibleCost::Expensive;
      SDValue NegRHS = getNegatedExpression(MulRHS, DAG, LegalOps, OptForSize,
                                            RHSCost, Depth + 1);

      if (NegLHS && LHSCost <= RHSCost) {
        Cost = std::min(LHSCost, AddendCost);
        return DAG.getNode(Opcode, Loc, ResTy, NegLHS, MulRHS, NegAddend,
                           NodeFlags);
      }
      if (NegRHS) {
        Cost = std::min(RHSCost, AddendCost);
        return DAG.getNode(Opcode, Loc, ResTy, MulLHS, NegRHS, NegAddend,
                           NodeFlags);
      }
    }

    // (fneg (fnmsub a b c)) => (fma a b (fneg c))
    if (isOperationLegal(ISD::FMA, ResTy)) {
      Cost = AddendCost;
      return DAG.getNode(ISD::FMA, Loc, ResTy, MulLHS, MulRHS, NegAddend,
                         NodeFlags);
    }
  }

  return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize,
                                              Cost, Depth);
}

16305

16306

// Override to enable LOAD_STACK_GUARD lowering on Linux.

16307

bool PPCTargetLowering::useLoadStackGuardNode() const {

16308

if (!Subtarget.isTargetLinux())

16309

return TargetLowering::useLoadStackGuardNode();

16310

return true;

16311

}

16312

16313

// Override to disable global variable loading on Linux.

16314

void PPCTargetLowering::insertSSPDeclarations(Module &M) const {

16315

if (!Subtarget.isTargetLinux())

16316

return TargetLowering::insertSSPDeclarations(M);

16317

}

16318

16319

bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,

16320

bool ForCodeSize) const {

16321

if (!VT.isSimple() || !Subtarget.hasVSX())

16322

return false;

16323

16324

switch(VT.getSimpleVT().SimpleTy) {

16325

default:

16326

// For FP types that are currently not supported by PPC backend, return

16327

// false. Examples: f16, f80.

16328

return false;

16329

case MVT::f32:

16330

case MVT::f64:

16331

if (Subtarget.hasPrefixInstrs()) {

16332

// With prefixed instructions, we can materialize anything that can be

16333

// represented with a 32-bit immediate, not just positive zero.

16334

APFloat APFloatOfImm = Imm;

16335

return convertToNonDenormSingle(APFloatOfImm);

16336

}

16337

LLVM_FALLTHROUGH[[gnu::fallthrough]];

16338

case MVT::ppcf128:

16339

return Imm.isPosZero();

16340

}

16341

}

16342

16343

// For vector shift operation op, fold

16344

// (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)

16345

static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,

16346

SelectionDAG &DAG) {

16347

SDValue N0 = N->getOperand(0);

16348

SDValue N1 = N->getOperand(1);

16349

EVT VT = N0.getValueType();

16350

unsigned OpSizeInBits = VT.getScalarSizeInBits();

16351

unsigned Opcode = N->getOpcode();

16352

unsigned TargetOpcode;

16353

16354

switch (Opcode) {

16355

default:

16356

llvm_unreachable("Unexpected shift operation")::llvm::llvm_unreachable_internal("Unexpected shift operation"
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 16356);

16357

case ISD::SHL:

16358

TargetOpcode = PPCISD::SHL;

16359

break;

16360

case ISD::SRL:

16361

TargetOpcode = PPCISD::SRL;

16362

break;

16363

case ISD::SRA:

16364

TargetOpcode = PPCISD::SRA;

16365

break;

16366

}

16367

16368

if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&

16369

N1->getOpcode() == ISD::AND)

16370

if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))

16371

if (Mask->getZExtValue() == OpSizeInBits - 1)

16372

return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));

16373

16374

return SDValue();

16375

}

16376

16377

/// Combine an SHL node. First tries the modulo-mask fold; otherwise, on
/// 64-bit ISA 3.0 subtargets, turns an i64 (shl (sext i32 x), constant) into
/// a PPCISD::EXTSWSLI with an i32 shift amount. Returns a null SDValue when
/// nothing applies.
SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
  if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
    return Value;

  SDValue Shifted = N->getOperand(0);
  ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(N->getOperand(1));

  // The operand of Shifted is only inspected once it is known to be a
  // SIGN_EXTEND, so the order of these tests matters.
  const bool IsCandidate =
      Subtarget.isISA3_0() && Subtarget.isPPC64() &&
      Shifted.getOpcode() == ISD::SIGN_EXTEND &&
      Shifted.getOperand(0).getValueType() == MVT::i32 &&
      ShiftCst != nullptr && N->getValueType(0) == MVT::i64;
  if (!IsCandidate)
    return SDValue();

  // We can't save an operation here if the value is already extended, and
  // the existing shift is easier to combine.
  SDValue ExtsSrc = Shifted.getOperand(0);
  const bool AlreadyExtended =
      ExtsSrc.getOpcode() == ISD::TRUNCATE &&
      ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext;
  if (AlreadyExtended)
    return SDValue();

  SDLoc DL(Shifted);
  // We want the shift amount to be i32 on the extswli, but the shift could
  // have an i64.
  SDValue ShiftAmt(ShiftCst, 0);
  if (ShiftAmt.getValueType() == MVT::i64)
    ShiftAmt = DCI.DAG.getConstant(ShiftCst->getZExtValue(), DL, MVT::i32);

  return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, ExtsSrc, ShiftAmt);
}

16406

16407

// DAG combine for ISD::SRA. The only fold attempted is the vector
// "strip modulo" one; stripModuloOnShift already yields an empty SDValue when
// it does not apply, so its result is forwarded as is.
SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
  return stripModuloOnShift(*this, N, DCI.DAG);
}

16413

16414

// DAG combine for ISD::SRL. The only fold attempted is the vector
// "strip modulo" one; stripModuloOnShift already yields an empty SDValue when
// it does not apply, so its result is forwarded as is.
SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
  return stripModuloOnShift(*this, N, DCI.DAG);
}

16420

16421

// Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))

16422

// Transform (add X, (zext(sete Z, C))) -> (addze X, (subfic (addi Z, -C), 0))

16423

// When C is zero, the equation (addi Z, -C) can be simplified to Z

16424

// Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types

16425

static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,

16426

const PPCSubtarget &Subtarget) {

16427

if (!Subtarget.isPPC64())

16428

return SDValue();

16429

16430

SDValue LHS = N->getOperand(0);

16431

SDValue RHS = N->getOperand(1);

16432

16433

auto isZextOfCompareWithConstant = [](SDValue Op) {

16434

if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||

16435

Op.getValueType() != MVT::i64)

16436

return false;

16437

16438

SDValue Cmp = Op.getOperand(0);

16439

if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||

16440

Cmp.getOperand(0).getValueType() != MVT::i64)

16441

return false;

16442

16443

if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {

16444

int64_t NegConstant = 0 - Constant->getSExtValue();

16445

// Due to the limitations of the addi instruction,

16446

// -C is required to be [-32768, 32767].

16447

return isInt<16>(NegConstant);

16448

}

16449

16450

return false;

16451

};

16452

16453

bool LHSHasPattern = isZextOfCompareWithConstant(LHS);

16454

bool RHSHasPattern = isZextOfCompareWithConstant(RHS);

16455

16456

// If there is a pattern, canonicalize a zext operand to the RHS.

16457

if (LHSHasPattern && !RHSHasPattern)

16458

std::swap(LHS, RHS);

16459

else if (!LHSHasPattern && !RHSHasPattern)

16460

return SDValue();

16461

16462

SDLoc DL(N);

16463

SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Glue);

16464

SDValue Cmp = RHS.getOperand(0);

16465

SDValue Z = Cmp.getOperand(0);

16466

auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1));

16467

16468

assert(Constant && "Constant Should not be a null pointer.")((Constant && "Constant Should not be a null pointer."
) ? static_cast<void> (0) : __assert_fail ("Constant && \"Constant Should not be a null pointer.\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 16468, __PRETTY_FUNCTION__));

16469

int64_t NegConstant = 0 - Constant->getSExtValue();

16470

16471

switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {

16472

default: break;

16473

case ISD::SETNE: {

16474

// when C == 0

16475

// --> addze X, (addic Z, -1).carry

16476

// /

16477

// add X, (zext(setne Z, C))--

16478

// \ when -32768 <= -C <= 32767 && C != 0

16479

// --> addze X, (addic (addi Z, -C), -1).carry

16480

SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,

16481

DAG.getConstant(NegConstant, DL, MVT::i64));

16482

SDValue AddOrZ = NegConstant != 0 ? Add : Z;

16483

SDValue Addc = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i64, MVT::Glue),

16484

AddOrZ, DAG.getConstant(-1ULL, DL, MVT::i64));

16485

return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),

16486

SDValue(Addc.getNode(), 1));

16487

}

16488

case ISD::SETEQ: {

16489

// when C == 0

16490

// --> addze X, (subfic Z, 0).carry

16491

// /

16492

// add X, (zext(sete Z, C))--

16493

// \ when -32768 <= -C <= 32767 && C != 0

16494

// --> addze X, (subfic (addi Z, -C), 0).carry

16495

SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,

16496

DAG.getConstant(NegConstant, DL, MVT::i64));

16497

SDValue AddOrZ = NegConstant != 0 ? Add : Z;

16498

SDValue Subc = DAG.getNode(ISD::SUBC, DL, DAG.getVTList(MVT::i64, MVT::Glue),

16499

DAG.getConstant(0, DL, MVT::i64), AddOrZ);

16500

return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),

16501

SDValue(Subc.getNode(), 1));

16502

}

16503

}

16504

16505

return SDValue();

16506

}

16507

16508

// Transform

16509

// (add C1, (MAT_PCREL_ADDR GlobalAddr+C2)) to

16510

// (MAT_PCREL_ADDR GlobalAddr+(C1+C2))

16511

// In this case both C1 and C2 must be known constants.

16512

// C1+C2 must fit into a 34 bit signed integer.

16513

static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG,

16514

const PPCSubtarget &Subtarget) {

16515

if (!Subtarget.isUsingPCRelativeCalls())

16516

return SDValue();

16517

16518

// Check both Operand 0 and Operand 1 of the ADD node for the PCRel node.

16519

// If we find that node try to cast the Global Address and the Constant.

16520

SDValue LHS = N->getOperand(0);

16521

SDValue RHS = N->getOperand(1);

16522

16523

if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)

16524

std::swap(LHS, RHS);

16525

16526

if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)

16527

return SDValue();

16528

16529

// Operand zero of PPCISD::MAT_PCREL_ADDR is the GA node.

16530

GlobalAddressSDNode *GSDN = dyn_cast<GlobalAddressSDNode>(LHS.getOperand(0));

16531

ConstantSDNode* ConstNode = dyn_cast<ConstantSDNode>(RHS);

16532

16533

// Check that both casts succeeded.

16534

if (!GSDN || !ConstNode)

16535

return SDValue();

16536

16537

int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue();

16538

SDLoc DL(GSDN);

16539

16540

// The signed int offset needs to fit in 34 bits.

16541

if (!isInt<34>(NewOffset))

16542

return SDValue();

16543

16544

// The new global address is a copy of the old global address except

16545

// that it has the updated Offset.

16546

SDValue GA =

16547

DAG.getTargetGlobalAddress(GSDN->getGlobal(), DL, GSDN->getValueType(0),

16548

NewOffset, GSDN->getTargetFlags());

16549

SDValue MatPCRel =

16550

DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, GSDN->getValueType(0), GA);

16551

return MatPCRel;

16552

}

16553

16554

// DAG combine for ISD::ADD: try the ADDZE fold first, then the
// MAT_PCREL_ADDR offset fold. The second helper already returns an empty
// SDValue when it does not apply, so its result is forwarded directly.
SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  if (SDValue AddZE = combineADDToADDZE(N, DAG, Subtarget))
    return AddZE;

  return combineADDToMAT_PCREL_ADDR(N, DAG, Subtarget);
}

16563

16564

// Detect TRUNCATE operations on bitcasts of float128 values.

16565

// What we are looking for here is the situtation where we extract a subset

16566

// of bits from a 128 bit float.

16567

// This can be of two forms:

16568

// 1) BITCAST of f128 feeding TRUNCATE

16569

// 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE

16570

// The reason this is required is because we do not have a legal i128 type

16571

// and so we want to prevent having to store the f128 and then reload part

16572

// of it.

16573

SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,

16574

DAGCombinerInfo &DCI) const {

16575

// If we are using CRBits then try that first.

16576

if (Subtarget.useCRBits()) {

16577

// Check if CRBits did anything and return that if it did.

16578

if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))

16579

return CRTruncValue;

16580

}

16581

16582

SDLoc dl(N);

16583

SDValue Op0 = N->getOperand(0);

16584

16585

// fold (truncate (abs (sub (zext a), (zext b)))) -> (vabsd a, b)

16586

if (Subtarget.hasP9Altivec() && Op0.getOpcode() == ISD::ABS) {

16587

EVT VT = N->getValueType(0);

16588

if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)

16589

return SDValue();

16590

SDValue Sub = Op0.getOperand(0);

16591

if (Sub.getOpcode() == ISD::SUB) {

16592

SDValue SubOp0 = Sub.getOperand(0);

16593

SDValue SubOp1 = Sub.getOperand(1);

16594

if ((SubOp0.getOpcode() == ISD::ZERO_EXTEND) &&

16595

(SubOp1.getOpcode() == ISD::ZERO_EXTEND)) {

16596

return DCI.DAG.getNode(PPCISD::VABSD, dl, VT, SubOp0.getOperand(0),

16597

SubOp1.getOperand(0),

16598

DCI.DAG.getTargetConstant(0, dl, MVT::i32));

16599

}

16600

}

16601

}

16602

16603

// Looking for a truncate of i128 to i64.

16604

if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)

16605

return SDValue();

16606

16607

int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;

16608

16609

// SRL feeding TRUNCATE.

16610

if (Op0.getOpcode() == ISD::SRL) {

16611

ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));

16612

// The right shift has to be by 64 bits.

16613

if (!ConstNode || ConstNode->getZExtValue() != 64)

16614

return SDValue();

16615

16616

// Switch the element number to extract.

16617

EltToExtract = EltToExtract ? 0 : 1;

16618

// Update Op0 past the SRL.

16619

Op0 = Op0.getOperand(0);

16620

}

16621

16622

// BITCAST feeding a TRUNCATE possibly via SRL.

16623

if (Op0.getOpcode() == ISD::BITCAST &&

16624

Op0.getValueType() == MVT::i128 &&

16625

Op0.getOperand(0).getValueType() == MVT::f128) {

16626

SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));

16627

return DCI.DAG.getNode(

16628

ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,

16629

DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));

16630

}

16631

return SDValue();

16632

}

16633

16634

// DAG combine for ISD::MUL by a constant (or constant splat) of the form
// +/-(2^N + 1) or +/-(2^N - 1): replace the multiply with a shift plus an
// add/sub when the per-CPU cost heuristic below says it is profitable.
// Returns an empty SDValue when no rewrite is performed.
SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  ConstantSDNode *Multiplier = isConstOrConstSplat(N->getOperand(1));
  if (Multiplier == nullptr)
    return SDValue();

  // An imul is usually smaller than the alternative sequence for legal type.
  if (DAG.getMachineFunction().getFunction().hasMinSize() &&
      isOperationLegal(ISD::MUL, N->getValueType(0)))
    return SDValue();

  auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
    switch (this->Subtarget.getCPUDirective()) {
    default:
      // TODO: enhance the condition for subtarget before pwr8
      return false;
    case PPC::DIR_PWR8:
      //  type        mul     add    shl
      // scalar        4       1      1
      // vector        7       2      2
      return true;
    case PPC::DIR_PWR9:
    case PPC::DIR_PWR10:
    case PPC::DIR_PWR_FUTURE:
      //  type        mul     add    shl
      // scalar        5       2      2
      // vector        7       2      2

      // The cycle RATIO of related operations are showed as a table above.
      // Because mul is 5(scalar)/7(vector), add/sub/shl are all 2 for both
      // scalar and vector type. For 2 instrs patterns, add/sub + shl
      // are 4, it is always profitable; but for 3 instrs patterns
      // (mul x, -(2^N + 1)) => -(add (shl x, N), x), sub + add + shl are 6.
      // So we should only do it for vector type.
      return !(IsAddOne && IsNeg) || VT.isVector();
    }
  };

  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  const APInt &Amt = Multiplier->getAPIntValue();
  const bool Negative = Amt.isNegative();
  APInt AbsAmt = Amt.abs();

  if ((AbsAmt - 1).isPowerOf2()) {
    // (mul x, 2^N + 1) => (add (shl x, N), x)
    // (mul x, -(2^N + 1)) => -(add (shl x, N), x)
    if (!IsProfitable(Negative, true, VT))
      return SDValue();

    SDValue X = N->getOperand(0);
    SDValue ShiftAmt = DAG.getConstant((AbsAmt - 1).logBase2(), DL, VT);
    SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), ShiftAmt);
    SDValue Sum = DAG.getNode(ISD::ADD, DL, VT, X, Shl);

    if (Negative)
      return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Sum);
    return Sum;
  }

  if ((AbsAmt + 1).isPowerOf2()) {
    // (mul x, 2^N - 1) => (sub (shl x, N), x)
    // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
    if (!IsProfitable(Negative, false, VT))
      return SDValue();

    SDValue X = N->getOperand(0);
    SDValue ShiftAmt = DAG.getConstant((AbsAmt + 1).logBase2(), DL, VT);
    SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), ShiftAmt);

    return Negative ? DAG.getNode(ISD::SUB, DL, VT, X, Shl)
                    : DAG.getNode(ISD::SUB, DL, VT, Shl, X);
  }

  return SDValue();
}

16718

16719

// Combine fma-like op (like fnmsub) with fnegs to appropriate op. Do this

16720

// in combiner since we need to check SD flags and other subtarget features.

16721

SDValue PPCTargetLowering::combineFMALike(SDNode *N,

16722

DAGCombinerInfo &DCI) const {

16723

SDValue N0 = N->getOperand(0);

16724

SDValue N1 = N->getOperand(1);

16725

SDValue N2 = N->getOperand(2);

16726

SDNodeFlags Flags = N->getFlags();

16727

EVT VT = N->getValueType(0);

16728

SelectionDAG &DAG = DCI.DAG;

16729

const TargetOptions &Options = getTargetMachine().Options;

16730

unsigned Opc = N->getOpcode();

16731

bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();

16732

bool LegalOps = !DCI.isBeforeLegalizeOps();

16733

SDLoc Loc(N);

16734

16735

if (!isOperationLegal(ISD::FMA, VT))

16736

return SDValue();

16737

16738

// Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0

16739

// since (fnmsub a b c)=-0 while c-ab=+0.

16740

if (!Flags.hasNoSignedZeros() && !Options.NoSignedZerosFPMath)

16741

return SDValue();

16742

16743

// (fma (fneg a) b c) => (fnmsub a b c)

16744

// (fnmsub (fneg a) b c) => (fma a b c)

16745

if (SDValue NegN0 = getCheaperNegatedExpression(N0, DAG, LegalOps, CodeSize))

16746

return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, NegN0, N1, N2, Flags);

16747

16748

// (fma a (fneg b) c) => (fnmsub a b c)

16749

// (fnmsub a (fneg b) c) => (fma a b c)

16750

if (SDValue NegN1 = getCheaperNegatedExpression(N1, DAG, LegalOps, CodeSize))

16751

return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, N0, NegN1, N2, Flags);

16752

16753

return SDValue();

16754

}

16755

16756

bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {

16757

// Only duplicate to increase tail-calls for the 64bit SysV ABIs.

16758

if (!Subtarget.is64BitELFABI())

16759

return false;

16760

16761

// If not a tail call then no need to proceed.

16762

if (!CI->isTailCall())

16763

return false;

16764

16765

// If sibling calls have been disabled and tail-calls aren't guaranteed

16766

// there is no reason to duplicate.

16767

auto &TM = getTargetMachine();

16768

if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)

16769

return false;

16770

16771

// Can't tail call a function called indirectly, or if it has variadic args.

16772

const Function *Callee = CI->getCalledFunction();

16773

if (!Callee || Callee->isVarArg())

16774

return false;

16775

16776

// Make sure the callee and caller calling conventions are eligible for tco.

16777

const Function *Caller = CI->getParent()->getParent();

16778

if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),

16779

CI->getCallingConv()))

16780

return false;

16781

16782

// If the function is local then we have a good chance at tail-calling it

16783

return getTargetMachine().shouldAssumeDSOLocal(*Caller->getParent(), Callee);

16784

}

16785

16786

// Reports whether VT is one of the floating-point types this hook accepts:
// nothing without VSX; with VSX, f32/f64/v4f32/v2f64, plus f128 when the
// subtarget also has P9 vector support.
bool PPCTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
  if (!Subtarget.hasVSX())
    return false;

  const bool IsQuadOnP9 = Subtarget.hasP9Vector() && VT == MVT::f128;
  const bool IsVSXFloatType = VT == MVT::f32 || VT == MVT::f64 ||
                              VT == MVT::v4f32 || VT == MVT::v2f64;
  return IsQuadOnP9 || IsVSXFloatType;
}

16794

16795

bool PPCTargetLowering::

16796

isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {

16797

const Value *Mask = AndI.getOperand(1);

16798

// If the mask is suitable for andi. or andis. we should sink the and.

16799

if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {

16800

// Can't handle constants wider than 64-bits.

16801

if (CI->getBitWidth() > 64)

16802

return false;

16803

int64_t ConstVal = CI->getZExtValue();

16804

return isUInt<16>(ConstVal) ||

16805

(isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));

16806

}

16807

16808

// For non-constant masks, we can always use the record-form and.

16809

return true;

16810

}

16811

16812

// Transform (abs (sub (zext a), (zext b))) to (vabsd a b 0)

16813

// Transform (abs (sub (zext a), (zext_invec b))) to (vabsd a b 0)

16814

// Transform (abs (sub (zext_invec a), (zext_invec b))) to (vabsd a b 0)

16815

// Transform (abs (sub (zext_invec a), (zext b))) to (vabsd a b 0)

16816

// Transform (abs (sub a, b) to (vabsd a b 1)) if a & b of type v4i32

16817

SDValue PPCTargetLowering::combineABS(SDNode *N, DAGCombinerInfo &DCI) const {

16818

assert((N->getOpcode() == ISD::ABS) && "Need ABS node here")(((N->getOpcode() == ISD::ABS) && "Need ABS node here"
) ? static_cast<void> (0) : __assert_fail ("(N->getOpcode() == ISD::ABS) && \"Need ABS node here\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 16818, __PRETTY_FUNCTION__));

16819

assert(Subtarget.hasP9Altivec() &&((Subtarget.hasP9Altivec() && "Only combine this when P9 altivec supported!"
) ? static_cast<void> (0) : __assert_fail ("Subtarget.hasP9Altivec() && \"Only combine this when P9 altivec supported!\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 16820, __PRETTY_FUNCTION__))

16820

"Only combine this when P9 altivec supported!")((Subtarget.hasP9Altivec() && "Only combine this when P9 altivec supported!"
) ? static_cast<void> (0) : __assert_fail ("Subtarget.hasP9Altivec() && \"Only combine this when P9 altivec supported!\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 16820, __PRETTY_FUNCTION__));

16821

EVT VT = N->getValueType(0);

16822

if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)

16823

return SDValue();

16824

16825

SelectionDAG &DAG = DCI.DAG;

16826

SDLoc dl(N);

16827

if (N->getOperand(0).getOpcode() == ISD::SUB) {

16828

// Even for signed integers, if it's known to be positive (as signed

16829

// integer) due to zero-extended inputs.

16830

unsigned SubOpcd0 = N->getOperand(0)->getOperand(0).getOpcode();

16831

unsigned SubOpcd1 = N->getOperand(0)->getOperand(1).getOpcode();

16832

if ((SubOpcd0 == ISD::ZERO_EXTEND ||

16833

SubOpcd0 == ISD::ZERO_EXTEND_VECTOR_INREG) &&

16834

(SubOpcd1 == ISD::ZERO_EXTEND ||

16835

SubOpcd1 == ISD::ZERO_EXTEND_VECTOR_INREG)) {

16836

return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),

16837

N->getOperand(0)->getOperand(0),

16838

N->getOperand(0)->getOperand(1),

16839

DAG.getTargetConstant(0, dl, MVT::i32));

16840

}

16841

16842

// For type v4i32, it can be optimized with xvnegsp + vabsduw

16843

if (N->getOperand(0).getValueType() == MVT::v4i32 &&

16844

N->getOperand(0).hasOneUse()) {

16845

return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),

16846

N->getOperand(0)->getOperand(0),

16847

N->getOperand(0)->getOperand(1),

16848

DAG.getTargetConstant(1, dl, MVT::i32));

16849

}

16850

}

16851

16852

return SDValue();

16853

}

16854

16855

// For type v4i32/v8ii16/v16i8, transform

16856

// from (vselect (setcc a, b, setugt), (sub a, b), (sub b, a)) to (vabsd a, b)

16857

// from (vselect (setcc a, b, setuge), (sub a, b), (sub b, a)) to (vabsd a, b)

16858

// from (vselect (setcc a, b, setult), (sub b, a), (sub a, b)) to (vabsd a, b)

16859

// from (vselect (setcc a, b, setule), (sub b, a), (sub a, b)) to (vabsd a, b)

16860

SDValue PPCTargetLowering::combineVSelect(SDNode *N,

16861

DAGCombinerInfo &DCI) const {

16862

assert((N->getOpcode() == ISD::VSELECT) && "Need VSELECT node here")(((N->getOpcode() == ISD::VSELECT) && "Need VSELECT node here"
) ? static_cast<void> (0) : __assert_fail ("(N->getOpcode() == ISD::VSELECT) && \"Need VSELECT node here\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 16862, __PRETTY_FUNCTION__));

16863

assert(Subtarget.hasP9Altivec() &&((Subtarget.hasP9Altivec() && "Only combine this when P9 altivec supported!"
) ? static_cast<void> (0) : __assert_fail ("Subtarget.hasP9Altivec() && \"Only combine this when P9 altivec supported!\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 16864, __PRETTY_FUNCTION__))

16864

"Only combine this when P9 altivec supported!")((Subtarget.hasP9Altivec() && "Only combine this when P9 altivec supported!"
) ? static_cast<void> (0) : __assert_fail ("Subtarget.hasP9Altivec() && \"Only combine this when P9 altivec supported!\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 16864, __PRETTY_FUNCTION__));

16865

16866

SelectionDAG &DAG = DCI.DAG;

16867

SDLoc dl(N);

16868

SDValue Cond = N->getOperand(0);

16869

SDValue TrueOpnd = N->getOperand(1);

16870

SDValue FalseOpnd = N->getOperand(2);

16871

EVT VT = N->getOperand(1).getValueType();

16872

16873

if (Cond.getOpcode() != ISD::SETCC || TrueOpnd.getOpcode() != ISD::SUB ||

16874

FalseOpnd.getOpcode() != ISD::SUB)

16875

return SDValue();

16876

16877

// ABSD only available for type v4i32/v8i16/v16i8

16878

if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)

16879

return SDValue();

16880

16881

// At least to save one more dependent computation

16882

if (!(Cond.hasOneUse() || TrueOpnd.hasOneUse() || FalseOpnd.hasOneUse()))

16883

return SDValue();

16884

16885

ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();

16886

16887

// Can only handle unsigned comparison here

16888

switch (CC) {

16889

default:

16890

return SDValue();

16891

case ISD::SETUGT:

16892

case ISD::SETUGE:

16893

break;

16894

case ISD::SETULT:

16895

case ISD::SETULE:

16896

std::swap(TrueOpnd, FalseOpnd);

16897

break;

16898

}

16899

16900

SDValue CmpOpnd1 = Cond.getOperand(0);

16901

SDValue CmpOpnd2 = Cond.getOperand(1);

16902

16903

// SETCC CmpOpnd1 CmpOpnd2 cond

16904

// TrueOpnd = CmpOpnd1 - CmpOpnd2

16905

// FalseOpnd = CmpOpnd2 - CmpOpnd1

16906

if (TrueOpnd.getOperand(0) == CmpOpnd1 &&

16907

TrueOpnd.getOperand(1) == CmpOpnd2 &&

16908

FalseOpnd.getOperand(0) == CmpOpnd2 &&

16909

FalseOpnd.getOperand(1) == CmpOpnd1) {

16910

return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(1).getValueType(),

16911

CmpOpnd1, CmpOpnd2,

16912

DAG.getTargetConstant(0, dl, MVT::i32));

16913

}

16914

16915

return SDValue();

16916

}

File:	llvm/lib/Target/PowerPC/PPCISelLowering.cpp
Warning:	line 9266, column 36 Although the value stored to 'SplatBits' is used in the enclosing expression, the value is never actually read from 'SplatBits'

Bug Summary

Annotated Source Code