/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp

1

//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//

2

//

3

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

4

// See https://llvm.org/LICENSE.txt for license information.

5

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

6

//

7

//===----------------------------------------------------------------------===//

8

//

9

// This file implements the PPCISelLowering class.

10

//

11

//===----------------------------------------------------------------------===//

12

13

#include "PPCISelLowering.h"

14

#include "MCTargetDesc/PPCPredicates.h"

15

#include "PPC.h"

16

#include "PPCCCState.h"

17

#include "PPCCallingConv.h"

18

#include "PPCFrameLowering.h"

19

#include "PPCInstrInfo.h"

20

#include "PPCMachineFunctionInfo.h"

21

#include "PPCPerfectShuffle.h"

22

#include "PPCRegisterInfo.h"

23

#include "PPCSubtarget.h"

24

#include "PPCTargetMachine.h"

25

#include "llvm/ADT/APFloat.h"

26

#include "llvm/ADT/APInt.h"

27

#include "llvm/ADT/ArrayRef.h"

28

#include "llvm/ADT/DenseMap.h"

29

#include "llvm/ADT/None.h"

30

#include "llvm/ADT/STLExtras.h"

31

#include "llvm/ADT/SmallPtrSet.h"

32

#include "llvm/ADT/SmallSet.h"

33

#include "llvm/ADT/SmallVector.h"

34

#include "llvm/ADT/Statistic.h"

35

#include "llvm/ADT/StringRef.h"

36

#include "llvm/ADT/StringSwitch.h"

37

#include "llvm/CodeGen/CallingConvLower.h"

38

#include "llvm/CodeGen/ISDOpcodes.h"

39

#include "llvm/CodeGen/MachineBasicBlock.h"

40

#include "llvm/CodeGen/MachineFrameInfo.h"

41

#include "llvm/CodeGen/MachineFunction.h"

42

#include "llvm/CodeGen/MachineInstr.h"

43

#include "llvm/CodeGen/MachineInstrBuilder.h"

44

#include "llvm/CodeGen/MachineJumpTableInfo.h"

45

#include "llvm/CodeGen/MachineLoopInfo.h"

46

#include "llvm/CodeGen/MachineMemOperand.h"

47

#include "llvm/CodeGen/MachineModuleInfo.h"

48

#include "llvm/CodeGen/MachineOperand.h"

49

#include "llvm/CodeGen/MachineRegisterInfo.h"

50

#include "llvm/CodeGen/RuntimeLibcalls.h"

51

#include "llvm/CodeGen/SelectionDAG.h"

52

#include "llvm/CodeGen/SelectionDAGNodes.h"

53

#include "llvm/CodeGen/TargetInstrInfo.h"

54

#include "llvm/CodeGen/TargetLowering.h"

55

#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"

56

#include "llvm/CodeGen/TargetRegisterInfo.h"

57

#include "llvm/CodeGen/ValueTypes.h"

58

#include "llvm/IR/CallingConv.h"

59

#include "llvm/IR/Constant.h"

60

#include "llvm/IR/Constants.h"

61

#include "llvm/IR/DataLayout.h"

62

#include "llvm/IR/DebugLoc.h"

63

#include "llvm/IR/DerivedTypes.h"

64

#include "llvm/IR/Function.h"

65

#include "llvm/IR/GlobalValue.h"

66

#include "llvm/IR/IRBuilder.h"

67

#include "llvm/IR/Instructions.h"

68

#include "llvm/IR/Intrinsics.h"

69

#include "llvm/IR/IntrinsicsPowerPC.h"

70

#include "llvm/IR/Module.h"

71

#include "llvm/IR/Type.h"

72

#include "llvm/IR/Use.h"

73

#include "llvm/IR/Value.h"

74

#include "llvm/MC/MCContext.h"

75

#include "llvm/MC/MCExpr.h"

76

#include "llvm/MC/MCRegisterInfo.h"

77

#include "llvm/MC/MCSectionXCOFF.h"

78

#include "llvm/MC/MCSymbolXCOFF.h"

79

#include "llvm/Support/AtomicOrdering.h"

80

#include "llvm/Support/BranchProbability.h"

81

#include "llvm/Support/Casting.h"

82

#include "llvm/Support/CodeGen.h"

83

#include "llvm/Support/CommandLine.h"

84

#include "llvm/Support/Compiler.h"

85

#include "llvm/Support/Debug.h"

86

#include "llvm/Support/ErrorHandling.h"

87

#include "llvm/Support/Format.h"

88

#include "llvm/Support/KnownBits.h"

89

#include "llvm/Support/MachineValueType.h"

90

#include "llvm/Support/MathExtras.h"

91

#include "llvm/Support/raw_ostream.h"

92

#include "llvm/Target/TargetMachine.h"

93

#include "llvm/Target/TargetOptions.h"

94

#include <algorithm>

95

#include <cassert>

96

#include <cstdint>

97

#include <iterator>

98

#include <list>

99

#include <utility>

100

#include <vector>

101

102

using namespace llvm;

103

104

// Debug/statistic category name for this file. It is the tag the STATISTIC
// counters below are registered under.
#define DEBUG_TYPE "ppc-lowering"

105

106

// File-local boolean command-line switches for the PowerPC lowering code.
// All of them are registered with cl::Hidden. None is given an explicit
// cl::init value.

// -disable-ppc-preinc: turn off preincrement load/store generation.
static cl::opt<bool> DisablePPCPreinc(
    "disable-ppc-preinc",
    cl::desc("disable preincrement load/store generation on PPC"),
    cl::Hidden);

// -disable-ppc-ilp-pref: do not set the node scheduling preference to ILP.
static cl::opt<bool> DisableILPPref(
    "disable-ppc-ilp-pref",
    cl::desc("disable setting the node scheduling preference to ILP on PPC"),
    cl::Hidden);

// -disable-ppc-unaligned: turn off unaligned load/store generation.
static cl::opt<bool> DisablePPCUnaligned(
    "disable-ppc-unaligned",
    cl::desc("disable unaligned load/store generation on PPC"),
    cl::Hidden);

// -disable-ppc-sco: turn off sibling call optimization.
static cl::opt<bool> DisableSCO(
    "disable-ppc-sco",
    cl::desc("disable sibling call optimization on ppc"),
    cl::Hidden);

// -disable-ppc-innermost-loop-align32: do not always align innermost loops
// to 32 bytes.
static cl::opt<bool> DisableInnermostLoopAlign32(
    "disable-ppc-innermost-loop-align32",
    cl::desc("don't always align innermost loop to 32 bytes on ppc"),
    cl::Hidden);

// -ppc-use-absolute-jumptables: use absolute jump tables.
static cl::opt<bool> UseAbsoluteJumpTables(
    "ppc-use-absolute-jumptables",
    cl::desc("use absolute jump tables on ppc"),
    cl::Hidden);

// -enable-ppc-pcrel-tls: allow PC relative memops in TLS instructions.
static cl::opt<bool> EnablePPCPCRelTLS(
    "enable-ppc-pcrel-tls",
    cl::desc("enable the use of PC relative memops in TLS instructions on PPC"),
    cl::Hidden);

128

129

// Counters for this file. Each STATISTIC invocation defines a file-local
// llvm::Statistic object tagged with DEBUG_TYPE ("ppc-lowering"), the
// counter's own name, and the description string given here.
STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumSiblingCalls, "Number of sibling calls");
STATISTIC(ShufflesHandledWithVPERM, "Number of shuffles lowered to a VPERM");
STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocation probed");

133

134

static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);

135

136

static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);

137

138

// FIXME: Remove this once the bug has been fixed!

139

extern cl::opt<bool> ANDIGlueBug;

140

141

PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,

142

const PPCSubtarget &STI)

143

: TargetLowering(TM), Subtarget(STI) {

144

// On PPC32/64, arguments smaller than 4/8 bytes are extended, so all

145

// arguments are at least 4/8 bytes aligned.

146

bool isPPC64 = Subtarget.isPPC64();

147

setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));

148

149

// Set up the register classes.

150

addRegisterClass(MVT::i32, &PPC::GPRCRegClass);

151

if (!useSoftFloat()) {

152

if (hasSPE()) {

153

addRegisterClass(MVT::f32, &PPC::GPRCRegClass);

154

addRegisterClass(MVT::f64, &PPC::SPERCRegClass);

155

} else {

156

addRegisterClass(MVT::f32, &PPC::F4RCRegClass);

157

addRegisterClass(MVT::f64, &PPC::F8RCRegClass);

158

}

159

}

160

161

// Match BITREVERSE to customized fast code sequence in the td file.

162

setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);

163

setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);

164

165

// Sub-word ATOMIC_CMP_SWAP need to ensure that the input is zero-extended.

166

setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);

167

168

// PowerPC has an i16 but no i8 (or i1) SEXTLOAD.

169

for (MVT VT : MVT::integer_valuetypes()) {

170

setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);

171

setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);

172

}

173

174

if (Subtarget.isISA3_0()) {

175

setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);

176

setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal);

177

setTruncStoreAction(MVT::f64, MVT::f16, Legal);

178

setTruncStoreAction(MVT::f32, MVT::f16, Legal);

179

} else {

180

// No extending loads from f16 or HW conversions back and forth.

181

setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);

182

setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);

183

setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);

184

setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);

185

setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);

186

setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);

187

setTruncStoreAction(MVT::f64, MVT::f16, Expand);

188

setTruncStoreAction(MVT::f32, MVT::f16, Expand);

189

}

190

191

setTruncStoreAction(MVT::f64, MVT::f32, Expand);

192

193

// PowerPC has pre-inc load and store's.

194

setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);

195

setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);

196

setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);

197

setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);

198

setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);

199

setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);

200

setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);

201

setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);

202

setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);

203

setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);

204

if (!Subtarget.hasSPE()) {

205

setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);

206

setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal);

207

setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal);

208

setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal);

209

}

210

211

// PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry.

212

const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };

213

for (MVT VT : ScalarIntVTs) {

214

setOperationAction(ISD::ADDC, VT, Legal);

215

setOperationAction(ISD::ADDE, VT, Legal);

216

setOperationAction(ISD::SUBC, VT, Legal);

217

setOperationAction(ISD::SUBE, VT, Legal);

218

}

219

220

if (Subtarget.useCRBits()) {

221

setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

222

223

if (isPPC64 || Subtarget.hasFPCVT()) {

224

setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i1, Promote);

225

AddPromotedToType(ISD::STRICT_SINT_TO_FP, MVT::i1,

226

isPPC64 ? MVT::i64 : MVT::i32);

227

setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i1, Promote);

228

AddPromotedToType(ISD::STRICT_UINT_TO_FP, MVT::i1,

229

isPPC64 ? MVT::i64 : MVT::i32);

230

231

setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);

232

AddPromotedToType (ISD::SINT_TO_FP, MVT::i1,

233

isPPC64 ? MVT::i64 : MVT::i32);

234

setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);

235

AddPromotedToType(ISD::UINT_TO_FP, MVT::i1,

236

isPPC64 ? MVT::i64 : MVT::i32);

237

} else {

238

setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i1, Custom);

239

setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i1, Custom);

240

setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);

241

setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);

242

}

243

244

// PowerPC does not support direct load/store of condition registers.

245

setOperationAction(ISD::LOAD, MVT::i1, Custom);

246

setOperationAction(ISD::STORE, MVT::i1, Custom);

247

248

// FIXME: Remove this once the ANDI glue bug is fixed:

249

if (ANDIGlueBug)

250

setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);

251

252

for (MVT VT : MVT::integer_valuetypes()) {

253

setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);

254

setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);

255

setTruncStoreAction(VT, MVT::i1, Expand);

256

}

257

258

addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);

259

}

260

261

// Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on

262

// PPC (the libcall is not available).

263

setOperationAction(ISD::FP_TO_SINT, MVT::ppcf128, Custom);

264

setOperationAction(ISD::FP_TO_UINT, MVT::ppcf128, Custom);

265

setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::ppcf128, Custom);

266

setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::ppcf128, Custom);

267

268

// We do not currently implement these libm ops for PowerPC.

269

setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);

270

setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);

271

setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);

272

setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);

273

setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);

274

setOperationAction(ISD::FREM, MVT::ppcf128, Expand);

275

276

// PowerPC has no SREM/UREM instructions unless we are on P9

277

// On P9 we may use a hardware instruction to compute the remainder.

278

// When the result of both the remainder and the division is required it is

279

// more efficient to compute the remainder from the result of the division

280

// rather than use the remainder instruction. The instructions are legalized

281

// directly because the DivRemPairsPass performs the transformation at the IR

282

// level.

283

if (Subtarget.isISA3_0()) {

284

setOperationAction(ISD::SREM, MVT::i32, Legal);

285

setOperationAction(ISD::UREM, MVT::i32, Legal);

286

setOperationAction(ISD::SREM, MVT::i64, Legal);

287

setOperationAction(ISD::UREM, MVT::i64, Legal);

288

} else {

289

setOperationAction(ISD::SREM, MVT::i32, Expand);

290

setOperationAction(ISD::UREM, MVT::i32, Expand);

291

setOperationAction(ISD::SREM, MVT::i64, Expand);

292

setOperationAction(ISD::UREM, MVT::i64, Expand);

293

}

294

295

// Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.

296

setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);

297

setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);

298

setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);

299

setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);

300

setOperationAction(ISD::UDIVREM, MVT::i32, Expand);

301

setOperationAction(ISD::SDIVREM, MVT::i32, Expand);

302

setOperationAction(ISD::UDIVREM, MVT::i64, Expand);

303

setOperationAction(ISD::SDIVREM, MVT::i64, Expand);

304

305

// Handle constrained floating-point operations of scalar.

306

// TODO: Handle SPE specific operation.

307

setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal);

308

setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal);

309

setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal);

310

setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);

311

setOperationAction(ISD::STRICT_FMA, MVT::f32, Legal);

312

setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);

313

314

setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal);

315

setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal);

316

setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);

317

setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);

318

setOperationAction(ISD::STRICT_FMA, MVT::f64, Legal);

319

if (Subtarget.hasVSX()) {

320

setOperationAction(ISD::STRICT_FRINT, MVT::f32, Legal);

321

setOperationAction(ISD::STRICT_FRINT, MVT::f64, Legal);

322

}

323

324

if (Subtarget.hasFSQRT()) {

325

setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);

326

setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal);

327

}

328

329

if (Subtarget.hasFPRND()) {

330

setOperationAction(ISD::STRICT_FFLOOR, MVT::f32, Legal);

331

setOperationAction(ISD::STRICT_FCEIL, MVT::f32, Legal);

332

setOperationAction(ISD::STRICT_FTRUNC, MVT::f32, Legal);

333

setOperationAction(ISD::STRICT_FROUND, MVT::f32, Legal);

334

335

setOperationAction(ISD::STRICT_FFLOOR, MVT::f64, Legal);

336

setOperationAction(ISD::STRICT_FCEIL, MVT::f64, Legal);

337

setOperationAction(ISD::STRICT_FTRUNC, MVT::f64, Legal);

338

setOperationAction(ISD::STRICT_FROUND, MVT::f64, Legal);

339

}

340

341

// We don't support sin/cos/sqrt/fmod/pow

342

setOperationAction(ISD::FSIN , MVT::f64, Expand);

343

setOperationAction(ISD::FCOS , MVT::f64, Expand);

344

setOperationAction(ISD::FSINCOS, MVT::f64, Expand);

345

setOperationAction(ISD::FREM , MVT::f64, Expand);

346

setOperationAction(ISD::FPOW , MVT::f64, Expand);

347

setOperationAction(ISD::FSIN , MVT::f32, Expand);

348

setOperationAction(ISD::FCOS , MVT::f32, Expand);

349

setOperationAction(ISD::FSINCOS, MVT::f32, Expand);

350

setOperationAction(ISD::FREM , MVT::f32, Expand);

351

setOperationAction(ISD::FPOW , MVT::f32, Expand);

352

if (Subtarget.hasSPE()) {

353

setOperationAction(ISD::FMA , MVT::f64, Expand);

354

setOperationAction(ISD::FMA , MVT::f32, Expand);

355

} else {

356

setOperationAction(ISD::FMA , MVT::f64, Legal);

357

setOperationAction(ISD::FMA , MVT::f32, Legal);

358

}

359

360

if (Subtarget.hasSPE())

361

setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);

362

363

setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);

364

365

// If we're enabling GP optimizations, use hardware square root

366

if (!Subtarget.hasFSQRT() &&

367

!(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&

368

Subtarget.hasFRE()))

369

setOperationAction(ISD::FSQRT, MVT::f64, Expand);

370

371

if (!Subtarget.hasFSQRT() &&

372

!(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&

373

Subtarget.hasFRES()))

374

setOperationAction(ISD::FSQRT, MVT::f32, Expand);

375

376

if (Subtarget.hasFCPSGN()) {

377

setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);

378

setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);

379

} else {

380

setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);

381

setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

382

}

383

384

if (Subtarget.hasFPRND()) {

385

setOperationAction(ISD::FFLOOR, MVT::f64, Legal);

386

setOperationAction(ISD::FCEIL, MVT::f64, Legal);

387

setOperationAction(ISD::FTRUNC, MVT::f64, Legal);

388

setOperationAction(ISD::FROUND, MVT::f64, Legal);

389

390

setOperationAction(ISD::FFLOOR, MVT::f32, Legal);

391

setOperationAction(ISD::FCEIL, MVT::f32, Legal);

392

setOperationAction(ISD::FTRUNC, MVT::f32, Legal);

393

setOperationAction(ISD::FROUND, MVT::f32, Legal);

394

}

395

396

// PowerPC does not have BSWAP, but we can use vector BSWAP instruction xxbrd

397

// to speed up scalar BSWAP64.

398

// CTPOP or CTTZ were introduced in P8/P9 respectively

399

setOperationAction(ISD::BSWAP, MVT::i32 , Expand);

400

if (Subtarget.hasP9Vector())

401

setOperationAction(ISD::BSWAP, MVT::i64 , Custom);

402

else

403

setOperationAction(ISD::BSWAP, MVT::i64 , Expand);

404

if (Subtarget.isISA3_0()) {

405

setOperationAction(ISD::CTTZ , MVT::i32 , Legal);

406

setOperationAction(ISD::CTTZ , MVT::i64 , Legal);

407

} else {

408

setOperationAction(ISD::CTTZ , MVT::i32 , Expand);

409

setOperationAction(ISD::CTTZ , MVT::i64 , Expand);

410

}

411

412

if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {

413

setOperationAction(ISD::CTPOP, MVT::i32 , Legal);

414

setOperationAction(ISD::CTPOP, MVT::i64 , Legal);

415

} else {

416

setOperationAction(ISD::CTPOP, MVT::i32 , Expand);

417

setOperationAction(ISD::CTPOP, MVT::i64 , Expand);

418

}

419

420

// PowerPC does not have ROTR

421

setOperationAction(ISD::ROTR, MVT::i32 , Expand);

422

setOperationAction(ISD::ROTR, MVT::i64 , Expand);

423

424

if (!Subtarget.useCRBits()) {

425

// PowerPC does not have Select

426

setOperationAction(ISD::SELECT, MVT::i32, Expand);

427

setOperationAction(ISD::SELECT, MVT::i64, Expand);

428

setOperationAction(ISD::SELECT, MVT::f32, Expand);

429

setOperationAction(ISD::SELECT, MVT::f64, Expand);

430

}

431

432

// PowerPC wants to turn select_cc of FP into fsel when possible.

433

setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);

434

setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

435

436

// PowerPC wants to optimize integer setcc a bit

437

if (!Subtarget.useCRBits())

438

setOperationAction(ISD::SETCC, MVT::i32, Custom);

439

440

if (Subtarget.hasFPU()) {

441

setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal);

442

setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal);

443

setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Legal);

444

445

setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);

446

setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);

447

setOperationAction(ISD::STRICT_FSETCCS, MVT::f128, Legal);

448

}

449

450

// PowerPC does not have BRCOND which requires SetCC

451

if (!Subtarget.useCRBits())

452

setOperationAction(ISD::BRCOND, MVT::Other, Expand);

453

454

setOperationAction(ISD::BR_JT, MVT::Other, Expand);

455

456

if (Subtarget.hasSPE()) {

457

// SPE has built-in conversions

458

setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Legal);

459

setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Legal);

460

setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Legal);

461

setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);

462

setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);

463

setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);

464

} else {

465

// PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.

466

setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);

467

setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);

468

469

// PowerPC does not have [U|S]INT_TO_FP

470

setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Expand);

471

setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Expand);

472

setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);

473

setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);

474

}

475

476

if (Subtarget.hasDirectMove() && isPPC64) {

477

setOperationAction(ISD::BITCAST, MVT::f32, Legal);

478

setOperationAction(ISD::BITCAST, MVT::i32, Legal);

479

setOperationAction(ISD::BITCAST, MVT::i64, Legal);

480

setOperationAction(ISD::BITCAST, MVT::f64, Legal);

481

if (TM.Options.UnsafeFPMath) {

482

setOperationAction(ISD::LRINT, MVT::f64, Legal);

483

setOperationAction(ISD::LRINT, MVT::f32, Legal);

484

setOperationAction(ISD::LLRINT, MVT::f64, Legal);

485

setOperationAction(ISD::LLRINT, MVT::f32, Legal);

486

setOperationAction(ISD::LROUND, MVT::f64, Legal);

487

setOperationAction(ISD::LROUND, MVT::f32, Legal);

488

setOperationAction(ISD::LLROUND, MVT::f64, Legal);

489

setOperationAction(ISD::LLROUND, MVT::f32, Legal);

490

}

491

} else {

492

setOperationAction(ISD::BITCAST, MVT::f32, Expand);

493

setOperationAction(ISD::BITCAST, MVT::i32, Expand);

494

setOperationAction(ISD::BITCAST, MVT::i64, Expand);

495

setOperationAction(ISD::BITCAST, MVT::f64, Expand);

496

}

497

498

// We cannot sextinreg(i1). Expand to shifts.

499

setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

500

501

// NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support

502

// SjLj exception handling but a light-weight setjmp/longjmp replacement to

503

// support continuation, user-level threading, and etc.. As a result, no

504

// other SjLj exception interfaces are implemented and please don't build

505

// your own exception handling based on them.

506

// LLVM/Clang supports zero-cost DWARF exception handling.

507

setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);

508

setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);

509

510

// We want to legalize GlobalAddress and ConstantPool nodes into the

511

// appropriate instructions to materialize the address.

512

setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);

513

setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);

514

setOperationAction(ISD::BlockAddress, MVT::i32, Custom);

515

setOperationAction(ISD::ConstantPool, MVT::i32, Custom);

516

setOperationAction(ISD::JumpTable, MVT::i32, Custom);

517

setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);

518

setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);

519

setOperationAction(ISD::BlockAddress, MVT::i64, Custom);

520

setOperationAction(ISD::ConstantPool, MVT::i64, Custom);

521

setOperationAction(ISD::JumpTable, MVT::i64, Custom);

522

523

// TRAP is legal.

524

setOperationAction(ISD::TRAP, MVT::Other, Legal);

525

526

// TRAMPOLINE is custom lowered.

527

setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);

528

setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);

529

530

// VASTART needs to be custom lowered to use the VarArgsFrameIndex

531

setOperationAction(ISD::VASTART , MVT::Other, Custom);

532

533

if (Subtarget.is64BitELFABI()) {

534

// VAARG always uses double-word chunks, so promote anything smaller.

535

setOperationAction(ISD::VAARG, MVT::i1, Promote);

536

AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64);

537

setOperationAction(ISD::VAARG, MVT::i8, Promote);

538

AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64);

539

setOperationAction(ISD::VAARG, MVT::i16, Promote);

540

AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64);

541

setOperationAction(ISD::VAARG, MVT::i32, Promote);

542

AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64);

543

setOperationAction(ISD::VAARG, MVT::Other, Expand);

544

} else if (Subtarget.is32BitELFABI()) {

545

// VAARG is custom lowered with the 32-bit SVR4 ABI.

546

setOperationAction(ISD::VAARG, MVT::Other, Custom);

547

setOperationAction(ISD::VAARG, MVT::i64, Custom);

548

} else

549

setOperationAction(ISD::VAARG, MVT::Other, Expand);

550

551

// VACOPY is custom lowered with the 32-bit SVR4 ABI.

552

if (Subtarget.is32BitELFABI())

553

setOperationAction(ISD::VACOPY , MVT::Other, Custom);

554

else

555

setOperationAction(ISD::VACOPY , MVT::Other, Expand);

556

557

// Use the default implementation.

558

setOperationAction(ISD::VAEND , MVT::Other, Expand);

559

setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);

560

setOperationAction(ISD::STACKRESTORE , MVT::Other, Custom);

561

setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);

562

setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Custom);

563

setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom);

564

setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom);

565

setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);

566

setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);

567

568

// We want to custom lower some of our intrinsics.

569

setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

570

571

// To handle counter-based loop conditions.

572

setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);

573

574

setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom);

575

setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom);

576

setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);

577

setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);

578

579

// Comparisons that require checking two conditions.

580

if (Subtarget.hasSPE()) {

581

setCondCodeAction(ISD::SETO, MVT::f32, Expand);

582

setCondCodeAction(ISD::SETO, MVT::f64, Expand);

583

setCondCodeAction(ISD::SETUO, MVT::f32, Expand);

584

setCondCodeAction(ISD::SETUO, MVT::f64, Expand);

585

}

586

setCondCodeAction(ISD::SETULT, MVT::f32, Expand);

587

setCondCodeAction(ISD::SETULT, MVT::f64, Expand);

588

setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);

589

setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);

590

setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);

591

setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);

592

setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);

593

setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);

594

setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);

595

setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);

596

setCondCodeAction(ISD::SETONE, MVT::f32, Expand);

597

setCondCodeAction(ISD::SETONE, MVT::f64, Expand);

598

599

if (Subtarget.has64BitSupport()) {

600

// They also have instructions for converting between i64 and fp.

601

setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);

602

setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Expand);

603

setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);

604

setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Expand);

605

setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);

606

setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);

607

setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

608

setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);

609

// This is just the low 32 bits of a (signed) fp->i64 conversion.

610

// We cannot do this with Promote because i64 is not a legal type.

611

setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);

612

setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);

613

614

if (Subtarget.hasLFIWAX() || Subtarget.isPPC64()) {

615

setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);

616

setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);

617

}

618

} else {

619

// PowerPC does not have FP_TO_UINT on 32-bit implementations.

620

if (Subtarget.hasSPE()) {

621

setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Legal);

622

setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);

623

} else {

624

setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Expand);

625

setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);

626

}

627

}

628

629

// With the instructions enabled under FPCVT, we can do everything.

630

if (Subtarget.hasFPCVT()) {

631

if (Subtarget.has64BitSupport()) {

632

setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);

633

setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);

634

setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);

635

setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);

636

setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);

637

setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);

638

setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

639

setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);

640

}

641

642

setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);

643

setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);

644

setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);

645

setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);

646

setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);

647

setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);

648

setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);

649

setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);

650

}

651

652

if (Subtarget.use64BitRegs()) {

653

// 64-bit PowerPC implementations can support i64 types directly

654

addRegisterClass(MVT::i64, &PPC::G8RCRegClass);

655

// BUILD_PAIR can't be handled natively, and should be expanded to shl/or

656

setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);

657

// 64-bit PowerPC wants to expand i128 shifts itself.

658

setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);

659

setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);

660

setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);

661

} else {

662

// 32-bit PowerPC wants to expand i64 shifts itself.

663

setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);

664

setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);

665

setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);

666

}

667

668

// PowerPC has better expansions for funnel shifts than the generic

669

// TargetLowering::expandFunnelShift.

670

if (Subtarget.has64BitSupport()) {

671

setOperationAction(ISD::FSHL, MVT::i64, Custom);

672

setOperationAction(ISD::FSHR, MVT::i64, Custom);

673

}

674

setOperationAction(ISD::FSHL, MVT::i32, Custom);

675

setOperationAction(ISD::FSHR, MVT::i32, Custom);

676

677

if (Subtarget.hasVSX()) {

678

setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);

679

setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);

680

setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);

681

setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);

682

}

683

684

if (Subtarget.hasAltivec()) {

685

for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {

686

setOperationAction(ISD::SADDSAT, VT, Legal);

687

setOperationAction(ISD::SSUBSAT, VT, Legal);

688

setOperationAction(ISD::UADDSAT, VT, Legal);

689

setOperationAction(ISD::USUBSAT, VT, Legal);

690

}

691

// First set operation action for all vector types to expand. Then we

692

// will selectively turn on ones that can be effectively codegen'd.

693

for (MVT VT : MVT::fixedlen_vector_valuetypes()) {

694

// add/sub are legal for all supported vector VT's.

695

setOperationAction(ISD::ADD, VT, Legal);

696

setOperationAction(ISD::SUB, VT, Legal);

697

698

// For v2i64, these are only valid with P8Vector. This is corrected after

699

// the loop.

700

if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {

701

setOperationAction(ISD::SMAX, VT, Legal);

702

setOperationAction(ISD::SMIN, VT, Legal);

703

setOperationAction(ISD::UMAX, VT, Legal);

704

setOperationAction(ISD::UMIN, VT, Legal);

705

}

706

else {

707

setOperationAction(ISD::SMAX, VT, Expand);

708

setOperationAction(ISD::SMIN, VT, Expand);

709

setOperationAction(ISD::UMAX, VT, Expand);

710

setOperationAction(ISD::UMIN, VT, Expand);

711

}

712

713

if (Subtarget.hasVSX()) {

714

setOperationAction(ISD::FMAXNUM, VT, Legal);

715

setOperationAction(ISD::FMINNUM, VT, Legal);

716

}

717

718

// Vector instructions introduced in P8

719

if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {

720

setOperationAction(ISD::CTPOP, VT, Legal);

721

setOperationAction(ISD::CTLZ, VT, Legal);

722

}

723

else {

724

setOperationAction(ISD::CTPOP, VT, Expand);

725

setOperationAction(ISD::CTLZ, VT, Expand);

726

}

727

728

// Vector instructions introduced in P9

729

if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))

730

setOperationAction(ISD::CTTZ, VT, Legal);

731

else

732

setOperationAction(ISD::CTTZ, VT, Expand);

733

734

// We promote all shuffles to v16i8.

735

setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);

736

AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);

737

738

// We promote all non-typed operations to v4i32.

739

setOperationAction(ISD::AND , VT, Promote);

740

AddPromotedToType (ISD::AND , VT, MVT::v4i32);

741

setOperationAction(ISD::OR , VT, Promote);

742

AddPromotedToType (ISD::OR , VT, MVT::v4i32);

743

setOperationAction(ISD::XOR , VT, Promote);

744

AddPromotedToType (ISD::XOR , VT, MVT::v4i32);

745

setOperationAction(ISD::LOAD , VT, Promote);

746

AddPromotedToType (ISD::LOAD , VT, MVT::v4i32);

747

setOperationAction(ISD::SELECT, VT, Promote);

748

AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);

749

setOperationAction(ISD::VSELECT, VT, Legal);

750

setOperationAction(ISD::SELECT_CC, VT, Promote);

751

AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);

752

setOperationAction(ISD::STORE, VT, Promote);

753

AddPromotedToType (ISD::STORE, VT, MVT::v4i32);

754

755

// No other operations are legal.

756

setOperationAction(ISD::MUL , VT, Expand);

757

setOperationAction(ISD::SDIV, VT, Expand);

758

setOperationAction(ISD::SREM, VT, Expand);

759

setOperationAction(ISD::UDIV, VT, Expand);

760

setOperationAction(ISD::UREM, VT, Expand);

761

setOperationAction(ISD::FDIV, VT, Expand);

762

setOperationAction(ISD::FREM, VT, Expand);

763

setOperationAction(ISD::FNEG, VT, Expand);

764

setOperationAction(ISD::FSQRT, VT, Expand);

765

setOperationAction(ISD::FLOG, VT, Expand);

766

setOperationAction(ISD::FLOG10, VT, Expand);

767

setOperationAction(ISD::FLOG2, VT, Expand);

768

setOperationAction(ISD::FEXP, VT, Expand);

769

setOperationAction(ISD::FEXP2, VT, Expand);

770

setOperationAction(ISD::FSIN, VT, Expand);

771

setOperationAction(ISD::FCOS, VT, Expand);

772

setOperationAction(ISD::FABS, VT, Expand);

773

setOperationAction(ISD::FFLOOR, VT, Expand);

774

setOperationAction(ISD::FCEIL, VT, Expand);

775

setOperationAction(ISD::FTRUNC, VT, Expand);

776

setOperationAction(ISD::FRINT, VT, Expand);

777

setOperationAction(ISD::FNEARBYINT, VT, Expand);

778

setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);

779

setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);

780

setOperationAction(ISD::BUILD_VECTOR, VT, Expand);

781

setOperationAction(ISD::MULHU, VT, Expand);

782

setOperationAction(ISD::MULHS, VT, Expand);

783

setOperationAction(ISD::UMUL_LOHI, VT, Expand);

784

setOperationAction(ISD::SMUL_LOHI, VT, Expand);

785

setOperationAction(ISD::UDIVREM, VT, Expand);

786

setOperationAction(ISD::SDIVREM, VT, Expand);

787

setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);

788

setOperationAction(ISD::FPOW, VT, Expand);

789

setOperationAction(ISD::BSWAP, VT, Expand);

790

setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);

791

setOperationAction(ISD::ROTL, VT, Expand);

792

setOperationAction(ISD::ROTR, VT, Expand);

793

794

for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {

795

setTruncStoreAction(VT, InnerVT, Expand);

796

setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);

797

setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);

798

setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);

799

}

800

}

801

setOperationAction(ISD::SELECT_CC, MVT::v4i32, Expand);

802

if (!Subtarget.hasP8Vector()) {

803

setOperationAction(ISD::SMAX, MVT::v2i64, Expand);

804

setOperationAction(ISD::SMIN, MVT::v2i64, Expand);

805

setOperationAction(ISD::UMAX, MVT::v2i64, Expand);

806

setOperationAction(ISD::UMIN, MVT::v2i64, Expand);

807

}

808

809

for (auto VT : {MVT::v2i64, MVT::v4i32, MVT::v8i16, MVT::v16i8})

810

setOperationAction(ISD::ABS, VT, Custom);

811

812

// We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle

813

// with merges, splats, etc.

814

setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);

815

816

// Vector truncates to sub-word integer that fit in an Altivec/VSX register

817

// are cheap, so handle them before they get expanded to scalar.

818

setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);

819

setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);

820

setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);

821

setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);

822

setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);

823

824

setOperationAction(ISD::AND , MVT::v4i32, Legal);

825

setOperationAction(ISD::OR , MVT::v4i32, Legal);

826

setOperationAction(ISD::XOR , MVT::v4i32, Legal);

827

setOperationAction(ISD::LOAD , MVT::v4i32, Legal);

828

setOperationAction(ISD::SELECT, MVT::v4i32,

829

Subtarget.useCRBits() ? Legal : Expand);

830

setOperationAction(ISD::STORE , MVT::v4i32, Legal);

831

setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal);

832

setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Legal);

833

setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal);

834

setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Legal);

835

setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);

836

setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);

837

setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);

838

setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);

839

setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);

840

setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);

841

setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);

842

setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);

843

844

// Without hasP8Altivec set, v2i64 SMAX isn't available.

845

// But ABS custom lowering requires SMAX support.

846

if (!Subtarget.hasP8Altivec())

847

setOperationAction(ISD::ABS, MVT::v2i64, Expand);

848

849

// Custom lowering ROTL v1i128 to VECTOR_SHUFFLE v16i8.

850

setOperationAction(ISD::ROTL, MVT::v1i128, Custom);

851

// With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).

852

if (Subtarget.hasAltivec())

853

for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})

854

setOperationAction(ISD::ROTL, VT, Legal);

855

// With hasP8Altivec set, we can lower ISD::ROTL to vrld.

856

if (Subtarget.hasP8Altivec())

857

setOperationAction(ISD::ROTL, MVT::v2i64, Legal);

858

859

addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);

860

addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);

861

addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);

862

addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);

863

864

setOperationAction(ISD::MUL, MVT::v4f32, Legal);

865

setOperationAction(ISD::FMA, MVT::v4f32, Legal);

866

867

if (Subtarget.hasVSX()) {

868

setOperationAction(ISD::FDIV, MVT::v4f32, Legal);

869

setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);

870

}

871

872

if (Subtarget.hasP8Altivec())

873

setOperationAction(ISD::MUL, MVT::v4i32, Legal);

874

else

875

setOperationAction(ISD::MUL, MVT::v4i32, Custom);

876

877

if (Subtarget.isISA3_1()) {

878

setOperationAction(ISD::MUL, MVT::v2i64, Legal);

879

setOperationAction(ISD::MULHS, MVT::v2i64, Legal);

880

setOperationAction(ISD::MULHU, MVT::v2i64, Legal);

881

setOperationAction(ISD::MULHS, MVT::v4i32, Legal);

882

setOperationAction(ISD::MULHU, MVT::v4i32, Legal);

883

setOperationAction(ISD::UDIV, MVT::v2i64, Legal);

884

setOperationAction(ISD::SDIV, MVT::v2i64, Legal);

885

setOperationAction(ISD::UDIV, MVT::v4i32, Legal);

886

setOperationAction(ISD::SDIV, MVT::v4i32, Legal);

887

setOperationAction(ISD::UREM, MVT::v2i64, Legal);

888

setOperationAction(ISD::SREM, MVT::v2i64, Legal);

889

setOperationAction(ISD::UREM, MVT::v4i32, Legal);

890

setOperationAction(ISD::SREM, MVT::v4i32, Legal);

891

setOperationAction(ISD::UREM, MVT::v1i128, Legal);

892

setOperationAction(ISD::SREM, MVT::v1i128, Legal);

893

setOperationAction(ISD::UDIV, MVT::v1i128, Legal);

894

setOperationAction(ISD::SDIV, MVT::v1i128, Legal);

895

setOperationAction(ISD::ROTL, MVT::v1i128, Legal);

896

}

897

898

setOperationAction(ISD::MUL, MVT::v8i16, Legal);

899

setOperationAction(ISD::MUL, MVT::v16i8, Custom);

900

901

setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);

902

setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);

903

904

setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);

905

setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);

906

setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);

907

setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);

908

909

// Altivec does not contain unordered floating-point compare instructions

910

setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);

911

setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);

912

setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);

913

setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);

914

915

if (Subtarget.hasVSX()) {

916

setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);

917

setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);

918

if (Subtarget.hasP8Vector()) {

919

setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);

920

setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);

921

}

922

if (Subtarget.hasDirectMove() && isPPC64) {

923

setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal);

924

setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal);

925

setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal);

926

setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal);

927

setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Legal);

928

setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Legal);

929

setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal);

930

setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);

931

}

932

setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);

933

934

// The nearbyint variants are not allowed to raise the inexact exception

935

// so we can only code-gen them with unsafe math.

936

if (TM.Options.UnsafeFPMath) {

937

setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);

938

setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);

939

}

940

941

setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);

942

setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);

943

setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);

944

setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);

945

setOperationAction(ISD::FRINT, MVT::v2f64, Legal);

946

setOperationAction(ISD::FROUND, MVT::v2f64, Legal);

947

setOperationAction(ISD::FROUND, MVT::f64, Legal);

948

setOperationAction(ISD::FRINT, MVT::f64, Legal);

949

950

setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);

951

setOperationAction(ISD::FRINT, MVT::v4f32, Legal);

952

setOperationAction(ISD::FROUND, MVT::v4f32, Legal);

953

setOperationAction(ISD::FROUND, MVT::f32, Legal);

954

setOperationAction(ISD::FRINT, MVT::f32, Legal);

955

956

setOperationAction(ISD::MUL, MVT::v2f64, Legal);

957

setOperationAction(ISD::FMA, MVT::v2f64, Legal);

958

959

setOperationAction(ISD::FDIV, MVT::v2f64, Legal);

960

setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);

961

962

// Share the Altivec comparison restrictions.

963

setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);

964

setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);

965

setCondCodeAction(ISD::SETO, MVT::v2f64, Expand);

966

setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);

967

968

setOperationAction(ISD::LOAD, MVT::v2f64, Legal);

969

setOperationAction(ISD::STORE, MVT::v2f64, Legal);

970

971

setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal);

972

973

if (Subtarget.hasP8Vector())

974

addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);

975

976

addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);

977

978

addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);

979

addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);

980

addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);

981

982

if (Subtarget.hasP8Altivec()) {

983

setOperationAction(ISD::SHL, MVT::v2i64, Legal);

984

setOperationAction(ISD::SRA, MVT::v2i64, Legal);

985

setOperationAction(ISD::SRL, MVT::v2i64, Legal);

986

987

// 128 bit shifts can be accomplished via 3 instructions for SHL and

988

// SRL, but not for SRA because of the instructions available:

989

// VS{RL} and VS{RL}O. However due to direct move costs, it's not worth

990

// doing

991

setOperationAction(ISD::SHL, MVT::v1i128, Expand);

992

setOperationAction(ISD::SRL, MVT::v1i128, Expand);

993

setOperationAction(ISD::SRA, MVT::v1i128, Expand);

994

995

setOperationAction(ISD::SETCC, MVT::v2i64, Legal);

996

}

997

else {

998

setOperationAction(ISD::SHL, MVT::v2i64, Expand);

999

setOperationAction(ISD::SRA, MVT::v2i64, Expand);

1000

setOperationAction(ISD::SRL, MVT::v2i64, Expand);

1001

1002

setOperationAction(ISD::SETCC, MVT::v2i64, Custom);

1003

1004

// VSX v2i64 only supports non-arithmetic operations.

1005

setOperationAction(ISD::ADD, MVT::v2i64, Expand);

1006

setOperationAction(ISD::SUB, MVT::v2i64, Expand);

1007

}

1008

1009

if (Subtarget.isISA3_1())

1010

setOperationAction(ISD::SETCC, MVT::v1i128, Legal);

1011

else

1012

setOperationAction(ISD::SETCC, MVT::v1i128, Expand);

1013

1014

setOperationAction(ISD::LOAD, MVT::v2i64, Promote);

1015

AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);

1016

setOperationAction(ISD::STORE, MVT::v2i64, Promote);

1017

AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);

1018

1019

setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal);

1020

1021

setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i64, Legal);

1022

setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i64, Legal);

1023

setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i64, Legal);

1024

setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i64, Legal);

1025

setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);

1026

setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);

1027

setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);

1028

setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);

1029

1030

// Custom handling for partial vectors of integers converted to

1031

// floating point. We already have optimal handling for v2i32 through

1032

// the DAG combine, so those aren't necessary.

1033

setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i8, Custom);

1034

setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i8, Custom);

1035

setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i16, Custom);

1036

setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i16, Custom);

1037

setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i8, Custom);

1038

setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i8, Custom);

1039

setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i16, Custom);

1040

setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i16, Custom);

1041

setOperationAction(ISD::UINT_TO_FP, MVT::v2i8, Custom);

1042

setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom);

1043

setOperationAction(ISD::UINT_TO_FP, MVT::v2i16, Custom);

1044

setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);

1045

setOperationAction(ISD::SINT_TO_FP, MVT::v2i8, Custom);

1046

setOperationAction(ISD::SINT_TO_FP, MVT::v4i8, Custom);

1047

setOperationAction(ISD::SINT_TO_FP, MVT::v2i16, Custom);

1048

setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);

1049

1050

setOperationAction(ISD::FNEG, MVT::v4f32, Legal);

1051

setOperationAction(ISD::FNEG, MVT::v2f64, Legal);

1052

setOperationAction(ISD::FABS, MVT::v4f32, Legal);

1053

setOperationAction(ISD::FABS, MVT::v2f64, Legal);

1054

setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);

1055

setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Legal);

1056

1057

if (Subtarget.hasDirectMove())

1058

setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);

1059

setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);

1060

1061

// Handle constrained floating-point operations of vector.

1062

// The predicate is `hasVSX` because altivec instruction has

1063

// no exception but VSX vector instruction has.

1064

setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);

1065

setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);

1066

setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);

1067

setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);

1068

setOperationAction(ISD::STRICT_FMA, MVT::v4f32, Legal);

1069

setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);

1070

setOperationAction(ISD::STRICT_FMAXNUM, MVT::v4f32, Legal);

1071

setOperationAction(ISD::STRICT_FMINNUM, MVT::v4f32, Legal);

1072

setOperationAction(ISD::STRICT_FRINT, MVT::v4f32, Legal);

1073

setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal);

1074

setOperationAction(ISD::STRICT_FCEIL, MVT::v4f32, Legal);

1075

setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal);

1076

setOperationAction(ISD::STRICT_FROUND, MVT::v4f32, Legal);

1077

1078

setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);

1079

setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);

1080

setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);

1081

setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);

1082

setOperationAction(ISD::STRICT_FMA, MVT::v2f64, Legal);

1083

setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);

1084

setOperationAction(ISD::STRICT_FMAXNUM, MVT::v2f64, Legal);

1085

setOperationAction(ISD::STRICT_FMINNUM, MVT::v2f64, Legal);

1086

setOperationAction(ISD::STRICT_FRINT, MVT::v2f64, Legal);

1087

setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f64, Legal);

1088

setOperationAction(ISD::STRICT_FCEIL, MVT::v2f64, Legal);

1089

setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal);

1090

setOperationAction(ISD::STRICT_FROUND, MVT::v2f64, Legal);

1091

1092

addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);

1093

}

1094

1095

if (Subtarget.hasP8Altivec()) {

1096

addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);

1097

addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);

1098

}

1099

1100

if (Subtarget.hasP9Vector()) {

1101

setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);

1102

setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);

1103

1104

// 128 bit shifts can be accomplished via 3 instructions for SHL and

1105

// SRL, but not for SRA because of the instructions available:

1106

// VS{RL} and VS{RL}O.

1107

setOperationAction(ISD::SHL, MVT::v1i128, Legal);

1108

setOperationAction(ISD::SRL, MVT::v1i128, Legal);

1109

setOperationAction(ISD::SRA, MVT::v1i128, Expand);

1110

1111

addRegisterClass(MVT::f128, &PPC::VRRCRegClass);

1112

setOperationAction(ISD::FADD, MVT::f128, Legal);

1113

setOperationAction(ISD::FSUB, MVT::f128, Legal);

1114

setOperationAction(ISD::FDIV, MVT::f128, Legal);

1115

setOperationAction(ISD::FMUL, MVT::f128, Legal);

1116

setOperationAction(ISD::FP_EXTEND, MVT::f128, Legal);

1117

// No extending loads to f128 on PPC.

1118

for (MVT FPT : MVT::fp_valuetypes())

1119

setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);

1120

setOperationAction(ISD::FMA, MVT::f128, Legal);

1121

setCondCodeAction(ISD::SETULT, MVT::f128, Expand);

1122

setCondCodeAction(ISD::SETUGT, MVT::f128, Expand);

1123

setCondCodeAction(ISD::SETUEQ, MVT::f128, Expand);

1124

setCondCodeAction(ISD::SETOGE, MVT::f128, Expand);

1125

setCondCodeAction(ISD::SETOLE, MVT::f128, Expand);

1126

setCondCodeAction(ISD::SETONE, MVT::f128, Expand);

1127

1128

setOperationAction(ISD::FTRUNC, MVT::f128, Legal);

1129

setOperationAction(ISD::FRINT, MVT::f128, Legal);

1130

setOperationAction(ISD::FFLOOR, MVT::f128, Legal);

1131

setOperationAction(ISD::FCEIL, MVT::f128, Legal);

1132

setOperationAction(ISD::FNEARBYINT, MVT::f128, Legal);

1133

setOperationAction(ISD::FROUND, MVT::f128, Legal);

1134

1135

setOperationAction(ISD::SELECT, MVT::f128, Expand);

1136

setOperationAction(ISD::FP_ROUND, MVT::f64, Legal);

1137

setOperationAction(ISD::FP_ROUND, MVT::f32, Legal);

1138

setTruncStoreAction(MVT::f128, MVT::f64, Expand);

1139

setTruncStoreAction(MVT::f128, MVT::f32, Expand);

1140

setOperationAction(ISD::BITCAST, MVT::i128, Custom);

1141

// No implementation for these ops for PowerPC.

1142

setOperationAction(ISD::FSIN, MVT::f128, Expand);

1143

setOperationAction(ISD::FCOS, MVT::f128, Expand);

1144

setOperationAction(ISD::FPOW, MVT::f128, Expand);

1145

setOperationAction(ISD::FPOWI, MVT::f128, Expand);

1146

setOperationAction(ISD::FREM, MVT::f128, Expand);

1147

1148

// Handle constrained floating-point operations of fp128

1149

setOperationAction(ISD::STRICT_FADD, MVT::f128, Legal);

1150

setOperationAction(ISD::STRICT_FSUB, MVT::f128, Legal);

1151

setOperationAction(ISD::STRICT_FMUL, MVT::f128, Legal);

1152

setOperationAction(ISD::STRICT_FDIV, MVT::f128, Legal);

1153

setOperationAction(ISD::STRICT_FMA, MVT::f128, Legal);

1154

setOperationAction(ISD::STRICT_FSQRT, MVT::f128, Legal);

1155

setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Legal);

1156

setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Legal);

1157

setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);

1158

setOperationAction(ISD::STRICT_FRINT, MVT::f128, Legal);

1159

setOperationAction(ISD::STRICT_FNEARBYINT, MVT::f128, Legal);

1160

setOperationAction(ISD::STRICT_FFLOOR, MVT::f128, Legal);

1161

setOperationAction(ISD::STRICT_FCEIL, MVT::f128, Legal);

1162

setOperationAction(ISD::STRICT_FTRUNC, MVT::f128, Legal);

1163

setOperationAction(ISD::STRICT_FROUND, MVT::f128, Legal);

1164

setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);

1165

setOperationAction(ISD::BSWAP, MVT::v8i16, Legal);

1166

setOperationAction(ISD::BSWAP, MVT::v4i32, Legal);

1167

setOperationAction(ISD::BSWAP, MVT::v2i64, Legal);

1168

setOperationAction(ISD::BSWAP, MVT::v1i128, Legal);

1169

}

1170

1171

if (Subtarget.hasP9Altivec()) {

1172

setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);

1173

setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);

1174

1175

setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Legal);

1176

setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Legal);

1177

setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Legal);

1178

setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Legal);

1179

setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Legal);

1180

setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);

1181

setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);

1182

}

1183

}

1184

1185

if (Subtarget.pairedVectorMemops()) {

1186

addRegisterClass(MVT::v256i1, &PPC::VSRpRCRegClass);

1187

setOperationAction(ISD::LOAD, MVT::v256i1, Custom);

1188

setOperationAction(ISD::STORE, MVT::v256i1, Custom);

1189

}

1190

if (Subtarget.hasMMA()) {

1191

addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass);

1192

setOperationAction(ISD::LOAD, MVT::v512i1, Custom);

1193

setOperationAction(ISD::STORE, MVT::v512i1, Custom);

1194

setOperationAction(ISD::BUILD_VECTOR, MVT::v512i1, Custom);

1195

}

1196

1197

if (Subtarget.has64BitSupport())

1198

setOperationAction(ISD::PREFETCH, MVT::Other, Legal);

1199

1200

if (Subtarget.isISA3_1())

1201

setOperationAction(ISD::SRA, MVT::v1i128, Legal);

1202

1203

setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);

1204

1205

if (!isPPC64) {

1206

setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand);

1207

setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);

1208

}

1209

1210

setBooleanContents(ZeroOrOneBooleanContent);

1211

1212

if (Subtarget.hasAltivec()) {

1213

// Altivec instructions set fields to all zeros or all ones.

1214

setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);

1215

}

1216

1217

if (!isPPC64) {

1218

// These libcalls are not available in 32-bit.

1219

setLibcallName(RTLIB::SHL_I128, nullptr);

1220

setLibcallName(RTLIB::SRL_I128, nullptr);

1221

setLibcallName(RTLIB::SRA_I128, nullptr);

1222

}

1223

1224

if (!isPPC64)

1225

setMaxAtomicSizeInBitsSupported(32);

1226

1227

setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);

1228

1229

// We have target-specific dag combine patterns for the following nodes:

1230

setTargetDAGCombine(ISD::ADD);

1231

setTargetDAGCombine(ISD::SHL);

1232

setTargetDAGCombine(ISD::SRA);

1233

setTargetDAGCombine(ISD::SRL);

1234

setTargetDAGCombine(ISD::MUL);

1235

setTargetDAGCombine(ISD::FMA);

1236

setTargetDAGCombine(ISD::SINT_TO_FP);

1237

setTargetDAGCombine(ISD::BUILD_VECTOR);

1238

if (Subtarget.hasFPCVT())

1239

setTargetDAGCombine(ISD::UINT_TO_FP);

1240

setTargetDAGCombine(ISD::LOAD);

1241

setTargetDAGCombine(ISD::STORE);

1242

setTargetDAGCombine(ISD::BR_CC);

1243

if (Subtarget.useCRBits())

1244

setTargetDAGCombine(ISD::BRCOND);

1245

setTargetDAGCombine(ISD::BSWAP);

1246

setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);

1247

setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);

1248

setTargetDAGCombine(ISD::INTRINSIC_VOID);

1249

1250

setTargetDAGCombine(ISD::SIGN_EXTEND);

1251

setTargetDAGCombine(ISD::ZERO_EXTEND);

1252

setTargetDAGCombine(ISD::ANY_EXTEND);

1253

1254

setTargetDAGCombine(ISD::TRUNCATE);

1255

setTargetDAGCombine(ISD::VECTOR_SHUFFLE);

1256

1257

1258

if (Subtarget.useCRBits()) {

1259

setTargetDAGCombine(ISD::TRUNCATE);

1260

setTargetDAGCombine(ISD::SETCC);

1261

setTargetDAGCombine(ISD::SELECT_CC);

1262

}

1263

1264

if (Subtarget.hasP9Altivec()) {

1265

setTargetDAGCombine(ISD::ABS);

1266

setTargetDAGCombine(ISD::VSELECT);

1267

}

1268

1269

setLibcallName(RTLIB::LOG_F128, "logf128");

1270

setLibcallName(RTLIB::LOG2_F128, "log2f128");

1271

setLibcallName(RTLIB::LOG10_F128, "log10f128");

1272

setLibcallName(RTLIB::EXP_F128, "expf128");

1273

setLibcallName(RTLIB::EXP2_F128, "exp2f128");

1274

setLibcallName(RTLIB::SIN_F128, "sinf128");

1275

setLibcallName(RTLIB::COS_F128, "cosf128");

1276

setLibcallName(RTLIB::POW_F128, "powf128");

1277

setLibcallName(RTLIB::FMIN_F128, "fminf128");

1278

setLibcallName(RTLIB::FMAX_F128, "fmaxf128");

1279

setLibcallName(RTLIB::POWI_F128, "__powikf2");

1280

setLibcallName(RTLIB::REM_F128, "fmodf128");

1281

1282

// With 32 condition bits, we don't need to sink (and duplicate) compares

1283

// aggressively in CodeGenPrep.

1284

if (Subtarget.useCRBits()) {

1285

setHasMultipleConditionRegisters();

1286

setJumpIsExpensive();

1287

}

1288

1289

setMinFunctionAlignment(Align(4));

1290

1291

switch (Subtarget.getCPUDirective()) {

1292

default: break;

1293

case PPC::DIR_970:

1294

case PPC::DIR_A2:

1295

case PPC::DIR_E500:

1296

case PPC::DIR_E500mc:

1297

case PPC::DIR_E5500:

1298

case PPC::DIR_PWR4:

1299

case PPC::DIR_PWR5:

1300

case PPC::DIR_PWR5X:

1301

case PPC::DIR_PWR6:

1302

case PPC::DIR_PWR6X:

1303

case PPC::DIR_PWR7:

1304

case PPC::DIR_PWR8:

1305

case PPC::DIR_PWR9:

1306

case PPC::DIR_PWR10:

1307

case PPC::DIR_PWR_FUTURE:

1308

setPrefLoopAlignment(Align(16));

1309

setPrefFunctionAlignment(Align(16));

1310

break;

1311

}

1312

1313

if (Subtarget.enableMachineScheduler())

1314

setSchedulingPreference(Sched::Source);

1315

else

1316

setSchedulingPreference(Sched::Hybrid);

1317

1318

computeRegisterProperties(STI.getRegisterInfo());

1319

1320

// The Freescale cores do better with aggressive inlining of memcpy and

1321

// friends. GCC uses same threshold of 128 bytes (= 32 word stores).

1322

if (Subtarget.getCPUDirective() == PPC::DIR_E500mc ||

1323

Subtarget.getCPUDirective() == PPC::DIR_E5500) {

1324

MaxStoresPerMemset = 32;

1325

MaxStoresPerMemsetOptSize = 16;

1326

MaxStoresPerMemcpy = 32;

1327

MaxStoresPerMemcpyOptSize = 8;

1328

MaxStoresPerMemmove = 32;

1329

MaxStoresPerMemmoveOptSize = 8;

1330

} else if (Subtarget.getCPUDirective() == PPC::DIR_A2) {

1331

// The A2 also benefits from (very) aggressive inlining of memcpy and

1332

// friends. The overhead of the function call, even when warm, can be

1333

// over one hundred cycles.

1334

MaxStoresPerMemset = 128;

1335

MaxStoresPerMemcpy = 128;

1336

MaxStoresPerMemmove = 128;

1337

MaxLoadsPerMemcmp = 128;

1338

} else {

1339

MaxLoadsPerMemcmp = 8;

1340

MaxLoadsPerMemcmpOptSize = 4;

1341

}

1342

1343

IsStrictFPEnabled = true;

1344

1345

// Let the subtarget (CPU) decide if a predictable select is more expensive

1346

// than the corresponding branch. This information is used in CGP to decide

1347

// when to convert selects into branches.

1348

PredictableSelectIsExpensive = Subtarget.isPredictableSelectIsExpensive();

1349

}

1350

1351

/// getMaxByValAlign - Helper for getByValTypeAlignment to determine

1352

/// the desired ByVal argument alignment.

1353

static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {

1354

if (MaxAlign == MaxMaxAlign)

1355

return;

1356

if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {

1357

if (MaxMaxAlign >= 32 &&

1358

VTy->getPrimitiveSizeInBits().getFixedSize() >= 256)

1359

MaxAlign = Align(32);

1360

else if (VTy->getPrimitiveSizeInBits().getFixedSize() >= 128 &&

1361

MaxAlign < 16)

1362

MaxAlign = Align(16);

1363

} else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {

1364

Align EltAlign;

1365

getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);

1366

if (EltAlign > MaxAlign)

1367

MaxAlign = EltAlign;

1368

} else if (StructType *STy = dyn_cast<StructType>(Ty)) {

1369

for (auto *EltTy : STy->elements()) {

1370

Align EltAlign;

1371

getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);

1372

if (EltAlign > MaxAlign)

1373

MaxAlign = EltAlign;

1374

if (MaxAlign == MaxMaxAlign)

1375

break;

1376

}

1377

}

1378

}

1379

1380

/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate

1381

/// function arguments in the caller parameter area.

1382

unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty,

1383

const DataLayout &DL) const {

1384

// 16byte and wider vectors are passed on 16byte boundary.

1385

// The rest is 8 on PPC64 and 4 on PPC32 boundary.

1386

Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);

1387

if (Subtarget.hasAltivec())

1388

getMaxByValAlign(Ty, Alignment, Align(16));

1389

return Alignment.value();

1390

}

1391

1392

bool PPCTargetLowering::useSoftFloat() const {

1393

return Subtarget.useSoftFloat();

1394

}

1395

1396

bool PPCTargetLowering::hasSPE() const {

1397

return Subtarget.hasSPE();

1398

}

1399

1400

/// Returns true exactly when \p VT is a scalar integer type.
bool PPCTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
  return VT.isScalarInteger();
}

1403

1404

const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {

1405

switch ((PPCISD::NodeType)Opcode) {

1406

case PPCISD::FIRST_NUMBER: break;

1407

case PPCISD::FSEL: return "PPCISD::FSEL";

1408

case PPCISD::XSMAXCDP: return "PPCISD::XSMAXCDP";

1409

case PPCISD::XSMINCDP: return "PPCISD::XSMINCDP";

1410

case PPCISD::FCFID: return "PPCISD::FCFID";

1411

case PPCISD::FCFIDU: return "PPCISD::FCFIDU";

1412

case PPCISD::FCFIDS: return "PPCISD::FCFIDS";

1413

case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS";

1414

case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";

1415

case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";

1416

case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ";

1417

case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ";

1418

case PPCISD::FP_TO_UINT_IN_VSR:

1419

return "PPCISD::FP_TO_UINT_IN_VSR,";

1420

case PPCISD::FP_TO_SINT_IN_VSR:

1421

return "PPCISD::FP_TO_SINT_IN_VSR";

1422

case PPCISD::FRE: return "PPCISD::FRE";

1423

case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";

1424

case PPCISD::STFIWX: return "PPCISD::STFIWX";

1425

case PPCISD::VPERM: return "PPCISD::VPERM";

1426

case PPCISD::XXSPLT: return "PPCISD::XXSPLT";

1427

case PPCISD::XXSPLTI_SP_TO_DP:

1428

return "PPCISD::XXSPLTI_SP_TO_DP";

1429

case PPCISD::XXSPLTI32DX:

1430

return "PPCISD::XXSPLTI32DX";

1431

case PPCISD::VECINSERT: return "PPCISD::VECINSERT";

1432

case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI";

1433

case PPCISD::VECSHL: return "PPCISD::VECSHL";

1434

case PPCISD::CMPB: return "PPCISD::CMPB";

1435

case PPCISD::Hi: return "PPCISD::Hi";

1436

case PPCISD::Lo: return "PPCISD::Lo";

1437

case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";

1438

case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";

1439

case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";

1440

case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";

1441

case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET";

1442

case PPCISD::PROBED_ALLOCA: return "PPCISD::PROBED_ALLOCA";

1443

case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";

1444

case PPCISD::SRL: return "PPCISD::SRL";

1445

case PPCISD::SRA: return "PPCISD::SRA";

1446

case PPCISD::SHL: return "PPCISD::SHL";

1447

case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE";

1448

case PPCISD::CALL: return "PPCISD::CALL";

1449

case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";

1450

case PPCISD::CALL_NOTOC: return "PPCISD::CALL_NOTOC";

1451

case PPCISD::MTCTR: return "PPCISD::MTCTR";

1452

case PPCISD::BCTRL: return "PPCISD::BCTRL";

1453

case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";

1454

case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";

1455

case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE";

1456

case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";

1457

case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";

1458

case PPCISD::MFOCRF: return "PPCISD::MFOCRF";

1459

case PPCISD::MFVSR: return "PPCISD::MFVSR";

1460

case PPCISD::MTVSRA: return "PPCISD::MTVSRA";

1461

case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ";

1462

case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP";

1463

case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP";

1464

case PPCISD::SCALAR_TO_VECTOR_PERMUTED:

1465

return "PPCISD::SCALAR_TO_VECTOR_PERMUTED";

1466

case PPCISD::ANDI_rec_1_EQ_BIT:

1467

return "PPCISD::ANDI_rec_1_EQ_BIT";

1468

case PPCISD::ANDI_rec_1_GT_BIT:

1469

return "PPCISD::ANDI_rec_1_GT_BIT";

1470

case PPCISD::VCMP: return "PPCISD::VCMP";

1471

case PPCISD::VCMPo: return "PPCISD::VCMPo";

1472

case PPCISD::LBRX: return "PPCISD::LBRX";

1473

case PPCISD::STBRX: return "PPCISD::STBRX";

1474

case PPCISD::LFIWAX: return "PPCISD::LFIWAX";

1475

case PPCISD::LFIWZX: return "PPCISD::LFIWZX";

1476

case PPCISD::LXSIZX: return "PPCISD::LXSIZX";

1477

case PPCISD::STXSIX: return "PPCISD::STXSIX";

1478

case PPCISD::VEXTS: return "PPCISD::VEXTS";

1479

case PPCISD::LXVD2X: return "PPCISD::LXVD2X";

1480

case PPCISD::STXVD2X: return "PPCISD::STXVD2X";

1481

case PPCISD::LOAD_VEC_BE: return "PPCISD::LOAD_VEC_BE";

1482

case PPCISD::STORE_VEC_BE: return "PPCISD::STORE_VEC_BE";

1483

case PPCISD::ST_VSR_SCAL_INT:

1484

return "PPCISD::ST_VSR_SCAL_INT";

1485

case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";

1486

case PPCISD::BDNZ: return "PPCISD::BDNZ";

1487

case PPCISD::BDZ: return "PPCISD::BDZ";

1488

case PPCISD::MFFS: return "PPCISD::MFFS";

1489

case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";

1490

case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";

1491

case PPCISD::CR6SET: return "PPCISD::CR6SET";

1492

case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";

1493

case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT";

1494

case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT";

1495

case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";

1496

case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";

1497

case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";

1498

case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";

1499

case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";

1500

case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";

1501

case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";

1502

case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";

1503

case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";

1504

case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";

1505

case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";

1506

case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";

1507

case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";

1508

case PPCISD::PADDI_DTPREL:

1509

return "PPCISD::PADDI_DTPREL";

1510

case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";

1511

case PPCISD::SC: return "PPCISD::SC";

1512

case PPCISD::CLRBHRB: return "PPCISD::CLRBHRB";

1513

case PPCISD::MFBHRBE: return "PPCISD::MFBHRBE";

1514

case PPCISD::RFEBB: return "PPCISD::RFEBB";

1515

case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD";

1516

case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN";

1517

case PPCISD::VABSD: return "PPCISD::VABSD";

1518

case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128";

1519

case PPCISD::BUILD_SPE64: return "PPCISD::BUILD_SPE64";

1520

case PPCISD::EXTRACT_SPE: return "PPCISD::EXTRACT_SPE";

1521

case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI";

1522

case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH";

1523

case PPCISD::FP_EXTEND_HALF: return "PPCISD::FP_EXTEND_HALF";

1524

case PPCISD::MAT_PCREL_ADDR: return "PPCISD::MAT_PCREL_ADDR";

1525

case PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR:

1526

return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR";

1527

case PPCISD::TLS_LOCAL_EXEC_MAT_ADDR:

1528

return "PPCISD::TLS_LOCAL_EXEC_MAT_ADDR";

1529

case PPCISD::ACC_BUILD: return "PPCISD::ACC_BUILD";

1530

case PPCISD::PAIR_BUILD: return "PPCISD::PAIR_BUILD";

1531

case PPCISD::EXTRACT_VSX_REG: return "PPCISD::EXTRACT_VSX_REG";

1532

case PPCISD::XXMFACC: return "PPCISD::XXMFACC";

1533

case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT";

1534

case PPCISD::FNMSUB: return "PPCISD::FNMSUB";

1535

case PPCISD::STRICT_FADDRTZ:

1536

return "PPCISD::STRICT_FADDRTZ";

1537

case PPCISD::STRICT_FCTIDZ:

1538

return "PPCISD::STRICT_FCTIDZ";

1539

case PPCISD::STRICT_FCTIWZ:

1540

return "PPCISD::STRICT_FCTIWZ";

1541

case PPCISD::STRICT_FCTIDUZ:

1542

return "PPCISD::STRICT_FCTIDUZ";

1543

case PPCISD::STRICT_FCTIWUZ:

1544

return "PPCISD::STRICT_FCTIWUZ";

1545

case PPCISD::STRICT_FCFID:

1546

return "PPCISD::STRICT_FCFID";

1547

case PPCISD::STRICT_FCFIDU:

1548

return "PPCISD::STRICT_FCFIDU";

1549

case PPCISD::STRICT_FCFIDS:

1550

return "PPCISD::STRICT_FCFIDS";

1551

case PPCISD::STRICT_FCFIDUS:

1552

return "PPCISD::STRICT_FCFIDUS";

1553

case PPCISD::LXVRZX: return "PPCISD::LXVRZX";

1554

}

1555

return nullptr;

1556

}

1557

1558

/// Pick the result type of a SETCC whose operands have type \p VT.
///
/// \param DL Unused by this implementation.
/// \param C  Unused by this implementation.
/// \param VT The type being compared.
/// \return For non-vector \p VT: i1 when the subtarget reports useCRBits(),
///         otherwise i32. For vector \p VT: the same vector with its
///         elements changed to integers.
EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
                                          EVT VT) const {
  if (!VT.isVector())
    return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;

  return VT.changeVectorElementTypeToInteger();
}

1565

1566

/// Always answers true; \p VT is only sanity-checked (in asserts builds) to
/// be a floating-point type.
bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const {
  assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
  return true;
}

1570

1571

//===----------------------------------------------------------------------===//

1572

// Node matching predicates, for use by the tblgen matching code.

1573

//===----------------------------------------------------------------------===//

1574

1575

/// isFloatingPointZero - Return true if this is 0.0 or -0.0.

1576

static bool isFloatingPointZero(SDValue Op) {

1577

if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))

1578

return CFP->getValueAPF().isZero();

1579

else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {

1580

// Maybe this has already been legalized into the constant pool?

1581

if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))

1582

if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))

1583

return CFP->getValueAPF().isZero();

1584

}

1585

return false;

1586

}

1587

1588

/// isConstantOrUndef - Test one shuffle-mask index against an expected value.
///
/// \param Op  A mask index; a negative value stands for an undef element.
/// \param Val The index the mask element is expected to have.
/// \return true if \p Op is undef (negative) or equals \p Val.
static bool isConstantOrUndef(int Op, int Val) {
  return Op < 0 || Op == Val;
}

1593

1594

/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a

1595

/// VPKUHUM instruction.

1596

/// The ShuffleKind distinguishes between big-endian operations with

1597

/// two different inputs (0), either-endian operations with two identical

1598

/// inputs (1), and little-endian operations with two different inputs (2).

1599

/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).

1600

bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,

1601

SelectionDAG &DAG) {

1602

bool IsLE = DAG.getDataLayout().isLittleEndian();

1603

if (ShuffleKind == 0) {

1604

if (IsLE)

1605

return false;

1606

for (unsigned i = 0; i != 16; ++i)

1607

if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))

1608

return false;

1609

} else if (ShuffleKind == 2) {

1610

if (!IsLE)

1611

return false;

1612

for (unsigned i = 0; i != 16; ++i)

1613

if (!isConstantOrUndef(N->getMaskElt(i), i*2))

1614

return false;

1615

} else if (ShuffleKind == 1) {

1616

unsigned j = IsLE ? 0 : 1;

1617

for (unsigned i = 0; i != 8; ++i)

1618

if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) ||

1619

!isConstantOrUndef(N->getMaskElt(i+8), i*2+j))

1620

return false;

1621

}

1622

return true;

1623

}

1624

1625

/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a

1626

/// VPKUWUM instruction.

1627

/// The ShuffleKind distinguishes between big-endian operations with

1628

/// two different inputs (0), either-endian operations with two identical

1629

/// inputs (1), and little-endian operations with two different inputs (2).

1630

/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).

1631

bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,

1632

SelectionDAG &DAG) {

1633

bool IsLE = DAG.getDataLayout().isLittleEndian();

1634

if (ShuffleKind == 0) {

1635

if (IsLE)

1636

return false;

1637

for (unsigned i = 0; i != 16; i += 2)

1638

if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||

1639

!isConstantOrUndef(N->getMaskElt(i+1), i*2+3))

1640

return false;

1641

} else if (ShuffleKind == 2) {

1642

if (!IsLE)

1643

return false;

1644

for (unsigned i = 0; i != 16; i += 2)

1645

if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||

1646

!isConstantOrUndef(N->getMaskElt(i+1), i*2+1))

1647

return false;

1648

} else if (ShuffleKind == 1) {

1649

unsigned j = IsLE ? 0 : 2;

1650

for (unsigned i = 0; i != 8; i += 2)

1651

if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||

1652

!isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||

1653

!isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||

1654

!isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))

1655

return false;

1656

}

1657

return true;

1658

}

1659

1660

/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a

1661

/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the

1662

/// current subtarget.

1663

///

1664

/// The ShuffleKind distinguishes between big-endian operations with

1665

/// two different inputs (0), either-endian operations with two identical

1666

/// inputs (1), and little-endian operations with two different inputs (2).

1667

/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).

1668

bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,

1669

SelectionDAG &DAG) {

1670

const PPCSubtarget& Subtarget =

1671

static_cast<const PPCSubtarget&>(DAG.getSubtarget());

1672

if (!Subtarget.hasP8Vector())

1673

return false;

1674

1675

bool IsLE = DAG.getDataLayout().isLittleEndian();

1676

if (ShuffleKind == 0) {

1677

if (IsLE)

1678

return false;

1679

for (unsigned i = 0; i != 16; i += 4)

1680

if (!isConstantOrUndef(N->getMaskElt(i ), i*2+4) ||

1681

!isConstantOrUndef(N->getMaskElt(i+1), i*2+5) ||

1682

!isConstantOrUndef(N->getMaskElt(i+2), i*2+6) ||

1683

!isConstantOrUndef(N->getMaskElt(i+3), i*2+7))

1684

return false;

1685

} else if (ShuffleKind == 2) {

1686

if (!IsLE)

1687

return false;

1688

for (unsigned i = 0; i != 16; i += 4)

1689

if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||

1690

!isConstantOrUndef(N->getMaskElt(i+1), i*2+1) ||

1691

!isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||

1692

!isConstantOrUndef(N->getMaskElt(i+3), i*2+3))

1693

return false;

1694

} else if (ShuffleKind == 1) {

1695

unsigned j = IsLE ? 0 : 4;

1696

for (unsigned i = 0; i != 8; i += 4)

1697

if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||

1698

!isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||

1699

!isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) ||

1700

!isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) ||

1701

!isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||

1702

!isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) ||

1703

!isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||

1704

!isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))

1705

return false;

1706

}

1707

return true;

1708

}

1709

1710

/// isVMerge - Common function, used to match vmrg* shuffles.

1711

///

1712

static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,

1713

unsigned LHSStart, unsigned RHSStart) {

1714

if (N->getValueType(0) != MVT::v16i8)

1715

return false;

1716

assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&(((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
"Unsupported merge size!") ? static_cast<void> (0) : __assert_fail
("(UnitSize == 1 || UnitSize == 2 || UnitSize == 4) && \"Unsupported merge size!\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 1717, __PRETTY_FUNCTION__))

1717

"Unsupported merge size!")(((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
"Unsupported merge size!") ? static_cast<void> (0) : __assert_fail
("(UnitSize == 1 || UnitSize == 2 || UnitSize == 4) && \"Unsupported merge size!\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 1717, __PRETTY_FUNCTION__));

1718

1719

for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units

1720

for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit

1721

if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),

1722

LHSStart+j+i*UnitSize) ||

1723

!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),

1724

RHSStart+j+i*UnitSize))

1725

return false;

1726

}

1727

return true;

1728

}

1729

1730

/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for

1731

/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).

1732

/// The ShuffleKind distinguishes between big-endian merges with two

1733

/// different inputs (0), either-endian merges with two identical inputs (1),

1734

/// and little-endian merges with two different inputs (2). For the latter,

1735

/// the input operands are swapped (see PPCInstrAltivec.td).

1736

bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,

1737

unsigned ShuffleKind, SelectionDAG &DAG) {

1738

if (DAG.getDataLayout().isLittleEndian()) {

1739

if (ShuffleKind == 1) // unary

1740

return isVMerge(N, UnitSize, 0, 0);

1741

else if (ShuffleKind == 2) // swapped

1742

return isVMerge(N, UnitSize, 0, 16);

1743

else

1744

return false;

1745

} else {

1746

if (ShuffleKind == 1) // unary

1747

return isVMerge(N, UnitSize, 8, 8);

1748

else if (ShuffleKind == 0) // normal

1749

return isVMerge(N, UnitSize, 8, 24);

1750

else

1751

return false;

1752

}

1753

}

1754

1755

/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for

1756

/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).

1757

/// The ShuffleKind distinguishes between big-endian merges with two

1758

/// different inputs (0), either-endian merges with two identical inputs (1),

1759

/// and little-endian merges with two different inputs (2). For the latter,

1760

/// the input operands are swapped (see PPCInstrAltivec.td).

1761

bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,

1762

unsigned ShuffleKind, SelectionDAG &DAG) {

1763

if (DAG.getDataLayout().isLittleEndian()) {

1764

if (ShuffleKind == 1) // unary

1765

return isVMerge(N, UnitSize, 8, 8);

1766

else if (ShuffleKind == 2) // swapped

1767

return isVMerge(N, UnitSize, 8, 24);

1768

else

1769

return false;

1770

} else {

1771

if (ShuffleKind == 1) // unary

1772

return isVMerge(N, UnitSize, 0, 0);

1773

else if (ShuffleKind == 0) // normal

1774

return isVMerge(N, UnitSize, 0, 16);

1775

else

1776

return false;

1777

}

1778

}

1779

1780

/**

1781

* Common function used to match vmrgew and vmrgow shuffles

1782

*

1783

* The indexOffset determines whether to look for even or odd words in

1784

* the shuffle mask. This is based on the endianness of the target

1785

* machine.

1786

* - Little Endian:

1787

* - Use offset of 0 to check for odd elements

1788

* - Use offset of 4 to check for even elements

1789

* - Big Endian:

1790

* - Use offset of 0 to check for even elements

1791

* - Use offset of 4 to check for odd elements

1792

* A detailed description of the vector element ordering for little endian and

1793

* big endian can be found at

1794

* http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html

1795

* Targeting your applications - what little endian and big endian IBM XL C/C++

1796

* compiler differences mean to you

1797

*

1798

* The mask to the shuffle vector instruction specifies the indices of the

1799

* elements from the two input vectors to place in the result. The elements are

1800

* numbered in array-access order, starting with the first vector. These vectors

1801

* are always of type v16i8, thus each vector will contain 16 elements of size

1802

* 8. More info on the shuffle vector can be found in the

1803

* http://llvm.org/docs/LangRef.html#shufflevector-instruction

1804

* Language Reference.

1805

*

1806

* The RHSStartValue indicates whether the same input vectors are used (unary)

1807

* or two different input vectors are used, based on the following:

1808

* - If the instruction uses the same vector for both inputs, the range of the

1809

* indices will be 0 to 15. In this case, the RHSStart value passed should

1810

* be 0.

1811

* - If the instruction has two different vectors then the range of the

1812

* indices will be 0 to 31. In this case, the RHSStart value passed should

1813

* be 16 (indices 0-15 specify elements in the first vector while indices 16

1814

* to 31 specify elements in the second vector).

1815

*

1816

* \param[in] N The shuffle vector SD Node to analyze

1817

* \param[in] IndexOffset Specifies whether to look for even or odd elements

1818

* \param[in] RHSStartValue Specifies the starting index for the righthand input

1819

* vector to the shuffle_vector instruction

1820

* \return true iff this shuffle vector represents an even or odd word merge

1821

*/

1822

static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
                     unsigned RHSStartValue) {
  if (N->getValueType(0) != MVT::v16i8)
    return false;

  // i picks the source (0 = first input, 1 = input starting at
  // RHSStartValue); j walks the four bytes of a word. The same pattern is
  // checked in the low (elements 0-7) and high (elements 8-15) halves.
  for (unsigned i = 0; i < 2; ++i)
    for (unsigned j = 0; j < 4; ++j)
      if (!isConstantOrUndef(N->getMaskElt(i*4+j),
                             i*RHSStartValue+j+IndexOffset) ||
          !isConstantOrUndef(N->getMaskElt(i*4+j+8),
                             i*RHSStartValue+j+IndexOffset+8))
        return false;
  return true;
}

1836

1837

/**

1838

* Determine if the specified shuffle mask is suitable for the vmrgew or

1839

* vmrgow instructions.

1840

*

1841

* \param[in] N The shuffle vector SD Node to analyze

1842

* \param[in] CheckEven Check for an even merge (true) or an odd merge (false)

1843

* \param[in] ShuffleKind Identify the type of merge:

1844

* - 0 = big-endian merge with two different inputs;

1845

* - 1 = either-endian merge with two identical inputs;

1846

* - 2 = little-endian merge with two different inputs (inputs are swapped for

1847

* little-endian merges).

1848

* \param[in] DAG The current SelectionDAG

1849

* \return true iff this shuffle mask

1850

*/

1851

bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,

1852

unsigned ShuffleKind, SelectionDAG &DAG) {

1853

if (DAG.getDataLayout().isLittleEndian()) {

1854

unsigned indexOffset = CheckEven ? 4 : 0;

1855

if (ShuffleKind == 1) // Unary

1856

return isVMerge(N, indexOffset, 0);

1857

else if (ShuffleKind == 2) // swapped

1858

return isVMerge(N, indexOffset, 16);

1859

else

1860

return false;

1861

}

1862

else {

1863

unsigned indexOffset = CheckEven ? 0 : 4;

1864

if (ShuffleKind == 1) // Unary

1865

return isVMerge(N, indexOffset, 0);

1866

else if (ShuffleKind == 0) // Normal

1867

return isVMerge(N, indexOffset, 16);

1868

else

1869

return false;

1870

}

1871

return false;

1872

}

1873

1874

/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift

1875

/// amount, otherwise return -1.

1876

/// The ShuffleKind distinguishes between big-endian operations with two

1877

/// different inputs (0), either-endian operations with two identical inputs

1878

/// (1), and little-endian operations with two different inputs (2). For the

1879

/// latter, the input operands are swapped (see PPCInstrAltivec.td).

1880

int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,

1881

SelectionDAG &DAG) {

1882

if (N->getValueType(0) != MVT::v16i8)

1883

return -1;

1884

1885

ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);

1886

1887

// Find the first non-undef value in the shuffle mask.

1888

unsigned i;

1889

for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)

1890

/*search*/;

1891

1892

if (i == 16) return -1; // all undef.

1893

1894

// Otherwise, check to see if the rest of the elements are consecutively

1895

// numbered from this value.

1896

unsigned ShiftAmt = SVOp->getMaskElt(i);

1897

if (ShiftAmt < i) return -1;

1898

1899

ShiftAmt -= i;

1900

bool isLE = DAG.getDataLayout().isLittleEndian();

1901

1902

if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {

1903

// Check the rest of the elements to see if they are consecutive.

1904

for (++i; i != 16; ++i)

1905

if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))

1906

return -1;

1907

} else if (ShuffleKind == 1) {

1908

// Check the rest of the elements to see if they are consecutive.

1909

for (++i; i != 16; ++i)

1910

if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))

1911

return -1;

1912

} else

1913

return -1;

1914

1915

if (isLE)

1916

ShiftAmt = 16 - ShiftAmt;

1917

1918

return ShiftAmt;

1919

}

1920

1921

/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand

1922

/// specifies a splat of a single element that is suitable for input to

1923

/// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).

1924

bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {

1925

assert(N->getValueType(0) == MVT::v16i8 && isPowerOf2_32(EltSize) &&((N->getValueType(0) == MVT::v16i8 && isPowerOf2_32
(EltSize) && EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes"
) ? static_cast<void> (0) : __assert_fail ("N->getValueType(0) == MVT::v16i8 && isPowerOf2_32(EltSize) && EltSize <= 8 && \"Can only handle 1,2,4,8 byte element sizes\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 1926, __PRETTY_FUNCTION__))

1926

EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes")((N->getValueType(0) == MVT::v16i8 && isPowerOf2_32
(EltSize) && EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes"
) ? static_cast<void> (0) : __assert_fail ("N->getValueType(0) == MVT::v16i8 && isPowerOf2_32(EltSize) && EltSize <= 8 && \"Can only handle 1,2,4,8 byte element sizes\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 1926, __PRETTY_FUNCTION__));

1927

1928

// The consecutive indices need to specify an element, not part of two

1929

// different elements. So abandon ship early if this isn't the case.

1930

if (N->getMaskElt(0) % EltSize != 0)

1931

return false;

1932

1933

// This is a splat operation if each element of the permute is the same, and

1934

// if the value doesn't reference the second vector.

1935

unsigned ElementBase = N->getMaskElt(0);

1936

1937

// FIXME: Handle UNDEF elements too!

1938

if (ElementBase >= 16)

1939

return false;

1940

1941

// Check that the indices are consecutive, in the case of a multi-byte element

1942

// splatted with a v16i8 mask.

1943

for (unsigned i = 1; i != EltSize; ++i)

1944

if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))

1945

return false;

1946

1947

for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {

1948

if (N->getMaskElt(i) < 0) continue;

1949

for (unsigned j = 0; j != EltSize; ++j)

1950

if (N->getMaskElt(i+j) != N->getMaskElt(j))

1951

return false;

1952

}

1953

return true;

1954

}

1955

1956

/// Check that the mask is shuffling N byte elements. Within each N byte

1957

/// element of the mask, the indices could be either in increasing or

1958

/// decreasing order as long as they are consecutive.

1959

/// \param[in] N the shuffle vector SD Node to analyze

1960

/// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/

1961

/// Word/DoubleWord/QuadWord).

1962

/// \param[in] StepLen the delta indices number among the N byte element, if

1963

/// the mask is in increasing/decreasing order then it is 1/-1.

1964

/// \return true iff the mask is shuffling N byte elements.

1965

static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,

1966

int StepLen) {

1967

assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&(((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
"Unexpected element width.") ? static_cast<void> (0) :
__assert_fail ("(Width == 2 || Width == 4 || Width == 8 || Width == 16) && \"Unexpected element width.\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 1968, __PRETTY_FUNCTION__))

1968

"Unexpected element width.")(((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
"Unexpected element width.") ? static_cast<void> (0) :
__assert_fail ("(Width == 2 || Width == 4 || Width == 8 || Width == 16) && \"Unexpected element width.\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 1968, __PRETTY_FUNCTION__));

1969

assert((StepLen == 1 || StepLen == -1) && "Unexpected element width.")(((StepLen == 1 || StepLen == -1) && "Unexpected element width."
) ? static_cast<void> (0) : __assert_fail ("(StepLen == 1 || StepLen == -1) && \"Unexpected element width.\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 1969, __PRETTY_FUNCTION__));

1970

1971

unsigned NumOfElem = 16 / Width;

1972

unsigned MaskVal[16]; // Width is never greater than 16

1973

for (unsigned i = 0; i < NumOfElem; ++i) {

1974

MaskVal[0] = N->getMaskElt(i * Width);

1975

if ((StepLen == 1) && (MaskVal[0] % Width)) {

1976

return false;

1977

} else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {

1978

return false;

1979

}

1980

1981

for (unsigned int j = 1; j < Width; ++j) {

1982

MaskVal[j] = N->getMaskElt(i * Width + j);

1983

if (MaskVal[j] != MaskVal[j-1] + StepLen) {

1984

return false;

1985

}

1986

}

1987

}

1988

1989

return true;

1990

}

1991

1992

/// Check whether the shuffle mask of \p N keeps three words of one input in
/// place and replaces the remaining word with a word from elsewhere.
///
/// \param[in]  N            The shuffle to analyze; its mask must consist of
///                          four ascending, word-aligned 4-byte groups.
/// \param[out] ShiftElts    Taken from a per-endianness table indexed by the
///                          inserted word's position in its source; 0 in the
///                          single-input case.
/// \param[out] InsertAtByte Byte offset of the replaced word (mirrored for
///                          little endian).
/// \param[out] Swap         True when the inserted word comes from mask
///                          indices below 4 (the first input), and always
///                          in the single-input case.
/// \param[in]  IsLE         Whether the target is little endian.
/// \return true iff one of the recognised patterns matched.
///
/// NOTE: when operand 1 is undef and nothing matches, ShiftElts and Swap have
/// already been overwritten even though false is returned.
bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
                          unsigned &InsertAtByte, bool &Swap, bool IsLE) {
  if (!isNByteElemShuffleMask(N, 4, 1))
    return false;

  // Now we look at mask elements 0,4,8,12
  unsigned M0 = N->getMaskElt(0) / 4;
  unsigned M1 = N->getMaskElt(4) / 4;
  unsigned M2 = N->getMaskElt(8) / 4;
  unsigned M3 = N->getMaskElt(12) / 4;
  unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
  unsigned BigEndianShifts[] = { 3, 0, 1, 2 };

  // Below, let H and L be arbitrary elements of the shuffle mask
  // where H is in the range [4,7] and L is in the range [0,3].
  // H, 1, 2, 3 or L, 5, 6, 7
  if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
      (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
    InsertAtByte = IsLE ? 12 : 0;
    Swap = M0 < 4;
    return true;
  }
  // 0, H, 2, 3 or 4, L, 6, 7
  if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
      (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
    InsertAtByte = IsLE ? 8 : 4;
    Swap = M1 < 4;
    return true;
  }
  // 0, 1, H, 3 or 4, 5, L, 7
  if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
      (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
    InsertAtByte = IsLE ? 4 : 8;
    Swap = M2 < 4;
    return true;
  }
  // 0, 1, 2, H or 4, 5, 6, L
  if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
      (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
    ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
    InsertAtByte = IsLE ? 0 : 12;
    Swap = M3 < 4;
    return true;
  }

  // If both vector operands for the shuffle are the same vector, the mask will
  // contain only elements from the first one and the second one will be undef.
  if (N->getOperand(1).isUndef()) {
    ShiftElts = 0;
    Swap = true;
    // The only source word accepted in this case depends on endianness.
    unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
    if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
      InsertAtByte = IsLE ? 12 : 0;
      return true;
    }
    if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
      InsertAtByte = IsLE ? 8 : 4;
      return true;
    }
    if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
      InsertAtByte = IsLE ? 4 : 8;
      return true;
    }
    if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
      InsertAtByte = IsLE ? 0 : 12;
      return true;
    }
  }

  return false;
}

2066

2067

/// Check whether the v16i8 shuffle mask of \p N selects four consecutive
/// words (with wrap-around) from its input(s). On success, \p ShiftElts
/// receives the word shift amount and \p Swap tells whether the two inputs
/// have to be exchanged.
bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
                               bool &Swap, bool IsLE) {
  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
  // Ensure each byte index of the word is consecutive.
  if (!isNByteElemShuffleMask(N, 4, 1))
    return false;

  // Mask elements 0, 4, 8 and 12 are the beginning of words.
  const unsigned First = N->getMaskElt(0) / 4;
  const unsigned Second = N->getMaskElt(4) / 4;
  const unsigned Third = N->getMaskElt(8) / 4;
  const unsigned Fourth = N->getMaskElt(12) / 4;

  // Word indices wrap at 4 when there is a single input (second operand
  // undef) and at 8 when both inputs are used.
  const bool SingleInput = N->getOperand(1).isUndef();
  if (SingleInput)
    assert(First < 4 && "Indexing into an undef vector?");
  const unsigned WordCount = SingleInput ? 4 : 8;

  // Ensure each word index of the ShuffleVector Mask is consecutive.
  const bool Consecutive = Second == (First + 1) % WordCount &&
                           Third == (Second + 1) % WordCount &&
                           Fourth == (Third + 1) % WordCount;
  if (!Consecutive)
    return false;

  if (SingleInput) {
    ShiftElts = IsLE ? (4 - First) % 4 : First;
    Swap = false;
    return true;
  }

  if (IsLE) {
    if (First == 0 || (First >= 5 && First <= 7)) {
      // No swap is needed when the leading result word is one of the 3 left
      // elements of the second vector (or when there is no shift at all).
      Swap = false;
      ShiftElts = (8 - First) % 8;
    } else if (First >= 1 && First <= 4) {
      // A swap is needed when the leading result word is one of the 3 left
      // elements of the first vector (or when shifting by 4, which simply
      // exchanges the vectors).
      Swap = true;
      ShiftElts = (4 - First) % 4;
    }
    return true;
  }

  // Big endian.
  if (First <= 3) {
    // The leading result word comes from the first vector: no swap.
    Swap = false;
    ShiftElts = First;
  } else if (First <= 7) {
    // The leading result word comes from the second vector: swap.
    Swap = true;
    ShiftElts = First - 4;
  }
  return true;
}

2128

2129

/// Return true if the v16i8 shuffle mask of \p N reverses the bytes inside
/// every \p Width byte group: each group must pass isNByteElemShuffleMask
/// with a step of -1 and must start with the last byte index of that group.
static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) {
  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");

  if (!isNByteElemShuffleMask(N, Width, -1))
    return false;

  int GroupStart = 0;
  while (GroupStart < 16) {
    // The first mask element of a group has to name the group's last byte.
    const int LastByteOfGroup = GroupStart + Width - 1;
    if (N->getMaskElt(GroupStart) != LastByteOfGroup)
      return false;
    GroupStart += Width;
  }

  return true;
}

2141

2142

/// Return true if the mask of \p N reverses the bytes of every 2-byte group.
bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
  const int GroupBytes = 2;
  return isXXBRShuffleMaskHelper(N, GroupBytes);
}

2145

2146

/// Return true if the mask of \p N reverses the bytes of every 4-byte group.
bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
  const int GroupBytes = 4;
  return isXXBRShuffleMaskHelper(N, GroupBytes);
}

2149

2150

/// Return true if the mask of \p N reverses the bytes of every 8-byte group.
bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
  const int GroupBytes = 8;
  return isXXBRShuffleMaskHelper(N, GroupBytes);
}

2153

2154

/// Return true if the mask of \p N reverses all 16 bytes of the vector.
bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
  const int GroupBytes = 16;
  return isXXBRShuffleMaskHelper(N, GroupBytes);
}

2157

2158

/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap

2159

/// if the inputs to the instruction should be swapped and set \p DM to the

2160

/// value for the immediate.

2161

/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI

2162

/// AND element 0 of the result comes from the first input (LE) or second input

2163

/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.

2164

/// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle

2165

/// mask.

2166

bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,

2167

bool &Swap, bool IsLE) {

2168

assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8")((N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8"
) ? static_cast<void> (0) : __assert_fail ("N->getValueType(0) == MVT::v16i8 && \"Shuffle vector expects v16i8\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 2168, __PRETTY_FUNCTION__));

2169

2170

// Ensure each byte index of the double word is consecutive.

2171

if (!isNByteElemShuffleMask(N, 8, 1))

2172

return false;

2173

2174

unsigned M0 = N->getMaskElt(0) / 8;

2175

unsigned M1 = N->getMaskElt(8) / 8;

2176

assert(((M0 | M1) < 4) && "A mask element out of bounds?")((((M0 | M1) < 4) && "A mask element out of bounds?"
) ? static_cast<void> (0) : __assert_fail ("((M0 | M1) < 4) && \"A mask element out of bounds?\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 2176, __PRETTY_FUNCTION__));

2177

2178

// If both vector operands for the shuffle are the same vector, the mask will

2179

// contain only elements from the first one and the second one will be undef.

2180

if (N->getOperand(1).isUndef()) {

2181

if ((M0 | M1) < 2) {

2182

DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);

2183

Swap = false;

2184

return true;

2185

} else

2186

return false;

2187

}

2188

2189

if (IsLE) {

2190

if (M0 > 1 && M1 < 2) {

2191

Swap = false;

2192

} else if (M0 < 2 && M1 > 1) {

2193

M0 = (M0 + 2) % 4;

2194

M1 = (M1 + 2) % 4;

2195

Swap = true;

2196

} else

2197

return false;

2198

2199

// Note: if control flow comes here that means Swap is already set above

2200

DM = (((~M1) & 1) << 1) + ((~M0) & 1);

2201

return true;

2202

} else { // BE

2203

if (M0 < 2 && M1 > 1) {

2204

Swap = false;

2205

} else if (M0 > 1 && M1 < 2) {

2206

M0 = (M0 + 2) % 4;

2207

M1 = (M1 + 2) % 4;

2208

Swap = true;

2209

} else

2210

return false;

2211

2212

// Note: if control flow comes here that means Swap is already set above

2213

DM = (M0 << 1) + (M1 & 1);

2214

return true;

2215

}

2216

}

2217

2218

2219

/// getSplatIdxForPPCMnemonics - Return the splat index as a value that is

2220

/// appropriate for PPC mnemonics (which have a big endian bias - namely

2221

/// elements are counted from the left of the vector register).

2222

unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,

2223

SelectionDAG &DAG) {

2224

ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);

2225

assert(isSplatShuffleMask(SVOp, EltSize))((isSplatShuffleMask(SVOp, EltSize)) ? static_cast<void>
(0) : __assert_fail ("isSplatShuffleMask(SVOp, EltSize)", "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 2225, __PRETTY_FUNCTION__));

2226

if (DAG.getDataLayout().isLittleEndian())

2227

return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);

2228

else

2229

return SVOp->getMaskElt(0) / EltSize;

2230

}

2231

2232

/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed

2233

/// by using a vspltis[bhw] instruction of the specified element size, return

2234

/// the constant being splatted. The ByteSize field indicates the number of

2235

/// bytes of each element [124] -> [bhw].

2236

SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {

2237

SDValue OpVal(nullptr, 0);

2238

2239

// If ByteSize of the splat is bigger than the element size of the

2240

// build_vector, then we have a case where we are checking for a splat where

2241

// multiple elements of the buildvector are folded together into a single

2242

// logical element of the splat (e.g. "vsplish 1" to splat {0,1}*8).

2243

unsigned EltSize = 16/N->getNumOperands();

2244

if (EltSize < ByteSize) {

2245

unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.

2246

SDValue UniquedVals[4];

2247

assert(Multiple > 1 && Multiple <= 4 && "How can this happen?")((Multiple > 1 && Multiple <= 4 && "How can this happen?"
) ? static_cast<void> (0) : __assert_fail ("Multiple > 1 && Multiple <= 4 && \"How can this happen?\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 2247, __PRETTY_FUNCTION__));

2248

2249

// See if all of the elements in the buildvector agree across.

2250

for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {

2251

if (N->getOperand(i).isUndef()) continue;

2252

// If the element isn't a constant, bail fully out.

2253

if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();

2254

2255

if (!UniquedVals[i&(Multiple-1)].getNode())

2256

UniquedVals[i&(Multiple-1)] = N->getOperand(i);

2257

else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))

2258

return SDValue(); // no match.

2259

}

2260

2261

// Okay, if we reached this point, UniquedVals[0..Multiple-1] contains

2262

// either constant or undef values that are identical for each chunk. See

2263

// if these chunks can form into a larger vspltis*.

2264

2265

// Check to see if all of the leading entries are either 0 or -1. If

2266

// neither, then this won't fit into the immediate field.

2267

bool LeadingZero = true;

2268

bool LeadingOnes = true;

2269

for (unsigned i = 0; i != Multiple-1; ++i) {

2270

if (!UniquedVals[i].getNode()) continue; // Must have been undefs.

2271

2272

LeadingZero &= isNullConstant(UniquedVals[i]);

2273

LeadingOnes &= isAllOnesConstant(UniquedVals[i]);

2274

}

2275

// Finally, check the least significant entry.

2276

if (LeadingZero) {

2277

if (!UniquedVals[Multiple-1].getNode())

2278

return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef

2279

int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();

2280

if (Val < 16) // 0,0,0,4 -> vspltisw(4)

2281

return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);

2282

}

2283

if (LeadingOnes) {

2284

if (!UniquedVals[Multiple-1].getNode())

2285

return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef

2286

int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();

2287

if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)

2288

return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);

2289

}

2290

2291

return SDValue();

2292

}

2293

2294

// Check to see if this buildvec has a single non-undef value in its elements.

2295

for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {

2296

if (N->getOperand(i).isUndef()) continue;

2297

if (!OpVal.getNode())

2298

OpVal = N->getOperand(i);

2299

else if (OpVal != N->getOperand(i))

2300

return SDValue();

2301

}

2302

2303

if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def.

2304

2305

unsigned ValSizeInBytes = EltSize;

2306

uint64_t Value = 0;

2307

if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {

2308

Value = CN->getZExtValue();

2309

} else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {

2310

assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!")((CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!"
) ? static_cast<void> (0) : __assert_fail ("CN->getValueType(0) == MVT::f32 && \"Only one legal FP vector type!\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 2310, __PRETTY_FUNCTION__));

2311

Value = FloatToBits(CN->getValueAPF().convertToFloat());

2312

}

2313

2314

// If the splat value is larger than the element value, then we can never do

2315

// this splat. The only case that we could fit the replicated bits into our

2316

// immediate field for would be zero, and we prefer to use vxor for it.

2317

if (ValSizeInBytes < ByteSize) return SDValue();

2318

2319

// If the element value is larger than the splat value, check if it consists

2320

// of a repeated bit pattern of size ByteSize.

2321

if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))

2322

return SDValue();

2323

2324

// Properly sign extend the value.

2325

int MaskVal = SignExtend32(Value, ByteSize * 8);

2326

2327

// If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.

2328

if (MaskVal == 0) return SDValue();

2329

2330

// Finally, if this value fits in a 5 bit sext field, return it

2331

if (SignExtend32<5>(MaskVal) == MaskVal)

2332

return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);

2333

return SDValue();

2334

}

2335

2336

/// isQVALIGNIShuffleMask - If this is a qvaligni shuffle mask, return the shift

2337

/// amount, otherwise return -1.

2338

int PPC::isQVALIGNIShuffleMask(SDNode *N) {

2339

EVT VT = N->getValueType(0);

2340

if (VT != MVT::v4f64 && VT != MVT::v4f32 && VT != MVT::v4i1)

2341

return -1;

2342

2343

ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);

2344

2345

// Find the first non-undef value in the shuffle mask.

2346

unsigned i;

2347

for (i = 0; i != 4 && SVOp->getMaskElt(i) < 0; ++i)

2348

/*search*/;

2349

2350

if (i == 4) return -1; // all undef.

2351

2352

// Otherwise, check to see if the rest of the elements are consecutively

2353

// numbered from this value.

2354

unsigned ShiftAmt = SVOp->getMaskElt(i);

2355

if (ShiftAmt < i) return -1;

2356

ShiftAmt -= i;

2357

2358

// Check the rest of the elements to see if they are consecutive.

2359

for (++i; i != 4; ++i)

2360

if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))

2361

return -1;

2362

2363

return ShiftAmt;

2364

}

2365

2366

//===----------------------------------------------------------------------===//

2367

// Addressing Mode Selection

2368

//===----------------------------------------------------------------------===//

2369

2370

/// isIntS16Immediate - This method tests to see if the node is either a 32-bit

2371

/// or 64-bit immediate, and if the value can be accurately represented as a

2372

/// sign extension from a 16-bit value. If so, this returns true and the

2373

/// immediate.

2374

bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {

2375

if (!isa<ConstantSDNode>(N))

2376

return false;

2377

2378

Imm = (int16_t)cast<ConstantSDNode>(N)->getZExtValue();

2379

if (N->getValueType(0) == MVT::i32)

2380

return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();

2381

else

2382

return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();

2383

}

2384

/// SDValue convenience overload: forwards the node behind \p Op to the
/// SDNode version of isIntS16Immediate.
bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
  SDNode *Node = Op.getNode();
  return isIntS16Immediate(Node, Imm);
}

2387

2388

2389

/// SelectAddressEVXRegReg - Given the specified address, check to see if it can

2390

/// be represented as an indexed [r+r] operation.

2391

bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,

2392

SDValue &Index,

2393

SelectionDAG &DAG) const {

2394

for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();

2395

UI != E; ++UI) {

2396

if (MemSDNode *Memop = dyn_cast<MemSDNode>(*UI)) {

2397

if (Memop->getMemoryVT() == MVT::f64) {

2398

Base = N.getOperand(0);

2399

Index = N.getOperand(1);

2400

return true;

2401

}

2402

}

2403

}

2404

return false;

2405

}

2406

2407

/// SelectAddressRegReg - Given the specified addressed, check to see if it

2408

/// can be represented as an indexed [r+r] operation. Returns false if it

2409

/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is

2410

/// non-zero and N can be represented by a base register plus a signed 16-bit

2411

/// displacement, make a more precise judgement by checking (displacement % \p

2412

/// EncodingAlignment).

2413

bool PPCTargetLowering::SelectAddressRegReg(

2414

SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG,

2415

MaybeAlign EncodingAlignment) const {

2416

// If we have a PC Relative target flag don't select as [reg+reg]. It will be

2417

// a [pc+imm].

2418

if (SelectAddressPCRel(N, Base))

2419

return false;

2420

2421

int16_t Imm = 0;

2422

if (N.getOpcode() == ISD::ADD) {

2423

// Is there any SPE load/store (f64), which can't handle 16bit offset?

2424

// SPE load/store can only handle 8-bit offsets.

2425

if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))

2426

return true;

2427

if (isIntS16Immediate(N.getOperand(1), Imm) &&

2428

(!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))

2429

return false; // r+i

2430

if (N.getOperand(1).getOpcode() == PPCISD::Lo)

2431

return false; // r+i

2432

2433

Base = N.getOperand(0);

2434

Index = N.getOperand(1);

2435

return true;

2436

} else if (N.getOpcode() == ISD::OR) {

2437

if (isIntS16Immediate(N.getOperand(1), Imm) &&

2438

(!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))

2439

return false; // r+i can fold it if we can.

2440

2441

// If this is an or of disjoint bitfields, we can codegen this as an add

2442

// (for better address arithmetic) if the LHS and RHS of the OR are provably

2443

// disjoint.

2444

KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));

2445

2446

if (LHSKnown.Zero.getBoolValue()) {

2447

KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));

2448

// If all of the bits are known zero on the LHS or RHS, the add won't

2449

// carry.

2450

if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {

2451

Base = N.getOperand(0);

2452

Index = N.getOperand(1);

2453

return true;

2454

}

2455

}

2456

}

2457

2458

return false;

2459

}

2460

2461

// If we happen to be doing an i64 load or store into a stack slot that has

2462

// less than a 4-byte alignment, then the frame-index elimination may need to

2463

// use an indexed load or store instruction (because the offset may not be a

2464

// multiple of 4). The extra register needed to hold the offset comes from the

2465

// register scavenger, and it is possible that the scavenger will need to use

2466

// an emergency spill slot. As a result, we need to make sure that a spill slot

2467

// is allocated when doing an i64 load/store into a less-than-4-byte-aligned

2468

// stack slot.

2469

static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {

2470

// FIXME: This does not handle the LWA case.

2471

if (VT != MVT::i64)

2472

return;

2473

2474

// NOTE: We'll exclude negative FIs here, which come from argument

2475

// lowering, because there are no known test cases triggering this problem

2476

// using packed structures (or similar). We can remove this exclusion if

2477

// we find such a test case. The reason why this is so test-case driven is

2478

// because this entire 'fixup' is only to prevent crashes (from the

2479

// register scavenger) on not-really-valid inputs. For example, if we have:

2480

// %a = alloca i1

2481

// %b = bitcast i1* %a to i64*

2482

// store i64* a, i64 b

2483

// then the store should really be marked as 'align 1', but is not. If it

2484

// were marked as 'align 1' then the indexed form would have been

2485

// instruction-selected initially, and the problem this 'fixup' is preventing

2486

// won't happen regardless.

2487

if (FrameIdx < 0)

2488

return;

2489

2490

MachineFunction &MF = DAG.getMachineFunction();

2491

MachineFrameInfo &MFI = MF.getFrameInfo();

2492

2493

if (MFI.getObjectAlign(FrameIdx) >= Align(4))

2494

return;

2495

2496

PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

2497

FuncInfo->setHasNonRISpills();

2498

}

2499

2500

/// Returns true if the address N can be represented by a base register plus

2501

/// a signed 16-bit displacement [r+imm], and if it is not better

2502

/// represented as reg+reg. If \p EncodingAlignment is non-zero, only accept

2503

/// displacements that are multiples of that value.

2504

bool PPCTargetLowering::SelectAddressRegImm(

2505

SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG,

2506

MaybeAlign EncodingAlignment) const {

2507

// FIXME dl should come from parent load or store, not from address

2508

SDLoc dl(N);

2509

2510

// If we have a PC Relative target flag don't select as [reg+imm]. It will be

2511

// a [pc+imm].

2512

if (SelectAddressPCRel(N, Base))

2513

return false;

2514

2515

// If this can be more profitably realized as r+r, fail.

2516

if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))

2517

return false;

2518

2519

if (N.getOpcode() == ISD::ADD) {

2520

int16_t imm = 0;

2521

if (isIntS16Immediate(N.getOperand(1), imm) &&

2522

(!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {

2523

Disp = DAG.getTargetConstant(imm, dl, N.getValueType());

2524

if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {

2525

Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());

2526

fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());

2527

} else {

2528

Base = N.getOperand(0);

2529

}

2530

return true; // [r+i]

2531

} else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {

2532

// Match LOAD (ADD (X, Lo(G))).

2533

assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()((!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->
getZExtValue() && "Cannot handle constant offsets yet!"
) ? static_cast<void> (0) : __assert_fail ("!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue() && \"Cannot handle constant offsets yet!\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 2534, __PRETTY_FUNCTION__))

2534

&& "Cannot handle constant offsets yet!")((!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->
getZExtValue() && "Cannot handle constant offsets yet!"
) ? static_cast<void> (0) : __assert_fail ("!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue() && \"Cannot handle constant offsets yet!\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 2534, __PRETTY_FUNCTION__));

2535

Disp = N.getOperand(1).getOperand(0); // The global address.

2536

assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||((Disp.getOpcode() == ISD::TargetGlobalAddress || Disp.getOpcode
() == ISD::TargetGlobalTLSAddress || Disp.getOpcode() == ISD::
TargetConstantPool || Disp.getOpcode() == ISD::TargetJumpTable
) ? static_cast<void> (0) : __assert_fail ("Disp.getOpcode() == ISD::TargetGlobalAddress || Disp.getOpcode() == ISD::TargetGlobalTLSAddress || Disp.getOpcode() == ISD::TargetConstantPool || Disp.getOpcode() == ISD::TargetJumpTable"
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 2539, __PRETTY_FUNCTION__))

2537

Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||((Disp.getOpcode() == ISD::TargetGlobalAddress || Disp.getOpcode
() == ISD::TargetGlobalTLSAddress || Disp.getOpcode() == ISD::
TargetConstantPool || Disp.getOpcode() == ISD::TargetJumpTable
) ? static_cast<void> (0) : __assert_fail ("Disp.getOpcode() == ISD::TargetGlobalAddress || Disp.getOpcode() == ISD::TargetGlobalTLSAddress || Disp.getOpcode() == ISD::TargetConstantPool || Disp.getOpcode() == ISD::TargetJumpTable"
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 2539, __PRETTY_FUNCTION__))

2538

Disp.getOpcode() == ISD::TargetConstantPool ||((Disp.getOpcode() == ISD::TargetGlobalAddress || Disp.getOpcode
() == ISD::TargetGlobalTLSAddress || Disp.getOpcode() == ISD::
TargetConstantPool || Disp.getOpcode() == ISD::TargetJumpTable
) ? static_cast<void> (0) : __assert_fail ("Disp.getOpcode() == ISD::TargetGlobalAddress || Disp.getOpcode() == ISD::TargetGlobalTLSAddress || Disp.getOpcode() == ISD::TargetConstantPool || Disp.getOpcode() == ISD::TargetJumpTable"
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 2539, __PRETTY_FUNCTION__))

2539

Disp.getOpcode() == ISD::TargetJumpTable)((Disp.getOpcode() == ISD::TargetGlobalAddress || Disp.getOpcode
() == ISD::TargetGlobalTLSAddress || Disp.getOpcode() == ISD::
TargetConstantPool || Disp.getOpcode() == ISD::TargetJumpTable
) ? static_cast<void> (0) : __assert_fail ("Disp.getOpcode() == ISD::TargetGlobalAddress || Disp.getOpcode() == ISD::TargetGlobalTLSAddress || Disp.getOpcode() == ISD::TargetConstantPool || Disp.getOpcode() == ISD::TargetJumpTable"
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 2539, __PRETTY_FUNCTION__));

2540

Base = N.getOperand(0);

2541

return true; // [&g+r]

2542

}

2543

} else if (N.getOpcode() == ISD::OR) {

2544

int16_t imm = 0;

2545

if (isIntS16Immediate(N.getOperand(1), imm) &&

2546

(!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {

2547

// If this is an or of disjoint bitfields, we can codegen this as an add

2548

// (for better address arithmetic) if the LHS and RHS of the OR are

2549

// provably disjoint.

2550

KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));

2551

2552

if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {

2553

// If all of the bits are known zero on the LHS or RHS, the add won't

2554

// carry.

2555

if (FrameIndexSDNode *FI =

2556

dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {

2557

Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());

2558

fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());

2559

} else {

2560

Base = N.getOperand(0);

2561

}

2562

Disp = DAG.getTargetConstant(imm, dl, N.getValueType());

2563

return true;

2564

}

2565

}

2566

} else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {

2567

// Loading from a constant address.

2568

2569

// If this address fits entirely in a 16-bit sext immediate field, codegen

2570

// this as "d, 0"

2571

int16_t Imm;

2572

if (isIntS16Immediate(CN, Imm) &&

2573

(!EncodingAlignment || isAligned(*EncodingAlignment, Imm))) {

2574

Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));

2575

Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,

2576

CN->getValueType(0));

2577

return true;

2578

}

2579

2580

// Handle 32-bit sext immediates with LIS + addr mode.

2581

if ((CN->getValueType(0) == MVT::i32 ||

2582

(int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&

2583

(!EncodingAlignment ||

2584

isAligned(*EncodingAlignment, CN->getZExtValue()))) {

2585

int Addr = (int)CN->getZExtValue();

2586

2587

// Otherwise, break this down into an LIS + disp.

2588

Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);

2589

2590

Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,

2591

MVT::i32);

2592

unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;

2593

Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);

2594

return true;

2595

}

2596

}

2597

2598

Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));

2599

if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {

2600

Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());

2601

fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());

2602

} else

2603

Base = N;

2604

return true; // [r+0]

2605

}

2606

2607

/// SelectAddressRegRegOnly - Given the specified addressed, force it to be

2608

/// represented as an indexed [r+r] operation.

2609

bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,

2610

SDValue &Index,

2611

SelectionDAG &DAG) const {

2612

// Check to see if we can easily represent this as an [r+r] address. This

2613

// will fail if it thinks that the address is more profitably represented as

2614

// reg+imm, e.g. where imm = 0.

2615

if (SelectAddressRegReg(N, Base, Index, DAG))

2616

return true;

2617

2618

// If the address is the result of an add, we will utilize the fact that the

2619

// address calculation includes an implicit add. However, we can reduce

2620

// register pressure if we do not materialize a constant just for use as the

2621

// index register. We only get rid of the add if it is not an add of a

2622

// value and a 16-bit signed constant and both have a single use.

2623

int16_t imm = 0;

2624

if (N.getOpcode() == ISD::ADD &&

2625

(!isIntS16Immediate(N.getOperand(1), imm) ||

2626

!N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {

2627

Base = N.getOperand(0);

2628

Index = N.getOperand(1);

2629

return true;

2630

}

2631

2632

// Otherwise, do it the hard way, using R0 as the base register.

2633

Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,

2634

N.getValueType());

2635

Index = N;

2636

return true;

2637

}

2638

2639

/// Return true if \p N is a node of type \p Ty whose target flags include
/// PPCII::MO_PCREL_FLAG.
template <typename Ty> static bool isValidPCRelNode(SDValue N) {
  if (Ty *Candidate = dyn_cast<Ty>(N))
    return (Candidate->getTargetFlags() & PPCII::MO_PCREL_FLAG) != 0;
  return false;
}

2643

2644

/// Returns true if this address is a PC Relative address.

2645

/// PC Relative addresses are marked with the flag PPCII::MO_PCREL_FLAG

2646

/// or if the node opcode is PPCISD::MAT_PCREL_ADDR.

2647

bool PPCTargetLowering::SelectAddressPCRel(SDValue N, SDValue &Base) const {

2648

// This is a materialize PC Relative node. Always select this as PC Relative.

2649

Base = N;

2650

if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)

2651

return true;

2652

if (isValidPCRelNode<ConstantPoolSDNode>(N) ||

2653

isValidPCRelNode<GlobalAddressSDNode>(N) ||

2654

isValidPCRelNode<JumpTableSDNode>(N) ||

2655

isValidPCRelNode<BlockAddressSDNode>(N))

2656

return true;

2657

return false;

2658

}

2659

2660

/// Returns true if we should use a direct load into vector instruction

2661

/// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.

2662

static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {

2663

2664

// If there are any other uses other than scalar to vector, then we should

2665

// keep it as a scalar load -> direct move pattern to prevent multiple

2666

// loads.

2667

LoadSDNode *LD = dyn_cast<LoadSDNode>(N);

2668

if (!LD)

2669

return false;

2670

2671

EVT MemVT = LD->getMemoryVT();

2672

if (!MemVT.isSimple())

2673

return false;

2674

switch(MemVT.getSimpleVT().SimpleTy) {

2675

case MVT::i64:

2676

break;

2677

case MVT::i32:

2678

if (!ST.hasP8Vector())

2679

return false;

2680

break;

2681

case MVT::i16:

2682

case MVT::i8:

2683

if (!ST.hasP9Vector())

2684

return false;

2685

break;

2686

default:

2687

return false;

2688

}

2689

2690

SDValue LoadedVal(N, 0);

2691

if (!LoadedVal.hasOneUse())

2692

return false;

2693

2694

for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end();

2695

UI != UE; ++UI)

2696

if (UI.getUse().get().getResNo() == 0 &&

2697

UI->getOpcode() != ISD::SCALAR_TO_VECTOR &&

2698

UI->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED)

2699

return false;

2700

2701

return true;

2702

}

2703

2704

/// getPreIndexedAddressParts - returns true by value, base pointer and

2705

/// offset pointer and addressing mode by reference if the node's address

2706

/// can be legally represented as pre-indexed load / store address.

2707

bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,

2708

SDValue &Offset,

2709

ISD::MemIndexedMode &AM,

2710

SelectionDAG &DAG) const {

2711

if (DisablePPCPreinc) return false;

2712

2713

bool isLoad = true;

2714

SDValue Ptr;

2715

EVT VT;

2716

unsigned Alignment;

2717

if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {

2718

Ptr = LD->getBasePtr();

2719

VT = LD->getMemoryVT();

2720

Alignment = LD->getAlignment();

2721

} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {

2722

Ptr = ST->getBasePtr();

2723

VT = ST->getMemoryVT();

2724

Alignment = ST->getAlignment();

2725

isLoad = false;

2726

} else

2727

return false;

2728

2729

// Do not generate pre-inc forms for specific loads that feed scalar_to_vector

2730

// instructions because we can fold these into a more efficient instruction

2731

// instead, (such as LXSD).

2732

if (isLoad && usePartialVectorLoads(N, Subtarget)) {

2733

return false;

2734

}

2735

2736

// PowerPC doesn't have preinc load/store instructions for vectors

2737

if (VT.isVector())

2738

return false;

2739

2740

if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {

2741

// Common code will reject creating a pre-inc form if the base pointer

2742

// is a frame index, or if N is a store and the base pointer is either

2743

// the same as or a predecessor of the value being stored. Check for

2744

// those situations here, and try with swapped Base/Offset instead.

2745

bool Swap = false;

2746

2747

if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))

2748

Swap = true;

2749

else if (!isLoad) {

2750

SDValue Val = cast<StoreSDNode>(N)->getValue();

2751

if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))

2752

Swap = true;

2753

}

2754

2755

if (Swap)

2756

std::swap(Base, Offset);

2757

2758

AM = ISD::PRE_INC;

2759

return true;

2760

}

2761

2762

// LDU/STU can only handle immediates that are a multiple of 4.

2763

if (VT != MVT::i64) {

2764

if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, None))

2765

return false;

2766

} else {

2767

// LDU/STU need an address with at least 4-byte alignment.

2768

if (Alignment < 4)

2769

return false;

2770

2771

if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, Align(4)))

2772

return false;

2773

}

2774

2775

if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {

2776

// PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of

2777

// sext i32 to i64 when addr mode is r+i.

2778

if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&

2779

LD->getExtensionType() == ISD::SEXTLOAD &&

2780

isa<ConstantSDNode>(Offset))

2781

return false;

2782

}

2783

2784

AM = ISD::PRE_INC;

2785

return true;

2786

}

2787

2788

//===----------------------------------------------------------------------===//

2789

// LowerOperation implementation

2790

//===----------------------------------------------------------------------===//

2791

2792

/// Return true if we should reference labels using a PICBase, set the HiOpFlags

2793

/// and LoOpFlags to the target MO flags.

2794

static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,

2795

unsigned &HiOpFlags, unsigned &LoOpFlags,

2796

const GlobalValue *GV = nullptr) {

2797

HiOpFlags = PPCII::MO_HA;

2798

LoOpFlags = PPCII::MO_LO;

2799

2800

// Don't use the pic base if not in PIC relocation model.

2801

if (IsPIC) {

2802

HiOpFlags |= PPCII::MO_PIC_FLAG;

2803

LoOpFlags |= PPCII::MO_PIC_FLAG;

2804

}

2805

}

2806

2807

/// Build the address of a label as hi(&L) + lo(&L) from the two target nodes
/// \p HiPart and \p LoPart. When \p isPIC is set, the global base register is
/// added to the high part first.
static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
                             SelectionDAG &DAG) {
  SDLoc Loc(HiPart);
  const EVT AddrVT = HiPart.getValueType();
  SDValue ZeroOp = DAG.getConstant(0, Loc, AddrVT);

  SDValue HiNode = DAG.getNode(PPCISD::Hi, Loc, AddrVT, HiPart, ZeroOp);
  SDValue LoNode = DAG.getNode(PPCISD::Lo, Loc, AddrVT, LoPart, ZeroOp);

  // With PIC, the first instruction is actually "GR+hi(&G)".
  if (isPIC) {
    SDValue PICBase = DAG.getNode(PPCISD::GlobalBaseReg, Loc, AddrVT);
    HiNode = DAG.getNode(ISD::ADD, Loc, AddrVT, PICBase, HiNode);
  }

  // In the non-pic case this is a direct access: the address is just
  // (hi(&g)+lo(&g)).
  return DAG.getNode(ISD::ADD, Loc, AddrVT, HiNode, LoNode);
}

2825

2826

/// Record in the PPCFunctionInfo of \p MF that the function uses the TOC base
/// pointer.
static void setUsesTOCBasePtr(MachineFunction &MF) {
  MF.getInfo<PPCFunctionInfo>()->setUsesTOCBasePtr();
}

2830

2831

/// Convenience overload: mark the machine function that \p DAG is building as
/// using the TOC base pointer.
static void setUsesTOCBasePtr(SelectionDAG &DAG) {
  MachineFunction &CurMF = DAG.getMachineFunction();
  setUsesTOCBasePtr(CurMF);
}

2834

2835

/// Create a PPCISD::TOC_ENTRY memory-intrinsic node that loads the entry for
/// \p GA relative to the TOC/GOT base. The base is X2 on 64-bit targets, R2 on
/// 32-bit AIX, and a GlobalBaseReg node otherwise.
SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
                                       SDValue GA) const {
  const bool Is64Bit = Subtarget.isPPC64();
  const EVT VT = Is64Bit ? MVT::i64 : MVT::i32;

  SDValue TOCBase;
  if (Is64Bit)
    TOCBase = DAG.getRegister(PPC::X2, VT);
  else if (Subtarget.isAIXABI())
    TOCBase = DAG.getRegister(PPC::R2, VT);
  else
    TOCBase = DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);

  SDValue Ops[] = { GA, TOCBase };
  return DAG.getMemIntrinsicNode(
      PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
      MachinePointerInfo::getGOT(DAG.getMachineFunction()), None,
      MachineMemOperand::MOLoad);
}

2849

2850

/// Lower a constant-pool node to the address-materialization sequence for the
/// current ABI and relocation model:
///  - 64-bit ELF / AIX with PC-relative addressing: MAT_PCREL_ADDR.
///  - 64-bit ELF / AIX otherwise: a TOC entry load.
///  - PIC SVR4 (32-bit): a TOC_ENTRY load of a MO_PIC_FLAG-tagged entry.
///  - everything else: a hi/lo label reference.
SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
                                             SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  const Constant *C = CP->getConstVal();

  // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
  // The actual address of the GlobalValue is stored in the TOC.
  if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
    if (Subtarget.isUsingPCRelativeCalls()) {
      SDLoc DL(CP);
      EVT Ty = getPointerTy(DAG.getDataLayout());
      SDValue ConstPool = DAG.getTargetConstantPool(
          C, Ty, CP->getAlign(), CP->getOffset(), PPCII::MO_PCREL_FLAG);
      return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, ConstPool);
    }
    setUsesTOCBasePtr(DAG);
    SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0);
    return getTOCEntry(DAG, SDLoc(CP), GA);
  }

  unsigned MOHiFlag, MOLoFlag;
  bool IsPIC = isPositionIndependent();
  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);

  if (IsPIC && Subtarget.isSVR4ABI()) {
    // MO_PIC_FLAG is a target flag and belongs in the TargetFlags argument.
    // The fourth argument of getTargetConstantPool is the offset, so the
    // flag must be preceded by an explicit zero offset (as in the hi/lo
    // calls below) rather than being passed in the offset position.
    SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0,
                                           PPCII::MO_PIC_FLAG);
    return getTOCEntry(DAG, SDLoc(CP), GA);
  }

  SDValue CPIHi =
      DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOHiFlag);
  SDValue CPILo =
      DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOLoFlag);
  return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
}

2887

2888

// For 64-bit PowerPC, prefer the more compact relative encodings.

2889

// This trades 32 bits per jump table entry for one or two instructions

2890

// on the jump site.

2891

unsigned PPCTargetLowering::getJumpTableEncoding() const {

2892

if (isJumpTableRelative())

2893

return MachineJumpTableInfo::EK_LabelDifference32;

2894

2895

return TargetLowering::getJumpTableEncoding();

2896

}

2897

2898

bool PPCTargetLowering::isJumpTableRelative() const {

2899

if (UseAbsoluteJumpTables)

2900

return false;

2901

if (Subtarget.isPPC64() || Subtarget.isAIXABI())

2902

return true;

2903

return TargetLowering::isJumpTableRelative();

2904

}

2905

2906

/// Return the node that relative jump-table entries are based on. Only PPC64
/// non-AIX targets with a code model other than Small or Medium use a
/// GlobalBaseReg node; every other configuration uses the generic
/// TargetLowering implementation.
SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
                                                    SelectionDAG &DAG) const {
  if (Subtarget.isPPC64() && !Subtarget.isAIXABI()) {
    const CodeModel::Model CM = getTargetMachine().getCodeModel();
    if (CM != CodeModel::Small && CM != CodeModel::Medium)
      return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
                         getPointerTy(DAG.getDataLayout()));
  }

  return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
}

2920

2921

/// MC-level counterpart of getPICJumpTableRelocBase: PPC64 non-AIX targets
/// with a code model other than Small or Medium reference the function's PIC
/// base symbol; every other configuration uses the generic TargetLowering
/// implementation.
const MCExpr *
PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                                                unsigned JTI,
                                                MCContext &Ctx) const {
  if (Subtarget.isPPC64() && !Subtarget.isAIXABI()) {
    const CodeModel::Model CM = getTargetMachine().getCodeModel();
    if (CM != CodeModel::Small && CM != CodeModel::Medium)
      return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
  }

  return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
}

2936

2937

/// Lower a jump-table node to a PC-relative materialization, a TOC entry
/// load, or a hi/lo label reference, depending on the subtarget ABI and the
/// relocation model.
SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
  const EVT PtrVT = Op.getValueType();
  auto *JT = cast<JumpTableSDNode>(Op);
  const int TableIdx = JT->getIndex();

  // isUsingPCRelativeCalls() returns true when PCRelative is enabled.
  if (Subtarget.isUsingPCRelativeCalls()) {
    SDLoc DL(JT);
    const EVT Ty = getPointerTy(DAG.getDataLayout());
    SDValue PCRelTable =
        DAG.getTargetJumpTable(TableIdx, Ty, PPCII::MO_PCREL_FLAG);
    return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, PCRelTable);
  }

  // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
  // The actual address of the table is stored in the TOC.
  if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
    setUsesTOCBasePtr(DAG);
    SDValue TOCTable = DAG.getTargetJumpTable(TableIdx, PtrVT);
    return getTOCEntry(DAG, SDLoc(JT), TOCTable);
  }

  unsigned HiFlags, LoFlags;
  const bool IsPIC = isPositionIndependent();
  getLabelAccessInfo(IsPIC, Subtarget, HiFlags, LoFlags);

  if (IsPIC && Subtarget.isSVR4ABI()) {
    SDValue PICTable =
        DAG.getTargetJumpTable(TableIdx, PtrVT, PPCII::MO_PIC_FLAG);
    return getTOCEntry(DAG, SDLoc(PICTable), PICTable);
  }

  SDValue HiHalf = DAG.getTargetJumpTable(TableIdx, PtrVT, HiFlags);
  SDValue LoHalf = DAG.getTargetJumpTable(TableIdx, PtrVT, LoFlags);
  return LowerLabelRef(HiHalf, LoHalf, IsPIC, DAG);
}

2973

2974

/// Lower a block-address node to a PC-relative materialization, a TOC/GOT
/// entry load, or a hi/lo label reference, depending on the subtarget ABI and
/// the relocation model.
SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
                                             SelectionDAG &DAG) const {
  const EVT PtrVT = Op.getValueType();
  auto *BANode = cast<BlockAddressSDNode>(Op);
  const BlockAddress *Target = BANode->getBlockAddress();

  // isUsingPCRelativeCalls() returns true when PCRelative is enabled.
  if (Subtarget.isUsingPCRelativeCalls()) {
    SDLoc DL(BANode);
    const EVT Ty = getPointerTy(DAG.getDataLayout());
    SDValue PCRelBA = DAG.getTargetBlockAddress(
        Target, Ty, BANode->getOffset(), PPCII::MO_PCREL_FLAG);
    return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, PCRelBA);
  }

  // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
  // The actual BlockAddress is stored in the TOC.
  if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
    setUsesTOCBasePtr(DAG);
    SDValue TOCBA =
        DAG.getTargetBlockAddress(Target, PtrVT, BANode->getOffset());
    return getTOCEntry(DAG, SDLoc(BANode), TOCBA);
  }

  // 32-bit position-independent ELF stores the BlockAddress in the .got.
  if (Subtarget.is32BitELFABI() && isPositionIndependent()) {
    SDValue GOTBA =
        DAG.getTargetBlockAddress(Target, PtrVT, BANode->getOffset());
    return getTOCEntry(DAG, SDLoc(BANode), GOTBA);
  }

  unsigned HiFlags, LoFlags;
  const bool IsPIC = isPositionIndependent();
  getLabelAccessInfo(IsPIC, Subtarget, HiFlags, LoFlags);
  SDValue HiHalf = DAG.getTargetBlockAddress(Target, PtrVT, 0, HiFlags);
  SDValue LoHalf = DAG.getTargetBlockAddress(Target, PtrVT, 0, LoFlags);
  return LowerLabelRef(HiHalf, LoHalf, IsPIC, DAG);
}

3011

3012

/// Lower a thread-local global address according to the TLS model chosen by
/// the target machine for the global (local-exec, initial-exec,
/// general-dynamic or local-dynamic). Emulated TLS is delegated to
/// LowerToTLSEmulatedModel. Reports a fatal error when PC-relative addressing
/// is in use and PC-relative TLS has not been enabled.
SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
                                                 SelectionDAG &DAG) const {
  // FIXME: TLS addresses currently use medium model code sequences,
  // which is the most useful form.  Eventually support for small and
  // large models could be added if users need it, at the cost of
  // additional complexity.
  if (Subtarget.isUsingPCRelativeCalls() && !EnablePPCPCRelTLS)
    report_fatal_error("Thread local storage is not supported with pc-relative"
                       " addressing - please compile with -mno-pcrel");
  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
  if (DAG.getTarget().useEmulatedTLS())
    return LowerToTLSEmulatedModel(GA, DAG);

  SDLoc dl(GA);
  const GlobalValue *GV = GA->getGlobal();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  bool is64bit = Subtarget.isPPC64();
  const Module *M = DAG.getMachineFunction().getFunction().getParent();
  PICLevel::Level picLevel = M->getPICLevel();

  const TargetMachine &TM = getTargetMachine();
  TLSModel::Model Model = TM.getTLSModel(GV);

  if (Model == TLSModel::LocalExec) {
    if (Subtarget.isUsingPCRelativeCalls()) {
      // PC-relative form: materialize the TP-relative address and add it to
      // the thread pointer register.
      SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
      SDValue TGA = DAG.getTargetGlobalAddress(
          GV, dl, PtrVT, 0, (PPCII::MO_PCREL_FLAG | PPCII::MO_TPREL_FLAG));
      SDValue MatAddr =
          DAG.getNode(PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, dl, PtrVT, TGA);
      return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, MatAddr);
    }

    // Otherwise build the address as a hi/lo pair relative to the thread
    // pointer register (X13 on 64-bit, R2 on 32-bit).
    SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
                                               PPCII::MO_TPREL_HA);
    SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
                                               PPCII::MO_TPREL_LO);
    SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
                             : DAG.getRegister(PPC::R2, MVT::i32);

    SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
    return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
  }

  if (Model == TLSModel::InitialExec) {
    bool IsPCRel = Subtarget.isUsingPCRelativeCalls();
    SDValue TGA = DAG.getTargetGlobalAddress(
        GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0);
    SDValue TGATLS = DAG.getTargetGlobalAddress(
        GV, dl, PtrVT, 0,
        IsPCRel ? (PPCII::MO_TLS | PPCII::MO_PCREL_FLAG) : PPCII::MO_TLS);
    SDValue TPOffset;
    if (IsPCRel) {
      // Load the TP offset through a PC-relative materialized address.
      SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, dl, PtrVT, TGA);
      TPOffset = DAG.getLoad(MVT::i64, dl, DAG.getEntryNode(), MatPCRel,
                             MachinePointerInfo());
    } else {
      SDValue GOTPtr;
      if (is64bit) {
        setUsesTOCBasePtr(DAG);
        SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
        GOTPtr =
            DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, PtrVT, GOTReg, TGA);
      } else {
        // 32-bit: the GOT pointer node depends on the relocation model and
        // on the module's PIC level.
        if (!TM.isPositionIndependent())
          GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
        else if (picLevel == PICLevel::SmallPIC)
          GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
        else
          GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
      }
      TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, GOTPtr);
    }
    return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
  }

  if (Model == TLSModel::GeneralDynamic) {
    if (Subtarget.isUsingPCRelativeCalls()) {
      SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
                                               PPCII::MO_GOT_TLSGD_PCREL_FLAG);
      return DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
    }

    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
    SDValue GOTPtr;
    if (is64bit) {
      setUsesTOCBasePtr(DAG);
      SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
      GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
                           GOTReg, TGA);
    } else {
      if (picLevel == PICLevel::SmallPIC)
        GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
      else
        GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
    }
    return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
                       GOTPtr, TGA, TGA);
  }

  if (Model == TLSModel::LocalDynamic) {
    if (Subtarget.isUsingPCRelativeCalls()) {
      SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
                                               PPCII::MO_GOT_TLSLD_PCREL_FLAG);
      SDValue MatPCRel =
          DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
      return DAG.getNode(PPCISD::PADDI_DTPREL, dl, PtrVT, MatPCRel, TGA);
    }

    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
    SDValue GOTPtr;
    if (is64bit) {
      setUsesTOCBasePtr(DAG);
      SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
      GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
                           GOTReg, TGA);
    } else {
      if (picLevel == PICLevel::SmallPIC)
        GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
      else
        GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
    }
    SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
                                  PtrVT, GOTPtr, TGA, TGA);
    SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
                                      PtrVT, TLSAddr, TGA);
    return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
  }

  llvm_unreachable("Unknown TLS model!");
}

3143

3144

/// Lower a global-address node to a PC-relative materialization (optionally
/// followed by a GOT load), a TOC entry load, or a hi/lo label reference,
/// depending on the subtarget ABI and the relocation model.
SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
                                              SelectionDAG &DAG) const {
  const EVT PtrVT = Op.getValueType();
  auto *GlobalNode = cast<GlobalAddressSDNode>(Op);
  SDLoc DL(GlobalNode);
  const GlobalValue *GV = GlobalNode->getGlobal();

  // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
  // The actual address of the GlobalValue is stored in the TOC.
  if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
    if (!Subtarget.isUsingPCRelativeCalls()) {
      setUsesTOCBasePtr(DAG);
      SDValue TOCAddr =
          DAG.getTargetGlobalAddress(GV, DL, PtrVT, GlobalNode->getOffset());
      return getTOCEntry(DAG, DL, TOCAddr);
    }

    const EVT Ty = getPointerTy(DAG.getDataLayout());
    if (!isAccessedAsGotIndirect(Op)) {
      // Direct PC-relative reference to the global.
      SDValue PCRelAddr = DAG.getTargetGlobalAddress(
          GV, DL, Ty, GlobalNode->getOffset(), PPCII::MO_PCREL_FLAG);
      return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, PCRelAddr);
    }

    // GOT-indirect: materialize the PC-relative GOT slot address and load
    // the global's address from it.
    SDValue GOTSlot = DAG.getTargetGlobalAddress(
        GV, DL, Ty, GlobalNode->getOffset(),
        PPCII::MO_PCREL_FLAG | PPCII::MO_GOT_FLAG);
    SDValue SlotAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GOTSlot);
    return DAG.getLoad(MVT::i64, DL, DAG.getEntryNode(), SlotAddr,
                       MachinePointerInfo());
  }

  unsigned HiFlags, LoFlags;
  const bool IsPIC = isPositionIndependent();
  getLabelAccessInfo(IsPIC, Subtarget, HiFlags, LoFlags, GV);

  if (IsPIC && Subtarget.isSVR4ABI()) {
    SDValue PICAddr = DAG.getTargetGlobalAddress(
        GV, DL, PtrVT, GlobalNode->getOffset(), PPCII::MO_PIC_FLAG);
    return getTOCEntry(DAG, DL, PICAddr);
  }

  SDValue HiHalf = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
                                              GlobalNode->getOffset(), HiFlags);
  SDValue LoHalf = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
                                              GlobalNode->getOffset(), LoFlags);

  return LowerLabelRef(HiHalf, LoHalf, IsPIC, DAG);
}

3193

3194

/// Custom-lower a SETCC node. Returns the replacement value, \p Op itself when
/// the node should be kept as is, or an empty SDValue when no custom lowering
/// is performed here.
SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
  const ISD::CondCode Pred = cast<CondCodeSDNode>(Op.getOperand(2))->get();
  SDLoc dl(Op);
  const bool IsEqualityPred = Pred == ISD::SETEQ || Pred == ISD::SETNE;

  if (Op.getValueType() == MVT::v2i64) {
    // Results of v2i64 with other operand types are handled in the usual way.
    if (Op.getOperand(0).getValueType() != MVT::v2i64)
      return Op;

    // When the operands themselves are v2i64 values, we need to do something
    // special because VSX has no underlying comparison operations for these.
    // Equality can be handled by casting to the legal type for Altivec
    // comparisons, everything else needs to be expanded.
    if (!IsEqualityPred)
      return SDValue();

    SDValue LHSAsV4 =
        DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0));
    SDValue RHSAsV4 =
        DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1));
    SDValue CmpV4 = DAG.getSetCC(dl, MVT::v4i32, LHSAsV4, RHSAsV4, Pred);
    return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, CmpV4);
  }

  // If we're comparing for equality to zero, expose the fact that this is
  // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
  // fold the new nodes.
  if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
    return V;

  // Leave comparisons against 0 and -1 alone for now, since they're usually
  // optimized.  FIXME: revisit this when we can custom lower all setcc
  // optimizations.
  if (auto *RHSConst = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
    if (RHSConst->isAllOnesValue() || RHSConst->isNullValue())
      return SDValue();

  // If we have an integer seteq/setne, turn it into a compare against zero
  // by xor'ing the rhs with the lhs, which is faster than setting a
  // condition register, reading it back out, and masking the correct bit.  The
  // normal approach here uses sub to do this instead of xor.  Using xor exposes
  // the result to other bit-twiddling opportunities.
  const EVT OperandVT = Op.getOperand(0).getValueType();
  if (!OperandVT.isInteger() || !IsEqualityPred)
    return SDValue();

  const EVT ResultVT = Op.getValueType();
  SDValue Diff = DAG.getNode(ISD::XOR, dl, OperandVT, Op.getOperand(0),
                             Op.getOperand(1));
  SDValue ZeroOp = DAG.getConstant(0, dl, OperandVT);
  return DAG.getSetCC(dl, ResultVT, Diff, ZeroOp, Pred);
}

3247

3248

/// Lower a VAARG node for 32-bit PowerPC (asserts that the subtarget is not
/// PPC64).
///
/// The va_list is read at these byte offsets from the list pointer: the gpr
/// index byte at 0, the fpr index byte at 1, the overflow-area pointer at 4
/// and the register-save-area pointer at 8. The argument is loaded from the
/// register save area while the relevant index is below 8 and from the
/// overflow area otherwise; the index byte and the overflow-area pointer are
/// written back with their updated values.
SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
  SDNode *Node = Op.getNode();
  EVT VT = Node->getValueType(0);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue InChain = Node->getOperand(0);
  SDValue VAListPtr = Node->getOperand(1);
  const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
  SDLoc dl(Node);

  assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");

  // gpr_index
  SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
                                    VAListPtr, MachinePointerInfo(SV), MVT::i8);
  InChain = GprIndex.getValue(1);

  if (VT == MVT::i64) {
    // Check if GprIndex is even
    SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
                                 DAG.getConstant(1, dl, MVT::i32));
    SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
                                DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
    SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
                                          DAG.getConstant(1, dl, MVT::i32));
    // Align GprIndex to be even if it isn't
    GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
                           GprIndex);
  }

  // fpr index is 1 byte after gpr
  SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
                               DAG.getConstant(1, dl, MVT::i32));

  // fpr
  SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
                                    FprPtr, MachinePointerInfo(SV), MVT::i8);
  InChain = FprIndex.getValue(1);

  SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
                                       DAG.getConstant(8, dl, MVT::i32));

  SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
                                        DAG.getConstant(4, dl, MVT::i32));

  // areas
  SDValue OverflowArea =
      DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
  InChain = OverflowArea.getValue(1);

  SDValue RegSaveArea =
      DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
  InChain = RegSaveArea.getValue(1);

  // CC is true while the relevant index is below 8, i.e. while the argument
  // is taken from the register save area rather than the overflow area.
  SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
                            DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);

  // adjustment constant: gpr_index * 4 or fpr_index * 8
  SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
                                    VT.isInteger() ? GprIndex : FprIndex,
                                    DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
                                                    MVT::i32));

  // OurReg = RegSaveArea + RegConstant
  SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
                               RegConstant);

  // Floating types are 32 bytes into RegSaveArea
  if (VT.isFloatingPoint())
    OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
                         DAG.getConstant(32, dl, MVT::i32));

  // increase {f,g}pr_index by 1 (or 2 if VT is i64)
  SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
                                   VT.isInteger() ? GprIndex : FprIndex,
                                   DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
                                                   MVT::i32));

  // Write the updated index back to the byte it was read from.
  InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
                              VT.isInteger() ? VAListPtr : FprPtr,
                              MachinePointerInfo(SV), MVT::i8);

  // determine if we should load from reg_save_area or overflow_area
  SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);

  // advance overflow_area by 4 (integer) or 8 (otherwise), but only when the
  // argument was taken from the overflow area (CC false)
  SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
                                          DAG.getConstant(VT.isInteger() ? 4 : 8,
                                          dl, MVT::i32));

  OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
                             OverflowAreaPlusN);

  InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
                              MachinePointerInfo(), MVT::i32);

  return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
}

3346

3347

/// Lower a VACOPY node for 32-bit PowerPC (asserts that the subtarget is not
/// PPC64) as a 12-byte memcpy from the source va_list (operand 2) to the
/// destination va_list (operand 1), chained on operand 0.
SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
  assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");

  // We have to copy the entire va_list struct:
  // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte
  return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2),
                       DAG.getConstant(12, SDLoc(Op), MVT::i32), Align(8),
                       false, true, false, MachinePointerInfo(),
                       MachinePointerInfo());
}

3357

3358

/// Lower ADJUST_TRAMPOLINE by returning operand 0 of the node unchanged. The
/// operation is rejected with a fatal error on AIX.
SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
                                                  SelectionDAG &DAG) const {
  if (Subtarget.isAIXABI())
    report_fatal_error("ADJUST_TRAMPOLINE operation is not supported on AIX.");

  SDValue Trampoline = Op.getOperand(0);
  return Trampoline;
}

3365

3366

/// Lower INIT_TRAMPOLINE to a call to
/// __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg) and return the chain
/// produced by that call. The operation is rejected with a fatal error on AIX.
SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
                                                SelectionDAG &DAG) const {
  if (Subtarget.isAIXABI())
    report_fatal_error("INIT_TRAMPOLINE operation is not supported on AIX.");

  SDValue Chain = Op.getOperand(0);
  SDValue TrampolineAddr = Op.getOperand(1); // trampoline
  SDValue NestedFn = Op.getOperand(2);       // nested function
  SDValue NestArg = Op.getOperand(3);        // 'nest' parameter value
  SDLoc dl(Op);

  const EVT PtrVT = getPointerTy(DAG.getDataLayout());
  const bool isPPC64 = (PtrVT == MVT::i64);
  Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());

  // Every argument is passed with the pointer-sized integer type.
  TargetLowering::ArgListTy Args;
  auto AppendArg = [&Args, IntPtrTy](SDValue Value) {
    TargetLowering::ArgListEntry Entry;
    Entry.Ty = IntPtrTy;
    Entry.Node = Value;
    Args.push_back(Entry);
  };

  AppendArg(TrampolineAddr);

  // TrampSize == (isPPC64 ? 48 : 40);
  AppendArg(DAG.getConstant(isPPC64 ? 48 : 40, dl,
                            isPPC64 ? MVT::i64 : MVT::i32));

  AppendArg(NestedFn);
  AppendArg(NestArg);

  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
      CallingConv::C, Type::getVoidTy(*DAG.getContext()),
      DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));

  // Only the output chain of the call is of interest.
  return LowerCallTo(CLI).second;
}

3404

3405

/// Lower an ISD::VASTART node.  Operand 0 of \p Op is used as the incoming
/// chain, operand 1 as the address of the va_list object to initialise, and
/// operand 2 as the SrcValue node that supplies the IR value for the memory
/// operands.  Returns the chain of the last store emitted.
SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  SDLoc dl(Op);

  if (Subtarget.isPPC64() || Subtarget.isAIXABI()) {
    // Here va_list is a single pointer: store the address of the
    // VarArgsFrameIndex slot into the memory location argument.
    SDValue VarArgsSlot =
        DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
    const Value *SrcVal = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
    return DAG.getStore(Op.getOperand(0), dl, VarArgsSlot, Op.getOperand(1),
                        MachinePointerInfo(SrcVal));
  }

  // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
  // We suppose the given va_list is already allocated.
  //
  // typedef struct {
  //  char gpr;     /* index into the array of 8 GPRs
  //                 * stored in the register save area
  //                 * gpr=0 corresponds to r3,
  //                 * gpr=1 to r4, etc.
  //                 */
  //  char fpr;     /* index into the array of 8 FPRs
  //                 * stored in the register save area
  //                 * fpr=0 corresponds to f1,
  //                 * fpr=1 to f2, etc.
  //                 */
  //  char *overflow_arg_area;
  //                /* location on stack that holds
  //                 * the next overflow argument
  //                 */
  //  char *reg_save_area;
  //               /* where r3:r10 and f1:f8 (if saved)
  //                * are stored
  //                */
  // } va_list[1];

  // Values to be written into the four fields, in field order.
  SDValue NumGPRs =
      DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
  SDValue NumFPRs =
      DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
  SDValue OverflowAreaFI =
      DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(), PtrVT);
  SDValue RegSaveAreaFI =
      DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

  // Byte distances between consecutive fields of the struct.
  const uint64_t PtrBytes = PtrVT.getSizeInBits() / 8;
  const uint64_t OverflowToRegSave = PtrBytes;    // one pointer wide
  SDValue OverflowToRegSaveC = DAG.getConstant(OverflowToRegSave, dl, PtrVT);

  const uint64_t FPRToOverflow = PtrBytes - 1;    // skips the padding bytes
  SDValue FPRToOverflowC = DAG.getConstant(FPRToOverflow, dl, PtrVT);

  const uint64_t GPRToFPR = 1;                    // the two chars are adjacent
  SDValue GPRToFPRC = DAG.getConstant(GPRToFPR, dl, PtrVT);

  const Value *SrcVal = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  SDValue InChain = Op.getOperand(0);
  SDValue VAListPtr = Op.getOperand(1);

  // Field 'gpr' (byte 0): number of integer registers already consumed.
  SDValue GPRStore = DAG.getTruncStore(InChain, dl, NumGPRs, VAListPtr,
                                       MachinePointerInfo(SrcVal), MVT::i8);
  uint64_t FieldOffset = GPRToFPR;
  SDValue FieldPtr =
      DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, GPRToFPRC);

  // Field 'fpr' (byte 1): number of floating-point registers consumed.
  SDValue FPRStore =
      DAG.getTruncStore(GPRStore, dl, NumFPRs, FieldPtr,
                        MachinePointerInfo(SrcVal, FieldOffset), MVT::i8);
  FieldOffset += FPRToOverflow;
  FieldPtr = DAG.getNode(ISD::ADD, dl, PtrVT, FieldPtr, FPRToOverflowC);

  // Field 'overflow_arg_area': arguments that were passed on the stack.
  SDValue OverflowStore =
      DAG.getStore(FPRStore, dl, OverflowAreaFI, FieldPtr,
                   MachinePointerInfo(SrcVal, FieldOffset));
  FieldOffset += OverflowToRegSave;
  FieldPtr = DAG.getNode(ISD::ADD, dl, PtrVT, FieldPtr, OverflowToRegSaveC);

  // Field 'reg_save_area': arguments that were passed in registers.
  return DAG.getStore(OverflowStore, dl, RegSaveAreaFI, FieldPtr,
                      MachinePointerInfo(SrcVal, FieldOffset));
}

3488

3489

/// FPR - The set of FP registers that should be allocated for arguments

3490

/// on Darwin and AIX.

3491

static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,

3492

PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,

3493

PPC::F11, PPC::F12, PPC::F13};

3494

3495

/// CalculateStackSlotSize - Calculates the size reserved for this argument on

3496

/// the stack.

3497

static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,

3498

unsigned PtrByteSize) {

3499

unsigned ArgSize = ArgVT.getStoreSize();

3500

if (Flags.isByVal())

3501

ArgSize = Flags.getByValSize();

3502

3503

// Round up to multiples of the pointer size, except for array members,

3504

// which are always packed.

3505

if (!Flags.isInConsecutiveRegs())

3506

ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;

3507

3508

return ArgSize;

3509

}

3510

3511

/// CalculateStackSlotAlignment - Calculates the alignment of this argument

3512

/// on the stack.

3513

static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,

3514

ISD::ArgFlagsTy Flags,

3515

unsigned PtrByteSize) {

3516

Align Alignment(PtrByteSize);

3517

3518

// Altivec parameters are padded to a 16 byte boundary.

3519

if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||

3520

ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||

3521

ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||

3522

ArgVT == MVT::v1i128 || ArgVT == MVT::f128)

3523

Alignment = Align(16);

3524

3525

// ByVal parameters are aligned as requested.

3526

if (Flags.isByVal()) {

3527

auto BVAlign = Flags.getNonZeroByValAlign();

3528

if (BVAlign > PtrByteSize) {

3529

if (BVAlign.value() % PtrByteSize != 0)

3530

llvm_unreachable(::llvm::llvm_unreachable_internal("ByVal alignment is not a multiple of the pointer size"
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 3531)

3531

"ByVal alignment is not a multiple of the pointer size")::llvm::llvm_unreachable_internal("ByVal alignment is not a multiple of the pointer size"
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 3531);

3532

3533

Alignment = BVAlign;

3534

}

3535

}

3536

3537

// Array members are always packed to their original alignment.

3538

if (Flags.isInConsecutiveRegs()) {

3539

// If the array member was split into multiple registers, the first

3540

// needs to be aligned to the size of the full type. (Except for

3541

// ppcf128, which is only aligned as its f64 components.)

3542

if (Flags.isSplit() && OrigVT != MVT::ppcf128)

3543

Alignment = Align(OrigVT.getStoreSize());

3544

else

3545

Alignment = Align(ArgVT.getStoreSize());

3546

}

3547

3548

return Alignment;

3549

}

3550

3551

/// CalculateStackSlotUsed - Return whether this argument will use its

3552

/// stack slot (instead of being passed in registers). ArgOffset,

3553

/// AvailableFPRs, and AvailableVRs must hold the current argument

3554

/// position, and will be updated to account for this argument.

3555

static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags,

3556

unsigned PtrByteSize, unsigned LinkageSize,

3557

unsigned ParamAreaSize, unsigned &ArgOffset,

3558

unsigned &AvailableFPRs,

3559

unsigned &AvailableVRs) {

3560

bool UseMemory = false;

3561

3562

// Respect alignment of argument on the stack.

3563

Align Alignment =

3564

CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);

3565

ArgOffset = alignTo(ArgOffset, Alignment);

3566

// If there's no space left in the argument save area, we must

3567

// use memory (this check also catches zero-sized arguments).

3568

if (ArgOffset >= LinkageSize + ParamAreaSize)

3569

UseMemory = true;

3570

3571

// Allocate argument on the stack.

3572

ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);

3573

if (Flags.isInConsecutiveRegsLast())

3574

ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;

3575

// If we overran the argument save area, we must use memory

3576

// (this check catches arguments passed partially in memory)

3577

if (ArgOffset > LinkageSize + ParamAreaSize)

3578

UseMemory = true;

3579

3580

// However, if the argument is actually passed in an FPR or a VR,

3581

// we don't use memory after all.

3582

if (!Flags.isByVal()) {

3583

if (ArgVT == MVT::f32 || ArgVT == MVT::f64)

3584

if (AvailableFPRs > 0) {

3585

--AvailableFPRs;

3586

return false;

3587

}

3588

if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||

3589

ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||

3590

ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||

3591

ArgVT == MVT::v1i128 || ArgVT == MVT::f128)

3592

if (AvailableVRs > 0) {

3593

--AvailableVRs;

3594

return false;

3595

}

3596

}

3597

3598

return UseMemory;

3599

}

3600

3601

/// EnsureStackAlignment - Round stack frame size up from NumBytes to

3602

/// ensure minimum alignment required for target.

3603

static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,

3604

unsigned NumBytes) {

3605

return alignTo(NumBytes, Lowering->getStackAlign());

3606

}

3607

3608

/// Entry point for lowering incoming formal arguments.  Does no work itself:
/// it forwards every parameter unchanged to the ABI-specific implementation
/// selected by the subtarget, testing AIX first, then 64-bit ELF, then 32-bit
/// ELF, and falling back to the Darwin implementation.
SDValue PPCTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  // AIX ABI.
  if (Subtarget.isAIXABI())
    return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
                                    InVals);

  // 64-bit ELF ABI.
  if (Subtarget.is64BitELFABI())
    return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
                                       InVals);

  // 32-bit ELF ABI.
  if (Subtarget.is32BitELFABI())
    return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
                                       InVals);

  // None of the above matched.
  return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins, dl, DAG,
                                     InVals);
}

3625

3626

/// Lower the incoming formal arguments of a function for the 32-bit SVR4 ABI.
///
/// One SDValue per entry of \p Ins is appended to \p InVals: a CopyFromReg
/// for arguments assigned to registers, or a load from a fixed stack object
/// for arguments assigned to memory.  The minimum caller-reserved area is
/// recorded in the PPCFunctionInfo.  For variadic functions the argument
/// registers are additionally spilled to a fresh stack object so va_start can
/// find them, and the returned chain is a TokenFactor over those stores;
/// otherwise \p Chain is returned unchanged.
SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  // 32-bit SVR4 ABI Stack Frame Layout:
  //              +-----------------------------------+
  //        +-->  |            Back chain             |
  //        |     +-----------------------------------+
  //        |     | Floating-point register save area |
  //        |     +-----------------------------------+
  //        |     |    General register save area     |
  //        |     +-----------------------------------+
  //        |     |          CR save word             |
  //        |     +-----------------------------------+
  //        |     |         VRSAVE save word          |
  //        |     +-----------------------------------+
  //        |     |         Alignment padding         |
  //        |     +-----------------------------------+
  //        |     |     Vector register save area     |
  //        |     +-----------------------------------+
  //        |     |       Local variable space        |
  //        |     +-----------------------------------+
  //        |     |        Parameter list area        |
  //        |     +-----------------------------------+
  //        |     |           LR save word            |
  //        |     +-----------------------------------+
  // SP-->  +---  |            Back chain             |
  //              +-----------------------------------+
  //
  // Specifications:
  //   System V Application Binary Interface PowerPC Processor Supplement
  //   AltiVec Technology Programming Interface Manual

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  EVT PtrVT = getPointerTy(MF.getDataLayout());
  // Potential tail calls could cause overwriting of argument stack slots, so
  // the slots are only immutable when guaranteed-TCO fastcc is not in play.
  const bool isImmutable =
      !getTargetMachine().Options.GuaranteedTailCallOpt ||
      CallConv != CallingConv::Fast;
  const Align PtrAlign(4);

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
                    *DAG.getContext());

  // Reserve space for the linkage area on the stack.
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  CCInfo.AllocateStack(LinkageSize, PtrAlign);
  if (useSoftFloat())
    CCInfo.PreAnalyzeFormalArguments(Ins);

  CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
  CCInfo.clearWasPPCF128();

  // Note: the SPE f64 path below consumes two locations per iteration, so the
  // index is advanced inside the body as well.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];

    if (!VA.isRegLoc()) {
      // Argument stored in memory.
      assert(VA.isMemLoc());

      // SlotBytes is the extended size of the argument type in the stack;
      // ValueBytes is the actual size of the argument type.
      const unsigned SlotBytes = VA.getLocVT().getStoreSize();
      const unsigned ValueBytes = VA.getValVT().getStoreSize();
      // Stack objects in PPC32 are right justified.
      const unsigned SlotOffset =
          VA.getLocMemOffset() + (SlotBytes - ValueBytes);
      int FI = MFI.CreateFixedObject(SlotBytes, SlotOffset, isImmutable);

      // Create load nodes to retrieve arguments from the stack.
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      InVals.push_back(
          DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
      continue;
    }

    // Arguments stored in registers: choose the register class first.
    EVT ValVT = VA.getValVT();
    const TargetRegisterClass *RC;

    switch (ValVT.getSimpleVT().SimpleTy) {
    case MVT::i1:
    case MVT::i32:
      RC = &PPC::GPRCRegClass;
      break;
    case MVT::f32:
      if (Subtarget.hasP8Vector())
        RC = &PPC::VSSRCRegClass;
      else if (Subtarget.hasSPE())
        RC = &PPC::GPRCRegClass;
      else
        RC = &PPC::F4RCRegClass;
      break;
    case MVT::f64:
      if (Subtarget.hasVSX())
        RC = &PPC::VSFRCRegClass;
      else if (Subtarget.hasSPE())
        // SPE passes doubles in GPR pairs.
        RC = &PPC::GPRCRegClass;
      else
        RC = &PPC::F8RCRegClass;
      break;
    case MVT::v16i8:
    case MVT::v8i16:
    case MVT::v4i32:
    case MVT::v4f32:
    case MVT::v2f64:
    case MVT::v2i64:
      // Every supported vector type lives in a VR.
      RC = &PPC::VRRCRegClass;
      break;
    default:
      llvm_unreachable("ValVT not supported by formal arguments Lowering");
    }

    // Transform the arguments stored in physical registers into virtual ones.
    SDValue ArgValue;
    if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
      // The double arrives as two i32 halves in consecutive locations.
      assert(i + 1 < e && "No second half of double precision argument");
      unsigned RegLo = MF.addLiveIn(VA.getLocReg(), RC);
      unsigned RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
      SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
      SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
      if (!Subtarget.isLittleEndian())
        std::swap(ArgValueLo, ArgValueHi);
      ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
                             ArgValueHi);
    } else {
      unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
      // An i1 is copied out as i32 and then truncated.
      ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
                                    ValVT == MVT::i1 ? MVT::i32 : ValVT);
      if (ValVT == MVT::i1)
        ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
    }

    InVals.push_back(ArgValue);
  }

  // Assign locations to all of the incoming aggregate by value arguments.
  // Aggregates passed by value are stored in the local variable space of the
  // caller's stack frame, right above the parameter list area.
  SmallVector<CCValAssign, 16> ByValArgLocs;
  CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                      ByValArgLocs, *DAG.getContext());

  // Reserve stack space for the allocations in CCInfo.
  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrAlign);

  CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);

  // Area that is at least reserved in the caller of this function: never
  // smaller than the linkage area.
  const unsigned UnalignedReserved =
      std::max(CCByValInfo.getNextStackOffset(), LinkageSize);

  // Set the size that is at least reserved in caller of this function.  Tail
  // call optimized function's reserved stack space needs to be aligned so
  // that taking the difference between two stack areas will result in an
  // aligned stack.
  FuncInfo->setMinReservedArea(
      EnsureStackAlignment(Subtarget.getFrameLowering(), UnalignedReserved));

  // Non-variadic functions emit no stores, so the chain is unchanged.
  if (!isVarArg)
    return Chain;

  SmallVector<SDValue, 8> MemOps;

  // The function takes a variable number of arguments: make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  static const MCPhysReg GPArgRegs[] = {
      PPC::R3, PPC::R4, PPC::R5, PPC::R6,
      PPC::R7, PPC::R8, PPC::R9, PPC::R10,
  };
  const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);

  static const MCPhysReg FPArgRegs[] = {
      PPC::F1, PPC::F2, PPC::F3, PPC::F4,
      PPC::F5, PPC::F6, PPC::F7, PPC::F8,
  };
  // No FP argument registers are saved for soft-float or SPE.
  const unsigned NumFPArgRegs =
      (useSoftFloat() || hasSPE()) ? 0 : array_lengthof(FPArgRegs);

  FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
  FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));

  // Make room for NumGPArgRegs and NumFPArgRegs.
  int Depth = NumGPArgRegs * PtrVT.getSizeInBits() / 8 +
              NumFPArgRegs * MVT(MVT::f64).getSizeInBits() / 8;

  FuncInfo->setVarArgsStackOffset(MFI.CreateFixedObject(
      PtrVT.getSizeInBits() / 8, CCInfo.getNextStackOffset(), true));

  FuncInfo->setVarArgsFrameIndex(
      MFI.CreateStackObject(Depth, Align(8), false));
  SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

  // Store each register of Regs (as a value of type RegVT) to the address in
  // FIN, then advance FIN by StrideBytes for the next register.
  auto SpillArgRegs = [&](ArrayRef<MCPhysReg> Regs,
                          const TargetRegisterClass *RegClass, EVT RegVT,
                          uint64_t StrideBytes) {
    for (MCPhysReg PhysReg : Regs) {
      // Get an existing live-in vreg, or add a new one.
      unsigned VReg = MF.getRegInfo().getLiveInVirtReg(PhysReg);
      if (!VReg)
        VReg = MF.addLiveIn(PhysReg, RegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, RegVT);
      SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
      MemOps.push_back(Store);
      // Increment the address for the next argument to store.
      SDValue PtrOff = DAG.getConstant(StrideBytes, dl, PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }
  };

  // The fixed integer arguments of a variadic function are stored to the
  // VarArgsFrameIndex on the stack so that they may be loaded by
  // dereferencing the result of va_next.
  SpillArgRegs(GPArgRegs, &PPC::GPRCRegClass, PtrVT,
               PtrVT.getSizeInBits() / 8);

  // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
  // is set.
  // The double arguments are stored to the VarArgsFrameIndex on the stack,
  // directly after the integer registers.
  SpillArgRegs(ArrayRef<MCPhysReg>(FPArgRegs, NumFPArgRegs),
               &PPC::F8RCRegClass, MVT::f64,
               MVT(MVT::f64).getSizeInBits() / 8);

  if (!MemOps.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);

  return Chain;
}

3875

3876

// PPC64 passes i8, i16, and i32 values in i64 registers. Promote

3877

// value to MVT::i64 and then truncate to the correct register size.

3878

SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,

3879

EVT ObjectVT, SelectionDAG &DAG,

3880

SDValue ArgVal,

3881

const SDLoc &dl) const {

3882

if (Flags.isSExt())

3883

ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,

3884

DAG.getValueType(ObjectVT));

3885

else if (Flags.isZExt())

3886

ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,

3887

DAG.getValueType(ObjectVT));

3888

3889

return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);

3890

}

3891

3892

SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(

3893

SDValue Chain, CallingConv::ID CallConv, bool isVarArg,

3894

const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,

3895

SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

3896

// TODO: add description of PPC stack frame format, or at least some docs.

3897

//

3898

bool isELFv2ABI = Subtarget.isELFv2ABI();

3899

bool isLittleEndian = Subtarget.isLittleEndian();

3900

MachineFunction &MF = DAG.getMachineFunction();

3901

MachineFrameInfo &MFI = MF.getFrameInfo();

3902

PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

3903

3904

assert(!(CallConv == CallingConv::Fast && isVarArg) &&((!(CallConv == CallingConv::Fast && isVarArg) &&
"fastcc not supported on varargs functions") ? static_cast<
void> (0) : __assert_fail ("!(CallConv == CallingConv::Fast && isVarArg) && \"fastcc not supported on varargs functions\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 3905, __PRETTY_FUNCTION__))

3905

"fastcc not supported on varargs functions")((!(CallConv == CallingConv::Fast && isVarArg) &&
"fastcc not supported on varargs functions") ? static_cast<
void> (0) : __assert_fail ("!(CallConv == CallingConv::Fast && isVarArg) && \"fastcc not supported on varargs functions\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 3905, __PRETTY_FUNCTION__));

3906

3907

EVT PtrVT = getPointerTy(MF.getDataLayout());

3908

// Potential tail calls could cause overwriting of argument stack slots.

3909

bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&

3910

(CallConv == CallingConv::Fast));

3911

unsigned PtrByteSize = 8;

3912

unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();

3913

3914

static const MCPhysReg GPR[] = {

3915

PPC::X3, PPC::X4, PPC::X5, PPC::X6,

3916

PPC::X7, PPC::X8, PPC::X9, PPC::X10,

3917

};

3918

static const MCPhysReg VR[] = {

3919

PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,

3920

PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13

3921

};

3922

3923

const unsigned Num_GPR_Regs = array_lengthof(GPR);

3924

const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;

3925

const unsigned Num_VR_Regs = array_lengthof(VR);

3926

3927

// Do a first pass over the arguments to determine whether the ABI

3928

// guarantees that our caller has allocated the parameter save area

3929

// on its stack frame. In the ELFv1 ABI, this is always the case;

3930

// in the ELFv2 ABI, it is true if this is a vararg function or if

3931

// any parameter is located in a stack slot.

3932

3933

bool HasParameterArea = !isELFv2ABI || isVarArg;

3934

unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;

3935

unsigned NumBytes = LinkageSize;

3936

unsigned AvailableFPRs = Num_FPR_Regs;

3937

unsigned AvailableVRs = Num_VR_Regs;

3938

for (unsigned i = 0, e = Ins.size(); i != e; ++i) {

3939

if (Ins[i].Flags.isNest())

3940

continue;

3941

3942

if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,

3943

PtrByteSize, LinkageSize, ParamAreaSize,

3944

NumBytes, AvailableFPRs, AvailableVRs))

3945

HasParameterArea = true;

3946

}

3947

3948

// Add DAG nodes to load the arguments or copy them out of registers. On

3949

// entry to a function on PPC, the arguments start after the linkage area,

3950

// although the first ones are often in registers.

3951

3952

unsigned ArgOffset = LinkageSize;

3953

unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;

3954

SmallVector<SDValue, 8> MemOps;

3955

Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();

3956

unsigned CurArgIdx = 0;

3957

for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {

3958

SDValue ArgVal;

3959

bool needsLoad = false;

3960

EVT ObjectVT = Ins[ArgNo].VT;

3961

EVT OrigVT = Ins[ArgNo].ArgVT;

3962

unsigned ObjSize = ObjectVT.getStoreSize();

3963

unsigned ArgSize = ObjSize;

3964

ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;

3965

if (Ins[ArgNo].isOrigArg()) {

3966

std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);

3967

CurArgIdx = Ins[ArgNo].getOrigArgIndex();

3968

}

3969

// We re-align the argument offset for each argument, except when using the

3970

// fast calling convention, when we need to make sure we do that only when

3971

// we'll actually use a stack slot.

3972

unsigned CurArgOffset;

3973

Align Alignment;

3974

auto ComputeArgOffset = [&]() {

3975

/* Respect alignment of argument on the stack. */

3976

Alignment =

3977

CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);

3978

ArgOffset = alignTo(ArgOffset, Alignment);

3979

CurArgOffset = ArgOffset;

3980

};

3981

3982

if (CallConv != CallingConv::Fast) {

3983

ComputeArgOffset();

3984

3985

/* Compute GPR index associated with argument offset. */

3986

GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;

3987

GPR_idx = std::min(GPR_idx, Num_GPR_Regs);

3988

}

3989

3990

// FIXME the codegen can be much improved in some cases.

3991

// We do not have to keep everything in memory.

3992

if (Flags.isByVal()) {

3993

assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit")((Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit"
) ? static_cast<void> (0) : __assert_fail ("Ins[ArgNo].isOrigArg() && \"Byval arguments cannot be implicit\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 3993, __PRETTY_FUNCTION__));

3994

3995

if (CallConv == CallingConv::Fast)

3996

ComputeArgOffset();

3997

3998

// ObjSize is the true size, ArgSize rounded up to multiple of registers.

3999

ObjSize = Flags.getByValSize();

4000

ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;

4001

// Empty aggregate parameters do not take up registers. Examples:

4002

// struct { } a;

4003

// union { } b;

4004

// int c[0];

4005

// etc. However, we have to provide a place-holder in InVals, so

4006

// pretend we have an 8-byte item at the current address for that

4007

// purpose.

4008

if (!ObjSize) {

4009

int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);

4010

SDValue FIN = DAG.getFrameIndex(FI, PtrVT);

4011

InVals.push_back(FIN);

4012

continue;

4013

}

4014

4015

// Create a stack object covering all stack doublewords occupied

4016

// by the argument. If the argument is (fully or partially) on

4017

// the stack, or if the argument is fully in registers but the

4018

// caller has allocated the parameter save anyway, we can refer

4019

// directly to the caller's stack frame. Otherwise, create a

4020

// local copy in our own frame.

4021

int FI;

4022

if (HasParameterArea ||

4023

ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)

4024

FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);

4025

else

4026

FI = MFI.CreateStackObject(ArgSize, Alignment, false);

4027

SDValue FIN = DAG.getFrameIndex(FI, PtrVT);

4028

4029

// Handle aggregates smaller than 8 bytes.

4030

if (ObjSize < PtrByteSize) {

4031

// The value of the object is its address, which differs from the

4032

// address of the enclosing doubleword on big-endian systems.

4033

SDValue Arg = FIN;

4034

if (!isLittleEndian) {

4035

SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);

4036

Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);

4037

}

4038

InVals.push_back(Arg);

4039

4040

if (GPR_idx != Num_GPR_Regs) {

4041

unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);

4042

FuncInfo->addLiveInAttr(VReg, Flags);

4043

SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);

4044

SDValue Store;

4045

4046

if (ObjSize==1 || ObjSize==2 || ObjSize==4) {

4047

EVT ObjType = (ObjSize == 1 ? MVT::i8 :

4048

(ObjSize == 2 ? MVT::i16 : MVT::i32));

4049

Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,

4050

MachinePointerInfo(&*FuncArg), ObjType);

4051

} else {

4052

// For sizes that don't fit a truncating store (3, 5, 6, 7),

4053

// store the whole register as-is to the parameter save area

4054

// slot.

4055

Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,

4056

MachinePointerInfo(&*FuncArg));

4057

}

4058

4059

MemOps.push_back(Store);

4060

}

4061

// Whether we copied from a register or not, advance the offset

4062

// into the parameter save area by a full doubleword.

4063

ArgOffset += PtrByteSize;

4064

continue;

4065

}

4066

4067

// The value of the object is its address, which is the address of

4068

// its first stack doubleword.

4069

InVals.push_back(FIN);

4070

4071

// Store whatever pieces of the object are in registers to memory.

4072

for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {

4073

if (GPR_idx == Num_GPR_Regs)

4074

break;

4075

4076

unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);

4077

FuncInfo->addLiveInAttr(VReg, Flags);

4078

SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);

4079

SDValue Addr = FIN;

4080

if (j) {

4081

SDValue Off = DAG.getConstant(j, dl, PtrVT);

4082

Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);

4083

}

4084

SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr,

4085

MachinePointerInfo(&*FuncArg, j));

4086

MemOps.push_back(Store);

4087

++GPR_idx;

4088

}

4089

ArgOffset += ArgSize;

4090

continue;

4091

}

4092

4093

switch (ObjectVT.getSimpleVT().SimpleTy) {

4094

default: llvm_unreachable("Unhandled argument type!")::llvm::llvm_unreachable_internal("Unhandled argument type!",
"/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 4094);

4095

case MVT::i1:

4096

case MVT::i32:

4097

case MVT::i64:

4098

if (Flags.isNest()) {

4099

// The 'nest' parameter, if any, is passed in R11.

4100

unsigned VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);

4101

ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

4102

4103

if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)

4104

ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);

4105

4106

break;

4107

}

4108

4109

// These can be scalar arguments or elements of an integer array type

4110

// passed directly. Clang may use those instead of "byval" aggregate

4111

// types to avoid forcing arguments to memory unnecessarily.

4112

if (GPR_idx != Num_GPR_Regs) {

4113

unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);

4114

FuncInfo->addLiveInAttr(VReg, Flags);

4115

ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

4116

4117

if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)

4118

// PPC64 passes i8, i16, and i32 values in i64 registers. Promote

4119

// value to MVT::i64 and then truncate to the correct register size.

4120

ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);

4121

} else {

4122

if (CallConv == CallingConv::Fast)

4123

ComputeArgOffset();

4124

4125

needsLoad = true;

4126

ArgSize = PtrByteSize;

4127

}

4128

if (CallConv != CallingConv::Fast || needsLoad)

4129

ArgOffset += 8;

4130

break;

4131

4132

case MVT::f32:

4133

case MVT::f64:

4134

// These can be scalar arguments or elements of a float array type

4135

// passed directly. The latter are used to implement ELFv2 homogenous

4136

// float aggregates.

4137

if (FPR_idx != Num_FPR_Regs) {

4138

unsigned VReg;

4139

4140

if (ObjectVT == MVT::f32)

4141

VReg = MF.addLiveIn(FPR[FPR_idx],

4142

Subtarget.hasP8Vector()

4143

? &PPC::VSSRCRegClass

4144

: &PPC::F4RCRegClass);

4145

else

4146

VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()

4147

? &PPC::VSFRCRegClass

4148

: &PPC::F8RCRegClass);

4149

4150

ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);

4151

++FPR_idx;

4152

} else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {

4153

// FIXME: We may want to re-enable this for CallingConv::Fast on the P8

4154

// once we support fp <-> gpr moves.

4155

4156

// This can only ever happen in the presence of f32 array types,

4157

// since otherwise we never run out of FPRs before running out

4158

// of GPRs.

4159

unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);

4160

FuncInfo->addLiveInAttr(VReg, Flags);

4161

ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

4162

4163

if (ObjectVT == MVT::f32) {

4164

if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))

4165

ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,

4166

DAG.getConstant(32, dl, MVT::i32));

4167

ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);

4168

}

4169

4170

ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);

4171

} else {

4172

if (CallConv == CallingConv::Fast)

4173

ComputeArgOffset();

4174

4175

needsLoad = true;

4176

}

4177

4178

// When passing an array of floats, the array occupies consecutive

4179

// space in the argument area; only round up to the next doubleword

4180

// at the end of the array. Otherwise, each float takes 8 bytes.

4181

if (CallConv != CallingConv::Fast || needsLoad) {

4182

ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;

4183

ArgOffset += ArgSize;

4184

if (Flags.isInConsecutiveRegsLast())

4185

ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;

4186

}

4187

break;

4188

case MVT::v4f32:

4189

case MVT::v4i32:

4190

case MVT::v8i16:

4191

case MVT::v16i8:

4192

case MVT::v2f64:

4193

case MVT::v2i64:

4194

case MVT::v1i128:

4195

case MVT::f128:

4196

// These can be scalar arguments or elements of a vector array type

4197

// passed directly. The latter are used to implement ELFv2 homogeneous

4198

// vector aggregates.

4199

if (VR_idx != Num_VR_Regs) {

4200

unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);

4201

ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);

4202

++VR_idx;

4203

} else {

4204

if (CallConv == CallingConv::Fast)

4205

ComputeArgOffset();

4206

needsLoad = true;

4207

}

4208

if (CallConv != CallingConv::Fast || needsLoad)

4209

ArgOffset += 16;

4210

break;

4211

}

4212

4213

// We need to load the argument to a virtual register if we determined

4214

// above that we ran out of physical registers of the appropriate type.

4215

if (needsLoad) {

4216

if (ObjSize < ArgSize && !isLittleEndian)

4217

CurArgOffset += ArgSize - ObjSize;

4218

int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);

4219

SDValue FIN = DAG.getFrameIndex(FI, PtrVT);

4220

ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());

4221

}

4222

4223

InVals.push_back(ArgVal);

4224

}

4225

4226

// Area that is at least reserved in the caller of this function.

4227

unsigned MinReservedArea;

4228

if (HasParameterArea)

4229

MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);

4230

else

4231

MinReservedArea = LinkageSize;

4232

4233

// Set the size that is at least reserved in caller of this function. Tail

4234

// call optimized functions' reserved stack space needs to be aligned so that

4235

// taking the difference between two stack areas will result in an aligned

4236

// stack.

4237

MinReservedArea =

4238

EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);

4239

FuncInfo->setMinReservedArea(MinReservedArea);

4240

4241

// If the function takes variable number of arguments, make a frame index for

4242

// the start of the first vararg value... for expansion of llvm.va_start.

4243

// On ELFv2ABI spec, it writes:

4244

// C programs that are intended to be *portable* across different compilers

4245

// and architectures must use the header file <stdarg.h> to deal with variable

4246

// argument lists.

4247

if (isVarArg && MFI.hasVAStart()) {

4248

int Depth = ArgOffset;

4249

4250

FuncInfo->setVarArgsFrameIndex(

4251

MFI.CreateFixedObject(PtrByteSize, Depth, true));

4252

SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

4253

4254

// If this function is vararg, store any remaining integer argument regs

4255

// to their spots on the stack so that they may be loaded by dereferencing

4256

// the result of va_next.

4257

for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;

4258

GPR_idx < Num_GPR_Regs; ++GPR_idx) {

4259

unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);

4260

SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);

4261

SDValue Store =

4262

DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());

4263

MemOps.push_back(Store);

4264

// Increment the address by PtrByteSize for the next argument to store

4265

SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);

4266

FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);

4267

}

4268

}

4269

4270

if (!MemOps.empty())

4271

Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);

4272

4273

return Chain;

4274

}

4275

4276

/// Lower the incoming formal arguments of a function using the Darwin ABI.
///
/// For every entry of \p Ins exactly one value is appended to \p InVals:
///  - for byval arguments, the frame index of a fixed stack object; any part
///    of the aggregate that arrived in GPRs is stored back to the stack;
///  - otherwise either a copy from a live-in physical register or, once the
///    registers of the matching class are exhausted, a load from a fixed
///    stack object.
///
/// As side effects the minimum reserved area is recorded in PPCFunctionInfo
/// and, for variadic functions, the varargs frame index is created and the
/// remaining argument GPRs are spilled to their stack slots.
///
/// \returns the incoming \p Chain, or a TokenFactor over the emitted stores
///          when any stores were created.
SDValue PPCTargetLowering::LowerFormalArguments_Darwin(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  // TODO: add description of PPC stack frame format, or at least some docs.
  //
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  EVT PtrVT = getPointerTy(MF.getDataLayout());
  bool isPPC64 = PtrVT == MVT::i64;
  // Potential tail calls could cause overwriting of argument stack slots.
  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
                       (CallConv == CallingConv::Fast));
  unsigned PtrByteSize = isPPC64 ? 8 : 4;
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  unsigned ArgOffset = LinkageSize;
  // Area that is at least reserved in caller of this function.
  unsigned MinReservedArea = ArgOffset;

  static const MCPhysReg GPR_32[] = {           // 32-bit registers.
    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
  };
  static const MCPhysReg GPR_64[] = {           // 64-bit registers.
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };

  const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
  const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
  const unsigned Num_VR_Regs  = array_lengthof( VR);

  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;

  const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;

  // In 32-bit non-varargs functions, the stack space for vectors is after the
  // stack space for non-vectors.  We do not use this space unless we have
  // too many vectors to fit in registers, something that only occurs in
  // constructed examples:), but we have to walk the arglist to figure
  // that out...for the pathological case, compute VecArgOffset as the
  // start of the vector parameter area.  Computing VecArgOffset is the
  // entire point of the following loop.
  unsigned VecArgOffset = ArgOffset;
  if (!isVarArg && !isPPC64) {
    for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e;
         ++ArgNo) {
      EVT ObjectVT = Ins[ArgNo].VT;
      ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;

      if (Flags.isByVal()) {
        // ObjSize is the true size, ArgSize rounded up to multiple of regs.
        unsigned ObjSize = Flags.getByValSize();
        unsigned ArgSize =
                ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
        VecArgOffset += ArgSize;
        continue;
      }

      switch(ObjectVT.getSimpleVT().SimpleTy) {
      default: llvm_unreachable("Unhandled argument type!");
      case MVT::i1:
      case MVT::i32:
      case MVT::f32:
        VecArgOffset += 4;
        break;
      case MVT::i64:  // PPC64
      case MVT::f64:
        // FIXME: We are guaranteed to be !isPPC64 at this point.
        // Does MVT::i64 apply?
        VecArgOffset += 8;
        break;
      case MVT::v4f32:
      case MVT::v4i32:
      case MVT::v8i16:
      case MVT::v16i8:
        // Nothing to do, we're only looking at Nonvector args here.
        break;
      }
    }
  }
  // We've found where the vector parameter area in memory is.  Skip the
  // first 12 parameters; these don't use that memory.
  VecArgOffset = ((VecArgOffset+15)/16)*16;
  VecArgOffset += 12*16;

  // Add DAG nodes to load the arguments or copy them out of registers.  On
  // entry to a function on PPC, the arguments start after the linkage area,
  // although the first ones are often in registers.

  SmallVector<SDValue, 8> MemOps;
  unsigned nAltivecParamsAtEnd = 0;
  Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
  unsigned CurArgIdx = 0;
  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
    SDValue ArgVal;
    bool needsLoad = false;
    EVT ObjectVT = Ins[ArgNo].VT;
    unsigned ObjSize = ObjectVT.getSizeInBits()/8;
    unsigned ArgSize = ObjSize;
    ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
    // Keep FuncArg pointing at the IR argument this input value came from.
    if (Ins[ArgNo].isOrigArg()) {
      std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
      CurArgIdx = Ins[ArgNo].getOrigArgIndex();
    }
    unsigned CurArgOffset = ArgOffset;

    // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
    if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
        ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
      if (isVarArg || isPPC64) {
        MinReservedArea = ((MinReservedArea+15)/16)*16;
        MinReservedArea += CalculateStackSlotSize(ObjectVT,
                                                  Flags,
                                                  PtrByteSize);
      } else  nAltivecParamsAtEnd++;
    } else
      // Calculate min reserved area.
      MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
                                                Flags,
                                                PtrByteSize);

    // FIXME the codegen can be much improved in some cases.
    // We do not have to keep everything in memory.
    if (Flags.isByVal()) {
      assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");

      // ObjSize is the true size, ArgSize rounded up to multiple of registers.
      ObjSize = Flags.getByValSize();
      ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
      // Objects of size 1 and 2 are right justified, everything else is
      // left justified.  This means the memory address is adjusted forwards.
      // NOTE(review): the adjustment uses a literal 4 rather than PtrByteSize;
      // confirm whether that is intended when isPPC64 is true.
      if (ObjSize==1 || ObjSize==2) {
        CurArgOffset = CurArgOffset + (4 - ObjSize);
      }
      // The value of the object is its address.
      int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, false, true);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      InVals.push_back(FIN);
      if (ObjSize==1 || ObjSize==2) {
        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg;
          if (isPPC64)
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
          else
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
          EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;
          SDValue Store =
              DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
                                MachinePointerInfo(&*FuncArg), ObjType);
          MemOps.push_back(Store);
          ++GPR_idx;
        }

        ArgOffset += PtrByteSize;

        continue;
      }
      for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
        // Store whatever pieces of the object are in registers
        // to memory.  ArgOffset will be the address of the beginning
        // of the object.
        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg;
          if (isPPC64)
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
          else
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
          int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
          SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
          SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
                                       MachinePointerInfo(&*FuncArg, j));
          MemOps.push_back(Store);
          ++GPR_idx;
          ArgOffset += PtrByteSize;
        } else {
          // Out of GPRs: skip over the part of the object that was not
          // register-passed and stop.
          ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
          break;
        }
      }
      continue;
    }

    switch (ObjectVT.getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unhandled argument type!");
    case MVT::i1:
    case MVT::i32:
      if (!isPPC64) {
        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
          ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);

          if (ObjectVT == MVT::i1)
            ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal);

          ++GPR_idx;
        } else {
          needsLoad = true;
          ArgSize = PtrByteSize;
        }
        // All int arguments reserve stack space in the Darwin ABI.
        ArgOffset += PtrByteSize;
        break;
      }
      LLVM_FALLTHROUGH;
    case MVT::i64:  // PPC64
      if (GPR_idx != Num_GPR_Regs) {
        unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
          // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
          // value to MVT::i64 and then truncate to the correct register size.
          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);

        ++GPR_idx;
      } else {
        needsLoad = true;
        ArgSize = PtrByteSize;
      }
      // All int arguments reserve stack space in the Darwin ABI.
      ArgOffset += 8;
      break;

    case MVT::f32:
    case MVT::f64:
      // Every 4 bytes of argument space consumes one of the GPRs available for
      // argument passing.
      if (GPR_idx != Num_GPR_Regs) {
        ++GPR_idx;
        if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
          ++GPR_idx;
      }
      if (FPR_idx != Num_FPR_Regs) {
        unsigned VReg;

        if (ObjectVT == MVT::f32)
          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
        else
          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);

        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        ++FPR_idx;
      } else {
        needsLoad = true;
      }

      // All FP arguments reserve stack space in the Darwin ABI.
      ArgOffset += isPPC64 ? 8 : ObjSize;
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      // Note that vector arguments in registers don't reserve stack space,
      // except in varargs functions.
      if (VR_idx != Num_VR_Regs) {
        unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        if (isVarArg) {
          while ((ArgOffset % 16) != 0) {
            ArgOffset += PtrByteSize;
            if (GPR_idx != Num_GPR_Regs)
              GPR_idx++;
          }
          ArgOffset += 16;
          GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
        }
        ++VR_idx;
      } else {
        if (!isVarArg && !isPPC64) {
          // Vectors go after all the nonvectors.
          CurArgOffset = VecArgOffset;
          VecArgOffset += 16;
        } else {
          // Vectors are aligned.
          ArgOffset = ((ArgOffset+15)/16)*16;
          CurArgOffset = ArgOffset;
          ArgOffset += 16;
        }
        needsLoad = true;
      }
      break;
    }

    // We need to load the argument to a virtual register if we determined above
    // that we ran out of physical registers of the appropriate type.
    if (needsLoad) {
      // When the slot (ArgSize) is wider than the value (ObjSize) the value
      // is read from the end of the slot.
      int FI = MFI.CreateFixedObject(ObjSize,
                                     CurArgOffset + (ArgSize - ObjSize),
                                     isImmutable);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
    }

    InVals.push_back(ArgVal);
  }

  // Allow for Altivec parameters at the end, if needed.
  if (nAltivecParamsAtEnd) {
    MinReservedArea = ((MinReservedArea+15)/16)*16;
    MinReservedArea += 16*nAltivecParamsAtEnd;
  }

  // Area that is at least reserved in the caller of this function.
  MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize);

  // Set the size that is at least reserved in caller of this function.  Tail
  // call optimized functions' reserved stack space needs to be aligned so that
  // taking the difference between two stack areas will result in an aligned
  // stack.
  MinReservedArea =
      EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
  FuncInfo->setMinReservedArea(MinReservedArea);

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    int Depth = ArgOffset;

    FuncInfo->setVarArgsFrameIndex(
      MFI.CreateFixedObject(PtrByteSize, Depth, true));
    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

    // If this function is vararg, store any remaining integer argument regs
    // to their spots on the stack so that they may be loaded by dereferencing
    // the result of va_next.
    for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
      unsigned VReg;

      if (isPPC64)
        VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
      else
        VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
      SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
      MemOps.push_back(Store);
      // Advance the address by one pointer-sized slot for the next register.
      SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }
  }

  // Tie all emitted stores into the returned chain.
  if (!MemOps.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);

  return Chain;
}

4635

4636

/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be

4637

/// adjusted to accommodate the arguments for the tailcall.

4638

static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,

4639

unsigned ParamSize) {

4640

4641

if (!isTailCall) return 0;

4642

4643

PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();

4644

unsigned CallerMinReservedArea = FI->getMinReservedArea();

4645

int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;

4646

// Remember only if the new adjustment is bigger.

4647

if (SPDiff < FI->getTailCallSPDelta())

4648

FI->setTailCallSPDelta(SPDiff);

4649

4650

return SPDiff;

4651

}

4652

4653

static bool isFunctionGlobalAddress(SDValue Callee);

4654

4655

static bool callsShareTOCBase(const Function *Caller, SDValue Callee,

4656

const TargetMachine &TM) {

4657

// It does not make sense to call callsShareTOCBase() with a caller that

4658

// is PC Relative since PC Relative callers do not have a TOC.

4659

#ifndef NDEBUG

4660

const PPCSubtarget *STICaller = &TM.getSubtarget<PPCSubtarget>(*Caller);

4661

assert(!STICaller->isUsingPCRelativeCalls() &&((!STICaller->isUsingPCRelativeCalls() && "PC Relative callers do not have a TOC and cannot share a TOC Base"
) ? static_cast<void> (0) : __assert_fail ("!STICaller->isUsingPCRelativeCalls() && \"PC Relative callers do not have a TOC and cannot share a TOC Base\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 4662, __PRETTY_FUNCTION__))

4662

"PC Relative callers do not have a TOC and cannot share a TOC Base")((!STICaller->isUsingPCRelativeCalls() && "PC Relative callers do not have a TOC and cannot share a TOC Base"
) ? static_cast<void> (0) : __assert_fail ("!STICaller->isUsingPCRelativeCalls() && \"PC Relative callers do not have a TOC and cannot share a TOC Base\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 4662, __PRETTY_FUNCTION__));

4663

#endif

4664

4665

// Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols

4666

// don't have enough information to determine if the caller and callee share

4667

// the same TOC base, so we have to pessimistically assume they don't for

4668

// correctness.

4669

GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);

4670

if (!G)

4671

return false;

4672

4673

const GlobalValue *GV = G->getGlobal();

4674

4675

// If the callee is preemptable, then the static linker will use a plt-stub

4676

// which saves the toc to the stack, and needs a nop after the call

4677

// instruction to convert to a toc-restore.

4678

if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), GV))

4679

return false;

4680

4681

// Functions with PC Relative enabled may clobber the TOC in the same DSO.

4682

// We may need a TOC restore in the situation where the caller requires a

4683

// valid TOC but the callee is PC Relative and does not.

4684

const Function *F = dyn_cast<Function>(GV);

4685

const GlobalAlias *Alias = dyn_cast<GlobalAlias>(GV);

4686

4687

// If we have an Alias we can try to get the function from there.

4688

if (Alias) {

4689

const GlobalObject *GlobalObj = Alias->getBaseObject();

4690

F = dyn_cast<Function>(GlobalObj);

4691

}

4692

4693

// If we still have no valid function pointer we do not have enough

4694

// information to determine if the callee uses PC Relative calls so we must

4695

// assume that it does.

4696

if (!F)

4697

return false;

4698

4699

// If the callee uses PC Relative we cannot guarantee that the callee won't

4700

// clobber the TOC of the caller and so we must assume that the two

4701

// functions do not share a TOC base.

4702

const PPCSubtarget *STICallee = &TM.getSubtarget<PPCSubtarget>(*F);

4703

if (STICallee->isUsingPCRelativeCalls())

4704

return false;

4705

4706

// The medium and large code models are expected to provide a sufficiently

4707

// large TOC to provide all data addressing needs of a module with a

4708

// single TOC.

4709

if (CodeModel::Medium == TM.getCodeModel() ||

4710

CodeModel::Large == TM.getCodeModel())

4711

return true;

4712

4713

// Otherwise we need to ensure callee and caller are in the same section,

4714

// since the linker may allocate multiple TOCs, and we don't know which

4715

// sections will belong to the same TOC base.

4716

if (!GV->isStrongDefinitionForLinker())

4717

return false;

4718

4719

// Any explicitly-specified sections and section prefixes must also match.

4720

// Also, if we're using -ffunction-sections, then each function is always in

4721

// a different section (the same is true for COMDAT functions).

4722

if (TM.getFunctionSections() || GV->hasComdat() || Caller->hasComdat() ||

4723

GV->getSection() != Caller->getSection())

4724

return false;

4725

if (const auto *F = dyn_cast<Function>(GV)) {

4726

if (F->getSectionPrefix() != Caller->getSectionPrefix())

4727

return false;

4728

}

4729

4730

return true;

4731

}

4732

4733

static bool

4734

needStackSlotPassParameters(const PPCSubtarget &Subtarget,

4735

const SmallVectorImpl<ISD::OutputArg> &Outs) {

4736

assert(Subtarget.is64BitELFABI())((Subtarget.is64BitELFABI()) ? static_cast<void> (0) : __assert_fail
("Subtarget.is64BitELFABI()", "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 4736, __PRETTY_FUNCTION__));

4737

4738

const unsigned PtrByteSize = 8;

4739

const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();

4740

4741

static const MCPhysReg GPR[] = {

4742

PPC::X3, PPC::X4, PPC::X5, PPC::X6,

4743

PPC::X7, PPC::X8, PPC::X9, PPC::X10,

4744

};

4745

static const MCPhysReg VR[] = {

4746

PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,

4747

PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13

4748

};

4749

4750

const unsigned NumGPRs = array_lengthof(GPR);

4751

const unsigned NumFPRs = 13;

4752

const unsigned NumVRs = array_lengthof(VR);

4753

const unsigned ParamAreaSize = NumGPRs * PtrByteSize;

4754

4755

unsigned NumBytes = LinkageSize;

4756

unsigned AvailableFPRs = NumFPRs;

4757

unsigned AvailableVRs = NumVRs;

4758

4759

for (const ISD::OutputArg& Param : Outs) {

4760

if (Param.Flags.isNest()) continue;

4761

4762

if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, PtrByteSize,

4763

LinkageSize, ParamAreaSize, NumBytes,

4764

AvailableFPRs, AvailableVRs))

4765

return true;

4766

}

4767

return false;

4768

}

4769

4770

static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) {

4771

if (CB.arg_size() != CallerFn->arg_size())

4772

return false;

4773

4774

auto CalleeArgIter = CB.arg_begin();

4775

auto CalleeArgEnd = CB.arg_end();

4776

Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();

4777

4778

for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {

4779

const Value* CalleeArg = *CalleeArgIter;

4780

const Value* CallerArg = &(*CallerArgIter);

4781

if (CalleeArg == CallerArg)

4782

continue;

4783

4784

// e.g. @caller([4 x i64] %a, [4 x i64] %b) {

4785

// tail call @callee([4 x i64] undef, [4 x i64] %b)

4786

// }

4787

// 1st argument of callee is undef and has the same type as caller.

4788

if (CalleeArg->getType() == CallerArg->getType() &&

4789

isa<UndefValue>(CalleeArg))

4790

continue;

4791

4792

return false;

4793

}

4794

4795

return true;

4796

}

4797

4798

// Returns true if TCO is possible between the callers and callees

4799

// calling conventions.

4800

static bool

4801

areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,

4802

CallingConv::ID CalleeCC) {

4803

// Tail calls are possible with fastcc and ccc.

4804

auto isTailCallableCC = [] (CallingConv::ID CC){

4805

return CC == CallingConv::C || CC == CallingConv::Fast;

4806

};

4807

if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))

4808

return false;

4809

4810

// We can safely tail call both fastcc and ccc callees from a c calling

4811

// convention caller. If the caller is fastcc, we may have less stack space

4812

// than a non-fastcc caller with the same signature so disable tail-calls in

4813

// that case.

4814

return CallerCC == CallingConv::C || CallerCC == CalleeCC;

4815

}

4816

4817

bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(

4818

SDValue Callee, CallingConv::ID CalleeCC, const CallBase *CB, bool isVarArg,

4819

const SmallVectorImpl<ISD::OutputArg> &Outs,

4820

const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {

4821

bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;

4822

4823

if (DisableSCO && !TailCallOpt) return false;

4824

4825

// Variadic argument functions are not supported.

4826

if (isVarArg) return false;

4827

4828

auto &Caller = DAG.getMachineFunction().getFunction();

4829

// Check that the calling conventions are compatible for tco.

4830

if (!areCallingConvEligibleForTCO_64SVR4(Caller.getCallingConv(), CalleeCC))

4831

return false;

4832

4833

// Caller contains any byval parameter is not supported.

4834

if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))

4835

return false;

4836

4837

// Callee contains any byval parameter is not supported, too.

4838

// Note: This is a quick work around, because in some cases, e.g.

4839

// caller's stack size > callee's stack size, we are still able to apply

4840

// sibling call optimization. For example, gcc is able to do SCO for caller1

4841

// in the following example, but not for caller2.

4842

// struct test {

4843

// long int a;

4844

// char ary[56];

4845

// } gTest;

4846

// __attribute__((noinline)) int callee(struct test v, struct test *b) {

4847

// b->a = v.a;

4848

// return 0;

4849

// }

4850

// void caller1(struct test a, struct test c, struct test *b) {

4851

// callee(gTest, b); }

4852

// void caller2(struct test *b) { callee(gTest, b); }

4853

if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))

4854

return false;

4855

4856

// If callee and caller use different calling conventions, we cannot pass

4857

// parameters on stack since offsets for the parameter area may be different.

4858

if (Caller.getCallingConv() != CalleeCC &&

4859

needStackSlotPassParameters(Subtarget, Outs))

4860

return false;

4861

4862

// All variants of 64-bit ELF ABIs without PC-Relative addressing require that

4863

// the caller and callee share the same TOC for TCO/SCO. If the caller and

4864

// callee potentially have different TOC bases then we cannot tail call since

4865

// we need to restore the TOC pointer after the call.

4866

// ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977

4867

// We cannot guarantee this for indirect calls or calls to external functions.

4868

// When PC-Relative addressing is used, the concept of the TOC is no longer

4869

// applicable so this check is not required.

4870

// Check first for indirect calls.

4871

if (!Subtarget.isUsingPCRelativeCalls() &&

4872

!isFunctionGlobalAddress(Callee) && !isa<ExternalSymbolSDNode>(Callee))

4873

return false;

4874

4875

// Check if we share the TOC base.

4876

if (!Subtarget.isUsingPCRelativeCalls() &&

4877

!callsShareTOCBase(&Caller, Callee, getTargetMachine()))

4878

return false;

4879

4880

// TCO allows altering callee ABI, so we don't have to check further.

4881

if (CalleeCC == CallingConv::Fast && TailCallOpt)

4882

return true;

4883

4884

if (DisableSCO) return false;

4885

4886

// If callee use the same argument list that caller is using, then we can

4887

// apply SCO on this case. If it is not, then we need to check if callee needs

4888

// stack for passing arguments.

4889

// PC Relative tail calls may not have a CallBase.

4890

// If there is no CallBase we cannot verify if we have the same argument

4891

// list so assume that we don't have the same argument list.

4892

if (CB && !hasSameArgumentList(&Caller, *CB) &&

4893

needStackSlotPassParameters(Subtarget, Outs))

4894

return false;

4895

else if (!CB && needStackSlotPassParameters(Subtarget, Outs))

4896

return false;

4897

4898

return true;

4899

}

4900

4901

/// IsEligibleForTailCallOptimization - Check whether the call is eligible

4902

/// for tail call optimization. Targets which want to do tail call

4903

/// optimization should implement this function.

4904

bool

4905

PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,

4906

CallingConv::ID CalleeCC,

4907

bool isVarArg,

4908

const SmallVectorImpl<ISD::InputArg> &Ins,

4909

SelectionDAG& DAG) const {

4910

if (!getTargetMachine().Options.GuaranteedTailCallOpt)

4911

return false;

4912

4913

// Variable argument functions are not supported.

4914

if (isVarArg)

4915

return false;

4916

4917

MachineFunction &MF = DAG.getMachineFunction();

4918

CallingConv::ID CallerCC = MF.getFunction().getCallingConv();

4919

if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {

4920

// Functions containing by val parameters are not supported.

4921

for (unsigned i = 0; i != Ins.size(); i++) {

4922

ISD::ArgFlagsTy Flags = Ins[i].Flags;

4923

if (Flags.isByVal()) return false;

4924

}

4925

4926

// Non-PIC/GOT tail calls are supported.

4927

if (getTargetMachine().getRelocationModel() != Reloc::PIC_)

4928

return true;

4929

4930

// At the moment we can only do local tail calls (in same module, hidden

4931

// or protected) if we are generating PIC.

4932

if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))

4933

return G->getGlobal()->hasHiddenVisibility()

4934

|| G->getGlobal()->hasProtectedVisibility();

4935

}

4936

4937

return false;

4938

}

4939

4940

/// isCallCompatibleAddress - Return the immediate to use if the specified

4941

/// 32-bit value is representable in the immediate field of a BxA instruction.

4942

static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {

4943

ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);

4944

if (!C) return nullptr;

4945

4946

int Addr = C->getZExtValue();

4947

if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.

4948

SignExtend32<26>(Addr) != Addr)

4949

return nullptr; // Top 6 bits have to be sext of immediate.

4950

4951

return DAG

4952

.getConstant(

4953

(int)C->getZExtValue() >> 2, SDLoc(Op),

4954

DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))

4955

.getNode();

4956

}

4957

4958

namespace {

4959

4960

struct TailCallArgumentInfo {

4961

SDValue Arg;

4962

SDValue FrameIdxOp;

4963

int FrameIdx = 0;

4964

4965

TailCallArgumentInfo() = default;

4966

};

4967

4968

} // end anonymous namespace

4969

4970

/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.

4971

static void StoreTailCallArgumentsToStackSlot(

4972

SelectionDAG &DAG, SDValue Chain,

4973

const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,

4974

SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {

4975

for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {

4976

SDValue Arg = TailCallArgs[i].Arg;

4977

SDValue FIN = TailCallArgs[i].FrameIdxOp;

4978

int FI = TailCallArgs[i].FrameIdx;

4979

// Store relative to framepointer.

4980

MemOpChains.push_back(DAG.getStore(

4981

Chain, dl, Arg, FIN,

4982

MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));

4983

}

4984

}

4985

4986

/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to

4987

/// the appropriate stack slot for the tail call optimized function call.

4988

static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,

4989

SDValue OldRetAddr, SDValue OldFP,

4990

int SPDiff, const SDLoc &dl) {

4991

if (SPDiff) {

4992

// Calculate the new stack slot for the return address.

4993

MachineFunction &MF = DAG.getMachineFunction();

4994

const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();

4995

const PPCFrameLowering *FL = Subtarget.getFrameLowering();

4996

bool isPPC64 = Subtarget.isPPC64();

4997

int SlotSize = isPPC64 ? 8 : 4;

4998

int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();

4999

int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,

5000

NewRetAddrLoc, true);

5001

EVT VT = isPPC64 ? MVT::i64 : MVT::i32;

5002

SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);

5003

Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,

5004

MachinePointerInfo::getFixedStack(MF, NewRetAddr));

5005

}

5006

return Chain;

5007

}

5008

5009

/// CalculateTailCallArgDest - Remember Argument for later processing. Calculate

5010

/// the position of the argument.

5011

static void

5012

CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,

5013

SDValue Arg, int SPDiff, unsigned ArgOffset,

5014

SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {

5015

int Offset = ArgOffset + SPDiff;

5016

uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;

5017

int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);

5018

EVT VT = isPPC64 ? MVT::i64 : MVT::i32;

5019

SDValue FIN = DAG.getFrameIndex(FI, VT);

5020

TailCallArgumentInfo Info;

5021

Info.Arg = Arg;

5022

Info.FrameIdxOp = FIN;

5023

Info.FrameIdx = FI;

5024

TailCallArguments.push_back(Info);

5025

}

5026

5027

/// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address

5028

/// stack slot. Returns the chain as result and the loaded frame pointers in

5029

/// LROpOut/FPOpout. Used when tail calling.

5030

SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(

5031

SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,

5032

SDValue &FPOpOut, const SDLoc &dl) const {

5033

if (SPDiff) {

5034

// Load the LR and FP stack slot for later adjusting.

5035

EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;

5036

LROpOut = getReturnAddrFrameIndex(DAG);

5037

LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo());

5038

Chain = SDValue(LROpOut.getNode(), 1);

5039

}

5040

return Chain;

5041

}

5042

5043

/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified

5044

/// by "Src" to address "Dst" of size "Size". Alignment information is

5045

/// specified by the specific parameter attribute. The copy will be passed as

5046

/// a byval function parameter.

5047

/// Sometimes what we are copying is the end of a larger object, the part that

5048

/// does not fit in registers.

5049

static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,

5050

SDValue Chain, ISD::ArgFlagsTy Flags,

5051

SelectionDAG &DAG, const SDLoc &dl) {

5052

SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);

5053

return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode,

5054

Flags.getNonZeroByValAlign(), false, false, false,

5055

MachinePointerInfo(), MachinePointerInfo());

5056

}

5057

5058

/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of

5059

/// tail calls.

5060

static void LowerMemOpCallTo(

5061

SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,

5062

SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,

5063

bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,

5064

SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {

5065

EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());

5066

if (!isTailCall) {

5067

if (isVector) {

5068

SDValue StackPtr;

5069

if (isPPC64)

5070

StackPtr = DAG.getRegister(PPC::X1, MVT::i64);

5071

else

5072

StackPtr = DAG.getRegister(PPC::R1, MVT::i32);

5073

PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,

5074

DAG.getConstant(ArgOffset, dl, PtrVT));

5075

}

5076

MemOpChains.push_back(

5077

DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));

5078

// Calculate and remember argument location.

5079

} else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,

5080

TailCallArguments);

5081

}

5082

5083

static void

5084

PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,

5085

const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,

5086

SDValue FPOp,

5087

SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {

5088

// Emit a sequence of copyto/copyfrom virtual registers for arguments that

5089

// might overwrite each other in case of tail call optimization.

5090

SmallVector<SDValue, 8> MemOpChains2;

5091

// Do not flag preceding copytoreg stuff together with the following stuff.

5092

InFlag = SDValue();

5093

StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,

5094

MemOpChains2, dl);

5095

if (!MemOpChains2.empty())

5096

Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);

5097

5098

// Store the return address to the appropriate stack slot.

5099

Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);

5100

5101

// Emit callseq_end just before tailcall node.

5102

Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),

5103

DAG.getIntPtrConstant(0, dl, true), InFlag, dl);

5104

InFlag = Chain.getValue(1);

5105

}

5106

5107

// Is this global address that of a function that can be called by name? (as

5108

// opposed to something that must hold a descriptor for an indirect call).

5109

static bool isFunctionGlobalAddress(SDValue Callee) {

5110

if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {

5111

if (Callee.getOpcode() == ISD::GlobalTLSAddress ||

5112

Callee.getOpcode() == ISD::TargetGlobalTLSAddress)

5113

return false;

5114

5115

return G->getGlobal()->getValueType()->isFunctionTy();

5116

}

5117

5118

return false;

5119

}

5120

5121

/// LowerCallResult - Copy the values returned by a call out of their physical
/// registers and append them to \p InVals.
///
/// The return locations are computed with RetCC_PPC_Cold for the Cold calling
/// convention on the SVR4 ABI and with RetCC_PPC otherwise. Each copy is
/// threaded through \p Chain and \p InFlag; values that were extended for the
/// location type are truncated back to their value type. With SPE, an f64
/// result is rebuilt from two consecutive i32 register locations.
///
/// \returns the chain after the last register copy.
SDValue PPCTargetLowering::LowerCallResult(
    SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
                    *DAG.getContext());

  CCRetInfo.AnalyzeCallResult(
      Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
               ? RetCC_PPC_Cold
               : RetCC_PPC);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
    // Use a pointer rather than a reference: the SPE path below advances to
    // the next location, and assigning through a reference would overwrite
    // RVLocs[i] with a copy of RVLocs[i + 1] instead of rebinding.
    const CCValAssign *VA = &RVLocs[i];
    assert(VA->isRegLoc() && "Can only return in registers!");

    SDValue Val;

    if (Subtarget.hasSPE() && VA->getLocVT() == MVT::f64) {
      SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA->getLocReg(), MVT::i32,
                                      InFlag);
      Chain = Lo.getValue(1);
      InFlag = Lo.getValue(2);
      assert(i + 1 != e && "Missing second register location for SPE f64!");
      VA = &RVLocs[++i]; // skip ahead to next loc
      SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA->getLocReg(), MVT::i32,
                                      InFlag);
      Chain = Hi.getValue(1);
      InFlag = Hi.getValue(2);
      if (!Subtarget.isLittleEndian())
        std::swap (Lo, Hi);
      Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
    } else {
      Val = DAG.getCopyFromReg(Chain, dl,
                               VA->getLocReg(), VA->getLocVT(), InFlag);
      Chain = Val.getValue(1);
      InFlag = Val.getValue(2);
    }

    // Undo any extension that was applied to fit the location type.
    switch (VA->getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::AExt:
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA->getValVT(), Val);
      break;
    case CCValAssign::ZExt:
      Val = DAG.getNode(ISD::AssertZext, dl, VA->getLocVT(), Val,
                        DAG.getValueType(VA->getValVT()));
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA->getValVT(), Val);
      break;
    case CCValAssign::SExt:
      Val = DAG.getNode(ISD::AssertSext, dl, VA->getLocVT(), Val,
                        DAG.getValueType(VA->getValVT()));
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA->getValVT(), Val);
      break;
    }

    InVals.push_back(Val);
  }

  return Chain;
}

5184

5185

static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,

5186

const PPCSubtarget &Subtarget, bool isPatchPoint) {

5187

// PatchPoint calls are not indirect.

5188

if (isPatchPoint)

5189

return false;

5190

5191

if (isFunctionGlobalAddress(Callee) || dyn_cast<ExternalSymbolSDNode>(Callee))

5192

return false;

5193

5194

// Darwin, and 32-bit ELF can use a BLA. The descriptor based ABIs can not

5195

// becuase the immediate function pointer points to a descriptor instead of

5196

// a function entry point. The ELFv2 ABI cannot use a BLA because the function

5197

// pointer immediate points to the global entry point, while the BLA would

5198

// need to jump to the local entry point (see rL211174).

5199

if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&

5200

isBLACompatibleAddress(Callee, DAG))

5201

return false;

5202

5203

return true;

5204

}

5205

5206

// AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.

5207

static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {

5208

return Subtarget.isAIXABI() ||

5209

(Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());

5210

}

5211

5212

static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,

5213

const Function &Caller,

5214

const SDValue &Callee,

5215

const PPCSubtarget &Subtarget,

5216

const TargetMachine &TM) {

5217

if (CFlags.IsTailCall)

5218

return PPCISD::TC_RETURN;

5219

5220

// This is a call through a function pointer.

5221

if (CFlags.IsIndirect) {

5222

// AIX and the 64-bit ELF ABIs need to maintain the TOC pointer accross

5223

// indirect calls. The save of the caller's TOC pointer to the stack will be

5224

// inserted into the DAG as part of call lowering. The restore of the TOC

5225

// pointer is modeled by using a pseudo instruction for the call opcode that

5226

// represents the 2 instruction sequence of an indirect branch and link,

5227

// immediately followed by a load of the TOC pointer from the the stack save

5228

// slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC

5229

// as it is not saved or used.

5230

return isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC

5231

: PPCISD::BCTRL;

5232

}

5233

5234

if (Subtarget.isUsingPCRelativeCalls()) {

5235

assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.")((Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI."
) ? static_cast<void> (0) : __assert_fail ("Subtarget.is64BitELFABI() && \"PC Relative is only on ELF ABI.\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 5235, __PRETTY_FUNCTION__));

5236

return PPCISD::CALL_NOTOC;

5237

}

5238

5239

// The ABIs that maintain a TOC pointer accross calls need to have a nop

5240

// immediately following the call instruction if the caller and callee may

5241

// have different TOC bases. At link time if the linker determines the calls

5242

// may not share a TOC base, the call is redirected to a trampoline inserted

5243

// by the linker. The trampoline will (among other things) save the callers

5244

// TOC pointer at an ABI designated offset in the linkage area and the linker

5245

// will rewrite the nop to be a load of the TOC pointer from the linkage area

5246

// into gpr2.

5247

if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI())

5248

return callsShareTOCBase(&Caller, Callee, TM) ? PPCISD::CALL

5249

: PPCISD::CALL_NOP;

5250

5251

return PPCISD::CALL;

5252

}

5253

5254

/// Rewrite the callee of a direct call into the node the call instruction
/// should reference:
///  * a constant immediate, when the address fits a BLA and the ABI allows it;
///  * a target global address (or, on AIX, the function entry point symbol)
///    for function global addresses;
///  * a target external symbol (on AIX: the user-declared function of the
///    same name if the module has one, otherwise the "."-prefixed entry point
///    name) for external symbols;
///  * otherwise \p Callee unchanged.
/// The PLT target flag is attached for non-local callees in 32-bit ELF PIC
/// mode.
static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
                               const SDLoc &dl, const PPCSubtarget &Subtarget) {
  if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
    if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
      return SDValue(Dest, 0);

  // Returns true if the callee is local, and false otherwise.
  auto isLocalCallee = [&]() {
    const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
    const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
    const GlobalValue *GV = G ? G->getGlobal() : nullptr;

    return DAG.getTarget().shouldAssumeDSOLocal(*Mod, GV) &&
           !isa_and_nonnull<GlobalIFunc>(GV);
  };

  // The PLT is only used in 32-bit ELF PIC mode. Attempting to use the PLT in
  // a static relocation model causes some versions of GNU LD (2.17.50, at
  // least) to force BSS-PLT, instead of secure-PLT, even if all objects are
  // built with secure-PLT.
  bool UsePlt =
      Subtarget.is32BitELFABI() && !isLocalCallee() &&
      Subtarget.getTargetMachine().getRelocationModel() == Reloc::PIC_;
  const unsigned TargetFlags = UsePlt ? PPCII::MO_PLT : 0;

  // Builds the MCSymbol node for the entry point of GV on AIX.
  const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) {
    const TargetMachine &TM = Subtarget.getTargetMachine();
    const TargetLoweringObjectFile *TLOF = TM.getObjFileLowering();
    MCSymbolXCOFF *S =
        cast<MCSymbolXCOFF>(TLOF->getFunctionEntryPointSymbol(GV, TM));

    MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
    return DAG.getMCSymbol(S, PtrVT);
  };

  if (isFunctionGlobalAddress(Callee)) {
    const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();

    if (Subtarget.isAIXABI()) {
      assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX.");
      return getAIXFuncEntryPointSymbolSDNode(GV);
    }
    return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0,
                                      TargetFlags);
  }

  if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    const char *SymName = S->getSymbol();
    if (Subtarget.isAIXABI()) {
      // If there exists a user-declared function whose name is the same as the
      // ExternalSymbol's, then we pick up the user-declared version.
      const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
      if (const Function *F =
              dyn_cast_or_null<Function>(Mod->getNamedValue(SymName)))
        return getAIXFuncEntryPointSymbolSDNode(F);

      // On AIX, direct function calls reference the symbol for the function's
      // entry point, which is named by prepending a "." before the function's
      // C-linkage name. A Qualname is returned here because an external
      // function entry point is a csect with XTY_ER property.
      const auto getExternalFunctionEntryPointSymbol = [&](StringRef Name) {
        auto &Context = DAG.getMachineFunction().getMMI().getContext();
        MCSectionXCOFF *Sec = Context.getXCOFFSection(
            (Twine(".") + Twine(Name)).str(), XCOFF::XMC_PR, XCOFF::XTY_ER,
            SectionKind::getMetadata());
        return Sec->getQualNameSymbol();
      };

      SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data();
    }
    return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(),
                                       TargetFlags);
  }

  // No transformation needed.
  assert(Callee.getNode() && "What no callee?");
  return Callee;
}

5331

5332

/// Return the chain result of a CALLSEQ_START node.
///
/// The chain is the node's last result value, unless that value is glue; in
/// that case the chain is the second-to-last result value.
static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart) {
  assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
         "Expected a CALLSEQ_STARTSDNode.");

  const unsigned NumValues = CallSeqStart->getNumValues();
  SDValue Last = CallSeqStart.getValue(NumValues - 1);
  const bool LastIsGlue = Last.getValueType() == MVT::Glue;
  return LastIsGlue ? CallSeqStart.getValue(NumValues - 2) : Last;
}

5345

5346

// Creates the node that moves a functions address into the count register

5347

// to prepare for an indirect call instruction.

5348

static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,

5349

SDValue &Glue, SDValue &Chain,

5350

const SDLoc &dl) {

5351

SDValue MTCTROps[] = {Chain, Callee, Glue};

5352

EVT ReturnTypes[] = {MVT::Other, MVT::Glue};

5353

Chain = DAG.getNode(PPCISD::MTCTR, dl, makeArrayRef(ReturnTypes, 2),

5354

makeArrayRef(MTCTROps, Glue.getNode() ? 3 : 2));

5355

// The glue is the second value produced.

5356

Glue = Chain.getValue(1);

5357

}

5358

5359

static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee,

5360

SDValue &Glue, SDValue &Chain,

5361

SDValue CallSeqStart,

5362

const CallBase *CB, const SDLoc &dl,

5363

bool hasNest,

5364

const PPCSubtarget &Subtarget) {

5365

// Function pointers in the 64-bit SVR4 ABI do not point to the function

5366

// entry point, but to the function descriptor (the function entry point

5367

// address is part of the function descriptor though).

5368

// The function descriptor is a three doubleword structure with the

5369

// following fields: function entry point, TOC base address and

5370

// environment pointer.

5371

// Thus for a call through a function pointer, the following actions need

5372

// to be performed:

5373

// 1. Save the TOC of the caller in the TOC save area of its stack

5374

// frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).

5375

// 2. Load the address of the function entry point from the function

5376

// descriptor.

5377

// 3. Load the TOC of the callee from the function descriptor into r2.

5378

// 4. Load the environment pointer from the function descriptor into

5379

// r11.

5380

// 5. Branch to the function entry point address.

5381

// 6. On return of the callee, the TOC of the caller needs to be

5382

// restored (this is done in FinishCall()).

5383

//

5384

// The loads are scheduled at the beginning of the call sequence, and the

5385

// register copies are flagged together to ensure that no other

5386

// operations can be scheduled in between. E.g. without flagging the

5387

// copies together, a TOC access in the caller could be scheduled between

5388

// the assignment of the callee TOC and the branch to the callee, which leads

5389

// to incorrect code.

5390

5391

// Start by loading the function address from the descriptor.

5392

SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);

5393

auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()

5394

? (MachineMemOperand::MODereferenceable |

5395

MachineMemOperand::MOInvariant)

5396

: MachineMemOperand::MONone;

5397

5398

MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr);

5399

5400

// Registers used in building the DAG.

5401

const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();

5402

const MCRegister TOCReg = Subtarget.getTOCPointerRegister();

5403

5404

// Offsets of descriptor members.

5405

const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();

5406

const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();

5407

5408

const MVT RegVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;

5409

const unsigned Alignment = Subtarget.isPPC64() ? 8 : 4;

5410

5411

// One load for the functions entry point address.

5412

SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI,

5413

Alignment, MMOFlags);

5414

5415

// One for loading the TOC anchor for the module that contains the called

5416

// function.

5417

SDValue TOCOff = DAG.getIntPtrConstant(TOCAnchorOffset, dl);

5418

SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff);

5419

SDValue TOCPtr =

5420

DAG.getLoad(RegVT, dl, LDChain, AddTOC,

5421

MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags);

5422

5423

// One for loading the environment pointer.

5424

SDValue PtrOff = DAG.getIntPtrConstant(EnvPtrOffset, dl);

5425

SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff);

5426

SDValue LoadEnvPtr =

5427

DAG.getLoad(RegVT, dl, LDChain, AddPtr,

5428

MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags);

5429

5430

5431

// Then copy the newly loaded TOC anchor to the TOC pointer.

5432

SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue);

5433

Chain = TOCVal.getValue(0);

5434

Glue = TOCVal.getValue(1);

5435

5436

// If the function call has an explicit 'nest' parameter, it takes the

5437

// place of the environment pointer.

5438

assert((!hasNest || !Subtarget.isAIXABI()) &&(((!hasNest || !Subtarget.isAIXABI()) && "Nest parameter is not supported on AIX."
) ? static_cast<void> (0) : __assert_fail ("(!hasNest || !Subtarget.isAIXABI()) && \"Nest parameter is not supported on AIX.\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 5439, __PRETTY_FUNCTION__))

5439

"Nest parameter is not supported on AIX.")(((!hasNest || !Subtarget.isAIXABI()) && "Nest parameter is not supported on AIX."
) ? static_cast<void> (0) : __assert_fail ("(!hasNest || !Subtarget.isAIXABI()) && \"Nest parameter is not supported on AIX.\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 5439, __PRETTY_FUNCTION__));

5440

if (!hasNest) {

5441

SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue);

5442

Chain = EnvVal.getValue(0);

5443

Glue = EnvVal.getValue(1);

5444

}

5445

5446

// The rest of the indirect call sequence is the same as the non-descriptor

5447

// DAG.

5448

prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl);

5449

}

5450

5451

static void

5452

buildCallOperands(SmallVectorImpl<SDValue> &Ops,

5453

PPCTargetLowering::CallFlags CFlags, const SDLoc &dl,

5454

SelectionDAG &DAG,

5455

SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,

5456

SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,

5457

const PPCSubtarget &Subtarget) {

5458

const bool IsPPC64 = Subtarget.isPPC64();

5459

// MVT for a general purpose register.

5460

const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;

5461

5462

// First operand is always the chain.

5463

Ops.push_back(Chain);

5464

5465

// If it's a direct call pass the callee as the second operand.

5466

if (!CFlags.IsIndirect)

5467

Ops.push_back(Callee);

5468

else {

5469

assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect.")((!CFlags.IsPatchPoint && "Patch point calls are not indirect."
) ? static_cast<void> (0) : __assert_fail ("!CFlags.IsPatchPoint && \"Patch point calls are not indirect.\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 5469, __PRETTY_FUNCTION__));

5470

5471

// For the TOC based ABIs, we have saved the TOC pointer to the linkage area

5472

// on the stack (this would have been done in `LowerCall_64SVR4` or

5473

// `LowerCall_AIX`). The call instruction is a pseudo instruction that

5474

// represents both the indirect branch and a load that restores the TOC

5475

// pointer from the linkage area. The operand for the TOC restore is an add

5476

// of the TOC save offset to the stack pointer. This must be the second

5477

// operand: after the chain input but before any other variadic arguments.

5478

// For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not

5479

// saved or used.

5480

if (isTOCSaveRestoreRequired(Subtarget)) {

5481

const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();

5482

5483

SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);

5484

unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();

5485

SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);

5486

SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff);

5487

Ops.push_back(AddTOC);

5488

}

5489

5490

// Add the register used for the environment pointer.

5491

if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest)

5492

Ops.push_back(DAG.getRegister(Subtarget.getEnvironmentPointerRegister(),

5493

RegVT));

5494

5495

5496

// Add CTR register as callee so a bctr can be emitted later.

5497

if (CFlags.IsTailCall)

5498

Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));

5499

}

5500

5501

// If this is a tail call add stack pointer delta.

5502

if (CFlags.IsTailCall)

5503

Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));

5504

5505

// Add argument registers to the end of the list so that they are known live

5506

// into the call.

5507

for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)

5508

Ops.push_back(DAG.getRegister(RegsToPass[i].first,

5509

RegsToPass[i].second.getValueType()));

5510

5511

// We cannot add R2/X2 as an operand here for PATCHPOINT, because there is

5512

// no way to mark dependencies as implicit here.

5513

// We will add the R2/X2 dependency in EmitInstrWithCustomInserter.

5514

if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&

5515

!CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())

5516

Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));

5517

5518

// Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls

5519

if (CFlags.IsVarArg && Subtarget.is32BitELFABI())

5520

Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));

5521

5522

// Add a register mask operand representing the call-preserved registers.

5523

const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();

5524

const uint32_t *Mask =

5525

TRI->getCallPreservedMask(DAG.getMachineFunction(), CFlags.CallConv);

5526

assert(Mask && "Missing call preserved mask for calling convention")((Mask && "Missing call preserved mask for calling convention"
) ? static_cast<void> (0) : __assert_fail ("Mask && \"Missing call preserved mask for calling convention\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 5526, __PRETTY_FUNCTION__));

5527

Ops.push_back(DAG.getRegisterMask(Mask));

5528

5529

// If the glue is valid, it is the last operand.

5530

if (Glue.getNode())

5531

Ops.push_back(Glue);

5532

}

5533

5534

/// Emit the call node itself once all outgoing arguments have been placed.
///
/// Selects the call opcode, rewrites \p Callee into the form the chosen call
/// sequence expects, assembles the operand list and creates the call node.
/// For a tail call the TC_RETURN node is returned directly. Otherwise the
/// call is closed with CALLSEQ_END and the returned values are lowered into
/// \p InVals through LowerCallResult.
///
/// \p Callee is taken by reference and is updated to the transformed callee.
SDValue PPCTargetLowering::FinishCall(
    CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
    SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
    SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
    unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
    SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const {
  MachineFunction &MF = DAG.getMachineFunction();

  // AIX always, and 64-bit ELF unless PC-relative calls are in use, record
  // that this function uses the TOC base pointer.
  const bool NeedsTOCBase =
      Subtarget.isAIXABI() ||
      (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());
  if (NeedsTOCBase)
    setUsesTOCBasePtr(DAG);

  // The opcode is chosen from the callee as it was handed in, i.e. before the
  // callee is transformed below.
  const unsigned Opcode =
      getCallOpcode(CFlags, MF.getFunction(), Callee, Subtarget,
                    DAG.getTarget());

  if (CFlags.IsIndirect) {
    // Indirect calls go through a descriptor on ABIs that use function
    // descriptors, and through the plain indirect-call preparation otherwise.
    if (Subtarget.usesFunctionDescriptors())
      prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
                                    dl, CFlags.HasNest, Subtarget);
    else
      prepareIndirectCall(DAG, Callee, Glue, Chain, dl);
  } else {
    Callee = transformCallee(Callee, DAG, dl, Subtarget);
  }

  // Collect the operands of the call instruction.
  SmallVector<SDValue, 8> CallOperands;
  buildCallOperands(CallOperands, CFlags, dl, DAG, RegsToPass, Glue, Chain,
                    Callee, SPDiff, Subtarget);

  // A tail call produces only a chain: there is no CALLSEQ_END and no result
  // lowering.
  if (CFlags.IsTailCall) {
    // Indirect tail call when using PC Relative calls do not have the same
    // constraints.
    assert(((Callee.getOpcode() == ISD::Register &&
             cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
            Callee.getOpcode() == ISD::TargetExternalSymbol ||
            Callee.getOpcode() == ISD::TargetGlobalAddress ||
            isa<ConstantSDNode>(Callee) ||
            (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
           "Expecting a global address, external symbol, absolute value, "
           "register or an indirect tail call when PC Relative calls are "
           "used.");
    // PC Relative calls also use TC_RETURN as the way to mark tail calls.
    assert(Opcode == PPCISD::TC_RETURN &&
           "Unexpected call opcode for a tail call.");
    MF.getFrameInfo().setHasTailCall();
    return DAG.getNode(Opcode, dl, MVT::Other, CallOperands);
  }

  // Ordinary call: the node yields a chain and a glue value.
  std::array<EVT, 2> ResultVTs = {{MVT::Other, MVT::Glue}};
  Chain = DAG.getNode(Opcode, dl, ResultVTs, CallOperands);
  DAG.addNoMergeSiteInfo(Chain.getNode(), CFlags.NoMerge);
  Glue = Chain.getValue(1);

  // When performing tail call optimization the callee pops its arguments off
  // the stack. Account for this here so these bytes can be pushed back on in
  // PPCFrameLowering::eliminateCallFramePseudoInstr.
  int CalleePoppedBytes = 0;
  if (CFlags.CallConv == CallingConv::Fast &&
      getTargetMachine().Options.GuaranteedTailCallOpt)
    CalleePoppedBytes = NumBytes;

  SDValue FrameBytes = DAG.getIntPtrConstant(NumBytes, dl, true);
  SDValue PoppedBytes = DAG.getIntPtrConstant(CalleePoppedBytes, dl, true);
  Chain = DAG.getCALLSEQ_END(Chain, FrameBytes, PoppedBytes, Glue, dl);
  Glue = Chain.getValue(1);

  return LowerCallResult(Chain, Glue, CFlags.CallConv, CFlags.IsVarArg, Ins, dl,
                         DAG, InVals);
}

5603

5604

/// Entry point for lowering an outgoing call.
///
/// Decides whether a call requested as a tail call can really be emitted as
/// one (writing the decision back into \p CLI through CLI.IsTailCall),
/// reports a fatal error if a musttail call site cannot be tail called,
/// bundles the per-call properties into a CallFlags value and forwards to
/// the lowering routine for the ABI in use.
SDValue
PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                             SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &dl = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  // Bound by reference: updates below are visible through CLI.
  bool &isTailCall = CLI.IsTailCall;
  CallingConv::ID CallConv = CLI.CallConv;
  bool isVarArg = CLI.IsVarArg;
  bool isPatchPoint = CLI.IsPatchPoint;
  const CallBase *CB = CLI.CB;

  const bool IsMustTail = CB && CB->isMustTailCall();
  const bool Is64BitSVR4 = Subtarget.isSVR4ABI() && Subtarget.isPPC64();

  if (isTailCall) {
    // With long calls enabled only musttail call sites stay tail-call
    // candidates; every remaining candidate goes through the eligibility
    // check for its ABI.
    if (Subtarget.useLongCalls() && !IsMustTail)
      isTailCall = false;
    else if (Is64BitSVR4)
      isTailCall = IsEligibleForTailCallOptimization_64SVR4(
          Callee, CallConv, CB, isVarArg, Outs, Ins, DAG);
    else
      isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
                                                     Ins, DAG);

    if (isTailCall) {
      ++NumTailCalls;
      if (!getTargetMachine().Options.GuaranteedTailCallOpt)
        ++NumSiblingCalls;

      // PC Relative calls no longer guarantee that the callee is a Global
      // Address Node. The callee could be an indirect tail call in which
      // case the SDValue for the callee could be a load (to load the address
      // of a function pointer) or it may be a register copy (to move the
      // address of the callee from a function parameter into a virtual
      // register). It may also be an ExternalSymbolSDNode (ex memcopy).
      assert((Subtarget.isUsingPCRelativeCalls() ||
              isa<GlobalAddressSDNode>(Callee)) &&
             "Callee should be an llvm::Function object.");

      LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
                        << "\nTCO callee: ");
      LLVM_DEBUG(Callee.dump());
    }
  }

  if (IsMustTail && !isTailCall)
    report_fatal_error("failed to perform tail call elimination on a call "
                       "site marked musttail");

  // When long calls (i.e. indirect calls) are always used, calls are always
  // made via function pointer. If we have a function name, first translate it
  // into a pointer.
  if (!isTailCall && Subtarget.useLongCalls() &&
      isa<GlobalAddressSDNode>(Callee))
    Callee = LowerGlobalAddress(Callee, DAG);

  // Gather the remaining CallFlags ingredients. Indirectness is judged on the
  // callee as possibly rewritten just above.
  const bool IsIndirect = isIndirectCall(Callee, DAG, Subtarget, isPatchPoint);
  const bool HasNest =
      Subtarget.is64BitELFABI() &&
      any_of(Outs, [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); });

  CallFlags CFlags(CallConv, isTailCall, isVarArg, isPatchPoint, IsIndirect,
                   HasNest, CLI.NoMerge);

  // Dispatch on the ABI: SVR4 (64-bit, then 32-bit), AIX, and Darwin as the
  // fallback.
  if (Subtarget.isSVR4ABI()) {
    if (Subtarget.isPPC64())
      return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl,
                              DAG, InVals, CB);
    return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
                            InVals, CB);
  }

  if (Subtarget.isAIXABI())
    return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
                         InVals, CB);

  return LowerCall_Darwin(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
                          InVals, CB);
}

5684

5685

/// Lower an outgoing call for the 32-bit SVR4 ABI.
///
/// Assigns a register or stack location to every outgoing argument, copies
/// by-value aggregates into the caller's frame, emits the stores and
/// register copies that place the arguments, sets or clears CR bit 6 for
/// vararg calls, and hands the result to FinishCall, which emits the call
/// node and lowers the returned values into \p InVals.
SDValue PPCTargetLowering::LowerCall_32SVR4(
    SDValue Chain, SDValue Callee, CallFlags CFlags,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
    const CallBase *CB) const {
  // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
  // of the 32-bit SVR4 ABI stack frame layout.

  const CallingConv::ID CallConv = CFlags.CallConv;
  const bool IsVarArg = CFlags.IsVarArg;
  const bool IsTailCall = CFlags.IsTailCall;

  assert((CallConv == CallingConv::C ||
          CallConv == CallingConv::Cold ||
          CallConv == CallingConv::Fast) && "Unknown calling convention!");

  const Align PtrAlign(4);

  MachineFunction &MF = DAG.getMachineFunction();

  // Mark this function as potentially containing a function that contains a
  // tail call. As a consequence the frame pointer will be used for dynamicalloc
  // and restoring the callers stack pointer in this functions epilog. This is
  // done because by tail calling the called function might overwrite the value
  // in this function's (MF) stack pointer stack slot 0(SP).
  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
      CallConv == CallingConv::Fast)
    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();

  // Count how many bytes are to be pushed on the stack, including the linkage
  // area, parameter list area and the part of the local variable space which
  // contains copies of aggregates which are passed by value.

  // Assign locations to all of the outgoing arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  PPCCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  // Reserve space for the linkage area on the stack.
  CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
                       PtrAlign);
  if (useSoftFloat())
    CCInfo.PreAnalyzeCallOperands(Outs);

  if (IsVarArg) {
    // Handle fixed and variable vector arguments differently.
    // Fixed vector arguments go into registers as long as registers are
    // available. Variable vector arguments always go into memory.
    unsigned NumArgs = Outs.size();

    for (unsigned i = 0; i != NumArgs; ++i) {
      MVT ArgVT = Outs[i].VT;
      ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
      bool Result;

      if (Outs[i].IsFixed) {
        Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
                               CCInfo);
      } else {
        Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
                                      ArgFlags, CCInfo);
      }

      // A true result means the calling-convention function could not place
      // the operand.
      if (Result) {
#ifndef NDEBUG
        errs() << "Call operand #" << i << " has unhandled type "
               << EVT(ArgVT).getEVTString() << "\n";
#endif
        llvm_unreachable(nullptr);
      }
    }
  } else {
    // All arguments are treated the same.
    CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
  }
  CCInfo.clearWasPPCF128();

  // Assign locations to all of the outgoing aggregate by value arguments.
  SmallVector<CCValAssign, 16> ByValArgLocs;
  CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext());

  // Reserve stack space for the allocations in CCInfo.
  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrAlign);

  CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);

  // Size of the linkage area, parameter list area and the part of the local
  // space variable where copies of aggregates which are passed by value are
  // stored.
  unsigned NumBytes = CCByValInfo.getNextStackOffset();

  // Calculate by how many bytes the stack has to be adjusted in case of tail
  // call optimization.
  int SPDiff = CalculateTailCallSPDiff(DAG, IsTailCall, NumBytes);

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass
  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
  SDValue CallSeqStart = Chain;

  // Load the return address and frame pointer so it can be moved somewhere else
  // later.
  SDValue LROp, FPOp;
  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);

  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
  SmallVector<SDValue, 8> MemOpChains;

  bool seenFloatArg = false;
  // Walk the register/memloc assignments, inserting copies/loads.
  // i - Tracks the index into the list of registers allocated for the call
  // RealArgIdx - Tracks the index into the list of actual function arguments
  // j - Tracks the index into the list of byval arguments
  for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
       i != e;
       ++i, ++RealArgIdx) {
    CCValAssign &VA = ArgLocs[i];
    SDValue Arg = OutVals[RealArgIdx];
    ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;

    if (Flags.isByVal()) {
      // Argument is an aggregate which is passed by value, thus we need to
      // create a copy of it in the local variable space of the current stack
      // frame (which is the stack frame of the caller) and pass the address of
      // this copy to the callee.
      assert((j < ByValArgLocs.size()) && "Index out of bounds!");
      CCValAssign &ByValVA = ByValArgLocs[j++];
      assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");

      // Memory reserved in the local variable space of the callers stack frame.
      unsigned LocMemOffset = ByValVA.getLocMemOffset();

      SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
      PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
                           StackPtr, PtrOff);

      // Create a copy of the argument in the local area of the current stack
      // frame. The copy must go outside the CALLSEQ_START..END, so the shared
      // helper chains it before the call sequence, starts a new CALLSEQ_START
      // after it and redirects the users of the old one. The helper reuses
      // the frame size stored on the existing CALLSEQ_START, which is the
      // NumBytes it was created with above.
      Chain = CallSeqStart =
          createMemcpyOutsideCallSeq(Arg, PtrOff, CallSeqStart, Flags, DAG, dl);

      // Pass the address of the aggregate copy on the stack either in a
      // physical register or in the parameter list area of the current stack
      // frame to the callee.
      Arg = PtrOff;
    }

    // When useCRBits() is true, there can be i1 arguments.
    // It is because getRegisterType(MVT::i1) => MVT::i1,
    // and for other integer types getRegisterType() => MVT::i32.
    // Extend i1 and ensure callee will get i32.
    if (Arg.getValueType() == MVT::i1)
      Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
                        dl, MVT::i32, Arg);

    if (VA.isRegLoc()) {
      seenFloatArg |= VA.getLocVT().isFloatingPoint();
      // Put argument in a physical register.
      if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
        // With SPE an f64 is split into two i32 halves; the second half goes
        // into the next assigned location, hence the extra ++i while
        // RealArgIdx still advances only once for this argument.
        bool IsLE = Subtarget.isLittleEndian();
        SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
                        DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
        RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
        SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
                           DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
        RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
                             SVal.getValue(0)));
      } else
        RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      // Put argument in the parameter list area of the current stack frame.
      assert(VA.isMemLoc());
      unsigned LocMemOffset = VA.getLocMemOffset();

      if (!IsTailCall) {
        SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
        PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
                             StackPtr, PtrOff);

        MemOpChains.push_back(
            DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
      } else {
        // Calculate and remember argument location.
        CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
                                 TailCallArguments);
      }
    }
  }

  // Join all argument stores into a single chain.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  // Set CR bit 6 to true if this is a vararg call with floating args passed in
  // registers.
  if (IsVarArg) {
    SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
    SDValue Ops[] = { Chain, InFlag };

    // Pass the glue operand only if a register copy above produced one.
    Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
                        dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1));

    InFlag = Chain.getValue(1);
  }

  if (IsTailCall)
    PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
                    TailCallArguments);

  return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
                    Callee, SPDiff, NumBytes, Ins, InVals, CB);
}

5921

5922

// Copy an argument into memory, being careful to do this outside the

5923

// call sequence for the call to which the argument belongs.

5924

SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(

5925

SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,

5926

SelectionDAG &DAG, const SDLoc &dl) const {

5927

SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,

5928

CallSeqStart.getNode()->getOperand(0),

5929

Flags, DAG, dl);

5930

// The MEMCPY must go outside the CALLSEQ_START..END.

5931

int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);

5932

SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,

5933

SDLoc(MemcpyCall));

5934

DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),

5935

NewCallSeqStart.getNode());

5936

return NewCallSeqStart;

5937

}

5938

5939

/// Lower an outgoing call for the 64-bit SVR4 (ELFv1 / ELFv2) ABI.
///
/// The lowering proceeds in these steps:
///  1. Decide whether a parameter save area is needed and compute the number
///     of stack bytes (linkage area plus parameter area) the call requires.
///  2. Open the call sequence (skipped for sibling calls) and, for tail
///     calls, load the return address and frame pointer.
///  3. Walk the outgoing arguments, assigning each one to GPRs, FPRs, VRs
///     and/or a stack slot. By-value aggregates are copied outside the call
///     sequence; for vararg calls floating-point and vector values are also
///     made available in GPRs / memory.
///  4. For indirect calls, save the TOC pointer when required and, on ELFv2,
///     pass the callee address in X12.
///  5. Emit the CopyToReg nodes for all register arguments and hand over to
///     FinishCall.
///
/// \param Chain   Incoming chain.
/// \param Callee  The call target.
/// \param CFlags  Calling convention and tail-call / vararg / indirect /
///                patchpoint flags of the call.
/// \param Outs    Description (type, flags) of each outgoing argument.
/// \param OutVals Value of each outgoing argument, parallel to \p Outs.
/// \param Ins     Description of the values returned by the call.
/// \param dl      Debug location for the generated nodes.
/// \param DAG     The DAG being built.
/// \param InVals  Forwarded to FinishCall together with \p Ins.
/// \param CB      The originating call instruction, forwarded to FinishCall.
/// \returns The value produced by FinishCall.
SDValue PPCTargetLowering::LowerCall_64SVR4(
    SDValue Chain, SDValue Callee, CallFlags CFlags,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
    const CallBase *CB) const {
  bool isELFv2ABI = Subtarget.isELFv2ABI();
  bool isLittleEndian = Subtarget.isLittleEndian();
  unsigned NumOps = Outs.size();
  bool IsSibCall = false;
  bool IsFastCall = CFlags.CallConv == CallingConv::Fast;

  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  unsigned PtrByteSize = 8;

  MachineFunction &MF = DAG.getMachineFunction();

  // A tail call without guaranteed tail-call optimization is treated as a
  // sibling call: no call sequence is opened and no stack adjustment is made.
  if (CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
    IsSibCall = true;

  // Mark this function as potentially containing a function that contains a
  // tail call. As a consequence the frame pointer will be used for dynamic
  // allocation and for restoring the caller's stack pointer in this function's
  // epilog. This is done because by tail calling the called function might
  // overwrite the value in this function's (MF) stack pointer stack slot 0(SP).
  if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();

  assert(!(IsFastCall && CFlags.IsVarArg) &&
         "fastcc not supported on varargs functions");

  // Count how many bytes are to be pushed on the stack, including the linkage
  // area, and parameter passing area.  On ELFv1, the linkage area is 48 bytes
  // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
  // area is 32 bytes reserved space for [SP][CR][LR][TOC].
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  unsigned NumBytes = LinkageSize;
  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;

  static const MCPhysReg GPR[] = {
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };

  const unsigned NumGPRs = array_lengthof(GPR);
  const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
  const unsigned NumVRs = array_lengthof(VR);

  // On ELFv2, we can avoid allocating the parameter area if all the arguments
  // can be passed to the callee in registers.
  // For the fast calling convention, there is another check below.
  // Note: We should keep consistent with LowerFormalArguments_64SVR4()
  bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall;
  if (!HasParameterArea) {
    unsigned ParamAreaSize = NumGPRs * PtrByteSize;
    unsigned AvailableFPRs = NumFPRs;
    unsigned AvailableVRs = NumVRs;
    unsigned NumBytesTmp = NumBytes;
    for (unsigned i = 0; i != NumOps; ++i) {
      if (Outs[i].Flags.isNest()) continue;
      if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
                                 PtrByteSize, LinkageSize, ParamAreaSize,
                                 NumBytesTmp, AvailableFPRs, AvailableVRs))
        HasParameterArea = true;
    }
  }

  // When using the fast calling convention, we don't provide backing for
  // arguments that will be in registers.
  unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;

  // Avoid allocating parameter area for fastcc functions if all the arguments
  // can be passed in the registers.
  if (IsFastCall)
    HasParameterArea = false;

  // Add up all the space actually used.
  for (unsigned i = 0; i != NumOps; ++i) {
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
    EVT ArgVT = Outs[i].VT;
    EVT OrigVT = Outs[i].ArgVT;

    if (Flags.isNest())
      continue;

    if (IsFastCall) {
      if (Flags.isByVal()) {
        NumGPRsUsed += (Flags.getByValSize()+7)/8;
        if (NumGPRsUsed > NumGPRs)
          HasParameterArea = true;
      } else {
        // An argument that still fits in a register of its class needs no
        // stack space at all under fastcc, hence the 'continue's below.
        switch (ArgVT.getSimpleVT().SimpleTy) {
        default: llvm_unreachable("Unexpected ValueType for argument!");
        case MVT::i1:
        case MVT::i32:
        case MVT::i64:
          if (++NumGPRsUsed <= NumGPRs)
            continue;
          break;
        case MVT::v4f32:
        case MVT::v4i32:
        case MVT::v8i16:
        case MVT::v16i8:
        case MVT::v2f64:
        case MVT::v2i64:
        case MVT::v1i128:
        case MVT::f128:
          if (++NumVRsUsed <= NumVRs)
            continue;
          break;
        case MVT::f32:
        case MVT::f64:
          if (++NumFPRsUsed <= NumFPRs)
            continue;
          break;
        }
        HasParameterArea = true;
      }
    }

    /* Respect alignment of argument on the stack.  */
    auto Alignment =
        CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
    NumBytes = alignTo(NumBytes, Alignment);

    NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
    if (Flags.isInConsecutiveRegsLast())
      NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
  }

  // Remembered only for the consistency assertion after the argument loop.
  unsigned NumBytesActuallyUsed = NumBytes;

  // In the old ELFv1 ABI,
  // the prolog code of the callee may store up to 8 GPR argument registers to
  // the stack, allowing va_start to index over them in memory if it is varargs.
  // Because we cannot tell if this is needed on the caller side, we have to
  // conservatively assume that it is needed.  As such, make sure we have at
  // least enough stack space for the caller to store the 8 GPRs.
  // In the ELFv2 ABI, we allocate the parameter area iff a callee
  // really requires memory operands, e.g. a vararg function.
  if (HasParameterArea)
    NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
  else
    NumBytes = LinkageSize;

  // Tail call needs the stack to be aligned.
  if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
    NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);

  int SPDiff = 0;

  // Calculate by how many bytes the stack has to be adjusted in case of tail
  // call optimization.
  if (!IsSibCall)
    SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);

  // To protect arguments on the stack from being clobbered in a tail call,
  // force all the loads to happen before doing any other lowering.
  if (CFlags.IsTailCall)
    Chain = DAG.getStackArgumentTokenFactor(Chain);

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass
  if (!IsSibCall)
    Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
  SDValue CallSeqStart = Chain;

  // Load the return address and frame pointer so they can be moved somewhere
  // else later.
  SDValue LROp, FPOp;
  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);

  // Figure out which arguments are going to go in registers, and which in
  // memory.  Also, if this is a vararg function, floating point operations
  // must be stored to our stack, and loaded into integer regs as well, if
  // any integer regs are available for argument passing.
  unsigned ArgOffset = LinkageSize;

  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;

  SmallVector<SDValue, 8> MemOpChains;
  for (unsigned i = 0; i != NumOps; ++i) {
    SDValue Arg = OutVals[i];
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
    EVT ArgVT = Outs[i].VT;
    EVT OrigVT = Outs[i].ArgVT;

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDValue PtrOff;

    // We re-align the argument offset for each argument, except when using the
    // fast calling convention, when we need to make sure we do that only when
    // we'll actually use a stack slot.
    auto ComputePtrOff = [&]() {
      /* Respect alignment of argument on the stack.  */
      auto Alignment =
          CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
      ArgOffset = alignTo(ArgOffset, Alignment);

      PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());

      PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
    };

    if (!IsFastCall) {
      ComputePtrOff();

      /* Compute GPR index associated with argument offset.  */
      GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
      GPR_idx = std::min(GPR_idx, NumGPRs);
    }

    // Promote integers to 64-bit values.
    if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
      // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
      unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
      Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
    }

    // FIXME memcpy is used way more than necessary.  Correctness first.
    // Note: "by value" is code for passing a structure by value, not
    // basic types.
    if (Flags.isByVal()) {
      // Note: Size includes alignment padding, so
      //   struct x { short a; char b; }
      // will have Size = 4.  With #pragma pack(1), it will have Size = 3.
      // These are the proper values we need for right-justifying the
      // aggregate in a parameter register.
      unsigned Size = Flags.getByValSize();

      // An empty aggregate parameter takes up no storage and no
      // registers.
      if (Size == 0)
        continue;

      if (IsFastCall)
        ComputePtrOff();

      // All aggregates smaller than 8 bytes must be passed right-justified.
      if (Size==1 || Size==2 || Size==4) {
        EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
        if (GPR_idx != NumGPRs) {
          SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
                                        MachinePointerInfo(), VT);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));

          ArgOffset += PtrByteSize;
          continue;
        }
      }

      // No GPR left: a small aggregate goes right-justified into its stack
      // doubleword (on big-endian the copy is shifted to the slot's end).
      if (GPR_idx == NumGPRs && Size < 8) {
        SDValue AddPtr = PtrOff;
        if (!isLittleEndian) {
          SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
                                          PtrOff.getValueType());
          AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
        }
        Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
                                                          CallSeqStart,
                                                          Flags, DAG, dl);
        ArgOffset += PtrByteSize;
        continue;
      }
      // Copy entire object into memory.  There are cases where gcc-generated
      // code assumes it is there, even if it could be put entirely into
      // registers.  (This is not what the doc says.)

      // FIXME: The above statement is likely due to a misunderstanding of the
      // documents.  All arguments must be copied into the parameter area BY
      // THE CALLEE in the event that the callee takes the address of any
      // formal argument.  That has not yet been implemented.  However, it is
      // reasonable to use the stack area as a staging area for the register
      // load.

      // Skip this for small aggregates, as we will use the same slot for a
      // right-justified copy, below.
      if (Size >= 8)
        Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
                                                          CallSeqStart,
                                                          Flags, DAG, dl);

      // When a register is available, pass a small aggregate right-justified.
      if (Size < 8 && GPR_idx != NumGPRs) {
        // The easiest way to get this right-justified in a register
        // is to copy the structure into the rightmost portion of a
        // local variable slot, then load the whole slot into the
        // register.
        // FIXME: The memcpy seems to produce pretty awful code for
        // small aggregates, particularly for packed ones.
        // FIXME: It would be preferable to use the slot in the
        // parameter save area instead of a new local variable.
        SDValue AddPtr = PtrOff;
        if (!isLittleEndian) {
          SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
                                          PtrOff.getValueType());
          AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
        }
        Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
                                                          CallSeqStart,
                                                          Flags, DAG, dl);

        // Load the slot into the register.
        SDValue Load =
            DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
        MemOpChains.push_back(Load.getValue(1));
        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));

        // Done with this argument.
        ArgOffset += PtrByteSize;
        continue;
      }

      // For aggregates larger than PtrByteSize, copy the pieces of the
      // object that fit into registers from the parameter save area.
      for (unsigned j=0; j<Size; j+=PtrByteSize) {
        SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
        SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
        if (GPR_idx != NumGPRs) {
          SDValue Load =
              DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          ArgOffset += PtrByteSize;
        } else {
          // Out of GPRs: account for the remaining doublewords in one step.
          ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
          break;
        }
      }
      continue;
    }

    switch (Arg.getSimpleValueType().SimpleTy) {
    default: llvm_unreachable("Unexpected ValueType for argument!");
    case MVT::i1:
    case MVT::i32:
    case MVT::i64:
      if (Flags.isNest()) {
        // The 'nest' parameter, if any, is passed in R11.
        RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
        break;
      }

      // These can be scalar arguments or elements of an integer array type
      // passed directly.  Clang may use those instead of "byval" aggregate
      // types to avoid forcing arguments to memory unnecessarily.
      if (GPR_idx != NumGPRs) {
        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
      } else {
        if (IsFastCall)
          ComputePtrOff();

        assert(HasParameterArea &&
               "Parameter area must exist to pass an argument in memory.");
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         true, CFlags.IsTailCall, false, MemOpChains,
                         TailCallArguments, dl);
        if (IsFastCall)
          ArgOffset += PtrByteSize;
      }
      if (!IsFastCall)
        ArgOffset += PtrByteSize;
      break;
    case MVT::f32:
    case MVT::f64: {
      // These can be scalar arguments or elements of a float array type
      // passed directly.  The latter are used to implement ELFv2 homogenous
      // float aggregates.

      // Named arguments go into FPRs first, and once they overflow, the
      // remaining arguments go into GPRs and then the parameter save area.
      // Unnamed arguments for vararg functions always go to GPRs and
      // then the parameter save area.  For now, put all arguments to vararg
      // routines always in both locations (FPR *and* GPR or stack slot).
      bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs;
      bool NeededLoad = false;

      // First load the argument into the next available FPR.
      if (FPR_idx != NumFPRs)
        RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));

      // Next, load the argument into GPR or stack slot if needed.
      if (!NeedGPROrStack)
        ;
      else if (GPR_idx != NumGPRs && !IsFastCall) {
        // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
        // once we support fp <-> gpr moves.

        // In the non-vararg case, this can only ever happen in the
        // presence of f32 array types, since otherwise we never run
        // out of FPRs before running out of GPRs.
        SDValue ArgVal;

        // Double values are always passed in a single GPR.
        if (Arg.getValueType() != MVT::f32) {
          ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);

        // Non-array float values are extended and passed in a GPR.
        } else if (!Flags.isInConsecutiveRegs()) {
          ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
          ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);

        // If we have an array of floats, we collect every odd element
        // together with its predecessor into one GPR.
        } else if (ArgOffset % PtrByteSize != 0) {
          SDValue Lo, Hi;
          Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
          Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
          if (!isLittleEndian)
            std::swap(Lo, Hi);
          ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);

        // The final element, if even, goes into the first half of a GPR.
        } else if (Flags.isInConsecutiveRegsLast()) {
          ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
          ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
          if (!isLittleEndian)
            ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
                                 DAG.getConstant(32, dl, MVT::i32));

        // Non-final even elements are skipped; they will be handled
        // together with the subsequent argument on the next go-around.
        } else
          ArgVal = SDValue();

        if (ArgVal.getNode())
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
      } else {
        if (IsFastCall)
          ComputePtrOff();

        // Single-precision floating-point values are mapped to the
        // second (rightmost) word of the stack doubleword.
        if (Arg.getValueType() == MVT::f32 &&
            !isLittleEndian && !Flags.isInConsecutiveRegs()) {
          SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
          PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
        }

        assert(HasParameterArea &&
               "Parameter area must exist to pass an argument in memory.");
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         true, CFlags.IsTailCall, false, MemOpChains,
                         TailCallArguments, dl);

        NeededLoad = true;
      }
      // When passing an array of floats, the array occupies consecutive
      // space in the argument area; only round up to the next doubleword
      // at the end of the array.  Otherwise, each float takes 8 bytes.
      if (!IsFastCall || NeededLoad) {
        ArgOffset += (Arg.getValueType() == MVT::f32 &&
                      Flags.isInConsecutiveRegs()) ? 4 : 8;
        if (Flags.isInConsecutiveRegsLast())
          ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
      }
      break;
    }
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
    case MVT::v2f64:
    case MVT::v2i64:
    case MVT::v1i128:
    case MVT::f128:
      // These can be scalar arguments or elements of a vector array type
      // passed directly.  The latter are used to implement ELFv2 homogenous
      // vector aggregates.

      // For a varargs call, named arguments go into VRs or on the stack as
      // usual; unnamed arguments always go to the stack or the corresponding
      // GPRs when within range.  For now, we always put the value in both
      // locations (or even all three).
      if (CFlags.IsVarArg) {
        assert(HasParameterArea &&
               "Parameter area must exist if we have a varargs call.");
        // We could elide this store in the case where the object fits
        // entirely in R registers.  Maybe later.
        SDValue Store =
            DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
        MemOpChains.push_back(Store);
        if (VR_idx != NumVRs) {
          SDValue Load =
              DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
        }
        ArgOffset += 16;
        // Reload the stored 16-byte value one doubleword at a time into the
        // remaining GPRs. The byte offset gets its own name so it does not
        // shadow the argument index 'i' of the enclosing loop.
        for (unsigned Off = 0; Off < 16; Off += PtrByteSize) {
          if (GPR_idx == NumGPRs)
            break;
          SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
                                   DAG.getConstant(Off, dl, PtrVT));
          SDValue Load =
              DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
        }
        break;
      }

      // Non-varargs Altivec params go into VRs or on the stack.
      if (VR_idx != NumVRs) {
        RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
      } else {
        if (IsFastCall)
          ComputePtrOff();

        assert(HasParameterArea &&
               "Parameter area must exist to pass an argument in memory.");
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         true, CFlags.IsTailCall, true, MemOpChains,
                         TailCallArguments, dl);
        if (IsFastCall)
          ArgOffset += 16;
      }

      if (!IsFastCall)
        ArgOffset += 16;
      break;
    }
  }

  // The offset reached by the assignment loop must agree with the size
  // computed by the counting loop whenever a parameter area is in use.
  assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
         "mismatch in size of parameter area");
  (void)NumBytesActuallyUsed;

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);

  // Check if this is an indirect call (MTCTR/BCTRL).
  // See prepareDescriptorIndirectCall and buildCallOperands for more
  // information about calls through function pointers in the 64-bit SVR4 ABI.
  if (CFlags.IsIndirect) {
    // For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the
    // caller in the TOC save area.
    if (isTOCSaveRestoreRequired(Subtarget)) {
      assert(!CFlags.IsTailCall && "Indirect tails calls not supported");
      // Load r2 into a virtual register and store it to the TOC save area.
      setUsesTOCBasePtr(DAG);
      SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
      // TOC save area offset.
      unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
      SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
      SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
      Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
                           MachinePointerInfo::getStack(
                               DAG.getMachineFunction(), TOCSaveOffset));
    }
    // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
    // This does not mean the MTCTR instruction must use R12; it's easier
    // to model this as an extra parameter, so do that.
    if (isELFv2ABI && !CFlags.IsPatchPoint)
      RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  if (CFlags.IsTailCall && !IsSibCall)
    PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
                    TailCallArguments);

  return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
                    Callee, SPDiff, NumBytes, Ins, InVals, CB);
}

6527

6528

SDValue PPCTargetLowering::LowerCall_Darwin(

6529

SDValue Chain, SDValue Callee, CallFlags CFlags,

6530

const SmallVectorImpl<ISD::OutputArg> &Outs,

6531

const SmallVectorImpl<SDValue> &OutVals,

6532

const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,

6533

SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,

6534

const CallBase *CB) const {

6535

unsigned NumOps = Outs.size();

6536

6537

EVT PtrVT = getPointerTy(DAG.getDataLayout());

6538

bool isPPC64 = PtrVT == MVT::i64;

6539

unsigned PtrByteSize = isPPC64 ? 8 : 4;

6540

6541

MachineFunction &MF = DAG.getMachineFunction();

6542

6543

// Mark this function as potentially containing a function that contains a

6544

// tail call. As a consequence the frame pointer will be used for dynamicalloc

6545

// and restoring the callers stack pointer in this functions epilog. This is

6546

// done because by tail calling the called function might overwrite the value

6547

// in this function's (MF) stack pointer stack slot 0(SP).

6548

if (getTargetMachine().Options.GuaranteedTailCallOpt &&

6549

CFlags.CallConv == CallingConv::Fast)

6550

MF.getInfo<PPCFunctionInfo>()->setHasFastCall();

6551

6552

// Count how many bytes are to be pushed on the stack, including the linkage

6553

// area, and parameter passing area. We start with 24/48 bytes, which is

6554

// prereserved space for [SP][CR][LR][3 x unused].

6555

unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();

6556

unsigned NumBytes = LinkageSize;

6557

6558

// Add up all the space actually used.

6559

// In 32-bit non-varargs calls, Altivec parameters all go at the end; usually

6560

// they all go in registers, but we must reserve stack space for them for

6561

// possible use by the caller. In varargs or 64-bit calls, parameters are

6562

// assigned stack space in order, with padding so Altivec parameters are

6563

// 16-byte aligned.

6564

unsigned nAltivecParamsAtEnd = 0;

6565

for (unsigned i = 0; i != NumOps; ++i) {

6566

ISD::ArgFlagsTy Flags = Outs[i].Flags;

6567

EVT ArgVT = Outs[i].VT;

6568

// Varargs Altivec parameters are padded to a 16 byte boundary.

6569

if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||

6570

ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||

6571

ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) {

6572

if (!CFlags.IsVarArg && !isPPC64) {

6573

// Non-varargs Altivec parameters go after all the non-Altivec

6574

// parameters; handle those later so we know how much padding we need.

6575

nAltivecParamsAtEnd++;

6576

continue;

6577

}

6578

// Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.

6579

NumBytes = ((NumBytes+15)/16)*16;

6580

}

6581

NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);

6582

}

6583

6584

// Allow for Altivec parameters at the end, if needed.

6585

if (nAltivecParamsAtEnd) {

6586

NumBytes = ((NumBytes+15)/16)*16;

6587

NumBytes += 16*nAltivecParamsAtEnd;

6588

}

6589

6590

// The prolog code of the callee may store up to 8 GPR argument registers to

6591

// the stack, allowing va_start to index over them in memory if its varargs.

6592

// Because we cannot tell if this is needed on the caller side, we have to

6593

// conservatively assume that it is needed. As such, make sure we have at

6594

// least enough stack space for the caller to store the 8 GPRs.

6595

NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);

6596

6597

// Tail call needs the stack to be aligned.

6598

if (getTargetMachine().Options.GuaranteedTailCallOpt &&

6599

CFlags.CallConv == CallingConv::Fast)

6600

NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);

6601

6602

// Calculate by how many bytes the stack has to be adjusted in case of tail

6603

// call optimization.

6604

int SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);

6605

6606

// To protect arguments on the stack from being clobbered in a tail call,

6607

// force all the loads to happen before doing any other lowering.

6608

if (CFlags.IsTailCall)

6609

Chain = DAG.getStackArgumentTokenFactor(Chain);

6610

6611

// Adjust the stack pointer for the new arguments...

6612

// These operations are automatically eliminated by the prolog/epilog pass

6613

Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);

6614

SDValue CallSeqStart = Chain;

6615

6616

// Load the return address and frame pointer so it can be move somewhere else

6617

// later.

6618

SDValue LROp, FPOp;

6619

Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);

6620

6621

// Set up a copy of the stack pointer for use loading and storing any

6622

// arguments that may not fit in the registers available for argument

6623

// passing.

6624

SDValue StackPtr;

6625

if (isPPC64)

6626

StackPtr = DAG.getRegister(PPC::X1, MVT::i64);

6627

else

6628

StackPtr = DAG.getRegister(PPC::R1, MVT::i32);

6629

6630

// Figure out which arguments are going to go in registers, and which in

6631

// memory. Also, if this is a vararg function, floating point operations

6632

// must be stored to our stack, and loaded into integer regs as well, if

6633

// any integer regs are available for argument passing.

6634

unsigned ArgOffset = LinkageSize;

6635

unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;

6636

6637

static const MCPhysReg GPR_32[] = { // 32-bit registers.

6638

PPC::R3, PPC::R4, PPC::R5, PPC::R6,

6639

PPC::R7, PPC::R8, PPC::R9, PPC::R10,

6640

};

6641

static const MCPhysReg GPR_64[] = { // 64-bit registers.

6642

PPC::X3, PPC::X4, PPC::X5, PPC::X6,

6643

PPC::X7, PPC::X8, PPC::X9, PPC::X10,

6644

};

6645

static const MCPhysReg VR[] = {

6646

PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,

6647

PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13

6648

};

6649

const unsigned NumGPRs = array_lengthof(GPR_32);

6650

const unsigned NumFPRs = 13;

6651

const unsigned NumVRs = array_lengthof(VR);

6652

6653

const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;

6654

6655

SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;

6656

SmallVector<TailCallArgumentInfo, 8> TailCallArguments;

6657

6658

SmallVector<SDValue, 8> MemOpChains;

6659

for (unsigned i = 0; i != NumOps; ++i) {

6660

SDValue Arg = OutVals[i];

6661

ISD::ArgFlagsTy Flags = Outs[i].Flags;

6662

6663

// PtrOff will be used to store the current argument to the stack if a

6664

// register cannot be found for it.

6665

SDValue PtrOff;

6666

6667

PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());

6668

6669

PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);

6670

6671

// On PPC64, promote integers to 64-bit values.

6672

if (isPPC64 && Arg.getValueType() == MVT::i32) {

6673

// FIXME: Should this use ANY_EXTEND if neither sext nor zext?

6674

unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;

6675

Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);

6676

}

6677

6678

// FIXME memcpy is used way more than necessary. Correctness first.

6679

// Note: "by value" is code for passing a structure by value, not

6680

// basic types.

6681

if (Flags.isByVal()) {

6682

unsigned Size = Flags.getByValSize();

6683

// Very small objects are passed right-justified. Everything else is

6684

// passed left-justified.

6685

if (Size==1 || Size==2) {

6686

EVT VT = (Size==1) ? MVT::i8 : MVT::i16;

6687

if (GPR_idx != NumGPRs) {

6688

SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,

6689

MachinePointerInfo(), VT);

6690

MemOpChains.push_back(Load.getValue(1));

6691

RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));

6692

6693

ArgOffset += PtrByteSize;

6694

} else {

6695

SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,

6696

PtrOff.getValueType());

6697

SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);

6698

Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,

6699

CallSeqStart,

6700

Flags, DAG, dl);

6701

ArgOffset += PtrByteSize;

6702

}

6703

continue;

6704

}

6705

// Copy entire object into memory. There are cases where gcc-generated

6706

// code assumes it is there, even if it could be put entirely into

6707

// registers. (This is not what the doc says.)

6708

Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,

6709

CallSeqStart,

6710

Flags, DAG, dl);

6711

6712

// For small aggregates (Darwin only) and aggregates >= PtrByteSize,

6713

// copy the pieces of the object that fit into registers from the

6714

// parameter save area.

6715

for (unsigned j=0; j<Size; j+=PtrByteSize) {

6716

SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());

6717

SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);

6718

if (GPR_idx != NumGPRs) {

6719

SDValue Load =

6720

DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());

6721

MemOpChains.push_back(Load.getValue(1));

6722

RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));

6723

ArgOffset += PtrByteSize;

6724

} else {

6725

ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;

6726

break;

6727

}

6728

}

6729

continue;

6730

}

6731

6732

switch (Arg.getSimpleValueType().SimpleTy) {

6733

default: llvm_unreachable("Unexpected ValueType for argument!")::llvm::llvm_unreachable_internal("Unexpected ValueType for argument!"
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 6733);

6734

case MVT::i1:

6735

case MVT::i32:

6736

case MVT::i64:

6737

if (GPR_idx != NumGPRs) {

6738

if (Arg.getValueType() == MVT::i1)

6739

Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, PtrVT, Arg);

6740

6741

RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));

6742

} else {

6743

LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,

6744

isPPC64, CFlags.IsTailCall, false, MemOpChains,

6745

TailCallArguments, dl);

6746

}

6747

ArgOffset += PtrByteSize;

6748

break;

6749

case MVT::f32:

6750

case MVT::f64:

6751

if (FPR_idx != NumFPRs) {

6752

RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));

6753

6754

if (CFlags.IsVarArg) {

6755

SDValue Store =

6756

DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());

6757

MemOpChains.push_back(Store);

6758

6759

// Float varargs are always shadowed in available integer registers

6760

if (GPR_idx != NumGPRs) {

6761

SDValue Load =

6762

DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo());

6763

MemOpChains.push_back(Load.getValue(1));

6764

RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));

6765

}

6766

if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){

6767

SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());

6768

PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);

6769

SDValue Load =

6770

DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo());

6771

MemOpChains.push_back(Load.getValue(1));

6772

RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));

6773

}

6774

} else {

6775

// If we have any FPRs remaining, we may also have GPRs remaining.

6776

// Args passed in FPRs consume either 1 (f32) or 2 (f64) available

6777

// GPRs.

6778

if (GPR_idx != NumGPRs)

6779

++GPR_idx;

6780

if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&

6781

!isPPC64) // PPC64 has 64-bit GPR's obviously :)

6782

++GPR_idx;

6783

}

6784

} else

6785

LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,

6786

isPPC64, CFlags.IsTailCall, false, MemOpChains,

6787

TailCallArguments, dl);

6788

if (isPPC64)

6789

ArgOffset += 8;

6790

else

6791

ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;

6792

break;

6793

case MVT::v4f32:

6794

case MVT::v4i32:

6795

case MVT::v8i16:

6796

case MVT::v16i8:

6797

if (CFlags.IsVarArg) {

6798

// These go aligned on the stack, or in the corresponding R registers

6799

// when within range. The Darwin PPC ABI doc claims they also go in

6800

// V registers; in fact gcc does this only for arguments that are

6801

// prototyped, not for those that match the ... We do it for all

6802

// arguments, seems to work.

6803

while (ArgOffset % 16 !=0) {

6804

ArgOffset += PtrByteSize;

6805

if (GPR_idx != NumGPRs)

6806

GPR_idx++;

6807

}

6808

// We could elide this store in the case where the object fits

6809

// entirely in R registers. Maybe later.

6810

PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,

6811

DAG.getConstant(ArgOffset, dl, PtrVT));

6812

SDValue Store =

6813

DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());

6814

MemOpChains.push_back(Store);

6815

if (VR_idx != NumVRs) {

6816

SDValue Load =

6817

DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());

6818

MemOpChains.push_back(Load.getValue(1));

6819

RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));

6820

}

6821

ArgOffset += 16;

6822

for (unsigned i=0; i<16; i+=PtrByteSize) {

6823

if (GPR_idx == NumGPRs)

6824

break;

6825

SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,

6826

DAG.getConstant(i, dl, PtrVT));

6827

SDValue Load =

6828

DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());

6829

MemOpChains.push_back(Load.getValue(1));

6830

RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));

6831

}

6832

break;

6833

}

6834

6835

// Non-varargs Altivec params generally go in registers, but have

6836

// stack space allocated at the end.

6837

if (VR_idx != NumVRs) {

6838

// Doesn't have GPR space allocated.

6839

RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));

6840

} else if (nAltivecParamsAtEnd==0) {

6841

// We are emitting Altivec params in order.

6842

LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,

6843

isPPC64, CFlags.IsTailCall, true, MemOpChains,

6844

TailCallArguments, dl);

6845

ArgOffset += 16;

6846

}

6847

break;

6848

}

6849

}

6850

// If all Altivec parameters fit in registers, as they usually do,

6851

// they get stack space following the non-Altivec parameters. We

6852

// don't track this here because nobody below needs it.

6853

// If there are more Altivec parameters than fit in registers emit

6854

// the stores here.

6855

if (!CFlags.IsVarArg && nAltivecParamsAtEnd > NumVRs) {

6856

unsigned j = 0;

6857

// Offset is aligned; skip 1st 12 params which go in V registers.

6858

ArgOffset = ((ArgOffset+15)/16)*16;

6859

ArgOffset += 12*16;

6860

for (unsigned i = 0; i != NumOps; ++i) {

6861

SDValue Arg = OutVals[i];

6862

EVT ArgType = Outs[i].VT;

6863

if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||

6864

ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {

6865

if (++j > NumVRs) {

6866

SDValue PtrOff;

6867

// We are emitting Altivec params in order.

6868

LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,

6869

isPPC64, CFlags.IsTailCall, true, MemOpChains,

6870

TailCallArguments, dl);

6871

ArgOffset += 16;

6872

}

6873

}

6874

}

6875

}

6876

6877

if (!MemOpChains.empty())

6878

Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);

6879

6880

// On Darwin, R12 must contain the address of an indirect callee. This does

6881

// not mean the MTCTR instruction must use R12; it's easier to model this as

6882

// an extra parameter, so do that.

6883

if (CFlags.IsIndirect) {

6884

assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.")((!CFlags.IsTailCall && "Indirect tail-calls not supported."
) ? static_cast<void> (0) : __assert_fail ("!CFlags.IsTailCall && \"Indirect tail-calls not supported.\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 6884, __PRETTY_FUNCTION__));

6885

RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 :

6886

PPC::R12), Callee));

6887

}

6888

6889

// Build a sequence of copy-to-reg nodes chained together with token chain

6890

// and flag operands which copy the outgoing args into the appropriate regs.

6891

SDValue InFlag;

6892

for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {

6893

Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,

6894

RegsToPass[i].second, InFlag);

6895

InFlag = Chain.getValue(1);

6896

}

6897

6898

if (CFlags.IsTailCall)

6899

PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,

6900

TailCallArguments);

6901

6902

return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,

6903

Callee, SPDiff, NumBytes, Ins, InVals, CB);

6904

}

6905

6906

static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,

6907

CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,

6908

CCState &State) {

6909

6910

const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(

6911

State.getMachineFunction().getSubtarget());

6912

const bool IsPPC64 = Subtarget.isPPC64();

6913

const Align PtrAlign = IsPPC64 ? Align(8) : Align(4);

6914

const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;

6915

6916

assert((!ValVT.isInteger() ||(((!ValVT.isInteger() || (ValVT.getFixedSizeInBits() <= RegVT
.getFixedSizeInBits())) && "Integer argument exceeds register size: should have been legalized"
) ? static_cast<void> (0) : __assert_fail ("(!ValVT.isInteger() || (ValVT.getFixedSizeInBits() <= RegVT.getFixedSizeInBits())) && \"Integer argument exceeds register size: should have been legalized\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 6918, __PRETTY_FUNCTION__))

6917

(ValVT.getFixedSizeInBits() <= RegVT.getFixedSizeInBits())) &&(((!ValVT.isInteger() || (ValVT.getFixedSizeInBits() <= RegVT
.getFixedSizeInBits())) && "Integer argument exceeds register size: should have been legalized"
) ? static_cast<void> (0) : __assert_fail ("(!ValVT.isInteger() || (ValVT.getFixedSizeInBits() <= RegVT.getFixedSizeInBits())) && \"Integer argument exceeds register size: should have been legalized\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 6918, __PRETTY_FUNCTION__))

6918

"Integer argument exceeds register size: should have been legalized")(((!ValVT.isInteger() || (ValVT.getFixedSizeInBits() <= RegVT
.getFixedSizeInBits())) && "Integer argument exceeds register size: should have been legalized"
) ? static_cast<void> (0) : __assert_fail ("(!ValVT.isInteger() || (ValVT.getFixedSizeInBits() <= RegVT.getFixedSizeInBits())) && \"Integer argument exceeds register size: should have been legalized\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 6918, __PRETTY_FUNCTION__));

6919

6920

if (ValVT == MVT::f128)

6921

report_fatal_error("f128 is unimplemented on AIX.");

6922

6923

if (ArgFlags.isNest())

6924

report_fatal_error("Nest arguments are unimplemented.");

6925

6926

if (ValVT.isVector() || LocVT.isVector())

6927

report_fatal_error("Vector arguments are unimplemented on AIX.");

6928

6929

static const MCPhysReg GPR_32[] = {// 32-bit registers.

6930

PPC::R3, PPC::R4, PPC::R5, PPC::R6,

6931

PPC::R7, PPC::R8, PPC::R9, PPC::R10};

6932

static const MCPhysReg GPR_64[] = {// 64-bit registers.

6933

PPC::X3, PPC::X4, PPC::X5, PPC::X6,

6934

PPC::X7, PPC::X8, PPC::X9, PPC::X10};

6935

6936

if (ArgFlags.isByVal()) {

6937

if (ArgFlags.getNonZeroByValAlign() > PtrAlign)

6938

report_fatal_error("Pass-by-value arguments with alignment greater than "

6939

"register width are not supported.");

6940

6941

const unsigned ByValSize = ArgFlags.getByValSize();

6942

6943

// An empty aggregate parameter takes up no storage and no registers,

6944

// but needs a MemLoc for a stack slot for the formal arguments side.

6945

if (ByValSize == 0) {

6946

State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,

6947

State.getNextStackOffset(), RegVT,

6948

LocInfo));

6949

return false;

6950

}

6951

6952

const unsigned StackSize = alignTo(ByValSize, PtrAlign);

6953

unsigned Offset = State.AllocateStack(StackSize, PtrAlign);

6954

for (const unsigned E = Offset + StackSize; Offset < E;

6955

Offset += PtrAlign.value()) {

6956

if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))

6957

State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));

6958

else {

6959

State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,

6960

Offset, MVT::INVALID_SIMPLE_VALUE_TYPE,

6961

LocInfo));

6962

break;

6963

}

6964

}

6965

return false;

6966

}

6967

6968

// Arguments always reserve parameter save area.

6969

switch (ValVT.SimpleTy) {

6970

default:

6971

report_fatal_error("Unhandled value type for argument.");

6972

case MVT::i64:

6973

// i64 arguments should have been split to i32 for PPC32.

6974

assert(IsPPC64 && "PPC32 should have split i64 values.")((IsPPC64 && "PPC32 should have split i64 values.") ?
static_cast<void> (0) : __assert_fail ("IsPPC64 && \"PPC32 should have split i64 values.\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 6974, __PRETTY_FUNCTION__));

6975

LLVM_FALLTHROUGH[[gnu::fallthrough]];

6976

case MVT::i1:

6977

case MVT::i32: {

6978

const unsigned Offset = State.AllocateStack(PtrAlign.value(), PtrAlign);

6979

// AIX integer arguments are always passed in register width.

6980

if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits())

6981

LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt

6982

: CCValAssign::LocInfo::ZExt;

6983

if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))

6984

State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));

6985

else

6986

State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo));

6987

6988

return false;

6989

}

6990

case MVT::f32:

6991

case MVT::f64: {

6992

// Parameter save area (PSA) is reserved even if the float passes in fpr.

6993

const unsigned StoreSize = LocVT.getStoreSize();

6994

// Floats are always 4-byte aligned in the PSA on AIX.

6995

// This includes f64 in 64-bit mode for ABI compatibility.

6996

const unsigned Offset =

6997

State.AllocateStack(IsPPC64 ? 8 : StoreSize, Align(4));

6998

unsigned FReg = State.AllocateReg(FPR);

6999

if (FReg)

7000

State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo));

7001

7002

// Reserve and initialize GPRs or initialize the PSA as required.

7003

for (unsigned I = 0; I < StoreSize; I += PtrAlign.value()) {

7004

if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) {

7005

assert(FReg && "An FPR should be available when a GPR is reserved.")((FReg && "An FPR should be available when a GPR is reserved."
) ? static_cast<void> (0) : __assert_fail ("FReg && \"An FPR should be available when a GPR is reserved.\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 7005, __PRETTY_FUNCTION__));

7006

if (State.isVarArg()) {

7007

// Successfully reserved GPRs are only initialized for vararg calls.

7008

// Custom handling is required for:

7009

// f64 in PPC32 needs to be split into 2 GPRs.

7010

// f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR.

7011

State.addLoc(

7012

CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));

7013

}

7014

} else {

7015

// If there are insufficient GPRs, the PSA needs to be initialized.

7016

// Initialization occurs even if an FPR was initialized for

7017

// compatibility with the AIX XL compiler. The full memory for the

7018

// argument will be initialized even if a prior word is saved in GPR.

7019

// A custom memLoc is used when the argument also passes in FPR so

7020

// that the callee handling can skip over it easily.

7021

State.addLoc(

7022

FReg ? CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT,

7023

LocInfo)

7024

: CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));

7025

break;

7026

}

7027

}

7028

7029

return false;

7030

}

7031

}

7032

return true;

7033

}

7034

7035

/// Pick the register class used for a formal argument of simple value type
/// \p SVT.
///
/// Integer types (i1, i32, i64) map to G8RC when \p IsPPC64 is set and to
/// GPRC otherwise; f32 maps to F4RC and f64 to F8RC. Any other type is
/// reported as a fatal error. i64 is asserted not to appear when \p IsPPC64
/// is false.
static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT,
                                                    bool IsPPC64) {
  assert((IsPPC64 || SVT != MVT::i64) &&
         "i64 should have been split for 32-bit codegen.");

  switch (SVT) {
  default:
    report_fatal_error("Unexpected value type for formal argument");
  case MVT::i1:
  case MVT::i32:
  case MVT::i64:
    return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
  case MVT::f32:
    return &PPC::F4RCRegClass;
  case MVT::f64:
    return &PPC::F8RCRegClass;
  }
}

7053

7054

/// Narrow a scalar integer argument that arrived in the wider location type
/// \p LocVT back down to its declared type \p ValVT.
///
/// When \p Flags says the value was sign- or zero-extended, an AssertSext or
/// AssertZext node (carrying \p ValVT) is wrapped around \p ArgValue first;
/// with neither flag set the value is truncated directly. Both types must be
/// scalar integers and \p ValVT must be strictly narrower than \p LocVT
/// (asserted).
///
/// \returns the TRUNCATE node of type \p ValVT.
static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT,
                                        SelectionDAG &DAG, SDValue ArgValue,
                                        MVT LocVT, const SDLoc &dl) {
  assert(ValVT.isScalarInteger() && LocVT.isScalarInteger());
  assert(ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits());

  if (Flags.isSExt())
    ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue,
                           DAG.getValueType(ValVT));
  else if (Flags.isZExt())
    ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue,
                           DAG.getValueType(ValVT));

  return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
}

7069

7070

static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL) {

7071

const unsigned LASize = FL->getLinkageSize();

7072

7073

if (PPC::GPRCRegClass.contains(Reg)) {

7074

assert(Reg >= PPC::R3 && Reg <= PPC::R10 &&((Reg >= PPC::R3 && Reg <= PPC::R10 && "Reg must be a valid argument register!"
) ? static_cast<void> (0) : __assert_fail ("Reg >= PPC::R3 && Reg <= PPC::R10 && \"Reg must be a valid argument register!\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 7075, __PRETTY_FUNCTION__))

7075

"Reg must be a valid argument register!")((Reg >= PPC::R3 && Reg <= PPC::R10 && "Reg must be a valid argument register!"
) ? static_cast<void> (0) : __assert_fail ("Reg >= PPC::R3 && Reg <= PPC::R10 && \"Reg must be a valid argument register!\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 7075, __PRETTY_FUNCTION__));

7076

return LASize + 4 * (Reg - PPC::R3);

7077

}

7078

7079

if (PPC::G8RCRegClass.contains(Reg)) {

7080

assert(Reg >= PPC::X3 && Reg <= PPC::X10 &&((Reg >= PPC::X3 && Reg <= PPC::X10 && "Reg must be a valid argument register!"
) ? static_cast<void> (0) : __assert_fail ("Reg >= PPC::X3 && Reg <= PPC::X10 && \"Reg must be a valid argument register!\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 7081, __PRETTY_FUNCTION__))

7081

"Reg must be a valid argument register!")((Reg >= PPC::X3 && Reg <= PPC::X10 && "Reg must be a valid argument register!"
) ? static_cast<void> (0) : __assert_fail ("Reg >= PPC::X3 && Reg <= PPC::X10 && \"Reg must be a valid argument register!\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 7081, __PRETTY_FUNCTION__));

7082

return LASize + 8 * (Reg - PPC::X3);

7083

}

7084

7085

llvm_unreachable("Only general purpose registers expected.")::llvm::llvm_unreachable_internal("Only general purpose registers expected."
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 7085);

7086

}

7087

7088

// AIX ABI Stack Frame Layout:
//
// Low Memory  +--------------------------------------------+
// SP    +---> | Back chain                                 | ---+
//       |     +--------------------------------------------+    |
//       |     | Saved Condition Register                   |    |
//       |     +--------------------------------------------+    |
//       |     | Saved Linkage Register                     |    |
//       |     +--------------------------------------------+    | Linkage Area
//       |     | Reserved for compilers                     |    |
//       |     +--------------------------------------------+    |
//       |     | Reserved for binders                       |    |
//       |     +--------------------------------------------+    |
//       |     | Saved TOC pointer                          | ---+
//       |     +--------------------------------------------+
//       |     | Parameter save area                        |
//       |     +--------------------------------------------+
//       |     | Alloca space                               |
//       |     +--------------------------------------------+
//       |     | Local variable space                       |
//       |     +--------------------------------------------+
//       |     | Float/int conversion temporary             |
//       |     +--------------------------------------------+
//       |     | Save area for AltiVec registers            |
//       |     +--------------------------------------------+
//       |     | AltiVec alignment padding                  |
//       |     +--------------------------------------------+
//       |     | Save area for VRSAVE register              |
//       |     +--------------------------------------------+
//       |     | Save area for General Purpose registers    |
//       |     +--------------------------------------------+
//       |     | Save area for Floating Point registers     |
//       |     +--------------------------------------------+
//       +---- | Back chain                                 |
// High Memory +--------------------------------------------+
//
// Specifications:
// AIX 7.2 Assembler Language Reference
// Subroutine linkage convention

7127

7128

SDValue PPCTargetLowering::LowerFormalArguments_AIX(

7129

SDValue Chain, CallingConv::ID CallConv, bool isVarArg,

7130

const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,

7131

SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

7132

7133

assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold ||(((CallConv == CallingConv::C || CallConv == CallingConv::Cold
|| CallConv == CallingConv::Fast) && "Unexpected calling convention!"
) ? static_cast<void> (0) : __assert_fail ("(CallConv == CallingConv::C || CallConv == CallingConv::Cold || CallConv == CallingConv::Fast) && \"Unexpected calling convention!\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 7135, __PRETTY_FUNCTION__))

7134

CallConv == CallingConv::Fast) &&(((CallConv == CallingConv::C || CallConv == CallingConv::Cold
|| CallConv == CallingConv::Fast) && "Unexpected calling convention!"
) ? static_cast<void> (0) : __assert_fail ("(CallConv == CallingConv::C || CallConv == CallingConv::Cold || CallConv == CallingConv::Fast) && \"Unexpected calling convention!\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 7135, __PRETTY_FUNCTION__))

7135

"Unexpected calling convention!")(((CallConv == CallingConv::C || CallConv == CallingConv::Cold
|| CallConv == CallingConv::Fast) && "Unexpected calling convention!"
) ? static_cast<void> (0) : __assert_fail ("(CallConv == CallingConv::C || CallConv == CallingConv::Cold || CallConv == CallingConv::Fast) && \"Unexpected calling convention!\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 7135, __PRETTY_FUNCTION__));

7136

7137

if (getTargetMachine().Options.GuaranteedTailCallOpt)

7138

report_fatal_error("Tail call support is unimplemented on AIX.");

7139

7140

if (useSoftFloat())

7141

report_fatal_error("Soft float support is unimplemented on AIX.");

7142

7143

const PPCSubtarget &Subtarget =

7144

static_cast<const PPCSubtarget &>(DAG.getSubtarget());

7145

7146

const bool IsPPC64 = Subtarget.isPPC64();

7147

const unsigned PtrByteSize = IsPPC64 ? 8 : 4;

7148

7149

// Assign locations to all of the incoming arguments.

7150

SmallVector<CCValAssign, 16> ArgLocs;

7151

MachineFunction &MF = DAG.getMachineFunction();

7152

MachineFrameInfo &MFI = MF.getFrameInfo();

7153

CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());

7154

7155

const EVT PtrVT = getPointerTy(MF.getDataLayout());

7156

// Reserve space for the linkage area on the stack.

7157

const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();

7158

CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));

7159

CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);

7160

7161

SmallVector<SDValue, 8> MemOps;

7162

7163

for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {

7164

CCValAssign &VA = ArgLocs[I++];

7165

MVT LocVT = VA.getLocVT();

7166

ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;

7167

7168

// For compatibility with the AIX XL compiler, the float args in the

7169

// parameter save area are initialized even if the argument is available

7170

// in register. The caller is required to initialize both the register

7171

// and memory, however, the callee can choose to expect it in either.

7172

// The memloc is dismissed here because the argument is retrieved from

7173

// the register.

7174

if (VA.isMemLoc() && VA.needsCustom())

7175

continue;

7176

7177

if (Flags.isByVal() && VA.isMemLoc()) {

7178

const unsigned Size =

7179

alignTo(Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize,

7180

PtrByteSize);

7181

const int FI = MF.getFrameInfo().CreateFixedObject(

7182

Size, VA.getLocMemOffset(), /* IsImmutable */ false,

7183

/* IsAliased */ true);

7184

SDValue FIN = DAG.getFrameIndex(FI, PtrVT);

7185

InVals.push_back(FIN);

7186

7187

continue;

7188

}

7189

7190

if (Flags.isByVal()) {

7191

assert(VA.isRegLoc() && "MemLocs should already be handled.")((VA.isRegLoc() && "MemLocs should already be handled."
) ? static_cast<void> (0) : __assert_fail ("VA.isRegLoc() && \"MemLocs should already be handled.\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 7191, __PRETTY_FUNCTION__));

7192

7193

const MCPhysReg ArgReg = VA.getLocReg();

7194

const PPCFrameLowering *FL = Subtarget.getFrameLowering();

7195

7196

if (Flags.getNonZeroByValAlign() > PtrByteSize)

7197

report_fatal_error("Over aligned byvals not supported yet.");

7198

7199

const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);

7200

const int FI = MF.getFrameInfo().CreateFixedObject(

7201

StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false,

7202

/* IsAliased */ true);

7203

SDValue FIN = DAG.getFrameIndex(FI, PtrVT);

7204

InVals.push_back(FIN);

7205

7206

// Add live ins for all the RegLocs for the same ByVal.

7207

const TargetRegisterClass *RegClass =

7208

IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;

7209

7210

auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,

7211

unsigned Offset) {

7212

const unsigned VReg = MF.addLiveIn(PhysReg, RegClass);

7213

// Since the callers side has left justified the aggregate in the

7214

// register, we can simply store the entire register into the stack

7215

// slot.

7216

SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);

7217

// The store to the fixedstack object is needed because accessing a

7218

// field of the ByVal will use a gep and load. Ideally we will optimize

7219

// to extracting the value from the register directly, and elide the

7220

// stores when the arguments address is not taken, but that will need to

7221

// be future work.

7222

SDValue Store = DAG.getStore(

7223

CopyFrom.getValue(1), dl, CopyFrom,

7224

DAG.getObjectPtrOffset(dl, FIN, TypeSize::Fixed(Offset)),

7225

MachinePointerInfo::getFixedStack(MF, FI, Offset));

7226

7227

MemOps.push_back(Store);

7228

};

7229

7230

unsigned Offset = 0;

7231

HandleRegLoc(VA.getLocReg(), Offset);

7232

Offset += PtrByteSize;

7233

for (; Offset != StackSize && ArgLocs[I].isRegLoc();

7234

Offset += PtrByteSize) {

7235

assert(ArgLocs[I].getValNo() == VA.getValNo() &&((ArgLocs[I].getValNo() == VA.getValNo() && "RegLocs should be for ByVal argument."
) ? static_cast<void> (0) : __assert_fail ("ArgLocs[I].getValNo() == VA.getValNo() && \"RegLocs should be for ByVal argument.\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 7236, __PRETTY_FUNCTION__))

7236

"RegLocs should be for ByVal argument.")((ArgLocs[I].getValNo() == VA.getValNo() && "RegLocs should be for ByVal argument."
) ? static_cast<void> (0) : __assert_fail ("ArgLocs[I].getValNo() == VA.getValNo() && \"RegLocs should be for ByVal argument.\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 7236, __PRETTY_FUNCTION__));

7237

7238

const CCValAssign RL = ArgLocs[I++];

7239

HandleRegLoc(RL.getLocReg(), Offset);

7240

}

7241

7242

if (Offset != StackSize) {

7243

assert(ArgLocs[I].getValNo() == VA.getValNo() &&((ArgLocs[I].getValNo() == VA.getValNo() && "Expected MemLoc for remaining bytes."
) ? static_cast<void> (0) : __assert_fail ("ArgLocs[I].getValNo() == VA.getValNo() && \"Expected MemLoc for remaining bytes.\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 7244, __PRETTY_FUNCTION__))

7244

"Expected MemLoc for remaining bytes.")((ArgLocs[I].getValNo() == VA.getValNo() && "Expected MemLoc for remaining bytes."
) ? static_cast<void> (0) : __assert_fail ("ArgLocs[I].getValNo() == VA.getValNo() && \"Expected MemLoc for remaining bytes.\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 7244, __PRETTY_FUNCTION__));

7245

assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes.")((ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes."
) ? static_cast<void> (0) : __assert_fail ("ArgLocs[I].isMemLoc() && \"Expected MemLoc for remaining bytes.\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 7245, __PRETTY_FUNCTION__));

7246

// Consume the MemLoc. The InVal has already been emitted, so nothing

7247

// more needs to be done.

7248

++I;

7249

}

7250

7251

continue;

7252

}

7253

7254

EVT ValVT = VA.getValVT();

7255

if (VA.isRegLoc() && !VA.needsCustom()) {

7256

MVT::SimpleValueType SVT = ValVT.getSimpleVT().SimpleTy;

7257

unsigned VReg =

7258

MF.addLiveIn(VA.getLocReg(), getRegClassForSVT(SVT, IsPPC64));

7259

SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);

7260

if (ValVT.isScalarInteger() &&

7261

(ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits())) {

7262

ArgValue =

7263

truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);

7264

}

7265

InVals.push_back(ArgValue);

7266

continue;

7267

}

7268

if (VA.isMemLoc()) {

7269

const unsigned LocSize = LocVT.getStoreSize();

7270

const unsigned ValSize = ValVT.getStoreSize();

7271

assert((ValSize <= LocSize) &&(((ValSize <= LocSize) && "Object size is larger than size of MemLoc"
) ? static_cast<void> (0) : __assert_fail ("(ValSize <= LocSize) && \"Object size is larger than size of MemLoc\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 7272, __PRETTY_FUNCTION__))

7272

"Object size is larger than size of MemLoc")(((ValSize <= LocSize) && "Object size is larger than size of MemLoc"
) ? static_cast<void> (0) : __assert_fail ("(ValSize <= LocSize) && \"Object size is larger than size of MemLoc\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 7272, __PRETTY_FUNCTION__));

7273

int CurArgOffset = VA.getLocMemOffset();

7274

// Objects are right-justified because AIX is big-endian.

7275

if (LocSize > ValSize)

7276

CurArgOffset += LocSize - ValSize;

7277

// Potential tail calls could cause overwriting of argument stack slots.

7278

const bool IsImmutable =

7279

!(getTargetMachine().Options.GuaranteedTailCallOpt &&

7280

(CallConv == CallingConv::Fast));

7281

int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);

7282

SDValue FIN = DAG.getFrameIndex(FI, PtrVT);

7283

SDValue ArgValue =

7284

DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());

7285

InVals.push_back(ArgValue);

7286

continue;

7287

}

7288

}

7289

7290

// On AIX a minimum of 8 words is saved to the parameter save area.

7291

const unsigned MinParameterSaveArea = 8 * PtrByteSize;

7292

// Area that is at least reserved in the caller of this function.

7293

unsigned CallerReservedArea =

7294

std::max(CCInfo.getNextStackOffset(), LinkageSize + MinParameterSaveArea);

7295

7296

// Set the size that is at least reserved in caller of this function. Tail

7297

// call optimized function's reserved stack space needs to be aligned so

7298

// that taking the difference between two stack areas will result in an

7299

// aligned stack.

7300

CallerReservedArea =

7301

EnsureStackAlignment(Subtarget.getFrameLowering(), CallerReservedArea);

7302

PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

7303

FuncInfo->setMinReservedArea(CallerReservedArea);

7304

7305

if (isVarArg) {

7306

FuncInfo->setVarArgsFrameIndex(

7307

MFI.CreateFixedObject(PtrByteSize, CCInfo.getNextStackOffset(), true));

7308

SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

7309

7310

static const MCPhysReg GPR_32[] = {PPC::R3, PPC::R4, PPC::R5, PPC::R6,

7311

PPC::R7, PPC::R8, PPC::R9, PPC::R10};

7312

7313

static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,

7314

PPC::X7, PPC::X8, PPC::X9, PPC::X10};

7315

const unsigned NumGPArgRegs = array_lengthof(IsPPC64 ? GPR_64 : GPR_32);

7316

7317

// The fixed integer arguments of a variadic function are stored to the

7318

// VarArgsFrameIndex on the stack so that they may be loaded by

7319

// dereferencing the result of va_next.

7320

for (unsigned GPRIndex =

7321

(CCInfo.getNextStackOffset() - LinkageSize) / PtrByteSize;

7322

GPRIndex < NumGPArgRegs; ++GPRIndex) {

7323

7324

const unsigned VReg =

7325

IsPPC64 ? MF.addLiveIn(GPR_64[GPRIndex], &PPC::G8RCRegClass)

7326

: MF.addLiveIn(GPR_32[GPRIndex], &PPC::GPRCRegClass);

7327

7328

SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);

7329

SDValue Store =

7330

DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());

7331

MemOps.push_back(Store);

7332

// Increment the address for the next argument to store.

7333

SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);

7334

FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);

7335

}

7336

}

7337

7338

if (!MemOps.empty())

7339

Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);

7340

7341

return Chain;

7342

}

7343

7344

/// Lower an outgoing call using the AIX calling convention (CC_AIX).
///
/// The argument locations computed by CC_AIX are walked in order and each one
/// is materialized as a register copy (collected in RegsToPass), a store into
/// the outgoing argument area, or, for by-value arguments, a sequence of
/// register-sized loads, a residue load/shift/or chain, and/or a memcpy of the
/// bytes that do not pass in registers. For indirect calls the TOC base
/// register is stored to its save slot before the call. The remainder of the
/// call sequence is produced by FinishCall.
///
/// Patchpoint calls and subtargets with Altivec are rejected with
/// report_fatal_error.
SDValue PPCTargetLowering::LowerCall_AIX(
    SDValue Chain, SDValue Callee, CallFlags CFlags,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
    const CallBase *CB) const {
  // See PPCTargetLowering::LowerFormalArguments_AIX() for a description of the
  // AIX ABI stack frame layout.

  assert((CFlags.CallConv == CallingConv::C ||
          CFlags.CallConv == CallingConv::Cold ||
          CFlags.CallConv == CallingConv::Fast) &&
         "Unexpected calling convention!");

  if (CFlags.IsPatchPoint)
    report_fatal_error("This call type is unimplemented on AIX.");

  const PPCSubtarget& Subtarget =
      static_cast<const PPCSubtarget&>(DAG.getSubtarget());
  if (Subtarget.hasAltivec())
    report_fatal_error("Altivec support is unimplemented on AIX.");

  MachineFunction &MF = DAG.getMachineFunction();
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
                 *DAG.getContext());

  // Reserve space for the linkage save area (LSA) on the stack.
  // In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
  //   [SP][CR][LR][2 x reserved][TOC].
  // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
  const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  const bool IsPPC64 = Subtarget.isPPC64();
  const EVT PtrVT = getPointerTy(DAG.getDataLayout());
  const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
  CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
  CCInfo.AnalyzeCallOperands(Outs, CC_AIX);

  // The prolog code of the callee may store up to 8 GPR argument registers to
  // the stack, allowing va_start to index over them in memory if the callee
  // is variadic.
  // Because we cannot tell if this is needed on the caller side, we have to
  // conservatively assume that it is needed.  As such, make sure we have at
  // least enough stack space for the caller to store the 8 GPRs.
  const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
  const unsigned NumBytes = std::max(LinkageSize + MinParameterSaveAreaSize,
                                     CCInfo.getNextStackOffset());

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass.
  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
  SDValue CallSeqStart = Chain;

  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;

  // Set up a copy of the stack pointer for loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  const SDValue StackPtr = IsPPC64 ? DAG.getRegister(PPC::X1, MVT::i64)
                                   : DAG.getRegister(PPC::R1, MVT::i32);

  // I is advanced inside the body because a single argument may own several
  // consecutive entries of ArgLocs.
  for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
    const unsigned ValNo = ArgLocs[I].getValNo();
    SDValue Arg = OutVals[ValNo];
    ISD::ArgFlagsTy Flags = Outs[ValNo].Flags;

    if (Flags.isByVal()) {
      const unsigned ByValSize = Flags.getByValSize();

      // Nothing to do for zero-sized ByVals on the caller side.
      if (!ByValSize) {
        ++I;
        continue;
      }

      // Zero-extending load of a VT-sized piece of the by-value object at
      // byte offset LoadOffset, widened to pointer width.
      auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
        return DAG.getExtLoad(
            ISD::ZEXTLOAD, dl, PtrVT, Chain,
            (LoadOffset != 0)
                ? DAG.getObjectPtrOffset(dl, Arg, TypeSize::Fixed(LoadOffset))
                : Arg,
            MachinePointerInfo(), VT);
      };

      unsigned LoadOffset = 0;

      // Initialize registers, which are fully occupied by the by-val argument.
      while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs[I].isRegLoc()) {
        SDValue Load = GetLoad(PtrVT, LoadOffset);
        MemOpChains.push_back(Load.getValue(1));
        LoadOffset += PtrByteSize;
        const CCValAssign &ByValVA = ArgLocs[I++];
        assert(ByValVA.getValNo() == ValNo &&
               "Unexpected location for pass-by-value argument.");
        RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), Load));
      }

      if (LoadOffset == ByValSize)
        continue;

      // There must be one more loc to handle the remainder.
      assert(ArgLocs[I].getValNo() == ValNo &&
             "Expected additional location for by-value argument.");

      if (ArgLocs[I].isMemLoc()) {
        assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg.");
        const CCValAssign &ByValVA = ArgLocs[I++];
        ISD::ArgFlagsTy MemcpyFlags = Flags;
        // Only memcpy the bytes that don't pass in register.
        MemcpyFlags.setByValSize(ByValSize - LoadOffset);
        Chain = CallSeqStart = createMemcpyOutsideCallSeq(
            (LoadOffset != 0)
                ? DAG.getObjectPtrOffset(dl, Arg, TypeSize::Fixed(LoadOffset))
                : Arg,
            DAG.getObjectPtrOffset(dl, StackPtr,
                                   TypeSize::Fixed(ByValVA.getLocMemOffset())),
            CallSeqStart, MemcpyFlags, DAG, dl);
        continue;
      }

      // Initialize the final register residue.
      // Any residue that occupies the final by-val arg register must be
      // left-justified on AIX. Loads must be a power-of-2 size and cannot be
      // larger than the ByValSize. For example: a 7 byte by-val arg requires 4,
      // 2 and 1 byte loads.
      const unsigned ResidueBytes = ByValSize % PtrByteSize;
      assert(ResidueBytes != 0 && LoadOffset + PtrByteSize > ByValSize &&
             "Unexpected register residue for by-value argument.");
      SDValue ResidueVal;
      for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
        const unsigned N = PowerOf2Floor(ResidueBytes - Bytes);
        const MVT VT =
            N == 1 ? MVT::i8
                   : ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
        SDValue Load = GetLoad(VT, LoadOffset);
        MemOpChains.push_back(Load.getValue(1));
        LoadOffset += N;
        Bytes += N;

        // By-val arguments are passed left-justified in register.
        // Every load here needs to be shifted, otherwise a full register load
        // should have been used.
        assert(PtrVT.getSimpleVT().getSizeInBits() > (Bytes * 8) &&
               "Unexpected load emitted during handling of pass-by-value "
               "argument.");
        unsigned NumSHLBits = PtrVT.getSimpleVT().getSizeInBits() - (Bytes * 8);
        EVT ShiftAmountTy =
            getShiftAmountTy(Load->getValueType(0), DAG.getDataLayout());
        SDValue SHLAmt = DAG.getConstant(NumSHLBits, dl, ShiftAmountTy);
        SDValue ShiftedLoad =
            DAG.getNode(ISD::SHL, dl, Load.getValueType(), Load, SHLAmt);
        ResidueVal = ResidueVal ? DAG.getNode(ISD::OR, dl, PtrVT, ResidueVal,
                                              ShiftedLoad)
                                : ShiftedLoad;
      }

      const CCValAssign &ByValVA = ArgLocs[I++];
      RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), ResidueVal));
      continue;
    }

    CCValAssign &VA = ArgLocs[I++];
    const MVT LocVT = VA.getLocVT();
    const MVT ValVT = VA.getValVT();

    switch (VA.getLocInfo()) {
    default:
      report_fatal_error("Unexpected argument extension type.");
    case CCValAssign::Full:
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    }

    if (VA.isRegLoc() && !VA.needsCustom()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
      continue;
    }

    if (VA.isMemLoc()) {
      SDValue PtrOff =
          DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
      PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
      MemOpChains.push_back(
          DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));

      continue;
    }

    // Custom handling is used for GPR initializations for vararg float
    // arguments.
    assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
           ValVT.isFloatingPoint() && LocVT.isInteger() &&
           "Unexpected register handling for calling convention.");

    SDValue ArgAsInt =
        DAG.getBitcast(MVT::getIntegerVT(ValVT.getSizeInBits()), Arg);

    if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize())
      // f32 in 32-bit GPR
      // f64 in 64-bit GPR
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
    else if (Arg.getValueType().getFixedSizeInBits() <
             LocVT.getFixedSizeInBits())
      // f32 in 64-bit GPR.
      RegsToPass.push_back(std::make_pair(
          VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, LocVT)));
    else {
      // f64 in two 32-bit GPRs
      // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
      assert(Arg.getValueType() == MVT::f64 && CFlags.IsVarArg && !IsPPC64 &&
             "Unexpected custom register for argument!");
      CCValAssign &GPR1 = VA;
      SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt,
                                     DAG.getConstant(32, dl, MVT::i8));
      RegsToPass.push_back(std::make_pair(
          GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32)));

      if (I != E) {
        // If only 1 GPR was available, there will only be one custom GPR and
        // the argument will also pass in memory.
        CCValAssign &PeekArg = ArgLocs[I];
        // The next location is the second half of this f64 only if it is a
        // register location that belongs to the same argument as VA.
        if (PeekArg.isRegLoc() && PeekArg.getValNo() == VA.getValNo()) {
          assert(PeekArg.needsCustom() && "A second custom GPR is expected.");
          CCValAssign &GPR2 = ArgLocs[I++];
          RegsToPass.push_back(std::make_pair(
              GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
        }
      }
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);

  // For indirect calls, we need to save the TOC base to the stack for
  // restoration after the call.
  if (CFlags.IsIndirect) {
    assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
    const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
    const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
    const MVT PtrVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
    const unsigned TOCSaveOffset =
        Subtarget.getFrameLowering()->getTOCSaveOffset();

    setUsesTOCBasePtr(DAG);
    SDValue Val = DAG.getCopyFromReg(Chain, dl, TOCBaseReg, PtrVT);
    SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
    SDValue StackPtr = DAG.getRegister(StackPtrReg, PtrVT);
    SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
    Chain = DAG.getStore(
        Val.getValue(1), dl, Val, AddPtr,
        MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (const auto &Reg : RegsToPass) {
    Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InFlag);
    InFlag = Chain.getValue(1);
  }

  const int SPDiff = 0;
  return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
                    Callee, SPDiff, NumBytes, Ins, InVals, CB);
}

7617

7618

bool

7619

PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,

7620

MachineFunction &MF, bool isVarArg,

7621

const SmallVectorImpl<ISD::OutputArg> &Outs,

7622

LLVMContext &Context) const {

7623

SmallVector<CCValAssign, 16> RVLocs;

7624

CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);

7625

return CCInfo.CheckReturn(

7626

Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)

7627

? RetCC_PPC_Cold

7628

: RetCC_PPC);

7629

}

7630

7631

/// Lower a function return.
///
/// Each return location produced by the return calling convention is filled
/// with a glued CopyToReg, and a PPCISD::RET_FLAG node is returned whose
/// operands are the updated chain, one register operand per location, and the
/// glue value when one was produced.
SDValue PPCTargetLowering::LowerReturn(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals, const SDLoc &dl,
    SelectionDAG &DAG) const {
  // RetCC_PPC_Cold is only selected for cold calls on an SVR4 ABI subtarget.
  const bool UseColdRetCC =
      Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold;

  SmallVector<CCValAssign, 16> ReturnLocs;
  CCState State(CallConv, isVarArg, DAG.getMachineFunction(), ReturnLocs,
                *DAG.getContext());
  State.AnalyzeReturn(Outs, UseColdRetCC ? RetCC_PPC_Cold : RetCC_PPC);

  SDValue Glue;
  // Slot 0 holds the chain; it is refreshed once all copies are emitted.
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // LocIdx walks the return locations, ValIdx walks the returned values. They
  // diverge when one value is split across two locations (SPE f64 below).
  for (unsigned LocIdx = 0, ValIdx = 0; LocIdx != ReturnLocs.size();
       ++LocIdx, ++ValIdx) {
    const CCValAssign *Loc = &ReturnLocs[LocIdx];
    assert(Loc->isRegLoc() && "Can only return in registers!");

    SDValue RetVal = OutVals[ValIdx];

    if (Subtarget.isAIXABI() &&
        (Loc->getLocVT().isVector() || Loc->getValVT().isVector()))
      report_fatal_error("Returning vector types not yet supported on AIX.");

    // Widen the value to the location type as requested by the convention.
    switch (Loc->getLocInfo()) {
    case CCValAssign::Full:
      break;
    case CCValAssign::AExt:
      RetVal = DAG.getNode(ISD::ANY_EXTEND, dl, Loc->getLocVT(), RetVal);
      break;
    case CCValAssign::ZExt:
      RetVal = DAG.getNode(ISD::ZERO_EXTEND, dl, Loc->getLocVT(), RetVal);
      break;
    case CCValAssign::SExt:
      RetVal = DAG.getNode(ISD::SIGN_EXTEND, dl, Loc->getLocVT(), RetVal);
      break;
    default:
      llvm_unreachable("Unknown loc info!");
    }

    const bool SplitForSPE =
        Subtarget.hasSPE() && Loc->getLocVT() == MVT::f64;
    if (SplitForSPE) {
      const bool IsLE = Subtarget.isLittleEndian();
      // Legalize ret f64 -> ret 2 x i32.
      SDValue Half =
          DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, RetVal,
                      DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
      Chain = DAG.getCopyToReg(Chain, dl, Loc->getLocReg(), Half, Glue);
      RetOps.push_back(DAG.getRegister(Loc->getLocReg(), Loc->getLocVT()));
      Half = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, RetVal,
                         DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
      Glue = Chain.getValue(1);
      // The second half lives in the next return location.
      Loc = &ReturnLocs[++LocIdx];
      Chain = DAG.getCopyToReg(Chain, dl, Loc->getLocReg(), Half, Glue);
    } else {
      Chain = DAG.getCopyToReg(Chain, dl, Loc->getLocReg(), RetVal, Glue);
    }

    Glue = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(Loc->getLocReg(), Loc->getLocVT()));
  }

  RetOps[0] = Chain; // Update chain.

  // Add the flag if we have it.
  if (Glue.getNode())
    RetOps.push_back(Glue);

  return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps);
}

7699

7700

/// Lower GET_DYNAMIC_AREA_OFFSET to a PPCISD::DYNAREAOFFSET node.
///
/// The new node takes the incoming chain (operand 0 of \p Op) and the frame
/// index of the frame pointer save slot, and produces a single value with the
/// same type as \p Op.
SDValue
PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc Loc(Op);

  // Operands of the target node: chain first, then the FP save slot index.
  SDValue Operands[2] = {Op.getOperand(0), getFramePointerFrameIndex(DAG)};

  // The result is an integer of whatever type the original node produced.
  SDVTList ResultTys = DAG.getVTList(Op.getValueType());
  return DAG.getNode(PPCISD::DYNAREAOFFSET, Loc, ResultTys, Operands);
}

7716

7717

/// Lower STACKRESTORE.
///
/// Popping a dynamic allocation must keep the SP link intact, so the word
/// addressed by the stack pointer is loaded before the stack pointer is
/// overwritten with the saved value (operand 1 of \p Op) and is stored back
/// through the stack pointer afterwards. The three nodes are chained in that
/// order; the returned value is the final store.
SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDLoc Loc(Op);
  EVT PtrTy = getPointerTy(DAG.getDataLayout());

  // X1 is the stack pointer on 64-bit subtargets, R1 otherwise.
  unsigned StackReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
  SDValue StackRegNode = DAG.getRegister(StackReg, PtrTy);

  SDValue InChain = Op.getOperand(0);
  SDValue SavedSP = Op.getOperand(1);

  // 1. Read the link word through the current stack pointer.
  SDValue LinkWord =
      DAG.getLoad(PtrTy, Loc, InChain, StackRegNode, MachinePointerInfo());

  // 2. Put the saved value into the stack pointer register.
  SDValue CopyChain =
      DAG.getCopyToReg(LinkWord.getValue(1), Loc, StackReg, SavedSP);

  // 3. Write the link word back through the (now restored) stack pointer.
  return DAG.getStore(CopyChain, Loc, LinkWord, StackRegNode,
                      MachinePointerInfo());
}

7744

7745

/// Return a FrameIndex node for the return address save slot, creating the
/// fixed stack object on first use.
///
/// The index is cached in PPCFunctionInfo; a cached value of zero is treated
/// as "not created yet".
SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  const bool Is64Bit = Subtarget.isPPC64();
  EVT PtrTy = getPointerTy(MF.getDataLayout());

  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  int SlotIdx = FuncInfo->getReturnAddrSaveIndex();

  if (SlotIdx == 0) {
    // The slot lives at the fixed offset reported by frame lowering and is one
    // pointer wide (8 bytes on 64-bit subtargets, 4 otherwise).
    int SaveOffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
    SlotIdx =
        MF.getFrameInfo().CreateFixedObject(Is64Bit ? 8 : 4, SaveOffset, false);
    // Remember it so later queries reuse the same object.
    FuncInfo->setReturnAddrSaveIndex(SlotIdx);
  }

  return DAG.getFrameIndex(SlotIdx, PtrTy);
}

7766

7767

/// Return a FrameIndex node for the frame pointer save slot, creating the
/// fixed stack object on first use.
///
/// The index is cached in PPCFunctionInfo; a cached value of zero is treated
/// as "not created yet". Within this file the result feeds the dynamic
/// allocation nodes (DYNALLOC / PROBED_ALLOCA / DYNAREAOFFSET).
SDValue
PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  const bool Is64Bit = Subtarget.isPPC64();
  EVT PtrTy = getPointerTy(MF.getDataLayout());

  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  int SlotIdx = FuncInfo->getFramePointerSaveIndex();

  if (SlotIdx == 0) {
    // The slot lives at the fixed offset reported by frame lowering and is one
    // pointer wide (8 bytes on 64-bit subtargets, 4 otherwise).
    int SaveOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
    SlotIdx =
        MF.getFrameInfo().CreateFixedObject(Is64Bit ? 8 : 4, SaveOffset, true);
    // Remember it so later queries reuse the same object.
    FuncInfo->setFramePointerSaveIndex(SlotIdx);
  }

  return DAG.getFrameIndex(SlotIdx, PtrTy);
}

7789

7790

/// Lower DYNAMIC_STACKALLOC to PPCISD::DYNALLOC, or to PPCISD::PROBED_ALLOCA
/// when the function uses inline stack probing.
///
/// The target node receives the chain, the negated allocation size and the
/// frame index of the frame pointer save slot; it yields a pointer and a
/// chain.
SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
                                                   SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  SDValue InChain = Op.getOperand(0);
  SDValue AllocSize = Op.getOperand(1);
  SDLoc Loc(Op);

  EVT PtrTy = getPointerTy(DAG.getDataLayout());

  // The target nodes take the size negated: 0 - Size.
  SDValue Zero = DAG.getConstant(0, Loc, PtrTy);
  SDValue NegatedSize = DAG.getNode(ISD::SUB, Loc, PtrTy, Zero, AllocSize);

  SDValue FPSlot = getFramePointerFrameIndex(DAG);
  SDValue Operands[3] = {InChain, NegatedSize, FPSlot};
  SDVTList ResultTys = DAG.getVTList(PtrTy, MVT::Other);

  const unsigned AllocOpc =
      hasInlineStackProbe(MF) ? PPCISD::PROBED_ALLOCA : PPCISD::DYNALLOC;
  return DAG.getNode(AllocOpc, Loc, ResultTys, Operands);
}

7811

7812

/// Lower EH_DWARF_CFA to the address of a freshly created fixed stack object
/// at offset 0, one pointer wide (8 bytes on 64-bit subtargets, 4 otherwise).
SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
                                             SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  EVT PtrTy = getPointerTy(DAG.getDataLayout());

  const unsigned SlotBytes = Subtarget.isPPC64() ? 8 : 4;
  int CFASlot = MF.getFrameInfo().CreateFixedObject(SlotBytes, 0, false);
  return DAG.getFrameIndex(CFASlot, PtrTy);
}

7822

7823

/// Lower EH_SJLJ_SETJMP to the PPC-specific node of the same name. The first
/// two operands of \p Op are forwarded unchanged; the result is an i32 plus a
/// chain.
SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc Loc(Op);
  SDVTList ResultTys = DAG.getVTList(MVT::i32, MVT::Other);
  return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, Loc, ResultTys, Op.getOperand(0),
                     Op.getOperand(1));
}

7830

7831

/// Lower EH_SJLJ_LONGJMP to the PPC-specific node of the same name. The first
/// two operands of \p Op are forwarded unchanged; the only result is a chain.
SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc Loc(Op);
  SDValue First = Op.getOperand(0);
  SDValue Second = Op.getOperand(1);
  return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, Loc, MVT::Other, First, Second);
}

7837

7838

/// Custom-lower a load. Vector loads are delegated to LowerVectorLoad; the
/// only other case handled here is an i1 load, which becomes an any-extending
/// i8 load into a pointer-sized integer followed by a truncate to i1.
///
/// Returns the merged pair {truncated value, chain of the new load}.
SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  if (Op.getValueType().isVector())
    return LowerVectorLoad(Op, DAG);

  assert(Op.getValueType() == MVT::i1 &&
         "Custom lowering only for i1 loads");

  SDLoc Loc(Op);
  LoadSDNode *OrigLoad = cast<LoadSDNode>(Op);
  SDValue InChain = OrigLoad->getChain();
  SDValue Addr = OrigLoad->getBasePtr();
  MachineMemOperand *MemOp = OrigLoad->getMemOperand();

  // Read a whole byte, reusing the original memory operand.
  EVT WideTy = getPointerTy(DAG.getDataLayout());
  SDValue ByteLoad =
      DAG.getExtLoad(ISD::EXTLOAD, Loc, WideTy, InChain, Addr, MVT::i8, MemOp);

  // Narrow the loaded value down to the single bit that was requested.
  SDValue Bit = DAG.getNode(ISD::TRUNCATE, Loc, MVT::i1, ByteLoad);

  SDValue Results[] = {Bit, ByteLoad.getValue(1)};
  return DAG.getMergeValues(Results, Loc);
}

7862

7863

/// Custom-lower a store. Vector stores are delegated to LowerVectorStore; the
/// only other case handled here is an i1 store, which zero-extends the value
/// to a pointer-sized integer and emits a truncating store of i8.
SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  if (Op.getOperand(1).getValueType().isVector())
    return LowerVectorStore(Op, DAG);

  assert(Op.getOperand(1).getValueType() == MVT::i1 &&
         "Custom lowering only for i1 stores");

  SDLoc Loc(Op);
  StoreSDNode *OrigStore = cast<StoreSDNode>(Op);
  SDValue InChain = OrigStore->getChain();
  SDValue Addr = OrigStore->getBasePtr();
  SDValue Bit = OrigStore->getValue();
  MachineMemOperand *MemOp = OrigStore->getMemOperand();

  // Widen the bit so it can be written as a full byte.
  EVT WideTy = getPointerTy(DAG.getDataLayout());
  SDValue Widened = DAG.getNode(ISD::ZERO_EXTEND, Loc, WideTy, Bit);

  return DAG.getTruncStore(InChain, Loc, Widened, Addr, MVT::i8, MemOp);
}

7884

7885

// FIXME: Remove this once the ANDI glue bug is fixed:

7886

SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {

7887

assert(Op.getValueType() == MVT::i1 &&((Op.getValueType() == MVT::i1 && "Custom lowering only for i1 results"
) ? static_cast<void> (0) : __assert_fail ("Op.getValueType() == MVT::i1 && \"Custom lowering only for i1 results\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 7888, __PRETTY_FUNCTION__))

7888

"Custom lowering only for i1 results")((Op.getValueType() == MVT::i1 && "Custom lowering only for i1 results"
) ? static_cast<void> (0) : __assert_fail ("Op.getValueType() == MVT::i1 && \"Custom lowering only for i1 results\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 7888, __PRETTY_FUNCTION__));

7889

7890

SDLoc DL(Op);

7891

return DAG.getNode(PPCISD::ANDI_rec_1_GT_BIT, DL, MVT::i1, Op.getOperand(0));

7892

}

7893

7894

/// Lower a vector truncate whose result fits in a 128-bit register to a single
/// vector shuffle. Returns an empty SDValue when the types are not handled.
///
/// Legalization narrows vector truncates until the source fits in a vector
/// register (so the result is smaller than one). At that point the sub-legal
/// result is custom-lowered here, where the truncate can be expressed as one
/// target shuffle.
///
/// Example: trunc <2 x i16> to <2 x i8>, i.e.
///   <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
///
/// Big-endian element order (u denotes undefined):
///   < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
///   < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
///
/// Little-endian element order:
///   <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
///   <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
                                               SelectionDAG &DAG) const {
  EVT TrgVT = Op.getValueType();
  assert(TrgVT.isVector() && "Vector type expected.");
  unsigned NumResultElts = TrgVT.getVectorNumElements();
  EVT EltVT = TrgVT.getVectorElementType();

  // Result restrictions: marked Custom, at most 128 bits, power-of-two element
  // count and element width.
  if (!isOperationCustom(Op.getOpcode(), TrgVT) ||
      TrgVT.getSizeInBits() > 128 || !isPowerOf2_32(NumResultElts) ||
      !isPowerOf2_32(EltVT.getSizeInBits()))
    return SDValue();

  // Source restrictions: at most 256 bits, power-of-two element count and
  // element width, and a 256-bit source must be splittable in two.
  SDValue Input = Op.getOperand(0);
  EVT SrcVT = Input.getValueType();
  unsigned SrcBits = SrcVT.getSizeInBits();
  if (SrcBits > 256 ||
      !isPowerOf2_32(SrcVT.getVectorNumElements()) ||
      !isPowerOf2_32(SrcVT.getVectorElementType().getSizeInBits()))
    return SDValue();
  if (SrcBits == 256 && SrcVT.getVectorNumElements() < 2)
    return SDValue();

  // The shuffle is done on 128-bit vectors of the result element type.
  unsigned NumWideElts = 128 / EltVT.getSizeInBits();
  EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumWideElts);

  SDLoc Loc(Op);
  SDValue First, Second;
  if (SrcBits != 256) {
    // Single-register source: widen it if necessary and pair it with undef.
    First = SrcBits == 128 ? Input : widenVec(DAG, Input, Loc);
    Second = DAG.getUNDEF(WideVT);
  } else {
    // Two-register source: shuffle between its low and high halves.
    EVT IdxTy = getVectorIdxTy(DAG.getDataLayout());
    EVT HalfVT =
        Input.getValueType().getHalfNumVectorElementsVT(*DAG.getContext());
    unsigned HalfNumElts = HalfVT.getVectorNumElements();
    First = DAG.getNode(ISD::EXTRACT_SUBVECTOR, Loc, HalfVT, Input,
                        DAG.getConstant(0, Loc, IdxTy));
    Second = DAG.getNode(ISD::EXTRACT_SUBVECTOR, Loc, HalfVT, Input,
                         DAG.getConstant(HalfNumElts, Loc, IdxTy));
  }

  // Each source element spans Stride result-sized lanes. The lane to keep is
  // the first of each group on little-endian and the last on big-endian.
  unsigned Stride = SrcBits / TrgVT.getSizeInBits();
  const bool IsLE = Subtarget.isLittleEndian();
  SmallVector<int, 16> Mask;
  for (unsigned Elt = 0; Elt < NumResultElts; ++Elt)
    Mask.push_back(IsLE ? Elt * Stride : (Elt + 1) * Stride - 1);

  // The remaining lanes are don't-care; they all reference element 1 of the
  // second shuffle operand (which is undef unless the source was 256 bits).
  for (unsigned Lane = NumResultElts; Lane < NumWideElts; ++Lane)
    Mask.push_back(NumWideElts + 1);

  First = DAG.getNode(ISD::BITCAST, Loc, WideVT, First);
  Second = DAG.getNode(ISD::BITCAST, Loc, WideVT, Second);
  return DAG.getVectorShuffle(WideVT, Loc, First, Second, Mask);
}

7974

7975

/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when

7976

/// possible.

7977

SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {

7978

// Not FP, or using SPE? Not a fsel.

7979

if (!Op.getOperand(0).getValueType().isFloatingPoint() ||

7980

!Op.getOperand(2).getValueType().isFloatingPoint() || Subtarget.hasSPE())

7981

return Op;

7982

7983

ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();

7984

7985

EVT ResVT = Op.getValueType();

7986

EVT CmpVT = Op.getOperand(0).getValueType();

7987

SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);

7988

SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);

7989

SDLoc dl(Op);

7990

SDNodeFlags Flags = Op.getNode()->getFlags();

7991

7992

// We have xsmaxcdp/xsmincdp which are OK to emit even in the

7993

// presence of infinities.

7994

if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {

7995

switch (CC) {

7996

default:

7997

break;

7998

case ISD::SETOGT:

7999

case ISD::SETGT:

8000

return DAG.getNode(PPCISD::XSMAXCDP, dl, Op.getValueType(), LHS, RHS);

8001

case ISD::SETOLT:

8002

case ISD::SETLT:

8003

return DAG.getNode(PPCISD::XSMINCDP, dl, Op.getValueType(), LHS, RHS);

8004

}

8005

}

8006

8007

// We might be able to do better than this under some circumstances, but in

8008

// general, fsel-based lowering of select is a finite-math-only optimization.

8009

// For more information, see section F.3 of the 2.06 ISA specification.

8010

// With ISA 3.0

8011

if ((!DAG.getTarget().Options.NoInfsFPMath && !Flags.hasNoInfs()) ||

8012

(!DAG.getTarget().Options.NoNaNsFPMath && !Flags.hasNoNaNs()))

8013

return Op;

8014

8015

// If the RHS of the comparison is a 0.0, we don't need to do the

8016

// subtraction at all.

8017

SDValue Sel1;

8018

if (isFloatingPointZero(RHS))

8019

switch (CC) {

8020

default: break; // SETUO etc aren't handled by fsel.

8021

case ISD::SETNE:

8022

std::swap(TV, FV);

8023

LLVM_FALLTHROUGH[[gnu::fallthrough]];

8024

case ISD::SETEQ:

8025

if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits

8026

LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);

8027

Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);

8028

if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits

8029

Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);

8030

return DAG.getNode(PPCISD::FSEL, dl, ResVT,

8031

DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);

8032

case ISD::SETULT:

8033

case ISD::SETLT:

8034

std::swap(TV, FV); // fsel is natively setge, swap operands for setlt

8035

LLVM_FALLTHROUGH[[gnu::fallthrough]];

8036

case ISD::SETOGE:

8037

case ISD::SETGE:

8038

if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits

8039

LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);

8040

return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);

8041

case ISD::SETUGT:

8042

case ISD::SETGT:

8043

std::swap(TV, FV); // fsel is natively setge, swap operands for setlt

8044

LLVM_FALLTHROUGH[[gnu::fallthrough]];

8045

case ISD::SETOLE:

8046

case ISD::SETLE:

8047

if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits

8048

LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);

8049

return DAG.getNode(PPCISD::FSEL, dl, ResVT,

8050

DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);

8051

}

8052

8053

SDValue Cmp;

8054

switch (CC) {

8055

default: break; // SETUO etc aren't handled by fsel.

8056

case ISD::SETNE:

8057

std::swap(TV, FV);

8058

LLVM_FALLTHROUGH[[gnu::fallthrough]];

8059

case ISD::SETEQ:

8060

Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);

8061

if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits

8062

Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);

8063

Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);

8064

if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits

8065

Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);

8066

return DAG.getNode(PPCISD::FSEL, dl, ResVT,

8067

DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);

8068

case ISD::SETULT:

8069

case ISD::SETLT:

8070

Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);

8071

if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits

8072

Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);

8073

return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);

8074

case ISD::SETOGE:

8075

case ISD::SETGE:

8076

Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);

8077

if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits

8078

Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);

8079

return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);

8080

case ISD::SETUGT:

8081

case ISD::SETGT:

8082

Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);

8083

if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits

8084

Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);

8085

return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);

8086

case ISD::SETOLE:

8087

case ISD::SETLE:

8088

Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);

8089

if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits

8090

Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);

8091

return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);

8092

}

8093

return Op;

8094

}

8095

8096

static unsigned getPPCStrictOpcode(unsigned Opc) {

8097

switch (Opc) {

8098

default:

8099

llvm_unreachable("No strict version of this opcode!")::llvm::llvm_unreachable_internal("No strict version of this opcode!"
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 8099);

8100

case PPCISD::FCTIDZ:

8101

return PPCISD::STRICT_FCTIDZ;

8102

case PPCISD::FCTIWZ:

8103

return PPCISD::STRICT_FCTIWZ;

8104

case PPCISD::FCTIDUZ:

8105

return PPCISD::STRICT_FCTIDUZ;

8106

case PPCISD::FCTIWUZ:

8107

return PPCISD::STRICT_FCTIWUZ;

8108

case PPCISD::FCFID:

8109

return PPCISD::STRICT_FCFID;

8110

case PPCISD::FCFIDU:

8111

return PPCISD::STRICT_FCFIDU;

8112

case PPCISD::FCFIDS:

8113

return PPCISD::STRICT_FCFIDS;

8114

case PPCISD::FCFIDUS:

8115

return PPCISD::STRICT_FCFIDUS;

8116

}

8117

}

8118

8119

/// Build the PPC conversion node (FCTI*Z or its STRICT_ form) for an
/// FP_TO_SINT / FP_TO_UINT node \p Op, strict or not.
///
/// An f32 source is first extended to f64. The returned node has an f64 value
/// type; for strict inputs it additionally carries a chain as result 1. Only
/// the nofpexcept flag of \p Op is propagated, and only onto strict nodes.
static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG,
                              const PPCSubtarget &Subtarget) {
  SDLoc Loc(Op);
  const bool Strict = Op->isStrictFPOpcode();
  const bool Signed = Op.getOpcode() == ISD::FP_TO_SINT ||
                      Op.getOpcode() == ISD::STRICT_FP_TO_SINT;

  // TODO: Any other flags to propagate?
  SDNodeFlags Flags;
  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());

  // Strict nodes carry the chain as operand 0 and the source as operand 1.
  SDValue Input = Op.getOperand(Strict ? 1 : 0);
  SDValue Chain = Strict ? Op.getOperand(0) : SDValue();
  assert(Input.getValueType().isFloatingPoint());

  // The conversion nodes are built on f64 inputs.
  if (Input.getValueType() == MVT::f32) {
    if (!Strict) {
      Input = DAG.getNode(ISD::FP_EXTEND, Loc, MVT::f64, Input);
    } else {
      Input = DAG.getNode(ISD::STRICT_FP_EXTEND, Loc,
                          DAG.getVTList(MVT::f64, MVT::Other), {Chain, Input},
                          Flags);
      Chain = Input.getValue(1);
    }
  }

  // Choose the conversion from the integer result type and signedness.
  unsigned ConvOpc = ISD::DELETED_NODE;
  switch (Op.getSimpleValueType().SimpleTy) {
  default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
  case MVT::i32:
    if (Signed)
      ConvOpc = PPCISD::FCTIWZ;
    else
      // Without FPCVT there is no unsigned word conversion; use the signed
      // doubleword conversion instead.
      ConvOpc = Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ;
    break;
  case MVT::i64:
    assert((IsSignedOrFPCVT(Signed, Subtarget)) &&
           "i64 FP_TO_UINT is supported only with FPCVT");
    ConvOpc = Signed ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;
    break;
  }

  if (!Strict)
    return DAG.getNode(ConvOpc, Loc, MVT::f64, Input);

  const unsigned StrictOpc = getPPCStrictOpcode(ConvOpc);
  return DAG.getNode(StrictOpc, Loc, DAG.getVTList(MVT::f64, MVT::Other),
                     {Chain, Input}, Flags);
}

8165

8166

void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,

8167

SelectionDAG &DAG,

8168

const SDLoc &dl) const {

8169

SDValue Tmp = convertFPToInt(Op, DAG, Subtarget);

8170

bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||

8171

Op.getOpcode() == ISD::STRICT_FP_TO_SINT;

8172

bool IsStrict = Op->isStrictFPOpcode();

8173

8174

// Convert the FP value to an int value through memory.

8175

bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&

8176

(IsSigned || Subtarget.hasFPCVT());

8177

SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);

8178

int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();

8179

MachinePointerInfo MPI =

8180

MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);

8181

8182

// Emit a store to the stack slot.

8183

SDValue Chain = IsStrict ? Tmp.getValue(1) : DAG.getEntryNode();

8184

Align Alignment(DAG.getEVTAlign(Tmp.getValueType()));

8185

if (i32Stack) {

8186

MachineFunction &MF = DAG.getMachineFunction();

8187

Alignment = Align(4);

8188

MachineMemOperand *MMO =

8189

MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Alignment);

8190

SDValue Ops[] = { Chain, Tmp, FIPtr };

8191

Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,

8192

DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);

8193

} else

8194

Chain = DAG.getStore(Chain, dl, Tmp, FIPtr, MPI, Alignment);

8195

8196

// Result is a load from the stack slot. If loading 4 bytes, make sure to

8197

// add in a bias on big endian.

8198

if (Op.getValueType() == MVT::i32 && !i32Stack) {

8199

FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,

8200

DAG.getConstant(4, dl, FIPtr.getValueType()));

8201

MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4);

8202

}

8203

8204

RLI.Chain = Chain;

8205

RLI.Ptr = FIPtr;

8206

RLI.MPI = MPI;

8207

RLI.Alignment = Alignment;

8208

}

8209

8210

/// Custom lowers floating point to integer conversions to use

8211

/// the direct move instructions available in ISA 2.07 to avoid the

8212

/// need for load/store combinations.

8213

SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,

8214

SelectionDAG &DAG,

8215

const SDLoc &dl) const {

8216

SDValue Conv = convertFPToInt(Op, DAG, Subtarget);

8217

SDValue Mov = DAG.getNode(PPCISD::MFVSR, dl, Op.getValueType(), Conv);

8218

if (Op->isStrictFPOpcode())

8219

return DAG.getMergeValues({Mov, Conv.getValue(1)}, dl);

8220

else

8221

return Mov;

8222

}

8223

8224

/// Custom-lower FP_TO_SINT / FP_TO_UINT and their strict variants.
///
///  - f128 sources are already legal and returned as-is.
///  - ppcf128 sources are expanded by hand when the result is i32; any other
///    result type yields an empty SDValue.
///  - Everything else goes through a direct move (64-bit subtargets with
///    direct-move support) or through a store/load via a stack temporary.
SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
                                          const SDLoc &dl) const {
  const bool Strict = Op->isStrictFPOpcode();
  const bool Signed = Op.getOpcode() == ISD::FP_TO_SINT ||
                      Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
  SDValue Src = Op.getOperand(Strict ? 1 : 0);
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Op.getValueType();

  // FP to INT conversions are legal for f128.
  if (SrcVT == MVT::f128)
    return Op;

  // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
  // PPC (the libcall is not available).
  if (SrcVT == MVT::ppcf128) {
    if (DstVT != MVT::i32)
      return SDValue();

    // TODO: Conservatively pass only nofpexcept flag here. Need to check and
    // set other fast-math flags to FP operations in both strict and
    // non-strict cases. (FP_TO_SINT, FSUB)
    SDNodeFlags Flags;
    Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());

    if (Signed) {
      SDValue LoHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Src,
                                   DAG.getIntPtrConstant(0, dl));
      SDValue HiHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Src,
                                   DAG.getIntPtrConstant(1, dl));

      // Add the two halves of the long double in round-to-zero mode, and use
      // a smaller FP_TO_SINT.
      if (!Strict) {
        SDValue Sum = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, LoHalf, HiHalf);
        return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Sum);
      }
      SDValue Sum = DAG.getNode(PPCISD::STRICT_FADDRTZ, dl,
                                DAG.getVTList(MVT::f64, MVT::Other),
                                {Op.getOperand(0), LoHalf, HiHalf}, Flags);
      return DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
                         DAG.getVTList(MVT::i32, MVT::Other),
                         {Sum.getValue(1), Sum}, Flags);
    }

    // Unsigned: values at or above 2^31 are shifted into signed range before
    // converting, and the top bit is put back afterwards.
    const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
    APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
    SDValue TwoTo31 = DAG.getConstantFP(APF, dl, SrcVT);
    SDValue SignMask = DAG.getConstant(0x80000000, dl, DstVT);

    if (!Strict) {
      // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
      // FIXME: generated code sucks.
      SDValue Shifted = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, TwoTo31);
      SDValue ShiftedInt = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Shifted);
      SDValue Large = DAG.getNode(ISD::ADD, dl, MVT::i32, ShiftedInt, SignMask);
      SDValue Small = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
      return DAG.getSelectCC(dl, Src, TwoTo31, Large, Small, ISD::SETGE);
    }

    // Sel = Src < 0x80000000
    // FltOfs = select Sel, 0.0, 0x80000000
    // IntOfs = select Sel, 0, 0x80000000
    // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
    SDValue Chain = Op.getOperand(0);
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
    EVT DstSetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
    SDValue InRange = DAG.getSetCC(dl, SetCCVT, Src, TwoTo31, ISD::SETLT,
                                   Chain, true);
    Chain = InRange.getValue(1);

    SDValue FltOfs = DAG.getSelect(
        dl, SrcVT, InRange, DAG.getConstantFP(0.0, dl, SrcVT), TwoTo31);
    InRange = DAG.getBoolExtOrTrunc(InRange, dl, DstSetCCVT, DstVT);

    SDValue Adjusted = DAG.getNode(ISD::STRICT_FSUB, dl,
                                   DAG.getVTList(SrcVT, MVT::Other),
                                   {Chain, Src, FltOfs}, Flags);
    Chain = Adjusted.getValue(1);
    SDValue AsSInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
                                 DAG.getVTList(DstVT, MVT::Other),
                                 {Chain, Adjusted}, Flags);
    Chain = AsSInt.getValue(1);
    SDValue IntOfs = DAG.getSelect(
        dl, DstVT, InRange, DAG.getConstant(0, dl, DstVT), SignMask);
    SDValue Result = DAG.getNode(ISD::XOR, dl, DstVT, AsSInt, IntOfs);
    return DAG.getMergeValues({Result, Chain}, dl);
  }

  // Prefer a register-to-register move when the subtarget offers one.
  if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
    return LowerFP_TO_INTDirectMove(Op, DAG, dl);

  // Otherwise store the converted value to the stack and load it back.
  ReuseLoadInfo Reload;
  LowerFP_TO_INTForReuse(Op, Reload, DAG, dl);

  return DAG.getLoad(Op.getValueType(), dl, Reload.Chain, Reload.Ptr,
                     Reload.MPI, Reload.Alignment, Reload.MMOFlags(),
                     Reload.AAInfo, Reload.Ranges);
}

8325

8326

// We're trying to insert a regular store, S, and then a load, L. If the

8327

// incoming value, O, is a load, we might just be able to have our load use the

8328

// address used by O. However, we don't know if anything else will store to

8329

// that address before we can load from it. To prevent this situation, we need

8330

// to insert our load, L, into the chain as a peer of O. To do this, we give L

8331

// the same chain operand as O, we create a token factor from the chain results

8332

// of O and L, and we replace all uses of O's chain result with that token

8333

// factor (see spliceIntoChain below for this last part).

8334

bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,

8335

ReuseLoadInfo &RLI,

8336

SelectionDAG &DAG,

8337

ISD::LoadExtType ET) const {

8338

// Conservatively skip reusing for constrained FP nodes.

8339

if (Op->isStrictFPOpcode())

8340

return false;

8341

8342

SDLoc dl(Op);

8343

bool ValidFPToUint = Op.getOpcode() == ISD::FP_TO_UINT &&

8344

(Subtarget.hasFPCVT() || Op.getValueType() == MVT::i32);

8345

if (ET == ISD::NON_EXTLOAD &&

8346

(ValidFPToUint || Op.getOpcode() == ISD::FP_TO_SINT) &&

8347

isOperationLegalOrCustom(Op.getOpcode(),

8348

Op.getOperand(0).getValueType())) {

8349

8350

LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);

8351

return true;

8352

}

8353

8354

LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);

8355

if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||

8356

LD->isNonTemporal())

8357

return false;

8358

if (LD->getMemoryVT() != MemVT)

8359

return false;

8360

8361

RLI.Ptr = LD->getBasePtr();

8362

if (LD->isIndexed() && !LD->getOffset().isUndef()) {

8363

assert(LD->getAddressingMode() == ISD::PRE_INC &&((LD->getAddressingMode() == ISD::PRE_INC && "Non-pre-inc AM on PPC?"
) ? static_cast<void> (0) : __assert_fail ("LD->getAddressingMode() == ISD::PRE_INC && \"Non-pre-inc AM on PPC?\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 8364, __PRETTY_FUNCTION__))

8364

"Non-pre-inc AM on PPC?")((LD->getAddressingMode() == ISD::PRE_INC && "Non-pre-inc AM on PPC?"
) ? static_cast<void> (0) : __assert_fail ("LD->getAddressingMode() == ISD::PRE_INC && \"Non-pre-inc AM on PPC?\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 8364, __PRETTY_FUNCTION__));

8365

RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,

8366

LD->getOffset());

8367

}

8368

8369

RLI.Chain = LD->getChain();

8370

RLI.MPI = LD->getPointerInfo();

8371

RLI.IsDereferenceable = LD->isDereferenceable();

8372

RLI.IsInvariant = LD->isInvariant();

8373

RLI.Alignment = LD->getAlign();

8374

RLI.AAInfo = LD->getAAInfo();

8375

RLI.Ranges = LD->getRanges();

8376

8377

RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);

8378

return true;

8379

}

8380

8381

// Given the head of the old chain, ResChain, insert a token factor containing

8382

// it and NewResChain, and make users of ResChain now be users of that token

8383

// factor.

8384

// TODO: Remove and use DAG::makeEquivalentMemoryOrdering() instead.

8385

void PPCTargetLowering::spliceIntoChain(SDValue ResChain,

8386

SDValue NewResChain,

8387

SelectionDAG &DAG) const {

8388

if (!ResChain)

8389

return;

8390

8391

SDLoc dl(NewResChain);

8392

8393

SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,

8394

NewResChain, DAG.getUNDEF(MVT::Other));

8395

assert(TF.getNode() != NewResChain.getNode() &&((TF.getNode() != NewResChain.getNode() && "A new TF really is required here"
) ? static_cast<void> (0) : __assert_fail ("TF.getNode() != NewResChain.getNode() && \"A new TF really is required here\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 8396, __PRETTY_FUNCTION__))

8396

"A new TF really is required here")((TF.getNode() != NewResChain.getNode() && "A new TF really is required here"
) ? static_cast<void> (0) : __assert_fail ("TF.getNode() != NewResChain.getNode() && \"A new TF really is required here\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 8396, __PRETTY_FUNCTION__));

8397

8398

DAG.ReplaceAllUsesOfValueWith(ResChain, TF);

8399

DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);

8400

}

8401

8402

/// Analyze profitability of direct move

8403

/// prefer float load to int load plus direct move

8404

/// when there is no integer use of int load

8405

bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {

8406

SDNode *Origin = Op.getOperand(0).getNode();

8407

if (Origin->getOpcode() != ISD::LOAD)

8408

return true;

8409

8410

// If there is no LXSIBZX/LXSIHZX, like Power8,

8411

// prefer direct move if the memory size is 1 or 2 bytes.

8412

MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();

8413

if (!Subtarget.hasP9Vector() && MMO->getSize() <= 2)

8414

return true;

8415

8416

for (SDNode::use_iterator UI = Origin->use_begin(),

8417

UE = Origin->use_end();

8418

UI != UE; ++UI) {

8419

8420

// Only look at the users of the loaded value.

8421

if (UI.getUse().get().getResNo() != 0)

8422

continue;

8423

8424

if (UI->getOpcode() != ISD::SINT_TO_FP &&

8425

UI->getOpcode() != ISD::UINT_TO_FP &&

8426

UI->getOpcode() != ISD::STRICT_SINT_TO_FP &&

8427

UI->getOpcode() != ISD::STRICT_UINT_TO_FP)

8428

return true;

8429

}

8430

8431

return false;

8432

}

8433

8434

/// Emit the PPC conversion node (FCFID family) that turns \p Src into a
/// floating-point value for the int-to-FP node \p Op.
///
/// The result type is f32 only when \p Op produces f32 and the subtarget has
/// FPCVT; otherwise it is f64 and this helper does no rounding itself. For a
/// strict \p Op the strict variant of the opcode is emitted on \p Chain, or on
/// \p Op's own chain operand when \p Chain is null.
static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG,
                              const PPCSubtarget &Subtarget,
                              SDValue Chain = SDValue()) {
  SDLoc dl(Op);
  const unsigned OrigOpc = Op.getOpcode();
  const bool IsSigned =
      OrigOpc == ISD::SINT_TO_FP || OrigOpc == ISD::STRICT_SINT_TO_FP;

  // If we have FCFIDS, then use it when converting to single-precision.
  // Otherwise, convert to double-precision.
  const bool IsSingle = Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32;

  unsigned ConvOpc;
  if (IsSingle)
    ConvOpc = IsSigned ? PPCISD::FCFIDS : PPCISD::FCFIDUS;
  else
    ConvOpc = IsSigned ? PPCISD::FCFID : PPCISD::FCFIDU;
  const EVT ConvTy = IsSingle ? MVT::f32 : MVT::f64;

  // Non-strict conversions need neither a chain nor flags.
  if (!Op->isStrictFPOpcode())
    return DAG.getNode(ConvOpc, dl, ConvTy, Src);

  // TODO: Any other flags to propagate?
  SDNodeFlags Flags;
  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());

  SDValue InChain = Chain ? Chain : Op.getOperand(0);
  return DAG.getNode(getPPCStrictOpcode(ConvOpc), dl,
                     DAG.getVTList(ConvTy, MVT::Other), {InChain, Src}, Flags);
}

8459

8460

/// Custom lowers integer to floating point conversions to use

8461

/// the direct move instructions available in ISA 2.07 to avoid the

8462

/// need for load/store combinations.

8463

SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,

8464

SelectionDAG &DAG,

8465

const SDLoc &dl) const {

8466

assert((Op.getValueType() == MVT::f32 ||(((Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::
f64) && "Invalid floating point type as target of conversion"
) ? static_cast<void> (0) : __assert_fail ("(Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::f64) && \"Invalid floating point type as target of conversion\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 8468, __PRETTY_FUNCTION__))

8467

Op.getValueType() == MVT::f64) &&(((Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::
f64) && "Invalid floating point type as target of conversion"
) ? static_cast<void> (0) : __assert_fail ("(Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::f64) && \"Invalid floating point type as target of conversion\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 8468, __PRETTY_FUNCTION__))

8468

"Invalid floating point type as target of conversion")(((Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::
f64) && "Invalid floating point type as target of conversion"
) ? static_cast<void> (0) : __assert_fail ("(Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::f64) && \"Invalid floating point type as target of conversion\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 8468, __PRETTY_FUNCTION__));

8469

assert(Subtarget.hasFPCVT() &&((Subtarget.hasFPCVT() && "Int to FP conversions with direct moves require FPCVT"
) ? static_cast<void> (0) : __assert_fail ("Subtarget.hasFPCVT() && \"Int to FP conversions with direct moves require FPCVT\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 8470, __PRETTY_FUNCTION__))

8470

"Int to FP conversions with direct moves require FPCVT")((Subtarget.hasFPCVT() && "Int to FP conversions with direct moves require FPCVT"
) ? static_cast<void> (0) : __assert_fail ("Subtarget.hasFPCVT() && \"Int to FP conversions with direct moves require FPCVT\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 8470, __PRETTY_FUNCTION__));

8471

SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);

8472

bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;

8473

bool Signed = Op.getOpcode() == ISD::SINT_TO_FP ||

8474

Op.getOpcode() == ISD::STRICT_SINT_TO_FP;

8475

unsigned MovOpc = (WordInt && !Signed) ? PPCISD::MTVSRZ : PPCISD::MTVSRA;

8476

SDValue Mov = DAG.getNode(MovOpc, dl, MVT::f64, Src);

8477

return convertIntToFP(Op, Mov, DAG, Subtarget);

8478

}

8479

8480

/// Widen \p Vec, a vector narrower than 128 bits, to a 128-bit vector of the
/// same element type by concatenating it with undef vectors of its own type.
/// \p Vec ends up as the first operand of the CONCAT_VECTORS node.
static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
  const EVT NarrowVT = Vec.getValueType();
  assert(NarrowVT.isVector() && "Expected a vector type.");
  assert(NarrowVT.getSizeInBits() < 128 && "Vector is already full width.");

  // The wide type has as many elements as fit into 128 bits.
  const EVT EltVT = NarrowVT.getVectorElementType();
  const unsigned WideNumElts = 128 / EltVT.getSizeInBits();
  const EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);

  // Every part starts out undef; the first one is then replaced by the input.
  const unsigned NumParts = WideNumElts / NarrowVT.getVectorNumElements();
  SmallVector<SDValue, 16> Parts(NumParts, DAG.getUNDEF(NarrowVT));
  Parts[0] = Vec;

  return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Parts);
}

8499

8500

/// Custom lowering of a (possibly strict) vector int-to-FP conversion whose
/// result is v2f64 or v4f32 and whose source vector is narrower than 128 bits.
/// The source is widened to 128 bits, its elements are shuffled into place,
/// the result is extended to v2i64/v4i32, and the original conversion opcode
/// is re-emitted on that full-width integer vector.
SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
                                                const SDLoc &dl) const {
  const unsigned Opc = Op.getOpcode();
  const bool IsStrict = Op->isStrictFPOpcode();
  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
  assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP ||
          Opc == ISD::STRICT_UINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP) &&
         "Unexpected conversion type");
  assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
         "Supports conversions to v2f64/v4f32 only.");

  const EVT ResVT = Op.getValueType();
  const bool IsSigned =
      Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
  const bool IsV4F32 = ResVT == MVT::v4f32;
  const MVT IntermediateVT = IsV4F32 ? MVT::v4i32 : MVT::v2i64;
  const unsigned KeptElts = IsV4F32 ? 4 : 2;

  SDValue Wide = widenVec(DAG, Src, dl);
  const EVT WideVT = Wide.getValueType();
  const unsigned WideNumElts = WideVT.getVectorNumElements();

  // By default every lane is taken from the same lane of the second shuffle
  // operand.
  SmallVector<int, 16> Mask(WideNumElts);
  for (unsigned Lane = 0; Lane != WideNumElts; ++Lane)
    Mask[Lane] = Lane + WideNumElts;

  // Source elements 0..KeptElts-1 are then placed one per stride: in the
  // first lane of the stride on little-endian targets, in the last lane on
  // big-endian ones.
  const unsigned Stride = WideNumElts / KeptElts;
  const bool IsLE = Subtarget.isLittleEndian();
  for (unsigned Elt = 0; Elt != KeptElts; ++Elt) {
    const unsigned Slot = IsLE ? Elt * Stride : (Elt + 1) * Stride - 1;
    Mask[Slot] = Elt;
  }

  // Unsigned conversions fill the remaining lanes from a zero vector; signed
  // conversions leave them undef and sign-extend in-register afterwards.
  SDValue Filler =
      IsSigned ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
  SDValue Arranged = DAG.getVectorShuffle(WideVT, dl, Wide, Filler, Mask);

  SDValue Extended;
  if (!IsSigned) {
    Extended = DAG.getNode(ISD::BITCAST, dl, IntermediateVT, Arranged);
  } else {
    SDValue Cast = DAG.getBitcast(IntermediateVT, Arranged);
    // With P9 Altivec the extension type uses the wide vector's element type
    // with the intermediate type's element count; otherwise it is the source
    // vector type itself.
    EVT ExtVT = Subtarget.hasP9Altivec()
                    ? EVT::getVectorVT(*DAG.getContext(),
                                       WideVT.getVectorElementType(),
                                       IntermediateVT.getVectorNumElements())
                    : Src.getValueType();
    Extended = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Cast,
                           DAG.getValueType(ExtVT));
  }

  if (!IsStrict)
    return DAG.getNode(Opc, dl, ResVT, Extended);

  // TODO: Any other flags to propagate?
  SDNodeFlags Flags;
  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());

  return DAG.getNode(Opc, dl, DAG.getVTList(ResVT, MVT::Other),
                     {Op.getOperand(0), Extended}, Flags);
}

8559

8560

/// Custom lowering of SINT_TO_FP / UINT_TO_FP and their strict variants.
///
/// Vector results with a custom action are forwarded to
/// LowerINT_TO_FPVector, f128 results are returned unchanged, and any result
/// type other than f32/f64 yields an empty SDValue. Scalar i1, i64 and i32
/// sources are then handled, using direct moves when available and
/// profitable, and a (possibly reused) memory slot otherwise. For strict nodes
/// the returned node also carries the output chain.
SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
                                          SelectionDAG &DAG) const {
  SDLoc dl(Op);
  bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
                  Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
  bool IsStrict = Op->isStrictFPOpcode();
  // Strict nodes carry their chain as operand 0 and the source as operand 1.
  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
  SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();

  // TODO: Any other flags to propagate?
  SDNodeFlags Flags;
  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());

  EVT InVT = Src.getValueType();
  EVT OutVT = Op.getValueType();
  if (OutVT.isVector() && OutVT.isFloatingPoint() &&
      isOperationCustom(Op.getOpcode(), InVT))
    return LowerINT_TO_FPVector(Op, DAG, dl);

  // Conversions to f128 are legal.
  if (Op.getValueType() == MVT::f128)
    return Op;

  // Don't handle ppc_fp128 here; let it be lowered to a libcall.
  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
    return SDValue();

  if (Src.getValueType() == MVT::i1) {
    SDValue Sel = DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Src,
                              DAG.getConstantFP(1.0, dl, Op.getValueType()),
                              DAG.getConstantFP(0.0, dl, Op.getValueType()));
    // A strict node has two results (value, chain); the select only provides
    // the value, so pair it with the incoming chain.
    if (IsStrict)
      return DAG.getMergeValues({Sel, Chain}, dl);
    return Sel;
  }

  // If we have direct moves, we can do all the conversion, skip the store/load
  // however, without FPCVT we can't do most conversions.
  if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
      Subtarget.isPPC64() && Subtarget.hasFPCVT())
    return LowerINT_TO_FPDirectMove(Op, DAG, dl);

  assert((IsSigned || Subtarget.hasFPCVT()) &&
         "UINT_TO_FP is supported only with FPCVT");

  if (Src.getValueType() == MVT::i64) {
    SDValue SINT = Src;
    // When converting to single-precision, we actually need to convert
    // to double-precision first and then round to single-precision.
    // To avoid double-rounding effects during that operation, we have
    // to prepare the input operand.  Bits that might be truncated when
    // converting to double-precision are replaced by a bit that won't
    // be lost at this stage, but is below the single-precision rounding
    // position.
    //
    // However, if -enable-unsafe-fp-math is in effect, accept double
    // rounding to avoid the extra overhead.
    if (Op.getValueType() == MVT::f32 &&
        !Subtarget.hasFPCVT() &&
        !DAG.getTarget().Options.UnsafeFPMath) {

      // Twiddle input to make sure the low 11 bits are zero.  (If this
      // is the case, we are guaranteed the value will fit into the 53 bit
      // mantissa of an IEEE double-precision value without rounding.)
      // If any of those low 11 bits were not zero originally, make sure
      // bit 12 (value 2048) is set instead, so that the final rounding
      // to single-precision gets the correct result.
      SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
                                  SINT, DAG.getConstant(2047, dl, MVT::i64));
      Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
                          Round, DAG.getConstant(2047, dl, MVT::i64));
      Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
      Round = DAG.getNode(ISD::AND, dl, MVT::i64,
                          Round, DAG.getConstant(-2048, dl, MVT::i64));

      // However, we cannot use that value unconditionally: if the magnitude
      // of the input value is small, the bit-twiddling we did above might
      // end up visibly changing the output.  Fortunately, in that case, we
      // don't need to twiddle bits since the original input will convert
      // exactly to double-precision floating-point already.  Therefore,
      // construct a conditional to use the original value if the top 11
      // bits are all sign-bit copies, and use the rounded value computed
      // above otherwise.
      SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
                                 SINT, DAG.getConstant(53, dl, MVT::i32));
      Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
                         Cond, DAG.getConstant(1, dl, MVT::i64));
      Cond = DAG.getSetCC(
          dl,
          getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
          Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);

      SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
    }

    ReuseLoadInfo RLI;
    SDValue Bits;

    MachineFunction &MF = DAG.getMachineFunction();
    if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
      // The i64 already lives in memory: load it directly as an f64.
      Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
                         RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
      spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
    } else if (Subtarget.hasLFIWAX() &&
               canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
      // The source is a sign-extending i32 load: reload it with LFIWAX.
      MachineMemOperand *MMO =
        MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                                RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
                                     DAG.getVTList(MVT::f64, MVT::Other),
                                     Ops, MVT::i32, MMO);
      spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
    } else if (Subtarget.hasFPCVT() &&
               canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
      // The source is a zero-extending i32 load: reload it with LFIWZX.
      MachineMemOperand *MMO =
        MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                                RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
                                     DAG.getVTList(MVT::f64, MVT::Other),
                                     Ops, MVT::i32, MMO);
      spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
    } else if (((Subtarget.hasLFIWAX() &&
                 SINT.getOpcode() == ISD::SIGN_EXTEND) ||
                (Subtarget.hasFPCVT() &&
                 SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
               SINT.getOperand(0).getValueType() == MVT::i32) {
      // The source is an extension of an i32 value: store the i32 to a 4-byte
      // stack slot and reload it with the matching LFIWAX/LFIWZX.
      MachineFrameInfo &MFI = MF.getFrameInfo();
      EVT PtrVT = getPointerTy(DAG.getDataLayout());

      int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
      SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

      SDValue Store = DAG.getStore(Chain, dl, SINT.getOperand(0), FIdx,
                                   MachinePointerInfo::getFixedStack(
                                       DAG.getMachineFunction(), FrameIdx));
      Chain = Store;

      assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
             "Expected an i32 store");

      RLI.Ptr = FIdx;
      RLI.Chain = Chain;
      RLI.MPI =
          MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
      RLI.Alignment = Align(4);

      MachineMemOperand *MMO =
        MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                                RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
                                     PPCISD::LFIWZX : PPCISD::LFIWAX,
                                     dl, DAG.getVTList(MVT::f64, MVT::Other),
                                     Ops, MVT::i32, MMO);
      Chain = Bits.getValue(1);
    } else
      Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);

    SDValue FP = convertIntToFP(Op, Bits, DAG, Subtarget, Chain);
    if (IsStrict)
      Chain = FP.getValue(1);

    // Without FPCVT the conversion produced an f64; round it to f32 here.
    if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
      if (IsStrict)
        FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
                         DAG.getVTList(MVT::f32, MVT::Other),
                         {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
      else
        FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
                         DAG.getIntPtrConstant(0, dl));
    }
    return FP;
  }

  assert(Src.getValueType() == MVT::i32 &&
         "Unhandled INT_TO_FP type in custom expander!");
  // Since we only generate this in 64-bit mode, we can take advantage of
  // 64-bit registers.  In particular, sign extend the input value into the
  // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
  // then lfd it and fcfid it.
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  SDValue Ld;
  if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
    ReuseLoadInfo RLI;
    bool ReusingLoad;
    if (!(ReusingLoad = canReuseLoadAddress(Src, MVT::i32, RLI, DAG))) {
      // No existing address to reuse: spill the i32 to a 4-byte stack slot.
      int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
      SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

      SDValue Store = DAG.getStore(Chain, dl, Src, FIdx,
                                   MachinePointerInfo::getFixedStack(
                                       DAG.getMachineFunction(), FrameIdx));
      Chain = Store;

      assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
             "Expected an i32 store");

      RLI.Ptr = FIdx;
      RLI.Chain = Chain;
      RLI.MPI =
          MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
      RLI.Alignment = Align(4);
    }

    MachineMemOperand *MMO =
      MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                              RLI.Alignment, RLI.AAInfo, RLI.Ranges);
    SDValue Ops[] = { RLI.Chain, RLI.Ptr };
    Ld = DAG.getMemIntrinsicNode(IsSigned ? PPCISD::LFIWAX : PPCISD::LFIWZX, dl,
                                 DAG.getVTList(MVT::f64, MVT::Other), Ops,
                                 MVT::i32, MMO);
    Chain = Ld.getValue(1);
    if (ReusingLoad)
      spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
  } else {
    assert(Subtarget.isPPC64() &&
           "i32->FP without LFIWAX supported only on PPC64");

    int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
    SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

    SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Src);

    // STD the extended value into the stack slot.
    SDValue Store = DAG.getStore(
        Chain, dl, Ext64, FIdx,
        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
    Chain = Store;

    // Load the value as a double.
    Ld = DAG.getLoad(
        MVT::f64, dl, Chain, FIdx,
        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
    Chain = Ld.getValue(1);
  }

  // FCFID it and return it.
  SDValue FP = convertIntToFP(Op, Ld, DAG, Subtarget, Chain);
  if (IsStrict)
    Chain = FP.getValue(1);
  // Without FPCVT the conversion produced an f64; round it to f32 here.
  if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
    if (IsStrict)
      FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
                       DAG.getVTList(MVT::f32, MVT::Other),
                       {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
    else
      FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
                       DAG.getIntPtrConstant(0, dl));
  }
  return FP;
}

8812

8813

/// Custom lowering of FLT_ROUNDS_: read the FPSCR with MFFS, spill it to an
/// 8-byte stack slot, reload the low 32 bits as an integer and remap the
/// rounding-mode field to the FLT_ROUNDS encoding. Returns the merged
/// {value, chain} pair.
SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc dl(Op);
  /*
   The rounding mode is in bits 30:31 of FPSCR, and has the following
   settings:
     00 Round to nearest
     01 Round to 0
     10 Round to +inf
     11 Round to -inf

  FLT_ROUNDS, on the other hand, expects the following:
    -1 Undefined
     0 Round to 0
     1 Round to nearest
     2 Round to +inf
     3 Round to -inf

  To perform the conversion, we do:
    ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
  */

  MachineFunction &MF = DAG.getMachineFunction();
  EVT VT = Op.getValueType();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  // Save FP Control Word to register
  SDValue Chain = Op.getOperand(0);
  SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain);
  Chain = MFFS.getValue(1);

  // Save FP register to stack slot
  int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
  SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
  Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo());

  // Load FP Control Word from low 32 bits of stack slot. The low word of the
  // stored doubleword sits at offset 4 on big-endian targets, but at offset 0
  // on little-endian ones.
  SDValue Addr = StackSlot;
  if (!Subtarget.isLittleEndian())
    Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot,
                       DAG.getConstant(4, dl, PtrVT));
  SDValue CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo());
  Chain = CWD.getValue(1);

  // Transform as necessary: CWD1 = FPSCR & 3, CWD2 = (~FPSCR & 3) >> 1
  // (the XOR with 3 followed by AND with 3 inverts just the two mode bits).
  SDValue CWD1 =
    DAG.getNode(ISD::AND, dl, MVT::i32,
                CWD, DAG.getConstant(3, dl, MVT::i32));
  SDValue CWD2 =
    DAG.getNode(ISD::SRL, dl, MVT::i32,
                DAG.getNode(ISD::AND, dl, MVT::i32,
                            DAG.getNode(ISD::XOR, dl, MVT::i32,
                                        CWD, DAG.getConstant(3, dl, MVT::i32)),
                            DAG.getConstant(3, dl, MVT::i32)),
                DAG.getConstant(1, dl, MVT::i32));

  SDValue RetVal =
    DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);

  // NOTE(review): RetVal is i32, yet the TRUNCATE/ZERO_EXTEND split below is
  // at 16 bits -- confirm which result types can actually reach this point.
  RetVal =
      DAG.getNode((VT.getSizeInBits() < 16 ? ISD::TRUNCATE : ISD::ZERO_EXTEND),
                  dl, VT, RetVal);

  return DAG.getMergeValues({RetVal, Chain}, dl);
}

8876

8877

/// Lower SHL_PARTS: shift the value held in the (Lo, Hi) operand pair left by
/// the third operand and return the new {Lo, Hi} pair as merged values.
SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
  const EVT PartVT = Op.getValueType();
  const unsigned PartBits = PartVT.getSizeInBits();
  SDLoc dl(Op);
  assert(Op.getNumOperands() == 3 &&
         PartVT == Op.getOperand(1).getValueType() &&
         "Unexpected SHL!");

  // The expansion below is pure logical ops; it relies on the PPC behavior
  // for oversized shift amounts.
  SDValue LoIn = Op.getOperand(0);
  SDValue HiIn = Op.getOperand(1);
  SDValue ShAmt = Op.getOperand(2);
  const EVT ShAmtVT = ShAmt.getValueType();

  // PartBits - ShAmt: right-shift distance for the Lo bits that cross into Hi.
  SDValue WidthC = DAG.getConstant(PartBits, dl, ShAmtVT);
  SDValue ComplAmt = DAG.getNode(ISD::SUB, dl, ShAmtVT, WidthC, ShAmt);
  SDValue HiShifted = DAG.getNode(PPCISD::SHL, dl, PartVT, HiIn, ShAmt);
  SDValue LoCarry = DAG.getNode(PPCISD::SRL, dl, PartVT, LoIn, ComplAmt);
  SDValue HiCombined = DAG.getNode(ISD::OR, dl, PartVT, HiShifted, LoCarry);

  // ShAmt - PartBits: left-shift distance of Lo into Hi for the large-amount
  // term.
  SDValue NegWidthC = DAG.getConstant(-PartBits, dl, ShAmtVT);
  SDValue ExcessAmt = DAG.getNode(ISD::ADD, dl, ShAmtVT, ShAmt, NegWidthC);
  SDValue LoIntoHi = DAG.getNode(PPCISD::SHL, dl, PartVT, LoIn, ExcessAmt);

  SDValue ResHi = DAG.getNode(ISD::OR, dl, PartVT, HiCombined, LoIntoHi);
  SDValue ResLo = DAG.getNode(PPCISD::SHL, dl, PartVT, LoIn, ShAmt);
  SDValue Results[] = {ResLo, ResHi};
  return DAG.getMergeValues(Results, dl);
}

8905

8906

/// Lower SRL_PARTS: logically shift the value held in the (Lo, Hi) operand
/// pair right by the third operand and return the new {Lo, Hi} pair as merged
/// values.
SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
  const EVT PartVT = Op.getValueType();
  SDLoc dl(Op);
  const unsigned PartBits = PartVT.getSizeInBits();
  assert(Op.getNumOperands() == 3 &&
         PartVT == Op.getOperand(1).getValueType() &&
         "Unexpected SRL!");

  // The expansion below is pure logical ops; it relies on the PPC behavior
  // for oversized shift amounts.
  SDValue LoIn = Op.getOperand(0);
  SDValue HiIn = Op.getOperand(1);
  SDValue ShAmt = Op.getOperand(2);
  const EVT ShAmtVT = ShAmt.getValueType();

  // PartBits - ShAmt: left-shift distance for the Hi bits that cross into Lo.
  SDValue WidthC = DAG.getConstant(PartBits, dl, ShAmtVT);
  SDValue ComplAmt = DAG.getNode(ISD::SUB, dl, ShAmtVT, WidthC, ShAmt);
  SDValue LoShifted = DAG.getNode(PPCISD::SRL, dl, PartVT, LoIn, ShAmt);
  SDValue HiCarry = DAG.getNode(PPCISD::SHL, dl, PartVT, HiIn, ComplAmt);
  SDValue LoCombined = DAG.getNode(ISD::OR, dl, PartVT, LoShifted, HiCarry);

  // ShAmt - PartBits: right-shift distance of Hi into Lo for the large-amount
  // term.
  SDValue NegWidthC = DAG.getConstant(-PartBits, dl, ShAmtVT);
  SDValue ExcessAmt = DAG.getNode(ISD::ADD, dl, ShAmtVT, ShAmt, NegWidthC);
  SDValue HiIntoLo = DAG.getNode(PPCISD::SRL, dl, PartVT, HiIn, ExcessAmt);

  SDValue ResLo = DAG.getNode(ISD::OR, dl, PartVT, LoCombined, HiIntoLo);
  SDValue ResHi = DAG.getNode(PPCISD::SRL, dl, PartVT, HiIn, ShAmt);
  SDValue Results[] = {ResLo, ResHi};
  return DAG.getMergeValues(Results, dl);
}

8934

8935

/// Lower SRA_PARTS: arithmetically shift the value held in the (Lo, Hi)
/// operand pair right by the third operand and return the new {Lo, Hi} pair as
/// merged values.
SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  const EVT PartVT = Op.getValueType();
  const unsigned PartBits = PartVT.getSizeInBits();
  assert(Op.getNumOperands() == 3 &&
         PartVT == Op.getOperand(1).getValueType() &&
         "Unexpected SRA!");

  // Expanded into logical ops plus a final select_cc for the low part.
  SDValue LoIn = Op.getOperand(0);
  SDValue HiIn = Op.getOperand(1);
  SDValue ShAmt = Op.getOperand(2);
  const EVT ShAmtVT = ShAmt.getValueType();

  // PartBits - ShAmt: left-shift distance for the Hi bits that cross into Lo.
  SDValue WidthC = DAG.getConstant(PartBits, dl, ShAmtVT);
  SDValue ComplAmt = DAG.getNode(ISD::SUB, dl, ShAmtVT, WidthC, ShAmt);
  SDValue LoShifted = DAG.getNode(PPCISD::SRL, dl, PartVT, LoIn, ShAmt);
  SDValue HiCarry = DAG.getNode(PPCISD::SHL, dl, PartVT, HiIn, ComplAmt);
  SDValue LoCombined = DAG.getNode(ISD::OR, dl, PartVT, LoShifted, HiCarry);

  // ShAmt - PartBits: arithmetic-shift distance of Hi for the large-amount
  // candidate.
  SDValue NegWidthC = DAG.getConstant(-PartBits, dl, ShAmtVT);
  SDValue ExcessAmt = DAG.getNode(ISD::ADD, dl, ShAmtVT, ShAmt, NegWidthC);
  SDValue HiIntoLo = DAG.getNode(PPCISD::SRA, dl, PartVT, HiIn, ExcessAmt);

  SDValue ResHi = DAG.getNode(PPCISD::SRA, dl, PartVT, HiIn, ShAmt);

  // Low part: LoCombined when (ShAmt - PartBits) <= 0, otherwise HiIntoLo.
  SDValue ZeroC = DAG.getConstant(0, dl, ShAmtVT);
  SDValue ResLo =
      DAG.getSelectCC(dl, ExcessAmt, ZeroC, LoCombined, HiIntoLo, ISD::SETLE);

  SDValue Results[] = {ResLo, ResHi};
  return DAG.getMergeValues(Results, dl);
}

8963

8964

/// Lower FSHL / FSHR to a pair of PPC shifts OR'ed together.
///
///   fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
///   fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
///
/// This is simpler than TargetLowering::expandFunnelShift because we can rely
/// on PowerPC shift by BW being well defined.
SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc dl(Op);
  const EVT ValVT = Op.getValueType();
  const unsigned Bits = ValVT.getSizeInBits();
  const bool ShiftLeft = Op.getOpcode() == ISD::FSHL;

  SDValue HiSrc = Op.getOperand(0);
  SDValue LoSrc = Op.getOperand(1);
  SDValue RawAmt = Op.getOperand(2);
  const EVT AmtVT = RawAmt.getValueType();

  // Amt = Z & (BW - 1); the mask form of "Z % BW" used by this lowering.
  SDValue MaskC = DAG.getConstant(Bits - 1, dl, AmtVT);
  SDValue Amt = DAG.getNode(ISD::AND, dl, AmtVT, RawAmt, MaskC);

  // InvAmt = BW - Amt.
  SDValue WidthC = DAG.getConstant(Bits, dl, AmtVT);
  SDValue InvAmt = DAG.getNode(ISD::SUB, dl, AmtVT, WidthC, Amt);

  SDValue LeftAmt = ShiftLeft ? Amt : InvAmt;
  SDValue RightAmt = ShiftLeft ? InvAmt : Amt;
  SDValue LeftPart = DAG.getNode(PPCISD::SHL, dl, ValVT, HiSrc, LeftAmt);
  SDValue RightPart = DAG.getNode(PPCISD::SRL, dl, ValVT, LoSrc, RightAmt);
  return DAG.getNode(ISD::OR, dl, ValVT, LeftPart, RightPart);
}

8988

8989

//===----------------------------------------------------------------------===//

8990

// Vector related lowering.

8991

//

8992

8993

/// getCanonicalConstSplat - Build a canonical splat immediate of Val with an

8994

/// element size of SplatSize. Cast the result to VT.

8995

static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,

8996

SelectionDAG &DAG, const SDLoc &dl) {

8997

static const MVT VTys[] = { // canonical VT to use for each size.

8998

MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32

8999

};

9000

9001

EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];

9002

9003

// For a splat with all ones, turn it to vspltisb 0xFF to canonicalize.

9004

if (Val == ((1LU << (SplatSize * 8)) - 1)) {

9005

SplatSize = 1;

9006

Val = 0xFF;

9007

}

9008

9009

EVT CanonicalVT = VTys[SplatSize-1];

9010

9011

// Build a canonical splat for this value.

9012

return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT));

9013

}

9014

9015

/// BuildIntrinsicOp - Return a unary operator intrinsic node with the

9016

/// specified intrinsic ID.

9017

static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,

9018

const SDLoc &dl, EVT DestVT = MVT::Other) {

9019

if (DestVT == MVT::Other) DestVT = Op.getValueType();

9020

return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,

9021

DAG.getConstant(IID, dl, MVT::i32), Op);

9022

}

9023

9024

/// BuildIntrinsicOp - Return a binary operator intrinsic node with the

9025

/// specified intrinsic ID.

9026

static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,

9027

SelectionDAG &DAG, const SDLoc &dl,

9028

EVT DestVT = MVT::Other) {

9029

if (DestVT == MVT::Other) DestVT = LHS.getValueType();

9030

return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,

9031

DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);

9032

}

9033

9034

/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the

9035

/// specified intrinsic ID.

9036

static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,

9037

SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,

9038

EVT DestVT = MVT::Other) {

9039

if (DestVT == MVT::Other) DestVT = Op0.getValueType();

9040

return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,

9041

DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);

9042

}

9043

9044

/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified

9045

/// amount. The result has the specified value type.

9046

static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,

9047

SelectionDAG &DAG, const SDLoc &dl) {

9048

// Force LHS/RHS to be the right type.

9049

LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);

9050

RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);

9051

9052

int Ops[16];

9053

for (unsigned i = 0; i != 16; ++i)

9054

Ops[i] = i + Amt;

9055

SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);

9056

return DAG.getNode(ISD::BITCAST, dl, VT, T);

9057

}

9058

9059

/// Do we have an efficient pattern in a .td file for this node?

9060

///

9061

/// \param V - pointer to the BuildVectorSDNode being matched

9062

/// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?

9063

///

9064

/// There are some patterns where it is beneficial to keep a BUILD_VECTOR

9065

/// node as a BUILD_VECTOR node rather than expanding it. The patterns where

9066

/// the opposite is true (expansion is beneficial) are:

9067

/// - The node builds a vector out of integers that are not 32 or 64-bits

9068

/// - The node builds a vector out of constants

9069

/// - The node is a "load-and-splat"

9070

/// In all other cases, we will choose to keep the BUILD_VECTOR.

9071

static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,

9072

bool HasDirectMove,

9073

bool HasP8Vector) {

9074

EVT VecVT = V->getValueType(0);

9075

bool RightType = VecVT == MVT::v2f64 ||

9076

(HasP8Vector && VecVT == MVT::v4f32) ||

9077

(HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));

9078

if (!RightType)

9079

return false;

9080

9081

bool IsSplat = true;

9082

bool IsLoad = false;

9083

SDValue Op0 = V->getOperand(0);

9084

9085

// This function is called in a block that confirms the node is not a constant

9086

// splat. So a constant BUILD_VECTOR here means the vector is built out of

9087

// different constants.

9088

if (V->isConstant())

9089

return false;

9090

for (int i = 0, e = V->getNumOperands(); i < e; ++i) {

9091

if (V->getOperand(i).isUndef())

9092

return false;

9093

// We want to expand nodes that represent load-and-splat even if the

9094

// loaded value is a floating point truncation or conversion to int.

9095

if (V->getOperand(i).getOpcode() == ISD::LOAD ||

9096

(V->getOperand(i).getOpcode() == ISD::FP_ROUND &&

9097

V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||

9098

(V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&

9099

V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||

9100

(V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&

9101

V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))

9102

IsLoad = true;

9103

// If the operands are different or the input is not a load and has more

9104

// uses than just this BV node, then it isn't a splat.

9105

if (V->getOperand(i) != Op0 ||

9106

(!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))

9107

IsSplat = false;

9108

}

9109

return !(IsSplat && IsLoad);

9110

}

9111

9112

// Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.

9113

SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {

9114

9115

SDLoc dl(Op);

9116

SDValue Op0 = Op->getOperand(0);

9117

9118

if ((Op.getValueType() != MVT::f128) ||

9119

(Op0.getOpcode() != ISD::BUILD_PAIR) ||

9120

(Op0.getOperand(0).getValueType() != MVT::i64) ||

9121

(Op0.getOperand(1).getValueType() != MVT::i64))

9122

return SDValue();

9123

9124

return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Op0.getOperand(0),

9125

Op0.getOperand(1));

9126

}

9127

9128

/// Look through an optional BITCAST and an optional SCALAR_TO_VECTOR /
/// SCALAR_TO_VECTOR_PERMUTED wrapper around \p Op and return a pointer to the
/// underlying value if it is a normal load (per ISD::isNormalLoad); otherwise
/// return nullptr.
///
/// \p IsPermuted is written only when a scalar-to-vector wrapper is found: it
/// is set to true for SCALAR_TO_VECTOR_PERMUTED and false for
/// SCALAR_TO_VECTOR. It is left untouched in every other case.
static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) {
  const SDValue *Cur = &Op;

  if (Cur->getOpcode() == ISD::BITCAST)
    Cur = &Cur->getOperand(0);

  const unsigned WrapperOpc = Cur->getOpcode();
  const bool PermutedWrapper =
      WrapperOpc == PPCISD::SCALAR_TO_VECTOR_PERMUTED;
  if (PermutedWrapper || WrapperOpc == ISD::SCALAR_TO_VECTOR) {
    IsPermuted = PermutedWrapper;
    Cur = &Cur->getOperand(0);
  }

  if (Cur->getOpcode() != ISD::LOAD)
    return nullptr;

  LoadSDNode *Load = cast<LoadSDNode>(*Cur);
  if (!ISD::isNormalLoad(Load))
    return nullptr;
  return Cur;
}

9142

9143

// Convert the argument APFloat to a single precision APFloat if there is no

9144

// loss in information during the conversion to single precision APFloat and the

9145

// resulting number is not a denormal number. Return true if successful.

9146

bool llvm::convertToNonDenormSingle(APFloat &ArgAPFloat) {

9147

APFloat APFloatToConvert = ArgAPFloat;

9148

bool LosesInfo = true;

9149

APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,

9150

&LosesInfo);

9151

bool Success = (!LosesInfo && !APFloatToConvert.isDenormal());

9152

if (Success)

9153

ArgAPFloat = APFloatToConvert;

9154

return Success;

9155

}

9156

9157

// Bitcast the argument APInt to a double and convert it to a single precision

9158

// APFloat, bitcast the APFloat to an APInt and assign it to the original

9159

// argument if there is no loss in information during the conversion from

9160

// double to single precision APFloat and the resulting number is not a denormal

9161

// number. Return true if successful.

9162

bool llvm::convertToNonDenormSingle(APInt &ArgAPInt) {

9163

double DpValue = ArgAPInt.bitsToDouble();

9164

APFloat APFloatDp(DpValue);

9165

bool Success = convertToNonDenormSingle(APFloatDp);

9166

if (Success)

9167

ArgAPInt = APFloatDp.bitcastToAPInt();

9168

return Success;

9169

}

9170

9171

// If this is a case we can't handle, return null and let the default

9172

// expansion code take care of it. If we CAN select this case, and if it

9173

// selects to a single instruction, return Op. Otherwise, if we can codegen

9174

// this case more efficiently than a constant pool load, lower it to the

9175

// sequence of ops that should be used.

9176

SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,

9177

SelectionDAG &DAG) const {

9178

SDLoc dl(Op);

9179

BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());

9180

assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR")((BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR"
) ? static_cast<void> (0) : __assert_fail ("BVN && \"Expected a BuildVectorSDNode in LowerBUILD_VECTOR\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 9180, __PRETTY_FUNCTION__));

9181

9182

// Check if this is a splat of a constant value.

9183

APInt APSplatBits, APSplatUndef;

9184

unsigned SplatBitSize;

9185

bool HasAnyUndefs;

9186

bool BVNIsConstantSplat =

9187

BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,

9188

HasAnyUndefs, 0, !Subtarget.isLittleEndian());

9189

9190

// If it is a splat of a double, check if we can shrink it to a 32 bit

9191

// non-denormal float which when converted back to double gives us the same

9192

// double. This is to exploit the XXSPLTIDP instruction.

9193

if (BVNIsConstantSplat && Subtarget.hasPrefixInstrs() &&

9194

(SplatBitSize == 64) && (Op->getValueType(0) == MVT::v2f64) &&

9195

convertToNonDenormSingle(APSplatBits)) {

9196

SDValue SplatNode = DAG.getNode(

9197

PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,

9198

DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));

9199

return DAG.getBitcast(Op.getValueType(), SplatNode);

9200

}

9201

9202

if (!BVNIsConstantSplat || SplatBitSize > 32) {

9203

9204

bool IsPermutedLoad = false;

9205

const SDValue *InputLoad =

9206

getNormalLoadInput(Op.getOperand(0), IsPermutedLoad);

9207

// Handle load-and-splat patterns as we have instructions that will do this

9208

// in one go.

9209

if (InputLoad && DAG.isSplatValue(Op, true)) {

9210

LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);

9211

9212

// We have handling for 4 and 8 byte elements.

9213

unsigned ElementSize = LD->getMemoryVT().getScalarSizeInBits();

9214

9215

// Checking for a single use of this load, we have to check for vector

9216

// width (128 bits) / ElementSize uses (since each operand of the

9217

// BUILD_VECTOR is a separate use of the value.

9218

if (InputLoad->getNode()->hasNUsesOfValue(128 / ElementSize, 0) &&

9219

((Subtarget.hasVSX() && ElementSize == 64) ||

9220

(Subtarget.hasP9Vector() && ElementSize == 32))) {

9221

SDValue Ops[] = {

9222

LD->getChain(), // Chain

9223

LD->getBasePtr(), // Ptr

9224

DAG.getValueType(Op.getValueType()) // VT

9225

};

9226

return

9227

DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl,

9228

DAG.getVTList(Op.getValueType(), MVT::Other),

9229

Ops, LD->getMemoryVT(), LD->getMemOperand());

9230

}

9231

}

9232

9233

// BUILD_VECTOR nodes that are not constant splats of up to 32-bits can be

9234

// lowered to VSX instructions under certain conditions.

9235

// Without VSX, there is no pattern more efficient than expanding the node.

9236

if (Subtarget.hasVSX() &&

9237

haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),

9238

Subtarget.hasP8Vector()))

9239

return Op;

9240

return SDValue();

9241

}

9242

9243

uint64_t SplatBits = APSplatBits.getZExtValue();

9244

uint64_t SplatUndef = APSplatUndef.getZExtValue();

9245

unsigned SplatSize = SplatBitSize / 8;

9246

9247

// First, handle single instruction cases.

9248

9249

// All zeros?

9250

if (SplatBits == 0) {

9251

// Canonicalize all zero vectors to be v4i32.

9252

if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {

9253

SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);

9254

Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);

9255

}

9256

return Op;

9257

}

9258

9259

// We have XXSPLTIW for constant splats four bytes wide.

9260

// Given vector length is a multiple of 4, 2-byte splats can be replaced

9261

// with 4-byte splats. We replicate the SplatBits in case of 2-byte splat to

9262

// make a 4-byte splat element. For example: 2-byte splat of 0xABAB can be

9263

// turned into a 4-byte splat of 0xABABABAB.

9264

if (Subtarget.hasPrefixInstrs() && SplatSize == 2)

9265

return getCanonicalConstSplat((SplatBits |= SplatBits << 16), SplatSize * 2,

9266

Op.getValueType(), DAG, dl);

9267

9268

if (Subtarget.hasPrefixInstrs() && SplatSize == 4)

9269

return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,

9270

dl);

9271

9272

// We have XXSPLTIB for constant splats one byte wide.

9273

if (Subtarget.hasP9Vector() && SplatSize == 1)

9274

return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,

9275

dl);

9276

9277

// If the sign extended value is in the range [-16,15], use VSPLTI[bhw].

9278

int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>

9279

(32-SplatBitSize));

9280

if (SextVal >= -16 && SextVal <= 15)

9281

return getCanonicalConstSplat(SextVal, SplatSize, Op.getValueType(), DAG,

9282

dl);

9283

9284

// Two instruction sequences.

9285

9286

// If this value is in the range [-32,30] and is even, use:

9287

// VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)

9288

// If this value is in the range [17,31] and is odd, use:

9289

// VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)

9290

// If this value is in the range [-31,-17] and is odd, use:

9291

// VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)

9292

// Note the last two are three-instruction sequences.

9293

if (SextVal >= -32 && SextVal <= 31) {

9294

// To avoid having these optimizations undone by constant folding,

9295

// we convert to a pseudo that will be expanded later into one of

9296

// the above forms.

9297

SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32);

9298

EVT VT = (SplatSize == 1 ? MVT::v16i8 :

9299

(SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));

9300

SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);

9301

SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);

9302

if (VT == Op.getValueType())

9303

return RetVal;

9304

else

9305

return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);

9306

}

9307

9308

// If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is

9309

// 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important

9310

// for fneg/fabs.

9311

if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {

9312

// Make -1 and vspltisw -1:

9313

SDValue OnesV = getCanonicalConstSplat(-1, 4, MVT::v4i32, DAG, dl);

9314

9315

// Make the VSLW intrinsic, computing 0x8000_0000.

9316

SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,

9317

OnesV, DAG, dl);

9318

9319

// xor by OnesV to invert it.

9320

Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);

9321

return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);

9322

}

9323

9324

// Check to see if this is a wide variety of vsplti*, binop self cases.

9325

static const signed char SplatCsts[] = {

9326

-1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,

9327

-8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16

9328

};

9329

9330

for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {

9331

// Indirect through the SplatCsts array so that we favor 'vsplti -1' for

9332

// cases which are ambiguous (e.g. formation of 0x8000_0000). 'vsplti -1'

9333

int i = SplatCsts[idx];

9334

9335

// Figure out what shift amount will be used by altivec if shifted by i in

9336

// this splat size.

9337

unsigned TypeShiftAmt = i & (SplatBitSize-1);

9338

9339

// vsplti + shl self.

9340

if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {

9341

SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);

9342

static const unsigned IIDs[] = { // Intrinsic to use for each size.

9343

Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,

9344

Intrinsic::ppc_altivec_vslw

9345

};

9346

Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);

9347

return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);

9348

}

9349

9350

// vsplti + srl self.

9351

if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {

9352

SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);

9353

static const unsigned IIDs[] = { // Intrinsic to use for each size.

9354

Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,

9355

Intrinsic::ppc_altivec_vsrw

9356

};

9357

Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);

9358

return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);

9359

}

9360

9361

// vsplti + sra self.

9362

if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {

9363

SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);

9364

static const unsigned IIDs[] = { // Intrinsic to use for each size.

9365

Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,

9366

Intrinsic::ppc_altivec_vsraw

9367

};

9368

Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);

9369

return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);

9370

}

9371

9372

// vsplti + rol self.

9373

if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |

9374

((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {

9375

SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);

9376

static const unsigned IIDs[] = { // Intrinsic to use for each size.

9377

Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,

9378

Intrinsic::ppc_altivec_vrlw

9379

};

9380

Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);

9381

return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);

9382

}

9383

9384

// t = vsplti c, result = vsldoi t, t, 1

9385

if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {

9386

SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);

9387

unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;

9388

return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);

9389

}

9390

// t = vsplti c, result = vsldoi t, t, 2

9391

if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {

9392

SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);

9393

unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;

9394

return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);

9395

}

9396

// t = vsplti c, result = vsldoi t, t, 3

9397

if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {

9398

SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);

9399

unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;

9400

return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);

9401

}

9402

}

9403

9404

return SDValue();

9405

}

9406

9407

/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit

9408

/// the specified operations to build the shuffle.

9409

static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,

9410

SDValue RHS, SelectionDAG &DAG,

9411

const SDLoc &dl) {

9412

unsigned OpNum = (PFEntry >> 26) & 0x0F;

9413

unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);

9414

unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);

9415

9416

enum {

9417

OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>

9418

OP_VMRGHW,

9419

OP_VMRGLW,

9420

OP_VSPLTISW0,

9421

OP_VSPLTISW1,

9422

OP_VSPLTISW2,

9423

OP_VSPLTISW3,

9424

OP_VSLDOI4,

9425

OP_VSLDOI8,

9426

OP_VSLDOI12

9427

};

9428

9429

if (OpNum == OP_COPY) {

9430

if (LHSID == (1*9+2)*9+3) return LHS;

9431

assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!")((LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!") ?
static_cast<void> (0) : __assert_fail ("LHSID == ((4*9+5)*9+6)*9+7 && \"Illegal OP_COPY!\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 9431, __PRETTY_FUNCTION__));

9432

return RHS;

9433

}

9434

9435

SDValue OpLHS, OpRHS;

9436

OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);

9437

OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);

9438

9439

int ShufIdxs[16];

9440

switch (OpNum) {

9441

default: llvm_unreachable("Unknown i32 permute!")::llvm::llvm_unreachable_internal("Unknown i32 permute!", "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 9441);

9442

case OP_VMRGHW:

9443

ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;

9444

ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;

9445

ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;

9446

ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;

9447

break;

9448

case OP_VMRGLW:

9449

ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;

9450

ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;

9451

ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;

9452

ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;

9453

break;

9454

case OP_VSPLTISW0:

9455

for (unsigned i = 0; i != 16; ++i)

9456

ShufIdxs[i] = (i&3)+0;

9457

break;

9458

case OP_VSPLTISW1:

9459

for (unsigned i = 0; i != 16; ++i)

9460

ShufIdxs[i] = (i&3)+4;

9461

break;

9462

case OP_VSPLTISW2:

9463

for (unsigned i = 0; i != 16; ++i)

9464

ShufIdxs[i] = (i&3)+8;

9465

break;

9466

case OP_VSPLTISW3:

9467

for (unsigned i = 0; i != 16; ++i)

9468

ShufIdxs[i] = (i&3)+12;

9469

break;

9470

case OP_VSLDOI4:

9471

return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);

9472

case OP_VSLDOI8:

9473

return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);

9474

case OP_VSLDOI12:

9475

return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);

9476

}

9477

EVT VT = OpLHS.getValueType();

9478

OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);

9479

OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);

9480

SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);

9481

return DAG.getNode(ISD::BITCAST, dl, VT, T);

9482

}

9483

9484

/// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled

9485

/// by the VINSERTB instruction introduced in ISA 3.0, else just return default

9486

/// SDValue.

9487

SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,

9488

SelectionDAG &DAG) const {

9489

const unsigned BytesInVector = 16;

9490

bool IsLE = Subtarget.isLittleEndian();

9491

SDLoc dl(N);

9492

SDValue V1 = N->getOperand(0);

9493

SDValue V2 = N->getOperand(1);

9494

unsigned ShiftElts = 0, InsertAtByte = 0;

9495

bool Swap = false;

9496

9497

// Shifts required to get the byte we want at element 7.

9498

unsigned LittleEndianShifts[] = {8, 7, 6, 5, 4, 3, 2, 1,

9499

0, 15, 14, 13, 12, 11, 10, 9};

9500

unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,

9501

1, 2, 3, 4, 5, 6, 7, 8};

9502

9503

ArrayRef<int> Mask = N->getMask();

9504

int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};

9505

9506

// For each mask element, find out if we're just inserting something

9507

// from V2 into V1 or vice versa.

9508

// Possible permutations inserting an element from V2 into V1:

9509

// X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15

9510

// 0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15

9511

// ...

9512

// 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X

9513

// Inserting from V1 into V2 will be similar, except mask range will be

9514

// [16,31].

9515

9516

bool FoundCandidate = false;

9517

// If both vector operands for the shuffle are the same vector, the mask

9518

// will contain only elements from the first one and the second one will be

9519

// undef.

9520

unsigned VINSERTBSrcElem = IsLE ? 8 : 7;

9521

// Go through the mask of half-words to find an element that's being moved

9522

// from one vector to the other.

9523

for (unsigned i = 0; i < BytesInVector; ++i) {

9524

unsigned CurrentElement = Mask[i];

9525

// If 2nd operand is undefined, we should only look for element 7 in the

9526

// Mask.

9527

if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)

9528

continue;

9529

9530

bool OtherElementsInOrder = true;

9531

// Examine the other elements in the Mask to see if they're in original

9532

// order.

9533

for (unsigned j = 0; j < BytesInVector; ++j) {

9534

if (j == i)

9535

continue;

9536

// If CurrentElement is from V1 [0,15], then we the rest of the Mask to be

9537

// from V2 [16,31] and vice versa. Unless the 2nd operand is undefined,

9538

// in which we always assume we're always picking from the 1st operand.

9539

int MaskOffset =

9540

(!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;

9541

if (Mask[j] != OriginalOrder[j] + MaskOffset) {

9542

OtherElementsInOrder = false;

9543

break;

9544

}

9545

}

9546

// If other elements are in original order, we record the number of shifts

9547

// we need to get the element we want into element 7. Also record which byte

9548

// in the vector we should insert into.

9549

if (OtherElementsInOrder) {

9550

// If 2nd operand is undefined, we assume no shifts and no swapping.

9551

if (V2.isUndef()) {

9552

ShiftElts = 0;

9553

Swap = false;

9554

} else {

9555

// Only need the last 4-bits for shifts because operands will be swapped if CurrentElement is >= 2^4.

9556

ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]

9557

: BigEndianShifts[CurrentElement & 0xF];

9558

Swap = CurrentElement < BytesInVector;

9559

}

9560

InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;

9561

FoundCandidate = true;

9562

break;

9563

}

9564

}

9565

9566

if (!FoundCandidate)

9567

return SDValue();

9568

9569

// Candidate found, construct the proper SDAG sequence with VINSERTB,

9570

// optionally with VECSHL if shift is required.

9571

if (Swap)

9572

std::swap(V1, V2);

9573

if (V2.isUndef())

9574

V2 = V1;

9575

if (ShiftElts) {

9576

SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,

9577

DAG.getConstant(ShiftElts, dl, MVT::i32));

9578

return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,

9579

DAG.getConstant(InsertAtByte, dl, MVT::i32));

9580

}

9581

return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,

9582

DAG.getConstant(InsertAtByte, dl, MVT::i32));

9583

}

9584

9585

/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled

9586

/// by the VINSERTH instruction introduced in ISA 3.0, else just return default

9587

/// SDValue.

9588

SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,

9589

SelectionDAG &DAG) const {

9590

const unsigned NumHalfWords = 8;

9591

const unsigned BytesInVector = NumHalfWords * 2;

9592

// Check that the shuffle is on half-words.

9593

if (!isNByteElemShuffleMask(N, 2, 1))

9594

return SDValue();

9595

9596

bool IsLE = Subtarget.isLittleEndian();

9597

SDLoc dl(N);

9598

SDValue V1 = N->getOperand(0);

9599

SDValue V2 = N->getOperand(1);

9600

unsigned ShiftElts = 0, InsertAtByte = 0;

9601

bool Swap = false;

9602

9603

// Shifts required to get the half-word we want at element 3.

9604

unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};

9605

unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};

9606

9607

uint32_t Mask = 0;

9608

uint32_t OriginalOrderLow = 0x1234567;

9609

uint32_t OriginalOrderHigh = 0x89ABCDEF;

9610

// Now we look at mask elements 0,2,4,6,8,10,12,14. Pack the mask into a

9611

// 32-bit space, only need 4-bit nibbles per element.

9612

for (unsigned i = 0; i < NumHalfWords; ++i) {

9613

unsigned MaskShift = (NumHalfWords - 1 - i) * 4;

9614

Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);

9615

}

9616

9617

// For each mask element, find out if we're just inserting something

9618

// from V2 into V1 or vice versa. Possible permutations inserting an element

9619

// from V2 into V1:

9620

// X, 1, 2, 3, 4, 5, 6, 7

9621

// 0, X, 2, 3, 4, 5, 6, 7

9622

// 0, 1, X, 3, 4, 5, 6, 7

9623

// 0, 1, 2, X, 4, 5, 6, 7

9624

// 0, 1, 2, 3, X, 5, 6, 7

9625

// 0, 1, 2, 3, 4, X, 6, 7

9626

// 0, 1, 2, 3, 4, 5, X, 7

9627

// 0, 1, 2, 3, 4, 5, 6, X

9628

// Inserting from V1 into V2 will be similar, except mask range will be [8,15].

9629

9630

bool FoundCandidate = false;

9631

// Go through the mask of half-words to find an element that's being moved

9632

// from one vector to the other.

9633

for (unsigned i = 0; i < NumHalfWords; ++i) {

9634

unsigned MaskShift = (NumHalfWords - 1 - i) * 4;

9635

uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;

9636

uint32_t MaskOtherElts = ~(0xF << MaskShift);

9637

uint32_t TargetOrder = 0x0;

9638

9639

// If both vector operands for the shuffle are the same vector, the mask

9640

// will contain only elements from the first one and the second one will be

9641

// undef.

9642

if (V2.isUndef()) {

9643

ShiftElts = 0;

9644

unsigned VINSERTHSrcElem = IsLE ? 4 : 3;

9645

TargetOrder = OriginalOrderLow;

9646

Swap = false;

9647

// Skip if not the correct element or mask of other elements don't equal

9648

// to our expected order.

9649

if (MaskOneElt == VINSERTHSrcElem &&

9650

(Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {

9651

InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;

9652

FoundCandidate = true;

9653

break;

9654

}

9655

} else { // If both operands are defined.

9656

// Target order is [8,15] if the current mask is between [0,7].

9657

TargetOrder =

9658

(MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;

9659

// Skip if mask of other elements don't equal our expected order.

9660

if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {

9661

// We only need the last 3 bits for the number of shifts.

9662

ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]

9663

: BigEndianShifts[MaskOneElt & 0x7];

9664

InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;

9665

Swap = MaskOneElt < NumHalfWords;

9666

FoundCandidate = true;

9667

break;

9668

}

9669

}

9670

}

9671

9672

if (!FoundCandidate)

9673

return SDValue();

9674

9675

// Candidate found, construct the proper SDAG sequence with VINSERTH,

9676

// optionally with VECSHL if shift is required.

9677

if (Swap)

9678

std::swap(V1, V2);

9679

if (V2.isUndef())

9680

V2 = V1;

9681

SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);

9682

if (ShiftElts) {

9683

// Double ShiftElts because we're left shifting on v16i8 type.

9684

SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,

9685

DAG.getConstant(2 * ShiftElts, dl, MVT::i32));

9686

SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl);

9687

SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,

9688

DAG.getConstant(InsertAtByte, dl, MVT::i32));

9689

return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);

9690

}

9691

SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);

9692

SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,

9693

DAG.getConstant(InsertAtByte, dl, MVT::i32));

9694

return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);

9695

}

9696

9697

/// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be

9698

/// handled by the XXSPLTI32DX instruction introduced in ISA 3.1, otherwise

9699

/// return the default SDValue.

9700

SDValue PPCTargetLowering::lowerToXXSPLTI32DX(ShuffleVectorSDNode *SVN,

9701

SelectionDAG &DAG) const {

9702

// The LHS and RHS may be bitcasts to v16i8 as we canonicalize shuffles

9703

// to v16i8. Peek through the bitcasts to get the actual operands.

9704

SDValue LHS = peekThroughBitcasts(SVN->getOperand(0));

9705

SDValue RHS = peekThroughBitcasts(SVN->getOperand(1));

9706

9707

auto ShuffleMask = SVN->getMask();

9708

SDValue VecShuffle(SVN, 0);

9709

SDLoc DL(SVN);

9710

9711

// Check that we have a four byte shuffle.

9712

if (!isNByteElemShuffleMask(SVN, 4, 1))

9713

return SDValue();

9714

9715

// Canonicalize the RHS being a BUILD_VECTOR when lowering to xxsplti32dx.

9716

if (RHS->getOpcode() != ISD::BUILD_VECTOR) {

9717

std::swap(LHS, RHS);

9718

VecShuffle = DAG.getCommutedVectorShuffle(*SVN);

9719

ShuffleMask = cast<ShuffleVectorSDNode>(VecShuffle)->getMask();

9720

}

9721

9722

// Ensure that the RHS is a vector of constants.

9723

BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());

9724

if (!BVN)

9725

return SDValue();

9726

9727

// Check if RHS is a splat of 4-bytes (or smaller).

9728

APInt APSplatValue, APSplatUndef;

9729

unsigned SplatBitSize;

9730

bool HasAnyUndefs;

9731

if (!BVN->isConstantSplat(APSplatValue, APSplatUndef, SplatBitSize,

9732

HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||

9733

SplatBitSize > 32)

9734

return SDValue();

9735

9736

// Check that the shuffle mask matches the semantics of XXSPLTI32DX.

9737

// The instruction splats a constant C into two words of the source vector

9738

// producing { C, Unchanged, C, Unchanged } or { Unchanged, C, Unchanged, C }.

9739

// Thus we check that the shuffle mask is the equivalent of

9740

// <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively.

9741

// Note: the check above of isNByteElemShuffleMask() ensures that the bytes

9742

// within each word are consecutive, so we only need to check the first byte.

9743

SDValue Index;

9744

bool IsLE = Subtarget.isLittleEndian();

9745

if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) &&

9746

(ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 &&

9747

ShuffleMask[4] > 15 && ShuffleMask[12] > 15))

9748

Index = DAG.getTargetConstant(IsLE ? 0 : 1, DL, MVT::i32);

9749

else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) &&

9750

(ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 &&

9751

ShuffleMask[0] > 15 && ShuffleMask[8] > 15))

9752

Index = DAG.getTargetConstant(IsLE ? 1 : 0, DL, MVT::i32);

9753

else

9754

return SDValue();

9755

9756

// If the splat is narrower than 32-bits, we need to get the 32-bit value

9757

// for XXSPLTI32DX.

9758

unsigned SplatVal = APSplatValue.getZExtValue();

9759

for (; SplatBitSize < 32; SplatBitSize <<= 1)

9760

SplatVal |= (SplatVal << SplatBitSize);

9761

9762

SDValue SplatNode = DAG.getNode(

9763

PPCISD::XXSPLTI32DX, DL, MVT::v2i64, DAG.getBitcast(MVT::v2i64, LHS),

9764

Index, DAG.getTargetConstant(SplatVal, DL, MVT::i32));

9765

return DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, SplatNode);

9766

}

9767

9768

/// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8).

9769

/// We lower ROTL(v1i128) to vector_shuffle(v16i8) only if shift amount is

9770

/// a multiple of 8. Otherwise convert it to a scalar rotation(i128)

9771

/// i.e (or (shl x, C1), (srl x, 128-C1)).

9772

SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {

9773

assert(Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL")((Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL"
) ? static_cast<void> (0) : __assert_fail ("Op.getOpcode() == ISD::ROTL && \"Should only be called for ISD::ROTL\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 9773, __PRETTY_FUNCTION__));

9774

assert(Op.getValueType() == MVT::v1i128 &&((Op.getValueType() == MVT::v1i128 && "Only set v1i128 as custom, other type shouldn't reach here!"
) ? static_cast<void> (0) : __assert_fail ("Op.getValueType() == MVT::v1i128 && \"Only set v1i128 as custom, other type shouldn't reach here!\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 9775, __PRETTY_FUNCTION__))

9775

"Only set v1i128 as custom, other type shouldn't reach here!")((Op.getValueType() == MVT::v1i128 && "Only set v1i128 as custom, other type shouldn't reach here!"
) ? static_cast<void> (0) : __assert_fail ("Op.getValueType() == MVT::v1i128 && \"Only set v1i128 as custom, other type shouldn't reach here!\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 9775, __PRETTY_FUNCTION__));

9776

SDLoc dl(Op);

9777

SDValue N0 = peekThroughBitcasts(Op.getOperand(0));

9778

SDValue N1 = peekThroughBitcasts(Op.getOperand(1));

9779

unsigned SHLAmt = N1.getConstantOperandVal(0);

9780

if (SHLAmt % 8 == 0) {

9781

SmallVector<int, 16> Mask(16, 0);

9782

std::iota(Mask.begin(), Mask.end(), 0);

9783

std::rotate(Mask.begin(), Mask.begin() + SHLAmt / 8, Mask.end());

9784

if (SDValue Shuffle =

9785

DAG.getVectorShuffle(MVT::v16i8, dl, DAG.getBitcast(MVT::v16i8, N0),

9786

DAG.getUNDEF(MVT::v16i8), Mask))

9787

return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, Shuffle);

9788

}

9789

SDValue ArgVal = DAG.getBitcast(MVT::i128, N0);

9790

SDValue SHLOp = DAG.getNode(ISD::SHL, dl, MVT::i128, ArgVal,

9791

DAG.getConstant(SHLAmt, dl, MVT::i32));

9792

SDValue SRLOp = DAG.getNode(ISD::SRL, dl, MVT::i128, ArgVal,

9793

DAG.getConstant(128 - SHLAmt, dl, MVT::i32));

9794

SDValue OROp = DAG.getNode(ISD::OR, dl, MVT::i128, SHLOp, SRLOp);

9795

return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, OROp);

9796

}

9797

9798

/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this

9799

/// is a shuffle we can handle in a single instruction, return it. Otherwise,

9800

/// return the code it can be lowered into. Worst case, it can always be

9801

/// lowered into a vperm.

9802

SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,

9803

SelectionDAG &DAG) const {

9804

SDLoc dl(Op);

9805

SDValue V1 = Op.getOperand(0);

9806

SDValue V2 = Op.getOperand(1);

9807

ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);

9808

9809

// Any nodes that were combined in the target-independent combiner prior

9810

// to vector legalization will not be sent to the target combine. Try to

9811

// combine it here.

9812

if (SDValue NewShuffle = combineVectorShuffle(SVOp, DAG)) {

9813

if (!isa<ShuffleVectorSDNode>(NewShuffle))

9814

return NewShuffle;

9815

Op = NewShuffle;

9816

SVOp = cast<ShuffleVectorSDNode>(Op);

9817

V1 = Op.getOperand(0);

9818

V2 = Op.getOperand(1);

9819

}

9820

EVT VT = Op.getValueType();

9821

bool isLittleEndian = Subtarget.isLittleEndian();

9822

9823

unsigned ShiftElts, InsertAtByte;

9824

bool Swap = false;

9825

9826

// If this is a load-and-splat, we can do that with a single instruction

9827

// in some cases. However if the load has multiple uses, we don't want to

9828

// combine it because that will just produce multiple loads.

9829

bool IsPermutedLoad = false;

9830

const SDValue *InputLoad = getNormalLoadInput(V1, IsPermutedLoad);

9831

if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&

9832

(PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) &&

9833

InputLoad->hasOneUse()) {

9834

bool IsFourByte = PPC::isSplatShuffleMask(SVOp, 4);

9835

int SplatIdx =

9836

PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG);

9837

9838

// The splat index for permuted loads will be in the left half of the vector

9839

// which is strictly wider than the loaded value by 8 bytes. So we need to

9840

// adjust the splat index to point to the correct address in memory.

9841

if (IsPermutedLoad) {

9842

assert(isLittleEndian && "Unexpected permuted load on big endian target")((isLittleEndian && "Unexpected permuted load on big endian target"
) ? static_cast<void> (0) : __assert_fail ("isLittleEndian && \"Unexpected permuted load on big endian target\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 9842, __PRETTY_FUNCTION__));

9843

SplatIdx += IsFourByte ? 2 : 1;

9844

assert((SplatIdx < (IsFourByte ? 4 : 2)) &&(((SplatIdx < (IsFourByte ? 4 : 2)) && "Splat of a value outside of the loaded memory"
) ? static_cast<void> (0) : __assert_fail ("(SplatIdx < (IsFourByte ? 4 : 2)) && \"Splat of a value outside of the loaded memory\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 9845, __PRETTY_FUNCTION__))

9845

"Splat of a value outside of the loaded memory")(((SplatIdx < (IsFourByte ? 4 : 2)) && "Splat of a value outside of the loaded memory"
) ? static_cast<void> (0) : __assert_fail ("(SplatIdx < (IsFourByte ? 4 : 2)) && \"Splat of a value outside of the loaded memory\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 9845, __PRETTY_FUNCTION__));

9846

}

9847

9848

LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);

9849

// For 4-byte load-and-splat, we need Power9.

9850

if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {

9851

uint64_t Offset = 0;

9852

if (IsFourByte)

9853

Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;

9854

else

9855

Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;

9856

9857

SDValue BasePtr = LD->getBasePtr();

9858

if (Offset != 0)

9859

BasePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),

9860

BasePtr, DAG.getIntPtrConstant(Offset, dl));

9861

SDValue Ops[] = {

9862

LD->getChain(), // Chain

9863

BasePtr, // BasePtr

9864

DAG.getValueType(Op.getValueType()) // VT

9865

};

9866

SDVTList VTL =

9867

DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other);

9868

SDValue LdSplt =

9869

DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, VTL,

9870

Ops, LD->getMemoryVT(), LD->getMemOperand());

9871

if (LdSplt.getValueType() != SVOp->getValueType(0))

9872

LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt);

9873

return LdSplt;

9874

}

9875

}

9876

if (Subtarget.hasP9Vector() &&

9877

PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,

9878

isLittleEndian)) {

9879

if (Swap)

9880

std::swap(V1, V2);

9881

SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);

9882

SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);

9883

if (ShiftElts) {

9884

SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,

9885

DAG.getConstant(ShiftElts, dl, MVT::i32));

9886

SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,

9887

DAG.getConstant(InsertAtByte, dl, MVT::i32));

9888

return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);

9889

}

9890

SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,

9891

DAG.getConstant(InsertAtByte, dl, MVT::i32));

9892

return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);

9893

}

9894

9895

if (Subtarget.hasPrefixInstrs()) {

9896

SDValue SplatInsertNode;

9897

if ((SplatInsertNode = lowerToXXSPLTI32DX(SVOp, DAG)))

9898

return SplatInsertNode;

9899

}

9900

9901

if (Subtarget.hasP9Altivec()) {

9902

SDValue NewISDNode;

9903

if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))

9904

return NewISDNode;

9905

9906

if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))

9907

return NewISDNode;

9908

}

9909

9910

if (Subtarget.hasVSX() &&

9911

PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {

9912

if (Swap)

9913

std::swap(V1, V2);

9914

SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);

9915

SDValue Conv2 =

9916

DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);

9917

9918

SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,

9919

DAG.getConstant(ShiftElts, dl, MVT::i32));

9920

return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);

9921

}

9922

9923

if (Subtarget.hasVSX() &&

9924

PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {

9925

if (Swap)

9926

std::swap(V1, V2);

9927

SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);

9928

SDValue Conv2 =

9929

DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);

9930

9931

SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,

9932

DAG.getConstant(ShiftElts, dl, MVT::i32));

9933

return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);

9934

}

9935

9936

if (Subtarget.hasP9Vector()) {

9937

if (PPC::isXXBRHShuffleMask(SVOp)) {

9938

SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);

9939

SDValue ReveHWord = DAG.getNode(ISD::BSWAP, dl, MVT::v8i16, Conv);

9940

return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);

9941

} else if (PPC::isXXBRWShuffleMask(SVOp)) {

9942

SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);

9943

SDValue ReveWord = DAG.getNode(ISD::BSWAP, dl, MVT::v4i32, Conv);

9944

return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);

9945

} else if (PPC::isXXBRDShuffleMask(SVOp)) {

9946

SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);

9947

SDValue ReveDWord = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Conv);

9948

return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);

9949

} else if (PPC::isXXBRQShuffleMask(SVOp)) {

9950

SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);

9951

SDValue ReveQWord = DAG.getNode(ISD::BSWAP, dl, MVT::v1i128, Conv);

9952

return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);

9953

}

9954

}

9955

9956

if (Subtarget.hasVSX()) {

9957

if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {

9958

int SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG);

9959

9960

SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);

9961

SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,

9962

DAG.getConstant(SplatIdx, dl, MVT::i32));

9963

return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);

9964

}

9965

9966

// Left shifts of 8 bytes are actually swaps. Convert accordingly.

9967

if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {

9968

SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);

9969

SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);

9970

return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);

9971

}

9972

}

9973

9974

// Cases that are handled by instructions that take permute immediates

9975

// (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be

9976

// selected by the instruction selector.

9977

if (V2.isUndef()) {

9978

if (PPC::isSplatShuffleMask(SVOp, 1) ||

9979

PPC::isSplatShuffleMask(SVOp, 2) ||

9980

PPC::isSplatShuffleMask(SVOp, 4) ||

9981

PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||

9982

PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||

9983

PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||

9984

PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||

9985

PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||

9986

PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||

9987

PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||

9988

PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||

9989

PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||

9990

(Subtarget.hasP8Altivec() && (

9991

PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||

9992

PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||

9993

PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {

9994

return Op;

9995

}

9996

}

9997

9998

// Altivec has a variety of "shuffle immediates" that take two vector inputs

9999

// and produce a fixed permutation. If any of these match, do not lower to

10000

// VPERM.

10001

unsigned int ShuffleKind = isLittleEndian ? 2 : 0;

10002

if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||

10003

PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||

10004

PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||

10005

PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||

10006

PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||

10007

PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||

10008

PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||

10009

PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||

10010

PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||

10011

(Subtarget.hasP8Altivec() && (

10012

PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||

10013

PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||

10014

PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))

10015

return Op;

10016

10017

// Check to see if this is a shuffle of 4-byte values. If so, we can use our

10018

// perfect shuffle table to emit an optimal matching sequence.

10019

ArrayRef<int> PermMask = SVOp->getMask();

10020

10021

unsigned PFIndexes[4];

10022

bool isFourElementShuffle = true;

10023

for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number

10024

unsigned EltNo = 8; // Start out undef.

10025

for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.

10026

if (PermMask[i*4+j] < 0)

10027

continue; // Undef, ignore it.

10028

10029

unsigned ByteSource = PermMask[i*4+j];

10030

if ((ByteSource & 3) != j) {

10031

isFourElementShuffle = false;

10032

break;

10033

}

10034

10035

if (EltNo == 8) {

10036

EltNo = ByteSource/4;

10037

} else if (EltNo != ByteSource/4) {

10038

isFourElementShuffle = false;

10039

break;

10040

}

10041

}

10042

PFIndexes[i] = EltNo;

10043

}

10044

10045

// If this shuffle can be expressed as a shuffle of 4-byte elements, use the

10046

// perfect shuffle vector to determine if it is cost effective to do this as

10047

// discrete instructions, or whether we should use a vperm.

10048

// For now, we skip this for little endian until such time as we have a

10049

// little-endian perfect shuffle table.

10050

if (isFourElementShuffle && !isLittleEndian) {

10051

// Compute the index in the perfect shuffle table.

10052

unsigned PFTableIndex =

10053

PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];

10054

10055

unsigned PFEntry = PerfectShuffleTable[PFTableIndex];

10056

unsigned Cost = (PFEntry >> 30);

10057

10058

// Determining when to avoid vperm is tricky. Many things affect the cost

10059

// of vperm, particularly how many times the perm mask needs to be computed.

10060

// For example, if the perm mask can be hoisted out of a loop or is already

10061

// used (perhaps because there are multiple permutes with the same shuffle

10062

// mask?) the vperm has a cost of 1. OTOH, hoisting the permute mask out of

10063

// the loop requires an extra register.

10064

//

10065

// As a compromise, we only emit discrete instructions if the shuffle can be

10066

// generated in 3 or fewer operations. When we have loop information

10067

// available, if this block is within a loop, we should avoid using vperm

10068

// for 3-operation perms and use a constant pool load instead.

10069

if (Cost < 3)

10070

return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);

10071

}

10072

10073

// Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant

10074

// vector that will get spilled to the constant pool.

10075

if (V2.isUndef()) V2 = V1;

10076

10077

// The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except

10078

// that it is in input element units, not in bytes. Convert now.

10079

10080

// For little endian, the order of the input vectors is reversed, and

10081

// the permutation mask is complemented with respect to 31. This is

10082

// necessary to produce proper semantics with the big-endian-biased vperm

10083

// instruction.

10084

EVT EltVT = V1.getValueType().getVectorElementType();

10085

unsigned BytesPerElement = EltVT.getSizeInBits()/8;

10086

10087

SmallVector<SDValue, 16> ResultMask;

10088

for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {

10089

unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];

10090

10091

for (unsigned j = 0; j != BytesPerElement; ++j)

10092

if (isLittleEndian)

10093

ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement + j),

10094

dl, MVT::i32));

10095

else

10096

ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement + j, dl,

10097

MVT::i32));

10098

}

10099

10100

ShufflesHandledWithVPERM++;

10101

SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);

10102

LLVM_DEBUG(dbgs() << "Emitting a VPERM for the following shuffle:\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-lowering")) { dbgs() << "Emitting a VPERM for the following shuffle:\n"
; } } while (false);

10103

LLVM_DEBUG(SVOp->dump())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-lowering")) { SVOp->dump(); } } while (false);

10104

LLVM_DEBUG(dbgs() << "With the following permute control vector:\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-lowering")) { dbgs() << "With the following permute control vector:\n"
; } } while (false);

10105

LLVM_DEBUG(VPermMask.dump())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-lowering")) { VPermMask.dump(); } } while (false);

10106

10107

if (isLittleEndian)

10108

return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),

10109

V2, V1, VPermMask);

10110

else

10111

return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),

10112

V1, V2, VPermMask);

10113

}

10114

10115

/// getVectorCompareInfo - Given an intrinsic, return false if it is not a

10116

/// vector comparison. If it is, return true and fill in Opc/isDot with

10117

/// information about the intrinsic.

10118

static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,

10119

bool &isDot, const PPCSubtarget &Subtarget) {

10120

unsigned IntrinsicID =

10121

cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();

10122

CompareOpc = -1;

10123

isDot = false;

10124

switch (IntrinsicID) {

10125

default:

10126

return false;

10127

// Comparison predicates.

10128

case Intrinsic::ppc_altivec_vcmpbfp_p:

10129

CompareOpc = 966;

10130

isDot = true;

10131

break;

10132

case Intrinsic::ppc_altivec_vcmpeqfp_p:

10133

CompareOpc = 198;

10134

isDot = true;

10135

break;

10136

case Intrinsic::ppc_altivec_vcmpequb_p:

10137

CompareOpc = 6;

10138

isDot = true;

10139

break;

10140

case Intrinsic::ppc_altivec_vcmpequh_p:

10141

CompareOpc = 70;

10142

isDot = true;

10143

break;

10144

case Intrinsic::ppc_altivec_vcmpequw_p:

10145

CompareOpc = 134;

10146

isDot = true;

10147

break;

10148

case Intrinsic::ppc_altivec_vcmpequd_p:

10149

if (Subtarget.hasP8Altivec()) {

10150

CompareOpc = 199;

10151

isDot = true;

10152

} else

10153

return false;

10154

break;

10155

case Intrinsic::ppc_altivec_vcmpneb_p:

10156

case Intrinsic::ppc_altivec_vcmpneh_p:

10157

case Intrinsic::ppc_altivec_vcmpnew_p:

10158

case Intrinsic::ppc_altivec_vcmpnezb_p:

10159

case Intrinsic::ppc_altivec_vcmpnezh_p:

10160

case Intrinsic::ppc_altivec_vcmpnezw_p:

10161

if (Subtarget.hasP9Altivec()) {

10162

switch (IntrinsicID) {

10163

default:

10164

llvm_unreachable("Unknown comparison intrinsic.")::llvm::llvm_unreachable_internal("Unknown comparison intrinsic."
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 10164);

10165

case Intrinsic::ppc_altivec_vcmpneb_p:

10166

CompareOpc = 7;

10167

break;

10168

case Intrinsic::ppc_altivec_vcmpneh_p:

10169

CompareOpc = 71;

10170

break;

10171

case Intrinsic::ppc_altivec_vcmpnew_p:

10172

CompareOpc = 135;

10173

break;

10174

case Intrinsic::ppc_altivec_vcmpnezb_p:

10175

CompareOpc = 263;

10176

break;

10177

case Intrinsic::ppc_altivec_vcmpnezh_p:

10178

CompareOpc = 327;

10179

break;

10180

case Intrinsic::ppc_altivec_vcmpnezw_p:

10181

CompareOpc = 391;

10182

break;

10183

}

10184

isDot = true;

10185

} else

10186

return false;

10187

break;

10188

case Intrinsic::ppc_altivec_vcmpgefp_p:

10189

CompareOpc = 454;

10190

isDot = true;

10191

break;

10192

case Intrinsic::ppc_altivec_vcmpgtfp_p:

10193

CompareOpc = 710;

10194

isDot = true;

10195

break;

10196

case Intrinsic::ppc_altivec_vcmpgtsb_p:

10197

CompareOpc = 774;

10198

isDot = true;

10199

break;

10200

case Intrinsic::ppc_altivec_vcmpgtsh_p:

10201

CompareOpc = 838;

10202

isDot = true;

10203

break;

10204

case Intrinsic::ppc_altivec_vcmpgtsw_p:

10205

CompareOpc = 902;

10206

isDot = true;

10207

break;

10208

case Intrinsic::ppc_altivec_vcmpgtsd_p:

10209

if (Subtarget.hasP8Altivec()) {

10210

CompareOpc = 967;

10211

isDot = true;

10212

} else

10213

return false;

10214

break;

10215

case Intrinsic::ppc_altivec_vcmpgtub_p:

10216

CompareOpc = 518;

10217

isDot = true;

10218

break;

10219

case Intrinsic::ppc_altivec_vcmpgtuh_p:

10220

CompareOpc = 582;

10221

isDot = true;

10222

break;

10223

case Intrinsic::ppc_altivec_vcmpgtuw_p:

10224

CompareOpc = 646;

10225

isDot = true;

10226

break;

10227

case Intrinsic::ppc_altivec_vcmpgtud_p:

10228

if (Subtarget.hasP8Altivec()) {

10229

CompareOpc = 711;

10230

isDot = true;

10231

} else

10232

return false;

10233

break;

10234

10235

case Intrinsic::ppc_altivec_vcmpequq:

10236

case Intrinsic::ppc_altivec_vcmpgtsq:

10237

case Intrinsic::ppc_altivec_vcmpgtuq:

10238

if (!Subtarget.isISA3_1())

10239

return false;

10240

switch (IntrinsicID) {

10241

default:

10242

llvm_unreachable("Unknown comparison intrinsic.")::llvm::llvm_unreachable_internal("Unknown comparison intrinsic."
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 10242);

10243

case Intrinsic::ppc_altivec_vcmpequq:

10244

CompareOpc = 455;

10245

break;

10246

case Intrinsic::ppc_altivec_vcmpgtsq:

10247

CompareOpc = 903;

10248

break;

10249

case Intrinsic::ppc_altivec_vcmpgtuq:

10250

CompareOpc = 647;

10251

break;

10252

}

10253

break;

10254

10255

// VSX predicate comparisons use the same infrastructure

10256

case Intrinsic::ppc_vsx_xvcmpeqdp_p:

10257

case Intrinsic::ppc_vsx_xvcmpgedp_p:

10258

case Intrinsic::ppc_vsx_xvcmpgtdp_p:

10259

case Intrinsic::ppc_vsx_xvcmpeqsp_p:

10260

case Intrinsic::ppc_vsx_xvcmpgesp_p:

10261

case Intrinsic::ppc_vsx_xvcmpgtsp_p:

10262

if (Subtarget.hasVSX()) {

10263

switch (IntrinsicID) {

10264

case Intrinsic::ppc_vsx_xvcmpeqdp_p:

10265

CompareOpc = 99;

10266

break;

10267

case Intrinsic::ppc_vsx_xvcmpgedp_p:

10268

CompareOpc = 115;

10269

break;

10270

case Intrinsic::ppc_vsx_xvcmpgtdp_p:

10271

CompareOpc = 107;

10272

break;

10273

case Intrinsic::ppc_vsx_xvcmpeqsp_p:

10274

CompareOpc = 67;

10275

break;

10276

case Intrinsic::ppc_vsx_xvcmpgesp_p:

10277

CompareOpc = 83;

10278

break;

10279

case Intrinsic::ppc_vsx_xvcmpgtsp_p:

10280

CompareOpc = 75;

10281

break;

10282

}

10283

isDot = true;

10284

} else

10285

return false;

10286

break;

10287

10288

// Normal Comparisons.

10289

case Intrinsic::ppc_altivec_vcmpbfp:

10290

CompareOpc = 966;

10291

break;

10292

case Intrinsic::ppc_altivec_vcmpeqfp:

10293

CompareOpc = 198;

10294

break;

10295

case Intrinsic::ppc_altivec_vcmpequb:

10296

CompareOpc = 6;

10297

break;

10298

case Intrinsic::ppc_altivec_vcmpequh:

10299

CompareOpc = 70;

10300

break;

10301

case Intrinsic::ppc_altivec_vcmpequw:

10302

CompareOpc = 134;

10303

break;

10304

case Intrinsic::ppc_altivec_vcmpequd:

10305

if (Subtarget.hasP8Altivec())

10306

CompareOpc = 199;

10307

else

10308

return false;

10309

break;

10310

case Intrinsic::ppc_altivec_vcmpneb:

10311

case Intrinsic::ppc_altivec_vcmpneh:

10312

case Intrinsic::ppc_altivec_vcmpnew:

10313

case Intrinsic::ppc_altivec_vcmpnezb:

10314

case Intrinsic::ppc_altivec_vcmpnezh:

10315

case Intrinsic::ppc_altivec_vcmpnezw:

10316

if (Subtarget.hasP9Altivec())

10317

switch (IntrinsicID) {

10318

default:

10319

llvm_unreachable("Unknown comparison intrinsic.")::llvm::llvm_unreachable_internal("Unknown comparison intrinsic."
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 10319);

10320

case Intrinsic::ppc_altivec_vcmpneb:

10321

CompareOpc = 7;

10322

break;

10323

case Intrinsic::ppc_altivec_vcmpneh:

10324

CompareOpc = 71;

10325

break;

10326

case Intrinsic::ppc_altivec_vcmpnew:

10327

CompareOpc = 135;

10328

break;

10329

case Intrinsic::ppc_altivec_vcmpnezb:

10330

CompareOpc = 263;

10331

break;

10332

case Intrinsic::ppc_altivec_vcmpnezh:

10333

CompareOpc = 327;

10334

break;

10335

case Intrinsic::ppc_altivec_vcmpnezw:

10336

CompareOpc = 391;

10337

break;

10338

}

10339

else

10340

return false;

10341

break;

10342

case Intrinsic::ppc_altivec_vcmpgefp:

10343

CompareOpc = 454;

10344

break;

10345

case Intrinsic::ppc_altivec_vcmpgtfp:

10346

CompareOpc = 710;

10347

break;

10348

case Intrinsic::ppc_altivec_vcmpgtsb:

10349

CompareOpc = 774;

10350

break;

10351

case Intrinsic::ppc_altivec_vcmpgtsh:

10352

CompareOpc = 838;

10353

break;

10354

case Intrinsic::ppc_altivec_vcmpgtsw:

10355

CompareOpc = 902;

10356

break;

10357

case Intrinsic::ppc_altivec_vcmpgtsd:

10358

if (Subtarget.hasP8Altivec())

10359

CompareOpc = 967;

10360

else

10361

return false;

10362

break;

10363

case Intrinsic::ppc_altivec_vcmpgtub:

10364

CompareOpc = 518;

10365

break;

10366

case Intrinsic::ppc_altivec_vcmpgtuh:

10367

CompareOpc = 582;

10368

break;

10369

case Intrinsic::ppc_altivec_vcmpgtuw:

10370

CompareOpc = 646;

10371

break;

10372

case Intrinsic::ppc_altivec_vcmpgtud:

10373

if (Subtarget.hasP8Altivec())

10374

CompareOpc = 711;

10375

else

10376

return false;

10377

break;

10378

case Intrinsic::ppc_altivec_vcmpequq_p:

10379

case Intrinsic::ppc_altivec_vcmpgtsq_p:

10380

case Intrinsic::ppc_altivec_vcmpgtuq_p:

10381

if (!Subtarget.isISA3_1())

10382

return false;

10383

switch (IntrinsicID) {

10384

default:

10385

llvm_unreachable("Unknown comparison intrinsic.")::llvm::llvm_unreachable_internal("Unknown comparison intrinsic."
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 10385);

10386

case Intrinsic::ppc_altivec_vcmpequq_p:

10387

CompareOpc = 455;

10388

break;

10389

case Intrinsic::ppc_altivec_vcmpgtsq_p:

10390

CompareOpc = 903;

10391

break;

10392

case Intrinsic::ppc_altivec_vcmpgtuq_p:

10393

CompareOpc = 647;

10394

break;

10395

}

10396

isDot = true;

10397

break;

10398

}

10399

return true;

10400

}

10401

10402

/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom

10403

/// lower, do it, otherwise return null.

10404

SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,

10405

SelectionDAG &DAG) const {

10406

unsigned IntrinsicID =

10407

cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();

10408

10409

SDLoc dl(Op);

10410

10411

switch (IntrinsicID) {

10412

case Intrinsic::thread_pointer:

10413

// Reads the thread pointer register, used for __builtin_thread_pointer.

10414

if (Subtarget.isPPC64())

10415

return DAG.getRegister(PPC::X13, MVT::i64);

10416

return DAG.getRegister(PPC::R2, MVT::i32);

10417

10418

case Intrinsic::ppc_mma_disassemble_acc:

10419

case Intrinsic::ppc_mma_disassemble_pair: {

10420

int NumVecs = 2;

10421

SDValue WideVec = Op.getOperand(1);

10422

if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {

10423

NumVecs = 4;

10424

WideVec = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, WideVec);

10425

}

10426

SmallVector<SDValue, 4> RetOps;

10427

for (int VecNo = 0; VecNo < NumVecs; VecNo++) {

10428

SDValue Extract = DAG.getNode(

10429

PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, WideVec,

10430

DAG.getConstant(Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo

10431

: VecNo,

10432

dl, MVT::i64));

10433

RetOps.push_back(Extract);

10434

}

10435

return DAG.getMergeValues(RetOps, dl);

10436

}

10437

}

10438

10439

// If this is a lowered altivec predicate compare, CompareOpc is set to the

10440

// opcode number of the comparison.

10441

int CompareOpc;

10442

bool isDot;

10443

if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))

10444

return SDValue(); // Don't custom lower most intrinsics.

10445

10446

// If this is a non-dot comparison, make the VCMP node and we are done.

10447

if (!isDot) {

10448

SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),

10449

Op.getOperand(1), Op.getOperand(2),

10450

DAG.getConstant(CompareOpc, dl, MVT::i32));

10451

return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);

10452

}

10453

10454

// Create the PPCISD altivec 'dot' comparison node.

10455

SDValue Ops[] = {

10456

Op.getOperand(2), // LHS

10457

Op.getOperand(3), // RHS

10458

DAG.getConstant(CompareOpc, dl, MVT::i32)

10459

};

10460

EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };

10461

SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);

10462

10463

// Now that we have the comparison, emit a copy from the CR to a GPR.

10464

// This is flagged to the above dot comparison.

10465

SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,

10466

DAG.getRegister(PPC::CR6, MVT::i32),

10467

CompNode.getValue(1));

10468

10469

// Unpack the result based on how the target uses it.

10470

unsigned BitNo; // Bit # of CR6.

10471

bool InvertBit; // Invert result?

10472

switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {

10473

default: // Can't happen, don't crash on invalid number though.

10474

case 0: // Return the value of the EQ bit of CR6.

10475

BitNo = 0; InvertBit = false;

10476

break;

10477

case 1: // Return the inverted value of the EQ bit of CR6.

10478

BitNo = 0; InvertBit = true;

10479

break;

10480

case 2: // Return the value of the LT bit of CR6.

10481

BitNo = 2; InvertBit = false;

10482

break;

10483

case 3: // Return the inverted value of the LT bit of CR6.

10484

BitNo = 2; InvertBit = true;

10485

break;

10486

}

10487

10488

// Shift the bit into the low position.

10489

Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,

10490

DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));

10491

// Isolate the bit.

10492

Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,

10493

DAG.getConstant(1, dl, MVT::i32));

10494

10495

// If we are supposed to, toggle the bit.

10496

if (InvertBit)

10497

Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,

10498

DAG.getConstant(1, dl, MVT::i32));

10499

return Flags;

10500

}

10501

10502

/// Custom lower a void intrinsic. Only llvm.ppc.cfence is handled, which is
/// turned into a CFENCE8 machine node; every other intrinsic yields a null
/// SDValue.
SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
                                               SelectionDAG &DAG) const {
  // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
  // the beginning of the argument list, so the intrinsic ID is either
  // operand 0 or operand 1.
  const int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
  SDLoc DL(Op);

  const auto IID =
      cast<ConstantSDNode>(Op.getOperand(ArgStart))->getZExtValue();
  if (IID != Intrinsic::ppc_cfence)
    return SDValue();

  assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
  assert(Subtarget.isPPC64() && "Only 64-bit is supported for now.");

  // The fence operand is any-extended to i64; operand 0 is the chain.
  SDValue FenceArg = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
                                 Op.getOperand(ArgStart + 1));
  auto *Fence = DAG.getMachineNode(PPC::CFENCE8, DL, MVT::Other, FenceArg,
                                   Op.getOperand(0));
  return SDValue(Fence, 0);
}

10523

10524

// Lower scalar BSWAP64 to xxbrd.

10525

SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {

10526

SDLoc dl(Op);

10527

// MTVSRDD

10528

Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),

10529

Op.getOperand(0));

10530

// XXBRD

10531

Op = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Op);

10532

// MFVSRD

10533

int VectorIndex = 0;

10534

if (Subtarget.isLittleEndian())

10535

VectorIndex = 1;

10536

Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,

10537

DAG.getTargetConstant(VectorIndex, dl, MVT::i32));

10538

return Op;

10539

}

10540

10541

// ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be

10542

// compared to a value that is atomically loaded (atomic loads zero-extend).

10543

SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,

10544

SelectionDAG &DAG) const {

10545

assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&((Op.getOpcode() == ISD::ATOMIC_CMP_SWAP && "Expecting an atomic compare-and-swap here."
) ? static_cast<void> (0) : __assert_fail ("Op.getOpcode() == ISD::ATOMIC_CMP_SWAP && \"Expecting an atomic compare-and-swap here.\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 10546, __PRETTY_FUNCTION__))

10546

"Expecting an atomic compare-and-swap here.")((Op.getOpcode() == ISD::ATOMIC_CMP_SWAP && "Expecting an atomic compare-and-swap here."
) ? static_cast<void> (0) : __assert_fail ("Op.getOpcode() == ISD::ATOMIC_CMP_SWAP && \"Expecting an atomic compare-and-swap here.\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 10546, __PRETTY_FUNCTION__));

10547

SDLoc dl(Op);

10548

auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());

10549

EVT MemVT = AtomicNode->getMemoryVT();

10550

if (MemVT.getSizeInBits() >= 32)

10551

return Op;

10552

10553

SDValue CmpOp = Op.getOperand(2);

10554

// If this is already correctly zero-extended, leave it alone.

10555

auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());

10556

if (DAG.MaskedValueIsZero(CmpOp, HighBits))

10557

return Op;

10558

10559

// Clear the high bits of the compare operand.

10560

unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;

10561

SDValue NewCmpOp =

10562

DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,

10563

DAG.getConstant(MaskVal, dl, MVT::i32));

10564

10565

// Replace the existing compare operand with the properly zero-extended one.

10566

SmallVector<SDValue, 4> Ops;

10567

for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)

10568

Ops.push_back(AtomicNode->getOperand(i));

10569

Ops[2] = NewCmpOp;

10570

MachineMemOperand *MMO = AtomicNode->getMemOperand();

10571

SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);

10572

auto NodeTy =

10573

(MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;

10574

return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);

10575

}

10576

10577

/// Lower SCALAR_TO_VECTOR through memory: the scalar operand is stored to a
/// fresh 16-byte stack slot and the result is loaded back from that slot with
/// the vector type of \p Op.
SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc dl(Op);

  // Create a stack slot that is 16-byte aligned.
  MachineFrameInfo &FrameInfo = DAG.getMachineFunction().getFrameInfo();
  const int SlotFI = FrameInfo.CreateStackObject(16, Align(16), false);
  const EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue SlotAddr = DAG.getFrameIndex(SlotFI, PtrVT);

  // Store the input value into Value#0 of the stack slot.
  SDValue Scalar = Op.getOperand(0);
  SDValue StoreChain = DAG.getStore(DAG.getEntryNode(), dl, Scalar, SlotAddr,
                                    MachinePointerInfo());

  // Load it out; the load is chained on the store.
  return DAG.getLoad(Op.getValueType(), dl, StoreChain, SlotAddr,
                     MachinePointerInfo());
}

10592

10593

/// Custom lower INSERT_VECTOR_ELT. A variable insert index yields a null
/// SDValue. With a constant index, v8i16 and v16i8 inserts become
/// MTVSRZ + VECINSERT, and any other type is returned unchanged.
SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
                                                  SelectionDAG &DAG) const {
  assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
         "Should only be called for ISD::INSERT_VECTOR_ELT");

  // We have legal lowering for constant indices but not for variable ones.
  auto *IdxC = dyn_cast<ConstantSDNode>(Op.getOperand(2));
  if (!IdxC)
    return SDValue();

  const EVT VT = Op.getValueType();
  SDLoc dl(Op);
  SDValue Vec = Op.getOperand(0);
  SDValue Elt = Op.getOperand(1);

  // Only v8i16 and v16i8 use the MTVSRZ + VECINSERT sequence.
  if (VT != MVT::v8i16 && VT != MVT::v16i8)
    return Op;

  SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, Elt);

  // Translate the element index into a byte offset; on little endian the
  // offset is mirrored within the 16-byte vector.
  const unsigned EltBytes = VT.getVectorElementType().getSizeInBits() / 8;
  const unsigned EltIdx = IdxC->getZExtValue();
  unsigned InsertAtByte = EltIdx * EltBytes;
  if (Subtarget.isLittleEndian())
    InsertAtByte = (16 - EltBytes) - InsertAtByte;

  return DAG.getNode(PPCISD::VECINSERT, dl, VT, Vec, Mtvsrz,
                     DAG.getConstant(InsertAtByte, dl, MVT::i32));
}

10621

10622

/// Custom lower loads of the v256i1 / v512i1 types; loads of any other type
/// are returned unchanged.
///
/// Type v256i1 is used for pairs and v512i1 is used for accumulators. The
/// load is split into 2 or 4 consecutive v16i8 loads which are then combined
/// with a PAIR_BUILD or ACC_BUILD node. The merged result carries the built
/// value and a TokenFactor of the individual load chains.
SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
                                           SelectionDAG &DAG) const {
  SDLoc dl(Op);
  auto *Ld = cast<LoadSDNode>(Op.getNode());
  SDValue InChain = Ld->getChain();
  SDValue Addr = Ld->getBasePtr();
  const EVT VT = Op.getValueType();

  if (VT != MVT::v256i1 && VT != MVT::v512i1)
    return Op;

  assert((VT != MVT::v512i1 || Subtarget.hasMMA()) &&
         "Type unsupported without MMA");
  assert((VT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
         "Type unsupported without paired vector support");

  const Align BaseAlign = Ld->getAlign();
  SmallVector<SDValue, 4> Parts;
  SmallVector<SDValue, 4> PartChains;
  const unsigned NumVecs = VT.getSizeInBits() / 128;
  for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
    // Each part is a 16-byte vector located Idx * 16 bytes into the object.
    const unsigned ByteOffset = Idx * 16;
    SDValue Part =
        DAG.getLoad(MVT::v16i8, dl, InChain, Addr,
                    Ld->getPointerInfo().getWithOffset(ByteOffset),
                    commonAlignment(BaseAlign, ByteOffset),
                    Ld->getMemOperand()->getFlags(), Ld->getAAInfo());
    // Advance the address for the next part.
    Addr = DAG.getNode(ISD::ADD, dl, Addr.getValueType(), Addr,
                       DAG.getConstant(16, dl, Addr.getValueType()));
    Parts.push_back(Part);
    PartChains.push_back(Part.getValue(1));
  }

  // On little endian the parts are handed to the build node in reverse order.
  if (Subtarget.isLittleEndian()) {
    std::reverse(Parts.begin(), Parts.end());
    std::reverse(PartChains.begin(), PartChains.end());
  }

  SDValue OutChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, PartChains);
  const unsigned BuildOpc =
      VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD;
  SDValue Built = DAG.getNode(BuildOpc, dl, VT, Parts);
  SDValue Results[] = {Built, OutChain};
  return DAG.getMergeValues(Results, dl);
}

10666

10667

/// Custom lower stores of the v256i1 / v512i1 types; stores of any other
/// type are returned unchanged.
///
/// Type v256i1 is used for pairs and v512i1 is used for accumulators. The
/// value is split into its 2 or 4 underlying v16i8 registers, each of which
/// is stored individually; the result is a TokenFactor of those stores.
SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc dl(Op);
  auto *St = cast<StoreSDNode>(Op.getNode());
  SDValue InChain = St->getChain();
  SDValue Addr = St->getBasePtr();
  SDValue Value = St->getValue();
  const EVT StoreVT = Value.getValueType();

  if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
    return Op;

  assert((StoreVT != MVT::v512i1 || Subtarget.hasMMA()) &&
         "Type unsupported without MMA");
  assert((StoreVT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
         "Type unsupported without paired vector support");

  const Align BaseAlign = St->getAlign();
  SmallVector<SDValue, 4> Stores;

  // A pair holds 2 registers. An accumulator holds 4 and is first passed
  // through an XXMFACC node.
  unsigned NumVecs = 2;
  if (StoreVT == MVT::v512i1) {
    Value = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, Value);
    NumVecs = 4;
  }

  for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
    // On little endian the registers are stored in reverse order.
    const unsigned VecNum =
        Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx;
    const unsigned ByteOffset = Idx * 16;
    SDValue Part = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, Value,
                               DAG.getConstant(VecNum, dl, MVT::i64));
    SDValue PartStore =
        DAG.getStore(InChain, dl, Part, Addr,
                     St->getPointerInfo().getWithOffset(ByteOffset),
                     commonAlignment(BaseAlign, ByteOffset),
                     St->getMemOperand()->getFlags(), St->getAAInfo());
    // Advance the address for the next part.
    Addr = DAG.getNode(ISD::ADD, dl, Addr.getValueType(), Addr,
                       DAG.getConstant(16, dl, Addr.getValueType()));
    Stores.push_back(PartStore);
  }

  return DAG.getTokenFactor(dl, Stores);
}

10709

10710

/// Custom lower a vector MUL of type v4i32 or v16i8 using Altivec multiply
/// intrinsics. Any other type is unexpected here.
SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);

  if (Op.getValueType() == MVT::v4i32) {
    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);

    SDValue ZeroVec = getCanonicalConstSplat(0, 1, MVT::v4i32, DAG, dl);
    // A splat of -16 serves as a +16 shift/rotate amount.
    SDValue Amt16 = getCanonicalConstSplat(-16, 4, MVT::v4i32, DAG, dl);
    // = vrlw RHS, 16
    SDValue RotatedRHS =
        BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Amt16, DAG, dl);

    // Shrinkify inputs to v8i16.
    SDValue LHS16 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
    SDValue RHS16 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
    SDValue RotatedRHS16 =
        DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RotatedRHS);

    // Low parts multiplied together, generating 32-bit results (we ignore the
    // top parts).
    SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh, LHS16,
                                      RHS16, DAG, dl, MVT::v4i32);

    // Cross products, summed per word by vmsumuhm with a zero addend.
    SDValue CrossSum =
        BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm, LHS16, RotatedRHS16,
                         ZeroVec, DAG, dl, MVT::v4i32);
    // Shift the high parts up 16 bits.
    SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, CrossSum,
                                      Amt16, DAG, dl);
    return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
  }

  if (Op.getValueType() == MVT::v16i8) {
    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
    const bool isLittleEndian = Subtarget.isLittleEndian();

    // Multiply the even 8-bit parts, producing 16-bit products.
    SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub, LHS,
                                         RHS, DAG, dl, MVT::v8i16);
    EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);

    // Multiply the odd 8-bit parts, producing 16-bit products.
    SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub, LHS,
                                        RHS, DAG, dl, MVT::v8i16);
    OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);

    // Merge the results together. Because vmuleub and vmuloub are
    // instructions with a big-endian bias, we must reverse the
    // element numbering and reverse the meaning of "odd" and "even"
    // when generating little endian code.
    //
    // The mask picks one byte of every 16-bit product, alternating between
    // the first and second shuffle input: byte 2*i on little endian,
    // byte 2*i+1 otherwise.
    const int ByteInProduct = isLittleEndian ? 0 : 1;
    int Mask[16];
    for (unsigned i = 0; i != 8; ++i) {
      const int Src = 2 * i + ByteInProduct;
      Mask[i * 2] = Src;
      Mask[i * 2 + 1] = Src + 16;
    }

    SDValue First = isLittleEndian ? OddParts : EvenParts;
    SDValue Second = isLittleEndian ? EvenParts : OddParts;
    return DAG.getVectorShuffle(MVT::v16i8, dl, First, Second, Mask);
  }

  llvm_unreachable("Unknown mul to lower!");
}

10773

10774

/// Custom lower a vector ABS as a signed max of the operand and its negation:
///   abs x  ==>  y = 0 - x;  smax(x, y)
/// The max is emitted through the Altivec vmaxs{b,h,w,d} intrinsic matching
/// the vector type.
SDValue PPCTargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
  assert(Op.getOpcode() == ISD::ABS && "Should only be called for ISD::ABS");

  const EVT VT = Op.getValueType();
  assert(VT.isVector() &&
         "Only set vector abs as custom, scalar abs shouldn't reach here!");
  assert((VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
          VT == MVT::v16i8) &&
         "Unexpected vector element type!");
  assert((VT != MVT::v2i64 || Subtarget.hasP8Altivec()) &&
         "Current subtarget doesn't support smax v2i64!");

  SDLoc dl(Op);
  SDValue X = Op.getOperand(0);
  SDValue NegX =
      DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), X);

  // SMAX patch https://reviews.llvm.org/D47332
  // hasn't landed yet, so use intrinsic first here.
  // TODO: Should use SMAX directly once SMAX patch landed
  //
  // vmaxsw is the fallback for every type not matched explicitly.
  const Intrinsic::ID MaxID =
      VT == MVT::v2i64   ? Intrinsic::ppc_altivec_vmaxsd
      : VT == MVT::v8i16 ? Intrinsic::ppc_altivec_vmaxsh
      : VT == MVT::v16i8 ? Intrinsic::ppc_altivec_vmaxsb
                         : Intrinsic::ppc_altivec_vmaxsw;

  return BuildIntrinsicOp(MaxID, X, NegX, DAG, dl, VT);
}

10811

10812

// Custom lowering for fpext vf32 to v2f64

10813

SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {

10814

10815

assert(Op.getOpcode() == ISD::FP_EXTEND &&((Op.getOpcode() == ISD::FP_EXTEND && "Should only be called for ISD::FP_EXTEND"
) ? static_cast<void> (0) : __assert_fail ("Op.getOpcode() == ISD::FP_EXTEND && \"Should only be called for ISD::FP_EXTEND\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 10816, __PRETTY_FUNCTION__))

10816

"Should only be called for ISD::FP_EXTEND")((Op.getOpcode() == ISD::FP_EXTEND && "Should only be called for ISD::FP_EXTEND"
) ? static_cast<void> (0) : __assert_fail ("Op.getOpcode() == ISD::FP_EXTEND && \"Should only be called for ISD::FP_EXTEND\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 10816, __PRETTY_FUNCTION__));

10817

10818

// FIXME: handle extends from half precision float vectors on P9.

10819

// We only want to custom lower an extend from v2f32 to v2f64.

10820

if (Op.getValueType() != MVT::v2f64 ||

10821

Op.getOperand(0).getValueType() != MVT::v2f32)

10822

return SDValue();

10823

10824

SDLoc dl(Op);

10825

SDValue Op0 = Op.getOperand(0);

10826

10827

switch (Op0.getOpcode()) {

10828

default:

10829

return SDValue();

10830

case ISD::EXTRACT_SUBVECTOR: {

10831

assert(Op0.getNumOperands() == 2 &&((Op0.getNumOperands() == 2 && isa<ConstantSDNode>
(Op0->getOperand(1)) && "Node should have 2 operands with second one being a constant!"
) ? static_cast<void> (0) : __assert_fail ("Op0.getNumOperands() == 2 && isa<ConstantSDNode>(Op0->getOperand(1)) && \"Node should have 2 operands with second one being a constant!\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 10833, __PRETTY_FUNCTION__))

10832

isa<ConstantSDNode>(Op0->getOperand(1)) &&((Op0.getNumOperands() == 2 && isa<ConstantSDNode>
(Op0->getOperand(1)) && "Node should have 2 operands with second one being a constant!"
) ? static_cast<void> (0) : __assert_fail ("Op0.getNumOperands() == 2 && isa<ConstantSDNode>(Op0->getOperand(1)) && \"Node should have 2 operands with second one being a constant!\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 10833, __PRETTY_FUNCTION__))

10833

"Node should have 2 operands with second one being a constant!")((Op0.getNumOperands() == 2 && isa<ConstantSDNode>
(Op0->getOperand(1)) && "Node should have 2 operands with second one being a constant!"
) ? static_cast<void> (0) : __assert_fail ("Op0.getNumOperands() == 2 && isa<ConstantSDNode>(Op0->getOperand(1)) && \"Node should have 2 operands with second one being a constant!\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 10833, __PRETTY_FUNCTION__));

10834

10835

if (Op0.getOperand(0).getValueType() != MVT::v4f32)

10836

return SDValue();

10837

10838

// Custom lower is only done for high or low doubleword.

10839

int Idx = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();

10840

if (Idx % 2 != 0)

10841

return SDValue();

10842

10843

// Since input is v4f32, at this point Idx is either 0 or 2.

10844

// Shift to get the doubleword position we want.

10845

int DWord = Idx >> 1;

10846

10847

// High and low word positions are different on little endian.

10848

if (Subtarget.isLittleEndian())

10849

DWord ^= 0x1;

10850

10851

return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64,

10852

Op0.getOperand(0), DAG.getConstant(DWord, dl, MVT::i32));

10853

}

10854

case ISD::FADD:

10855

case ISD::FMUL:

10856

case ISD::FSUB: {

10857

SDValue NewLoad[2];

10858

for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {

10859

// Ensure both input are loads.

10860

SDValue LdOp = Op0.getOperand(i);

10861

if (LdOp.getOpcode() != ISD::LOAD)

10862

return SDValue();

10863

// Generate new load node.

10864

LoadSDNode *LD = cast<LoadSDNode>(LdOp);

10865

SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};

10866

NewLoad[i] = DAG.getMemIntrinsicNode(

10867

PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,

10868

LD->getMemoryVT(), LD->getMemOperand());

10869

}

10870

SDValue NewOp =

10871

DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, NewLoad[0],

10872

NewLoad[1], Op0.getNode()->getFlags());

10873

return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewOp,

10874

DAG.getConstant(0, dl, MVT::i32));

10875

}

10876

case ISD::LOAD: {

10877

LoadSDNode *LD = cast<LoadSDNode>(Op0);

10878

SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};

10879

SDValue NewLd = DAG.getMemIntrinsicNode(

10880

PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,

10881

LD->getMemoryVT(), LD->getMemOperand());

10882

return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewLd,

10883

DAG.getConstant(0, dl, MVT::i32));

10884

}

10885

}

10886

llvm_unreachable("ERROR:Should return for all cases within swtich.")::llvm::llvm_unreachable_internal("ERROR:Should return for all cases within swtich."
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 10886);

10887

}

10888

10889

/// LowerOperation - Provide custom lowering hooks for some operations.

10890

///

10891

SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {

10892

switch (Op.getOpcode()) {

10893

default: llvm_unreachable("Wasn't expecting to be able to lower this!")::llvm::llvm_unreachable_internal("Wasn't expecting to be able to lower this!"
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 10893);

10894

case ISD::ConstantPool: return LowerConstantPool(Op, DAG);

10895

case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);

10896

case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);

10897

case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);

10898

case ISD::JumpTable: return LowerJumpTable(Op, DAG);

10899

case ISD::SETCC: return LowerSETCC(Op, DAG);

10900

case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);

10901

case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);

10902

10903

// Variable argument lowering.

10904

case ISD::VASTART: return LowerVASTART(Op, DAG);

10905

case ISD::VAARG: return LowerVAARG(Op, DAG);

10906

case ISD::VACOPY: return LowerVACOPY(Op, DAG);

10907

10908

case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG);

10909

case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);

10910

case ISD::GET_DYNAMIC_AREA_OFFSET:

10911

return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);

10912

10913

// Exception handling lowering.

10914

case ISD::EH_DWARF_CFA: return LowerEH_DWARF_CFA(Op, DAG);

10915

case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);

10916

case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);

10917

10918

case ISD::LOAD: return LowerLOAD(Op, DAG);

10919

case ISD::STORE: return LowerSTORE(Op, DAG);

10920

case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);

10921

case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);

10922

case ISD::STRICT_FP_TO_UINT:

10923

case ISD::STRICT_FP_TO_SINT:

10924

case ISD::FP_TO_UINT:

10925

case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, SDLoc(Op));

10926

case ISD::STRICT_UINT_TO_FP:

10927

case ISD::STRICT_SINT_TO_FP:

10928

case ISD::UINT_TO_FP:

10929

case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);

10930

case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);

10931

10932

// Lower 64-bit shifts.

10933

case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);

10934

case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);

10935

case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);

10936

10937

case ISD::FSHL: return LowerFunnelShift(Op, DAG);

10938

case ISD::FSHR: return LowerFunnelShift(Op, DAG);

10939

10940

// Vector-related lowering.

10941

case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);

10942

case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);

10943

case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);

10944

case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);

10945

case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);

10946

case ISD::MUL: return LowerMUL(Op, DAG);

10947

case ISD::ABS: return LowerABS(Op, DAG);

10948

case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);

10949

case ISD::ROTL: return LowerROTL(Op, DAG);

10950

10951

// For counter-based loop handling.

10952

case ISD::INTRINSIC_W_CHAIN: return SDValue();

10953

10954

case ISD::BITCAST: return LowerBITCAST(Op, DAG);

10955

10956

// Frame & Return address.

10957

case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);

10958

case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);

10959

10960

case ISD::INTRINSIC_VOID:

10961

return LowerINTRINSIC_VOID(Op, DAG);

10962

case ISD::BSWAP:

10963

return LowerBSWAP(Op, DAG);

10964

case ISD::ATOMIC_CMP_SWAP:

10965

return LowerATOMIC_CMP_SWAP(Op, DAG);

10966

}

10967

}

10968

10969

void PPCTargetLowering::LowerOperationWrapper(SDNode *N,

10970

SmallVectorImpl<SDValue> &Results,

10971

SelectionDAG &DAG) const {

10972

SDValue Res = LowerOperation(SDValue(N, 0), DAG);

10973

10974

if (!Res.getNode())

10975

return;

10976

10977

// Take the return value as-is if original node has only one result.

10978

if (N->getNumValues() == 1) {

10979

Results.push_back(Res);

10980

return;

10981

}

10982

10983

// New node should have the same number of results.

10984

assert((N->getNumValues() == Res->getNumValues()) &&(((N->getNumValues() == Res->getNumValues()) &&
"Lowering returned the wrong number of results!") ? static_cast
<void> (0) : __assert_fail ("(N->getNumValues() == Res->getNumValues()) && \"Lowering returned the wrong number of results!\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 10985, __PRETTY_FUNCTION__))

10985

"Lowering returned the wrong number of results!")(((N->getNumValues() == Res->getNumValues()) &&
"Lowering returned the wrong number of results!") ? static_cast
<void> (0) : __assert_fail ("(N->getNumValues() == Res->getNumValues()) && \"Lowering returned the wrong number of results!\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 10985, __PRETTY_FUNCTION__));

10986

10987

for (unsigned i = 0; i < N->getNumValues(); ++i)

10988

Results.push_back(Res.getValue(i));

10989

}

10990

10991

void PPCTargetLowering::ReplaceNodeResults(SDNode *N,

10992

SmallVectorImpl<SDValue>&Results,

10993

SelectionDAG &DAG) const {

10994

SDLoc dl(N);

10995

switch (N->getOpcode()) {

10996

default:

10997

llvm_unreachable("Do not know how to custom type legalize this operation!")::llvm::llvm_unreachable_internal("Do not know how to custom type legalize this operation!"
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 10997);

10998

case ISD::READCYCLECOUNTER: {

10999

SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);

11000

SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));

11001

11002

Results.push_back(

11003

DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, RTB, RTB.getValue(1)));

11004

Results.push_back(RTB.getValue(2));

11005

break;

11006

}

11007

case ISD::INTRINSIC_W_CHAIN: {

11008

if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() !=

11009

Intrinsic::loop_decrement)

11010

break;

11011

11012

assert(N->getValueType(0) == MVT::i1 &&((N->getValueType(0) == MVT::i1 && "Unexpected result type for CTR decrement intrinsic"
) ? static_cast<void> (0) : __assert_fail ("N->getValueType(0) == MVT::i1 && \"Unexpected result type for CTR decrement intrinsic\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 11013, __PRETTY_FUNCTION__))

11013

"Unexpected result type for CTR decrement intrinsic")((N->getValueType(0) == MVT::i1 && "Unexpected result type for CTR decrement intrinsic"
) ? static_cast<void> (0) : __assert_fail ("N->getValueType(0) == MVT::i1 && \"Unexpected result type for CTR decrement intrinsic\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 11013, __PRETTY_FUNCTION__));

11014

EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),

11015

N->getValueType(0));

11016

SDVTList VTs = DAG.getVTList(SVT, MVT::Other);

11017

SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),

11018

N->getOperand(1));

11019

11020

Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt));

11021

Results.push_back(NewInt.getValue(1));

11022

break;

11023

}

11024

case ISD::VAARG: {

11025

if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())

11026

return;

11027

11028

EVT VT = N->getValueType(0);

11029

11030

if (VT == MVT::i64) {

11031

SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);

11032

11033

Results.push_back(NewNode);

11034

Results.push_back(NewNode.getValue(1));

11035

}

11036

return;

11037

}

11038

case ISD::STRICT_FP_TO_SINT:

11039

case ISD::STRICT_FP_TO_UINT:

11040

case ISD::FP_TO_SINT:

11041

case ISD::FP_TO_UINT:

11042

// LowerFP_TO_INT() can only handle f32 and f64.

11043

if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() ==

11044

MVT::ppcf128)

11045

return;

11046

Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));

11047

return;

11048

case ISD::TRUNCATE: {

11049

if (!N->getValueType(0).isVector())

11050

return;

11051

SDValue Lowered = LowerTRUNCATEVector(SDValue(N, 0), DAG);

11052

if (Lowered)

11053

Results.push_back(Lowered);

11054

return;

11055

}

11056

case ISD::FSHL:

11057

case ISD::FSHR:

11058

// Don't handle funnel shifts here.

11059

return;

11060

case ISD::BITCAST:

11061

// Don't handle bitcast here.

11062

return;

11063

case ISD::FP_EXTEND:

11064

SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);

11065

if (Lowered)

11066

Results.push_back(Lowered);

11067

return;

11068

}

11069

}

11070

11071

//===----------------------------------------------------------------------===//

11072

// Other Lowering Code

11073

//===----------------------------------------------------------------------===//

11074

11075

// Emits, through Builder, a call with no arguments to the intrinsic Id. The
// declaration is looked up in the module that contains Builder's current
// insertion block.
static Instruction* callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) {
  Module *ParentModule = Builder.GetInsertBlock()->getParent()->getParent();
  Function *Decl = Intrinsic::getDeclaration(ParentModule, Id);
  return Builder.CreateCall(Decl, {});
}

11080

11081

// The mappings for emitLeading/TrailingFence is taken from

11082

// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html

11083

Instruction *PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder,

11084

Instruction *Inst,

11085

AtomicOrdering Ord) const {

11086

if (Ord == AtomicOrdering::SequentiallyConsistent)

11087

return callIntrinsic(Builder, Intrinsic::ppc_sync);

11088

if (isReleaseOrStronger(Ord))

11089

return callIntrinsic(Builder, Intrinsic::ppc_lwsync);

11090

return nullptr;

11091

}

11092

11093

// Emits the fence that follows Inst. A fence is only produced when Inst has
// an atomic load and Ord is acquire or stronger; otherwise nullptr is
// returned.
Instruction *PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
                                                  Instruction *Inst,
                                                  AtomicOrdering Ord) const {
  if (!Inst->hasAtomicLoad() || !isAcquireOrStronger(Ord))
    return nullptr;

  // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
  // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
  // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
  if (isa<LoadInst>(Inst) && Subtarget.isPPC64()) {
    // Plain loads on PPC64 get a ppc_cfence on the loaded value, overloaded
    // on the load's result type.
    Module *ParentModule = Builder.GetInsertBlock()->getParent()->getParent();
    Function *CFence = Intrinsic::getDeclaration(
        ParentModule, Intrinsic::ppc_cfence, {Inst->getType()});
    return Builder.CreateCall(CFence, {Inst});
  }

  // FIXME: Can use isync for rmw operation.
  return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
}

11111

11112

// Expands the atomic pseudo-instruction MI into a load-reserve /
// store-conditional retry loop placed after BB, and returns the block that
// receives the instructions which followed MI.
//
//   AtomicSize - access width in bytes (1, 2, 4 or 8); 1 and 2 require
//                partword atomics on the subtarget.
//   BinOpcode  - opcode combining the loaded value with the operand, or 0
//                for ATOMIC_SWAP (the operand itself is stored).
//   CmpOpcode  - when non-zero, a compare plus conditional exit is emitted
//                before the store (the min/max forms).
//   CmpPred    - branch predicate used for that conditional exit.
MachineBasicBlock *
PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
                                    unsigned AtomicSize,
                                    unsigned BinOpcode,
                                    unsigned CmpOpcode,
                                    unsigned CmpPred) const {
  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();

  // Pick the load-reserve / store-conditional pair matching the access width.
  auto LoadMnemonic = PPC::LDARX;
  auto StoreMnemonic = PPC::STDCX;
  switch (AtomicSize) {
  case 1:
    LoadMnemonic = PPC::LBARX;
    StoreMnemonic = PPC::STBCX;
    assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
    break;
  case 2:
    LoadMnemonic = PPC::LHARX;
    StoreMnemonic = PPC::STHCX;
    assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
    break;
  case 4:
    LoadMnemonic = PPC::LWARX;
    StoreMnemonic = PPC::STWCX;
    break;
  case 8:
    LoadMnemonic = PPC::LDARX;
    StoreMnemonic = PPC::STDCX;
    break;
  default:
    llvm_unreachable("Unexpected size of atomic entity");
  }

  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction *F = BB->getParent();
  MachineFunction::iterator It = ++BB->getIterator();

  Register dest = MI.getOperand(0).getReg();
  Register ptrA = MI.getOperand(1).getReg();
  Register ptrB = MI.getOperand(2).getReg();
  Register incr = MI.getOperand(3).getReg();
  DebugLoc dl = MI.getDebugLoc();

  // Create the new blocks and insert them right after BB. loop2MBB only
  // exists for the compare (min/max) forms.
  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *loop2MBB =
      CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(It, loopMBB);
  if (CmpOpcode)
    F->insert(It, loop2MBB);
  F->insert(It, exitMBB);

  // Everything after MI, together with BB's successor edges, moves to
  // exitMBB.
  exitMBB->splice(exitMBB->begin(), BB,
                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
  exitMBB->transferSuccessorsAndUpdatePHIs(BB);

  MachineRegisterInfo &RegInfo = F->getRegInfo();

  // The value to store: the operand itself for a swap, otherwise a fresh
  // register that receives the result of BinOpcode.
  Register TmpReg = incr;
  if (BinOpcode) {
    const TargetRegisterClass *TmpRC =
        AtomicSize == 8 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
    TmpReg = RegInfo.createVirtualRegister(TmpRC);
  }

  //  thisMBB:
  //   ...
  //   fallthrough --> loopMBB
  BB->addSuccessor(loopMBB);

  //  loopMBB:
  //   l[wd]arx dest, ptr
  //   add r0, dest, incr
  //   st[wd]cx. r0, ptr
  //   bne- loopMBB
  //   fallthrough --> exitMBB

  // For max/min...
  //  loopMBB:
  //   l[wd]arx dest, ptr
  //   cmpl?[wd] incr, dest
  //   bgt exitMBB
  //  loop2MBB:
  //   st[wd]cx. dest, ptr
  //   bne- loopMBB
  //   fallthrough --> exitMBB

  BB = loopMBB;
  BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
      .addReg(ptrA)
      .addReg(ptrB);
  if (BinOpcode)
    BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);

  if (CmpOpcode) {
    // Register holding the loaded value as it is fed to the compare.
    Register CmpValReg = dest;

    // Signed comparisons of byte or halfword values must be sign-extended.
    if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
      Register ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
      BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
              ExtReg)
          .addReg(dest);
      CmpValReg = ExtReg;
    }

    BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
        .addReg(incr)
        .addReg(CmpValReg);
    BuildMI(BB, dl, TII->get(PPC::BCC))
        .addImm(CmpPred)
        .addReg(PPC::CR0)
        .addMBB(exitMBB);
    BB->addSuccessor(loop2MBB);
    BB->addSuccessor(exitMBB);
    BB = loop2MBB;
  }

  // Store-conditional; branch back to loopMBB on PRED_NE.
  BuildMI(BB, dl, TII->get(StoreMnemonic))
      .addReg(TmpReg)
      .addReg(ptrA)
      .addReg(ptrB);
  BuildMI(BB, dl, TII->get(PPC::BCC))
      .addImm(PPC::PRED_NE)
      .addReg(PPC::CR0)
      .addMBB(loopMBB);
  BB->addSuccessor(loopMBB);
  BB->addSuccessor(exitMBB);

  //  exitMBB:
  //   ...
  return exitMBB;
}

11230

11231

// Expands a byte (is8bit) or halfword atomic pseudo-instruction MI. When the
// subtarget has partword atomics this simply defers to EmitAtomicBinary with
// size 1 or 2. Otherwise the operation is done with lwarx/stwcx. on the
// word-aligned address, using shift and mask registers to isolate the
// addressed part of the word. Returns the block that receives the
// instructions which followed MI.
//
// BinOpcode, CmpOpcode and CmpPred have the same meaning as in
// EmitAtomicBinary (BinOpcode == 0 indicates ATOMIC_SWAP).
MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
    MachineInstr &MI, MachineBasicBlock *BB,
    bool is8bit, // operation
    unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
  // If we support part-word atomic mnemonics, just use them
  if (Subtarget.hasPartwordAtomics())
    return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
                            CmpPred);

  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  // In 64 bit mode we have to use 64 bits for addresses, even though the
  // lwarx/stwcx are 32 bits.  With the 32-bit atomics we can use address
  // registers without caring whether they're 32 or 64, but here we're
  // doing actual arithmetic on the addresses.
  bool is64bit = Subtarget.isPPC64();
  bool isLittleEndian = Subtarget.isLittleEndian();
  unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;

  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction *F = BB->getParent();
  MachineFunction::iterator It = ++BB->getIterator();

  Register dest = MI.getOperand(0).getReg();
  Register ptrA = MI.getOperand(1).getReg();
  Register ptrB = MI.getOperand(2).getReg();
  Register incr = MI.getOperand(3).getReg();
  DebugLoc dl = MI.getDebugLoc();

  // Create the new blocks and insert them right after BB. loop2MBB only
  // exists when a compare is requested.
  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *loop2MBB =
      CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(It, loopMBB);
  if (CmpOpcode)
    F->insert(It, loop2MBB);
  F->insert(It, exitMBB);

  // Everything after MI, together with BB's successor edges, moves to
  // exitMBB.
  exitMBB->splice(exitMBB->begin(), BB,
                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
  exitMBB->transferSuccessorsAndUpdatePHIs(BB);

  MachineRegisterInfo &RegInfo = F->getRegInfo();
  // Address-sized registers use RC; all 32-bit temporaries use GPRC.
  const TargetRegisterClass *RC =
      is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
  const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;

  Register PtrReg = RegInfo.createVirtualRegister(RC);
  Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
  // On little endian no xori is emitted, so the shift amount is Shift1Reg.
  Register ShiftReg =
      isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
  Register Incr2Reg = RegInfo.createVirtualRegister(GPRC);
  Register MaskReg = RegInfo.createVirtualRegister(GPRC);
  Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
  Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
  Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
  Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
  Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
  Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
  Register Ptr1Reg;
  // For a swap the shifted operand itself is the new value.
  Register TmpReg =
      (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);

  //  thisMBB:
  //   ...
  //   fallthrough --> loopMBB
  BB->addSuccessor(loopMBB);

  // The 4-byte load must be aligned, while a char or short may be
  // anywhere in the word.  Hence all this nasty bookkeeping code.
  //   add ptr1, ptrA, ptrB [copy if ptrA==0]
  //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
  //   xori shift, shift1, 24 [16]
  //   rlwinm ptr, ptr1, 0, 0, 29
  //   slw incr2, incr, shift
  //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
  //   slw mask, mask2, shift
  //  loopMBB:
  //   lwarx tmpDest, ptr
  //   add tmp, tmpDest, incr2
  //   andc tmp2, tmpDest, mask
  //   and tmp3, tmp, mask
  //   or tmp4, tmp3, tmp2
  //   stwcx. tmp4, ptr
  //   bne- loopMBB
  //   fallthrough --> exitMBB
  //   srw dest, tmpDest, shift
  if (ptrA != ZeroReg) {
    Ptr1Reg = RegInfo.createVirtualRegister(RC);
    BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
        .addReg(ptrA)
        .addReg(ptrB);
  } else {
    Ptr1Reg = ptrB;
  }

  // We need use 32-bit subregister to avoid mismatch register class in 64-bit
  // mode.
  BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
      .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
      .addImm(3)
      .addImm(27)
      .addImm(is8bit ? 28 : 27);
  if (!isLittleEndian) {
    BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
        .addReg(Shift1Reg)
        .addImm(is8bit ? 24 : 16);
  }

  // Word-align the address (clear the two low bits).
  if (is64bit) {
    BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
        .addReg(Ptr1Reg)
        .addImm(0)
        .addImm(61);
  } else {
    BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
        .addReg(Ptr1Reg)
        .addImm(0)
        .addImm(0)
        .addImm(29);
  }

  // Move the operand and the part mask into position within the word.
  BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
  if (is8bit) {
    BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
  } else {
    BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
    BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
        .addReg(Mask3Reg)
        .addImm(65535);
  }
  BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
      .addReg(Mask2Reg)
      .addReg(ShiftReg);

  BB = loopMBB;
  BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
      .addReg(ZeroReg)
      .addReg(PtrReg);
  if (BinOpcode) {
    BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
        .addReg(Incr2Reg)
        .addReg(TmpDestReg);
  }
  // tmp2 = bits of the word outside the mask; tmp3 = new value inside it.
  BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
      .addReg(TmpDestReg)
      .addReg(MaskReg);
  BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);

  if (CmpOpcode) {
    // For unsigned comparisons, we can directly compare the shifted values.
    // For signed comparisons we shift and sign extend.
    Register SReg = RegInfo.createVirtualRegister(GPRC);
    BuildMI(BB, dl, TII->get(PPC::AND), SReg)
        .addReg(TmpDestReg)
        .addReg(MaskReg);
    Register ValueReg = SReg;
    Register CmpReg = Incr2Reg;
    if (CmpOpcode == PPC::CMPW) {
      ValueReg = RegInfo.createVirtualRegister(GPRC);
      BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
          .addReg(SReg)
          .addReg(ShiftReg);
      Register ValueSReg = RegInfo.createVirtualRegister(GPRC);
      BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
          .addReg(ValueReg);
      ValueReg = ValueSReg;
      // The sign-extended value is compared against the unshifted operand.
      CmpReg = incr;
    }
    BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
        .addReg(CmpReg)
        .addReg(ValueReg);
    BuildMI(BB, dl, TII->get(PPC::BCC))
        .addImm(CmpPred)
        .addReg(PPC::CR0)
        .addMBB(exitMBB);
    BB->addSuccessor(loop2MBB);
    BB->addSuccessor(exitMBB);
    BB = loop2MBB;
  }

  // Merge the new part with the untouched bits and try to store the word;
  // branch back to loopMBB on PRED_NE.
  BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
  BuildMI(BB, dl, TII->get(PPC::STWCX))
      .addReg(Tmp4Reg)
      .addReg(ZeroReg)
      .addReg(PtrReg);
  BuildMI(BB, dl, TII->get(PPC::BCC))
      .addImm(PPC::PRED_NE)
      .addReg(PPC::CR0)
      .addMBB(loopMBB);
  BB->addSuccessor(loopMBB);
  BB->addSuccessor(exitMBB);

  //  exitMBB:
  //   ...
  // Shift the loaded word right so the addressed part lands in dest.
  BB = exitMBB;
  BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
      .addReg(TmpDestReg)
      .addReg(ShiftReg);
  return BB;
}

11423

11424

/// Expand the EH_SjLj_SetJmp pseudo instruction \p MI that lives in \p MBB.
///
/// For v = setjmp(buf) the emitted control flow is:
///
///   MBB (the original block, up to MI):
///     [std r2, TOCOffset(buf)]      only for the 64-bit ELF ABI
///     st  base-pointer, BPOffset(buf)
///     bl  MainBlock                 (BCLalways with the no-preserved mask)
///     v_restore = 1
///     SjLjSetup MainBlock
///     b   SinkBlock
///
///   MainBlock:
///     buf[LabelOffset] = LR
///     v_main = 0
///
///   SinkBlock (everything that followed MI):
///     v = phi(v_main from MainBlock, v_restore from MBB)
///
/// \returns SinkBlock, the block that now holds the remainder of \p MBB.
llvm::MachineBasicBlock *
PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
                                    MachineBasicBlock *MBB) const {
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();

  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  const BasicBlock *BB = MBB->getBasicBlock();
  // New blocks are placed immediately after MBB in function layout.
  MachineFunction::iterator InsertPos = std::next(MBB->getIterator());

  Register DstReg = MI.getOperand(0).getReg();
  const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
  assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
  // The two values merged by the PHI in SinkBlock.
  Register ViaMainReg = MRI.createVirtualRegister(RC);
  Register ViaRestoreReg = MRI.createVirtualRegister(RC);

  MVT PVT = getPointerTy(MF->getDataLayout());
  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
         "Invalid Pointer Size!");

  MachineBasicBlock *MainBlock = MF->CreateMachineBasicBlock(BB);
  MachineBasicBlock *SinkBlock = MF->CreateMachineBasicBlock(BB);
  MF->insert(InsertPos, MainBlock);
  MF->insert(InsertPos, SinkBlock);

  // Everything after MI, together with MBB's successor edges, moves to
  // SinkBlock.
  SinkBlock->splice(SinkBlock->begin(), MBB,
                    std::next(MachineBasicBlock::iterator(MI)), MBB->end());
  SinkBlock->transferSuccessorsAndUpdatePHIs(MBB);

  // The jmp_buf layout used here is deliberately not the libc one: it only
  // holds the 'reserved' registers that LLVM does not otherwise know how to
  // spill. By convention Clang has already written the frame address into
  // the first slot and the stack address into the third before this
  // intrinsic runs. As on X86, the jump address goes into the second slot.
  // The TOC pointer (R2) is saved in the fourth slot so that jumps between
  // shared libraries work, and the base pointer is saved in the slot after
  // that. The thread identifier (R13) is not affected.
  const int64_t LabelOffset = 1 * PVT.getStoreSize();
  const int64_t TOCOffset = 3 * PVT.getStoreSize();
  const int64_t BPOffset = 4 * PVT.getStoreSize();

  // Virtual register that receives the resume address (read from LR).
  const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
  Register LabelReg = MRI.createVirtualRegister(PtrRC);
  Register BufReg = MI.getOperand(1).getReg();

  const bool IsPPC64 = Subtarget.isPPC64();
  const unsigned StoreOpc = IsPPC64 ? PPC::STD : PPC::STW;

  // ---- original block ----
  if (Subtarget.is64BitELFABI()) {
    setUsesTOCBasePtr(*MF);
    BuildMI(*MBB, MI, DL, TII->get(PPC::STD))
        .addReg(PPC::X2)
        .addImm(TOCOffset)
        .addReg(BufReg)
        .cloneMemRefs(MI);
  }

  // Naked functions never have a base pointer, so r1 is stored for them. For
  // every other function the choice of register is deferred until PEI, hence
  // the BP/BP8 placeholder.
  const bool IsNaked = MF->getFunction().hasFnAttribute(Attribute::Naked);
  const unsigned BaseReg = IsNaked ? (IsPPC64 ? PPC::X1 : PPC::R1)
                                   : (IsPPC64 ? PPC::BP8 : PPC::BP);

  BuildMI(*MBB, MI, DL, TII->get(StoreOpc))
      .addReg(BaseReg)
      .addImm(BPOffset)
      .addReg(BufReg)
      .cloneMemRefs(MI);

  // Setup: branch-and-link to MainBlock so that LR holds the address of the
  // instruction following the branch.
  BuildMI(*MBB, MI, DL, TII->get(PPC::BCLalways))
      .addMBB(MainBlock)
      .addRegMask(TRI->getNoPreservedMask());

  BuildMI(*MBB, MI, DL, TII->get(PPC::LI), ViaRestoreReg).addImm(1);

  BuildMI(*MBB, MI, DL, TII->get(PPC::EH_SjLj_Setup)).addMBB(MainBlock);
  BuildMI(*MBB, MI, DL, TII->get(PPC::B)).addMBB(SinkBlock);

  MBB->addSuccessor(MainBlock, BranchProbability::getZero());
  MBB->addSuccessor(SinkBlock, BranchProbability::getOne());

  // ---- MainBlock ----
  // Read LR and store it as the resume address in the buffer.
  BuildMI(MainBlock, DL, TII->get(IsPPC64 ? PPC::MFLR8 : PPC::MFLR), LabelReg);
  BuildMI(MainBlock, DL, TII->get(StoreOpc))
      .addReg(LabelReg)
      .addImm(LabelOffset)
      .addReg(BufReg)
      .cloneMemRefs(MI);

  // v_main = 0
  BuildMI(MainBlock, DL, TII->get(PPC::LI), ViaMainReg).addImm(0);
  MainBlock->addSuccessor(SinkBlock);

  // ---- SinkBlock ----
  BuildMI(*SinkBlock, SinkBlock->begin(), DL, TII->get(PPC::PHI), DstReg)
      .addReg(ViaMainReg)
      .addMBB(MainBlock)
      .addReg(ViaRestoreReg)
      .addMBB(MBB);

  MI.eraseFromParent();
  return SinkBlock;
}

11565

11566

/// Expand the EH_SjLj_LongJmp pseudo instruction \p MI in place.
///
/// Emits, immediately before \p MI, loads from the buffer addressed by
/// operand 0 in this order: frame pointer (slot 0), resume address (slot 1),
/// stack pointer (slot 2), base pointer (slot 4) and, for 64-bit SVR4, the
/// TOC pointer (slot 3). The resume address is then moved to CTR and an
/// indirect branch through CTR is emitted.
///
/// \returns \p MBB; no new blocks are created.
MachineBasicBlock *
PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
                                     MachineBasicBlock *MBB) const {
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();

  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  MVT PVT = getPointerTy(MF->getDataLayout());
  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
         "Invalid Pointer Size!");

  const bool Is64 = PVT == MVT::i64;

  // Virtual register that carries the resume address to CTR.
  Register JumpTarget = MRI.createVirtualRegister(
      Is64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass);

  // FP is only written here and never read, so it is handled as a plain GPR.
  const unsigned FP = Is64 ? PPC::X31 : PPC::R31;
  const unsigned SP = Is64 ? PPC::X1 : PPC::R1;
  unsigned BP;
  if (Is64)
    BP = PPC::X30;
  else if (Subtarget.isSVR4ABI() && isPositionIndependent())
    BP = PPC::R29;
  else
    BP = PPC::R30;

  const int64_t LabelOffset = 1 * PVT.getStoreSize();
  const int64_t SPOffset = 2 * PVT.getStoreSize();
  const int64_t TOCOffset = 3 * PVT.getStoreSize();
  const int64_t BPOffset = 4 * PVT.getStoreSize();

  Register BufReg = MI.getOperand(0).getReg();

  // Emit "Dest = load Offset(BufReg)" before MI, reusing MI's memory operands.
  const unsigned LoadOpc = Is64 ? PPC::LD : PPC::LWZ;
  auto ReloadSlot = [&](Register Dest, int64_t Offset) {
    BuildMI(*MBB, MI, DL, TII->get(LoadOpc), Dest)
        .addImm(Offset)
        .addReg(BufReg)
        .cloneMemRefs(MI);
  };

  // Reload FP. The jumped-to function may not have had a frame pointer; if
  // so, its r31 will be restored as necessary.
  ReloadSlot(FP, 0);

  // Reload IP.
  ReloadSlot(JumpTarget, LabelOffset);

  // Reload SP.
  ReloadSlot(SP, SPOffset);

  // Reload BP.
  ReloadSlot(BP, BPOffset);

  // Reload TOC.
  if (Is64 && Subtarget.isSVR4ABI()) {
    setUsesTOCBasePtr(*MF);
    ReloadSlot(PPC::X2, TOCOffset);
  }

  // Jump.
  BuildMI(*MBB, MI, DL, TII->get(Is64 ? PPC::MTCTR8 : PPC::MTCTR))
      .addReg(JumpTarget);
  BuildMI(*MBB, MI, DL, TII->get(Is64 ? PPC::BCTR8 : PPC::BCTR));

  MI.eraseFromParent();
  return MBB;
}

11667

11668

/// Report whether \p MF asks for inline stack probes, i.e. whether its
/// function carries a "probe-stack" attribute whose string value is exactly
/// "inline-asm". A missing attribute or any other value yields false.
bool PPCTargetLowering::hasInlineStackProbe(MachineFunction &MF) const {
  const Function &Fn = MF.getFunction();
  if (!Fn.hasFnAttribute("probe-stack"))
    return false;
  return Fn.getFnAttribute("probe-stack").getValueAsString() == "inline-asm";
}

11675

11676

/// Compute the stack probe interval for \p MF.
///
/// Starts from a default of 4096, lets a "stack-probe-size" function
/// attribute override it (the attribute string is parsed with getAsInteger
/// using radix argument 0; the parse result is not checked), and rounds the
/// value down to a multiple of the stack alignment.
///
/// \returns the rounded size, or the stack alignment when rounding gives 0.
unsigned PPCTargetLowering::getStackProbeSize(MachineFunction &MF) const {
  const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
  const unsigned StackAlign = TFI->getStackAlignment();
  assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
         "Unexpected stack alignment");

  // Used when the function has no stack-probe-size attribute.
  unsigned StackProbeSize = 4096;

  const Function &Fn = MF.getFunction();
  if (Fn.hasFnAttribute("stack-probe-size")) {
    StringRef Requested =
        Fn.getFnAttribute("stack-probe-size").getValueAsString();
    Requested.getAsInteger(0, StackProbeSize);
  }

  // StackAlign is a power of two, so this mask rounds down to the alignment.
  const unsigned Rounded = StackProbeSize & ~(StackAlign - 1);
  if (Rounded != 0)
    return Rounded;
  return StackAlign;
}

11693

11694

// Lower dynamic stack allocation with probing. `emitProbedAlloca` is splitted

11695

// into three phases. In the first phase, it uses pseudo instruction

11696

// PREPARE_PROBED_ALLOCA to get the future result of actual FramePointer and

11697

// FinalStackPtr. In the second phase, it generates a loop for probing blocks.

11698

// At last, it uses pseudo instruction DYNAREAOFFSET to get the future result of

11699

// MaxCallFrameSize so that it can calculate correct data area pointer.

11700

MachineBasicBlock *

11701

PPCTargetLowering::emitProbedAlloca(MachineInstr &MI,

11702

MachineBasicBlock *MBB) const {

11703

const bool isPPC64 = Subtarget.isPPC64();

11704

MachineFunction *MF = MBB->getParent();

11705

const TargetInstrInfo *TII = Subtarget.getInstrInfo();

11706

DebugLoc DL = MI.getDebugLoc();

11707

const unsigned ProbeSize = getStackProbeSize(*MF);

11708

const BasicBlock *ProbedBB = MBB->getBasicBlock();

11709

MachineRegisterInfo &MRI = MF->getRegInfo();

11710

// The CFG of probing stack looks as

11711

// +-----+

11712

// | MBB |

11713

// +--+--+

11714

// |

11715

// +----v----+

11716

// +--->+ TestMBB +---+

11717

// | +----+----+ |

11718

// | | |

11719

// | +-----v----+ |

11720

// +---+ BlockMBB | |

11721

// +----------+ |

11722

// |

11723

// +---------+ |

11724

// | TailMBB +<--+

11725

// +---------+

11726

// In MBB, calculate previous frame pointer and final stack pointer.

11727

// In TestMBB, test if sp is equal to final stack pointer, if so, jump to

11728

// TailMBB. In BlockMBB, update the sp atomically and jump back to TestMBB.

11729

// TailMBB is spliced via \p MI.

11730

MachineBasicBlock *TestMBB = MF->CreateMachineBasicBlock(ProbedBB);

11731

MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(ProbedBB);

11732

MachineBasicBlock *BlockMBB = MF->CreateMachineBasicBlock(ProbedBB);

11733

11734

MachineFunction::iterator MBBIter = ++MBB->getIterator();

11735

MF->insert(MBBIter, TestMBB);

11736

MF->insert(MBBIter, BlockMBB);

11737

MF->insert(MBBIter, TailMBB);

11738

11739

const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;

11740

const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;

11741

11742

Register DstReg = MI.getOperand(0).getReg();

11743

Register NegSizeReg = MI.getOperand(1).getReg();

11744

Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;

11745

Register FinalStackPtr = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);

11746

Register FramePointer = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);

11747

Register ActualNegSizeReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);

11748

11749

// Since value of NegSizeReg might be realigned in prologepilog, insert a

11750

// PREPARE_PROBED_ALLOCA pseudo instruction to get actual FramePointer and

11751

// NegSize.

11752

unsigned ProbeOpc;

11753

if (!MRI.hasOneNonDBGUse(NegSizeReg))

11754

ProbeOpc =

11755

isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;

11756

else

11757

// By introducing PREPARE_PROBED_ALLOCA_NEGSIZE_OPT, ActualNegSizeReg

11758

// and NegSizeReg will be allocated in the same phyreg to avoid

11759

// redundant copy when NegSizeReg has only one use which is current MI and

11760

// will be replaced by PREPARE_PROBED_ALLOCA then.

11761

ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64

11762

: PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;

11763

BuildMI(*MBB, {MI}, DL, TII->get(ProbeOpc), FramePointer)

11764

.addDef(ActualNegSizeReg)

11765

.addReg(NegSizeReg)

11766

.add(MI.getOperand(2))

11767

.add(MI.getOperand(3));

11768

11769

// Calculate final stack pointer, which equals to SP + ActualNegSize.

11770

BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4),

11771

FinalStackPtr)

11772

.addReg(SPReg)

11773

.addReg(ActualNegSizeReg);

11774

11775

// Materialize a scratch register for update.

11776

int64_t NegProbeSize = -(int64_t)ProbeSize;

11777

assert(isInt<32>(NegProbeSize) && "Unhandled probe size!")((isInt<32>(NegProbeSize) && "Unhandled probe size!"
) ? static_cast<void> (0) : __assert_fail ("isInt<32>(NegProbeSize) && \"Unhandled probe size!\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 11777, __PRETTY_FUNCTION__));

11778

Register ScratchReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);

11779

if (!isInt<16>(NegProbeSize)) {

11780

Register TempReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);

11781

BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)

11782

.addImm(NegProbeSize >> 16);

11783

BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),

11784

ScratchReg)

11785

.addReg(TempReg)

11786

.addImm(NegProbeSize & 0xFFFF);

11787

} else

11788

BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LI8 : PPC::LI), ScratchReg)

11789

.addImm(NegProbeSize);

11790

11791

{

11792

// Probing leading residual part.

11793

Register Div = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);

11794

BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div)

11795

.addReg(ActualNegSizeReg)

11796

.addReg(ScratchReg);

11797

Register Mul = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);

11798

BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::MULLD : PPC::MULLW), Mul)

11799

.addReg(Div)

11800

.addReg(ScratchReg);

11801

Register NegMod = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);

11802

BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod)

11803

.addReg(Mul)

11804

.addReg(ActualNegSizeReg);

11805

BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)

11806

.addReg(FramePointer)

11807

.addReg(SPReg)

11808

.addReg(NegMod);

11809

}

11810

11811

{

11812

// Remaining part should be multiple of ProbeSize.

11813

Register CmpResult = MRI.createVirtualRegister(&PPC::CRRCRegClass);

11814

BuildMI(TestMBB, DL, TII->get(isPPC64 ? PPC::CMPD : PPC::CMPW), CmpResult)

11815

.addReg(SPReg)

11816

.addReg(FinalStackPtr);

11817

BuildMI(TestMBB, DL, TII->get(PPC::BCC))

11818

.addImm(PPC::PRED_EQ)

11819

.addReg(CmpResult)

11820

.addMBB(TailMBB);

11821

TestMBB->addSuccessor(BlockMBB);

11822

TestMBB->addSuccessor(TailMBB);

11823

}

11824

11825

{

11826

// Touch the block.

11827

// |P...|P...|P...

11828

BuildMI(BlockMBB, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)

11829

.addReg(FramePointer)

11830

.addReg(SPReg)

11831

.addReg(ScratchReg);

11832

BuildMI(BlockMBB, DL, TII->get(PPC::B)).addMBB(TestMBB);

11833

BlockMBB->addSuccessor(TestMBB);

11834

}

11835

11836

// Calculation of MaxCallFrameSize is deferred to prologepilog, use

11837

// DYNAREAOFFSET pseudo instruction to get the future result.

11838

Register MaxCallFrameSizeReg =

11839

MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);

11840

BuildMI(TailMBB, DL,

11841

TII->get(isPPC64 ? PPC::DYNAREAOFFSET8 : PPC::DYNAREAOFFSET),

11842

MaxCallFrameSizeReg)

11843

.add(MI.getOperand(2))

11844

.add(MI.getOperand(3));

11845

BuildMI(TailMBB, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), DstReg)

11846

.addReg(SPReg)

11847

.addReg(MaxCallFrameSizeReg);

11848

11849

// Splice instructions after MI to TailMBB.

11850

TailMBB->splice(TailMBB->end(), MBB,

11851

std::next(MachineBasicBlock::iterator(MI)), MBB->end());

11852

TailMBB->transferSuccessorsAndUpdatePHIs(MBB);

11853

MBB->addSuccessor(TestMBB);

11854

11855

// Delete the pseudo instruction.

11856

MI.eraseFromParent();

11857

11858

++NumDynamicAllocaProbed;

11859

return TailMBB;

11860

}

11861

11862

MachineBasicBlock *

11863

PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,

11864

MachineBasicBlock *BB) const {

11865

if (MI.getOpcode() == TargetOpcode::STACKMAP ||

11866

MI.getOpcode() == TargetOpcode::PATCHPOINT) {

11867

if (Subtarget.is64BitELFABI() &&

11868

MI.getOpcode() == TargetOpcode::PATCHPOINT &&

11869

!Subtarget.isUsingPCRelativeCalls()) {

11870

// Call lowering should have added an r2 operand to indicate a dependence

11871

// on the TOC base pointer value. It can't however, because there is no

11872

// way to mark the dependence as implicit there, and so the stackmap code

11873

// will confuse it with a regular operand. Instead, add the dependence

11874

// here.

11875

MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));

11876

}

11877

11878

return emitPatchPoint(MI, BB);

11879

}

11880

11881

if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||

11882

MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {

11883

return emitEHSjLjSetJmp(MI, BB);

11884

} else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||

11885

MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {

11886

return emitEHSjLjLongJmp(MI, BB);

11887

}

11888

11889

const TargetInstrInfo *TII = Subtarget.getInstrInfo();

11890

11891

// To "insert" these instructions we actually have to insert their

11892

// control-flow patterns.

11893

const BasicBlock *LLVM_BB = BB->getBasicBlock();

11894

MachineFunction::iterator It = ++BB->getIterator();

11895

11896

MachineFunction *F = BB->getParent();

11897

11898

if (MI.getOpcode() == PPC::SELECT_CC_I4 ||

11899

MI.getOpcode() == PPC::SELECT_CC_I8 || MI.getOpcode() == PPC::SELECT_I4 ||

11900

MI.getOpcode() == PPC::SELECT_I8) {

11901

SmallVector<MachineOperand, 2> Cond;

11902

if (MI.getOpcode() == PPC::SELECT_CC_I4 ||

11903

MI.getOpcode() == PPC::SELECT_CC_I8)

11904

Cond.push_back(MI.getOperand(4));

11905

else

11906

Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));

11907

Cond.push_back(MI.getOperand(1));

11908

11909

DebugLoc dl = MI.getDebugLoc();

11910

TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,

11911

MI.getOperand(2).getReg(), MI.getOperand(3).getReg());

11912

} else if (MI.getOpcode() == PPC::SELECT_CC_F4 ||

11913

MI.getOpcode() == PPC::SELECT_CC_F8 ||

11914

MI.getOpcode() == PPC::SELECT_CC_F16 ||

11915

MI.getOpcode() == PPC::SELECT_CC_VRRC ||

11916

MI.getOpcode() == PPC::SELECT_CC_VSFRC ||

11917

MI.getOpcode() == PPC::SELECT_CC_VSSRC ||

11918

MI.getOpcode() == PPC::SELECT_CC_VSRC ||

11919

MI.getOpcode() == PPC::SELECT_CC_SPE4 ||

11920

MI.getOpcode() == PPC::SELECT_CC_SPE ||

11921

MI.getOpcode() == PPC::SELECT_F4 ||

11922

MI.getOpcode() == PPC::SELECT_F8 ||

11923

MI.getOpcode() == PPC::SELECT_F16 ||

11924

MI.getOpcode() == PPC::SELECT_SPE ||

11925

MI.getOpcode() == PPC::SELECT_SPE4 ||

11926

MI.getOpcode() == PPC::SELECT_VRRC ||

11927

MI.getOpcode() == PPC::SELECT_VSFRC ||

11928

MI.getOpcode() == PPC::SELECT_VSSRC ||

11929

MI.getOpcode() == PPC::SELECT_VSRC) {

11930

// The incoming instruction knows the destination vreg to set, the

11931

// condition code register to branch on, the true/false values to

11932

// select between, and a branch opcode to use.

11933

11934

// thisMBB:

11935

// ...

11936

// TrueVal = ...

11937

// cmpTY ccX, r1, r2

11938

// bCC copy1MBB

11939

// fallthrough --> copy0MBB

11940

MachineBasicBlock *thisMBB = BB;

11941

MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);

11942

MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);

11943

DebugLoc dl = MI.getDebugLoc();

11944

F->insert(It, copy0MBB);

11945

F->insert(It, sinkMBB);

11946

11947

// Transfer the remainder of BB and its successor edges to sinkMBB.

11948

sinkMBB->splice(sinkMBB->begin(), BB,

11949

std::next(MachineBasicBlock::iterator(MI)), BB->end());

11950

sinkMBB->transferSuccessorsAndUpdatePHIs(BB);

11951

11952

// Next, add the true and fallthrough blocks as its successors.

11953

BB->addSuccessor(copy0MBB);

11954

BB->addSuccessor(sinkMBB);

11955

11956

if (MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8 ||

11957

MI.getOpcode() == PPC::SELECT_F4 || MI.getOpcode() == PPC::SELECT_F8 ||

11958

MI.getOpcode() == PPC::SELECT_F16 ||

11959

MI.getOpcode() == PPC::SELECT_SPE4 ||

11960

MI.getOpcode() == PPC::SELECT_SPE ||

11961

MI.getOpcode() == PPC::SELECT_VRRC ||

11962

MI.getOpcode() == PPC::SELECT_VSFRC ||

11963

MI.getOpcode() == PPC::SELECT_VSSRC ||

11964

MI.getOpcode() == PPC::SELECT_VSRC) {

11965

BuildMI(BB, dl, TII->get(PPC::BC))

11966

.addReg(MI.getOperand(1).getReg())

11967

.addMBB(sinkMBB);

11968

} else {

11969

unsigned SelectPred = MI.getOperand(4).getImm();

11970

BuildMI(BB, dl, TII->get(PPC::BCC))

11971

.addImm(SelectPred)

11972

.addReg(MI.getOperand(1).getReg())

11973

.addMBB(sinkMBB);

11974

}

11975

11976

// copy0MBB:

11977

// %FalseValue = ...

11978

// # fallthrough to sinkMBB

11979

BB = copy0MBB;

11980

11981

// Update machine-CFG edges

11982

BB->addSuccessor(sinkMBB);

11983

11984

// sinkMBB:

11985

// %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]

11986

// ...

11987

BB = sinkMBB;

11988

BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())

11989

.addReg(MI.getOperand(3).getReg())

11990

.addMBB(copy0MBB)

11991

.addReg(MI.getOperand(2).getReg())

11992

.addMBB(thisMBB);

11993

} else if (MI.getOpcode() == PPC::ReadTB) {

11994

// To read the 64-bit time-base register on a 32-bit target, we read the

11995

// two halves. Should the counter have wrapped while it was being read, we

11996

// need to try again.

11997

// ...

11998

// readLoop:

11999

// mfspr Rx,TBU # load from TBU

12000

// mfspr Ry,TB # load from TB

12001

// mfspr Rz,TBU # load from TBU

12002

// cmpw crX,Rx,Rz # check if 'old'='new'

12003

// bne readLoop # branch if they're not equal

12004

// ...

12005

12006

MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);

12007

MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);

12008

DebugLoc dl = MI.getDebugLoc();

12009

F->insert(It, readMBB);

12010

F->insert(It, sinkMBB);

12011

12012

// Transfer the remainder of BB and its successor edges to sinkMBB.

12013

sinkMBB->splice(sinkMBB->begin(), BB,

12014

std::next(MachineBasicBlock::iterator(MI)), BB->end());

12015

sinkMBB->transferSuccessorsAndUpdatePHIs(BB);

12016

12017

BB->addSuccessor(readMBB);

12018

BB = readMBB;

12019

12020

MachineRegisterInfo &RegInfo = F->getRegInfo();

12021

Register ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);

12022

Register LoReg = MI.getOperand(0).getReg();

12023

Register HiReg = MI.getOperand(1).getReg();

12024

12025

BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);

12026

BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);

12027

BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);

12028

12029

Register CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);

12030

12031

BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)

12032

.addReg(HiReg)

12033

.addReg(ReadAgainReg);

12034

BuildMI(BB, dl, TII->get(PPC::BCC))

12035

.addImm(PPC::PRED_NE)

12036

.addReg(CmpReg)

12037

.addMBB(readMBB);

12038

12039

BB->addSuccessor(readMBB);

12040

BB->addSuccessor(sinkMBB);

12041

} else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)

12042

BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);

12043

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)

12044

BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);

12045

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)

12046

BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);

12047

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)

12048

BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);

12049

12050

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)

12051

BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);

12052

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)

12053

BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);

12054

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)

12055

BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);

12056

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)

12057

BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);

12058

12059

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)

12060

BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);

12061

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)

12062

BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);

12063

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)

12064

BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);

12065

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)

12066

BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);

12067

12068

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)

12069

BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);

12070

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)

12071

BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);

12072

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)

12073

BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);

12074

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)

12075

BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);

12076

12077

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)

12078

BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);

12079

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)

12080

BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);

12081

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)

12082

BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);

12083

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)

12084

BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);

12085

12086

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)

12087

BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);

12088

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)

12089

BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);

12090

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)

12091

BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);

12092

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)

12093

BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);

12094

12095

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)

12096

BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GE);

12097

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)

12098

BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GE);

12099

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)

12100

BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GE);

12101

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)

12102

BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GE);

12103

12104

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)

12105

BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LE);

12106

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)

12107

BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LE);

12108

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)

12109

BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LE);

12110

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)

12111

BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LE);

12112

12113

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)

12114

BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GE);

12115

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)

12116

BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GE);

12117

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)

12118

BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GE);

12119

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)

12120

BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GE);

12121

12122

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)

12123

BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LE);

12124

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)

12125

BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LE);

12126

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)

12127

BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LE);

12128

else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)

12129

BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LE);

12130

12131

else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)

12132

BB = EmitPartwordAtomicBinary(MI, BB, true, 0);

12133

else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)

12134

BB = EmitPartwordAtomicBinary(MI, BB, false, 0);

12135

else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)

12136

BB = EmitAtomicBinary(MI, BB, 4, 0);

12137

else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)

12138

BB = EmitAtomicBinary(MI, BB, 8, 0);

12139

else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||

12140

MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||

12141

(Subtarget.hasPartwordAtomics() &&

12142

MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||

12143

(Subtarget.hasPartwordAtomics() &&

12144

MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {

12145

bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;

12146

12147

auto LoadMnemonic = PPC::LDARX;

12148

auto StoreMnemonic = PPC::STDCX;

12149

switch (MI.getOpcode()) {

12150

default:

12151

llvm_unreachable("Compare and swap of unknown size")::llvm::llvm_unreachable_internal("Compare and swap of unknown size"
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 12151);

12152

case PPC::ATOMIC_CMP_SWAP_I8:

12153

LoadMnemonic = PPC::LBARX;

12154

StoreMnemonic = PPC::STBCX;

12155

assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.")((Subtarget.hasPartwordAtomics() && "No support partword atomics."
) ? static_cast<void> (0) : __assert_fail ("Subtarget.hasPartwordAtomics() && \"No support partword atomics.\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 12155, __PRETTY_FUNCTION__));

12156

break;

12157

case PPC::ATOMIC_CMP_SWAP_I16:

12158

LoadMnemonic = PPC::LHARX;

12159

StoreMnemonic = PPC::STHCX;

12160

assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.")((Subtarget.hasPartwordAtomics() && "No support partword atomics."
) ? static_cast<void> (0) : __assert_fail ("Subtarget.hasPartwordAtomics() && \"No support partword atomics.\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 12160, __PRETTY_FUNCTION__));

12161

break;

12162

case PPC::ATOMIC_CMP_SWAP_I32:

12163

LoadMnemonic = PPC::LWARX;

12164

StoreMnemonic = PPC::STWCX;

12165

break;

12166

case PPC::ATOMIC_CMP_SWAP_I64:

12167

LoadMnemonic = PPC::LDARX;

12168

StoreMnemonic = PPC::STDCX;

12169

break;

12170

}

12171

Register dest = MI.getOperand(0).getReg();

12172

Register ptrA = MI.getOperand(1).getReg();

12173

Register ptrB = MI.getOperand(2).getReg();

12174

Register oldval = MI.getOperand(3).getReg();

12175

Register newval = MI.getOperand(4).getReg();

12176

DebugLoc dl = MI.getDebugLoc();

12177

12178

MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);

12179

MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);

12180

MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);

12181

MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);

12182

F->insert(It, loop1MBB);

12183

F->insert(It, loop2MBB);

12184

F->insert(It, midMBB);

12185

F->insert(It, exitMBB);

12186

exitMBB->splice(exitMBB->begin(), BB,

12187

std::next(MachineBasicBlock::iterator(MI)), BB->end());

12188

exitMBB->transferSuccessorsAndUpdatePHIs(BB);

12189

12190

// thisMBB:

12191

// ...

12192

// fallthrough --> loopMBB

12193

BB->addSuccessor(loop1MBB);

12194

12195

// loop1MBB:

12196

// l[bhwd]arx dest, ptr

12197

// cmp[wd] dest, oldval

12198

// bne- midMBB

12199

// loop2MBB:

12200

// st[bhwd]cx. newval, ptr

12201

// bne- loopMBB

12202

// b exitBB

12203

// midMBB:

12204

// st[bhwd]cx. dest, ptr

12205

// exitBB:

12206

BB = loop1MBB;

12207

BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);

12208

BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)

12209

.addReg(oldval)

12210

.addReg(dest);

12211

BuildMI(BB, dl, TII->get(PPC::BCC))

12212

.addImm(PPC::PRED_NE)

12213

.addReg(PPC::CR0)

12214

.addMBB(midMBB);

12215

BB->addSuccessor(loop2MBB);

12216

BB->addSuccessor(midMBB);

12217

12218

BB = loop2MBB;

12219

BuildMI(BB, dl, TII->get(StoreMnemonic))

12220

.addReg(newval)

12221

.addReg(ptrA)

12222

.addReg(ptrB);

12223

BuildMI(BB, dl, TII->get(PPC::BCC))

12224

.addImm(PPC::PRED_NE)

12225

.addReg(PPC::CR0)

12226

.addMBB(loop1MBB);

12227

BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);

12228

BB->addSuccessor(loop1MBB);

12229

BB->addSuccessor(exitMBB);

12230

12231

BB = midMBB;

12232

BuildMI(BB, dl, TII->get(StoreMnemonic))

12233

.addReg(dest)

12234

.addReg(ptrA)

12235

.addReg(ptrB);

12236

BB->addSuccessor(exitMBB);

12237

12238

// exitMBB:

12239

// ...

12240

BB = exitMBB;

12241

} else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||

12242

MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {

12243

// We must use 64-bit registers for addresses when targeting 64-bit,

12244

// since we're actually doing arithmetic on them. Other registers

12245

// can be 32-bit.

12246

bool is64bit = Subtarget.isPPC64();

12247

bool isLittleEndian = Subtarget.isLittleEndian();

12248

bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;

12249

12250

Register dest = MI.getOperand(0).getReg();

12251

Register ptrA = MI.getOperand(1).getReg();

12252

Register ptrB = MI.getOperand(2).getReg();

12253

Register oldval = MI.getOperand(3).getReg();

12254

Register newval = MI.getOperand(4).getReg();

12255

DebugLoc dl = MI.getDebugLoc();

12256

12257

MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);

12258

MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);

12259

MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);

12260

MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);

12261

F->insert(It, loop1MBB);

12262

F->insert(It, loop2MBB);

12263

F->insert(It, midMBB);

12264

F->insert(It, exitMBB);

12265

exitMBB->splice(exitMBB->begin(), BB,

12266

std::next(MachineBasicBlock::iterator(MI)), BB->end());

12267

exitMBB->transferSuccessorsAndUpdatePHIs(BB);

12268

12269

MachineRegisterInfo &RegInfo = F->getRegInfo();

12270

const TargetRegisterClass *RC =

12271

is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;

12272

const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;

12273

12274

Register PtrReg = RegInfo.createVirtualRegister(RC);

12275

Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);

12276

Register ShiftReg =

12277

isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);

12278

Register NewVal2Reg = RegInfo.createVirtualRegister(GPRC);

12279

Register NewVal3Reg = RegInfo.createVirtualRegister(GPRC);

12280

Register OldVal2Reg = RegInfo.createVirtualRegister(GPRC);

12281

Register OldVal3Reg = RegInfo.createVirtualRegister(GPRC);

12282

Register MaskReg = RegInfo.createVirtualRegister(GPRC);

12283

Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);

12284

Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);

12285

Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);

12286

Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);

12287

Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);

12288

Register Ptr1Reg;

12289

Register TmpReg = RegInfo.createVirtualRegister(GPRC);

12290

Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;

12291

// thisMBB:

12292

// ...

12293

// fallthrough --> loopMBB

12294

BB->addSuccessor(loop1MBB);

12295

12296

// The 4-byte load must be aligned, while a char or short may be

12297

// anywhere in the word. Hence all this nasty bookkeeping code.

12298

// add ptr1, ptrA, ptrB [copy if ptrA==0]

12299

// rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]

12300

// xori shift, shift1, 24 [16]

12301

// rlwinm ptr, ptr1, 0, 0, 29

12302

// slw newval2, newval, shift

12303

// slw oldval2, oldval,shift

12304

// li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]

12305

// slw mask, mask2, shift

12306

// and newval3, newval2, mask

12307

// and oldval3, oldval2, mask

12308

// loop1MBB:

12309

// lwarx tmpDest, ptr

12310

// and tmp, tmpDest, mask

12311

// cmpw tmp, oldval3

12312

// bne- midMBB

12313

// loop2MBB:

12314

// andc tmp2, tmpDest, mask

12315

// or tmp4, tmp2, newval3

12316

// stwcx. tmp4, ptr

12317

// bne- loop1MBB

12318

// b exitBB

12319

// midMBB:

12320

// stwcx. tmpDest, ptr

12321

// exitBB:

12322

// srw dest, tmp, shift

12323

if (ptrA != ZeroReg) {

12324

Ptr1Reg = RegInfo.createVirtualRegister(RC);

12325

BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)

12326

.addReg(ptrA)

12327

.addReg(ptrB);

12328

} else {

12329

Ptr1Reg = ptrB;

12330

}

12331

12332

// We need use 32-bit subregister to avoid mismatch register class in 64-bit

12333

// mode.

12334

BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)

12335

.addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)

12336

.addImm(3)

12337

.addImm(27)

12338

.addImm(is8bit ? 28 : 27);

12339

if (!isLittleEndian)

12340

BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)

12341

.addReg(Shift1Reg)

12342

.addImm(is8bit ? 24 : 16);

12343

if (is64bit)

12344

BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)

12345

.addReg(Ptr1Reg)

12346

.addImm(0)

12347

.addImm(61);

12348

else

12349

BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)

12350

.addReg(Ptr1Reg)

12351

.addImm(0)

12352

.addImm(0)

12353

.addImm(29);

12354

BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)

12355

.addReg(newval)

12356

.addReg(ShiftReg);

12357

BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)

12358

.addReg(oldval)

12359

.addReg(ShiftReg);

12360

if (is8bit)

12361

BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);

12362

else {

12363

BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);

12364

BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)

12365

.addReg(Mask3Reg)

12366

.addImm(65535);

12367

}

12368

BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)

12369

.addReg(Mask2Reg)

12370

.addReg(ShiftReg);

12371

BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)

12372

.addReg(NewVal2Reg)

12373

.addReg(MaskReg);

12374

BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)

12375

.addReg(OldVal2Reg)

12376

.addReg(MaskReg);

12377

12378

BB = loop1MBB;

12379

BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)

12380

.addReg(ZeroReg)

12381

.addReg(PtrReg);

12382

BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)

12383

.addReg(TmpDestReg)

12384

.addReg(MaskReg);

12385

BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)

12386

.addReg(TmpReg)

12387

.addReg(OldVal3Reg);

12388

BuildMI(BB, dl, TII->get(PPC::BCC))

12389

.addImm(PPC::PRED_NE)

12390

.addReg(PPC::CR0)

12391

.addMBB(midMBB);

12392

BB->addSuccessor(loop2MBB);

12393

BB->addSuccessor(midMBB);

12394

12395

BB = loop2MBB;

12396

BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)

12397

.addReg(TmpDestReg)

12398

.addReg(MaskReg);

12399

BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)

12400

.addReg(Tmp2Reg)

12401

.addReg(NewVal3Reg);

12402

BuildMI(BB, dl, TII->get(PPC::STWCX))

12403

.addReg(Tmp4Reg)

12404

.addReg(ZeroReg)

12405

.addReg(PtrReg);

12406

BuildMI(BB, dl, TII->get(PPC::BCC))

12407

.addImm(PPC::PRED_NE)

12408

.addReg(PPC::CR0)

12409

.addMBB(loop1MBB);

12410

BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);

12411

BB->addSuccessor(loop1MBB);

12412

BB->addSuccessor(exitMBB);

12413

12414

BB = midMBB;

12415

BuildMI(BB, dl, TII->get(PPC::STWCX))

12416

.addReg(TmpDestReg)

12417

.addReg(ZeroReg)

12418

.addReg(PtrReg);

12419

BB->addSuccessor(exitMBB);

12420

12421

// exitMBB:

12422

// ...

12423

BB = exitMBB;

12424

BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)

12425

.addReg(TmpReg)

12426

.addReg(ShiftReg);

12427

} else if (MI.getOpcode() == PPC::FADDrtz) {

12428

// This pseudo performs an FADD with rounding mode temporarily forced

12429

// to round-to-zero. We emit this via custom inserter since the FPSCR

12430

// is not modeled at the SelectionDAG level.

12431

Register Dest = MI.getOperand(0).getReg();

12432

Register Src1 = MI.getOperand(1).getReg();

12433

Register Src2 = MI.getOperand(2).getReg();

12434

DebugLoc dl = MI.getDebugLoc();

12435

12436

MachineRegisterInfo &RegInfo = F->getRegInfo();

12437

Register MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);

12438

12439

// Save FPSCR value.

12440

BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);

12441

12442

// Set rounding mode to round-to-zero.

12443

BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1))

12444

.addImm(31)

12445

.addReg(PPC::RM, RegState::ImplicitDefine);

12446

12447

BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0))

12448

.addImm(30)

12449

.addReg(PPC::RM, RegState::ImplicitDefine);

12450

12451

// Perform addition.

12452

auto MIB = BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest)

12453

.addReg(Src1)

12454

.addReg(Src2);

12455

if (MI.getFlag(MachineInstr::NoFPExcept))

12456

MIB.setMIFlag(MachineInstr::NoFPExcept);

12457

12458

// Restore FPSCR value.

12459

BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);

12460

} else if (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||

12461

MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT ||

12462

MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||

12463

MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) {

12464

unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||

12465

MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)

12466

? PPC::ANDI8_rec

12467

: PPC::ANDI_rec;

12468

bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||

12469

MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);

12470

12471

MachineRegisterInfo &RegInfo = F->getRegInfo();

12472

Register Dest = RegInfo.createVirtualRegister(

12473

Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);

12474

12475

DebugLoc Dl = MI.getDebugLoc();

12476

BuildMI(*BB, MI, Dl, TII->get(Opcode), Dest)

12477

.addReg(MI.getOperand(1).getReg())

12478

.addImm(1);

12479

BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),

12480

MI.getOperand(0).getReg())

12481

.addReg(IsEQ ? PPC::CR0EQ : PPC::CR0GT);

12482

} else if (MI.getOpcode() == PPC::TCHECK_RET) {

12483

DebugLoc Dl = MI.getDebugLoc();

12484

MachineRegisterInfo &RegInfo = F->getRegInfo();

12485

Register CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);

12486

BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);

12487

BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),

12488

MI.getOperand(0).getReg())

12489

.addReg(CRReg);

12490

} else if (MI.getOpcode() == PPC::TBEGIN_RET) {

12491

DebugLoc Dl = MI.getDebugLoc();

12492

unsigned Imm = MI.getOperand(1).getImm();

12493

BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);

12494

BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),

12495

MI.getOperand(0).getReg())

12496

.addReg(PPC::CR0EQ);

12497

} else if (MI.getOpcode() == PPC::SETRNDi) {

12498

DebugLoc dl = MI.getDebugLoc();

12499

Register OldFPSCRReg = MI.getOperand(0).getReg();

12500

12501

// Save FPSCR value.

12502

BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);

12503

12504

// The floating point rounding mode is in the bits 62:63 of FPSCR, and has

12505

// the following settings:

12506

// 00 Round to nearest

12507

// 01 Round to 0

12508

// 10 Round to +inf

12509

// 11 Round to -inf

12510

12511

// When the operand is immediate, using the two least significant bits of

12512

// the immediate to set the bits 62:63 of FPSCR.

12513

unsigned Mode = MI.getOperand(1).getImm();

12514

BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))

12515

.addImm(31)

12516

.addReg(PPC::RM, RegState::ImplicitDefine);

12517

12518

BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))

12519

.addImm(30)

12520

.addReg(PPC::RM, RegState::ImplicitDefine);

12521

} else if (MI.getOpcode() == PPC::SETRND) {

12522

DebugLoc dl = MI.getDebugLoc();

12523

12524

// Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg

12525

// or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.

12526

// If the target doesn't have DirectMove, we should use stack to do the

12527

// conversion, because the target doesn't have the instructions like mtvsrd

12528

// or mfvsrd to do this conversion directly.

12529

auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {

12530

if (Subtarget.hasDirectMove()) {

12531

BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)

12532

.addReg(SrcReg);

12533

} else {

12534

// Use stack to do the register copy.

12535

unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;

12536

MachineRegisterInfo &RegInfo = F->getRegInfo();

12537

const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);

12538

if (RC == &PPC::F8RCRegClass) {

12539

// Copy register from F8RCRegClass to G8RCRegclass.

12540

assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&(((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
"Unsupported RegClass.") ? static_cast<void> (0) : __assert_fail
("(RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) && \"Unsupported RegClass.\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 12541, __PRETTY_FUNCTION__))

12541

"Unsupported RegClass.")(((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
"Unsupported RegClass.") ? static_cast<void> (0) : __assert_fail
("(RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) && \"Unsupported RegClass.\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 12541, __PRETTY_FUNCTION__));

12542

12543

StoreOp = PPC::STFD;

12544

LoadOp = PPC::LD;

12545

} else {

12546

// Copy register from G8RCRegClass to F8RCRegclass.

12547

assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&(((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
(RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
"Unsupported RegClass.") ? static_cast<void> (0) : __assert_fail
("(RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) && (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) && \"Unsupported RegClass.\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 12549, __PRETTY_FUNCTION__))

12548

(RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&(((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
(RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
"Unsupported RegClass.") ? static_cast<void> (0) : __assert_fail
("(RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) && (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) && \"Unsupported RegClass.\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 12549, __PRETTY_FUNCTION__))

12549

"Unsupported RegClass.")(((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
(RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
"Unsupported RegClass.") ? static_cast<void> (0) : __assert_fail
("(RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) && (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) && \"Unsupported RegClass.\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 12549, __PRETTY_FUNCTION__));

12550

}

12551

12552

MachineFrameInfo &MFI = F->getFrameInfo();

12553

int FrameIdx = MFI.CreateStackObject(8, Align(8), false);

12554

12555

MachineMemOperand *MMOStore = F->getMachineMemOperand(

12556

MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),

12557

MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),

12558

MFI.getObjectAlign(FrameIdx));

12559

12560

// Store the SrcReg into the stack.

12561

BuildMI(*BB, MI, dl, TII->get(StoreOp))

12562

.addReg(SrcReg)

12563

.addImm(0)

12564

.addFrameIndex(FrameIdx)

12565

.addMemOperand(MMOStore);

12566

12567

MachineMemOperand *MMOLoad = F->getMachineMemOperand(

12568

MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),

12569

MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),

12570

MFI.getObjectAlign(FrameIdx));

12571

12572

// Load from the stack where SrcReg is stored, and save to DestReg,

12573

// so we have done the RegClass conversion from RegClass::SrcReg to

12574

// RegClass::DestReg.

12575

BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)

12576

.addImm(0)

12577

.addFrameIndex(FrameIdx)

12578

.addMemOperand(MMOLoad);

12579

}

12580

};

12581

12582

Register OldFPSCRReg = MI.getOperand(0).getReg();

12583

12584

// Save FPSCR value.

12585

BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);

12586

12587

// When the operand is gprc register, use two least significant bits of the

12588

// register and mtfsf instruction to set the bits 62:63 of FPSCR.

12589

//

12590

// copy OldFPSCRTmpReg, OldFPSCRReg

12591

// (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)

12592

// rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62

12593

// copy NewFPSCRReg, NewFPSCRTmpReg

12594

// mtfsf 255, NewFPSCRReg

12595

MachineOperand SrcOp = MI.getOperand(1);

12596

MachineRegisterInfo &RegInfo = F->getRegInfo();

12597

Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);

12598

12599

copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);

12600

12601

Register ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);

12602

Register ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);

12603

12604

// The first operand of INSERT_SUBREG should be a register which has

12605

// subregisters, we only care about its RegClass, so we should use an

12606

// IMPLICIT_DEF register.

12607

BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);

12608

BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)

12609

.addReg(ImDefReg)

12610

.add(SrcOp)

12611

.addImm(1);

12612

12613

Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);

12614

BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)

12615

.addReg(OldFPSCRTmpReg)

12616

.addReg(ExtSrcReg)

12617

.addImm(0)

12618

.addImm(62);

12619

12620

Register NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);

12621

copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);

12622

12623

// The mask 255 means that put the 32:63 bits of NewFPSCRReg to the 32:63

12624

// bits of FPSCR.

12625

BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))

12626

.addImm(255)

12627

.addReg(NewFPSCRReg)

12628

.addImm(0)

12629

.addImm(0);

12630

} else if (MI.getOpcode() == PPC::SETFLM) {

12631

DebugLoc Dl = MI.getDebugLoc();

12632

12633

// Result of setflm is previous FPSCR content, so we need to save it first.

12634

Register OldFPSCRReg = MI.getOperand(0).getReg();

12635

BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);

12636

12637

// Put bits in 32:63 to FPSCR.

12638

Register NewFPSCRReg = MI.getOperand(1).getReg();

12639

BuildMI(*BB, MI, Dl, TII->get(PPC::MTFSF))

12640

.addImm(255)

12641

.addReg(NewFPSCRReg)

12642

.addImm(0)

12643

.addImm(0);

12644

} else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||

12645

MI.getOpcode() == PPC::PROBED_ALLOCA_64) {

12646

return emitProbedAlloca(MI, BB);

12647

} else {

12648

llvm_unreachable("Unexpected instr type to insert")::llvm::llvm_unreachable_internal("Unexpected instr type to insert"
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 12648);

12649

}

12650

12651

MI.eraseFromParent(); // The pseudo instruction is gone now.

12652

return BB;

12653

}

12654

12655

//===----------------------------------------------------------------------===//

12656

// Target Optimization Hooks

12657

//===----------------------------------------------------------------------===//

12658

12659

/// Choose the default number of refinement iterations applied to a hardware
/// reciprocal / reciprocal-square-root estimate of type \p VT.
///
/// The estimates converge quadratically, so each iteration roughly doubles
/// the number of correct digits. For both FRE and FRSQRTE the minimum
/// architected relative accuracy is 2^-5, or 2^-14 when the subtarget reports
/// hasRecipPrec(). IEEE float has 23 digits and double has 52 digits, hence
/// the extra iteration for f64.
static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
  const int BaseSteps = Subtarget.hasRecipPrec() ? 1 : 3;
  const bool IsDoublePrecision = VT.getScalarType() == MVT::f64;
  return IsDoublePrecision ? BaseSteps + 1 : BaseSteps;
}

12669

12670

/// Produce a PPCISD::FRSQRTE node for \p Operand when the subtarget provides
/// a reciprocal square-root estimate for its value type; otherwise return an
/// empty SDValue.
///
/// On success, \p RefinementSteps is filled in with the target default if the
/// caller left it as ReciprocalEstimate::Unspecified, and \p UseOneConstNR is
/// set from the subtarget. \p Enabled and \p Reciprocal are not consulted
/// here.
SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
                                           int Enabled, int &RefinementSteps,
                                           bool &UseOneConstNR,
                                           bool Reciprocal) const {
  const EVT OpVT = Operand.getValueType();

  const bool HasEstimateInsn =
      (OpVT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
      (OpVT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
      (OpVT == MVT::v4f32 && Subtarget.hasAltivec()) ||
      (OpVT == MVT::v2f64 && Subtarget.hasVSX());
  if (!HasEstimateInsn)
    return SDValue();

  if (RefinementSteps == ReciprocalEstimate::Unspecified)
    RefinementSteps = getEstimateRefinementSteps(OpVT, Subtarget);

  // Some CPUs do not get enough accuracy out of the Newton-Raphson
  // computation that uses a single constant.
  UseOneConstNR = !Subtarget.needsTwoConstNR();
  return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), OpVT, Operand);
}

12689

12690

/// Produce a PPCISD::FRE node for \p Operand when the subtarget provides a
/// reciprocal estimate for its value type; otherwise return an empty SDValue.
///
/// On success, \p RefinementSteps is filled in with the target default if the
/// caller left it as ReciprocalEstimate::Unspecified. \p Enabled is not
/// consulted here.
SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
                                            int Enabled,
                                            int &RefinementSteps) const {
  const EVT OpVT = Operand.getValueType();

  const bool HasEstimateInsn =
      (OpVT == MVT::f32 && Subtarget.hasFRES()) ||
      (OpVT == MVT::f64 && Subtarget.hasFRE()) ||
      (OpVT == MVT::v4f32 && Subtarget.hasAltivec()) ||
      (OpVT == MVT::v2f64 && Subtarget.hasVSX());
  if (!HasEstimateInsn)
    return SDValue();

  if (RefinementSteps == ReciprocalEstimate::Unspecified)
    RefinementSteps = getEstimateRefinementSteps(OpVT, Subtarget);

  return DAG.getNode(PPCISD::FRE, SDLoc(Operand), OpVT, Operand);
}

12704

12705

unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {

12706

// Note: This functionality is used only when unsafe-fp-math is enabled, and

12707

// on cores with reciprocal estimates (which are used when unsafe-fp-math is

12708

// enabled for division), this functionality is redundant with the default

12709

// combiner logic (once the division -> reciprocal/multiply transformation

12710

// has taken place). As a result, this matters more for older cores than for

12711

// newer ones.

12712

12713

// Combine multiple FDIVs with the same divisor into multiple FMULs by the

12714

// reciprocal if there are two or more FDIVs (for embedded cores with only

12715

// one FP pipeline) for three or more FDIVs (for generic OOO cores).

12716

switch (Subtarget.getCPUDirective()) {

12717

default:

12718

return 3;

12719

case PPC::DIR_440:

12720

case PPC::DIR_A2:

12721

case PPC::DIR_E500:

12722

case PPC::DIR_E500mc:

12723

case PPC::DIR_E5500:

12724

return 2;

12725

}

12726

}

12727

12728

// isConsecutiveLSLoc needs to work even if all adds have not yet been

12729

// collapsed, and so we need to look through chains of them.

12730

static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,

12731

int64_t& Offset, SelectionDAG &DAG) {

12732

if (DAG.isBaseWithConstantOffset(Loc)) {

12733

Base = Loc.getOperand(0);

12734

Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();

12735

12736

// The base might itself be a base plus an offset, and if so, accumulate

12737

// that as well.

12738

getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);

12739

}

12740

}

12741

12742

static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,

12743

unsigned Bytes, int Dist,

12744

SelectionDAG &DAG) {

12745

if (VT.getSizeInBits() / 8 != Bytes)

12746

return false;

12747

12748

SDValue BaseLoc = Base->getBasePtr();

12749

if (Loc.getOpcode() == ISD::FrameIndex) {

12750

if (BaseLoc.getOpcode() != ISD::FrameIndex)

12751

return false;

12752

const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();

12753

int FI = cast<FrameIndexSDNode>(Loc)->getIndex();

12754

int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();

12755

int FS = MFI.getObjectSize(FI);

12756

int BFS = MFI.getObjectSize(BFI);

12757

if (FS != BFS || FS != (int)Bytes) return false;

12758

return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);

12759

}

12760

12761

SDValue Base1 = Loc, Base2 = BaseLoc;

12762

int64_t Offset1 = 0, Offset2 = 0;

12763

getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);

12764

getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);

12765

if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))

12766

return true;

12767

12768

const TargetLowering &TLI = DAG.getTargetLoweringInfo();

12769

const GlobalValue *GV1 = nullptr;

12770

const GlobalValue *GV2 = nullptr;

12771

Offset1 = 0;

12772

Offset2 = 0;

12773

bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);

12774

bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);

12775

if (isGA1 && isGA2 && GV1 == GV2)

12776

return Offset1 == (Offset2 + Dist*Bytes);

12777

return false;

12778

}

12779

12780

// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does

12781

// not enforce equality of the chain operands.

12782

static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,

12783

unsigned Bytes, int Dist,

12784

SelectionDAG &DAG) {

12785

if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {

12786

EVT VT = LS->getMemoryVT();

12787

SDValue Loc = LS->getBasePtr();

12788

return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);

12789

}

12790

12791

if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {

12792

EVT VT;

12793

switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {

12794

default: return false;

12795

case Intrinsic::ppc_altivec_lvx:

12796

case Intrinsic::ppc_altivec_lvxl:

12797

case Intrinsic::ppc_vsx_lxvw4x:

12798

case Intrinsic::ppc_vsx_lxvw4x_be:

12799

VT = MVT::v4i32;

12800

break;

12801

case Intrinsic::ppc_vsx_lxvd2x:

12802

case Intrinsic::ppc_vsx_lxvd2x_be:

12803

VT = MVT::v2f64;

12804

break;

12805

case Intrinsic::ppc_altivec_lvebx:

12806

VT = MVT::i8;

12807

break;

12808

case Intrinsic::ppc_altivec_lvehx:

12809

VT = MVT::i16;

12810

break;

12811

case Intrinsic::ppc_altivec_lvewx:

12812

VT = MVT::i32;

12813

break;

12814

}

12815

12816

return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);

12817

}

12818

12819

if (N->getOpcode() == ISD::INTRINSIC_VOID) {

12820

EVT VT;

12821

switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {

12822

default: return false;

12823

case Intrinsic::ppc_altivec_stvx:

12824

case Intrinsic::ppc_altivec_stvxl:

12825

case Intrinsic::ppc_vsx_stxvw4x:

12826

VT = MVT::v4i32;

12827

break;

12828

case Intrinsic::ppc_vsx_stxvd2x:

12829

VT = MVT::v2f64;

12830

break;

12831

case Intrinsic::ppc_vsx_stxvw4x_be:

12832

VT = MVT::v4i32;

12833

break;

12834

case Intrinsic::ppc_vsx_stxvd2x_be:

12835

VT = MVT::v2f64;

12836

break;

12837

case Intrinsic::ppc_altivec_stvebx:

12838

VT = MVT::i8;

12839

break;

12840

case Intrinsic::ppc_altivec_stvehx:

12841

VT = MVT::i16;

12842

break;

12843

case Intrinsic::ppc_altivec_stvewx:

12844

VT = MVT::i32;

12845

break;

12846

}

12847

12848

return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);

12849

}

12850

12851

return false;

12852

}

12853

12854

// Return true is there is a nearyby consecutive load to the one provided

12855

// (regardless of alignment). We search up and down the chain, looking though

12856

// token factors and other loads (but nothing else). As a result, a true result

12857

// indicates that it is safe to create a new consecutive load adjacent to the

12858

// load provided.

12859

static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {

12860

SDValue Chain = LD->getChain();

12861

EVT VT = LD->getMemoryVT();

12862

12863

SmallSet<SDNode *, 16> LoadRoots;

12864

SmallVector<SDNode *, 8> Queue(1, Chain.getNode());

12865

SmallSet<SDNode *, 16> Visited;

12866

12867

// First, search up the chain, branching to follow all token-factor operands.

12868

// If we find a consecutive load, then we're done, otherwise, record all

12869

// nodes just above the top-level loads and token factors.

12870

while (!Queue.empty()) {

12871

SDNode *ChainNext = Queue.pop_back_val();

12872

if (!Visited.insert(ChainNext).second)

12873

continue;

12874

12875

if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {

12876

if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))

12877

return true;

12878

12879

if (!Visited.count(ChainLD->getChain().getNode()))

12880

Queue.push_back(ChainLD->getChain().getNode());

12881

} else if (ChainNext->getOpcode() == ISD::TokenFactor) {

12882

for (const SDUse &O : ChainNext->ops())

12883

if (!Visited.count(O.getNode()))

12884

Queue.push_back(O.getNode());

12885

} else

12886

LoadRoots.insert(ChainNext);

12887

}

12888

12889

// Second, search down the chain, starting from the top-level nodes recorded

12890

// in the first phase. These top-level nodes are the nodes just above all

12891

// loads and token factors. Starting with their uses, recursively look though

12892

// all loads (just the chain uses) and token factors to find a consecutive

12893

// load.

12894

Visited.clear();

12895

Queue.clear();

12896

12897

for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(),

12898

IE = LoadRoots.end(); I != IE; ++I) {

12899

Queue.push_back(*I);

12900

12901

while (!Queue.empty()) {

12902

SDNode *LoadRoot = Queue.pop_back_val();

12903

if (!Visited.insert(LoadRoot).second)

12904

continue;

12905

12906

if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))

12907

if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))

12908

return true;

12909

12910

for (SDNode::use_iterator UI = LoadRoot->use_begin(),

12911

UE = LoadRoot->use_end(); UI != UE; ++UI)

12912

if (((isa<MemSDNode>(*UI) &&

12913

cast<MemSDNode>(*UI)->getChain().getNode() == LoadRoot) ||

12914

UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI))

12915

Queue.push_back(*UI);

12916

}

12917

}

12918

12919

return false;

12920

}

12921

12922

/// This function is called when we have proved that a SETCC node can be replaced

12923

/// by subtraction (and other supporting instructions) so that the result of

12924

/// comparison is kept in a GPR instead of CR. This function is purely for

12925

/// codegen purposes and has some flags to guide the codegen process.

12926

static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,

12927

bool Swap, SDLoc &DL, SelectionDAG &DAG) {

12928

assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.")((N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected."
) ? static_cast<void> (0) : __assert_fail ("N->getOpcode() == ISD::SETCC && \"ISD::SETCC Expected.\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 12928, __PRETTY_FUNCTION__));

12929

12930

// Zero extend the operands to the largest legal integer. Originally, they

12931

// must be of a strictly smaller size.

12932

auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),

12933

DAG.getConstant(Size, DL, MVT::i32));

12934

auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),

12935

DAG.getConstant(Size, DL, MVT::i32));

12936

12937

// Swap if needed. Depends on the condition code.

12938

if (Swap)

12939

std::swap(Op0, Op1);

12940

12941

// Subtract extended integers.

12942

auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);

12943

12944

// Move the sign bit to the least significant position and zero out the rest.

12945

// Now the least significant bit carries the result of original comparison.

12946

auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,

12947

DAG.getConstant(Size - 1, DL, MVT::i32));

12948

auto Final = Shifted;

12949

12950

// Complement the result if needed. Based on the condition code.

12951

if (Complement)

12952

Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,

12953

DAG.getConstant(1, DL, MVT::i64));

12954

12955

return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);

12956

}

12957

12958

/// Try to replace an unsigned-comparison SETCC with a subtraction sequence
/// (see generateEquivalentSub). Returns an empty SDValue when the rewrite
/// does not apply: before DAG legalization has finished, when any user of the
/// SETCC is not a ZERO_EXTEND, when the compared operands are not narrower
/// than the largest legal integer type, or for condition codes other than
/// SETULT / SETULE / SETUGT / SETUGE.
SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
  assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");

  SelectionDAG &DAG = DCI.DAG;
  SDLoc DL(N);

  // The analysis below hinges on the size of the compared integers, so only
  // run once every type is legal.
  if (!DCI.isAfterLegalizeDAG())
    return SDValue();

  // Every user of the SETCC must zero extend its value; otherwise bail out.
  for (SDNode *User : N->uses())
    if (User->getOpcode() != ISD::ZERO_EXTEND)
      return SDValue();

  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  auto OpSize = N->getOperand(0).getValueSizeInBits();

  unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();

  if (!(OpSize < Size))
    return SDValue();

  // Pick the complement/swap flags for the condition code.
  bool Complement = false;
  bool Swap = false;
  switch (CC) {
  default:
    return SDValue();
  case ISD::SETULT:
    break;
  case ISD::SETULE:
    Complement = true;
    Swap = true;
    break;
  case ISD::SETUGT:
    Swap = true;
    break;
  case ISD::SETUGE:
    Complement = true;
    break;
  }

  return generateEquivalentSub(N, Size, Complement, Swap, DL, DAG);
}

12999

13000

/// Combine for trunc / setcc / select_cc nodes whose inputs are bitwise
/// operations fed by extensions of i1 values. When the whole cluster of
/// operations is self-contained, it is rewritten to operate directly on i1 so
/// the values can stay in CR bits.
///
/// Returns an empty SDValue when the pattern does not apply; for an unsigned
/// SETCC whose high bits are not known zero, the result of
/// ConvertSETCCToSubtract is returned instead.
SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);

  assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
  // When tracking CR bits we want to avoid
  //   trunc(binary-ops(zext(x), zext(y)))
  // or
  //   trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
  // because they needlessly move values into GPRs that would be better kept
  // in CR bits.

  // "trunc" here is either a real truncation to i1 or the implicit one
  // performed by a setcc / select_cc.
  const bool IsTrunc = N->getOpcode() == ISD::TRUNCATE;
  if (IsTrunc && N->getValueType(0) != MVT::i1)
    return SDValue();

  const EVT SrcVT = N->getOperand(0).getValueType();
  if (SrcVT != MVT::i32 && SrcVT != MVT::i64)
    return SDValue();

  const bool IsCompare =
      N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC;

  if (IsCompare) {
    // For a comparison, all bits except the lowest must not affect the
    // result.
    const unsigned CCIdx = N->getOpcode() == ISD::SETCC ? 2 : 4;
    ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(CCIdx))->get();
    unsigned OpBits = N->getOperand(0).getValueSizeInBits();

    if (ISD::isSignedIntSetCC(CC)) {
      if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
          DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
        return SDValue();
    } else if (ISD::isUnsignedIntSetCC(CC)) {
      const APInt HighBits = APInt::getHighBitsSet(OpBits, OpBits - 1);
      if (!DAG.MaskedValueIsZero(N->getOperand(0), HighBits) ||
          !DAG.MaskedValueIsZero(N->getOperand(1), HighBits))
        return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
                                             : SDValue());
    } else {
      // Neither signed nor unsigned: it is enough that what is known about
      // the high bits is the same on both sides.
      KnownBits LHSKnown = DAG.computeKnownBits(N->getOperand(0));
      KnownBits RHSKnown = DAG.computeKnownBits(N->getOperand(1));

      // Knowledge about bit 0 is irrelevant; drop it before comparing.
      LHSKnown.Zero.clearBit(0);
      LHSKnown.One.clearBit(0);
      RHSKnown.Zero.clearBit(0);
      RHSKnown.One.clearBit(0);

      if (LHSKnown.Zero != RHSKnown.Zero || LHSKnown.One != RHSKnown.One)
        return SDValue();
    }
  }

  // The high bits are now known to be irrelevant. What remains is to check
  // that all intermediate operations are bit operations and that all inputs
  // are extensions.
  auto IsExtOpcode = [](unsigned Opc) {
    return Opc == ISD::SIGN_EXTEND || Opc == ISD::ZERO_EXTEND ||
           Opc == ISD::ANY_EXTEND;
  };
  // Opcodes that may appear inside the cluster of to-be-promoted nodes.
  auto IsClusterOpcode = [&](unsigned Opc) {
    return Opc == ISD::AND || Opc == ISD::OR || Opc == ISD::XOR ||
           Opc == ISD::SELECT || Opc == ISD::SELECT_CC ||
           Opc == ISD::TRUNCATE || IsExtOpcode(Opc);
  };
  // A cluster input is an extension from i1, or a constant.
  auto IsClusterInput = [&](SDValue V) {
    return (IsExtOpcode(V.getOpcode()) &&
            V.getOperand(0).getValueType() == MVT::i1) ||
           isa<ConstantSDNode>(V);
  };

  if (!IsClusterOpcode(N->getOperand(0).getOpcode()))
    return SDValue();

  if (IsCompare && !IsClusterOpcode(N->getOperand(1).getOpcode()))
    return SDValue();

  SmallVector<SDValue, 4> Inputs;
  SmallVector<SDValue, 8> BinOps, PromOps;
  SmallPtrSet<SDNode *, 16> Visited;

  // A truncate contributes only its single operand; comparisons contribute
  // their first two.
  const unsigned NumSeeds = IsTrunc ? 1 : 2;
  for (unsigned i = 0; i < NumSeeds; ++i) {
    SDValue Seed = N->getOperand(i);
    if (IsClusterInput(Seed))
      Inputs.push_back(Seed);
    else
      BinOps.push_back(Seed);
  }

  // Walk the inputs, gathering every binary operation (and, or, xor, select)
  // that is ultimately fed by extensions.
  while (!BinOps.empty()) {
    SDValue BinOp = BinOps.pop_back_val();

    if (!Visited.insert(BinOp.getNode()).second)
      continue;

    PromOps.push_back(BinOp);

    const unsigned BinOpc = BinOp.getOpcode();
    for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
      // The condition operand(s) of a select are not promoted.
      if (BinOpc == ISD::SELECT && i == 0)
        continue;
      if (BinOpc == ISD::SELECT_CC && i != 2 && i != 3)
        continue;

      SDValue Operand = BinOp.getOperand(i);
      if (IsClusterInput(Operand)) {
        Inputs.push_back(Operand);
      } else if (IsClusterOpcode(Operand.getOpcode())) {
        BinOps.push_back(Operand);
      } else {
        // Something other than an extension or another bit operation feeds
        // the cluster; give up.
        return SDValue();
      }
    }
  }

  // The cluster must be self-contained (which is not quite the same thing as
  // every node having a single use): each user of V has to be N or a visited
  // node. In addition V must not be a non-output-value operand of a SELECT or
  // SELECT_CC user.
  // FIXME: Although we could sometimes handle this, and it does occur in
  // practice that one of the condition inputs to the select is also one of
  // the outputs, we currently can't deal with this.
  auto BlocksPromotion = [&](SDValue V) {
    for (SDNode *User : V.getNode()->uses()) {
      if (User != N && !Visited.count(User))
        return true;

      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == V)
          return true;
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == V || User->getOperand(1) == V)
          return true;
      }
    }
    return false;
  };

  for (const SDValue &Input : Inputs) {
    if (isa<ConstantSDNode>(Input))
      continue;
    if (BlocksPromotion(Input))
      return SDValue();
  }

  for (const SDValue &PromOp : PromOps)
    if (BlocksPromotion(PromOp))
      return SDValue();

  // Replace every extension input with the value it extends. Constants may
  // have users outside the cluster, so they are handled while promoting the
  // nodes that use them.
  for (const SDValue &Input : Inputs) {
    if (isa<ConstantSDNode>(Input))
      continue;
    DAG.ReplaceAllUsesOfValueWith(Input, Input.getOperand(0));
  }

  std::list<HandleSDNode> PromOpHandles;
  for (auto &PromOp : PromOps)
    PromOpHandles.emplace_back(PromOp);

  // An operand is ready once it is a constant or already has type i1.
  auto IsPromoted = [](SDValue V) {
    return isa<ConstantSDNode>(V) || V.getValueType() == MVT::i1;
  };

  // Recreate each operation with an i1 result type. DAG.getNode validates
  // that the operand types of a binary operator match, so process the list
  // back to front: that way both operands have most likely been promoted
  // already. Intermediate truncations and extensions simply vanish.
  while (!PromOpHandles.empty()) {
    SDValue PromOp = PromOpHandles.back().getValue();
    PromOpHandles.pop_back();

    const unsigned PromOpc = PromOp.getOpcode();
    if (PromOpc == ISD::TRUNCATE || IsExtOpcode(PromOpc)) {
      if (!IsPromoted(PromOp.getOperand(0))) {
        // Operand not promoted yet; retry later (see the comment below).
        PromOpHandles.emplace_front(PromOp);
        continue;
      }

      SDValue RepValue = PromOp.getOperand(0);
      if (isa<ConstantSDNode>(RepValue))
        RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);

      DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
      continue;
    }

    // Index of the first of the two value operands to promote.
    unsigned C;
    switch (PromOpc) {
    default:
      C = 0;
      break;
    case ISD::SELECT:
      C = 1;
      break;
    case ISD::SELECT_CC:
      C = 2;
      break;
    }

    if (!IsPromoted(PromOp.getOperand(C)) ||
        !IsPromoted(PromOp.getOperand(C + 1))) {
      // The value operands of this node have not been promoted yet. This
      // should be rare because the list is processed backward, but it can
      // happen when an operand has several users inside the cluster.
      PromOpHandles.emplace_front(PromOp);
      continue;
    }

    SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
                                PromOp.getNode()->op_end());

    // Constant value operands are truncated to i1 here.
    for (unsigned i = 0; i < 2; ++i)
      if (isa<ConstantSDNode>(Ops[C + i]))
        Ops[C + i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C + i]);

    DAG.ReplaceAllUsesOfValueWith(
        PromOp, DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
  }

  // Only the initial truncation is left.
  if (N->getOpcode() == ISD::TRUNCATE)
    return N->getOperand(0);

  // Otherwise N is a comparison: its operands now have type i1 but nothing
  // else about it changed.
  return SDValue(N, 0);
}

13280

13281

/// Combine for an extension whose operand is a cluster of bitwise/select
/// operations fed by truncations. When the cluster is self-contained, it is
/// recreated in the extension's result type so the truncations disappear; an
/// explicit mask (zero extend) or shift pair (sign extend) is emitted at the
/// end when the inputs are not known to be suitably extended already.
///
/// Returns an empty SDValue when the pattern does not apply.
SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);

  // When tracking CR bits we want to avoid
  //   zext(binary-ops(trunc(x), trunc(y)))
  // or
  //   zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
  // because they needlessly move values into CR bits that can more
  // efficiently stay in GPRs. If the high bits are not known to be what the
  // final extension requires, some masking may still be needed afterwards.
  //
  // The same transformation matters on PPC64 for 32-to-64-bit extensions,
  // which are common when 32-bit values are used as function return values,
  // so that case is handled here as well.

  const EVT ResVT = N->getValueType(0);
  const unsigned ExtOpc = N->getOpcode();

  if (ResVT != MVT::i32 && ResVT != MVT::i64)
    return SDValue();

  const bool FromCRBit =
      N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits();
  const bool FromI32OnPPC64 =
      N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64();
  if (!(FromCRBit || FromI32OnPPC64))
    return SDValue();

  // Opcodes that may appear inside the cluster of to-be-promoted nodes.
  auto IsClusterOpcode = [](unsigned Opc) {
    return Opc == ISD::AND || Opc == ISD::OR || Opc == ISD::XOR ||
           Opc == ISD::SELECT || Opc == ISD::SELECT_CC;
  };

  if (!IsClusterOpcode(N->getOperand(0).getOpcode()))
    return SDValue();

  SmallVector<SDValue, 4> Inputs;
  SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
  SmallPtrSet<SDNode *, 16> Visited;

  // Walk the inputs, gathering every binary operation (and, or, xor, select)
  // that is ultimately fed by truncations.
  while (!BinOps.empty()) {
    SDValue BinOp = BinOps.pop_back_val();

    if (!Visited.insert(BinOp.getNode()).second)
      continue;

    PromOps.push_back(BinOp);

    const unsigned BinOpc = BinOp.getOpcode();
    for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
      // The condition operand(s) of a select are not promoted.
      if (BinOpc == ISD::SELECT && i == 0)
        continue;
      if (BinOpc == ISD::SELECT_CC && i != 2 && i != 3)
        continue;

      SDValue Operand = BinOp.getOperand(i);
      if (Operand.getOpcode() == ISD::TRUNCATE ||
          isa<ConstantSDNode>(Operand)) {
        Inputs.push_back(Operand);
      } else if (IsClusterOpcode(Operand.getOpcode())) {
        BinOps.push_back(Operand);
      } else {
        // Something other than a truncation or another bit operation feeds
        // the cluster; give up.
        return SDValue();
      }
    }
  }

  // Comparison operands of selects that will have to be truncated back to
  // their original type when the select is promoted, because the operand is
  // itself part of the to-be-promoted set. Index 0/1 is the operand number.
  DenseMap<SDNode *, EVT> SelectTruncOp[2];

  // The cluster must be self-contained (which is not quite the same thing as
  // every node having a single use). Returns false when V has a user outside
  // the cluster. As a side effect, records every SELECT / SELECT_CC user that
  // consumes V as a non-output-value operand, together with that operand's
  // current type, so it can be truncated again later.
  auto UsersStayInCluster = [&](SDValue V) {
    for (SDNode *User : V.getNode()->uses()) {
      if (User != N && !Visited.count(User))
        return false;

      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == V)
          SelectTruncOp[0].insert(
              std::make_pair(User, User->getOperand(0).getValueType()));
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == V)
          SelectTruncOp[0].insert(
              std::make_pair(User, User->getOperand(0).getValueType()));
        if (User->getOperand(1) == V)
          SelectTruncOp[1].insert(
              std::make_pair(User, User->getOperand(1).getValueType()));
      }
    }
    return true;
  };

  for (const SDValue &Input : Inputs) {
    if (isa<ConstantSDNode>(Input))
      continue;
    if (!UsersStayInCluster(Input))
      return SDValue();
  }

  for (const SDValue &PromOp : PromOps)
    if (!UsersStayInCluster(PromOp))
      return SDValue();

  unsigned PromBits = N->getOperand(0).getValueSizeInBits();
  bool ReallyNeedsExt = false;
  if (ExtOpc != ISD::ANY_EXTEND) {
    // Unless every input is already known to be sign/zero extended, the
    // extension still has to be performed explicitly at the end.
    for (const SDValue &Input : Inputs) {
      if (isa<ConstantSDNode>(Input))
        continue;

      SDValue Src = Input.getOperand(0);
      unsigned OpBits = Src.getValueSizeInBits();
      assert(PromBits < OpBits && "Truncation not to a smaller bit count?");

      bool LacksExtension = false;
      if (ExtOpc == ISD::ZERO_EXTEND)
        LacksExtension = !DAG.MaskedValueIsZero(
            Src, APInt::getHighBitsSet(OpBits, OpBits - PromBits));
      else if (ExtOpc == ISD::SIGN_EXTEND)
        LacksExtension =
            DAG.ComputeNumSignBits(Src) < (OpBits - (PromBits - 1));

      if (LacksExtension) {
        ReallyNeedsExt = true;
        break;
      }
    }
  }

  // Bring V to the result type using the same kind of extension as N.
  auto ExtOrTruncToResult = [&](SDValue V) {
    if (ExtOpc == ISD::SIGN_EXTEND)
      return DAG.getSExtOrTrunc(V, dl, ResVT);
    if (ExtOpc == ISD::ZERO_EXTEND)
      return DAG.getZExtOrTrunc(V, dl, ResVT);
    return DAG.getAnyExtOrTrunc(V, dl, ResVT);
  };

  // Replace every truncation input, either with the truncated value itself or
  // with a truncation/extension of it to the final result type. Constant
  // inputs may have users outside the cluster, so they are handled while
  // promoting the nodes that use them.
  for (const SDValue &Input : Inputs) {
    if (isa<ConstantSDNode>(Input))
      continue;

    SDValue InSrc = Input.getOperand(0);
    // NOTE(review): this tests the truncate's own result type against the
    // result type; given the type guards at the top it looks like it cannot
    // match -- confirm whether InSrc's type was meant. Kept as written.
    if (Input.getValueType() == ResVT)
      DAG.ReplaceAllUsesOfValueWith(Input, InSrc);
    else
      DAG.ReplaceAllUsesOfValueWith(Input, ExtOrTruncToResult(InSrc));
  }

  std::list<HandleSDNode> PromOpHandles;
  for (auto &PromOp : PromOps)
    PromOpHandles.emplace_back(PromOp);

  // An operand is ready once it is a constant or already has the result type.
  auto IsPromoted = [&](SDValue V) {
    return isa<ConstantSDNode>(V) || V.getValueType() == ResVT;
  };

  // Recreate each operation with the promoted result type. DAG.getNode
  // validates that the operand types of a binary operator match, so process
  // the list back to front: that way both operands have most likely been
  // promoted already.
  while (!PromOpHandles.empty()) {
    SDValue PromOp = PromOpHandles.back().getValue();
    PromOpHandles.pop_back();

    const bool IsSelect = PromOp.getOpcode() == ISD::SELECT ||
                          PromOp.getOpcode() == ISD::SELECT_CC;

    // Index of the first of the two value operands to promote.
    unsigned C;
    switch (PromOp.getOpcode()) {
    default:
      C = 0;
      break;
    case ISD::SELECT:
      C = 1;
      break;
    case ISD::SELECT_CC:
      C = 2;
      break;
    }

    if (!IsPromoted(PromOp.getOperand(C)) ||
        !IsPromoted(PromOp.getOperand(C + 1))) {
      // The value operands of this node have not been promoted yet. This
      // should be rare because the list is processed backward, but it can
      // happen when an operand has several users inside the cluster.
      PromOpHandles.emplace_front(PromOp);
      continue;
    }

    // Likewise wait for any recorded comparison inputs of a SELECT or
    // SELECT_CC to be promoted first.
    if (IsSelect) {
      if ((SelectTruncOp[0].count(PromOp.getNode()) &&
           PromOp.getOperand(0).getValueType() != ResVT) ||
          (SelectTruncOp[1].count(PromOp.getNode()) &&
           PromOp.getOperand(1).getValueType() != ResVT)) {
        PromOpHandles.emplace_front(PromOp);
        continue;
      }
    }

    SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
                                PromOp.getNode()->op_end());

    // Constant value operands that do not yet have the result type are
    // promoted here.
    for (unsigned i = 0; i < 2; ++i) {
      SDValue &Operand = Ops[C + i];
      if (isa<ConstantSDNode>(Operand) && Operand.getValueType() != ResVT)
        Operand = ExtOrTruncToResult(Operand);
    }

    // Comparison inputs of a SELECT or SELECT_CC that were promoted along the
    // way are truncated back to their original value type.
    if (IsSelect) {
      auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
      if (SI0 != SelectTruncOp[0].end())
        Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
      auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
      if (SI1 != SelectTruncOp[1].end())
        Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
    }

    DAG.ReplaceAllUsesOfValueWith(
        PromOp, DAG.getNode(PromOp.getOpcode(), dl, ResVT, Ops));
  }

  // Only the initial extension is left.
  if (!ReallyNeedsExt)
    return N->getOperand(0);

  // Zero extension: keep only the low PromBits bits (a single bit in the i1
  // case).
  if (ExtOpc == ISD::ZERO_EXTEND)
    return DAG.getNode(ISD::AND, dl, ResVT, N->getOperand(0),
                       DAG.getConstant(APInt::getLowBitsSet(
                                           N->getValueSizeInBits(0), PromBits),
                                       dl, ResVT));

  assert(ExtOpc == ISD::SIGN_EXTEND && "Invalid extension type");

  // Sign extension: shift the value left to the top of the register, then
  // arithmetic-shift it back down by the same amount.
  EVT ShiftAmountTy = getShiftAmountTy(ResVT, DAG.getDataLayout());
  SDValue ShiftCst =
      DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
  SDValue ShiftedUp =
      DAG.getNode(ISD::SHL, dl, ResVT, N->getOperand(0), ShiftCst);
  return DAG.getNode(ISD::SRA, dl, ResVT, ShiftedUp, ShiftCst);
}

13563

13564

/// SETCC combine. For equality/inequality against a single-use negation it
/// folds
///   x == 0-y  -->  x+y == 0
///   x != 0-y  -->  x+y != 0
/// and in every other case defers to DAGCombineTruncBoolExt.
SDValue PPCTargetLowering::combineSetCC(SDNode *N,
                                        DAGCombinerInfo &DCI) const {
  assert(N->getOpcode() == ISD::SETCC &&
         "Should be called with a SETCC node");

  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  if (CC != ISD::SETNE && CC != ISD::SETEQ)
    return DAGCombineTruncBoolExt(N, DCI);

  // Matches a '0 - y' value that has exactly one use.
  auto IsSingleUseNegation = [](SDValue V) {
    return V.getOpcode() == ISD::SUB && isNullConstant(V.getOperand(0)) &&
           V.hasOneUse();
  };

  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  // Canonicalize a '0 - y' on the left over to the right-hand side.
  if (IsSingleUseNegation(LHS))
    std::swap(LHS, RHS);

  if (!IsSingleUseNegation(RHS))
    return DAGCombineTruncBoolExt(N, DCI);

  SDLoc DL(N);
  SelectionDAG &DAG = DCI.DAG;
  EVT VT = N->getValueType(0);
  EVT OpVT = LHS.getValueType();
  SDValue Sum = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
  return DAG.getSetCC(DL, VT, Sum, DAG.getConstant(0, DL, OpVT), CC);
}

13594

13595

// Is this an extending load from an f32 to an f64?

13596

static bool isFPExtLoad(SDValue Op) {

13597

if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))

13598

return LD->getExtensionType() == ISD::EXTLOAD &&

13599

Op.getValueType() == MVT::f64;

13600

return false;

13601

}

13602

13603

/// Reduces the number of fp-to-int conversion when building a vector.

13604

///

13605

/// If this vector is built out of floating to integer conversions,

13606

/// transform it to a vector built out of floating point values followed by a

13607

/// single floating to integer conversion of the vector.

13608

/// Namely (build_vector (fptosi $A), (fptosi $B), ...)

13609

/// becomes (fptosi (build_vector ($A, $B, ...)))

13610

SDValue PPCTargetLowering::

13611

combineElementTruncationToVectorTruncation(SDNode *N,

13612

DAGCombinerInfo &DCI) const {

13613

assert(N->getOpcode() == ISD::BUILD_VECTOR &&((N->getOpcode() == ISD::BUILD_VECTOR && "Should be called with a BUILD_VECTOR node"
) ? static_cast<void> (0) : __assert_fail ("N->getOpcode() == ISD::BUILD_VECTOR && \"Should be called with a BUILD_VECTOR node\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 13614, __PRETTY_FUNCTION__))

13614

"Should be called with a BUILD_VECTOR node")((N->getOpcode() == ISD::BUILD_VECTOR && "Should be called with a BUILD_VECTOR node"
) ? static_cast<void> (0) : __assert_fail ("N->getOpcode() == ISD::BUILD_VECTOR && \"Should be called with a BUILD_VECTOR node\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 13614, __PRETTY_FUNCTION__));

13615

13616

SelectionDAG &DAG = DCI.DAG;

13617

SDLoc dl(N);

13618

13619

SDValue FirstInput = N->getOperand(0);

13620

assert(FirstInput.getOpcode() == PPCISD::MFVSR &&((FirstInput.getOpcode() == PPCISD::MFVSR && "The input operand must be an fp-to-int conversion."
) ? static_cast<void> (0) : __assert_fail ("FirstInput.getOpcode() == PPCISD::MFVSR && \"The input operand must be an fp-to-int conversion.\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 13621, __PRETTY_FUNCTION__))

13621

"The input operand must be an fp-to-int conversion.")((FirstInput.getOpcode() == PPCISD::MFVSR && "The input operand must be an fp-to-int conversion."
) ? static_cast<void> (0) : __assert_fail ("FirstInput.getOpcode() == PPCISD::MFVSR && \"The input operand must be an fp-to-int conversion.\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 13621, __PRETTY_FUNCTION__));

13622

13623

// This combine happens after legalization so the fp_to_[su]i nodes are

13624

// already converted to PPCSISD nodes.

13625

unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();

13626

if (FirstConversion == PPCISD::FCTIDZ ||

13627

FirstConversion == PPCISD::FCTIDUZ ||

13628

FirstConversion == PPCISD::FCTIWZ ||

13629

FirstConversion == PPCISD::FCTIWUZ) {

13630

bool IsSplat = true;

13631

bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||

13632

FirstConversion == PPCISD::FCTIWUZ;

13633

EVT SrcVT = FirstInput.getOperand(0).getValueType();

13634

SmallVector<SDValue, 4> Ops;

13635

EVT TargetVT = N->getValueType(0);

13636

for (int i = 0, e = N->getNumOperands(); i < e; ++i) {

13637

SDValue NextOp = N->getOperand(i);

13638

if (NextOp.getOpcode() != PPCISD::MFVSR)

13639

return SDValue();

13640

unsigned NextConversion = NextOp.getOperand(0).getOpcode();

13641

if (NextConversion != FirstConversion)

13642

return SDValue();

13643

// If we are converting to 32-bit integers, we need to add an FP_ROUND.

13644

// This is not valid if the input was originally double precision. It is

13645

// also not profitable to do unless this is an extending load in which

13646

// case doing this combine will allow us to combine consecutive loads.

13647

if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))

13648

return SDValue();

13649

if (N->getOperand(i) != FirstInput)

13650

IsSplat = false;

13651

}

13652

13653

// If this is a splat, we leave it as-is since there will be only a single

13654

// fp-to-int conversion followed by a splat of the integer. This is better

13655

// for 32-bit and smaller ints and neutral for 64-bit ints.

13656

if (IsSplat)

13657

return SDValue();

13658

13659

// Now that we know we have the right type of node, get its operands

13660

for (int i = 0, e = N->getNumOperands(); i < e; ++i) {

13661

SDValue In = N->getOperand(i).getOperand(0);

13662

if (Is32Bit) {

13663

// For 32-bit values, we need to add an FP_ROUND node (if we made it

13664

// here, we know that all inputs are extending loads so this is safe).

13665

if (In.isUndef())

13666

Ops.push_back(DAG.getUNDEF(SrcVT));

13667

else {

13668

SDValue Trunc = DAG.getNode(ISD::FP_ROUND, dl,

13669

MVT::f32, In.getOperand(0),

13670

DAG.getIntPtrConstant(1, dl));

13671

Ops.push_back(Trunc);

13672

}

13673

} else

13674

Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));

13675

}

13676

13677

unsigned Opcode;

13678

if (FirstConversion == PPCISD::FCTIDZ ||

13679

FirstConversion == PPCISD::FCTIWZ)

13680

Opcode = ISD::FP_TO_SINT;

13681

else

13682

Opcode = ISD::FP_TO_UINT;

13683

13684

EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;

13685

SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);

13686

return DAG.getNode(Opcode, dl, TargetVT, BV);

13687

}

13688

return SDValue();

13689

}

13690

13691

/// Reduce the number of loads when building a vector.

13692

///

13693

/// Building a vector out of multiple loads can be converted to a load

13694

/// of the vector type if the loads are consecutive. If the loads are

13695

/// consecutive but in descending order, a shuffle is added at the end

13696

/// to reorder the vector.

13697

static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {

13698

assert(N->getOpcode() == ISD::BUILD_VECTOR &&((N->getOpcode() == ISD::BUILD_VECTOR && "Should be called with a BUILD_VECTOR node"
) ? static_cast<void> (0) : __assert_fail ("N->getOpcode() == ISD::BUILD_VECTOR && \"Should be called with a BUILD_VECTOR node\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 13699, __PRETTY_FUNCTION__))

13699

"Should be called with a BUILD_VECTOR node")((N->getOpcode() == ISD::BUILD_VECTOR && "Should be called with a BUILD_VECTOR node"
) ? static_cast<void> (0) : __assert_fail ("N->getOpcode() == ISD::BUILD_VECTOR && \"Should be called with a BUILD_VECTOR node\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 13699, __PRETTY_FUNCTION__));

13700

13701

SDLoc dl(N);

13702

13703

// Return early for non byte-sized type, as they can't be consecutive.

13704

if (!N->getValueType(0).getVectorElementType().isByteSized())

13705

return SDValue();

13706

13707

bool InputsAreConsecutiveLoads = true;

13708

bool InputsAreReverseConsecutive = true;

13709

unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();

13710

SDValue FirstInput = N->getOperand(0);

13711

bool IsRoundOfExtLoad = false;

13712

13713

if (FirstInput.getOpcode() == ISD::FP_ROUND &&

13714

FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {

13715

LoadSDNode *LD = dyn_cast<LoadSDNode>(FirstInput.getOperand(0));

13716

IsRoundOfExtLoad = LD->getExtensionType() == ISD::EXTLOAD;

13717

}

13718

// Not a build vector of (possibly fp_rounded) loads.

13719

if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||

13720

N->getNumOperands() == 1)

13721

return SDValue();

13722

13723

for (int i = 1, e = N->getNumOperands(); i < e; ++i) {

13724

// If any inputs are fp_round(extload), they all must be.

13725

if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)

13726

return SDValue();

13727

13728

SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :

13729

N->getOperand(i);

13730

if (NextInput.getOpcode() != ISD::LOAD)

13731

return SDValue();

13732

13733

SDValue PreviousInput =

13734

IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);

13735

LoadSDNode *LD1 = dyn_cast<LoadSDNode>(PreviousInput);

13736

LoadSDNode *LD2 = dyn_cast<LoadSDNode>(NextInput);

13737

13738

// If any inputs are fp_round(extload), they all must be.

13739

if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)

13740

return SDValue();

13741

13742

if (!isConsecutiveLS(LD2, LD1, ElemSize, 1, DAG))

13743

InputsAreConsecutiveLoads = false;

13744

if (!isConsecutiveLS(LD1, LD2, ElemSize, 1, DAG))

13745

InputsAreReverseConsecutive = false;

13746

13747

// Exit early if the loads are neither consecutive nor reverse consecutive.

13748

if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)

13749

return SDValue();

13750

}

13751

13752

assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&((!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive
) && "The loads cannot be both consecutive and reverse consecutive."
) ? static_cast<void> (0) : __assert_fail ("!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) && \"The loads cannot be both consecutive and reverse consecutive.\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 13753, __PRETTY_FUNCTION__))

13753

"The loads cannot be both consecutive and reverse consecutive.")((!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive
) && "The loads cannot be both consecutive and reverse consecutive."
) ? static_cast<void> (0) : __assert_fail ("!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) && \"The loads cannot be both consecutive and reverse consecutive.\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 13753, __PRETTY_FUNCTION__));

13754

13755

SDValue FirstLoadOp =

13756

IsRoundOfExtLoad ? FirstInput.getOperand(0) : FirstInput;

13757

SDValue LastLoadOp =

13758

IsRoundOfExtLoad ? N->getOperand(N->getNumOperands()-1).getOperand(0) :

13759

N->getOperand(N->getNumOperands()-1);

13760

13761

LoadSDNode *LD1 = dyn_cast<LoadSDNode>(FirstLoadOp);

13762

LoadSDNode *LDL = dyn_cast<LoadSDNode>(LastLoadOp);

13763

if (InputsAreConsecutiveLoads) {

13764

assert(LD1 && "Input needs to be a LoadSDNode.")((LD1 && "Input needs to be a LoadSDNode.") ? static_cast
<void> (0) : __assert_fail ("LD1 && \"Input needs to be a LoadSDNode.\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 13764, __PRETTY_FUNCTION__));

13765

return DAG.getLoad(N->getValueType(0), dl, LD1->getChain(),

13766

LD1->getBasePtr(), LD1->getPointerInfo(),

13767

LD1->getAlignment());

13768

}

13769

if (InputsAreReverseConsecutive) {

13770

assert(LDL && "Input needs to be a LoadSDNode.")((LDL && "Input needs to be a LoadSDNode.") ? static_cast
<void> (0) : __assert_fail ("LDL && \"Input needs to be a LoadSDNode.\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 13770, __PRETTY_FUNCTION__));

13771

SDValue Load = DAG.getLoad(N->getValueType(0), dl, LDL->getChain(),

13772

LDL->getBasePtr(), LDL->getPointerInfo(),

13773

LDL->getAlignment());

13774

SmallVector<int, 16> Ops;

13775

for (int i = N->getNumOperands() - 1; i >= 0; i--)

13776

Ops.push_back(i);

13777

13778

return DAG.getVectorShuffle(N->getValueType(0), dl, Load,

13779

DAG.getUNDEF(N->getValueType(0)), Ops);

13780

}

13781

return SDValue();

13782

}

13783

13784

// This function adds the required vector_shuffle needed to get

13785

// the elements of the vector extract in the correct position

13786

// as specified by the CorrectElems encoding.

13787

static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,

13788

SDValue Input, uint64_t Elems,

13789

uint64_t CorrectElems) {

13790

SDLoc dl(N);

13791

13792

unsigned NumElems = Input.getValueType().getVectorNumElements();

13793

SmallVector<int, 16> ShuffleMask(NumElems, -1);

13794

13795

// Knowing the element indices being extracted from the original

13796

// vector and the order in which they're being inserted, just put

13797

// them at element indices required for the instruction.

13798

for (unsigned i = 0; i < N->getNumOperands(); i++) {

13799

if (DAG.getDataLayout().isLittleEndian())

13800

ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;

13801

else

13802

ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;

13803

CorrectElems = CorrectElems >> 8;

13804

Elems = Elems >> 8;

13805

}

13806

13807

SDValue Shuffle =

13808

DAG.getVectorShuffle(Input.getValueType(), dl, Input,

13809

DAG.getUNDEF(Input.getValueType()), ShuffleMask);

13810

13811

EVT VT = N->getValueType(0);

13812

SDValue Conv = DAG.getBitcast(VT, Shuffle);

13813

13814

EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),

13815

Input.getValueType().getVectorElementType(),

13816

VT.getVectorNumElements());

13817

return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Conv,

13818

DAG.getValueType(ExtVT));

13819

}

13820

13821

// Look for build vector patterns where input operands come from sign

13822

// extended vector_extract elements of specific indices. If the correct indices

13823

// aren't used, add a vector shuffle to fix up the indices and create

13824

// SIGN_EXTEND_INREG node which selects the vector sign extend instructions

13825

// during instruction selection.

13826

static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {

13827

// This array encodes the indices that the vector sign extend instructions

13828

// extract from when extending from one type to another for both BE and LE.

13829

// The right nibble of each byte corresponds to the LE incides.

13830

// and the left nibble of each byte corresponds to the BE incides.

13831

// For example: 0x3074B8FC byte->word

13832

// For LE: the allowed indices are: 0x0,0x4,0x8,0xC

13833

// For BE: the allowed indices are: 0x3,0x7,0xB,0xF

13834

// For example: 0x000070F8 byte->double word

13835

// For LE: the allowed indices are: 0x0,0x8

13836

// For BE: the allowed indices are: 0x7,0xF

13837

uint64_t TargetElems[] = {

13838

0x3074B8FC, // b->w

13839

0x000070F8, // b->d

13840

0x10325476, // h->w

13841

0x00003074, // h->d

13842

0x00001032, // w->d

13843

};

13844

13845

uint64_t Elems = 0;

13846

int Index;

13847

SDValue Input;

13848

13849

auto isSExtOfVecExtract = [&](SDValue Op) -> bool {

13850

if (!Op)

13851

return false;

13852

if (Op.getOpcode() != ISD::SIGN_EXTEND &&

13853

Op.getOpcode() != ISD::SIGN_EXTEND_INREG)

13854

return false;

13855

13856

// A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value

13857

// of the right width.

13858

SDValue Extract = Op.getOperand(0);

13859

if (Extract.getOpcode() == ISD::ANY_EXTEND)

13860

Extract = Extract.getOperand(0);

13861

if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)

13862

return false;

13863

13864

ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1));

13865

if (!ExtOp)

13866

return false;

13867

13868

Index = ExtOp->getZExtValue();

13869

if (Input && Input != Extract.getOperand(0))

13870

return false;

13871

13872

if (!Input)

13873

Input = Extract.getOperand(0);

13874

13875

Elems = Elems << 8;

13876

Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;

13877

Elems |= Index;

13878

13879

return true;

13880

};

13881

13882

// If the build vector operands aren't sign extended vector extracts,

13883

// of the same input vector, then return.

13884

for (unsigned i = 0; i < N->getNumOperands(); i++) {

13885

if (!isSExtOfVecExtract(N->getOperand(i))) {

13886

return SDValue();

13887

}

13888

}

13889

13890

// If the vector extract indicies are not correct, add the appropriate

13891

// vector_shuffle.

13892

int TgtElemArrayIdx;

13893

int InputSize = Input.getValueType().getScalarSizeInBits();

13894

int OutputSize = N->getValueType(0).getScalarSizeInBits();

13895

if (InputSize + OutputSize == 40)

13896

TgtElemArrayIdx = 0;

13897

else if (InputSize + OutputSize == 72)

13898

TgtElemArrayIdx = 1;

13899

else if (InputSize + OutputSize == 48)

13900

TgtElemArrayIdx = 2;

13901

else if (InputSize + OutputSize == 80)

13902

TgtElemArrayIdx = 3;

13903

else if (InputSize + OutputSize == 96)

13904

TgtElemArrayIdx = 4;

13905

else

13906

return SDValue();

13907

13908

uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];

13909

CorrectElems = DAG.getDataLayout().isLittleEndian()

13910

? CorrectElems & 0x0F0F0F0F0F0F0F0F

13911

: CorrectElems & 0xF0F0F0F0F0F0F0F0;

13912

if (Elems != CorrectElems) {

13913

return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);

13914

}

13915

13916

// Regular lowering will catch cases where a shuffle is not needed.

13917

return SDValue();

13918

}

13919

13920

// Look for the pattern of a load from a narrow width to i128, feeding

13921

// into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node

13922

// (LXVRZX). This node represents a zero extending load that will be matched

13923

// to the Load VSX Vector Rightmost instructions.

13924

static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG) {

13925

SDLoc DL(N);

13926

13927

// This combine is only eligible for a BUILD_VECTOR of v1i128.

13928

if (N->getValueType(0) != MVT::v1i128)

13929

return SDValue();

13930

13931

SDValue Operand = N->getOperand(0);

13932

// Proceed with the transformation if the operand to the BUILD_VECTOR

13933

// is a load instruction.

13934

if (Operand.getOpcode() != ISD::LOAD)

13935

return SDValue();

13936

13937

LoadSDNode *LD = dyn_cast<LoadSDNode>(Operand);

13938

EVT MemoryType = LD->getMemoryVT();

13939

13940

// This transformation is only valid if the we are loading either a byte,

13941

// halfword, word, or doubleword.

13942

bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 ||

13943

MemoryType == MVT::i32 || MemoryType == MVT::i64;

13944

13945

// Ensure that the load from the narrow width is being zero extended to i128.

13946

if (!ValidLDType ||

13947

(LD->getExtensionType() != ISD::ZEXTLOAD &&

13948

LD->getExtensionType() != ISD::EXTLOAD))

13949

return SDValue();

13950

13951

SDValue LoadOps[] = {

13952

LD->getChain(), LD->getBasePtr(),

13953

DAG.getIntPtrConstant(MemoryType.getScalarSizeInBits(), DL)};

13954

13955

return DAG.getMemIntrinsicNode(PPCISD::LXVRZX, DL,

13956

DAG.getVTList(MVT::v1i128, MVT::Other),

13957

LoadOps, MemoryType, LD->getMemOperand());

13958

}

13959

13960

/// Target-specific DAG combines for ISD::BUILD_VECTOR.
///
/// Requires VSX. Tries, in order: merging element-wise fp-to-int conversions
/// into one vector conversion, replacing consecutive loads by a vector load,
/// the sign-extend-of-extract combine (P9 Altivec, after legalization), the
/// v1i128 extending-load combine (ISA 3.1), and finally turning a v2f64
/// build_vector of int-to-fp conversions of adjacent v4i32 elements into a
/// PPCISD::[SU]INT_VEC_TO_FP node. Returns an empty SDValue if none applies.
SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
                                                 DAGCombinerInfo &DCI) const {
  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
         "Should be called with a BUILD_VECTOR node");

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);

  if (!Subtarget.hasVSX())
    return SDValue();

  // The target independent DAG combiner will leave a build_vector of
  // float-to-int conversions intact. We can generate MUCH better code for
  // a float-to-int conversion of a vector of floats.
  SDValue FirstInput = N->getOperand(0);
  if (FirstInput.getOpcode() == PPCISD::MFVSR)
    if (SDValue VecTrunc = combineElementTruncationToVectorTruncation(N, DCI))
      return VecTrunc;

  // If we're building a vector out of consecutive loads, just load that
  // vector type.
  if (SDValue VecLoad = combineBVOfConsecutiveLoads(N, DAG))
    return VecLoad;

  // If we're building a vector out of extended elements from another vector
  // we have P9 vector integer extend instructions. The code assumes legal
  // input types (i.e. it can't handle things like v4i16) so do not run before
  // legalization.
  if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize())
    if (SDValue VecSExt = combineBVOfVecSExt(N, DAG))
      return VecSExt;

  // On Power10, the Load VSX Vector Rightmost instructions can be utilized
  // if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR
  // is a load from <valid narrow width> to i128.
  if (Subtarget.isISA3_1())
    if (SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG))
      return BVOfZLoad;

  if (N->getValueType(0) != MVT::v2f64)
    return SDValue();

  // Looking for:
  // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
  // Both operands must use the same conversion opcode.
  const unsigned ConvOpc = FirstInput.getOpcode();
  if (ConvOpc != ISD::SINT_TO_FP && ConvOpc != ISD::UINT_TO_FP)
    return SDValue();
  SDValue SecondInput = N->getOperand(1);
  if (SecondInput.getOpcode() != ConvOpc)
    return SDValue();

  SDValue Ext1 = FirstInput.getOperand(0);
  SDValue Ext2 = SecondInput.getOperand(0);
  if (Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
      Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
    return SDValue();

  // Both extracts need constant indices into the same v4i32 vector.
  auto *Idx1 = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
  auto *Idx2 = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
  if (!Idx1 || !Idx2)
    return SDValue();
  SDValue SrcVec = Ext1.getOperand(0);
  if (SrcVec.getValueType() != MVT::v4i32 || SrcVec != Ext2.getOperand(0))
    return SDValue();

  // Only the element pairs (0, 1) and (2, 3) are handled; which half of the
  // source they select depends on endianness.
  const int FirstElem = Idx1->getZExtValue();
  const int SecondElem = Idx2->getZExtValue();
  const bool IsLE = Subtarget.isLittleEndian();
  int SubvecIdx;
  if (FirstElem == 0 && SecondElem == 1)
    SubvecIdx = IsLE ? 1 : 0;
  else if (FirstElem == 2 && SecondElem == 3)
    SubvecIdx = IsLE ? 0 : 1;
  else
    return SDValue();

  const unsigned VecConvOpc = ConvOpc == ISD::SINT_TO_FP
                                  ? PPCISD::SINT_VEC_TO_FP
                                  : PPCISD::UINT_VEC_TO_FP;
  return DAG.getNode(VecConvOpc, dl, MVT::v2f64, SrcVec,
                     DAG.getIntPtrConstant(SubvecIdx, dl));
}

14050

14051

/// Target-specific DAG combine for ISD::SINT_TO_FP / ISD::UINT_TO_FP.
///
/// Two patterns are handled, both only for f32/f64 results, hard float and
/// 64-bit support:
///  * an i8/i16 load feeding the conversion (with P9 vector and P9 Altivec)
///    becomes an LXSIZX load, a VEXTS for the signed case, and an FCFID*
///    conversion;
///  * an fp -> int -> fp round trip becomes FCTID[U]Z followed by FCFID*,
///    with an FP_ROUND appended when the result is f32 and FPCVT is
///    unavailable.
/// Returns an empty SDValue when neither pattern applies.
SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  assert((N->getOpcode() == ISD::SINT_TO_FP ||
          N->getOpcode() == ISD::UINT_TO_FP) &&
         "Need an int -> FP conversion node here");

  if (useSoftFloat() || !Subtarget.has64BitSupport())
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  SDValue Op(N, 0);
  const EVT ResVT = Op.getValueType();

  // Don't handle ppc_fp128 here or conversions that are out-of-range capable
  // from the hardware.
  if (ResVT != MVT::f32 && ResVT != MVT::f64)
    return SDValue();

  SDValue IntSrc = Op.getOperand(0);
  const EVT IntVT = IntSrc.getValueType();
  if (!IntVT.isSimple())
    return SDValue();
  if (IntVT.getSimpleVT() <= MVT(MVT::i1) ||
      IntVT.getSimpleVT() > MVT(MVT::i64))
    return SDValue();

  // Sub-word (i8/i16) load feeding the conversion.
  const bool SubWordLoad = IntSrc.getOpcode() == ISD::LOAD &&
                           (IntVT == MVT::i8 || IntVT == MVT::i16);
  if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
    const bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
    const bool DstDouble = ResVT == MVT::f64;
    const MVT DstTy = DstDouble ? MVT::f64 : MVT::f32;

    unsigned ConvOp;
    if (Signed)
      ConvOp = DstDouble ? PPCISD::FCFID : PPCISD::FCFIDS;
    else
      ConvOp = DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS;

    // Width of the loaded integer in bytes.
    SDValue WidthConst =
        DAG.getIntPtrConstant(IntVT == MVT::i8 ? 1 : 2, dl, false);
    LoadSDNode *LDN = cast<LoadSDNode>(IntSrc.getNode());
    SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
    SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
                                         DAG.getVTList(MVT::f64, MVT::Other),
                                         Ops, MVT::i8, LDN->getMemOperand());

    if (!Signed)
      return DAG.getNode(ConvOp, dl, DstTy, Ld);

    // For signed conversion, we need to sign-extend the value in the VSR
    SDValue ExtOps[] = { Ld, WidthConst };
    SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
    return DAG.getNode(ConvOp, dl, DstTy, Ext);
  }

  // For i32 intermediate values, unfortunately, the conversion functions
  // leave the upper 32 bits of the value undefined. Within the set of
  // scalar instructions, we have no method for zero- or sign-extending the
  // value. Thus, we cannot handle i32 intermediate values here.
  if (IntVT == MVT::i32)
    return SDValue();

  const bool HasFPCVT = Subtarget.hasFPCVT();
  assert((Op.getOpcode() == ISD::SINT_TO_FP || HasFPCVT) &&
         "UINT_TO_FP is supported only with FPCVT");

  // If we have FCFIDS, then use it when converting to single-precision.
  // Otherwise, convert to double-precision and then round.
  const bool IsUnsigned = Op.getOpcode() == ISD::UINT_TO_FP;
  const bool UseSingle = HasFPCVT && ResVT == MVT::f32;
  unsigned FCFOp;
  if (UseSingle)
    FCFOp = IsUnsigned ? PPCISD::FCFIDUS : PPCISD::FCFIDS;
  else
    FCFOp = IsUnsigned ? PPCISD::FCFIDU : PPCISD::FCFID;
  const MVT FCFTy = UseSingle ? MVT::f32 : MVT::f64;

  // If we're converting from a float, to an int, and back to a float again,
  // then we don't need the store/load pair at all.
  const unsigned SrcOpc = IntSrc.getOpcode();
  const bool IsFPToInt = (SrcOpc == ISD::FP_TO_UINT && HasFPCVT) ||
                         SrcOpc == ISD::FP_TO_SINT;
  if (!IsFPToInt)
    return SDValue();

  SDValue Src = IntSrc.getOperand(0);
  if (Src.getValueType() == MVT::f32) {
    Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
    DCI.AddToWorklist(Src.getNode());
  } else if (Src.getValueType() != MVT::f64) {
    // Make sure that we don't pick up a ppc_fp128 source value.
    return SDValue();
  }

  const unsigned FCTOp =
      SrcOpc == ISD::FP_TO_SINT ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;

  SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
  SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);

  // Without FPCVT the conversion produced f64; round down to f32 if needed.
  if (ResVT == MVT::f32 && !HasFPCVT) {
    FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
                     DAG.getIntPtrConstant(0, dl));
    DCI.AddToWorklist(FP.getNode());
  }

  return FP;
}

14156

14157

// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for

14158

// builtins) into loads with swaps.

14159

SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,

14160

DAGCombinerInfo &DCI) const {

14161

SelectionDAG &DAG = DCI.DAG;

14162

SDLoc dl(N);

14163

SDValue Chain;

14164

SDValue Base;

14165

MachineMemOperand *MMO;

14166

14167

switch (N->getOpcode()) {

14168

default:

14169

llvm_unreachable("Unexpected opcode for little endian VSX load")::llvm::llvm_unreachable_internal("Unexpected opcode for little endian VSX load"
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 14169);

14170

case ISD::LOAD: {

14171

LoadSDNode *LD = cast<LoadSDNode>(N);

14172

Chain = LD->getChain();

14173

Base = LD->getBasePtr();

14174

MMO = LD->getMemOperand();

14175

// If the MMO suggests this isn't a load of a full vector, leave

14176

// things alone. For a built-in, we have to make the change for

14177

// correctness, so if there is a size problem that will be a bug.

14178

if (MMO->getSize() < 16)

14179

return SDValue();

14180

break;

14181

}

14182

case ISD::INTRINSIC_W_CHAIN: {

14183

MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);

14184

Chain = Intrin->getChain();

14185

// Similarly to the store case below, Intrin->getBasePtr() doesn't get

14186

// us what we want. Get operand 2 instead.

14187

Base = Intrin->getOperand(2);

14188

MMO = Intrin->getMemOperand();

14189

break;

14190

}

14191

}

14192

14193

MVT VecTy = N->getValueType(0).getSimpleVT();

14194

14195

// Do not expand to PPCISD::LXVD2X + PPCISD::XXSWAPD when the load is

14196

// aligned and the type is a vector with elements up to 4 bytes

14197

if (Subtarget.needsSwapsForVSXMemOps() && MMO->getAlign() >= Align(16) &&

14198

VecTy.getScalarSizeInBits() <= 32) {

14199

return SDValue();

14200

}

14201

14202

SDValue LoadOps[] = { Chain, Base };

14203

SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,

14204

DAG.getVTList(MVT::v2f64, MVT::Other),

14205

LoadOps, MVT::v2f64, MMO);

14206

14207

DCI.AddToWorklist(Load.getNode());

14208

Chain = Load.getValue(1);

14209

SDValue Swap = DAG.getNode(

14210

PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);

14211

DCI.AddToWorklist(Swap.getNode());

14212

14213

// Add a bitcast if the resulting load type doesn't match v2f64.

14214

if (VecTy != MVT::v2f64) {

14215

SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);

14216

DCI.AddToWorklist(N.getNode());

14217

// Package {bitcast value, swap's chain} to match Load's shape.

14218

return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),

14219

N, Swap.getValue(1));

14220

}

14221

14222

return Swap;

14223

}

14224

14225

// expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for

14226

// builtins) into stores with swaps.

14227

SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,

14228

DAGCombinerInfo &DCI) const {

14229

SelectionDAG &DAG = DCI.DAG;

14230

SDLoc dl(N);

14231

SDValue Chain;

14232

SDValue Base;

14233

unsigned SrcOpnd;

14234

MachineMemOperand *MMO;

14235

14236

switch (N->getOpcode()) {

14237

default:

14238

llvm_unreachable("Unexpected opcode for little endian VSX store")::llvm::llvm_unreachable_internal("Unexpected opcode for little endian VSX store"
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 14238);

14239

case ISD::STORE: {

14240

StoreSDNode *ST = cast<StoreSDNode>(N);

14241

Chain = ST->getChain();

14242

Base = ST->getBasePtr();

14243

MMO = ST->getMemOperand();

14244

SrcOpnd = 1;

14245

// If the MMO suggests this isn't a store of a full vector, leave

14246

// things alone. For a built-in, we have to make the change for

14247

// correctness, so if there is a size problem that will be a bug.

14248

if (MMO->getSize() < 16)

14249

return SDValue();

14250

break;

14251

}

14252

case ISD::INTRINSIC_VOID: {

14253

MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);

14254

Chain = Intrin->getChain();

14255

// Intrin->getBasePtr() oddly does not get what we want.

14256

Base = Intrin->getOperand(3);

14257

MMO = Intrin->getMemOperand();

14258

SrcOpnd = 2;

14259

break;

14260

}

14261

}

14262

14263

SDValue Src = N->getOperand(SrcOpnd);

14264

MVT VecTy = Src.getValueType().getSimpleVT();

14265

14266

// Do not expand to PPCISD::XXSWAPD and PPCISD::STXVD2X when the load is

14267

// aligned and the type is a vector with elements up to 4 bytes

14268

if (Subtarget.needsSwapsForVSXMemOps() && MMO->getAlign() >= Align(16) &&

14269

VecTy.getScalarSizeInBits() <= 32) {

14270

return SDValue();

14271

}

14272

14273

// All stores are done as v2f64 and possible bit cast.

14274

if (VecTy != MVT::v2f64) {

14275

Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);

14276

DCI.AddToWorklist(Src.getNode());

14277

}

14278

14279

SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,

14280

DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);

14281

DCI.AddToWorklist(Swap.getNode());

14282

Chain = Swap.getValue(1);

14283

SDValue StoreOps[] = { Chain, Swap, Base };

14284

SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,

14285

DAG.getVTList(MVT::Other),

14286

StoreOps, VecTy, MMO);

14287

DCI.AddToWorklist(Store.getNode());

14288

return Store;

14289

}

14290

14291

// Handle DAG combine for STORE (FP_TO_INT F).

14292

SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,

14293

DAGCombinerInfo &DCI) const {

14294

14295

SelectionDAG &DAG = DCI.DAG;

14296

SDLoc dl(N);

14297

unsigned Opcode = N->getOperand(1).getOpcode();

14298

14299

assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT)(((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT) &&
"Not a FP_TO_INT Instruction!") ? static_cast<void> (0
) : __assert_fail ("(Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT) && \"Not a FP_TO_INT Instruction!\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 14300, __PRETTY_FUNCTION__))

14300

&& "Not a FP_TO_INT Instruction!")(((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT) &&
"Not a FP_TO_INT Instruction!") ? static_cast<void> (0
) : __assert_fail ("(Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT) && \"Not a FP_TO_INT Instruction!\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 14300, __PRETTY_FUNCTION__));

14301

14302

SDValue Val = N->getOperand(1).getOperand(0);

14303

EVT Op1VT = N->getOperand(1).getValueType();

14304

EVT ResVT = Val.getValueType();

14305

14306

if (!isTypeLegal(ResVT))

14307

return SDValue();

14308

14309

// Only perform combine for conversion to i64/i32 or power9 i16/i8.

14310

bool ValidTypeForStoreFltAsInt =

14311

(Op1VT == MVT::i32 || Op1VT == MVT::i64 ||

14312

(Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));

14313

14314

if (ResVT == MVT::ppcf128 || !Subtarget.hasP8Vector() ||

14315

cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)

14316

return SDValue();

14317

14318

// Extend f32 values to f64

14319

if (ResVT.getScalarSizeInBits() == 32) {

14320

Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);

14321

DCI.AddToWorklist(Val.getNode());

14322

}

14323

14324

// Set signed or unsigned conversion opcode.

14325

unsigned ConvOpcode = (Opcode == ISD::FP_TO_SINT) ?

14326

PPCISD::FP_TO_SINT_IN_VSR :

14327

PPCISD::FP_TO_UINT_IN_VSR;

14328

14329

Val = DAG.getNode(ConvOpcode,

14330

dl, ResVT == MVT::f128 ? MVT::f128 : MVT::f64, Val);

14331

DCI.AddToWorklist(Val.getNode());

14332

14333

// Set number of bytes being converted.

14334

unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;

14335

SDValue Ops[] = { N->getOperand(0), Val, N->getOperand(2),

14336

DAG.getIntPtrConstant(ByteSize, dl, false),

14337

DAG.getValueType(Op1VT) };

14338

14339

Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl,

14340

DAG.getVTList(MVT::Other), Ops,

14341

cast<StoreSDNode>(N)->getMemoryVT(),

14342

cast<StoreSDNode>(N)->getMemOperand());

14343

14344

DCI.AddToWorklist(Val.getNode());

14345

return Val;

14346

}

14347

14348

static bool isAlternatingShuffMask(const ArrayRef<int> &Mask, int NumElts) {

14349

// Check that the source of the element keeps flipping

14350

// (i.e. Mask[i] < NumElts -> Mask[i+i] >= NumElts).

14351

bool PrevElemFromFirstVec = Mask[0] < NumElts;

14352

for (int i = 1, e = Mask.size(); i < e; i++) {

14353

if (PrevElemFromFirstVec && Mask[i] < NumElts)

14354

return false;

14355

if (!PrevElemFromFirstVec && Mask[i] >= NumElts)

14356

return false;

14357

PrevElemFromFirstVec = !PrevElemFromFirstVec;

14358

}

14359

return true;

14360

}

14361

14362

// Returns true when Op is a BUILD_VECTOR in which every operand that is not
// undef is the same value. A BUILD_VECTOR made only of undef operands also
// qualifies.
static bool isSplatBV(SDValue Op) {
  if (Op.getOpcode() != ISD::BUILD_VECTOR)
    return false;

  // The first non-undef operand seen so far; empty until one is found.
  SDValue SplatCandidate;
  for (unsigned Idx = 0, NumOps = Op.getNumOperands(); Idx < NumOps; ++Idx) {
    SDValue Elt = Op.getOperand(Idx);
    if (Elt.isUndef())
      continue;
    if (!SplatCandidate.getNode())
      SplatCandidate = Elt;
    else if (Elt != SplatCandidate)
      return false;
  }
  return true;
}

14380

14381

// Returns the SCALAR_TO_VECTOR node that Op is, or that Op is a direct
// BITCAST of. Returns an empty SDValue in every other case.
static SDValue isScalarToVec(SDValue Op) {
  // Look through at most one bitcast.
  if (Op.getOpcode() == ISD::BITCAST)
    Op = Op.getOperand(0);
  return Op.getOpcode() == ISD::SCALAR_TO_VECTOR ? Op : SDValue();
}

14391

14392

// Adds HalfVec to every entry of ShuffV that lies in [0, LHSMaxIdx) or in
// [RHSMinIdx, RHSMaxIdx). All other entries are left unchanged.
static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl<int> &ShuffV,
                                            int LHSMaxIdx, int RHSMinIdx,
                                            int RHSMaxIdx, int HalfVec) {
  for (int &Entry : ShuffV) {
    const bool InLHSRange = Entry >= 0 && Entry < LHSMaxIdx;
    const bool InRHSRange = Entry >= RHSMinIdx && Entry < RHSMaxIdx;
    if (InLHSRange || InRHSRange)
      Entry += HalfVec;
  }
}

14402

14403

// Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if

14404

// the original is:

14405

// (<n x Ty> (scalar_to_vector (Ty (extract_elt <n x Ty> %a, C))))

14406

// In such a case, just change the shuffle mask to extract the element

14407

// from the permuted index.

14408

static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG) {

14409

SDLoc dl(OrigSToV);

14410

EVT VT = OrigSToV.getValueType();

14411

assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR &&((OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR && "Expecting a SCALAR_TO_VECTOR here"
) ? static_cast<void> (0) : __assert_fail ("OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR && \"Expecting a SCALAR_TO_VECTOR here\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 14412, __PRETTY_FUNCTION__))

14412

"Expecting a SCALAR_TO_VECTOR here")((OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR && "Expecting a SCALAR_TO_VECTOR here"
) ? static_cast<void> (0) : __assert_fail ("OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR && \"Expecting a SCALAR_TO_VECTOR here\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 14412, __PRETTY_FUNCTION__));

14413

SDValue Input = OrigSToV.getOperand(0);

14414

14415

if (Input.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {

14416

ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(Input.getOperand(1));

14417

SDValue OrigVector = Input.getOperand(0);

14418

14419

// Can't handle non-const element indices or different vector types

14420

// for the input to the extract and the output of the scalar_to_vector.

14421

if (Idx && VT == OrigVector.getValueType()) {

14422

SmallVector<int, 16> NewMask(VT.getVectorNumElements(), -1);

14423

NewMask[VT.getVectorNumElements() / 2] = Idx->getZExtValue();

14424

return DAG.getVectorShuffle(VT, dl, OrigVector, OrigVector, NewMask);

14425

}

14426

}

14427

return DAG.getNode(PPCISD::SCALAR_TO_VECTOR_PERMUTED, dl, VT,

14428

OrigSToV.getOperand(0));

14429

}

14430

14431

// On little endian subtargets, combine shuffles such as:

14432

// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b

14433

// into:

14434

// vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7>, <zero>, %b

14435

// because the latter can be matched to a single instruction merge.

14436

// Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute

14437

// to put the value into element zero. Adjust the shuffle mask so that the

14438

// vector can remain in permuted form (to prevent a swap prior to a shuffle).

14439

SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,

14440

SelectionDAG &DAG) const {

14441

SDValue LHS = SVN->getOperand(0);

14442

SDValue RHS = SVN->getOperand(1);

14443

auto Mask = SVN->getMask();

14444

int NumElts = LHS.getValueType().getVectorNumElements();

14445

SDValue Res(SVN, 0);

14446

SDLoc dl(SVN);

14447

14448

// None of these combines are useful on big endian systems since the ISA

14449

// already has a big endian bias.

14450

if (!Subtarget.isLittleEndian() || !Subtarget.hasVSX())

14451

return Res;

14452

14453

// If this is not a shuffle of a shuffle and the first element comes from

14454

// the second vector, canonicalize to the commuted form. This will make it

14455

// more likely to match one of the single instruction patterns.

14456

if (Mask[0] >= NumElts && LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&

14457

RHS.getOpcode() != ISD::VECTOR_SHUFFLE) {

14458

std::swap(LHS, RHS);

14459

Res = DAG.getCommutedVectorShuffle(*SVN);

14460

Mask = cast<ShuffleVectorSDNode>(Res)->getMask();

14461

}

14462

14463

// Adjust the shuffle mask if either input vector comes from a

14464

// SCALAR_TO_VECTOR and keep the respective input vector in permuted

14465

// form (to prevent the need for a swap).

14466

SmallVector<int, 16> ShuffV(Mask.begin(), Mask.end());

14467

SDValue SToVLHS = isScalarToVec(LHS);

14468

SDValue SToVRHS = isScalarToVec(RHS);

14469

if (SToVLHS || SToVRHS) {

14470

int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements()

14471

: SToVRHS.getValueType().getVectorNumElements();

14472

int NumEltsOut = ShuffV.size();

14473

14474

// Initially assume that neither input is permuted. These will be adjusted

14475

// accordingly if either input is.

14476

int LHSMaxIdx = -1;

14477

int RHSMinIdx = -1;

14478

int RHSMaxIdx = -1;

14479

int HalfVec = LHS.getValueType().getVectorNumElements() / 2;

14480

14481

// Get the permuted scalar to vector nodes for the source(s) that come from

14482

// ISD::SCALAR_TO_VECTOR.

14483

if (SToVLHS) {

14484

// Set up the values for the shuffle vector fixup.

14485

LHSMaxIdx = NumEltsOut / NumEltsIn;

14486

SToVLHS = getSToVPermuted(SToVLHS, DAG);

14487

if (SToVLHS.getValueType() != LHS.getValueType())

14488

SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS);

14489

LHS = SToVLHS;

14490

}

14491

if (SToVRHS) {

14492

RHSMinIdx = NumEltsOut;

14493

RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx;

14494

SToVRHS = getSToVPermuted(SToVRHS, DAG);

14495

if (SToVRHS.getValueType() != RHS.getValueType())

14496

SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS);

14497

RHS = SToVRHS;

14498

}

14499

14500

// Fix up the shuffle mask to reflect where the desired element actually is.

14501

// The minimum and maximum indices that correspond to element zero for both

14502

// the LHS and RHS are computed and will control which shuffle mask entries

14503

// are to be changed. For example, if the RHS is permuted, any shuffle mask

14504

// entries in the range [RHSMinIdx,RHSMaxIdx) will be incremented by

14505

// HalfVec to refer to the corresponding element in the permuted vector.

14506

fixupShuffleMaskForPermutedSToV(ShuffV, LHSMaxIdx, RHSMinIdx, RHSMaxIdx,

14507

HalfVec);

14508

Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);

14509

14510

// We may have simplified away the shuffle. We won't be able to do anything

14511

// further with it here.

14512

if (!isa<ShuffleVectorSDNode>(Res))

14513

return Res;

14514

Mask = cast<ShuffleVectorSDNode>(Res)->getMask();

14515

}

14516

14517

// The common case after we commuted the shuffle is that the RHS is a splat

14518

// and we have elements coming in from the splat at indices that are not

14519

// conducive to using a merge.

14520

// Example:

14521

// vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, <zero>

14522

if (!isSplatBV(RHS))

14523

return Res;

14524

14525

// We are looking for a mask such that all even elements are from

14526

// one vector and all odd elements from the other.

14527

if (!isAlternatingShuffMask(Mask, NumElts))

14528

return Res;

14529

14530

// Adjust the mask so we are pulling in the same index from the splat

14531

// as the index from the interesting vector in consecutive elements.

14532

// Example (even elements from first vector):

14533

// vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>

14534

if (Mask[0] < NumElts)

14535

for (int i = 1, e = Mask.size(); i < e; i += 2)

14536

ShuffV[i] = (ShuffV[i - 1] + NumElts);

14537

// Example (odd elements from first vector):

14538

// vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero>

14539

else

14540

for (int i = 0, e = Mask.size(); i < e; i += 2)

14541

ShuffV[i] = (ShuffV[i + 1] + NumElts);

14542

14543

// If the RHS has undefs, we need to remove them since we may have created

14544

// a shuffle that adds those instead of the splat value.

14545

SDValue SplatVal = cast<BuildVectorSDNode>(RHS.getNode())->getSplatValue();

14546

RHS = DAG.getSplatBuildVector(RHS.getValueType(), dl, SplatVal);

14547

14548

Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);

14549

return Res;

14550

}

14551

14552

// Combine an element-reversing shuffle with the normal load that feeds it,
// or the normal store that consumes it, into PPCISD::LOAD_VEC_BE or
// PPCISD::STORE_VEC_BE. Returns an empty SDValue when the combine does not
// apply.
SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
                                                LSBaseSDNode *LSBase,
                                                DAGCombinerInfo &DCI) const {
  assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) &&
         "Not a reverse memop pattern!");

  // True when the mask, read from its last entry to its first, is exactly
  // 0, 1, 2, ...
  auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool {
    auto Mask = SVN->getMask();
    const int NumEntries = Mask.size();
    for (int FromEnd = 0; FromEnd < NumEntries; ++FromEnd)
      if (Mask[NumEntries - 1 - FromEnd] != FromEnd)
        return false;
    return true;
  };

  SelectionDAG &DAG = DCI.DAG;
  EVT VT = SVN->getValueType(0);

  if (!isTypeLegal(VT))
    return SDValue();
  if (!Subtarget.isLittleEndian() || !Subtarget.hasVSX())
    return SDValue();

  // Before P9, the PPCVSXSwapRemoval pass manipulates the element order (see
  // the comment in PPCVSXSwapRemoval.cpp). This combine conflicts with that
  // optimization, so it is not done there.
  if (!Subtarget.hasP9Vector())
    return SDValue();

  if (!IsElementReverse(SVN))
    return SDValue();

  const unsigned MemOpc = LSBase->getOpcode();

  if (MemOpc == ISD::LOAD) {
    SDLoc dl(SVN);
    SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
    return DAG.getMemIntrinsicNode(
        PPCISD::LOAD_VEC_BE, dl, DAG.getVTList(VT, MVT::Other), LoadOps,
        LSBase->getMemoryVT(), LSBase->getMemOperand());
  }

  if (MemOpc == ISD::STORE) {
    SDLoc dl(LSBase);
    // The value stored is the shuffle's first input, i.e. the vector before
    // the reversal.
    SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0),
                          LSBase->getBasePtr()};
    return DAG.getMemIntrinsicNode(
        PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps,
        LSBase->getMemoryVT(), LSBase->getMemOperand());
  }

  llvm_unreachable("Expected a load or store node here");
}

14606

14607

SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,

14608

DAGCombinerInfo &DCI) const {

14609

SelectionDAG &DAG = DCI.DAG;

14610

SDLoc dl(N);

14611

switch (N->getOpcode()) {

14612

default: break;

14613

case ISD::ADD:

14614

return combineADD(N, DCI);

14615

case ISD::SHL:

14616

return combineSHL(N, DCI);

14617

case ISD::SRA:

14618

return combineSRA(N, DCI);

14619

case ISD::SRL:

14620

return combineSRL(N, DCI);

14621

case ISD::MUL:

14622

return combineMUL(N, DCI);

14623

case ISD::FMA:

14624

case PPCISD::FNMSUB:

14625

return combineFMALike(N, DCI);

14626

case PPCISD::SHL:

14627

if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.

14628

return N->getOperand(0);

14629

break;

14630

case PPCISD::SRL:

14631

if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.

14632

return N->getOperand(0);

14633

break;

14634

case PPCISD::SRA:

14635

if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {

14636

if (C->isNullValue() || // 0 >>s V -> 0.

14637

C->isAllOnesValue()) // -1 >>s V -> -1.

14638

return N->getOperand(0);

14639

}

14640

break;

14641

case ISD::SIGN_EXTEND:

14642

case ISD::ZERO_EXTEND:

14643

case ISD::ANY_EXTEND:

14644

return DAGCombineExtBoolTrunc(N, DCI);

14645

case ISD::TRUNCATE:

14646

return combineTRUNCATE(N, DCI);

14647

case ISD::SETCC:

14648

if (SDValue CSCC = combineSetCC(N, DCI))

14649

return CSCC;

14650

LLVM_FALLTHROUGH[[gnu::fallthrough]];

14651

case ISD::SELECT_CC:

14652

return DAGCombineTruncBoolExt(N, DCI);

14653

case ISD::SINT_TO_FP:

14654

case ISD::UINT_TO_FP:

14655

return combineFPToIntToFP(N, DCI);

14656

case ISD::VECTOR_SHUFFLE:

14657

if (ISD::isNormalLoad(N->getOperand(0).getNode())) {

14658

LSBaseSDNode* LSBase = cast<LSBaseSDNode>(N->getOperand(0));

14659

return combineVReverseMemOP(cast<ShuffleVectorSDNode>(N), LSBase, DCI);

14660

}

14661

return combineVectorShuffle(cast<ShuffleVectorSDNode>(N), DCI.DAG);

14662

case ISD::STORE: {

14663

14664

EVT Op1VT = N->getOperand(1).getValueType();

14665

unsigned Opcode = N->getOperand(1).getOpcode();

14666

14667

if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT) {

14668

SDValue Val= combineStoreFPToInt(N, DCI);

14669

if (Val)

14670

return Val;

14671

}

14672

14673

if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) {

14674

ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N->getOperand(1));

14675

SDValue Val= combineVReverseMemOP(SVN, cast<LSBaseSDNode>(N), DCI);

14676

if (Val)

14677

return Val;

14678

}

14679

14680

// Turn STORE (BSWAP) -> sthbrx/stwbrx.

14681

if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&

14682

N->getOperand(1).getNode()->hasOneUse() &&

14683

(Op1VT == MVT::i32 || Op1VT == MVT::i16 ||

14684

(Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {

14685

14686

// STBRX can only handle simple types and it makes no sense to store less

14687

// two bytes in byte-reversed order.

14688

EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();

14689

if (mVT.isExtended() || mVT.getSizeInBits() < 16)

14690

break;

14691

14692

SDValue BSwapOp = N->getOperand(1).getOperand(0);

14693

// Do an any-extend to 32-bits if this is a half-word input.

14694

if (BSwapOp.getValueType() == MVT::i16)

14695

BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);

14696

14697

// If the type of BSWAP operand is wider than stored memory width

14698

// it need to be shifted to the right side before STBRX.

14699

if (Op1VT.bitsGT(mVT)) {

14700

int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();

14701

BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,

14702

DAG.getConstant(Shift, dl, MVT::i32));

14703

// Need to truncate if this is a bswap of i64 stored as i32/i16.

14704

if (Op1VT == MVT::i64)

14705

BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);

14706

}

14707

14708

SDValue Ops[] = {

14709

N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)

14710

};

14711

return

14712

DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),

14713

Ops, cast<StoreSDNode>(N)->getMemoryVT(),

14714

cast<StoreSDNode>(N)->getMemOperand());

14715

}

14716

14717

// STORE Constant:i32<0> -> STORE<trunc to i32> Constant:i64<0>

14718

// So it can increase the chance of CSE constant construction.

14719

if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&

14720

isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) {

14721

// Need to sign-extended to 64-bits to handle negative values.

14722

EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT();

14723

uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1),

14724

MemVT.getSizeInBits());

14725

SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64);

14726

14727

// DAG.getTruncStore() can't be used here because it doesn't accept

14728

// the general (base + offset) addressing mode.

14729

// So we use UpdateNodeOperands and setTruncatingStore instead.

14730

DAG.UpdateNodeOperands(N, N->getOperand(0), Const64, N->getOperand(2),

14731

N->getOperand(3));

14732

cast<StoreSDNode>(N)->setTruncatingStore(true);

14733

return SDValue(N, 0);

14734

}

14735

14736

// For little endian, VSX stores require generating xxswapd/lxvd2x.

14737

// Not needed on ISA 3.0 based CPUs since we have a non-permuting store.

14738

if (Op1VT.isSimple()) {

14739

MVT StoreVT = Op1VT.getSimpleVT();

14740

if (Subtarget.needsSwapsForVSXMemOps() &&

14741

(StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||

14742

StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))

14743

return expandVSXStoreForLE(N, DCI);

14744

}

14745

break;

14746

}

14747

case ISD::LOAD: {

14748

LoadSDNode *LD = cast<LoadSDNode>(N);

14749

EVT VT = LD->getValueType(0);

14750

14751

// For little endian, VSX loads require generating lxvd2x/xxswapd.

14752

// Not needed on ISA 3.0 based CPUs since we have a non-permuting load.

14753

if (VT.isSimple()) {

14754

MVT LoadVT = VT.getSimpleVT();

14755

if (Subtarget.needsSwapsForVSXMemOps() &&

14756

(LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||

14757

LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))

14758

return expandVSXLoadForLE(N, DCI);

14759

}

14760

14761

// We sometimes end up with a 64-bit integer load, from which we extract

14762

// two single-precision floating-point numbers. This happens with

14763

// std::complex<float>, and other similar structures, because of the way we

14764

// canonicalize structure copies. However, if we lack direct moves,

14765

// then the final bitcasts from the extracted integer values to the

14766

// floating-point numbers turn into store/load pairs. Even with direct moves,

14767

// just loading the two floating-point numbers is likely better.

14768

auto ReplaceTwoFloatLoad = [&]() {

14769

if (VT != MVT::i64)

14770

return false;

14771

14772

if (LD->getExtensionType() != ISD::NON_EXTLOAD ||

14773

LD->isVolatile())

14774

return false;

14775

14776

// We're looking for a sequence like this:

14777

// t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64

14778

// t16: i64 = srl t13, Constant:i32<32>

14779

// t17: i32 = truncate t16

14780

// t18: f32 = bitcast t17

14781

// t19: i32 = truncate t13

14782

// t20: f32 = bitcast t19

14783

14784

if (!LD->hasNUsesOfValue(2, 0))

14785

return false;

14786

14787

auto UI = LD->use_begin();

14788

while (UI.getUse().getResNo() != 0) ++UI;

14789

SDNode *Trunc = *UI++;

14790

while (UI.getUse().getResNo() != 0) ++UI;

14791

SDNode *RightShift = *UI;

14792

if (Trunc->getOpcode() != ISD::TRUNCATE)

14793

std::swap(Trunc, RightShift);

14794

14795

if (Trunc->getOpcode() != ISD::TRUNCATE ||

14796

Trunc->getValueType(0) != MVT::i32 ||

14797

!Trunc->hasOneUse())

14798

return false;

14799

if (RightShift->getOpcode() != ISD::SRL ||

14800

!isa<ConstantSDNode>(RightShift->getOperand(1)) ||

14801

RightShift->getConstantOperandVal(1) != 32 ||

14802

!RightShift->hasOneUse())

14803

return false;

14804

14805

SDNode *Trunc2 = *RightShift->use_begin();

14806

if (Trunc2->getOpcode() != ISD::TRUNCATE ||

14807

Trunc2->getValueType(0) != MVT::i32 ||

14808

!Trunc2->hasOneUse())

14809

return false;

14810

14811

SDNode *Bitcast = *Trunc->use_begin();

14812

SDNode *Bitcast2 = *Trunc2->use_begin();

14813

14814

if (Bitcast->getOpcode() != ISD::BITCAST ||

14815

Bitcast->getValueType(0) != MVT::f32)

14816

return false;

14817

if (Bitcast2->getOpcode() != ISD::BITCAST ||

14818

Bitcast2->getValueType(0) != MVT::f32)

14819

return false;

14820

14821

if (Subtarget.isLittleEndian())

14822

std::swap(Bitcast, Bitcast2);

14823

14824

// Bitcast has the second float (in memory-layout order) and Bitcast2

14825

// has the first one.

14826

14827

SDValue BasePtr = LD->getBasePtr();

14828

if (LD->isIndexed()) {

14829

assert(LD->getAddressingMode() == ISD::PRE_INC &&((LD->getAddressingMode() == ISD::PRE_INC && "Non-pre-inc AM on PPC?"
) ? static_cast<void> (0) : __assert_fail ("LD->getAddressingMode() == ISD::PRE_INC && \"Non-pre-inc AM on PPC?\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 14830, __PRETTY_FUNCTION__))

14830

"Non-pre-inc AM on PPC?")((LD->getAddressingMode() == ISD::PRE_INC && "Non-pre-inc AM on PPC?"
) ? static_cast<void> (0) : __assert_fail ("LD->getAddressingMode() == ISD::PRE_INC && \"Non-pre-inc AM on PPC?\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 14830, __PRETTY_FUNCTION__));

14831

BasePtr =

14832

DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,

14833

LD->getOffset());

14834

}

14835

14836

auto MMOFlags =

14837

LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;

14838

SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,

14839

LD->getPointerInfo(), LD->getAlignment(),

14840

MMOFlags, LD->getAAInfo());

14841

SDValue AddPtr =

14842

DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),

14843

BasePtr, DAG.getIntPtrConstant(4, dl));

14844

SDValue FloatLoad2 = DAG.getLoad(

14845

MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,

14846

LD->getPointerInfo().getWithOffset(4),

14847

MinAlign(LD->getAlignment(), 4), MMOFlags, LD->getAAInfo());

14848

14849

if (LD->isIndexed()) {

14850

// Note that DAGCombine should re-form any pre-increment load(s) from

14851

// what is produced here if that makes sense.

14852

DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);

14853

}

14854

14855

DCI.CombineTo(Bitcast2, FloatLoad);

14856

DCI.CombineTo(Bitcast, FloatLoad2);

14857

14858

DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),

14859

SDValue(FloatLoad2.getNode(), 1));

14860

return true;

14861

};

14862

14863

if (ReplaceTwoFloatLoad())

14864

return SDValue(N, 0);

14865

14866

EVT MemVT = LD->getMemoryVT();

14867

Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());

14868

Align ABIAlignment = DAG.getDataLayout().getABITypeAlign(Ty);

14869

if (LD->isUnindexed() && VT.isVector() &&

14870

((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&

14871

// P8 and later hardware should just use LOAD.

14872

!Subtarget.hasP8Vector() &&

14873

(VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||

14874

VT == MVT::v4f32))) &&

14875

LD->getAlign() < ABIAlignment) {

14876

// This is a type-legal unaligned Altivec load.

14877

SDValue Chain = LD->getChain();

14878

SDValue Ptr = LD->getBasePtr();

14879

bool isLittleEndian = Subtarget.isLittleEndian();

14880

14881

// This implements the loading of unaligned vectors as described in

14882

// the venerable Apple Velocity Engine overview. Specifically:

14883

// https://developer.apple.com/hardwaredrivers/ve/alignment.html

14884

// https://developer.apple.com/hardwaredrivers/ve/code_optimization.html

14885

//

14886

// The general idea is to expand a sequence of one or more unaligned

14887

// loads into an alignment-based permutation-control instruction (lvsl

14888

// or lvsr), a series of regular vector loads (which always truncate

14889

// their input address to an aligned address), and a series of

14890

// permutations. The results of these permutations are the requested

14891

// loaded values. The trick is that the last "extra" load is not taken

14892

// from the address you might suspect (sizeof(vector) bytes after the

14893

// last requested load), but rather sizeof(vector) - 1 bytes after the

14894

// last requested vector. The point of this is to avoid a page fault if

14895

// the base address happened to be aligned. This works because if the

14896

// base address is aligned, then adding less than a full vector length

14897

// will cause the last vector in the sequence to be (re)loaded.

14898

// Otherwise, the next vector will be fetched as you might suspect was

14899

// necessary.

14900

14901

// We might be able to reuse the permutation generation from

14902

// a different base address offset from this one by an aligned amount.

14903

// The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this

14904

// optimization later.

14905

Intrinsic::ID Intr, IntrLD, IntrPerm;

14906

MVT PermCntlTy, PermTy, LDTy;

14907

Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr

14908

: Intrinsic::ppc_altivec_lvsl;

14909

IntrLD = Intrinsic::ppc_altivec_lvx;

14910

IntrPerm = Intrinsic::ppc_altivec_vperm;

14911

PermCntlTy = MVT::v16i8;

14912

PermTy = MVT::v4i32;

14913

LDTy = MVT::v4i32;

14914

14915

SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);

14916

14917

// Create the new MMO for the new base load. It is like the original MMO,

14918

// but represents an area in memory almost twice the vector size centered

14919

// on the original address. If the address is unaligned, we might start

14920

// reading up to (sizeof(vector)-1) bytes below the address of the

14921

// original unaligned load.

14922

MachineFunction &MF = DAG.getMachineFunction();

14923

MachineMemOperand *BaseMMO =

14924

MF.getMachineMemOperand(LD->getMemOperand(),

14925

-(long)MemVT.getStoreSize()+1,

14926

2*MemVT.getStoreSize()-1);

14927

14928

// Create the new base load.

14929

SDValue LDXIntID =

14930

DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));

14931

SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };

14932

SDValue BaseLoad =

14933

DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,

14934

DAG.getVTList(PermTy, MVT::Other),

14935

BaseLoadOps, LDTy, BaseMMO);

14936

14937

// Note that the value of IncOffset (which is provided to the next

14938

// load's pointer info offset value, and thus used to calculate the

14939

// alignment), and the value of IncValue (which is actually used to

14940

// increment the pointer value) are different! This is because we

14941

// require the next load to appear to be aligned, even though it

14942

// is actually offset from the base pointer by a lesser amount.

14943

int IncOffset = VT.getSizeInBits() / 8;

14944

int IncValue = IncOffset;

14945

14946

// Walk (both up and down) the chain looking for another load at the real

14947

// (aligned) offset (the alignment of the other load does not matter in

14948

// this case). If found, then do not use the offset reduction trick, as

14949

// that will prevent the loads from being later combined (as they would

14950

// otherwise be duplicates).

14951

if (!findConsecutiveLoad(LD, DAG))

14952

--IncValue;

14953

14954

SDValue Increment =

14955

DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));

14956

Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);

14957

14958

MachineMemOperand *ExtraMMO =

14959

MF.getMachineMemOperand(LD->getMemOperand(),

14960

1, 2*MemVT.getStoreSize()-1);

14961

SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };

14962

SDValue ExtraLoad =

14963

DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,

14964

DAG.getVTList(PermTy, MVT::Other),

14965

ExtraLoadOps, LDTy, ExtraMMO);

14966

14967

SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,

14968

BaseLoad.getValue(1), ExtraLoad.getValue(1));

14969

14970

// Because vperm has a big-endian bias, we must reverse the order

14971

// of the input vectors and complement the permute control vector

14972

// when generating little endian code. We have already handled the

14973

// latter by using lvsr instead of lvsl, so just reverse BaseLoad

14974

// and ExtraLoad here.

14975

SDValue Perm;

14976

if (isLittleEndian)

14977

Perm = BuildIntrinsicOp(IntrPerm,

14978

ExtraLoad, BaseLoad, PermCntl, DAG, dl);

14979

else

14980

Perm = BuildIntrinsicOp(IntrPerm,

14981

BaseLoad, ExtraLoad, PermCntl, DAG, dl);

14982

14983

if (VT != PermTy)

14984

Perm = Subtarget.hasAltivec()

14985

? DAG.getNode(ISD::BITCAST, dl, VT, Perm)

14986

: DAG.getNode(ISD::FP_ROUND, dl, VT, Perm,

14987

DAG.getTargetConstant(1, dl, MVT::i64));

14988

// second argument is 1 because this rounding

14989

// is always exact.

14990

14991

// The output of the permutation is our loaded result, the TokenFactor is

14992

// our new chain.

14993

DCI.CombineTo(N, Perm, TF);

14994

return SDValue(N, 0);

14995

}

14996

}

14997

break;

14998

case ISD::INTRINSIC_WO_CHAIN: {

14999

bool isLittleEndian = Subtarget.isLittleEndian();

15000

unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();

15001

Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr

15002

: Intrinsic::ppc_altivec_lvsl);

15003

if (IID == Intr && N->getOperand(1)->getOpcode() == ISD::ADD) {

15004

SDValue Add = N->getOperand(1);

15005

15006

int Bits = 4 /* 16 byte alignment */;

15007

15008

if (DAG.MaskedValueIsZero(Add->getOperand(1),

15009

APInt::getAllOnesValue(Bits /* alignment */)

15010

.zext(Add.getScalarValueSizeInBits()))) {

15011

SDNode *BasePtr = Add->getOperand(0).getNode();

15012

for (SDNode::use_iterator UI = BasePtr->use_begin(),

15013

UE = BasePtr->use_end();

15014

UI != UE; ++UI) {

15015

if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&

15016

cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() ==

15017

IID) {

15018

// We've found another LVSL/LVSR, and this address is an aligned

15019

// multiple of that one. The results will be the same, so use the

15020

// one we've just found instead.

15021

15022

return SDValue(*UI, 0);

15023

}

15024

}

15025

}

15026

15027

if (isa<ConstantSDNode>(Add->getOperand(1))) {

15028

SDNode *BasePtr = Add->getOperand(0).getNode();

15029

for (SDNode::use_iterator UI = BasePtr->use_begin(),

15030

UE = BasePtr->use_end(); UI != UE; ++UI) {

15031

if (UI->getOpcode() == ISD::ADD &&

15032

isa<ConstantSDNode>(UI->getOperand(1)) &&

15033

(cast<ConstantSDNode>(Add->getOperand(1))->getZExtValue() -

15034

cast<ConstantSDNode>(UI->getOperand(1))->getZExtValue()) %

15035

(1ULL << Bits) == 0) {

15036

SDNode *OtherAdd = *UI;

15037

for (SDNode::use_iterator VI = OtherAdd->use_begin(),

15038

VE = OtherAdd->use_end(); VI != VE; ++VI) {

15039

if (VI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&

15040

cast<ConstantSDNode>(VI->getOperand(0))->getZExtValue() == IID) {

15041

return SDValue(*VI, 0);

15042

}

15043

}

15044

}

15045

}

15046

}

15047

}

15048

15049

// Combine vmaxsw/h/b(a, a's negation) to abs(a)

15050

// Expose the vabsduw/h/b opportunity for down stream

15051

if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&

15052

(IID == Intrinsic::ppc_altivec_vmaxsw ||

15053

IID == Intrinsic::ppc_altivec_vmaxsh ||

15054

IID == Intrinsic::ppc_altivec_vmaxsb)) {

15055

SDValue V1 = N->getOperand(1);

15056

SDValue V2 = N->getOperand(2);

15057

if ((V1.getSimpleValueType() == MVT::v4i32 ||

15058

V1.getSimpleValueType() == MVT::v8i16 ||

15059

V1.getSimpleValueType() == MVT::v16i8) &&

15060

V1.getSimpleValueType() == V2.getSimpleValueType()) {

15061

// (0-a, a)

15062

if (V1.getOpcode() == ISD::SUB &&

15063

ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&

15064

V1.getOperand(1) == V2) {

15065

return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);

15066

}

15067

// (a, 0-a)

15068

if (V2.getOpcode() == ISD::SUB &&

15069

ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&

15070

V2.getOperand(1) == V1) {

15071

return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);

15072

}

15073

// (x-y, y-x)

15074

if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&

15075

V1.getOperand(0) == V2.getOperand(1) &&

15076

V1.getOperand(1) == V2.getOperand(0)) {

15077

return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);

15078

}

15079

}

15080

}

15081

}

15082

15083

break;

15084

case ISD::INTRINSIC_W_CHAIN:

15085

// For little endian, VSX loads require generating lxvd2x/xxswapd.

15086

// Not needed on ISA 3.0 based CPUs since we have a non-permuting load.

15087

if (Subtarget.needsSwapsForVSXMemOps()) {

15088

switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {

15089

default:

15090

break;

15091

case Intrinsic::ppc_vsx_lxvw4x:

15092

case Intrinsic::ppc_vsx_lxvd2x:

15093

return expandVSXLoadForLE(N, DCI);

15094

}

15095

}

15096

break;

15097

case ISD::INTRINSIC_VOID:

15098

// For little endian, VSX stores require generating xxswapd/stxvd2x.

15099

// Not needed on ISA 3.0 based CPUs since we have a non-permuting store.

15100

if (Subtarget.needsSwapsForVSXMemOps()) {

15101

switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {

15102

default:

15103

break;

15104

case Intrinsic::ppc_vsx_stxvw4x:

15105

case Intrinsic::ppc_vsx_stxvd2x:

15106

return expandVSXStoreForLE(N, DCI);

15107

}

15108

}

15109

break;

15110

case ISD::BSWAP:

15111

// Turn BSWAP (LOAD) -> lhbrx/lwbrx.

15112

if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&

15113

N->getOperand(0).hasOneUse() &&

15114

(N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||

15115

(Subtarget.hasLDBRX() && Subtarget.isPPC64() &&

15116

N->getValueType(0) == MVT::i64))) {

15117

SDValue Load = N->getOperand(0);

15118

LoadSDNode *LD = cast<LoadSDNode>(Load);

15119

// Create the byte-swapping load.

15120

SDValue Ops[] = {

15121

LD->getChain(), // Chain

15122

LD->getBasePtr(), // Ptr

15123

DAG.getValueType(N->getValueType(0)) // VT

15124

};

15125

SDValue BSLoad =

15126

DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,

15127

DAG.getVTList(N->getValueType(0) == MVT::i64 ?

15128

MVT::i64 : MVT::i32, MVT::Other),

15129

Ops, LD->getMemoryVT(), LD->getMemOperand());

15130

15131

// If this is an i16 load, insert the truncate.

15132

SDValue ResVal = BSLoad;

15133

if (N->getValueType(0) == MVT::i16)

15134

ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);

15135

15136

// First, combine the bswap away. This makes the value produced by the

15137

// load dead.

15138

DCI.CombineTo(N, ResVal);

15139

15140

// Next, combine the load away, we give it a bogus result value but a real

15141

// chain result. The result value is dead because the bswap is dead.

15142

DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));

15143

15144

// Return N so it doesn't get rechecked!

15145

return SDValue(N, 0);

15146

}

15147

break;

15148

case PPCISD::VCMP:

15149

// If a VCMPo node already exists with exactly the same operands as this

15150

// node, use its result instead of this node (VCMPo computes both a CR6 and

15151

// a normal output).

15152

//

15153

if (!N->getOperand(0).hasOneUse() &&

15154

!N->getOperand(1).hasOneUse() &&

15155

!N->getOperand(2).hasOneUse()) {

15156

15157

// Scan all of the users of the LHS, looking for VCMPo's that match.

15158

SDNode *VCMPoNode = nullptr;

15159

15160

SDNode *LHSN = N->getOperand(0).getNode();

15161

for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();

15162

UI != E; ++UI)

15163

if (UI->getOpcode() == PPCISD::VCMPo &&

15164

UI->getOperand(1) == N->getOperand(1) &&

15165

UI->getOperand(2) == N->getOperand(2) &&

15166

UI->getOperand(0) == N->getOperand(0)) {

15167

VCMPoNode = *UI;

15168

break;

15169

}

15170

15171

// If there is no VCMPo node, or if the flag value has a single use, don't

15172

// transform this.

15173

if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1))

15174

break;

15175

15176

// Look at the (necessarily single) use of the flag value. If it has a

15177

// chain, this transformation is more complex. Note that multiple things

15178

// could use the value result, which we should ignore.

15179

SDNode *FlagUser = nullptr;

15180

for (SDNode::use_iterator UI = VCMPoNode->use_begin();

15181

FlagUser == nullptr; ++UI) {

15182

assert(UI != VCMPoNode->use_end() && "Didn't find user!")((UI != VCMPoNode->use_end() && "Didn't find user!"
) ? static_cast<void> (0) : __assert_fail ("UI != VCMPoNode->use_end() && \"Didn't find user!\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 15182, __PRETTY_FUNCTION__));

15183

SDNode *User = *UI;

15184

for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {

15185

if (User->getOperand(i) == SDValue(VCMPoNode, 1)) {

15186

FlagUser = User;

15187

break;

15188

}

15189

}

15190

}

15191

15192

// If the user is a MFOCRF instruction, we know this is safe.

15193

// Otherwise we give up for right now.

15194

if (FlagUser->getOpcode() == PPCISD::MFOCRF)

15195

return SDValue(VCMPoNode, 0);

15196

}

15197

break;

15198

case ISD::BRCOND: {

15199

SDValue Cond = N->getOperand(1);

15200

SDValue Target = N->getOperand(2);

15201

15202

if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN &&

15203

cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() ==

15204

Intrinsic::loop_decrement) {

15205

15206

// We now need to make the intrinsic dead (it cannot be instruction

15207

// selected).

15208

DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0));

15209

assert(Cond.getNode()->hasOneUse() &&((Cond.getNode()->hasOneUse() && "Counter decrement has more than one use"
) ? static_cast<void> (0) : __assert_fail ("Cond.getNode()->hasOneUse() && \"Counter decrement has more than one use\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 15210, __PRETTY_FUNCTION__))

15210

"Counter decrement has more than one use")((Cond.getNode()->hasOneUse() && "Counter decrement has more than one use"
) ? static_cast<void> (0) : __assert_fail ("Cond.getNode()->hasOneUse() && \"Counter decrement has more than one use\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 15210, __PRETTY_FUNCTION__));

15211

15212

return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other,

15213

N->getOperand(0), Target);

15214

}

15215

}

15216

break;

15217

case ISD::BR_CC: {

15218

// If this is a branch on an altivec predicate comparison, lower this so

15219

// that we don't have to do a MFOCRF: instead, branch directly on CR6. This

15220

// lowering is done pre-legalize, because the legalizer lowers the predicate

15221

// compare down to code that is difficult to reassemble.

15222

ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();

15223

SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);

15224

15225

// Sometimes the promoted value of the intrinsic is ANDed by some non-zero

15226

// value. If so, pass-through the AND to get to the intrinsic.

15227

if (LHS.getOpcode() == ISD::AND &&

15228

LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN &&

15229

cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() ==

15230

Intrinsic::loop_decrement &&

15231

isa<ConstantSDNode>(LHS.getOperand(1)) &&

15232

!isNullConstant(LHS.getOperand(1)))

15233

LHS = LHS.getOperand(0);

15234

15235

if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&

15236

cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() ==

15237

Intrinsic::loop_decrement &&

15238

isa<ConstantSDNode>(RHS)) {

15239

assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&(((CC == ISD::SETEQ || CC == ISD::SETNE) && "Counter decrement comparison is not EQ or NE"
) ? static_cast<void> (0) : __assert_fail ("(CC == ISD::SETEQ || CC == ISD::SETNE) && \"Counter decrement comparison is not EQ or NE\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 15240, __PRETTY_FUNCTION__))

15240

"Counter decrement comparison is not EQ or NE")(((CC == ISD::SETEQ || CC == ISD::SETNE) && "Counter decrement comparison is not EQ or NE"
) ? static_cast<void> (0) : __assert_fail ("(CC == ISD::SETEQ || CC == ISD::SETNE) && \"Counter decrement comparison is not EQ or NE\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 15240, __PRETTY_FUNCTION__));

15241

15242

unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();

15243

bool isBDNZ = (CC == ISD::SETEQ && Val) ||

15244

(CC == ISD::SETNE && !Val);

15245

15246

// We now need to make the intrinsic dead (it cannot be instruction

15247

// selected).

15248

DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0));

15249

assert(LHS.getNode()->hasOneUse() &&((LHS.getNode()->hasOneUse() && "Counter decrement has more than one use"
) ? static_cast<void> (0) : __assert_fail ("LHS.getNode()->hasOneUse() && \"Counter decrement has more than one use\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 15250, __PRETTY_FUNCTION__))

15250

"Counter decrement has more than one use")((LHS.getNode()->hasOneUse() && "Counter decrement has more than one use"
) ? static_cast<void> (0) : __assert_fail ("LHS.getNode()->hasOneUse() && \"Counter decrement has more than one use\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 15250, __PRETTY_FUNCTION__));

15251

15252

return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other,

15253

N->getOperand(0), N->getOperand(4));

15254

}

15255

15256

int CompareOpc;

15257

bool isDot;

15258

15259

if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&

15260

isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&

15261

getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {

15262

assert(isDot && "Can't compare against a vector result!")((isDot && "Can't compare against a vector result!") ?
static_cast<void> (0) : __assert_fail ("isDot && \"Can't compare against a vector result!\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 15262, __PRETTY_FUNCTION__));

15263

15264

// If this is a comparison against something other than 0/1, then we know

15265

// that the condition is never/always true.

15266

unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();

15267

if (Val != 0 && Val != 1) {

15268

if (CC == ISD::SETEQ) // Cond never true, remove branch.

15269

return N->getOperand(0);

15270

// Always !=, turn it into an unconditional branch.

15271

return DAG.getNode(ISD::BR, dl, MVT::Other,

15272

N->getOperand(0), N->getOperand(4));

15273

}

15274

15275

bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);

15276

15277

// Create the PPCISD altivec 'dot' comparison node.

15278

SDValue Ops[] = {

15279

LHS.getOperand(2), // LHS of compare

15280

LHS.getOperand(3), // RHS of compare

15281

DAG.getConstant(CompareOpc, dl, MVT::i32)

15282

};

15283

EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };

15284

SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);

15285

15286

// Unpack the result based on how the target uses it.

15287

PPC::Predicate CompOpc;

15288

switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {

15289

default: // Can't happen, don't crash on invalid number though.

15290

case 0: // Branch on the value of the EQ bit of CR6.

15291

CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;

15292

break;

15293

case 1: // Branch on the inverted value of the EQ bit of CR6.

15294

CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;

15295

break;

15296

case 2: // Branch on the value of the LT bit of CR6.

15297

CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;

15298

break;

15299

case 3: // Branch on the inverted value of the LT bit of CR6.

15300

CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;

15301

break;

15302

}

15303

15304

return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),

15305

DAG.getConstant(CompOpc, dl, MVT::i32),

15306

DAG.getRegister(PPC::CR6, MVT::i32),

15307

N->getOperand(4), CompNode.getValue(1));

15308

}

15309

break;

15310

}

15311

case ISD::BUILD_VECTOR:

15312

return DAGCombineBuildVector(N, DCI);

15313

case ISD::ABS:

15314

return combineABS(N, DCI);

15315

case ISD::VSELECT:

15316

return combineVSelect(N, DCI);

15317

}

15318

15319

return SDValue();

15320

}

15321

15322

/// Build the PPC-specific expansion of (sdiv X, Divisor) where Divisor is a
/// power of two or the negation of one.
///
/// The division is emitted as a PPCISD::SRA_ADDZE node on the dividend and
/// log2 of the divisor's magnitude; when the divisor is a negated power of
/// two the result is additionally subtracted from zero. Every node created
/// here is appended to \p Created.
///
/// \returns the replacement value, or an empty SDValue when the result type
/// is neither i32 nor i64, when it is i64 on a non-PPC64 subtarget, or when
/// the divisor is not a (negated) power of two.
SDValue
PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
                                 SelectionDAG &DAG,
                                 SmallVectorImpl<SDNode *> &Created) const {
  const EVT ResultVT = N->getValueType(0);
  const bool IsI32 = ResultVT == MVT::i32;
  const bool IsI64 = ResultVT == MVT::i64;

  // 64-bit division is only handled when 64-bit registers are available.
  if (IsI64 && !Subtarget.isPPC64())
    return SDValue();
  if (!IsI32 && !IsI64)
    return SDValue();

  // Accept both +2^k and -2^k divisors.
  const bool IsNegPow2 = (-Divisor).isPowerOf2();
  if (!Divisor.isPowerOf2() && !IsNegPow2)
    return SDValue();

  SDLoc DL(N);
  SDValue Dividend = N->getOperand(0);

  // The shift amount is log2 of the divisor's magnitude.
  const APInt Magnitude = IsNegPow2 ? -Divisor : Divisor;
  const unsigned Log2 = Magnitude.countTrailingZeros();
  SDValue ShiftAmt = DAG.getConstant(Log2, DL, ResultVT);

  SDValue Result =
      DAG.getNode(PPCISD::SRA_ADDZE, DL, ResultVT, Dividend, ShiftAmt);
  Created.push_back(Result.getNode());

  if (!IsNegPow2)
    return Result;

  // Dividing by -2^k: negate the quotient obtained for +2^k.
  SDValue Zero = DAG.getConstant(0, DL, ResultVT);
  Result = DAG.getNode(ISD::SUB, DL, ResultVT, Zero, Result);
  Created.push_back(Result.getNode());
  return Result;
}

15351

15352

//===----------------------------------------------------------------------===//

15353

// Inline Assembly Support

15354

//===----------------------------------------------------------------------===//

15355

15356

void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,

15357

KnownBits &Known,

15358

const APInt &DemandedElts,

15359

const SelectionDAG &DAG,

15360

unsigned Depth) const {

15361

Known.resetAll();

15362

switch (Op.getOpcode()) {

15363

default: break;

15364

case PPCISD::LBRX: {

15365

// lhbrx is known to have the top bits cleared out.

15366

if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)

15367

Known.Zero = 0xFFFF0000;

15368

break;

15369

}

15370

case ISD::INTRINSIC_WO_CHAIN: {

15371

switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {

15372

default: break;

15373

case Intrinsic::ppc_altivec_vcmpbfp_p:

15374

case Intrinsic::ppc_altivec_vcmpeqfp_p:

15375

case Intrinsic::ppc_altivec_vcmpequb_p:

15376

case Intrinsic::ppc_altivec_vcmpequh_p:

15377

case Intrinsic::ppc_altivec_vcmpequw_p:

15378

case Intrinsic::ppc_altivec_vcmpequd_p:

15379

case Intrinsic::ppc_altivec_vcmpequq_p:

15380

case Intrinsic::ppc_altivec_vcmpgefp_p:

15381

case Intrinsic::ppc_altivec_vcmpgtfp_p:

15382

case Intrinsic::ppc_altivec_vcmpgtsb_p:

15383

case Intrinsic::ppc_altivec_vcmpgtsh_p:

15384

case Intrinsic::ppc_altivec_vcmpgtsw_p:

15385

case Intrinsic::ppc_altivec_vcmpgtsd_p:

15386

case Intrinsic::ppc_altivec_vcmpgtsq_p:

15387

case Intrinsic::ppc_altivec_vcmpgtub_p:

15388

case Intrinsic::ppc_altivec_vcmpgtuh_p:

15389

case Intrinsic::ppc_altivec_vcmpgtuw_p:

15390

case Intrinsic::ppc_altivec_vcmpgtud_p:

15391

case Intrinsic::ppc_altivec_vcmpgtuq_p:

15392

Known.Zero = ~1U; // All bits but the low one are known to be zero.

15393

break;

15394

}

15395

}

15396

}

15397

}

15398

15399

/// Return the preferred alignment for the loop \p ML.
///
/// For the CPU directives listed below a 32-byte alignment is requested when
/// either
///   - innermost-loop alignment has not been disabled and \p ML is a nested
///     loop with no sub-loops, or
///   - the summed instruction sizes of the loop are in the range (16, 32]
///     bytes.
/// In every other case (including a null \p ML) the decision is delegated to
/// TargetLowering::getPrefLoopAlignment.
Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
  switch (Subtarget.getCPUDirective()) {
  default: break;
  case PPC::DIR_970:
  case PPC::DIR_PWR4:
  case PPC::DIR_PWR5:
  case PPC::DIR_PWR5X:
  case PPC::DIR_PWR6:
  case PPC::DIR_PWR6X:
  case PPC::DIR_PWR7:
  case PPC::DIR_PWR8:
  case PPC::DIR_PWR9:
  case PPC::DIR_PWR10:
  case PPC::DIR_PWR_FUTURE: {
    if (!ML)
      break;

    if (!DisableInnermostLoopAlign32) {
      // If the nested loop is an innermost loop, prefer to a 32-byte alignment,
      // so that we can decrease cache misses and branch-prediction misses.
      // Actual alignment of the loop will depend on the hotness check and other
      // logic in alignBlocks.
      if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
        return Align(32);
    }

    const PPCInstrInfo *TII = Subtarget.getInstrInfo();

    // For small loops (between 5 and 8 instructions), align to a 32-byte
    // boundary so that the entire loop fits in one instruction-cache line.
    //
    // Only whether the size lies in (16, 32] matters, so stop measuring as
    // soon as the running total exceeds 32 bytes. The flag terminates the
    // per-block loop as well; a plain `break` would only leave the
    // per-instruction loop and the scan would continue into every remaining
    // block of the loop.
    uint64_t LoopSize = 0;
    bool ExceedsLimit = false;
    for (auto I = ML->block_begin(), IE = ML->block_end();
         I != IE && !ExceedsLimit; ++I)
      for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) {
        LoopSize += TII->getInstSizeInBytes(*J);
        if (LoopSize > 32) {
          ExceedsLimit = true;
          break;
        }
      }

    if (LoopSize > 16 && LoopSize <= 32)
      return Align(32);

    break;
  }
  }

  return TargetLowering::getPrefLoopAlignment(ML);
}

15446

15447

/// getConstraintType - Given a constraint, return the type of

15448

/// constraint it is for this target.

15449

PPCTargetLowering::ConstraintType

15450

PPCTargetLowering::getConstraintType(StringRef Constraint) const {

15451

if (Constraint.size() == 1) {

15452

switch (Constraint[0]) {

15453

default: break;

15454

case 'b':

15455

case 'r':

15456

case 'f':

15457

case 'd':

15458

case 'v':

15459

case 'y':

15460

return C_RegisterClass;

15461

case 'Z':

15462

// FIXME: While Z does indicate a memory constraint, it specifically

15463

// indicates an r+r address (used in conjunction with the 'y' modifier

15464

// in the replacement string). Currently, we're forcing the base

15465

// register to be r0 in the asm printer (which is interpreted as zero)

15466

// and forming the complete address in the second register. This is

15467

// suboptimal.

15468

return C_Memory;

15469

}

15470

} else if (Constraint == "wc") { // individual CR bits.

15471

return C_RegisterClass;

15472

} else if (Constraint == "wa" || Constraint == "wd" ||

15473

Constraint == "wf" || Constraint == "ws" ||

15474

Constraint == "wi" || Constraint == "ww") {

15475

return C_RegisterClass; // VSX registers.

15476

}

15477

return TargetLowering::getConstraintType(Constraint);

15478

}

15479

15480

/// Examine constraint type and operand type and determine a weight value.

15481

/// This object must already have been set up with the operand type

15482

/// and the current alternative constraint selected.

15483

TargetLowering::ConstraintWeight

15484

PPCTargetLowering::getSingleConstraintMatchWeight(

15485

AsmOperandInfo &info, const char *constraint) const {

15486

ConstraintWeight weight = CW_Invalid;

15487

Value *CallOperandVal = info.CallOperandVal;

15488

// If we don't have a value, we can't do a match,

15489

// but allow it at the lowest weight.

15490

if (!CallOperandVal)

15491

return CW_Default;

15492

Type *type = CallOperandVal->getType();

15493

15494

// Look at the constraint type.

15495

if (StringRef(constraint) == "wc" && type->isIntegerTy(1))

15496

return CW_Register; // an individual CR bit.

15497

else if ((StringRef(constraint) == "wa" ||

15498

StringRef(constraint) == "wd" ||

15499

StringRef(constraint) == "wf") &&

15500

type->isVectorTy())

15501

return CW_Register;

15502

else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))

15503

return CW_Register; // just hold 64-bit integers data.

15504

else if (StringRef(constraint) == "ws" && type->isDoubleTy())

15505

return CW_Register;

15506

else if (StringRef(constraint) == "ww" && type->isFloatTy())

15507

return CW_Register;

15508

15509

switch (*constraint) {

15510

default:

15511

weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);

15512

break;

15513

case 'b':

15514

if (type->isIntegerTy())

15515

weight = CW_Register;

15516

break;

15517

case 'f':

15518

if (type->isFloatTy())

15519

weight = CW_Register;

15520

break;

15521

case 'd':

15522

if (type->isDoubleTy())

15523

weight = CW_Register;

15524

break;

15525

case 'v':

15526

if (type->isVectorTy())

15527

weight = CW_Register;

15528

break;

15529

case 'y':

15530

weight = CW_Register;

15531

break;

15532

case 'Z':

15533

weight = CW_Memory;

15534

break;

15535

}

15536

return weight;

15537

}

15538

15539

std::pair<unsigned, const TargetRegisterClass *>

15540

PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,

15541

StringRef Constraint,

15542

MVT VT) const {

15543

if (Constraint.size() == 1) {

15544

// GCC RS6000 Constraint Letters

15545

switch (Constraint[0]) {

15546

case 'b': // R1-R31

15547

if (VT == MVT::i64 && Subtarget.isPPC64())

15548

return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);

15549

return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);

15550

case 'r': // R0-R31

15551

if (VT == MVT::i64 && Subtarget.isPPC64())

15552

return std::make_pair(0U, &PPC::G8RCRegClass);

15553

return std::make_pair(0U, &PPC::GPRCRegClass);

15554

// 'd' and 'f' constraints are both defined to be "the floating point

15555

// registers", where one is for 32-bit and the other for 64-bit. We don't

15556

// really care overly much here so just give them all the same reg classes.

15557

case 'd':

15558

case 'f':

15559

if (Subtarget.hasSPE()) {

15560

if (VT == MVT::f32 || VT == MVT::i32)

15561

return std::make_pair(0U, &PPC::GPRCRegClass);

15562

if (VT == MVT::f64 || VT == MVT::i64)

15563

return std::make_pair(0U, &PPC::SPERCRegClass);

15564

} else {

15565

if (VT == MVT::f32 || VT == MVT::i32)

15566

return std::make_pair(0U, &PPC::F4RCRegClass);

15567

if (VT == MVT::f64 || VT == MVT::i64)

15568

return std::make_pair(0U, &PPC::F8RCRegClass);

15569

}

15570

break;

15571

case 'v':

15572

if (Subtarget.hasAltivec())

15573

return std::make_pair(0U, &PPC::VRRCRegClass);

15574

break;

15575

case 'y': // crrc

15576

return std::make_pair(0U, &PPC::CRRCRegClass);

15577

}

15578

} else if (Constraint == "wc" && Subtarget.useCRBits()) {

15579

// An individual CR bit.

15580

return std::make_pair(0U, &PPC::CRBITRCRegClass);

15581

} else if ((Constraint == "wa" || Constraint == "wd" ||

15582

Constraint == "wf" || Constraint == "wi") &&

15583

Subtarget.hasVSX()) {

15584

return std::make_pair(0U, &PPC::VSRCRegClass);

15585

} else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {

15586

if (VT == MVT::f32 && Subtarget.hasP8Vector())

15587

return std::make_pair(0U, &PPC::VSSRCRegClass);

15588

else

15589

return std::make_pair(0U, &PPC::VSFRCRegClass);

15590

}

15591

15592

// If we name a VSX register, we can't defer to the base class because it

15593

// will not recognize the correct register (their names will be VSL{0-31}

15594

// and V{0-31} so they won't match). So we match them here.

15595

if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {

15596

int VSNum = atoi(Constraint.data() + 3);

15597

assert(VSNum >= 0 && VSNum <= 63 &&((VSNum >= 0 && VSNum <= 63 && "Attempted to access a vsr out of range"
) ? static_cast<void> (0) : __assert_fail ("VSNum >= 0 && VSNum <= 63 && \"Attempted to access a vsr out of range\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 15598, __PRETTY_FUNCTION__))

15598

"Attempted to access a vsr out of range")((VSNum >= 0 && VSNum <= 63 && "Attempted to access a vsr out of range"
) ? static_cast<void> (0) : __assert_fail ("VSNum >= 0 && VSNum <= 63 && \"Attempted to access a vsr out of range\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 15598, __PRETTY_FUNCTION__));

15599

if (VSNum < 32)

15600

return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass);

15601

return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass);

15602

}

15603

std::pair<unsigned, const TargetRegisterClass *> R =

15604

TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);

15605

15606

// r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers

15607

// (which we call X[0-9]+). If a 64-bit value has been requested, and a

15608

// 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent

15609

// register.

15610

// FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use

15611

// the AsmName field from *RegisterInfo.td, then this would not be necessary.

15612

if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&

15613

PPC::GPRCRegClass.contains(R.first))

15614

return std::make_pair(TRI->getMatchingSuperReg(R.first,

15615

PPC::sub_32, &PPC::G8RCRegClass),

15616

&PPC::G8RCRegClass);

15617

15618

// GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.

15619

if (!R.second && StringRef("{cc}").equals_lower(Constraint)) {

15620

R.first = PPC::CR0;

15621

R.second = &PPC::CRRCRegClass;

15622

}

15623

15624

return R;

15625

}

15626

15627

/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops

15628

/// vector. If it is invalid, don't add anything to Ops.

15629

void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,

15630

std::string &Constraint,

15631

std::vector<SDValue>&Ops,

15632

SelectionDAG &DAG) const {

15633

SDValue Result;

15634

15635

// Only support length 1 constraints.

15636

if (Constraint.length() > 1) return;

15637

15638

char Letter = Constraint[0];

15639

switch (Letter) {

15640

default: break;

15641

case 'I':

15642

case 'J':

15643

case 'K':

15644

case 'L':

15645

case 'M':

15646

case 'N':

15647

case 'O':

15648

case 'P': {

15649

ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);

15650

if (!CST) return; // Must be an immediate to match.

15651

SDLoc dl(Op);

15652

int64_t Value = CST->getSExtValue();

15653

EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative

15654

// numbers are printed as such.

15655

switch (Letter) {

15656

default: llvm_unreachable("Unknown constraint letter!")::llvm::llvm_unreachable_internal("Unknown constraint letter!"
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 15656);

15657

case 'I': // "I" is a signed 16-bit constant.

15658

if (isInt<16>(Value))

15659

Result = DAG.getTargetConstant(Value, dl, TCVT);

15660

break;

15661

case 'J': // "J" is a constant with only the high-order 16 bits nonzero.

15662

if (isShiftedUInt<16, 16>(Value))

15663

Result = DAG.getTargetConstant(Value, dl, TCVT);

15664

break;

15665

case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.

15666

if (isShiftedInt<16, 16>(Value))

15667

Result = DAG.getTargetConstant(Value, dl, TCVT);

15668

break;

15669

case 'K': // "K" is a constant with only the low-order 16 bits nonzero.

15670

if (isUInt<16>(Value))

15671

Result = DAG.getTargetConstant(Value, dl, TCVT);

15672

break;

15673

case 'M': // "M" is a constant that is greater than 31.

15674

if (Value > 31)

15675

Result = DAG.getTargetConstant(Value, dl, TCVT);

15676

break;

15677

case 'N': // "N" is a positive constant that is an exact power of two.

15678

if (Value > 0 && isPowerOf2_64(Value))

15679

Result = DAG.getTargetConstant(Value, dl, TCVT);

15680

break;

15681

case 'O': // "O" is the constant zero.

15682

if (Value == 0)

15683

Result = DAG.getTargetConstant(Value, dl, TCVT);

15684

break;

15685

case 'P': // "P" is a constant whose negation is a signed 16-bit constant.

15686

if (isInt<16>(-Value))

15687

Result = DAG.getTargetConstant(Value, dl, TCVT);

15688

break;

15689

}

15690

break;

15691

}

15692

}

15693

15694

if (Result.getNode()) {

15695

Ops.push_back(Result);

15696

return;

15697

}

15698

15699

// Handle standard constraint letters.

15700

TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);

15701

}

15702

15703

// isLegalAddressingMode - Return true if the addressing mode represented

15704

// by AM is legal for this target, for a load/store of the specified type.

15705

bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,

15706

const AddrMode &AM, Type *Ty,

15707

unsigned AS,

15708

Instruction *I) const {

15709

// Vector type r+i form is supported since power9 as DQ form. We don't check

15710

// the offset matching DQ form requirement(off % 16 == 0), because on PowerPC,

15711

// imm form is preferred and the offset can be adjusted to use imm form later

15712

// in pass PPCLoopInstrFormPrep. Also in LSR, for one LSRUse, it uses min and

15713

// max offset to check legal addressing mode, we should be a little aggressive

15714

// to contain other offsets for that LSRUse.

15715

if (Ty->isVectorTy() && AM.BaseOffs != 0 && !Subtarget.hasP9Vector())

15716

return false;

15717

15718

// PPC allows a sign-extended 16-bit immediate field.

15719

if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)

15720

return false;

15721

15722

// No global is ever allowed as a base.

15723

if (AM.BaseGV)

15724

return false;

15725

15726

// PPC only support r+r,

15727

switch (AM.Scale) {

15728

case 0: // "r+i" or just "i", depending on HasBaseReg.

15729

break;

15730

case 1:

15731

if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.

15732

return false;

15733

// Otherwise we have r+r or r+i.

15734

break;

15735

case 2:

15736

if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.

15737

return false;

15738

// Allow 2*r as r+r.

15739

break;

15740

default:

15741

// No other scales are supported.

15742

return false;

15743

}

15744

15745

return true;

15746

}

15747

15748

/// Lower a return-address query. Depth (operand 0 of Op) selects how many
/// frames up to look: zero loads from the return-address frame index, a
/// non-zero depth loads from the frame address produced by LowerFRAMEADDR
/// plus the return save offset.
SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
                                           SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MF.getFrameInfo().setReturnAddressIsTaken(true);

  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  SDLoc dl(Op);
  const unsigned Depth =
      cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();

  // Make sure the function does not optimize away the store of the RA to
  // the stack.
  MF.getInfo<PPCFunctionInfo>()->setLRStoreRequired();

  const bool isPPC64 = Subtarget.isPPC64();
  auto PtrVT = getPointerTy(MF.getDataLayout());

  if (Depth == 0) {
    // Just load the return address off the stack.
    SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
    return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
                       MachinePointerInfo());
  }

  // Outer frame: add the return save offset to that frame's address and load
  // from there.
  SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
  SDValue Offset =
      DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
                      isPPC64 ? MVT::i64 : MVT::i32);
  SDValue SlotAddr = DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset);
  return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), SlotAddr,
                     MachinePointerInfo());
}

15782

15783

/// Lower a frame-address query. Starts from the frame register and follows
/// Depth (operand 0 of Op) loads through it to reach outer frames.
SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
                                          SelectionDAG &DAG) const {
  SDLoc dl(Op);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();

  MachineFunction &MF = DAG.getMachineFunction();
  MF.getFrameInfo().setFrameAddressIsTaken(true);

  EVT PtrVT = getPointerTy(MF.getDataLayout());
  const bool isPPC64 = PtrVT == MVT::i64;

  // Naked functions never have a frame pointer, and so we use r1. For all
  // other functions, this decision must be delayed until during PEI.
  const bool IsNaked = MF.getFunction().hasFnAttribute(Attribute::Naked);
  unsigned FrameReg;
  if (isPPC64)
    FrameReg = IsNaked ? PPC::X1 : PPC::FP8;
  else
    FrameReg = IsNaked ? PPC::R1 : PPC::FP;

  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
                                         PtrVT);
  // Each iteration loads through the current frame address to step out one
  // frame.
  for (; Depth != 0; --Depth)
    FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
                            FrameAddr, MachinePointerInfo());
  return FrameAddr;
}

15810

15811

// FIXME? Maybe this could be a TableGen attribute on some registers and

15812

// this table could be generated automatically from RegInfo.

15813

Register PPCTargetLowering::getRegisterByName(const char* RegName, LLT VT,

15814

const MachineFunction &MF) const {

15815

bool isPPC64 = Subtarget.isPPC64();

15816

15817

bool is64Bit = isPPC64 && VT == LLT::scalar(64);

15818

if (!is64Bit && VT != LLT::scalar(32))

15819

report_fatal_error("Invalid register global variable type");

15820

15821

Register Reg = StringSwitch<Register>(RegName)

15822

.Case("r1", is64Bit ? PPC::X1 : PPC::R1)

15823

.Case("r2", isPPC64 ? Register() : PPC::R2)

15824

.Case("r13", (is64Bit ? PPC::X13 : PPC::R13))

15825

.Default(Register());

15826

15827

if (Reg)

15828

return Reg;

15829

report_fatal_error("Invalid register name global variable");

15830

}

15831

15832

/// Return true if the address node GA is reached indirectly, by loading its
/// address from the GOT/TOC.
bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
  // 32-bit SVR4 ABI access everything as got-indirect. AIX accesses
  // everything indirectly through the TOC, which is similar to the GOT.
  if (Subtarget.is32BitELFABI() || Subtarget.isAIXABI())
    return true;

  // If it is small or large code model, module locals are accessed
  // indirectly by loading their address from .toc/.got.
  const CodeModel::Model CModel = getTargetMachine().getCodeModel();
  if (CModel == CodeModel::Small || CModel == CodeModel::Large)
    return true;

  // JumpTable and BlockAddress are accessed as got-indirect.
  if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
    return true;

  // For a global address the subtarget decides; every other node kind is
  // treated as directly accessed.
  GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA);
  if (!G)
    return false;
  return Subtarget.isGVIndirectSymbol(G->getGlobal());
}

15857

15858

bool

15859

PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {

15860

// The PowerPC target isn't yet aware of offsets.

15861

return false;

15862

}

15863

15864

bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,

15865

const CallInst &I,

15866

MachineFunction &MF,

15867

unsigned Intrinsic) const {

15868

switch (Intrinsic) {

15869

case Intrinsic::ppc_altivec_lvx:

15870

case Intrinsic::ppc_altivec_lvxl:

15871

case Intrinsic::ppc_altivec_lvebx:

15872

case Intrinsic::ppc_altivec_lvehx:

15873

case Intrinsic::ppc_altivec_lvewx:

15874

case Intrinsic::ppc_vsx_lxvd2x:

15875

case Intrinsic::ppc_vsx_lxvw4x:

15876

case Intrinsic::ppc_vsx_lxvd2x_be:

15877

case Intrinsic::ppc_vsx_lxvw4x_be:

15878

case Intrinsic::ppc_vsx_lxvl:

15879

case Intrinsic::ppc_vsx_lxvll: {

15880

EVT VT;

15881

switch (Intrinsic) {

15882

case Intrinsic::ppc_altivec_lvebx:

15883

VT = MVT::i8;

15884

break;

15885

case Intrinsic::ppc_altivec_lvehx:

15886

VT = MVT::i16;

15887

break;

15888

case Intrinsic::ppc_altivec_lvewx:

15889

VT = MVT::i32;

15890

break;

15891

case Intrinsic::ppc_vsx_lxvd2x:

15892

case Intrinsic::ppc_vsx_lxvd2x_be:

15893

VT = MVT::v2f64;

15894

break;

15895

default:

15896

VT = MVT::v4i32;

15897

break;

15898

}

15899

15900

Info.opc = ISD::INTRINSIC_W_CHAIN;

15901

Info.memVT = VT;

15902

Info.ptrVal = I.getArgOperand(0);

15903

Info.offset = -VT.getStoreSize()+1;

15904

Info.size = 2*VT.getStoreSize()-1;

15905

Info.align = Align(1);

15906

Info.flags = MachineMemOperand::MOLoad;

15907

return true;

15908

}

15909

case Intrinsic::ppc_altivec_stvx:

15910

case Intrinsic::ppc_altivec_stvxl:

15911

case Intrinsic::ppc_altivec_stvebx:

15912

case Intrinsic::ppc_altivec_stvehx:

15913

case Intrinsic::ppc_altivec_stvewx:

15914

case Intrinsic::ppc_vsx_stxvd2x:

15915

case Intrinsic::ppc_vsx_stxvw4x:

15916

case Intrinsic::ppc_vsx_stxvd2x_be:

15917

case Intrinsic::ppc_vsx_stxvw4x_be:

15918

case Intrinsic::ppc_vsx_stxvl:

15919

case Intrinsic::ppc_vsx_stxvll: {

15920

EVT VT;

15921

switch (Intrinsic) {

15922

case Intrinsic::ppc_altivec_stvebx:

15923

VT = MVT::i8;

15924

break;

15925

case Intrinsic::ppc_altivec_stvehx:

15926

VT = MVT::i16;

15927

break;

15928

case Intrinsic::ppc_altivec_stvewx:

15929

VT = MVT::i32;

15930

break;

15931

case Intrinsic::ppc_vsx_stxvd2x:

15932

case Intrinsic::ppc_vsx_stxvd2x_be:

15933

VT = MVT::v2f64;

15934

break;

15935

default:

15936

VT = MVT::v4i32;

15937

break;

15938

}

15939

15940

Info.opc = ISD::INTRINSIC_VOID;

15941

Info.memVT = VT;

15942

Info.ptrVal = I.getArgOperand(1);

15943

Info.offset = -VT.getStoreSize()+1;

15944

Info.size = 2*VT.getStoreSize()-1;

15945

Info.align = Align(1);

15946

Info.flags = MachineMemOperand::MOStore;

15947

return true;

15948

}

15949

default:

15950

break;

15951

}

15952

15953

return false;

15954

}

15955

15956

/// It returns EVT::Other if the type should be determined using generic

15957

/// target-independent logic.

15958

EVT PPCTargetLowering::getOptimalMemOpType(

15959

const MemOp &Op, const AttributeList &FuncAttributes) const {

15960

if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {

15961

// We should use Altivec/VSX loads and stores when available. For unaligned

15962

// addresses, unaligned VSX loads are only fast starting with the P8.

15963

if (Subtarget.hasAltivec() && Op.size() >= 16 &&

15964

(Op.isAligned(Align(16)) ||

15965

((Op.isMemset() && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))

15966

return MVT::v4i32;

15967

}

15968

15969

if (Subtarget.isPPC64()) {

15970

return MVT::i64;

15971

}

15972

15973

return MVT::i32;

15974

}

15975

15976

/// Returns true if it is beneficial to convert a load of a constant

15977

/// to just the constant itself.

15978

bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,

15979

Type *Ty) const {

15980

assert(Ty->isIntegerTy())((Ty->isIntegerTy()) ? static_cast<void> (0) : __assert_fail
("Ty->isIntegerTy()", "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 15980, __PRETTY_FUNCTION__));

15981

15982

unsigned BitSize = Ty->getPrimitiveSizeInBits();

15983

return !(BitSize == 0 || BitSize > 64);

15984

}

15985

15986

/// A truncate is free only from a 64-bit integer type to a 32-bit integer
/// type.
bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
  const bool BothIntegers = Ty1->isIntegerTy() && Ty2->isIntegerTy();
  if (!BothIntegers)
    return false;

  const unsigned SrcBits = Ty1->getPrimitiveSizeInBits();
  const unsigned DstBits = Ty2->getPrimitiveSizeInBits();
  return SrcBits == 64 && DstBits == 32;
}

15993

15994

/// A truncate is free only from a 64-bit integer value type to a 32-bit
/// integer value type.
bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
  const bool BothIntegers = VT1.isInteger() && VT2.isInteger();
  if (!BothIntegers)
    return false;

  const unsigned SrcBits = VT1.getSizeInBits();
  const unsigned DstBits = VT2.getSizeInBits();
  return SrcBits == 64 && DstBits == 32;
}

16001

16002

/// Return true if zero-extending Val to VT2 costs nothing.
bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
  // Generally speaking, zexts are not free, but they are free when they can be
  // folded with other operations.
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
    const EVT MemVT = LD->getMemoryVT();
    const bool NarrowIntLoad =
        MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
        (Subtarget.isPPC64() && MemVT == MVT::i32);
    const ISD::LoadExtType ExtKind = LD->getExtensionType();
    const bool PlainOrZext =
        ExtKind == ISD::NON_EXTLOAD || ExtKind == ISD::ZEXTLOAD;
    if (NarrowIntLoad && PlainOrZext)
      return true;
  }

  // FIXME: Add other cases...
  //  - 32-bit shifts with a zext to i64
  //  - zext after ctlz, bswap, etc.
  //  - zext after and by a constant mask

  return TargetLowering::isZExtFree(Val, VT2);
}

16021

16022

/// Floating-point extension is free unless the destination is f128.
bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
  assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
         "invalid fpext types");
  // Extending to float128 is not free.
  return DestVT != MVT::f128;
}

16030

16031

bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {

16032

return isInt<16>(Imm) || isUInt<16>(Imm);

16033

}

16034

16035

bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {

16036

return isInt<16>(Imm) || isUInt<16>(Imm);

16037

}

16038

16039

bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,

16040

unsigned,

16041

unsigned,

16042

MachineMemOperand::Flags,

16043

bool *Fast) const {

16044

if (DisablePPCUnaligned)

16045

return false;

16046

16047

// PowerPC supports unaligned memory access for simple non-vector types.

16048

// Although accessing unaligned addresses is not as efficient as accessing

16049

// aligned addresses, it is generally more efficient than manual expansion,

16050

// and generally only traps for software emulation when crossing page

16051

// boundaries.

16052

16053

if (!VT.isSimple())

16054

return false;

16055

16056

if (VT.isFloatingPoint() && !VT.isVector() &&

16057

!Subtarget.allowsUnalignedFPAccess())

16058

return false;

16059

16060

if (VT.getSimpleVT().isVector()) {

16061

if (Subtarget.hasVSX()) {

16062

if (VT != MVT::v2f64 && VT != MVT::v2i64 &&

16063

VT != MVT::v4f32 && VT != MVT::v4i32)

16064

return false;

16065

} else {

16066

return false;

16067

}

16068

}

16069

16070

if (VT == MVT::ppcf128)

16071

return false;

16072

16073

if (Fast)

16074

*Fast = true;

16075

16076

return true;

16077

}

16078

16079

bool PPCTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,

16080

SDValue C) const {

16081

// Check integral scalar types.

16082

if (!VT.isScalarInteger())

16083

return false;

16084

if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {

16085

if (!ConstNode->getAPIntValue().isSignedIntN(64))

16086

return false;

16087

// This transformation will generate >= 2 operations. But the following

16088

// cases will generate <= 2 instructions during ISEL. So exclude them.

16089

// 1. If the constant multiplier fits 16 bits, it can be handled by one

16090

// HW instruction, ie. MULLI

16091

// 2. If the multiplier after shifted fits 16 bits, an extra shift

16092

// instruction is needed than case 1, ie. MULLI and RLDICR

16093

int64_t Imm = ConstNode->getSExtValue();

16094

unsigned Shift = countTrailingZeros<uint64_t>(Imm);

16095

Imm >>= Shift;

16096

if (isInt<16>(Imm))

16097

return false;

16098

uint64_t UImm = static_cast<uint64_t>(Imm);

16099

if (isPowerOf2_64(UImm + 1) || isPowerOf2_64(UImm - 1) ||

16100

isPowerOf2_64(1 - UImm) || isPowerOf2_64(-1 - UImm))

16101

return true;

16102

}

16103

return false;

16104

}

16105

16106

bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,

16107

EVT VT) const {

16108

return isFMAFasterThanFMulAndFAdd(

16109

MF.getFunction(), VT.getTypeForEVT(MF.getFunction().getContext()));

16110

}

16111

16112

bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,

16113

Type *Ty) const {

16114

switch (Ty->getScalarType()->getTypeID()) {

16115

case Type::FloatTyID:

16116

case Type::DoubleTyID:

16117

return true;

16118

case Type::FP128TyID:

16119

return Subtarget.hasP9Vector();

16120

default:

16121

return false;

16122

}

16123

}

16124

16125

// FIXME: add more patterns which are not profitable to hoist.

16126

bool PPCTargetLowering::isProfitableToHoist(Instruction *I) const {

16127

if (!I->hasOneUse())

16128

return true;

16129

16130

Instruction *User = I->user_back();

16131

assert(User && "A single use instruction with no uses.")((User && "A single use instruction with no uses.") ?
static_cast<void> (0) : __assert_fail ("User && \"A single use instruction with no uses.\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 16131, __PRETTY_FUNCTION__));

16132

16133

switch (I->getOpcode()) {

16134

case Instruction::FMul: {

16135

// Don't break FMA, PowerPC prefers FMA.

16136

if (User->getOpcode() != Instruction::FSub &&

16137

User->getOpcode() != Instruction::FAdd)

16138

return true;

16139

16140

const TargetOptions &Options = getTargetMachine().Options;

16141

const Function *F = I->getFunction();

16142

const DataLayout &DL = F->getParent()->getDataLayout();

16143

Type *Ty = User->getOperand(0)->getType();

16144

16145

return !(

16146

isFMAFasterThanFMulAndFAdd(*F, Ty) &&

16147

isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&

16148

(Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));

16149

}

16150

case Instruction::Load: {

16151

// Don't break "store (load float*)" pattern, this pattern will be combined

16152

// to "store (load int32)" in later InstCombine pass. See function

16153

// combineLoadToOperationType. On PowerPC, loading a float point takes more

16154

// cycles than loading a 32 bit integer.

16155

LoadInst *LI = cast<LoadInst>(I);

16156

// For the loads that combineLoadToOperationType does nothing, like

16157

// ordered load, it should be profitable to hoist them.

16158

// For swifterror load, it can only be used for pointer to pointer type, so

16159

// later type check should get rid of this case.

16160

if (!LI->isUnordered())

16161

return true;

16162

16163

if (User->getOpcode() != Instruction::Store)

16164

return true;

16165

16166

if (I->getType()->getTypeID() != Type::FloatTyID)

16167

return true;

16168

16169

return false;

16170

}

16171

default:

16172

return true;

16173

}

16174

return true;

16175

}

16176

16177

/// Return the zero-terminated list of scratch registers. The calling
/// convention argument is ignored.
const MCPhysReg *
PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
  // LR is a callee-save register, but we must treat it as clobbered by any call
  // site. Hence we include LR in the scratch registers, which are in turn added
  // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
  // to CTR, which is used by any indirect call.
  static const MCPhysReg ScratchRegs[] = {PPC::X12, PPC::LR8, PPC::CTR8, 0};
  return &ScratchRegs[0];
}

16189

16190

/// The exception pointer register is X3 on PPC64 and R3 otherwise.
Register PPCTargetLowering::getExceptionPointerRegister(
    const Constant *PersonalityFn) const {
  if (Subtarget.isPPC64())
    return PPC::X3;
  return PPC::R3;
}

16194

16195

/// The exception selector register is X4 on PPC64 and R4 otherwise.
Register PPCTargetLowering::getExceptionSelectorRegister(
    const Constant *PersonalityFn) const {
  if (Subtarget.isPPC64())
    return PPC::X4;
  return PPC::R4;
}

16199

16200

bool

16201

PPCTargetLowering::shouldExpandBuildVectorWithShuffles(

16202

EVT VT , unsigned DefinedValues) const {

16203

if (VT == MVT::v2i64)

16204

return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves

16205

16206

if (Subtarget.hasVSX())

16207

return true;

16208

16209

return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);

16210

}

16211

16212

/// Prefer ILP scheduling, unless that preference is disabled or the machine
/// scheduler is enabled, in which case the TargetLowering choice is used.
Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
  const bool UseILP = !DisableILPPref && !Subtarget.enableMachineScheduler();
  if (UseILP)
    return Sched::ILP;

  return TargetLowering::getSchedulingPreference(N);
}

16218

16219

// Create a fast isel object.

16220

FastISel *

16221

PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,

16222

const TargetLibraryInfo *LibInfo) const {

16223

return PPC::createFastISel(FuncInfo, LibInfo);

16224

}

16225

16226

// 'Inverted' means the FMA opcode after negating one multiplicand.

16227

// For example, (fma -a b c) = (fnmsub a b c)

16228

static unsigned invertFMAOpcode(unsigned Opc) {

16229

switch (Opc) {

16230

default:

16231

llvm_unreachable("Invalid FMA opcode for PowerPC!")::llvm::llvm_unreachable_internal("Invalid FMA opcode for PowerPC!"
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 16231);

16232

case ISD::FMA:

16233

return PPCISD::FNMSUB;

16234

case PPCISD::FNMSUB:

16235

return ISD::FMA;

16236

}

16237

}

16238

16239

/// Try to build the negation of Op. A single-use PPCISD::FNMSUB of a legal
/// type is handled here by negating its operands; every other node is handed
/// to the TargetLowering implementation. Returns a null SDValue when the
/// recursion limit is exceeded or when the FNMSUB addend cannot be negated.
/// Cost is set to the cost of the returned expression.
SDValue PPCTargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                                bool LegalOps, bool OptForSize,
                                                NegatibleCost &Cost,
                                                unsigned Depth) const {
  if (Depth > SelectionDAG::MaxRecursionDepth)
    return SDValue();

  unsigned Opc = Op.getOpcode();
  EVT VT = Op.getValueType();
  SDNodeFlags Flags = Op.getNode()->getFlags();

  if (Opc == PPCISD::FNMSUB && Op.hasOneUse() && isTypeLegal(VT)) {
    const TargetOptions &Options = getTargetMachine().Options;
    SDValue MulLHS = Op.getOperand(0);
    SDValue MulRHS = Op.getOperand(1);
    SDValue Addend = Op.getOperand(2);
    SDLoc Loc(Op);

    // Every rewrite below needs the negated addend, so give up without it.
    NegatibleCost AddendCost = NegatibleCost::Expensive;
    SDValue NegAddend = getNegatedExpression(Addend, DAG, LegalOps, OptForSize,
                                             AddendCost, Depth + 1);
    if (!NegAddend)
      return SDValue();

    // (fneg (fnmsub a b c)) => (fnmsub (fneg a) b (fneg c))
    // (fneg (fnmsub a b c)) => (fnmsub a (fneg b) (fneg c))
    // These transformations may change sign of zeroes. For example,
    // -(-ab-(-c))=-0 while -(-(ab-c))=+0 when a=b=c=1.
    if (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) {
      // Try and choose the cheaper one to negate.
      NegatibleCost LHSCost = NegatibleCost::Expensive;
      SDValue NegLHS = getNegatedExpression(MulLHS, DAG, LegalOps, OptForSize,
                                            LHSCost, Depth + 1);

      NegatibleCost RHSCost = NegatibleCost::Expensive;
      SDValue NegRHS = getNegatedExpression(MulRHS, DAG, LegalOps, OptForSize,
                                            RHSCost, Depth + 1);

      if (NegLHS && LHSCost <= RHSCost) {
        Cost = std::min(LHSCost, AddendCost);
        return DAG.getNode(Opc, Loc, VT, NegLHS, MulRHS, NegAddend, Flags);
      }
      if (NegRHS) {
        Cost = std::min(RHSCost, AddendCost);
        return DAG.getNode(Opc, Loc, VT, MulLHS, NegRHS, NegAddend, Flags);
      }
    }

    // (fneg (fnmsub a b c)) => (fma a b (fneg c))
    if (isOperationLegal(ISD::FMA, VT)) {
      Cost = AddendCost;
      return DAG.getNode(ISD::FMA, Loc, VT, MulLHS, MulRHS, NegAddend, Flags);
    }
  }

  return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize,
                                              Cost, Depth);
}

16303

16304

// Override to enable LOAD_STACK_GUARD lowering on Linux.

16305

bool PPCTargetLowering::useLoadStackGuardNode() const {

16306

if (!Subtarget.isTargetLinux())

16307

return TargetLowering::useLoadStackGuardNode();

16308

return true;

16309

}

16310

16311

// Override to disable global variable loading on Linux.

16312

void PPCTargetLowering::insertSSPDeclarations(Module &M) const {

16313

if (!Subtarget.isTargetLinux())

16314

return TargetLowering::insertSSPDeclarations(M);

16315

}

16316

16317

bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,

16318

bool ForCodeSize) const {

16319

if (!VT.isSimple() || !Subtarget.hasVSX())

16320

return false;

16321

16322

switch(VT.getSimpleVT().SimpleTy) {

16323

default:

16324

// For FP types that are currently not supported by PPC backend, return

16325

// false. Examples: f16, f80.

16326

return false;

16327

case MVT::f32:

16328

case MVT::f64:

16329

if (Subtarget.hasPrefixInstrs()) {

16330

// With prefixed instructions, we can materialize anything that can be

16331

// represented with a 32-bit immediate, not just positive zero.

16332

APFloat APFloatOfImm = Imm;

16333

return convertToNonDenormSingle(APFloatOfImm);

16334

}

16335

LLVM_FALLTHROUGH[[gnu::fallthrough]];

16336

case MVT::ppcf128:

16337

return Imm.isPosZero();

16338

}

16339

}

16340

16341

// For vector shift operation op, fold

16342

// (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)

16343

static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,

16344

SelectionDAG &DAG) {

16345

SDValue N0 = N->getOperand(0);

16346

SDValue N1 = N->getOperand(1);

16347

EVT VT = N0.getValueType();

16348

unsigned OpSizeInBits = VT.getScalarSizeInBits();

16349

unsigned Opcode = N->getOpcode();

16350

unsigned TargetOpcode;

16351

16352

switch (Opcode) {

16353

default:

16354

llvm_unreachable("Unexpected shift operation")::llvm::llvm_unreachable_internal("Unexpected shift operation"
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 16354);

16355

case ISD::SHL:

16356

TargetOpcode = PPCISD::SHL;

16357

break;

16358

case ISD::SRL:

16359

TargetOpcode = PPCISD::SRL;

16360

break;

16361

case ISD::SRA:

16362

TargetOpcode = PPCISD::SRA;

16363

break;

16364

}

16365

16366

if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&

16367

N1->getOpcode() == ISD::AND)

16368

if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))

16369

if (Mask->getZExtValue() == OpSizeInBits - 1)

16370

return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));

16371

16372

return SDValue();

16373

}

16374

16375

/// DAG combine for SHL. First tries stripModuloOnShift; otherwise, on 64-bit
/// ISA 3.0 subtargets, turns an i64 shift-left by a constant of a
/// sign-extended i32 into a PPCISD::EXTSWSLI node.
SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
  if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
    return Value;

  SDValue N0 = N->getOperand(0);
  ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));

  // The checks are ordered so that N0's operand is only inspected once N0 is
  // known to be a SIGN_EXTEND.
  const bool Eligible =
      Subtarget.isISA3_0() && Subtarget.isPPC64() &&
      N0.getOpcode() == ISD::SIGN_EXTEND &&
      N0.getOperand(0).getValueType() == MVT::i32 && CN1 != nullptr &&
      N->getValueType(0) == MVT::i64;
  if (!Eligible)
    return SDValue();

  // We can't save an operation here if the value is already extended, and
  // the existing shift is easier to combine.
  SDValue ExtsSrc = N0.getOperand(0);
  if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
      ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
    return SDValue();

  SDLoc DL(N0);
  // We want the shift amount to be i32 on the extswli, but the shift could
  // have an i64.
  SDValue ShiftBy = SDValue(CN1, 0);
  if (ShiftBy.getValueType() == MVT::i64)
    ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);

  return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
                         ShiftBy);
}

16404

16405

// Target combine for ISD::SRA: the only transformation attempted is stripping
// a redundant modulo mask from a vector shift amount. The helper already
// yields an empty SDValue when nothing matched, so its result is returned
// directly.
SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
  return stripModuloOnShift(*this, N, DCI.DAG);
}

16411

16412

// Target combine for ISD::SRL: the only transformation attempted is stripping
// a redundant modulo mask from a vector shift amount. The helper already
// yields an empty SDValue when nothing matched, so its result is returned
// directly.
SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
  return stripModuloOnShift(*this, N, DCI.DAG);
}

16418

16419

// Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))

16420

// Transform (add X, (zext(sete Z, C))) -> (addze X, (subfic (addi Z, -C), 0))

16421

// When C is zero, the equation (addi Z, -C) can be simplified to Z

16422

// Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types

16423

static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,

16424

const PPCSubtarget &Subtarget) {

16425

if (!Subtarget.isPPC64())

16426

return SDValue();

16427

16428

SDValue LHS = N->getOperand(0);

16429

SDValue RHS = N->getOperand(1);

16430

16431

auto isZextOfCompareWithConstant = [](SDValue Op) {

16432

if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||

16433

Op.getValueType() != MVT::i64)

16434

return false;

16435

16436

SDValue Cmp = Op.getOperand(0);

16437

if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||

16438

Cmp.getOperand(0).getValueType() != MVT::i64)

16439

return false;

16440

16441

if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {

16442

int64_t NegConstant = 0 - Constant->getSExtValue();

16443

// Due to the limitations of the addi instruction,

16444

// -C is required to be [-32768, 32767].

16445

return isInt<16>(NegConstant);

16446

}

16447

16448

return false;

16449

};

16450

16451

bool LHSHasPattern = isZextOfCompareWithConstant(LHS);

16452

bool RHSHasPattern = isZextOfCompareWithConstant(RHS);

16453

16454

// If there is a pattern, canonicalize a zext operand to the RHS.

16455

if (LHSHasPattern && !RHSHasPattern)

16456

std::swap(LHS, RHS);

16457

else if (!LHSHasPattern && !RHSHasPattern)

16458

return SDValue();

16459

16460

SDLoc DL(N);

16461

SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Glue);

16462

SDValue Cmp = RHS.getOperand(0);

16463

SDValue Z = Cmp.getOperand(0);

16464

auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1));

16465

16466

assert(Constant && "Constant Should not be a null pointer.")((Constant && "Constant Should not be a null pointer."
) ? static_cast<void> (0) : __assert_fail ("Constant && \"Constant Should not be a null pointer.\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 16466, __PRETTY_FUNCTION__));

16467

int64_t NegConstant = 0 - Constant->getSExtValue();

16468

16469

switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {

16470

default: break;

16471

case ISD::SETNE: {

16472

// when C == 0

16473

// --> addze X, (addic Z, -1).carry

16474

// /

16475

// add X, (zext(setne Z, C))--

16476

// \ when -32768 <= -C <= 32767 && C != 0

16477

// --> addze X, (addic (addi Z, -C), -1).carry

16478

SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,

16479

DAG.getConstant(NegConstant, DL, MVT::i64));

16480

SDValue AddOrZ = NegConstant != 0 ? Add : Z;

16481

SDValue Addc = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i64, MVT::Glue),

16482

AddOrZ, DAG.getConstant(-1ULL, DL, MVT::i64));

16483

return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),

16484

SDValue(Addc.getNode(), 1));

16485

}

16486

case ISD::SETEQ: {

16487

// when C == 0

16488

// --> addze X, (subfic Z, 0).carry

16489

// /

16490

// add X, (zext(sete Z, C))--

16491

// \ when -32768 <= -C <= 32767 && C != 0

16492

// --> addze X, (subfic (addi Z, -C), 0).carry

16493

SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,

16494

DAG.getConstant(NegConstant, DL, MVT::i64));

16495

SDValue AddOrZ = NegConstant != 0 ? Add : Z;

16496

SDValue Subc = DAG.getNode(ISD::SUBC, DL, DAG.getVTList(MVT::i64, MVT::Glue),

16497

DAG.getConstant(0, DL, MVT::i64), AddOrZ);

16498

return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),

16499

SDValue(Subc.getNode(), 1));

16500

}

16501

}

16502

16503

return SDValue();

16504

}

16505

16506

// Transform

16507

// (add C1, (MAT_PCREL_ADDR GlobalAddr+C2)) to

16508

// (MAT_PCREL_ADDR GlobalAddr+(C1+C2))

16509

// In this case both C1 and C2 must be known constants.

16510

// C1+C2 must fit into a 34 bit signed integer.

16511

static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG,

16512

const PPCSubtarget &Subtarget) {

16513

if (!Subtarget.isUsingPCRelativeCalls())

16514

return SDValue();

16515

16516

// Check both Operand 0 and Operand 1 of the ADD node for the PCRel node.

16517

// If we find that node try to cast the Global Address and the Constant.

16518

SDValue LHS = N->getOperand(0);

16519

SDValue RHS = N->getOperand(1);

16520

16521

if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)

16522

std::swap(LHS, RHS);

16523

16524

if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)

16525

return SDValue();

16526

16527

// Operand zero of PPCISD::MAT_PCREL_ADDR is the GA node.

16528

GlobalAddressSDNode *GSDN = dyn_cast<GlobalAddressSDNode>(LHS.getOperand(0));

16529

ConstantSDNode* ConstNode = dyn_cast<ConstantSDNode>(RHS);

16530

16531

// Check that both casts succeeded.

16532

if (!GSDN || !ConstNode)

16533

return SDValue();

16534

16535

int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue();

16536

SDLoc DL(GSDN);

16537

16538

// The signed int offset needs to fit in 34 bits.

16539

if (!isInt<34>(NewOffset))

16540

return SDValue();

16541

16542

// The new global address is a copy of the old global address except

16543

// that it has the updated Offset.

16544

SDValue GA =

16545

DAG.getTargetGlobalAddress(GSDN->getGlobal(), DL, GSDN->getValueType(0),

16546

NewOffset, GSDN->getTargetFlags());

16547

SDValue MatPCRel =

16548

DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, GSDN->getValueType(0), GA);

16549

return MatPCRel;

16550

}

16551

16552

// Target combine for ISD::ADD. Tries the ADDZE fold first and then the
// PC-relative address fold; the result is an empty SDValue if neither
// pattern matched.
SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  if (SDValue Addze = combineADDToADDZE(N, DAG, Subtarget))
    return Addze;

  // The second helper yields an empty SDValue itself on failure.
  return combineADDToMAT_PCREL_ADDR(N, DAG, Subtarget);
}

16561

16562

// Detect TRUNCATE operations on bitcasts of float128 values.

16563

// What we are looking for here is the situtation where we extract a subset

16564

// of bits from a 128 bit float.

16565

// This can be of two forms:

16566

// 1) BITCAST of f128 feeding TRUNCATE

16567

// 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE

16568

// The reason this is required is because we do not have a legal i128 type

16569

// and so we want to prevent having to store the f128 and then reload part

16570

// of it.

16571

SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,

16572

DAGCombinerInfo &DCI) const {

16573

// If we are using CRBits then try that first.

16574

if (Subtarget.useCRBits()) {

16575

// Check if CRBits did anything and return that if it did.

16576

if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))

16577

return CRTruncValue;

16578

}

16579

16580

SDLoc dl(N);

16581

SDValue Op0 = N->getOperand(0);

16582

16583

// fold (truncate (abs (sub (zext a), (zext b)))) -> (vabsd a, b)

16584

if (Subtarget.hasP9Altivec() && Op0.getOpcode() == ISD::ABS) {

16585

EVT VT = N->getValueType(0);

16586

if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)

16587

return SDValue();

16588

SDValue Sub = Op0.getOperand(0);

16589

if (Sub.getOpcode() == ISD::SUB) {

16590

SDValue SubOp0 = Sub.getOperand(0);

16591

SDValue SubOp1 = Sub.getOperand(1);

16592

if ((SubOp0.getOpcode() == ISD::ZERO_EXTEND) &&

16593

(SubOp1.getOpcode() == ISD::ZERO_EXTEND)) {

16594

return DCI.DAG.getNode(PPCISD::VABSD, dl, VT, SubOp0.getOperand(0),

16595

SubOp1.getOperand(0),

16596

DCI.DAG.getTargetConstant(0, dl, MVT::i32));

16597

}

16598

}

16599

}

16600

16601

// Looking for a truncate of i128 to i64.

16602

if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)

16603

return SDValue();

16604

16605

int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;

16606

16607

// SRL feeding TRUNCATE.

16608

if (Op0.getOpcode() == ISD::SRL) {

16609

ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));

16610

// The right shift has to be by 64 bits.

16611

if (!ConstNode || ConstNode->getZExtValue() != 64)

16612

return SDValue();

16613

16614

// Switch the element number to extract.

16615

EltToExtract = EltToExtract ? 0 : 1;

16616

// Update Op0 past the SRL.

16617

Op0 = Op0.getOperand(0);

16618

}

16619

16620

// BITCAST feeding a TRUNCATE possibly via SRL.

16621

if (Op0.getOpcode() == ISD::BITCAST &&

16622

Op0.getValueType() == MVT::i128 &&

16623

Op0.getOperand(0).getValueType() == MVT::f128) {

16624

SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));

16625

return DCI.DAG.getNode(

16626

ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,

16627

DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));

16628

}

16629

return SDValue();

16630

}

16631

16632

// Target combine for ISD::MUL by a constant (or constant splat) of the form
// +/-(2^N + 1) or +/-(2^N - 1): replace the multiply with a shift plus an
// add/sub when the CPU-specific profitability check allows it.
// Returns an empty SDValue when no rewrite is performed.
SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1));
  if (!ConstOpOrElement)
    return SDValue();

  EVT VT = N->getValueType(0);

  // An imul is usually smaller than the alternative sequence for legal type.
  if (DAG.getMachineFunction().getFunction().hasMinSize() &&
      isOperationLegal(ISD::MUL, VT))
    return SDValue();

  // Decide, per CPU directive, whether the shift/add sequence beats the mul.
  auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
    const unsigned Directive = this->Subtarget.getCPUDirective();

    // type        mul     add    shl
    // scalar        4       1      1
    // vector        7       2      2
    if (Directive == PPC::DIR_PWR8)
      return true;

    // type        mul     add    shl
    // scalar        5       2      2
    // vector        7       2      2
    //
    // The table above gives the cycle RATIO of the related operations.
    // mul is 5 (scalar) / 7 (vector); add/sub/shl are all 2 for both scalar
    // and vector types. Two-instruction patterns (add/sub + shl) cost 4 and
    // are always profitable. The three-instruction pattern
    // (mul x, -(2^N + 1)) => -(add (shl x, N), x) costs 6 (sub + add + shl),
    // so it is only done for vector types.
    if (Directive == PPC::DIR_PWR9 || Directive == PPC::DIR_PWR10 ||
        Directive == PPC::DIR_PWR_FUTURE)
      return !(IsAddOne && IsNeg) || VT.isVector();

    // TODO: enhance the condition for subtarget before pwr8
    return false;
  };

  SDLoc DL(N);
  SDValue X = N->getOperand(0);

  const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
  const bool IsNeg = MulAmt.isNegative();
  const APInt MulAmtAbs = MulAmt.abs();

  const APInt AbsMinusOne = MulAmtAbs - 1;
  if (AbsMinusOne.isPowerOf2()) {
    // (mul x, 2^N + 1) => (add (shl x, N), x)
    // (mul x, -(2^N + 1)) => -(add (shl x, N), x)
    if (!IsProfitable(IsNeg, true, VT))
      return SDValue();

    SDValue Shifted = DAG.getNode(
        ISD::SHL, DL, VT, X, DAG.getConstant(AbsMinusOne.logBase2(), DL, VT));
    SDValue Sum = DAG.getNode(ISD::ADD, DL, VT, X, Shifted);
    if (IsNeg)
      return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Sum);
    return Sum;
  }

  const APInt AbsPlusOne = MulAmtAbs + 1;
  if (AbsPlusOne.isPowerOf2()) {
    // (mul x, 2^N - 1) => (sub (shl x, N), x)
    // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
    if (!IsProfitable(IsNeg, false, VT))
      return SDValue();

    SDValue Shifted = DAG.getNode(
        ISD::SHL, DL, VT, X, DAG.getConstant(AbsPlusOne.logBase2(), DL, VT));
    if (IsNeg)
      return DAG.getNode(ISD::SUB, DL, VT, X, Shifted);
    return DAG.getNode(ISD::SUB, DL, VT, Shifted, X);
  }

  return SDValue();
}

16716

16717

// Combine fma-like op (like fnmsub) with fnegs to appropriate op. Do this

16718

// in combiner since we need to check SD flags and other subtarget features.

16719

SDValue PPCTargetLowering::combineFMALike(SDNode *N,

16720

DAGCombinerInfo &DCI) const {

16721

SDValue N0 = N->getOperand(0);

16722

SDValue N1 = N->getOperand(1);

16723

SDValue N2 = N->getOperand(2);

16724

SDNodeFlags Flags = N->getFlags();

16725

EVT VT = N->getValueType(0);

16726

SelectionDAG &DAG = DCI.DAG;

16727

const TargetOptions &Options = getTargetMachine().Options;

16728

unsigned Opc = N->getOpcode();

16729

bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();

16730

bool LegalOps = !DCI.isBeforeLegalizeOps();

16731

SDLoc Loc(N);

16732

16733

if (!isOperationLegal(ISD::FMA, VT))

16734

return SDValue();

16735

16736

// Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0

16737

// since (fnmsub a b c)=-0 while c-ab=+0.

16738

if (!Flags.hasNoSignedZeros() && !Options.NoSignedZerosFPMath)

16739

return SDValue();

16740

16741

// (fma (fneg a) b c) => (fnmsub a b c)

16742

// (fnmsub (fneg a) b c) => (fma a b c)

16743

if (SDValue NegN0 = getCheaperNegatedExpression(N0, DAG, LegalOps, CodeSize))

16744

return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, NegN0, N1, N2, Flags);

16745

16746

// (fma a (fneg b) c) => (fnmsub a b c)

16747

// (fnmsub a (fneg b) c) => (fma a b c)

16748

if (SDValue NegN1 = getCheaperNegatedExpression(N1, DAG, LegalOps, CodeSize))

16749

return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, N0, NegN1, N2, Flags);

16750

16751

return SDValue();

16752

}

16753

16754

bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {

16755

// Only duplicate to increase tail-calls for the 64bit SysV ABIs.

16756

if (!Subtarget.is64BitELFABI())

16757

return false;

16758

16759

// If not a tail call then no need to proceed.

16760

if (!CI->isTailCall())

16761

return false;

16762

16763

// If sibling calls have been disabled and tail-calls aren't guaranteed

16764

// there is no reason to duplicate.

16765

auto &TM = getTargetMachine();

16766

if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)

16767

return false;

16768

16769

// Can't tail call a function called indirectly, or if it has variadic args.

16770

const Function *Callee = CI->getCalledFunction();

16771

if (!Callee || Callee->isVarArg())

16772

return false;

16773

16774

// Make sure the callee and caller calling conventions are eligible for tco.

16775

const Function *Caller = CI->getParent()->getParent();

16776

if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),

16777

CI->getCallingConv()))

16778

return false;

16779

16780

// If the function is local then we have a good chance at tail-calling it

16781

return getTargetMachine().shouldAssumeDSOLocal(*Caller->getParent(), Callee);

16782

}

16783

16784

// Reports whether bit-preserving FP logic is available for VT. This requires
// VSX; f128 additionally requires P9 vector support. The other accepted
// types are f32, f64, v4f32 and v2f64.
bool PPCTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
  if (!Subtarget.hasVSX())
    return false;

  // f128 is only accepted with P9 vector support.
  if (VT == MVT::f128)
    return Subtarget.hasP9Vector();

  const bool IsScalarFP = VT == MVT::f32 || VT == MVT::f64;
  const bool IsVectorFP = VT == MVT::v4f32 || VT == MVT::v2f64;
  return IsScalarFP || IsVectorFP;
}

16792

16793

bool PPCTargetLowering::

16794

isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {

16795

const Value *Mask = AndI.getOperand(1);

16796

// If the mask is suitable for andi. or andis. we should sink the and.

16797

if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {

16798

// Can't handle constants wider than 64-bits.

16799

if (CI->getBitWidth() > 64)

16800

return false;

16801

int64_t ConstVal = CI->getZExtValue();

16802

return isUInt<16>(ConstVal) ||

16803

(isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));

16804

}

16805

16806

// For non-constant masks, we can always use the record-form and.

16807

return true;

16808

}

16809

16810

// Transform (abs (sub (zext a), (zext b))) to (vabsd a b 0)

16811

// Transform (abs (sub (zext a), (zext_invec b))) to (vabsd a b 0)

16812

// Transform (abs (sub (zext_invec a), (zext_invec b))) to (vabsd a b 0)

16813

// Transform (abs (sub (zext_invec a), (zext b))) to (vabsd a b 0)

16814

// Transform (abs (sub a, b) to (vabsd a b 1)) if a & b of type v4i32

16815

SDValue PPCTargetLowering::combineABS(SDNode *N, DAGCombinerInfo &DCI) const {

16816

assert((N->getOpcode() == ISD::ABS) && "Need ABS node here")(((N->getOpcode() == ISD::ABS) && "Need ABS node here"
) ? static_cast<void> (0) : __assert_fail ("(N->getOpcode() == ISD::ABS) && \"Need ABS node here\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 16816, __PRETTY_FUNCTION__));

16817

assert(Subtarget.hasP9Altivec() &&((Subtarget.hasP9Altivec() && "Only combine this when P9 altivec supported!"
) ? static_cast<void> (0) : __assert_fail ("Subtarget.hasP9Altivec() && \"Only combine this when P9 altivec supported!\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 16818, __PRETTY_FUNCTION__))

16818

"Only combine this when P9 altivec supported!")((Subtarget.hasP9Altivec() && "Only combine this when P9 altivec supported!"
) ? static_cast<void> (0) : __assert_fail ("Subtarget.hasP9Altivec() && \"Only combine this when P9 altivec supported!\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 16818, __PRETTY_FUNCTION__));

16819

EVT VT = N->getValueType(0);

16820

if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)

16821

return SDValue();

16822

16823

SelectionDAG &DAG = DCI.DAG;

16824

SDLoc dl(N);

16825

if (N->getOperand(0).getOpcode() == ISD::SUB) {

16826

// Even for signed integers, if it's known to be positive (as signed

16827

// integer) due to zero-extended inputs.

16828

unsigned SubOpcd0 = N->getOperand(0)->getOperand(0).getOpcode();

16829

unsigned SubOpcd1 = N->getOperand(0)->getOperand(1).getOpcode();

16830

if ((SubOpcd0 == ISD::ZERO_EXTEND ||

16831

SubOpcd0 == ISD::ZERO_EXTEND_VECTOR_INREG) &&

16832

(SubOpcd1 == ISD::ZERO_EXTEND ||

16833

SubOpcd1 == ISD::ZERO_EXTEND_VECTOR_INREG)) {

16834

return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),

16835

N->getOperand(0)->getOperand(0),

16836

N->getOperand(0)->getOperand(1),

16837

DAG.getTargetConstant(0, dl, MVT::i32));

16838

}

16839

16840

// For type v4i32, it can be optimized with xvnegsp + vabsduw

16841

if (N->getOperand(0).getValueType() == MVT::v4i32 &&

16842

N->getOperand(0).hasOneUse()) {

16843

return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),

16844

N->getOperand(0)->getOperand(0),

16845

N->getOperand(0)->getOperand(1),

16846

DAG.getTargetConstant(1, dl, MVT::i32));

16847

}

16848

}

16849

16850

return SDValue();

16851

}

16852

16853

// For type v4i32/v8ii16/v16i8, transform

16854

// from (vselect (setcc a, b, setugt), (sub a, b), (sub b, a)) to (vabsd a, b)

16855

// from (vselect (setcc a, b, setuge), (sub a, b), (sub b, a)) to (vabsd a, b)

16856

// from (vselect (setcc a, b, setult), (sub b, a), (sub a, b)) to (vabsd a, b)

16857

// from (vselect (setcc a, b, setule), (sub b, a), (sub a, b)) to (vabsd a, b)

16858

SDValue PPCTargetLowering::combineVSelect(SDNode *N,

16859

DAGCombinerInfo &DCI) const {

16860

assert((N->getOpcode() == ISD::VSELECT) && "Need VSELECT node here")(((N->getOpcode() == ISD::VSELECT) && "Need VSELECT node here"
) ? static_cast<void> (0) : __assert_fail ("(N->getOpcode() == ISD::VSELECT) && \"Need VSELECT node here\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 16860, __PRETTY_FUNCTION__));

16861

assert(Subtarget.hasP9Altivec() &&((Subtarget.hasP9Altivec() && "Only combine this when P9 altivec supported!"
) ? static_cast<void> (0) : __assert_fail ("Subtarget.hasP9Altivec() && \"Only combine this when P9 altivec supported!\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 16862, __PRETTY_FUNCTION__))

16862

"Only combine this when P9 altivec supported!")((Subtarget.hasP9Altivec() && "Only combine this when P9 altivec supported!"
) ? static_cast<void> (0) : __assert_fail ("Subtarget.hasP9Altivec() && \"Only combine this when P9 altivec supported!\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 16862, __PRETTY_FUNCTION__));

16863

16864

SelectionDAG &DAG = DCI.DAG;

16865

SDLoc dl(N);

16866

SDValue Cond = N->getOperand(0);

16867

SDValue TrueOpnd = N->getOperand(1);

16868

SDValue FalseOpnd = N->getOperand(2);

16869

EVT VT = N->getOperand(1).getValueType();

16870

16871

if (Cond.getOpcode() != ISD::SETCC || TrueOpnd.getOpcode() != ISD::SUB ||

16872

FalseOpnd.getOpcode() != ISD::SUB)

16873

return SDValue();

16874

16875

// ABSD only available for type v4i32/v8i16/v16i8

16876

if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)

16877

return SDValue();

16878

16879

// At least to save one more dependent computation

16880

if (!(Cond.hasOneUse() || TrueOpnd.hasOneUse() || FalseOpnd.hasOneUse()))

16881

return SDValue();

16882

16883

ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();

16884

16885

// Can only handle unsigned comparison here

16886

switch (CC) {

16887

default:

16888

return SDValue();

16889

case ISD::SETUGT:

16890

case ISD::SETUGE:

16891

break;

16892

case ISD::SETULT:

16893

case ISD::SETULE:

16894

std::swap(TrueOpnd, FalseOpnd);

16895

break;

16896

}

16897

16898

SDValue CmpOpnd1 = Cond.getOperand(0);

16899

SDValue CmpOpnd2 = Cond.getOperand(1);

16900

16901

// SETCC CmpOpnd1 CmpOpnd2 cond

16902

// TrueOpnd = CmpOpnd1 - CmpOpnd2

16903

// FalseOpnd = CmpOpnd2 - CmpOpnd1

16904

if (TrueOpnd.getOperand(0) == CmpOpnd1 &&

16905

TrueOpnd.getOperand(1) == CmpOpnd2 &&

16906

FalseOpnd.getOperand(0) == CmpOpnd2 &&

16907

FalseOpnd.getOperand(1) == CmpOpnd1) {

16908

return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(1).getValueType(),

16909

CmpOpnd1, CmpOpnd2,

16910

DAG.getTargetConstant(0, dl, MVT::i32));

16911

}

16912

16913

return SDValue();

16914

}

File:	llvm/lib/Target/PowerPC/PPCISelLowering.cpp
Warning:	line 9265, column 36 Although the value stored to 'SplatBits' is used in the enclosing expression, the value is never actually read from 'SplatBits'

Bug Summary

Annotated Source Code