/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp

Bug Summary

File:	lib/Target/ARM/ARMISelLowering.cpp
Location:	line 8580, column 24
Description:	The result of the '<<' expression is undefined

Annotated Source Code

//===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===//

// The LLVM Compiler Infrastructure

// This file is distributed under the University of Illinois Open Source

// License. See LICENSE.TXT for details.

//===----------------------------------------------------------------------===//

// This file defines the interfaces that ARM uses to lower LLVM code into a

// selection DAG.

//===----------------------------------------------------------------------===//

#include "ARMISelLowering.h"

#include "ARMCallingConv.h"

#include "ARMConstantPoolValue.h"

#include "ARMMachineFunctionInfo.h"

#include "ARMPerfectShuffle.h"

#include "ARMSubtarget.h"

#include "ARMTargetMachine.h"

#include "ARMTargetObjectFile.h"

#include "MCTargetDesc/ARMAddressingModes.h"

#include "llvm/ADT/Statistic.h"

#include "llvm/ADT/StringExtras.h"

#include "llvm/CodeGen/CallingConvLower.h"

#include "llvm/CodeGen/IntrinsicLowering.h"

#include "llvm/CodeGen/MachineBasicBlock.h"

#include "llvm/CodeGen/MachineFrameInfo.h"

#include "llvm/CodeGen/MachineFunction.h"

#include "llvm/CodeGen/MachineInstrBuilder.h"

#include "llvm/CodeGen/MachineJumpTableInfo.h"

#include "llvm/CodeGen/MachineModuleInfo.h"

#include "llvm/CodeGen/MachineRegisterInfo.h"

#include "llvm/CodeGen/SelectionDAG.h"

#include "llvm/IR/CallingConv.h"

#include "llvm/IR/Constants.h"

#include "llvm/IR/Function.h"

#include "llvm/IR/GlobalValue.h"

#include "llvm/IR/IRBuilder.h"

#include "llvm/IR/Instruction.h"

#include "llvm/IR/Instructions.h"

#include "llvm/IR/Intrinsics.h"

#include "llvm/IR/Type.h"

#include "llvm/MC/MCSectionMachO.h"

#include "llvm/Support/CommandLine.h"

#include "llvm/Support/Debug.h"

#include "llvm/Support/ErrorHandling.h"

#include "llvm/Support/MathExtras.h"

#include "llvm/Target/TargetOptions.h"

#include <utility>

using namespace llvm;

// Debug/statistics category name used by this file.
#define DEBUG_TYPE "arm-isel"

// Counters declared through the STATISTIC macro; each takes the counter's
// variable name and a human-readable description.
STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");

// Hidden boolean command-line option "arm-long-calls"; defaults to false.
// Declared without `static`, so it is visible to other translation units.
cl::opt<bool>

EnableARMLongCalls("arm-long-calls", cl::Hidden,

cl::desc("Generate calls via indirect call instructions"),

cl::init(false));

// Hidden, file-local boolean command-line option "arm-interworking";
// defaults to true. Per its description it exists for debugging only.
static cl::opt<bool>

ARMInterworking("arm-interworking", cl::Hidden,

cl::desc("Enable / disable ARM interworking (for debugging only)"),

cl::init(true));

namespace {

/// CCState subclass whose constructor additionally records the parameter
/// context (call vs. prologue) it is being used in.
class ARMCCState : public CCState {
public:
  /// Forwards \p CC, \p isVarArg, \p MF, \p locs and \p C to the CCState
  /// constructor and stores \p PC in CallOrPrologue.
  ///
  /// \p PC must be either Call or Prologue; any other value trips the
  /// assertion in builds with assertions enabled.
  ARMCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
             SmallVectorImpl<CCValAssign> &locs, LLVMContext &C,
             ParmContext PC)
      : CCState(CC, isVarArg, MF, locs, C) {
    assert(((PC == Call) || (PC == Prologue)) &&
           "ARMCCState users must specify whether their context is call"
           "or prologue generation.");
    // CallOrPrologue is not declared in this class; presumably it is a member
    // inherited from CCState -- confirm against CallingConvLower.h.
    CallOrPrologue = PC;
  }
};

}

// The APCS parameter registers: the four core registers R0-R3, listed in
// ascending order.

static const MCPhysReg GPRArgRegs[] = {

ARM::R0, ARM::R1, ARM::R2, ARM::R3

};

/// Set up the operation actions for the vector type \p VT.
///
/// \param VT                 The vector type being configured.
/// \param PromotedLdStVT     Type that LOAD/STORE of \p VT are promoted to
///                           when it differs from \p VT.
/// \param PromotedBitwiseVT  Type that AND/OR/XOR of \p VT are promoted to
///                           when \p VT is an integer type that differs from
///                           it.
void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
                                       MVT PromotedBitwiseVT) {
  if (VT != PromotedLdStVT) {
    setOperationAction(ISD::LOAD, VT, Promote);
    AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);

    setOperationAction(ISD::STORE, VT, Promote);
    AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
  }

  MVT ElemTy = VT.getVectorElementType();
  // Only the SETCC action is guarded by this element-type check; vectors of
  // i64/f64 elements do not get the Custom SETCC action here.
  if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
    setOperationAction(ISD::SETCC, VT, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
  // Int <-> FP conversions are Custom for i32 elements and Expand otherwise.
  if (ElemTy == MVT::i32) {
    setOperationAction(ISD::SINT_TO_FP, VT, Custom);
    setOperationAction(ISD::UINT_TO_FP, VT, Custom);
    setOperationAction(ISD::FP_TO_SINT, VT, Custom);
    setOperationAction(ISD::FP_TO_UINT, VT, Custom);
  } else {
    setOperationAction(ISD::SINT_TO_FP, VT, Expand);
    setOperationAction(ISD::UINT_TO_FP, VT, Expand);
    setOperationAction(ISD::FP_TO_SINT, VT, Expand);
    setOperationAction(ISD::FP_TO_UINT, VT, Expand);
  }
  setOperationAction(ISD::BUILD_VECTOR,      VT, Custom);
  setOperationAction(ISD::VECTOR_SHUFFLE,    VT, Custom);
  setOperationAction(ISD::CONCAT_VECTORS,    VT, Legal);
  setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
  setOperationAction(ISD::SELECT,            VT, Expand);
  setOperationAction(ISD::SELECT_CC,         VT, Expand);
  setOperationAction(ISD::VSELECT,           VT, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
  if (VT.isInteger()) {
    setOperationAction(ISD::SHL, VT, Custom);
    setOperationAction(ISD::SRA, VT, Custom);
    setOperationAction(ISD::SRL, VT, Custom);
  }

  // Promote all bit-wise operations.
  if (VT.isInteger() && VT != PromotedBitwiseVT) {
    setOperationAction(ISD::AND, VT, Promote);
    AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT);
    setOperationAction(ISD::OR,  VT, Promote);
    AddPromotedToType (ISD::OR,  VT, PromotedBitwiseVT);
    setOperationAction(ISD::XOR, VT, Promote);
    AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT);
  }

  // Neon does not support vector divide/remainder operations.
  setOperationAction(ISD::SDIV, VT, Expand);
  setOperationAction(ISD::UDIV, VT, Expand);
  setOperationAction(ISD::FDIV, VT, Expand);
  setOperationAction(ISD::SREM, VT, Expand);
  setOperationAction(ISD::UREM, VT, Expand);
  setOperationAction(ISD::FREM, VT, Expand);
}

/// Register \p VT in the DPR register class and configure its operation
/// actions, promoting loads/stores to f64 and bitwise operations to v2i32.
void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
  addRegisterClass(VT, &ARM::DPRRegClass);
  addTypeForNEON(VT, MVT::f64, MVT::v2i32);
}

/// Register \p VT in the DPair register class and configure its operation
/// actions, promoting loads/stores to v2f64 and bitwise operations to v4i32.
void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
  addRegisterClass(VT, &ARM::DPairRegClass);
  addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
}

ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM)

160

: TargetLowering(TM) {

161

Subtarget = &TM.getSubtarget<ARMSubtarget>();

162

RegInfo = TM.getSubtargetImpl()->getRegisterInfo();

163

Itins = TM.getSubtargetImpl()->getInstrItineraryData();

164

165

setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);

166

167

if (Subtarget->isTargetMachO()) {

168

// Uses VFP for Thumb libfuncs if available.

169

if (Subtarget->isThumb() && Subtarget->hasVFP2() &&

170

Subtarget->hasARMOps() && !TM.Options.UseSoftFloat) {

171

// Single-precision floating-point arithmetic.

172

setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");

173

setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");

174

setLibcallName(RTLIB::MUL_F32, "__mulsf3vfp");

175

setLibcallName(RTLIB::DIV_F32, "__divsf3vfp");

176

177

// Double-precision floating-point arithmetic.

178

setLibcallName(RTLIB::ADD_F64, "__adddf3vfp");

179

setLibcallName(RTLIB::SUB_F64, "__subdf3vfp");

180

setLibcallName(RTLIB::MUL_F64, "__muldf3vfp");

181

setLibcallName(RTLIB::DIV_F64, "__divdf3vfp");

182

183

// Single-precision comparisons.

184

setLibcallName(RTLIB::OEQ_F32, "__eqsf2vfp");

185

setLibcallName(RTLIB::UNE_F32, "__nesf2vfp");

186

setLibcallName(RTLIB::OLT_F32, "__ltsf2vfp");

187

setLibcallName(RTLIB::OLE_F32, "__lesf2vfp");

188

setLibcallName(RTLIB::OGE_F32, "__gesf2vfp");

189

setLibcallName(RTLIB::OGT_F32, "__gtsf2vfp");

190

setLibcallName(RTLIB::UO_F32, "__unordsf2vfp");

191

setLibcallName(RTLIB::O_F32, "__unordsf2vfp");

192

193

setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);

194

setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE);

195

setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);

196

setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);

197

setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);

198

setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);

199

setCmpLibcallCC(RTLIB::UO_F32, ISD::SETNE);

200

setCmpLibcallCC(RTLIB::O_F32, ISD::SETEQ);

201

202

// Double-precision comparisons.

203

setLibcallName(RTLIB::OEQ_F64, "__eqdf2vfp");

204

setLibcallName(RTLIB::UNE_F64, "__nedf2vfp");

205

setLibcallName(RTLIB::OLT_F64, "__ltdf2vfp");

206

setLibcallName(RTLIB::OLE_F64, "__ledf2vfp");

207

setLibcallName(RTLIB::OGE_F64, "__gedf2vfp");

208

setLibcallName(RTLIB::OGT_F64, "__gtdf2vfp");

209

setLibcallName(RTLIB::UO_F64, "__unorddf2vfp");

210

setLibcallName(RTLIB::O_F64, "__unorddf2vfp");

211

212

setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);

213

setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETNE);

214

setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);

215

setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);

216

setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);

217

setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);

218

setCmpLibcallCC(RTLIB::UO_F64, ISD::SETNE);

219

setCmpLibcallCC(RTLIB::O_F64, ISD::SETEQ);

220

221

// Floating-point to integer conversions.

222

// i64 conversions are done via library routines even when generating VFP

223

// instructions, so use the same ones.

224

setLibcallName(RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp");

225

setLibcallName(RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp");

226

setLibcallName(RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp");

227

setLibcallName(RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp");

228

229

// Conversions between floating types.

230

setLibcallName(RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp");

231

setLibcallName(RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp");

232

233

// Integer to floating-point conversions.

234

// i64 conversions are done via library routines even when generating VFP

235

// instructions, so use the same ones.

236

// FIXME: There appears to be some naming inconsistency in ARM libgcc:

237

// e.g., __floatunsidf vs. __floatunssidfvfp.

238

setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp");

239

setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp");

240

setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp");

241

setLibcallName(RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp");

242

}

243

}

244

245

// These libcalls are not available in 32-bit.

246

setLibcallName(RTLIB::SHL_I128, nullptr);

247

setLibcallName(RTLIB::SRL_I128, nullptr);

248

setLibcallName(RTLIB::SRA_I128, nullptr);

249

250

if (Subtarget->isAAPCS_ABI() && !Subtarget->isTargetMachO() &&

251

!Subtarget->isTargetWindows()) {

252

static const struct {

253

const RTLIB::Libcall Op;

254

const char * const Name;

255

const CallingConv::ID CC;

256

const ISD::CondCode Cond;

257

} LibraryCalls[] = {

258

// Double-precision floating-point arithmetic helper functions

259

// RTABI chapter 4.1.2, Table 2

260

{ RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

261

{ RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

262

{ RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

263

{ RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

264

265

// Double-precision floating-point comparison helper functions

266

// RTABI chapter 4.1.2, Table 3

267

{ RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },

268

{ RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },

269

{ RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },

270

{ RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE },

271

{ RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },

272

{ RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },

273

{ RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },

274

{ RTLIB::O_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },

275

276

// Single-precision floating-point arithmetic helper functions

277

// RTABI chapter 4.1.2, Table 4

278

{ RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

279

{ RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

280

{ RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

281

{ RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

282

283

// Single-precision floating-point comparison helper functions

284

// RTABI chapter 4.1.2, Table 5

285

{ RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },

286

{ RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },

287

{ RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },

288

{ RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE },

289

{ RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },

290

{ RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },

291

{ RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },

292

{ RTLIB::O_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },

293

294

// Floating-point to integer conversions.

295

// RTABI chapter 4.1.2, Table 6

296

{ RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

297

{ RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

298

{ RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

299

{ RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

300

{ RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

301

{ RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

302

{ RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

303

{ RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

304

305

// Conversions between floating types.

306

// RTABI chapter 4.1.2, Table 7

307

{ RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

308

{ RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

309

{ RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

310

311

// Integer to floating-point conversions.

312

// RTABI chapter 4.1.2, Table 8

313

{ RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

314

{ RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

315

{ RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

316

{ RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

317

{ RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

318

{ RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

319

{ RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

320

{ RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

321

322

// Long long helper functions

323

// RTABI chapter 4.2, Table 9

324

{ RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

325

{ RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

326

{ RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

327

{ RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

328

329

// Integer division functions

330

// RTABI chapter 4.3.1

331

{ RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

332

{ RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

333

{ RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

334

{ RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

335

{ RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

336

{ RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

337

{ RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

338

{ RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

339

340

// Memory operations

341

// RTABI chapter 4.3.4

342

{ RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

343

{ RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

344

{ RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

345

};

346

347

for (const auto &LC : LibraryCalls) {

348

setLibcallName(LC.Op, LC.Name);

349

setLibcallCallingConv(LC.Op, LC.CC);

350

if (LC.Cond != ISD::SETCC_INVALID)

351

setCmpLibcallCC(LC.Op, LC.Cond);

352

}

353

}

354

355

if (Subtarget->isTargetWindows()) {

356

static const struct {

357

const RTLIB::Libcall Op;

358

const char * const Name;

359

const CallingConv::ID CC;

360

} LibraryCalls[] = {

361

{ RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP },

362

{ RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP },

363

{ RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP },

364

{ RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP },

365

{ RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP },

366

{ RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP },

367

{ RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP },

368

{ RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP },

369

};

370

371

for (const auto &LC : LibraryCalls) {

372

setLibcallName(LC.Op, LC.Name);

373

setLibcallCallingConv(LC.Op, LC.CC);

374

}

375

}

376

377

// Use divmod compiler-rt calls for iOS 5.0 and later.

378

if (Subtarget->getTargetTriple().isiOS() &&

379

!Subtarget->getTargetTriple().isOSVersionLT(5, 0)) {

380

setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");

381

setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");

382

}

383

384

// The half <-> float conversion functions are always soft-float, but are

385

// needed for some targets which use a hard-float calling convention by

386

// default.

387

if (Subtarget->isAAPCS_ABI()) {

388

setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS);

389

setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS);

390

setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS);

391

} else {

392

setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS);

393

setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS);

394

setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS);

395

}

396

397

if (Subtarget->isThumb1Only())

398

addRegisterClass(MVT::i32, &ARM::tGPRRegClass);

399

else

400

addRegisterClass(MVT::i32, &ARM::GPRRegClass);

401

if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&

402

!Subtarget->isThumb1Only()) {

403

addRegisterClass(MVT::f32, &ARM::SPRRegClass);

404

addRegisterClass(MVT::f64, &ARM::DPRRegClass);

405

}

406

407

for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;

408

VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {

409

for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;

410

InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)

411

setTruncStoreAction((MVT::SimpleValueType)VT,

412

(MVT::SimpleValueType)InnerVT, Expand);

413

setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand);

414

setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand);

415

setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand);

416

417

setOperationAction(ISD::MULHS, (MVT::SimpleValueType)VT, Expand);

418

setOperationAction(ISD::SMUL_LOHI, (MVT::SimpleValueType)VT, Expand);

419

setOperationAction(ISD::MULHU, (MVT::SimpleValueType)VT, Expand);

420

setOperationAction(ISD::UMUL_LOHI, (MVT::SimpleValueType)VT, Expand);

421

422

setOperationAction(ISD::BSWAP, (MVT::SimpleValueType)VT, Expand);

423

}

424

425

setOperationAction(ISD::ConstantFP, MVT::f32, Custom);

426

setOperationAction(ISD::ConstantFP, MVT::f64, Custom);

427

428

if (Subtarget->hasNEON()) {

429

addDRTypeForNEON(MVT::v2f32);

430

addDRTypeForNEON(MVT::v8i8);

431

addDRTypeForNEON(MVT::v4i16);

432

addDRTypeForNEON(MVT::v2i32);

433

addDRTypeForNEON(MVT::v1i64);

434

435

addQRTypeForNEON(MVT::v4f32);

436

addQRTypeForNEON(MVT::v2f64);

437

addQRTypeForNEON(MVT::v16i8);

438

addQRTypeForNEON(MVT::v8i16);

439

addQRTypeForNEON(MVT::v4i32);

440

addQRTypeForNEON(MVT::v2i64);

441

442

// v2f64 is legal so that QR subregs can be extracted as f64 elements, but

443

// neither Neon nor VFP support any arithmetic operations on it.

444

// The same with v4f32. But keep in mind that vadd, vsub, vmul are natively

445

// supported for v4f32.

446

setOperationAction(ISD::FADD, MVT::v2f64, Expand);

447

setOperationAction(ISD::FSUB, MVT::v2f64, Expand);

448

setOperationAction(ISD::FMUL, MVT::v2f64, Expand);

449

// FIXME: Code duplication: FDIV and FREM are expanded always, see

450

// ARMTargetLowering::addTypeForNEON method for details.

451

setOperationAction(ISD::FDIV, MVT::v2f64, Expand);

452

setOperationAction(ISD::FREM, MVT::v2f64, Expand);

453

// FIXME: Create unittest.

454

// In other words, find a way when "copysign" appears in DAG with vector

455

// operands.

456

setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);

457

// FIXME: Code duplication: SETCC has custom operation action, see

458

// ARMTargetLowering::addTypeForNEON method for details.

459

setOperationAction(ISD::SETCC, MVT::v2f64, Expand);

460

// FIXME: Create unittest for FNEG and for FABS.

461

setOperationAction(ISD::FNEG, MVT::v2f64, Expand);

462

setOperationAction(ISD::FABS, MVT::v2f64, Expand);

463

setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);

464

setOperationAction(ISD::FSIN, MVT::v2f64, Expand);

465

setOperationAction(ISD::FCOS, MVT::v2f64, Expand);

466

setOperationAction(ISD::FPOWI, MVT::v2f64, Expand);

467

setOperationAction(ISD::FPOW, MVT::v2f64, Expand);

468

setOperationAction(ISD::FLOG, MVT::v2f64, Expand);

469

setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);

470

setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);

471

setOperationAction(ISD::FEXP, MVT::v2f64, Expand);

472

setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);

473

// FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.

474

setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);

475

setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);

476

setOperationAction(ISD::FRINT, MVT::v2f64, Expand);

477

setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);

478

setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);

479

setOperationAction(ISD::FMA, MVT::v2f64, Expand);

480

481

setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);

482

setOperationAction(ISD::FSIN, MVT::v4f32, Expand);

483

setOperationAction(ISD::FCOS, MVT::v4f32, Expand);

484

setOperationAction(ISD::FPOWI, MVT::v4f32, Expand);

485

setOperationAction(ISD::FPOW, MVT::v4f32, Expand);

486

setOperationAction(ISD::FLOG, MVT::v4f32, Expand);

487

setOperationAction(ISD::FLOG2, MVT::v4f32, Expand);

488

setOperationAction(ISD::FLOG10, MVT::v4f32, Expand);

489

setOperationAction(ISD::FEXP, MVT::v4f32, Expand);

490

setOperationAction(ISD::FEXP2, MVT::v4f32, Expand);

491

setOperationAction(ISD::FCEIL, MVT::v4f32, Expand);

492

setOperationAction(ISD::FTRUNC, MVT::v4f32, Expand);

493

setOperationAction(ISD::FRINT, MVT::v4f32, Expand);

494

setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);

495

setOperationAction(ISD::FFLOOR, MVT::v4f32, Expand);

496

497

// Mark v2f32 intrinsics.

498

setOperationAction(ISD::FSQRT, MVT::v2f32, Expand);

499

setOperationAction(ISD::FSIN, MVT::v2f32, Expand);

500

setOperationAction(ISD::FCOS, MVT::v2f32, Expand);

501

setOperationAction(ISD::FPOWI, MVT::v2f32, Expand);

502

setOperationAction(ISD::FPOW, MVT::v2f32, Expand);

503

setOperationAction(ISD::FLOG, MVT::v2f32, Expand);

504

setOperationAction(ISD::FLOG2, MVT::v2f32, Expand);

505

setOperationAction(ISD::FLOG10, MVT::v2f32, Expand);

506

setOperationAction(ISD::FEXP, MVT::v2f32, Expand);

507

setOperationAction(ISD::FEXP2, MVT::v2f32, Expand);

508

setOperationAction(ISD::FCEIL, MVT::v2f32, Expand);

509

setOperationAction(ISD::FTRUNC, MVT::v2f32, Expand);

510

setOperationAction(ISD::FRINT, MVT::v2f32, Expand);

511

setOperationAction(ISD::FNEARBYINT, MVT::v2f32, Expand);

512

setOperationAction(ISD::FFLOOR, MVT::v2f32, Expand);

513

514

// Neon does not support some operations on v1i64 and v2i64 types.

515

setOperationAction(ISD::MUL, MVT::v1i64, Expand);

516

// Custom handling for some quad-vector types to detect VMULL.

517

setOperationAction(ISD::MUL, MVT::v8i16, Custom);

518

setOperationAction(ISD::MUL, MVT::v4i32, Custom);

519

setOperationAction(ISD::MUL, MVT::v2i64, Custom);

520

// Custom handling for some vector types to avoid expensive expansions

521

setOperationAction(ISD::SDIV, MVT::v4i16, Custom);

522

setOperationAction(ISD::SDIV, MVT::v8i8, Custom);

523

setOperationAction(ISD::UDIV, MVT::v4i16, Custom);

524

setOperationAction(ISD::UDIV, MVT::v8i8, Custom);

525

setOperationAction(ISD::SETCC, MVT::v1i64, Expand);

526

setOperationAction(ISD::SETCC, MVT::v2i64, Expand);

527

// Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with

528

// a destination type that is wider than the source, and nor does

529

// it have a FP_TO_[SU]INT instruction with a narrower destination than

530

// source.

531

setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);

532

setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);

533

setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);

534

setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);

535

536

setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand);

537

setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);

538

539

// NEON does not have single instruction CTPOP for vectors with element

540

// types wider than 8-bits. However, custom lowering can leverage the

541

// v8i8/v16i8 vcnt instruction.

542

setOperationAction(ISD::CTPOP, MVT::v2i32, Custom);

543

setOperationAction(ISD::CTPOP, MVT::v4i32, Custom);

544

setOperationAction(ISD::CTPOP, MVT::v4i16, Custom);

545

setOperationAction(ISD::CTPOP, MVT::v8i16, Custom);

546

547

// NEON only has FMA instructions as of VFP4.

548

if (!Subtarget->hasVFP4()) {

549

setOperationAction(ISD::FMA, MVT::v2f32, Expand);

550

setOperationAction(ISD::FMA, MVT::v4f32, Expand);

551

}

552

553

setTargetDAGCombine(ISD::INTRINSIC_VOID);

554

setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);

555

setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);

556

setTargetDAGCombine(ISD::SHL);

557

setTargetDAGCombine(ISD::SRL);

558

setTargetDAGCombine(ISD::SRA);

559

setTargetDAGCombine(ISD::SIGN_EXTEND);

560

setTargetDAGCombine(ISD::ZERO_EXTEND);

561

setTargetDAGCombine(ISD::ANY_EXTEND);

562

setTargetDAGCombine(ISD::SELECT_CC);

563

setTargetDAGCombine(ISD::BUILD_VECTOR);

564

setTargetDAGCombine(ISD::VECTOR_SHUFFLE);

565

setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);

566

setTargetDAGCombine(ISD::STORE);

567

setTargetDAGCombine(ISD::FP_TO_SINT);

568

setTargetDAGCombine(ISD::FP_TO_UINT);

569

setTargetDAGCombine(ISD::FDIV);

570

setTargetDAGCombine(ISD::LOAD);

571

572

// It is legal to extload from v4i8 to v4i16 or v4i32.

573

MVT Tys[6] = {MVT::v8i8, MVT::v4i8, MVT::v2i8,

574

MVT::v4i16, MVT::v2i16,

575

MVT::v2i32};

576

for (unsigned i = 0; i < 6; ++i) {

577

setLoadExtAction(ISD::EXTLOAD, Tys[i], Legal);

578

setLoadExtAction(ISD::ZEXTLOAD, Tys[i], Legal);

579

setLoadExtAction(ISD::SEXTLOAD, Tys[i], Legal);

580

}

581

}

582

583

// ARM and Thumb2 support UMLAL/SMLAL.

584

if (!Subtarget->isThumb1Only())

585

setTargetDAGCombine(ISD::ADDC);

586

587

if (Subtarget->isFPOnlySP()) {

588

// When targeting a floating-point unit with only single-precision

589

// operations, f64 is legal for the few double-precision instructions which

590

// are present. However, no double-precision operations other than moves,

591

// loads and stores are provided by the hardware.

592

setOperationAction(ISD::FADD, MVT::f64, Expand);

593

setOperationAction(ISD::FSUB, MVT::f64, Expand);

594

setOperationAction(ISD::FMUL, MVT::f64, Expand);

595

setOperationAction(ISD::FMA, MVT::f64, Expand);

596

setOperationAction(ISD::FDIV, MVT::f64, Expand);

597

setOperationAction(ISD::FREM, MVT::f64, Expand);

598

setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);

599

setOperationAction(ISD::FGETSIGN, MVT::f64, Expand);

600

setOperationAction(ISD::FNEG, MVT::f64, Expand);

601

setOperationAction(ISD::FABS, MVT::f64, Expand);

602

setOperationAction(ISD::FSQRT, MVT::f64, Expand);

603

setOperationAction(ISD::FSIN, MVT::f64, Expand);

604

setOperationAction(ISD::FCOS, MVT::f64, Expand);

605

setOperationAction(ISD::FPOWI, MVT::f64, Expand);

606

setOperationAction(ISD::FPOW, MVT::f64, Expand);

607

setOperationAction(ISD::FLOG, MVT::f64, Expand);

608

setOperationAction(ISD::FLOG2, MVT::f64, Expand);

609

setOperationAction(ISD::FLOG10, MVT::f64, Expand);

610

setOperationAction(ISD::FEXP, MVT::f64, Expand);

611

setOperationAction(ISD::FEXP2, MVT::f64, Expand);

612

setOperationAction(ISD::FCEIL, MVT::f64, Expand);

613

setOperationAction(ISD::FTRUNC, MVT::f64, Expand);

614

setOperationAction(ISD::FRINT, MVT::f64, Expand);

615

setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand);

616

setOperationAction(ISD::FFLOOR, MVT::f64, Expand);

617

setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);

618

setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);

619

}

620

621

computeRegisterProperties();

622

623

// ARM does not have floating-point extending loads.

624

setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);

625

setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand);

626

627

// ... or truncating stores

628

setTruncStoreAction(MVT::f64, MVT::f32, Expand);

629

setTruncStoreAction(MVT::f32, MVT::f16, Expand);

630

setTruncStoreAction(MVT::f64, MVT::f16, Expand);

631

632

// ARM does not have i1 sign extending load.

633

setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);

634

635

// ARM supports all 4 flavors of integer indexed load / store.

636

if (!Subtarget->isThumb1Only()) {

637

for (unsigned im = (unsigned)ISD::PRE_INC;

638

im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {

639

setIndexedLoadAction(im, MVT::i1, Legal);

640

setIndexedLoadAction(im, MVT::i8, Legal);

641

setIndexedLoadAction(im, MVT::i16, Legal);

642

setIndexedLoadAction(im, MVT::i32, Legal);

643

setIndexedStoreAction(im, MVT::i1, Legal);

644

setIndexedStoreAction(im, MVT::i8, Legal);

645

setIndexedStoreAction(im, MVT::i16, Legal);

646

setIndexedStoreAction(im, MVT::i32, Legal);

647

}

648

}

649

650

setOperationAction(ISD::SADDO, MVT::i32, Custom);

651

setOperationAction(ISD::UADDO, MVT::i32, Custom);

652

setOperationAction(ISD::SSUBO, MVT::i32, Custom);

653

setOperationAction(ISD::USUBO, MVT::i32, Custom);

654

655

// i64 operation support.

656

setOperationAction(ISD::MUL, MVT::i64, Expand);

657

setOperationAction(ISD::MULHU, MVT::i32, Expand);

658

if (Subtarget->isThumb1Only()) {

659

setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);

660

setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);

661

}

662

if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()

663

|| (Subtarget->isThumb2() && !Subtarget->hasThumb2DSP()))

664

setOperationAction(ISD::MULHS, MVT::i32, Expand);

665

666

setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);

667

setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);

668

setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);

669

setOperationAction(ISD::SRL, MVT::i64, Custom);

670

setOperationAction(ISD::SRA, MVT::i64, Custom);

671

672

if (!Subtarget->isThumb1Only()) {

673

// FIXME: We should do this for Thumb1 as well.

674

setOperationAction(ISD::ADDC, MVT::i32, Custom);

675

setOperationAction(ISD::ADDE, MVT::i32, Custom);

676

setOperationAction(ISD::SUBC, MVT::i32, Custom);

677

setOperationAction(ISD::SUBE, MVT::i32, Custom);

678

}

679

680

// ARM does not have ROTL.

681

setOperationAction(ISD::ROTL, MVT::i32, Expand);

682

setOperationAction(ISD::CTTZ, MVT::i32, Custom);

683

setOperationAction(ISD::CTPOP, MVT::i32, Expand);

684

if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())

685

setOperationAction(ISD::CTLZ, MVT::i32, Expand);

686

687

// These just redirect to CTTZ and CTLZ on ARM.

688

setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i32 , Expand);

689

setOperationAction(ISD::CTLZ_ZERO_UNDEF , MVT::i32 , Expand);

690

691

setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);

692

693

// Only ARMv6 has BSWAP.

694

if (!Subtarget->hasV6Ops())

695

setOperationAction(ISD::BSWAP, MVT::i32, Expand);

696

697

if (!(Subtarget->hasDivide() && Subtarget->isThumb2()) &&

698

!(Subtarget->hasDivideInARMMode() && !Subtarget->isThumb())) {

699

// These are expanded into libcalls if the cpu doesn't have HW divider.

700

setOperationAction(ISD::SDIV, MVT::i32, Expand);

701

setOperationAction(ISD::UDIV, MVT::i32, Expand);

702

}

703

704

// FIXME: Also set divmod for SREM on EABI

705

setOperationAction(ISD::SREM, MVT::i32, Expand);

706

setOperationAction(ISD::UREM, MVT::i32, Expand);

707

// Register based DivRem for AEABI (RTABI 4.2)

708

if (Subtarget->isTargetAEABI()) {

709

setLibcallName(RTLIB::SDIVREM_I8, "__aeabi_idivmod");

710

setLibcallName(RTLIB::SDIVREM_I16, "__aeabi_idivmod");

711

setLibcallName(RTLIB::SDIVREM_I32, "__aeabi_idivmod");

712

setLibcallName(RTLIB::SDIVREM_I64, "__aeabi_ldivmod");

713

setLibcallName(RTLIB::UDIVREM_I8, "__aeabi_uidivmod");

714

setLibcallName(RTLIB::UDIVREM_I16, "__aeabi_uidivmod");

715

setLibcallName(RTLIB::UDIVREM_I32, "__aeabi_uidivmod");

716

setLibcallName(RTLIB::UDIVREM_I64, "__aeabi_uldivmod");

717

718

setLibcallCallingConv(RTLIB::SDIVREM_I8, CallingConv::ARM_AAPCS);

719

setLibcallCallingConv(RTLIB::SDIVREM_I16, CallingConv::ARM_AAPCS);

720

setLibcallCallingConv(RTLIB::SDIVREM_I32, CallingConv::ARM_AAPCS);

721

setLibcallCallingConv(RTLIB::SDIVREM_I64, CallingConv::ARM_AAPCS);

722

setLibcallCallingConv(RTLIB::UDIVREM_I8, CallingConv::ARM_AAPCS);

723

setLibcallCallingConv(RTLIB::UDIVREM_I16, CallingConv::ARM_AAPCS);

724

setLibcallCallingConv(RTLIB::UDIVREM_I32, CallingConv::ARM_AAPCS);

725

setLibcallCallingConv(RTLIB::UDIVREM_I64, CallingConv::ARM_AAPCS);

726

727

setOperationAction(ISD::SDIVREM, MVT::i32, Custom);

728

setOperationAction(ISD::UDIVREM, MVT::i32, Custom);

729

} else {

730

setOperationAction(ISD::SDIVREM, MVT::i32, Expand);

731

setOperationAction(ISD::UDIVREM, MVT::i32, Expand);

732

}

733

734

setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);

735

setOperationAction(ISD::ConstantPool, MVT::i32, Custom);

736

setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);

737

setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);

738

setOperationAction(ISD::BlockAddress, MVT::i32, Custom);

739

740

setOperationAction(ISD::TRAP, MVT::Other, Legal);

741

742

// Use the default implementation.

743

setOperationAction(ISD::VASTART, MVT::Other, Custom);

744

setOperationAction(ISD::VAARG, MVT::Other, Expand);

745

setOperationAction(ISD::VACOPY, MVT::Other, Expand);

746

setOperationAction(ISD::VAEND, MVT::Other, Expand);

747

setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);

748

setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);

749

750

if (!Subtarget->isTargetMachO()) {

751

// Non-MachO platforms may return values in these registers via the

752

// personality function.

753

setExceptionPointerRegister(ARM::R0);

754

setExceptionSelectorRegister(ARM::R1);

755

}

756

757

if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment())

758

setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);

759

else

760

setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);

761

762

// ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use

763

// the default expansion. If we are targeting a single threaded system,

764

// then set them all for expand so we can lower them later into their

765

// non-atomic form.

766

if (TM.Options.ThreadModel == ThreadModel::Single)

767

setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);

768

else if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) {

769

// ATOMIC_FENCE needs custom lowering; the others should have been expanded

770

// to ldrex/strex loops already.

771

setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);

772

773

// On v8, we have particularly efficient implementations of atomic fences

774

// if they can be combined with nearby atomic loads and stores.

775

if (!Subtarget->hasV8Ops()) {

776

// Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.

777

setInsertFencesForAtomic(true);

778

}

779

} else {

780

// If there's anything we can use as a barrier, go through custom lowering

781

// for ATOMIC_FENCE.

782

setOperationAction(ISD::ATOMIC_FENCE, MVT::Other,

783

Subtarget->hasAnyDataBarrier() ? Custom : Expand);

784

785

// Set them all for expansion, which will force libcalls.

786

setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);

787

setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);

788

setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);

789

setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);

790

setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand);

791

setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand);

792

setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand);

793

setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);

794

setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);

795

setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);

796

setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);

797

setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);

798

// Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the

799

// Unordered/Monotonic case.

800

setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom);

801

setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom);

802

}

803

804

setOperationAction(ISD::PREFETCH, MVT::Other, Custom);

805

806

// Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.

807

if (!Subtarget->hasV6Ops()) {

808

setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);

809

setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);

810

}

811

setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

812

813

if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&

814

!Subtarget->isThumb1Only()) {

815

// Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR

816

// iff target supports vfp2.

817

setOperationAction(ISD::BITCAST, MVT::i64, Custom);

818

setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);

819

}

820

821

// We want to custom lower some of our intrinsics.

822

setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

823

if (Subtarget->isTargetDarwin()) {

824

setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);

825

setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);

826

setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");

827

}

828

829

setOperationAction(ISD::SETCC, MVT::i32, Expand);

830

setOperationAction(ISD::SETCC, MVT::f32, Expand);

831

setOperationAction(ISD::SETCC, MVT::f64, Expand);

832

setOperationAction(ISD::SELECT, MVT::i32, Custom);

833

setOperationAction(ISD::SELECT, MVT::f32, Custom);

834

setOperationAction(ISD::SELECT, MVT::f64, Custom);

835

setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);

836

setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);

837

setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

838

839

setOperationAction(ISD::BRCOND, MVT::Other, Expand);

840

setOperationAction(ISD::BR_CC, MVT::i32, Custom);

841

setOperationAction(ISD::BR_CC, MVT::f32, Custom);

842

setOperationAction(ISD::BR_CC, MVT::f64, Custom);

843

setOperationAction(ISD::BR_JT, MVT::Other, Custom);

844

845

// We don't support sin/cos/fmod/copysign/pow

846

setOperationAction(ISD::FSIN, MVT::f64, Expand);

847

setOperationAction(ISD::FSIN, MVT::f32, Expand);

848

setOperationAction(ISD::FCOS, MVT::f32, Expand);

849

setOperationAction(ISD::FCOS, MVT::f64, Expand);

850

setOperationAction(ISD::FSINCOS, MVT::f64, Expand);

851

setOperationAction(ISD::FSINCOS, MVT::f32, Expand);

852

setOperationAction(ISD::FREM, MVT::f64, Expand);

853

setOperationAction(ISD::FREM, MVT::f32, Expand);

854

if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&

855

!Subtarget->isThumb1Only()) {

856

setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);

857

setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);

858

}

859

setOperationAction(ISD::FPOW, MVT::f64, Expand);

860

setOperationAction(ISD::FPOW, MVT::f32, Expand);

861

862

if (!Subtarget->hasVFP4()) {

863

setOperationAction(ISD::FMA, MVT::f64, Expand);

864

setOperationAction(ISD::FMA, MVT::f32, Expand);

865

}

866

867

// Various VFP goodness

868

if (!TM.Options.UseSoftFloat && !Subtarget->isThumb1Only()) {

869

// int <-> fp are custom expanded into bit_convert + ARMISD ops.

870

if (Subtarget->hasVFP2()) {

871

setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);

872

setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);

873

setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);

874

setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);

875

}

876

877

// FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded.

878

if (!Subtarget->hasFPARMv8() || Subtarget->isFPOnlySP()) {

879

setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);

880

setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);

881

}

882

883

// fp16 is a special v7 extension that adds f16 <-> f32 conversions.

884

if (!Subtarget->hasFP16()) {

885

setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);

886

setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);

887

}

888

}

889

890

// Combine sin / cos into one node or libcall if possible.

891

if (Subtarget->hasSinCos()) {

892

setLibcallName(RTLIB::SINCOS_F32, "sincosf");

893

setLibcallName(RTLIB::SINCOS_F64, "sincos");

894

if (Subtarget->getTargetTriple().isiOS()) {

895

// For iOS, we don't want the normal expansion of a libcall to

896

// sincos. We want to issue a libcall to __sincos_stret.

897

setOperationAction(ISD::FSINCOS, MVT::f64, Custom);

898

setOperationAction(ISD::FSINCOS, MVT::f32, Custom);

899

}

900

}

901

902

// FP-ARMv8 implements a lot of rounding-like FP operations.

903

if (Subtarget->hasFPARMv8()) {

904

setOperationAction(ISD::FFLOOR, MVT::f32, Legal);

905

setOperationAction(ISD::FCEIL, MVT::f32, Legal);

906

setOperationAction(ISD::FROUND, MVT::f32, Legal);

907

setOperationAction(ISD::FTRUNC, MVT::f32, Legal);

908

setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);

909

setOperationAction(ISD::FRINT, MVT::f32, Legal);

910

if (!Subtarget->isFPOnlySP()) {

911

setOperationAction(ISD::FFLOOR, MVT::f64, Legal);

912

setOperationAction(ISD::FCEIL, MVT::f64, Legal);

913

setOperationAction(ISD::FROUND, MVT::f64, Legal);

914

setOperationAction(ISD::FTRUNC, MVT::f64, Legal);

915

setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);

916

setOperationAction(ISD::FRINT, MVT::f64, Legal);

917

}

918

}

919

// We have target-specific dag combine patterns for the following nodes:

920

// ARMISD::VMOVRRD - No need to call setTargetDAGCombine

921

setTargetDAGCombine(ISD::ADD);

922

setTargetDAGCombine(ISD::SUB);

923

setTargetDAGCombine(ISD::MUL);

924

setTargetDAGCombine(ISD::AND);

925

setTargetDAGCombine(ISD::OR);

926

setTargetDAGCombine(ISD::XOR);

927

928

if (Subtarget->hasV6Ops())

929

setTargetDAGCombine(ISD::SRL);

930

931

setStackPointerRegisterToSaveRestore(ARM::SP);

932

933

if (TM.Options.UseSoftFloat || Subtarget->isThumb1Only() ||

934

!Subtarget->hasVFP2())

935

setSchedulingPreference(Sched::RegPressure);

936

else

937

setSchedulingPreference(Sched::Hybrid);

938

939

//// temporary - rewrite interface to use type

940

MaxStoresPerMemset = 8;

941

MaxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 8 : 4;

942

MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores

943

MaxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 4 : 2;

944

MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores

945

MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 4 : 2;

946

947

// On ARM arguments smaller than 4 bytes are extended, so all arguments

948

// are at least 4 bytes aligned.

949

setMinStackArgumentAlignment(4);

950

951

// Prefer likely predicted branches to selects on out-of-order cores.

952

PredictableSelectIsExpensive = Subtarget->isLikeA9();

953

954

setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);

955

}

956

957

// FIXME: It might make sense to define the representative register class as the

958

// nearest super-register that has a non-null superset. For example, DPR_VFP2 is

959

// a super-register of SPR, and DPR is a superset if DPR_VFP2. Consequently,

960

// SPR's representative would be DPR_VFP2. This should work well if register

961

// pressure tracking were modified such that a register use would increment the

962

// pressure of the register class's representative and all of it's super

963

// classes' representatives transitively. We have not implemented this because

964

// of the difficulty prior to coalescing of modeling operand register classes

965

// due to the common occurrence of cross class copies and subregister insertions

966

// and extractions.

967

std::pair<const TargetRegisterClass*, uint8_t>

968

ARMTargetLowering::findRepresentativeClass(MVT VT) const{

969

const TargetRegisterClass *RRC = nullptr;

970

uint8_t Cost = 1;

971

switch (VT.SimpleTy) {

972

default:

973

return TargetLowering::findRepresentativeClass(VT);

974

// Use DPR as representative register class for all floating point

975

// and vector types. Since there are 32 SPR registers and 32 DPR registers so

976

// the cost is 1 for both f32 and f64.

977

case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:

978

case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:

979

RRC = &ARM::DPRRegClass;

980

// When NEON is used for SP, only half of the register file is available

981

// because operations that define both SP and DP results will be constrained

982

// to the VFP2 class (D0-D15). We currently model this constraint prior to

983

// coalescing by double-counting the SP regs. See the FIXME above.

984

if (Subtarget->useNEONForSinglePrecisionFP())

985

Cost = 2;

986

break;

987

case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:

988

case MVT::v4f32: case MVT::v2f64:

989

RRC = &ARM::DPRRegClass;

990

Cost = 2;

991

break;

992

case MVT::v4i64:

993

RRC = &ARM::DPRRegClass;

994

Cost = 4;

995

break;

996

case MVT::v8i64:

997

RRC = &ARM::DPRRegClass;

998

Cost = 8;

999

break;

1000

}

1001

return std::make_pair(RRC, Cost);

1002

}

1003

1004

const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {

1005

switch (Opcode) {

1006

default: return nullptr;

1007

case ARMISD::Wrapper: return "ARMISD::Wrapper";

1008

case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC";

1009

case ARMISD::WrapperJT: return "ARMISD::WrapperJT";

1010

case ARMISD::CALL: return "ARMISD::CALL";

1011

case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED";

1012

case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK";

1013

case ARMISD::tCALL: return "ARMISD::tCALL";

1014

case ARMISD::BRCOND: return "ARMISD::BRCOND";

1015

case ARMISD::BR_JT: return "ARMISD::BR_JT";

1016

case ARMISD::BR2_JT: return "ARMISD::BR2_JT";

1017

case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG";

1018

case ARMISD::INTRET_FLAG: return "ARMISD::INTRET_FLAG";

1019

case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD";

1020

case ARMISD::CMP: return "ARMISD::CMP";

1021

case ARMISD::CMN: return "ARMISD::CMN";

1022

case ARMISD::CMPZ: return "ARMISD::CMPZ";

1023

case ARMISD::CMPFP: return "ARMISD::CMPFP";

1024

case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0";

1025

case ARMISD::BCC_i64: return "ARMISD::BCC_i64";

1026

case ARMISD::FMSTAT: return "ARMISD::FMSTAT";

1027

1028

case ARMISD::CMOV: return "ARMISD::CMOV";

1029

1030

case ARMISD::RBIT: return "ARMISD::RBIT";

1031

1032

case ARMISD::FTOSI: return "ARMISD::FTOSI";

1033

case ARMISD::FTOUI: return "ARMISD::FTOUI";

1034

case ARMISD::SITOF: return "ARMISD::SITOF";

1035

case ARMISD::UITOF: return "ARMISD::UITOF";

1036

1037

case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG";

1038

case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";

1039

case ARMISD::RRX: return "ARMISD::RRX";

1040

1041

case ARMISD::ADDC: return "ARMISD::ADDC";

1042

case ARMISD::ADDE: return "ARMISD::ADDE";

1043

case ARMISD::SUBC: return "ARMISD::SUBC";

1044

case ARMISD::SUBE: return "ARMISD::SUBE";

1045

1046

case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD";

1047

case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR";

1048

1049

case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";

1050

case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP";

1051

1052

case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN";

1053

1054

case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";

1055

1056

case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC";

1057

1058

case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";

1059

1060

case ARMISD::PRELOAD: return "ARMISD::PRELOAD";

1061

1062

case ARMISD::WIN__CHKSTK: return "ARMISD:::WIN__CHKSTK";

1063

1064

case ARMISD::VCEQ: return "ARMISD::VCEQ";

1065

case ARMISD::VCEQZ: return "ARMISD::VCEQZ";

1066

case ARMISD::VCGE: return "ARMISD::VCGE";

1067

case ARMISD::VCGEZ: return "ARMISD::VCGEZ";

1068

case ARMISD::VCLEZ: return "ARMISD::VCLEZ";

1069

case ARMISD::VCGEU: return "ARMISD::VCGEU";

1070

case ARMISD::VCGT: return "ARMISD::VCGT";

1071

case ARMISD::VCGTZ: return "ARMISD::VCGTZ";

1072

case ARMISD::VCLTZ: return "ARMISD::VCLTZ";

1073

case ARMISD::VCGTU: return "ARMISD::VCGTU";

1074

case ARMISD::VTST: return "ARMISD::VTST";

1075

1076

case ARMISD::VSHL: return "ARMISD::VSHL";

1077

case ARMISD::VSHRs: return "ARMISD::VSHRs";

1078

case ARMISD::VSHRu: return "ARMISD::VSHRu";

1079

case ARMISD::VRSHRs: return "ARMISD::VRSHRs";

1080

case ARMISD::VRSHRu: return "ARMISD::VRSHRu";

1081

case ARMISD::VRSHRN: return "ARMISD::VRSHRN";

1082

case ARMISD::VQSHLs: return "ARMISD::VQSHLs";

1083

case ARMISD::VQSHLu: return "ARMISD::VQSHLu";

1084

case ARMISD::VQSHLsu: return "ARMISD::VQSHLsu";

1085

case ARMISD::VQSHRNs: return "ARMISD::VQSHRNs";

1086

case ARMISD::VQSHRNu: return "ARMISD::VQSHRNu";

1087

case ARMISD::VQSHRNsu: return "ARMISD::VQSHRNsu";

1088

case ARMISD::VQRSHRNs: return "ARMISD::VQRSHRNs";

1089

case ARMISD::VQRSHRNu: return "ARMISD::VQRSHRNu";

1090

case ARMISD::VQRSHRNsu: return "ARMISD::VQRSHRNsu";

1091

case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu";

1092

case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs";

1093

case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM";

1094

case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM";

1095

case ARMISD::VMOVFPIMM: return "ARMISD::VMOVFPIMM";

1096

case ARMISD::VDUP: return "ARMISD::VDUP";

1097

case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE";

1098

case ARMISD::VEXT: return "ARMISD::VEXT";

1099

case ARMISD::VREV64: return "ARMISD::VREV64";

1100

case ARMISD::VREV32: return "ARMISD::VREV32";

1101

case ARMISD::VREV16: return "ARMISD::VREV16";

1102

case ARMISD::VZIP: return "ARMISD::VZIP";

1103

case ARMISD::VUZP: return "ARMISD::VUZP";

1104

case ARMISD::VTRN: return "ARMISD::VTRN";

1105

case ARMISD::VTBL1: return "ARMISD::VTBL1";

1106

case ARMISD::VTBL2: return "ARMISD::VTBL2";

1107

case ARMISD::VMULLs: return "ARMISD::VMULLs";

1108

case ARMISD::VMULLu: return "ARMISD::VMULLu";

1109

case ARMISD::UMLAL: return "ARMISD::UMLAL";

1110

case ARMISD::SMLAL: return "ARMISD::SMLAL";

1111

case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";

1112

case ARMISD::FMAX: return "ARMISD::FMAX";

1113

case ARMISD::FMIN: return "ARMISD::FMIN";

1114

case ARMISD::VMAXNM: return "ARMISD::VMAX";

1115

case ARMISD::VMINNM: return "ARMISD::VMIN";

1116

case ARMISD::BFI: return "ARMISD::BFI";

1117

case ARMISD::VORRIMM: return "ARMISD::VORRIMM";

1118

case ARMISD::VBICIMM: return "ARMISD::VBICIMM";

1119

case ARMISD::VBSL: return "ARMISD::VBSL";

1120

case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP";

1121

case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP";

1122

case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP";

1123

case ARMISD::VLD1_UPD: return "ARMISD::VLD1_UPD";

1124

case ARMISD::VLD2_UPD: return "ARMISD::VLD2_UPD";

1125

case ARMISD::VLD3_UPD: return "ARMISD::VLD3_UPD";

1126

case ARMISD::VLD4_UPD: return "ARMISD::VLD4_UPD";

1127

case ARMISD::VLD2LN_UPD: return "ARMISD::VLD2LN_UPD";

1128

case ARMISD::VLD3LN_UPD: return "ARMISD::VLD3LN_UPD";

1129

case ARMISD::VLD4LN_UPD: return "ARMISD::VLD4LN_UPD";

1130

case ARMISD::VLD2DUP_UPD: return "ARMISD::VLD2DUP_UPD";

1131

case ARMISD::VLD3DUP_UPD: return "ARMISD::VLD3DUP_UPD";

1132

case ARMISD::VLD4DUP_UPD: return "ARMISD::VLD4DUP_UPD";

1133

case ARMISD::VST1_UPD: return "ARMISD::VST1_UPD";

1134

case ARMISD::VST2_UPD: return "ARMISD::VST2_UPD";

1135

case ARMISD::VST3_UPD: return "ARMISD::VST3_UPD";

1136

case ARMISD::VST4_UPD: return "ARMISD::VST4_UPD";

1137

case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD";

1138

case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD";

1139

case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD";

1140

}

1141

}

1142

1143

/// getSetCCResultType - Return the value type produced by a SETCC whose
/// operands have type \p VT: the pointer type for non-vector operands, and
/// the same vector with its elements changed to integers otherwise.
EVT ARMTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
  if (VT.isVector())
    return VT.changeVectorElementTypeToInteger();

  return getPointerTy();
}

1147

1148

/// getRegClassFor - Return the register class that should be used for the

1149

/// specified value type.

1150

const TargetRegisterClass *ARMTargetLowering::getRegClassFor(MVT VT) const {

1151

// Map v4i64 to QQ registers but do not make the type legal. Similarly map

1152

// v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to

1153

// load / store 4 to 8 consecutive D registers.

1154

if (Subtarget->hasNEON()) {

1155

if (VT == MVT::v4i64)

1156

return &ARM::QQPRRegClass;

1157

if (VT == MVT::v8i64)

1158

return &ARM::QQQQPRRegClass;

1159

}

1160

return TargetLowering::getRegClassFor(VT);

1161

}

1162

1163

// Create a fast isel object.

1164

FastISel *

1165

ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,

1166

const TargetLibraryInfo *libInfo) const {

1167

return ARM::createFastISel(funcInfo, libInfo);

1168

}

1169

1170

/// getMaximalGlobalOffset - Returns the maximal possible offset which can

1171

/// be used for loads / stores from the global.

1172

unsigned ARMTargetLowering::getMaximalGlobalOffset() const {

1173

return (Subtarget->isThumb1Only() ? 127 : 4095);

1174

}

1175

1176

/// getSchedulingPreference - Choose the scheduling heuristic for node \p N.
/// Nodes that produce a floating point or vector value are scheduled for ILP,
/// as are machine nodes whose first operand cycle in the itinerary exceeds 2;
/// everything else is scheduled for register pressure.
Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
  const unsigned ValueCount = N->getNumValues();
  if (ValueCount == 0)
    return Sched::RegPressure;

  for (unsigned Idx = 0; Idx < ValueCount; ++Idx) {
    EVT ResultVT = N->getValueType(Idx);

    // Glue and chain results carry no data and do not influence the choice.
    const bool IsBookkeeping =
        ResultVT == MVT::Glue || ResultVT == MVT::Other;
    if (IsBookkeeping)
      continue;

    if (ResultVT.isFloatingPoint() || ResultVT.isVector())
      return Sched::ILP;
  }

  // Only selected machine nodes have an instruction description to consult.
  if (!N->isMachineOpcode())
    return Sched::RegPressure;

  // Loads are scheduled for latency even if their instruction itinerary
  // is not available.
  const TargetInstrInfo *InstrInfo =
      getTargetMachine().getSubtargetImpl()->getInstrInfo();
  const MCInstrDesc &Desc = InstrInfo->get(N->getMachineOpcode());

  if (Desc.getNumDefs() == 0)
    return Sched::RegPressure;

  const bool HasLongOperandCycle =
      !Itins->isEmpty() &&
      Itins->getOperandCycle(Desc.getSchedClass(), 0) > 2;
  return HasLongOperandCycle ? Sched::ILP : Sched::RegPressure;
}

1206

1207

//===----------------------------------------------------------------------===//

1208

// Lowering Code

1209

//===----------------------------------------------------------------------===//

1210

1211

/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC

1212

static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {

1213

switch (CC) {

1214

default: llvm_unreachable("Unknown condition code!")::llvm::llvm_unreachable_internal("Unknown condition code!", "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 1214);

1215

case ISD::SETNE: return ARMCC::NE;

1216

case ISD::SETEQ: return ARMCC::EQ;

1217

case ISD::SETGT: return ARMCC::GT;

1218

case ISD::SETGE: return ARMCC::GE;

1219

case ISD::SETLT: return ARMCC::LT;

1220

case ISD::SETLE: return ARMCC::LE;

1221

case ISD::SETUGT: return ARMCC::HI;

1222

case ISD::SETUGE: return ARMCC::HS;

1223

case ISD::SETULT: return ARMCC::LO;

1224

case ISD::SETULE: return ARMCC::LS;

1225

}

1226

}

1227

1228

/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.

1229

static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,

1230

ARMCC::CondCodes &CondCode2) {

1231

CondCode2 = ARMCC::AL;

1232

switch (CC) {

1233

default: llvm_unreachable("Unknown FP condition!")::llvm::llvm_unreachable_internal("Unknown FP condition!", "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 1233);

1234

case ISD::SETEQ:

1235

case ISD::SETOEQ: CondCode = ARMCC::EQ; break;

1236

case ISD::SETGT:

1237

case ISD::SETOGT: CondCode = ARMCC::GT; break;

1238

case ISD::SETGE:

1239

case ISD::SETOGE: CondCode = ARMCC::GE; break;

1240

case ISD::SETOLT: CondCode = ARMCC::MI; break;

1241

case ISD::SETOLE: CondCode = ARMCC::LS; break;

1242

case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;

1243

case ISD::SETO: CondCode = ARMCC::VC; break;

1244

case ISD::SETUO: CondCode = ARMCC::VS; break;

1245

case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;

1246

case ISD::SETUGT: CondCode = ARMCC::HI; break;

1247

case ISD::SETUGE: CondCode = ARMCC::PL; break;

1248

case ISD::SETLT:

1249

case ISD::SETULT: CondCode = ARMCC::LT; break;

1250

case ISD::SETLE:

1251

case ISD::SETULE: CondCode = ARMCC::LE; break;

1252

case ISD::SETNE:

1253

case ISD::SETUNE: CondCode = ARMCC::NE; break;

1254

}

1255

}

1256

1257

//===----------------------------------------------------------------------===//

1258

// Calling Convention Implementation

1259

//===----------------------------------------------------------------------===//

1260

1261

#include "ARMGenCallingConv.inc"

1262

1263

/// getEffectiveCallingConv - Get the effective calling convention, taking into

1264

/// account presence of floating point hardware and calling convention

1265

/// limitations, such as support for variadic functions.

1266

CallingConv::ID

1267

ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,

1268

bool isVarArg) const {

1269

switch (CC) {

1270

default:

1271

llvm_unreachable("Unsupported calling convention")::llvm::llvm_unreachable_internal("Unsupported calling convention"
, "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 1271);

1272

case CallingConv::ARM_AAPCS:

1273

case CallingConv::ARM_APCS:

1274

case CallingConv::GHC:

1275

return CC;

1276

case CallingConv::ARM_AAPCS_VFP:

1277

return isVarArg ? CallingConv::ARM_AAPCS : CallingConv::ARM_AAPCS_VFP;

1278

case CallingConv::C:

1279

if (!Subtarget->isAAPCS_ABI())

1280

return CallingConv::ARM_APCS;

1281

else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() &&

1282

getTargetMachine().Options.FloatABIType == FloatABI::Hard &&

1283

!isVarArg)

1284

return CallingConv::ARM_AAPCS_VFP;

1285

else

1286

return CallingConv::ARM_AAPCS;

1287

case CallingConv::Fast:

1288

if (!Subtarget->isAAPCS_ABI()) {

1289

if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)

1290

return CallingConv::Fast;

1291

return CallingConv::ARM_APCS;

1292

} else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)

1293

return CallingConv::ARM_AAPCS_VFP;

1294

else

1295

return CallingConv::ARM_AAPCS;

1296

}

1297

}

1298

1299

/// CCAssignFnForNode - Selects the correct CCAssignFn for the given

1300

/// CallingConvention.

1301

CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,

1302

bool Return,

1303

bool isVarArg) const {

1304

switch (getEffectiveCallingConv(CC, isVarArg)) {

1305

default:

1306

1307

case CallingConv::ARM_APCS:

1308

return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);

1309

case CallingConv::ARM_AAPCS:

1310

return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);

1311

case CallingConv::ARM_AAPCS_VFP:

1312

return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);

1313

case CallingConv::Fast:

1314

return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);

1315

case CallingConv::GHC:

1316

return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);

1317

}

1318

}

1319

1320

/// LowerCallResult - Lower the result values of a call into the

1321

/// appropriate copies out of appropriate physical registers.

1322

SDValue

1323

ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,

1324

CallingConv::ID CallConv, bool isVarArg,

1325

const SmallVectorImpl<ISD::InputArg> &Ins,

1326

SDLoc dl, SelectionDAG &DAG,

1327

SmallVectorImpl<SDValue> &InVals,

1328

bool isThisReturn, SDValue ThisVal) const {

1329

1330

// Assign locations to each value returned by this call.

1331

SmallVector<CCValAssign, 16> RVLocs;

1332

ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,

1333

*DAG.getContext(), Call);

1334

CCInfo.AnalyzeCallResult(Ins,

1335

CCAssignFnForNode(CallConv, /* Return*/ true,

1336

isVarArg));

1337

1338

// Copy all of the result registers out of their specified physreg.

1339

for (unsigned i = 0; i != RVLocs.size(); ++i) {

1340

CCValAssign VA = RVLocs[i];

1341

1342

// Pass 'this' value directly from the argument to return value, to avoid

1343

// reg unit interference

1344

if (i == 0 && isThisReturn) {

1345

assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&((!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
"unexpected return calling convention register assignment") ?
static_cast<void> (0) : __assert_fail ("!VA.needsCustom() && VA.getLocVT() == MVT::i32 && \"unexpected return calling convention register assignment\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 1346, __PRETTY_FUNCTION__))

1346

"unexpected return calling convention register assignment")((!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
"unexpected return calling convention register assignment") ?
static_cast<void> (0) : __assert_fail ("!VA.needsCustom() && VA.getLocVT() == MVT::i32 && \"unexpected return calling convention register assignment\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 1346, __PRETTY_FUNCTION__));

1347

InVals.push_back(ThisVal);

1348

continue;

1349

}

1350

1351

SDValue Val;

1352

if (VA.needsCustom()) {

1353

// Handle f64 or half of a v2f64.

1354

SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,

1355

InFlag);

1356

Chain = Lo.getValue(1);

1357

InFlag = Lo.getValue(2);

1358

VA = RVLocs[++i]; // skip ahead to next loc

1359

SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,

1360

InFlag);

1361

Chain = Hi.getValue(1);

1362

InFlag = Hi.getValue(2);

1363

if (!Subtarget->isLittle())

1364

std::swap (Lo, Hi);

1365

Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);

1366

1367

if (VA.getLocVT() == MVT::v2f64) {

1368

SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);

1369

Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,

1370

DAG.getConstant(0, MVT::i32));

1371

1372

VA = RVLocs[++i]; // skip ahead to next loc

1373

Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);

1374

Chain = Lo.getValue(1);

1375

InFlag = Lo.getValue(2);

1376

VA = RVLocs[++i]; // skip ahead to next loc

1377

Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);

1378

Chain = Hi.getValue(1);

1379

InFlag = Hi.getValue(2);

1380

if (!Subtarget->isLittle())

1381

std::swap (Lo, Hi);

1382

Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);

1383

Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,

1384

DAG.getConstant(1, MVT::i32));

1385

}

1386

} else {

1387

Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),

1388

InFlag);

1389

Chain = Val.getValue(1);

1390

InFlag = Val.getValue(2);

1391

}

1392

1393

switch (VA.getLocInfo()) {

1394

default: llvm_unreachable("Unknown loc info!")::llvm::llvm_unreachable_internal("Unknown loc info!", "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 1394);

1395

case CCValAssign::Full: break;

1396

case CCValAssign::BCvt:

1397

Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);

1398

break;

1399

}

1400

1401

InVals.push_back(Val);

1402

}

1403

1404

return Chain;

1405

}

1406

1407

/// LowerMemOpCallTo - Store the argument to the stack.

1408

SDValue

1409

ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,

1410

SDValue StackPtr, SDValue Arg,

1411

SDLoc dl, SelectionDAG &DAG,

1412

const CCValAssign &VA,

1413

ISD::ArgFlagsTy Flags) const {

1414

unsigned LocMemOffset = VA.getLocMemOffset();

1415

SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);

1416

PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);

1417

return DAG.getStore(Chain, dl, Arg, PtrOff,

1418

MachinePointerInfo::getStack(LocMemOffset),

1419

false, false, 0);

1420

}

1421

1422

void ARMTargetLowering::PassF64ArgInRegs(SDLoc dl, SelectionDAG &DAG,

1423

SDValue Chain, SDValue &Arg,

1424

RegsToPassVector &RegsToPass,

1425

CCValAssign &VA, CCValAssign &NextVA,

1426

SDValue &StackPtr,

1427

SmallVectorImpl<SDValue> &MemOpChains,

1428

ISD::ArgFlagsTy Flags) const {

1429

1430

SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,

1431

DAG.getVTList(MVT::i32, MVT::i32), Arg);

1432

unsigned id = Subtarget->isLittle() ? 0 : 1;

1433

RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id)));

1434

1435

if (NextVA.isRegLoc())

1436

RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id)));

1437

else {

1438

assert(NextVA.isMemLoc())((NextVA.isMemLoc()) ? static_cast<void> (0) : __assert_fail
("NextVA.isMemLoc()", "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 1438, __PRETTY_FUNCTION__));

1439

if (!StackPtr.getNode())

1440

StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());

1441

1442

MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1-id),

1443

dl, DAG, NextVA,

1444

Flags));

1445

}

1446

}

1447

1448

/// LowerCall - Lowering a call into a callseq_start <-

1449

/// ARMISD:CALL <- callseq_end chain. Also add input and output parameter

1450

/// nodes.

1451

SDValue

1452

ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,

1453

SmallVectorImpl<SDValue> &InVals) const {

1454

SelectionDAG &DAG = CLI.DAG;

1455

SDLoc &dl = CLI.DL;

1456

SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;

1457

SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;

1458

SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;

1459

SDValue Chain = CLI.Chain;

1460

SDValue Callee = CLI.Callee;

1461

bool &isTailCall = CLI.IsTailCall;

1462

CallingConv::ID CallConv = CLI.CallConv;

1463

bool doesNotRet = CLI.DoesNotReturn;

1464

bool isVarArg = CLI.IsVarArg;

1465

1466

MachineFunction &MF = DAG.getMachineFunction();

1467

bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();

1468

bool isThisReturn = false;

1469

bool isSibCall = false;

1470

1471

// Disable tail calls if they're not supported.

1472

if (!Subtarget->supportsTailCall() || MF.getTarget().Options.DisableTailCalls)

1473

isTailCall = false;

1474

1475

if (isTailCall) {

1476

// Check if it's really possible to do a tail call.

1477

isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,

1478

isVarArg, isStructRet, MF.getFunction()->hasStructRetAttr(),

1479

Outs, OutVals, Ins, DAG);

1480

if (!isTailCall && CLI.CS && CLI.CS->isMustTailCall())

1481

report_fatal_error("failed to perform tail call elimination on a call "

1482

"site marked musttail");

1483

// We don't support GuaranteedTailCallOpt for ARM, only automatically

1484

// detected sibcalls.

1485

if (isTailCall) {

1486

++NumTailCalls;

1487

isSibCall = true;

1488

}

1489

}

1490

1491

// Analyze operands of the call, assigning locations to each operand.

1492

SmallVector<CCValAssign, 16> ArgLocs;

1493

ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,

1494

*DAG.getContext(), Call);

1495

CCInfo.AnalyzeCallOperands(Outs,

1496

CCAssignFnForNode(CallConv, /* Return*/ false,

1497

isVarArg));

1498

1499

// Get a count of how many bytes are to be pushed on the stack.

1500

unsigned NumBytes = CCInfo.getNextStackOffset();

1501

1502

// For tail calls, memory operands are available in our caller's stack.

1503

if (isSibCall)

1504

NumBytes = 0;

1505

1506

// Adjust the stack pointer for the new arguments...

1507

// These operations are automatically eliminated by the prolog/epilog pass

1508

if (!isSibCall)

1509

Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),

1510

dl);

1511

1512

SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());

1513

1514

RegsToPassVector RegsToPass;

1515

SmallVector<SDValue, 8> MemOpChains;

1516

1517

// Walk the register/memloc assignments, inserting copies/loads. In the case

1518

// of tail call optimization, arguments are handled later.

1519

for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();

1520

i != e;

1521

++i, ++realArgIdx) {

1522

CCValAssign &VA = ArgLocs[i];

1523

SDValue Arg = OutVals[realArgIdx];

1524

ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;

1525

bool isByVal = Flags.isByVal();

1526

1527

// Promote the value if needed.

1528

switch (VA.getLocInfo()) {

1529

default: llvm_unreachable("Unknown loc info!")::llvm::llvm_unreachable_internal("Unknown loc info!", "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 1529);

1530

case CCValAssign::Full: break;

1531

case CCValAssign::SExt:

1532

Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);

1533

break;

1534

case CCValAssign::ZExt:

1535

Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);

1536

break;

1537

case CCValAssign::AExt:

1538

Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);

1539

break;

1540

case CCValAssign::BCvt:

1541

Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);

1542

break;

1543

}

1544

1545

// f64 and v2f64 might be passed in i32 pairs and must be split into pieces

1546

if (VA.needsCustom()) {

1547

if (VA.getLocVT() == MVT::v2f64) {

1548

SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,

1549

DAG.getConstant(0, MVT::i32));

1550

SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,

1551

DAG.getConstant(1, MVT::i32));

1552

1553

PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,

1554

VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);

1555

1556

VA = ArgLocs[++i]; // skip ahead to next loc

1557

if (VA.isRegLoc()) {

1558

PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,

1559

VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);

1560

} else {

1561

assert(VA.isMemLoc())((VA.isMemLoc()) ? static_cast<void> (0) : __assert_fail
("VA.isMemLoc()", "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 1561, __PRETTY_FUNCTION__));

1562

1563

MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,

1564

dl, DAG, VA, Flags));

1565

}

1566

} else {

1567

PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],

1568

StackPtr, MemOpChains, Flags);

1569

}

1570

} else if (VA.isRegLoc()) {

1571

if (realArgIdx == 0 && Flags.isReturned() && Outs[0].VT == MVT::i32) {

1572

assert(VA.getLocVT() == MVT::i32 &&((VA.getLocVT() == MVT::i32 && "unexpected calling convention register assignment"
) ? static_cast<void> (0) : __assert_fail ("VA.getLocVT() == MVT::i32 && \"unexpected calling convention register assignment\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 1573, __PRETTY_FUNCTION__))

1573

"unexpected calling convention register assignment")((VA.getLocVT() == MVT::i32 && "unexpected calling convention register assignment"
) ? static_cast<void> (0) : __assert_fail ("VA.getLocVT() == MVT::i32 && \"unexpected calling convention register assignment\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 1573, __PRETTY_FUNCTION__));

1574

assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&((!Ins.empty() && Ins[0].VT == MVT::i32 && "unexpected use of 'returned'"
) ? static_cast<void> (0) : __assert_fail ("!Ins.empty() && Ins[0].VT == MVT::i32 && \"unexpected use of 'returned'\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 1575, __PRETTY_FUNCTION__))

1575

"unexpected use of 'returned'")((!Ins.empty() && Ins[0].VT == MVT::i32 && "unexpected use of 'returned'"
) ? static_cast<void> (0) : __assert_fail ("!Ins.empty() && Ins[0].VT == MVT::i32 && \"unexpected use of 'returned'\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 1575, __PRETTY_FUNCTION__));

1576

isThisReturn = true;

1577

}

1578

RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));

1579

} else if (isByVal) {

1580

1581

unsigned offset = 0;

1582

1583

// True if this byval aggregate will be split between registers

1584

// and memory.

1585

unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();

1586

unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();

1587

1588

if (CurByValIdx < ByValArgsCount) {

1589

1590

unsigned RegBegin, RegEnd;

1591

CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);

1592

1593

EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

1594

unsigned int i, j;

1595

for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {

1596

SDValue Const = DAG.getConstant(4*i, MVT::i32);

1597

SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);

1598

SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,

1599

MachinePointerInfo(),

1600

false, false, false,

1601

DAG.InferPtrAlignment(AddArg));

1602

MemOpChains.push_back(Load.getValue(1));

1603

RegsToPass.push_back(std::make_pair(j, Load));

1604

}

1605

1606

// If parameter size outsides register area, "offset" value

1607

// helps us to calculate stack slot for remained part properly.

1608

offset = RegEnd - RegBegin;

1609

1610

CCInfo.nextInRegsParam();

1611

}

1612

1613

if (Flags.getByValSize() > 4*offset) {

1614

unsigned LocMemOffset = VA.getLocMemOffset();

1615

SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset);

1616

SDValue Dst = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr,

1617

StkPtrOff);

1618

SDValue SrcOffset = DAG.getIntPtrConstant(4*offset);

1619

SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset);

1620

SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset,

1621

MVT::i32);

1622

SDValue AlignNode = DAG.getConstant(Flags.getByValAlign(), MVT::i32);

1623

1624

SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);

1625

SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};

1626

MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,

1627

Ops));

1628

}

1629

} else if (!isSibCall) {

1630

1631

1632

MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,

1633

dl, DAG, VA, Flags));

1634

}

1635

}

1636

1637

if (!MemOpChains.empty())

1638

Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);

1639

1640

// Build a sequence of copy-to-reg nodes chained together with token chain

1641

// and flag operands which copy the outgoing args into the appropriate regs.

1642

SDValue InFlag;

1643

// Tail call byval lowering might overwrite argument registers so in case of

1644

// tail call optimization the copies to registers are lowered later.

1645

if (!isTailCall)

1646

for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {

1647

Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,

1648

RegsToPass[i].second, InFlag);

1649

InFlag = Chain.getValue(1);

1650

}

1651

1652

// For tail calls lower the arguments to the 'real' stack slot.

1653

if (isTailCall) {

1654

// Force all the incoming stack arguments to be loaded from the stack

1655

// before any new outgoing arguments are stored to the stack, because the

1656

// outgoing stack slots may alias the incoming argument stack slots, and

1657

// the alias isn't otherwise explicit. This is slightly more conservative

1658

// than necessary, because it means that each store effectively depends

1659

// on every argument instead of just those arguments it would clobber.

1660

1661

// Do not flag preceding copytoreg stuff together with the following stuff.

1662

InFlag = SDValue();

1663

for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {

1664

Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,

1665

RegsToPass[i].second, InFlag);

1666

InFlag = Chain.getValue(1);

1667

}

1668

InFlag = SDValue();

1669

}

1670

1671

// If the callee is a GlobalAddress/ExternalSymbol node (quite common, every

1672

// direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol

1673

// node so that legalize doesn't hack it.

1674

bool isDirect = false;

1675

bool isARMFunc = false;

1676

bool isLocalARMFunc = false;

1677

ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

1678

1679

if (EnableARMLongCalls) {

1680

assert((Subtarget->isTargetWindows() ||(((Subtarget->isTargetWindows() || getTargetMachine().getRelocationModel
() == Reloc::Static) && "long-calls with non-static relocation model!"
) ? static_cast<void> (0) : __assert_fail ("(Subtarget->isTargetWindows() || getTargetMachine().getRelocationModel() == Reloc::Static) && \"long-calls with non-static relocation model!\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 1682, __PRETTY_FUNCTION__))

1681

getTargetMachine().getRelocationModel() == Reloc::Static) &&(((Subtarget->isTargetWindows() || getTargetMachine().getRelocationModel
() == Reloc::Static) && "long-calls with non-static relocation model!"
) ? static_cast<void> (0) : __assert_fail ("(Subtarget->isTargetWindows() || getTargetMachine().getRelocationModel() == Reloc::Static) && \"long-calls with non-static relocation model!\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 1682, __PRETTY_FUNCTION__))

1682

"long-calls with non-static relocation model!")(((Subtarget->isTargetWindows() || getTargetMachine().getRelocationModel
() == Reloc::Static) && "long-calls with non-static relocation model!"
) ? static_cast<void> (0) : __assert_fail ("(Subtarget->isTargetWindows() || getTargetMachine().getRelocationModel() == Reloc::Static) && \"long-calls with non-static relocation model!\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 1682, __PRETTY_FUNCTION__));

1683

// Handle a global address or an external symbol. If it's not one of

1684

// those, the target's already in a register, so we don't need to do

1685

// anything extra.

1686

if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {

1687

const GlobalValue *GV = G->getGlobal();

1688

// Create a constant pool entry for the callee address

1689

unsigned ARMPCLabelIndex = AFI->createPICLabelUId();

1690

ARMConstantPoolValue *CPV =

1691

ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0);

1692

1693

// Get the address of the callee into a register

1694

SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);

1695

CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);

1696

Callee = DAG.getLoad(getPointerTy(), dl,

1697

DAG.getEntryNode(), CPAddr,

1698

MachinePointerInfo::getConstantPool(),

1699

false, false, false, 0);

1700

} else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {

1701

const char *Sym = S->getSymbol();

1702

1703

// Create a constant pool entry for the callee address

1704

unsigned ARMPCLabelIndex = AFI->createPICLabelUId();

1705

ARMConstantPoolValue *CPV =

1706

ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,

1707

ARMPCLabelIndex, 0);

1708

// Get the address of the callee into a register

1709

SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);

1710

CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);

1711

Callee = DAG.getLoad(getPointerTy(), dl,

1712

DAG.getEntryNode(), CPAddr,

1713

MachinePointerInfo::getConstantPool(),

1714

false, false, false, 0);

1715

}

1716

} else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {

1717

const GlobalValue *GV = G->getGlobal();

1718

isDirect = true;

1719

bool isExt = GV->isDeclaration() || GV->isWeakForLinker();

1720

bool isStub = (isExt && Subtarget->isTargetMachO()) &&

1721

getTargetMachine().getRelocationModel() != Reloc::Static;

1722

isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());

1723

// ARM call to a local ARM function is predicable.

1724

isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking);

1725

// tBX takes a register source operand.

1726

if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {

1727

assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?")((Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?"
) ? static_cast<void> (0) : __assert_fail ("Subtarget->isTargetMachO() && \"WrapperPIC use on non-MachO?\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 1727, __PRETTY_FUNCTION__));

1728

Callee = DAG.getNode(ARMISD::WrapperPIC, dl, getPointerTy(),

1729

DAG.getTargetGlobalAddress(GV, dl, getPointerTy(),

1730

0, ARMII::MO_NONLAZY));

1731

Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Callee,

1732

MachinePointerInfo::getGOT(), false, false, true, 0);

1733

} else if (Subtarget->isTargetCOFF()) {

1734

assert(Subtarget->isTargetWindows() &&((Subtarget->isTargetWindows() && "Windows is the only supported COFF target"
) ? static_cast<void> (0) : __assert_fail ("Subtarget->isTargetWindows() && \"Windows is the only supported COFF target\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 1735, __PRETTY_FUNCTION__))

1735

"Windows is the only supported COFF target")((Subtarget->isTargetWindows() && "Windows is the only supported COFF target"
) ? static_cast<void> (0) : __assert_fail ("Subtarget->isTargetWindows() && \"Windows is the only supported COFF target\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 1735, __PRETTY_FUNCTION__));

1736

unsigned TargetFlags = GV->hasDLLImportStorageClass()

1737

? ARMII::MO_DLLIMPORT

1738

: ARMII::MO_NO_FLAG;

1739

Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), /*Offset=*/0,

1740

TargetFlags);

1741

if (GV->hasDLLImportStorageClass())

1742

Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),

1743

DAG.getNode(ARMISD::Wrapper, dl, getPointerTy(),

1744

Callee), MachinePointerInfo::getGOT(),

1745

false, false, false, 0);

1746

} else {

1747

// On ELF targets for PIC code, direct calls should go through the PLT

1748

unsigned OpFlags = 0;

1749

if (Subtarget->isTargetELF() &&

1750

getTargetMachine().getRelocationModel() == Reloc::PIC_)

1751

OpFlags = ARMII::MO_PLT;

1752

Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags);

1753

}

1754

} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {

1755

isDirect = true;

1756

bool isStub = Subtarget->isTargetMachO() &&

1757

getTargetMachine().getRelocationModel() != Reloc::Static;

1758

isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());

1759

// tBX takes a register source operand.

1760

const char *Sym = S->getSymbol();

1761

if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {

1762

unsigned ARMPCLabelIndex = AFI->createPICLabelUId();

1763

ARMConstantPoolValue *CPV =

1764

ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,

1765

ARMPCLabelIndex, 4);

1766

SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);

1767

CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);

1768

Callee = DAG.getLoad(getPointerTy(), dl,

1769

DAG.getEntryNode(), CPAddr,

1770

MachinePointerInfo::getConstantPool(),

1771

false, false, false, 0);

1772

SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);

1773

Callee = DAG.getNode(ARMISD::PIC_ADD, dl,

1774

getPointerTy(), Callee, PICLabel);

1775

} else {

1776

unsigned OpFlags = 0;

1777

// On ELF targets for PIC code, direct calls should go through the PLT

1778

if (Subtarget->isTargetELF() &&

1779

getTargetMachine().getRelocationModel() == Reloc::PIC_)

1780

OpFlags = ARMII::MO_PLT;

1781

Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlags);

1782

}

1783

}

1784

1785

// FIXME: handle tail calls differently.

1786

unsigned CallOpc;

1787

bool HasMinSizeAttr = MF.getFunction()->getAttributes().hasAttribute(

1788

AttributeSet::FunctionIndex, Attribute::MinSize);

1789

if (Subtarget->isThumb()) {

1790

if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())

1791

CallOpc = ARMISD::CALL_NOLINK;

1792

else

1793

CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL;

1794

} else {

1795

if (!isDirect && !Subtarget->hasV5TOps())

1796

CallOpc = ARMISD::CALL_NOLINK;

1797

else if (doesNotRet && isDirect && Subtarget->hasRAS() &&

1798

// Emit regular call when code size is the priority

1799

!HasMinSizeAttr)

1800

// "mov lr, pc; b _foo" to avoid confusing the RSP

1801

CallOpc = ARMISD::CALL_NOLINK;

1802

else

1803

CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;

1804

}

1805

1806

std::vector<SDValue> Ops;

1807

Ops.push_back(Chain);

1808

Ops.push_back(Callee);

1809

1810

// Add argument registers to the end of the list so that they are known live

1811

// into the call.

1812

for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)

1813

Ops.push_back(DAG.getRegister(RegsToPass[i].first,

1814

RegsToPass[i].second.getValueType()));

1815

1816

// Add a register mask operand representing the call-preserved registers.

1817

if (!isTailCall) {

1818

const uint32_t *Mask;

1819

const TargetRegisterInfo *TRI =

1820

getTargetMachine().getSubtargetImpl()->getRegisterInfo();

1821

const ARMBaseRegisterInfo *ARI = static_cast<const ARMBaseRegisterInfo*>(TRI);

1822

if (isThisReturn) {

1823

// For 'this' returns, use the R0-preserving mask if applicable

1824

Mask = ARI->getThisReturnPreservedMask(CallConv);

1825

if (!Mask) {

1826

// Set isThisReturn to false if the calling convention is not one that

1827

// allows 'returned' to be modeled in this way, so LowerCallResult does

1828

// not try to pass 'this' straight through

1829

isThisReturn = false;

1830

Mask = ARI->getCallPreservedMask(CallConv);

1831

}

1832

} else

1833

Mask = ARI->getCallPreservedMask(CallConv);

1834

1835

assert(Mask && "Missing call preserved mask for calling convention")((Mask && "Missing call preserved mask for calling convention"
) ? static_cast<void> (0) : __assert_fail ("Mask && \"Missing call preserved mask for calling convention\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 1835, __PRETTY_FUNCTION__));

1836

Ops.push_back(DAG.getRegisterMask(Mask));

1837

}

1838

1839

if (InFlag.getNode())

1840

Ops.push_back(InFlag);

1841

1842

SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);

1843

if (isTailCall)

1844

return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops);

1845

1846

// Returns a chain and a flag for retval copy to use.

1847

Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);

1848

InFlag = Chain.getValue(1);

1849

1850

Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),

1851

DAG.getIntPtrConstant(0, true), InFlag, dl);

1852

if (!Ins.empty())

1853

InFlag = Chain.getValue(1);

1854

1855

// Handle result values, copying them out of physregs into vregs that we

1856

// return.

1857

return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,

1858

InVals, isThisReturn,

1859

isThisReturn ? OutVals[0] : SDValue());

1860

}

1861

1862

/// HandleByVal - Every parameter *after* a byval parameter is passed

1863

/// on the stack. Remember the next parameter register to allocate,

1864

/// and then confiscate the rest of the parameter registers to insure

1865

/// this.

1866

void

1867

ARMTargetLowering::HandleByVal(

1868

CCState *State, unsigned &size, unsigned Align) const {

1869

unsigned reg = State->AllocateReg(GPRArgRegs, 4);

1870

assert((State->getCallOrPrologue() == Prologue ||(((State->getCallOrPrologue() == Prologue || State->getCallOrPrologue
() == Call) && "unhandled ParmContext") ? static_cast
<void> (0) : __assert_fail ("(State->getCallOrPrologue() == Prologue || State->getCallOrPrologue() == Call) && \"unhandled ParmContext\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 1872, __PRETTY_FUNCTION__))

1871

State->getCallOrPrologue() == Call) &&(((State->getCallOrPrologue() == Prologue || State->getCallOrPrologue
() == Call) && "unhandled ParmContext") ? static_cast
<void> (0) : __assert_fail ("(State->getCallOrPrologue() == Prologue || State->getCallOrPrologue() == Call) && \"unhandled ParmContext\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 1872, __PRETTY_FUNCTION__))

1872

"unhandled ParmContext")(((State->getCallOrPrologue() == Prologue || State->getCallOrPrologue
() == Call) && "unhandled ParmContext") ? static_cast
<void> (0) : __assert_fail ("(State->getCallOrPrologue() == Prologue || State->getCallOrPrologue() == Call) && \"unhandled ParmContext\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 1872, __PRETTY_FUNCTION__));

1873

1874

if ((ARM::R0 <= reg) && (reg <= ARM::R3)) {

1875

if (Subtarget->isAAPCS_ABI() && Align > 4) {

1876

unsigned AlignInRegs = Align / 4;

1877

unsigned Waste = (ARM::R4 - reg) % AlignInRegs;

1878

for (unsigned i = 0; i < Waste; ++i)

1879

reg = State->AllocateReg(GPRArgRegs, 4);

1880

}

1881

if (reg != 0) {

1882

unsigned excess = 4 * (ARM::R4 - reg);

1883

1884

// Special case when NSAA != SP and parameter size greater than size of

1885

// all remained GPR regs. In that case we can't split parameter, we must

1886

// send it to stack. We also must set NCRN to R4, so waste all

1887

// remained registers.

1888

const unsigned NSAAOffset = State->getNextStackOffset();

1889

if (Subtarget->isAAPCS_ABI() && NSAAOffset != 0 && size > excess) {

1890

while (State->AllocateReg(GPRArgRegs, 4))

1891

;

1892

return;

1893

}

1894

1895

// First register for byval parameter is the first register that wasn't

1896

// allocated before this method call, so it would be "reg".

1897

// If parameter is small enough to be saved in range [reg, r4), then

1898

// the end (first after last) register would be reg + param-size-in-regs,

1899

// else parameter would be splitted between registers and stack,

1900

// end register would be r4 in this case.

1901

unsigned ByValRegBegin = reg;

1902

unsigned ByValRegEnd = (size < excess) ? reg + size/4 : (unsigned)ARM::R4;

1903

State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);

1904

// Note, first register is allocated in the beginning of function already,

1905

// allocate remained amount of registers we need.

1906

for (unsigned i = reg+1; i != ByValRegEnd; ++i)

1907

State->AllocateReg(GPRArgRegs, 4);

1908

// A byval parameter that is split between registers and memory needs its

1909

// size truncated here.

1910

// In the case where the entire structure fits in registers, we set the

1911

// size in memory to zero.

1912

if (size < excess)

1913

size = 0;

1914

else

1915

size -= excess;

1916

}

1917

}

1918

}

1919

1920

/// MatchingStackOffset - Return true if the given stack call argument is

1921

/// already available in the same position (relatively) of the caller's

1922

/// incoming argument stack.

1923

static

1924

bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,

1925

MachineFrameInfo *MFI, const MachineRegisterInfo *MRI,

1926

const TargetInstrInfo *TII) {

1927

unsigned Bytes = Arg.getValueType().getSizeInBits() / 8;

1928

int FI = INT_MAX2147483647;

1929

if (Arg.getOpcode() == ISD::CopyFromReg) {

1930

unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();

1931

if (!TargetRegisterInfo::isVirtualRegister(VR))

1932

return false;

1933

MachineInstr *Def = MRI->getVRegDef(VR);

1934

if (!Def)

1935

return false;

1936

if (!Flags.isByVal()) {

1937

if (!TII->isLoadFromStackSlot(Def, FI))

1938

return false;

1939

} else {

1940

return false;

1941

}

1942

} else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {

1943

if (Flags.isByVal())

1944

// ByVal argument is passed in as a pointer but it's now being

1945

// dereferenced. e.g.

1946

// define @foo(%struct.X* %A) {

1947

// tail call @bar(%struct.X* byval %A)

1948

// }

1949

return false;

1950

SDValue Ptr = Ld->getBasePtr();

1951

FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);

1952

if (!FINode)

1953

return false;

1954

FI = FINode->getIndex();

1955

} else

1956

return false;

1957

1958

assert(FI != INT_MAX)((FI != 2147483647) ? static_cast<void> (0) : __assert_fail
("FI != 2147483647", "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 1958, __PRETTY_FUNCTION__));

1959

if (!MFI->isFixedObjectIndex(FI))

1960

return false;

1961

return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI);

1962

}

1963

1964

/// IsEligibleForTailCallOptimization - Check whether the call is eligible

1965

/// for tail call optimization. Targets which want to do tail call

1966

/// optimization should implement this function.

1967

bool

1968

ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,

1969

CallingConv::ID CalleeCC,

1970

bool isVarArg,

1971

bool isCalleeStructRet,

1972

bool isCallerStructRet,

1973

const SmallVectorImpl<ISD::OutputArg> &Outs,

1974

const SmallVectorImpl<SDValue> &OutVals,

1975

const SmallVectorImpl<ISD::InputArg> &Ins,

1976

SelectionDAG& DAG) const {

1977

const Function *CallerF = DAG.getMachineFunction().getFunction();

1978

CallingConv::ID CallerCC = CallerF->getCallingConv();

1979

bool CCMatch = CallerCC == CalleeCC;

1980

1981

// Look for obvious safe cases to perform tail call optimization that do not

1982

// require ABI changes. This is what gcc calls sibcall.

1983

1984

// Do not sibcall optimize vararg calls unless the call site is not passing

1985

// any arguments.

1986

if (isVarArg && !Outs.empty())

1987

return false;

1988

1989

// Exception-handling functions need a special set of instructions to indicate

1990

// a return to the hardware. Tail-calling another function would probably

1991

// break this.

1992

if (CallerF->hasFnAttribute("interrupt"))

1993

return false;

1994

1995

// Also avoid sibcall optimization if either caller or callee uses struct

1996

// return semantics.

1997

if (isCalleeStructRet || isCallerStructRet)

1998

return false;

1999

2000

// FIXME: Completely disable sibcall for Thumb1 since Thumb1RegisterInfo::

2001

// emitEpilogue is not ready for them. Thumb tail calls also use t2B, as

2002

// the Thumb1 16-bit unconditional branch doesn't have sufficient relocation

2003

// support in the assembler and linker to be used. This would need to be

2004

// fixed to fully support tail calls in Thumb1.

2005

2006

// Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take

2007

// LR. This means if we need to reload LR, it takes an extra instructions,

2008

// which outweighs the value of the tail call; but here we don't know yet

2009

// whether LR is going to be used. Probably the right approach is to

2010

// generate the tail call here and turn it back into CALL/RET in

2011

// emitEpilogue if LR is used.

2012

2013

// Thumb1 PIC calls to external symbols use BX, so they can be tail calls,

2014

// but we need to make sure there are enough registers; the only valid

2015

// registers are the 4 used for parameters. We don't currently do this

2016

// case.

2017

if (Subtarget->isThumb1Only())

2018

return false;

2019

2020

// Externally-defined functions with weak linkage should not be

2021

// tail-called on ARM when the OS does not support dynamic

2022

// pre-emption of symbols, as the AAELF spec requires normal calls

2023

// to undefined weak functions to be replaced with a NOP or jump to the

2024

// next instruction. The behaviour of branch instructions in this

2025

// situation (as used for tail calls) is implementation-defined, so we

2026

// cannot rely on the linker replacing the tail call with a return.

2027

if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {

2028

const GlobalValue *GV = G->getGlobal();

2029

if (GV->hasExternalWeakLinkage())

2030

return false;

2031

}

2032

2033

// If the calling conventions do not match, then we'd better make sure the

2034

// results are returned in the same way as what the caller expects.

2035

if (!CCMatch) {

2036

SmallVector<CCValAssign, 16> RVLocs1;

2037

ARMCCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(), RVLocs1,

2038

*DAG.getContext(), Call);

2039

CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC, true, isVarArg));

2040

2041

SmallVector<CCValAssign, 16> RVLocs2;

2042

ARMCCState CCInfo2(CallerCC, false, DAG.getMachineFunction(), RVLocs2,

2043

*DAG.getContext(), Call);

2044

CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC, true, isVarArg));

2045

2046

if (RVLocs1.size() != RVLocs2.size())

2047

return false;

2048

for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) {

2049

if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc())

2050

return false;

2051

if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo())

2052

return false;

2053

if (RVLocs1[i].isRegLoc()) {

2054

if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg())

2055

return false;

2056

} else {

2057

if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset())

2058

return false;

2059

}

2060

}

2061

}

2062

2063

// If Caller's vararg or byval argument has been split between registers and

2064

// stack, do not perform tail call, since part of the argument is in caller's

2065

// local frame.

2066

const ARMFunctionInfo *AFI_Caller = DAG.getMachineFunction().

2067

getInfo<ARMFunctionInfo>();

2068

if (AFI_Caller->getArgRegsSaveSize())

2069

return false;

2070

2071

// If the callee takes no arguments then go on to check the results of the

2072

// call.

2073

if (!Outs.empty()) {

2074

// Check if stack adjustment is needed. For now, do not do this if any

2075

// argument is passed on the stack.

2076

SmallVector<CCValAssign, 16> ArgLocs;

2077

ARMCCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), ArgLocs,

2078

*DAG.getContext(), Call);

2079

CCInfo.AnalyzeCallOperands(Outs,

2080

CCAssignFnForNode(CalleeCC, false, isVarArg));

2081

if (CCInfo.getNextStackOffset()) {

2082

MachineFunction &MF = DAG.getMachineFunction();

2083

2084

// Check if the arguments are already laid out in the right way as

2085

// the caller's fixed stack objects.

2086

MachineFrameInfo *MFI = MF.getFrameInfo();

2087

const MachineRegisterInfo *MRI = &MF.getRegInfo();

2088

const TargetInstrInfo *TII =

2089

getTargetMachine().getSubtargetImpl()->getInstrInfo();

2090

for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();

2091

i != e;

2092

++i, ++realArgIdx) {

2093

CCValAssign &VA = ArgLocs[i];

2094

EVT RegVT = VA.getLocVT();

2095

SDValue Arg = OutVals[realArgIdx];

2096

ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;

2097

if (VA.getLocInfo() == CCValAssign::Indirect)

2098

return false;

2099

if (VA.needsCustom()) {

2100

// f64 and vector types are split into multiple registers or

2101

// register/stack-slot combinations. The types will not match

2102

// the registers; give up on memory f64 refs until we figure

2103

// out what to do about this.

2104

if (!VA.isRegLoc())

2105

return false;

2106

if (!ArgLocs[++i].isRegLoc())

2107

return false;

2108

if (RegVT == MVT::v2f64) {

2109

if (!ArgLocs[++i].isRegLoc())

2110

return false;

2111

if (!ArgLocs[++i].isRegLoc())

2112

return false;

2113

}

2114

} else if (!VA.isRegLoc()) {

2115

if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,

2116

MFI, MRI, TII))

2117

return false;

2118

}

2119

}

2120

}

2121

}

2122

2123

return true;

2124

}

2125

2126

bool

2127

ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,

2128

MachineFunction &MF, bool isVarArg,

2129

const SmallVectorImpl<ISD::OutputArg> &Outs,

2130

LLVMContext &Context) const {

2131

SmallVector<CCValAssign, 16> RVLocs;

2132

CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);

2133

return CCInfo.CheckReturn(Outs, CCAssignFnForNode(CallConv, /*Return=*/true,

2134

isVarArg));

2135

}

2136

2137

/// LowerInterruptReturn - Build the ARMISD::INTRET_FLAG node for a function
/// carrying the "interrupt" attribute. The LR offset selected from the
/// attribute's value is inserted into \p RetOps right after operand 0.
/// An unrecognised attribute value is a fatal error.
static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
                                    SDLoc DL, SelectionDAG &DAG) {
  const Function *Fn = DAG.getMachineFunction().getFunction();
  StringRef Kind = Fn->getFnAttribute("interrupt").getValueAsString();

  // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset
  // version of the "preferred return address". These offsets affect the return
  // instruction if this is a return from PL1 without hypervisor extensions.
  //    IRQ/FIQ: +4     "subs pc, lr, #4"
  //    SWI:     0      "subs pc, lr, #0"
  //    ABORT:   +4     "subs pc, lr, #4"
  //    UNDEF:   +4/+2  "subs pc, lr, #0"
  // UNDEF varies depending on where the exception came from ARM or Thumb
  // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0.
  const bool ReturnsAtLRMinus4 =
      Kind == "" || Kind == "IRQ" || Kind == "FIQ" || Kind == "ABORT";
  const bool ReturnsAtLR = Kind == "SWI" || Kind == "UNDEF";
  if (!ReturnsAtLRMinus4 && !ReturnsAtLR)
    report_fatal_error("Unsupported interrupt attribute. If present, value "
                       "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");

  const int64_t LROffset = ReturnsAtLRMinus4 ? 4 : 0;
  RetOps.insert(RetOps.begin() + 1, DAG.getConstant(LROffset, MVT::i32, false));

  return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, RetOps);
}

2168

2169

/// LowerReturn - Lower the outgoing return values into copies to the
/// registers chosen by the calling convention, glued together, and finish
/// with an ARMISD::RET_FLAG node (or an interrupt return for non-M-class
/// functions carrying the "interrupt" attribute).
///
/// Values whose location needs custom handling are split through
/// ARMISD::VMOVRRD: an f64 takes two consecutive locations, a v2f64 four.
SDValue
ARMTargetLowering::LowerReturn(SDValue Chain,
                               CallingConv::ID CallConv, bool isVarArg,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
                               const SmallVectorImpl<SDValue> &OutVals,
                               SDLoc dl, SelectionDAG &DAG) const {

  // CCValAssign - represent the assignment of the return value to a location.
  SmallVector<CCValAssign, 16> RVLocs;

  // CCState - Info about the registers and stack slots.
  ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
                    *DAG.getContext(), Call);

  // Analyze outgoing return values.
  CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true,
                                               isVarArg));

  SDValue Flag;
  SmallVector<SDValue, 4> RetOps;
  RetOps.push_back(Chain); // Operand #0 = Chain (updated below)

  // Which VMOVRRD result goes into the first / second register of a pair
  // depends on endianness.
  const bool isLittleEndian = Subtarget->isLittle();
  const unsigned FirstHalfIdx = isLittleEndian ? 0 : 1;
  const unsigned SecondHalfIdx = isLittleEndian ? 1 : 0;

  MachineFunction &MF = DAG.getMachineFunction();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  AFI->setReturnRegsCount(RVLocs.size());

  // Copy Val into the register of Loc, thread the glue so that all emitted
  // copies stay stuck together, and record the register as a return operand.
  auto CopyToRetReg = [&](const CCValAssign &Loc, SDValue Val) {
    Chain = DAG.getCopyToReg(Chain, dl, Loc.getLocReg(), Val, Flag);
    Flag = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(Loc.getLocReg(), Loc.getLocVT()));
  };

  // Copy the result values into the output registers.
  for (unsigned i = 0, realRVLocIdx = 0;
       i != RVLocs.size();
       ++i, ++realRVLocIdx) {
    // Read-only view: the locations are indexed directly below rather than
    // assigned through a mutable reference, so RVLocs is never overwritten.
    const CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    SDValue Arg = OutVals[realRVLocIdx];

    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::BCvt:
      Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
      break;
    }

    if (!VA.needsCustom()) {
      CopyToRetReg(VA, Arg);
      continue;
    }

    if (VA.getLocVT() == MVT::v2f64) {
      // Extract the first half and return it in two registers.
      SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
                                 DAG.getConstant(0, MVT::i32));
      SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
                                     DAG.getVTList(MVT::i32, MVT::i32), Half);

      CopyToRetReg(RVLocs[i], HalfGPRs.getValue(FirstHalfIdx));
      ++i; // skip ahead to next loc
      CopyToRetReg(RVLocs[i], HalfGPRs.getValue(SecondHalfIdx));
      ++i; // skip ahead to next loc

      // Extract the 2nd half and fall through to handle it as an f64 value.
      Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
                        DAG.getConstant(1, MVT::i32));
    }

    // Legalize ret f64 -> ret 2 x i32.  We always have fmrrd if f64 is
    // available.
    SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
                                DAG.getVTList(MVT::i32, MVT::i32), Arg);
    CopyToRetReg(RVLocs[i], fmrrd.getValue(FirstHalfIdx));
    ++i; // skip ahead to next loc
    CopyToRetReg(RVLocs[i], fmrrd.getValue(SecondHalfIdx));
  }

  // Update chain and glue.
  RetOps[0] = Chain;
  if (Flag.getNode())
    RetOps.push_back(Flag);

  // CPUs which aren't M-class use a special sequence to return from
  // exceptions (roughly, any instruction setting pc and cpsr simultaneously,
  // though we use "subs pc, lr, #N").
  //
  // M-class CPUs actually use a normal return sequence with a special
  // (hardware-provided) value in LR, so the normal code path works.
  if (DAG.getMachineFunction().getFunction()->hasFnAttribute("interrupt") &&
      !Subtarget->isMClass()) {
    if (Subtarget->isThumb1Only())
      report_fatal_error("interrupt attribute is not supported in Thumb1");
    return LowerInterruptReturn(RetOps, dl, DAG);
  }

  return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, RetOps);
}

2280

2281

/// isUsedByReturnOnly - Return true if the single result of \p N reaches only
/// return nodes, either through one CopyToReg, through a VMOVRRD feeding
/// CopyToReg nodes, or through a BITCAST feeding one CopyToReg. On success
/// \p Chain is replaced by the chain operand of the first copy; on failure it
/// is left untouched.
bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
  if (N->getNumValues() != 1 || !N->hasNUsesOfValue(1, 0))
    return false;

  // A copy with a trailing glue operand is conservatively assumed not to be
  // safe for a tail call.
  auto EndsWithGlue = [](SDNode *Node) {
    return Node->getOperand(Node->getNumOperands() - 1).getValueType() ==
           MVT::Glue;
  };

  SDValue TCChain = Chain;
  SDNode *Copy = *N->use_begin();
  switch (Copy->getOpcode()) {
  case ISD::CopyToReg:
    if (EndsWithGlue(Copy))
      return false;
    TCChain = Copy->getOperand(0);
    break;

  case ARMISD::VMOVRRD: {
    // f64 returned in a pair of GPRs.
    SDNode *VMov = Copy;
    SmallPtrSet<SDNode*, 2> Copies;
    for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
         UI != UE; ++UI) {
      if (UI->getOpcode() != ISD::CopyToReg)
        return false;
      Copies.insert(*UI);
    }
    if (Copies.size() > 2)
      return false;

    for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
         UI != UE; ++UI) {
      SDValue UseChain = UI->getOperand(0);
      if (Copies.count(UseChain.getNode())) {
        // Chained onto another collected copy: this is the second CopyToReg.
        Copy = *UI;
        continue;
      }
      // Otherwise we are at the top of this chain: the first CopyToReg.
      if (EndsWithGlue(*UI))
        return false;
      TCChain = UseChain;
    }
    break;
  }

  case ISD::BITCAST:
    // f32 returned in a single GPR.
    if (!Copy->hasOneUse())
      return false;
    Copy = *Copy->use_begin();
    if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
      return false;
    if (EndsWithGlue(Copy))
      return false;
    TCChain = Copy->getOperand(0);
    break;

  default:
    return false;
  }

  // The final copy needs at least one user, and every user must be a return.
  if (Copy->use_empty())
    return false;
  for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
       UI != UE; ++UI) {
    const unsigned UserOpc = UI->getOpcode();
    if (UserOpc != ARMISD::RET_FLAG && UserOpc != ARMISD::INTRET_FLAG)
      return false;
  }

  Chain = TCChain;
  return true;
}

2355

2356

/// mayBeEmittedAsTailCall - Return true only when the subtarget supports
/// tail calls, \p CI is marked as a tail call, tail calls are not disabled
/// in the target options, and the subtarget is not Thumb1-only.
bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
  return Subtarget->supportsTailCall() &&
         CI->isTailCall() &&
         !getTargetMachine().Options.DisableTailCalls &&
         !Subtarget->isThumb1Only();
}

2365

2366

// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as

2367

// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is

2368

// one of the above mentioned nodes. It has to be wrapped because otherwise

2369

// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only

2370

// be used to form addressing mode. These wrapped nodes will be selected

2371

// into MOVi.

2372

static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) {

2373

EVT PtrVT = Op.getValueType();

2374

// FIXME there is no actual debug info here

2375

SDLoc dl(Op);

2376

ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);

2377

SDValue Res;

2378

if (CP->isMachineConstantPoolEntry())

2379

Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,

2380

CP->getAlignment());

2381

else

2382

Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,

2383

CP->getAlignment());

2384

return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);

2385

}

2386

2387

unsigned ARMTargetLowering::getJumpTableEncoding() const {

2388

return MachineJumpTableInfo::EK_Inline;

2389

}

2390

2391

/// LowerBlockAddress - Materialise a block address by loading it from a
/// wrapped constant-pool entry. Under the static relocation model the load
/// is the result; otherwise the pool entry carries a PIC label and the loaded
/// value is combined with that label through ARMISD::PIC_ADD.
SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
                                             SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
  SDLoc DL(Op);
  EVT PtrVT = getPointerTy();
  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
  const bool IsStatic =
      getTargetMachine().getRelocationModel() == Reloc::Static;

  unsigned PICLabelId = 0;
  SDValue PoolEntry;
  if (IsStatic) {
    PoolEntry = DAG.getTargetConstantPool(BA, PtrVT, 4);
  } else {
    const unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
    PICLabelId = FuncInfo->createPICLabelUId();
    ARMConstantPoolValue *CPV =
        ARMConstantPoolConstant::Create(BA, PICLabelId,
                                        ARMCP::CPBlockAddress, PCAdj);
    PoolEntry = DAG.getTargetConstantPool(CPV, PtrVT, 4);
  }

  SDValue WrappedEntry = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, PoolEntry);
  SDValue Loaded = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), WrappedEntry,
                               MachinePointerInfo::getConstantPool(),
                               false, false, false, 0);
  if (IsStatic)
    return Loaded;

  SDValue PICLabel = DAG.getConstant(PICLabelId, MVT::i32);
  return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Loaded, PICLabel);
}

2420

2421

// Lower ISD::GlobalTLSAddress using the "general dynamic" model

2422

SDValue

2423

ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,

2424

SelectionDAG &DAG) const {

2425

SDLoc dl(GA);

2426

EVT PtrVT = getPointerTy();

2427

unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;

2428

MachineFunction &MF = DAG.getMachineFunction();

2429

ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

2430

unsigned ARMPCLabelIndex = AFI->createPICLabelUId();

2431

ARMConstantPoolValue *CPV =

2432

ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,

2433

ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);

2434

SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);

2435

Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);

2436

Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument,

2437

MachinePointerInfo::getConstantPool(),

2438

false, false, false, 0);

2439

SDValue Chain = Argument.getValue(1);

2440

2441

SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);

2442

Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);

2443

2444

// call __tls_get_addr.

2445

ArgListTy Args;

2446

ArgListEntry Entry;

2447

Entry.Node = Argument;

2448

Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());

2449

Args.push_back(Entry);

2450

2451

// FIXME: is there useful debug info available here?

2452

TargetLowering::CallLoweringInfo CLI(DAG);

2453

CLI.setDebugLoc(dl).setChain(Chain)

2454

.setCallee(CallingConv::C, Type::getInt32Ty(*DAG.getContext()),

2455

DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args),

2456

0);

2457

2458

std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);

2459

return CallResult.first;

2460

}

2461

2462

// Lower ISD::GlobalTLSAddress using the "initial exec" or

2463

// "local exec" model.

2464

SDValue

2465

ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,

2466

SelectionDAG &DAG,

2467

TLSModel::Model model) const {

2468

const GlobalValue *GV = GA->getGlobal();

2469

SDLoc dl(GA);

2470

SDValue Offset;

2471

SDValue Chain = DAG.getEntryNode();

2472

EVT PtrVT = getPointerTy();

2473

// Get the Thread Pointer

2474

SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);

2475

2476

if (model == TLSModel::InitialExec) {

2477

MachineFunction &MF = DAG.getMachineFunction();

2478

ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

2479

unsigned ARMPCLabelIndex = AFI->createPICLabelUId();

2480

// Initial exec model.

2481

unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;

2482

ARMConstantPoolValue *CPV =

2483

ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,

2484

ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF,

2485

true);

2486

Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);

2487

Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);

2488

Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,

2489

MachinePointerInfo::getConstantPool(),

2490

false, false, false, 0);

2491

Chain = Offset.getValue(1);

2492

2493

SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);

2494

Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);

2495

2496

Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,

2497

MachinePointerInfo::getConstantPool(),

2498

false, false, false, 0);

2499

} else {

2500

// local exec model

2501

assert(model == TLSModel::LocalExec)((model == TLSModel::LocalExec) ? static_cast<void> (0)
: __assert_fail ("model == TLSModel::LocalExec", "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 2501, __PRETTY_FUNCTION__));

2502

ARMConstantPoolValue *CPV =

2503

ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF);

2504

Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);

2505

Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);

2506

Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,

2507

MachinePointerInfo::getConstantPool(),

2508

false, false, false, 0);

2509

}

2510

2511

// The address of the thread local variable is the add of the thread

2512

// pointer with the offset of the variable.

2513

return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);

2514

}

2515

2516

/// LowerGlobalTLSAddress - Dispatch a thread-local global address to the
/// lowering routine for its TLS model. The local dynamic model is currently
/// handled by the general dynamic lowering. Only ELF targets are supported.
SDValue
ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
  // TODO: implement the "local dynamic" model
  assert(Subtarget->isTargetELF() &&
         "TLS not implemented for non-ELF targets");
  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);

  TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal());

  switch (model) {
    case TLSModel::GeneralDynamic:
    case TLSModel::LocalDynamic:
      return LowerToTLSGeneralDynamicModel(GA, DAG);
    case TLSModel::InitialExec:
    case TLSModel::LocalExec:
      return LowerToTLSExecModels(GA, DAG, model);
  }
  llvm_unreachable("bogus TLS model");
}

2535

2536

/// LowerGlobalAddressELF - Lower a global address for ELF targets.
///
/// Under PIC the address is formed from a GOT or GOTOFF constant-pool entry
/// added to the global offset table, with one extra load in the GOT case.
/// Otherwise the address is either wrapped directly (when movw/movt may be
/// used) or loaded from the constant pool.
SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
                                                 SelectionDAG &DAG) const {
  EVT PtrVT = getPointerTy();
  SDLoc dl(Op);
  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();

  if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
    // Local or hidden symbols are addressed relative to the GOT; the rest go
    // through a GOT slot.
    const bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
    ARMConstantPoolValue *CPV =
        ARMConstantPoolConstant::Create(GV,
                                        UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT);
    SDValue PoolEntry = DAG.getTargetConstantPool(CPV, PtrVT, 4);
    SDValue WrappedEntry =
        DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, PoolEntry);
    SDValue Loaded = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
                                 WrappedEntry,
                                 MachinePointerInfo::getConstantPool(),
                                 false, false, false, 0);
    SDValue Chain = Loaded.getValue(1);
    SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
    SDValue Address = DAG.getNode(ISD::ADD, dl, PtrVT, Loaded, GOT);
    if (UseGOTOFF)
      return Address;
    return DAG.getLoad(PtrVT, dl, Chain, Address,
                       MachinePointerInfo::getGOT(),
                       false, false, false, 0);
  }

  // If we have T2 ops, we can materialize the address directly via movt/movw
  // pair. This is always cheaper.
  if (Subtarget->useMovt(DAG.getMachineFunction())) {
    ++NumMovwMovt;
    // FIXME: Once remat is capable of dealing with instructions with register
    // operands, expand this into two nodes.
    return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
                       DAG.getTargetGlobalAddress(GV, dl, PtrVT));
  }

  SDValue PoolEntry = DAG.getTargetConstantPool(GV, PtrVT, 4);
  SDValue WrappedEntry = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, PoolEntry);
  return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), WrappedEntry,
                     MachinePointerInfo::getConstantPool(),
                     false, false, false, 0);
}

2578

2579

/// LowerGlobalAddressDarwin - Lower a global address for Darwin targets by
/// wrapping a MO_NONLAZY target global address (WrapperPIC under PIC,
/// Wrapper otherwise) and adding a load when the subtarget reports the
/// global as an indirect symbol.
SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
                                                    SelectionDAG &DAG) const {
  EVT PtrVT = getPointerTy();
  SDLoc dl(Op);
  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
  Reloc::Model RelocM = getTargetMachine().getRelocationModel();

  if (Subtarget->useMovt(DAG.getMachineFunction()))
    ++NumMovwMovt;

  // FIXME: Once remat is capable of dealing with instructions with register
  // operands, expand this into multiple nodes
  unsigned WrapperOpc = ARMISD::Wrapper;
  if (RelocM == Reloc::PIC_)
    WrapperOpc = ARMISD::WrapperPIC;

  SDValue TargetAddr =
      DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY);
  SDValue Wrapped = DAG.getNode(WrapperOpc, dl, PtrVT, TargetAddr);

  if (!Subtarget->GVIsIndirectSymbol(GV, RelocM))
    return Wrapped;
  return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Wrapped,
                     MachinePointerInfo::getGOT(), false, false, false, 0);
}

2602

2603

/// LowerGlobalAddressWindows - Lower a global address for Windows targets.
/// The address is a wrapped target global address, flagged MO_DLLIMPORT for
/// dllimport globals; those additionally get a load of the wrapped address.
/// Requires a Windows subtarget that may use movw/movt.
SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
                                                     SelectionDAG &DAG) const {
  assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
  assert(Subtarget->useMovt(DAG.getMachineFunction()) &&
         "Windows on ARM expects to use movw/movt");

  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
  const ARMII::TOF TargetFlags =
    (GV->hasDLLImportStorageClass() ? ARMII::MO_DLLIMPORT : ARMII::MO_NO_FLAG);
  EVT PtrVT = getPointerTy();
  SDValue Result;
  SDLoc DL(Op);

  ++NumMovwMovt;

  // FIXME: Once remat is capable of dealing with instructions with register
  // operands, expand this into two nodes.
  Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
                       DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*Offset=*/0,
                                                  TargetFlags));
  // For a dllimport global the wrapped address is loaded from, not used
  // directly.
  if (GV->hasDLLImportStorageClass())
    Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
                         MachinePointerInfo::getGOT(), false, false, false, 0);
  return Result;
}

2628

2629

/// LowerGLOBAL_OFFSET_TABLE - Materialise the address of
/// _GLOBAL_OFFSET_TABLE_ by loading a PIC-labelled constant-pool symbol entry
/// and combining the loaded value with its PIC label through
/// ARMISD::PIC_ADD. Only ELF targets are supported.
SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
                                                    SelectionDAG &DAG) const {
  assert(Subtarget->isTargetELF() &&
         "GLOBAL OFFSET TABLE not implemented for non-ELF targets");
  MachineFunction &MF = DAG.getMachineFunction();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
  EVT PtrVT = getPointerTy();
  SDLoc dl(Op);
  unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
  ARMConstantPoolValue *CPV =
    ARMConstantPoolSymbol::Create(*DAG.getContext(), "_GLOBAL_OFFSET_TABLE_",
                                  ARMPCLabelIndex, PCAdj);
  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
  SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
                               MachinePointerInfo::getConstantPool(),
                               false, false, false, 0);
  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
  return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
}

2650

2651

/// Build an ARMISD::EH_SJLJ_SETJMP node yielding (i32, Other) from operands 0
/// and 1 of \p Op plus a trailing constant-zero i32 operand.
SDValue
ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
  SDLoc Loc(Op);
  SDValue Zero = DAG.getConstant(0, MVT::i32);
  SDVTList ResultTys = DAG.getVTList(MVT::i32, MVT::Other);
  SDValue First = Op.getOperand(0);
  SDValue Second = Op.getOperand(1);
  return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, Loc, ResultTys, First, Second,
                     Zero);
}

2659

2660

/// Build an ARMISD::EH_SJLJ_LONGJMP node (result type Other) from operands 0
/// and 1 of \p Op plus a trailing constant-zero i32 operand.
SDValue
ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
  SDLoc Loc(Op);
  SDValue Zero = DAG.getConstant(0, MVT::i32);
  return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, Loc, MVT::Other,
                     Op.getOperand(0), Op.getOperand(1), Zero);
}

2666

2667

/// Custom-lower the chain-less intrinsics handled here.
///
/// Operand 0 of \p Op holds the intrinsic ID. arm_rbit, arm_thread_pointer,
/// eh_sjlj_lsda and the NEON vmulls/vmullu intrinsics are turned into
/// ARM-specific nodes; every other ID yields an empty SDValue.
SDValue
ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
                                          const ARMSubtarget *Subtarget) const {
  const unsigned IntrinsicID =
      cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  SDLoc Loc(Op);

  switch (IntrinsicID) {
  case Intrinsic::arm_rbit:
    assert(Op.getOperand(1).getValueType() == MVT::i32 &&
           "RBIT intrinsic must have i32 type!");
    return DAG.getNode(ARMISD::RBIT, Loc, MVT::i32, Op.getOperand(1));

  case Intrinsic::arm_thread_pointer: {
    EVT AddrVT = DAG.getTargetLoweringInfo().getPointerTy();
    return DAG.getNode(ARMISD::THREAD_POINTER, Loc, AddrVT);
  }

  case Intrinsic::eh_sjlj_lsda: {
    MachineFunction &MF = DAG.getMachineFunction();
    ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
    const unsigned LabelId = FuncInfo->createPICLabelUId();
    EVT AddrVT = getPointerTy();
    const bool IsPIC =
        getTargetMachine().getRelocationModel() == Reloc::PIC_;

    // No PC adjustment unless PIC; then 4 for Thumb and 8 otherwise.
    unsigned PCBias = 0;
    if (IsPIC)
      PCBias = Subtarget->isThumb() ? 4 : 8;

    ARMConstantPoolValue *LSDAEntry = ARMConstantPoolConstant::Create(
        MF.getFunction(), LabelId, ARMCP::CPLSDA, PCBias);
    SDValue PoolAddr = DAG.getTargetConstantPool(LSDAEntry, AddrVT, 4);
    PoolAddr = DAG.getNode(ARMISD::Wrapper, Loc, MVT::i32, PoolAddr);
    SDValue Loaded = DAG.getLoad(AddrVT, Loc, DAG.getEntryNode(), PoolAddr,
                                 MachinePointerInfo::getConstantPool(),
                                 false, false, false, 0);
    if (!IsPIC)
      return Loaded;

    // Under PIC the loaded value is combined with the PIC label.
    SDValue LabelConst = DAG.getConstant(LabelId, MVT::i32);
    return DAG.getNode(ARMISD::PIC_ADD, Loc, AddrVT, Loaded, LabelConst);
  }

  case Intrinsic::arm_neon_vmulls:
  case Intrinsic::arm_neon_vmullu: {
    unsigned MulOpc = ARMISD::VMULLu;
    if (IntrinsicID == Intrinsic::arm_neon_vmulls)
      MulOpc = ARMISD::VMULLs;
    return DAG.getNode(MulOpc, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2));
  }

  default:
    // Don't custom lower most intrinsics.
    return SDValue();
  }
}

2717

2718

/// Lower an atomic fence.
///
/// Without a data barrier instruction an ARMISD::MEMBARRIER_MCR node is
/// produced (asserting a v6, non-Thumb subtarget). Otherwise the fence becomes
/// a call to the arm_dmb intrinsic whose barrier option depends on the
/// subtarget and on the ordering stored in operand 1.
static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
                                 const ARMSubtarget *Subtarget) {
  // FIXME: handle "fence singlethread" more efficiently.
  SDLoc Loc(Op);
  if (!Subtarget->hasDataBarrier()) {
    // Some ARMv6 cpus can support data barriers with an mcr instruction.
    // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
    // here.
    assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
           "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!");
    return DAG.getNode(ARMISD::MEMBARRIER_MCR, Loc, MVT::Other,
                       Op.getOperand(0), DAG.getConstant(0, MVT::i32));
  }

  const AtomicOrdering Ordering = static_cast<AtomicOrdering>(
      cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());

  ARM_MB::MemBOpt BarrierOpt;
  if (Subtarget->isMClass()) {
    // Only a full system barrier exists in the M-class architectures.
    BarrierOpt = ARM_MB::SY;
  } else if (Subtarget->isSwift() && Ordering == Release) {
    // Swift happens to implement ISHST barriers in a way that's compatible with
    // Release semantics but weaker than ISH so we'd be fools not to use
    // it. Beware: other processors probably don't!
    BarrierOpt = ARM_MB::ISHST;
  } else {
    BarrierOpt = ARM_MB::ISH;
  }

  return DAG.getNode(ISD::INTRINSIC_VOID, Loc, MVT::Other, Op.getOperand(0),
                     DAG.getConstant(Intrinsic::arm_dmb, MVT::i32),
                     DAG.getConstant(BarrierOpt, MVT::i32));
}

2749

2750

/// Lower a prefetch to ARMISD::PRELOAD, or drop it (returning only the chain,
/// operand 0) when the subtarget has no suitable preload instruction.
static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
                             const ARMSubtarget *Subtarget) {
  // ARM pre v5TE and Thumb1 does not have preload instructions.
  const bool HasPreload =
      Subtarget->isThumb2() ||
      (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps());
  if (!HasPreload)
    return Op.getOperand(0); // Just preserve the chain.

  SDLoc Loc(Op);

  // Operand 2 has bit 0 clear for a read; isRead is the inverse of that bit.
  unsigned isRead =
      ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1;

  // ARMv7 with MP extension has PLDW; without it a write prefetch is dropped.
  if (!isRead && !(Subtarget->hasV7Ops() && Subtarget->hasMPExtension()))
    return Op.getOperand(0);

  unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
  if (Subtarget->isThumb()) {
    // Invert the bits.
    isRead = (isRead & 1) ^ 1;
    isData = (isData & 1) ^ 1;
  }

  return DAG.getNode(ARMISD::PRELOAD, Loc, MVT::Other, Op.getOperand(0),
                     Op.getOperand(1), DAG.getConstant(isRead, MVT::i32),
                     DAG.getConstant(isData, MVT::i32));
}

2776

2777

/// Lower va_start: store the address of the VarArgsFrameIndex slot into the
/// memory location given by operand 1, chained on operand 0.
static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
  ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();

  SDLoc Loc(Op);
  EVT AddrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Frame-index node for the slot recorded in the function info.
  SDValue VarArgsSlot = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(), AddrVT);

  // Operand 2 carries the IR value used for the store's pointer info.
  const Value *DestIRValue = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  return DAG.getStore(Op.getOperand(0), Loc, VarArgsSlot, Op.getOperand(1),
                      MachinePointerInfo(DestIRValue), false, false, 0);
}

2790

2791

/// Reassemble an f64 formal argument from its two i32 pieces.
///
/// \p VA describes the register holding the first piece; \p NextVA describes
/// the second piece, which is either another register or a 4-byte stack slot.
/// The pieces are swapped on non-little-endian subtargets and joined with
/// ARMISD::VMOVDRR.
SDValue
ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
                                        SDValue &Root, SelectionDAG &DAG,
                                        SDLoc dl) const {
  MachineFunction &MF = DAG.getMachineFunction();
  ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();

  const TargetRegisterClass *GPRClass = FuncInfo->isThumb1OnlyFunction()
                                            ? &ARM::tGPRRegClass
                                            : &ARM::GPRRegClass;

  // Transform the arguments stored in physical registers into virtual ones.
  unsigned VReg = MF.addLiveIn(VA.getLocReg(), GPRClass);
  SDValue FirstHalf = DAG.getCopyFromReg(Root, dl, VReg, MVT::i32);

  SDValue SecondHalf;
  if (!NextVA.isMemLoc()) {
    VReg = MF.addLiveIn(NextVA.getLocReg(), GPRClass);
    SecondHalf = DAG.getCopyFromReg(Root, dl, VReg, MVT::i32);
  } else {
    MachineFrameInfo *FrameInfo = MF.getFrameInfo();
    int SlotFI =
        FrameInfo->CreateFixedObject(4, NextVA.getLocMemOffset(), true);

    // Create load node to retrieve arguments from the stack.
    SDValue SlotAddr = DAG.getFrameIndex(SlotFI, getPointerTy());
    SecondHalf = DAG.getLoad(MVT::i32, dl, Root, SlotAddr,
                             MachinePointerInfo::getFixedStack(SlotFI),
                             false, false, false, 0);
  }

  if (!Subtarget->isLittle())
    std::swap(FirstHalf, SecondHalf);
  return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, FirstHalf, SecondHalf);
}

2826

2827

void

2828

ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF,

2829

unsigned InRegsParamRecordIdx,

2830

unsigned ArgSize,

2831

unsigned &ArgRegsSize,

2832

unsigned &ArgRegsSaveSize)

2833

const {

2834

unsigned NumGPRs;

2835

if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {

2836

unsigned RBegin, REnd;

2837

CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);

2838

NumGPRs = REnd - RBegin;

2839

} else {

2840

unsigned int firstUnalloced;

2841

firstUnalloced = CCInfo.getFirstUnallocated(GPRArgRegs,

2842

sizeof(GPRArgRegs) /

2843

sizeof(GPRArgRegs[0]));

2844

NumGPRs = (firstUnalloced <= 3) ? (4 - firstUnalloced) : 0;

2845

}

2846

2847

unsigned Align = MF.getTarget()

2848

.getSubtargetImpl()

2849

->getFrameLowering()

2850

->getStackAlignment();

2851

ArgRegsSize = NumGPRs * 4;

2852

2853

// If parameter is split between stack and GPRs...

2854

if (NumGPRs && Align > 4 &&

2855

(ArgRegsSize < ArgSize ||

2856

InRegsParamRecordIdx >= CCInfo.getInRegsParamsCount())) {

2857

// Add padding for part of param recovered from GPRs. For example,

2858

// if Align == 8, its last byte must be at address K*8 - 1.

2859

// We need to do it, since remained (stack) part of parameter has

2860

// stack alignment, and we need to "attach" "GPRs head" without gaps

2861

// to it:

2862

// Stack:

2863

// |---- 8 bytes block ----| |---- 8 bytes block ----| |---- 8 bytes...

2864

// [ [padding] [GPRs head] ] [ Tail passed via stack ....

2865

2866

ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

2867

unsigned Padding =

2868

OffsetToAlignment(ArgRegsSize + AFI->getArgRegsSaveSize(), Align);

2869

ArgRegsSaveSize = ArgRegsSize + Padding;

2870

} else

2871

// We don't need to extend regs save size for byval parameters if they

2872

// are passed via GPRs only.

2873

ArgRegsSaveSize = ArgRegsSize;

2874

}

2875

2876

// The remaining GPRs hold either the beginning of variable-argument

2877

// data, or the beginning of an aggregate passed by value (usually

2878

// byval). Either way, we allocate stack slots adjacent to the data

2879

// provided by our caller, and store the unallocated registers there.

2880

// If this is a variadic function, the va_list pointer will begin with

2881

// these values; otherwise, this reassembles a (byval) structure that

2882

// was split between registers and memory.

2883

// Return: The frame index registers were stored into.

2884

int

2885

ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,

2886

SDLoc dl, SDValue &Chain,

2887

const Value *OrigArg,

2888

unsigned InRegsParamRecordIdx,

2889

unsigned OffsetFromOrigArg,

2890

unsigned ArgOffset,

2891

unsigned ArgSize,

2892

bool ForceMutable,

2893

unsigned ByValStoreOffset,

2894

unsigned TotalArgRegsSaveSize) const {

2895

2896

// Currently, two use-cases possible:

2897

// Case #1. Non-var-args function, and we meet first byval parameter.

2898

// Setup first unallocated register as first byval register;

2899

// eat all remained registers

2900

// (these two actions are performed by HandleByVal method).

2901

// Then, here, we initialize stack frame with

2902

// "store-reg" instructions.

2903

// Case #2. Var-args function, that doesn't contain byval parameters.

2904

// The same: eat all remained unallocated registers,

2905

// initialize stack frame.

2906

2907

MachineFunction &MF = DAG.getMachineFunction();

2908

MachineFrameInfo *MFI = MF.getFrameInfo();

2909

ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

2910

unsigned firstRegToSaveIndex, lastRegToSaveIndex;

2911

unsigned RBegin, REnd;

2912

if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {

2913

CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);

2914

firstRegToSaveIndex = RBegin - ARM::R0;

2915

lastRegToSaveIndex = REnd - ARM::R0;

2916

} else {

2917

firstRegToSaveIndex = CCInfo.getFirstUnallocated

2918

(GPRArgRegs, array_lengthof(GPRArgRegs));

2919

lastRegToSaveIndex = 4;

2920

}

2921

2922

unsigned ArgRegsSize, ArgRegsSaveSize;

2923

computeRegArea(CCInfo, MF, InRegsParamRecordIdx, ArgSize,

2924

ArgRegsSize, ArgRegsSaveSize);

2925

2926

// Store any by-val regs to their spots on the stack so that they may be

2927

// loaded by deferencing the result of formal parameter pointer or va_next.

2928

// Note: once stack area for byval/varargs registers

2929

// was initialized, it can't be initialized again.

2930

if (ArgRegsSaveSize) {

2931

unsigned Padding = ArgRegsSaveSize - ArgRegsSize;

2932

2933

if (Padding) {

2934

assert(AFI->getStoredByValParamsPadding() == 0 &&((AFI->getStoredByValParamsPadding() == 0 && "The only parameter may be padded."
) ? static_cast<void> (0) : __assert_fail ("AFI->getStoredByValParamsPadding() == 0 && \"The only parameter may be padded.\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 2935, __PRETTY_FUNCTION__))

2935

"The only parameter may be padded.")((AFI->getStoredByValParamsPadding() == 0 && "The only parameter may be padded."
) ? static_cast<void> (0) : __assert_fail ("AFI->getStoredByValParamsPadding() == 0 && \"The only parameter may be padded.\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 2935, __PRETTY_FUNCTION__));

2936

AFI->setStoredByValParamsPadding(Padding);

2937

}

2938

2939

int FrameIndex = MFI->CreateFixedObject(ArgRegsSaveSize,

2940

Padding +

2941

ByValStoreOffset -

2942

(int64_t)TotalArgRegsSaveSize,

2943

false);

2944

SDValue FIN = DAG.getFrameIndex(FrameIndex, getPointerTy());

2945

if (Padding) {

2946

MFI->CreateFixedObject(Padding,

2947

ArgOffset + ByValStoreOffset -

2948

(int64_t)ArgRegsSaveSize,

2949

false);

2950

}

2951

2952

SmallVector<SDValue, 4> MemOps;

2953

for (unsigned i = 0; firstRegToSaveIndex < lastRegToSaveIndex;

2954

++firstRegToSaveIndex, ++i) {

2955

const TargetRegisterClass *RC;

2956

if (AFI->isThumb1OnlyFunction())

2957

RC = &ARM::tGPRRegClass;

2958

else

2959

RC = &ARM::GPRRegClass;

2960

2961

unsigned VReg = MF.addLiveIn(GPRArgRegs[firstRegToSaveIndex], RC);

2962

SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);

2963

SDValue Store =

2964

DAG.getStore(Val.getValue(1), dl, Val, FIN,

2965

MachinePointerInfo(OrigArg, OffsetFromOrigArg + 4*i),

2966

false, false, 0);

2967

MemOps.push_back(Store);

2968

FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,

2969

DAG.getConstant(4, getPointerTy()));

2970

}

2971

2972

AFI->setArgRegsSaveSize(ArgRegsSaveSize + AFI->getArgRegsSaveSize());

2973

2974

if (!MemOps.empty())

2975

Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);

2976

return FrameIndex;

2977

} else {

2978

if (ArgSize == 0) {

2979

// We cannot allocate a zero-byte object for the first variadic argument,

2980

// so just make up a size.

2981

ArgSize = 4;

2982

}

2983

// This will point to the next argument passed via stack.

2984

return MFI->CreateFixedObject(

2985

ArgSize, ArgOffset, !ForceMutable);

2986

}

2987

}

2988

2989

// Setup stack frame, the va_list pointer will start from.

2990

void

2991

ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,

2992

SDLoc dl, SDValue &Chain,

2993

unsigned ArgOffset,

2994

unsigned TotalArgRegsSaveSize,

2995

bool ForceMutable) const {

2996

MachineFunction &MF = DAG.getMachineFunction();

2997

ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

2998

2999

// Try to store any remaining integer argument regs

3000

// to their spots on the stack so that they may be loaded by deferencing

3001

// the result of va_next.

3002

// If there is no regs to be stored, just point address after last

3003

// argument passed via stack.

3004

int FrameIndex =

3005

StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr,

3006

CCInfo.getInRegsParamsCount(), 0, ArgOffset, 0, ForceMutable,

3007

0, TotalArgRegsSaveSize);

3008

3009

AFI->setVarArgsFrameIndex(FrameIndex);

3010

}

3011

3012

/// Lower the incoming (formal) arguments of the current function.
///
/// Runs the calling-convention analysis over \p Ins and appends the lowered
/// values to \p InVals: register locations become CopyFromReg values (custom
/// f64/v2f64 locations are reassembled from their pieces), byval arguments
/// become frame indices returned by StoreByValRegs, and the remaining stack
/// arguments become loads from fixed stack objects. For variadic functions
/// with va_start the register save area is set up via VarArgStyleRegisters.
///
/// \returns \p Chain, which the helpers called here may have updated.
SDValue
ARMTargetLowering::LowerFormalArguments(SDValue Chain,
                                        CallingConv::ID CallConv, bool isVarArg,
                                        const SmallVectorImpl<ISD::InputArg>
                                          &Ins,
                                        SDLoc dl, SelectionDAG &DAG,
                                        SmallVectorImpl<SDValue> &InVals)
                                          const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();

  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
                    *DAG.getContext(), Prologue);
  CCInfo.AnalyzeFormalArguments(Ins,
                                CCAssignFnForNode(CallConv, /* Return*/ false,
                                                  isVarArg));

  int lastInsIndex = -1;
  SDValue ArgValue;
  Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin();
  unsigned CurArgIdx = 0;

  // Initially ArgRegsSaveSize is zero.
  // Then we increase this value each time we meet byval parameter.
  // We also increase this value in case of varargs function.
  AFI->setArgRegsSaveSize(0);

  unsigned ByValStoreOffset = 0;
  unsigned TotalArgRegsSaveSize = 0;
  unsigned ArgRegsSaveSizeMaxAlign = 4;

  // Calculate the amount of stack space that we need to allocate to store
  // byval and variadic arguments that are passed in registers.
  // We need to know this before we allocate the first byval or variadic
  // argument, as they will be allocated a stack slot below the CFA (Canonical
  // Frame Address, the stack pointer at entry to the function).
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    if (VA.isMemLoc()) {
      int index = VA.getValNo();
      if (index != lastInsIndex) {
        ISD::ArgFlagsTy Flags = Ins[index].Flags;
        if (Flags.isByVal()) {
          unsigned ExtraArgRegsSize;
          unsigned ExtraArgRegsSaveSize;
          computeRegArea(CCInfo, MF, CCInfo.getInRegsParamsProcessed(),
                         Flags.getByValSize(),
                         ExtraArgRegsSize, ExtraArgRegsSaveSize);

          TotalArgRegsSaveSize += ExtraArgRegsSaveSize;
          if (Flags.getByValAlign() > ArgRegsSaveSizeMaxAlign)
              ArgRegsSaveSizeMaxAlign = Flags.getByValAlign();
          CCInfo.nextInRegsParam();
        }
        lastInsIndex = index;
      }
    }
  }
  // The sizing pass advanced the byval cursor; reset it (and the last-seen
  // input index) before the lowering pass walks the locations again.
  CCInfo.rewindByValRegsInfo();
  lastInsIndex = -1;
  if (isVarArg && MFI->hasVAStart()) {
    unsigned ExtraArgRegsSize;
    unsigned ExtraArgRegsSaveSize;
    computeRegArea(CCInfo, MF, CCInfo.getInRegsParamsCount(), 0,
                   ExtraArgRegsSize, ExtraArgRegsSaveSize);
    TotalArgRegsSaveSize += ExtraArgRegsSaveSize;
  }
  // If the arg regs save area contains N-byte aligned values, the
  // bottom of it must be at least N-byte aligned.
  TotalArgRegsSaveSize = RoundUpToAlignment(TotalArgRegsSaveSize, ArgRegsSaveSizeMaxAlign);
  TotalArgRegsSaveSize = std::min(TotalArgRegsSaveSize, 16U);

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    // The current location is tracked through a pointer rather than a
    // reference: the v2f64 path below steps to a later location, and with a
    // reference that step would copy-assign over an earlier ArgLocs entry
    // instead of re-pointing.
    CCValAssign *VA = &ArgLocs[i];
    std::advance(CurOrigArg, Ins[VA->getValNo()].OrigArgIndex - CurArgIdx);
    CurArgIdx = Ins[VA->getValNo()].OrigArgIndex;
    // Arguments stored in registers.
    if (VA->isRegLoc()) {
      EVT RegVT = VA->getLocVT();

      if (VA->needsCustom()) {
        // f64 and vector types are split up into multiple registers or
        // combinations of registers and stack slots.
        if (VA->getLocVT() == MVT::v2f64) {
          SDValue ArgValue1 = GetF64FormalArgument(*VA, ArgLocs[++i],
                                                   Chain, DAG, dl);
          VA = &ArgLocs[++i]; // skip ahead to next loc
          SDValue ArgValue2;
          if (VA->isMemLoc()) {
            int FI = MFI->CreateFixedObject(8, VA->getLocMemOffset(), true);
            SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
            ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
                                    MachinePointerInfo::getFixedStack(FI),
                                    false, false, false, 0);
          } else {
            ArgValue2 = GetF64FormalArgument(*VA, ArgLocs[++i],
                                             Chain, DAG, dl);
          }
          ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
          ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
                                 ArgValue, ArgValue1, DAG.getIntPtrConstant(0));
          ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
                                 ArgValue, ArgValue2, DAG.getIntPtrConstant(1));
        } else
          ArgValue = GetF64FormalArgument(*VA, ArgLocs[++i], Chain, DAG, dl);

      } else {
        const TargetRegisterClass *RC;

        if (RegVT == MVT::f32)
          RC = &ARM::SPRRegClass;
        else if (RegVT == MVT::f64)
          RC = &ARM::DPRRegClass;
        else if (RegVT == MVT::v2f64)
          RC = &ARM::QPRRegClass;
        else if (RegVT == MVT::i32)
          RC = AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass
                                           : &ARM::GPRRegClass;
        else
          llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");

        // Transform the arguments in physical registers into virtual ones.
        unsigned Reg = MF.addLiveIn(VA->getLocReg(), RC);
        ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
      }

      // If this is an 8 or 16-bit value, it is really passed promoted
      // to 32 bits.  Insert an assert[sz]ext to capture this, then
      // truncate to the right size.
      switch (VA->getLocInfo()) {
      default: llvm_unreachable("Unknown loc info!");
      case CCValAssign::Full: break;
      case CCValAssign::BCvt:
        ArgValue = DAG.getNode(ISD::BITCAST, dl, VA->getValVT(), ArgValue);
        break;
      case CCValAssign::SExt:
        ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
                               DAG.getValueType(VA->getValVT()));
        ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA->getValVT(), ArgValue);
        break;
      case CCValAssign::ZExt:
        ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
                               DAG.getValueType(VA->getValVT()));
        ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA->getValVT(), ArgValue);
        break;
      }

      InVals.push_back(ArgValue);

    } else { // !VA->isRegLoc(): the location is not in a register.

      // sanity check
      assert(VA->isMemLoc());
      assert(VA->getValVT() != MVT::i64 && "i64 should already be lowered");

      int index = VA->getValNo();

      // Some Ins[] entries become multiple ArgLoc[] entries.
      // Process them only once.
      if (index != lastInsIndex)
        {
          ISD::ArgFlagsTy Flags = Ins[index].Flags;
          // FIXME: For now, all byval parameter objects are marked mutable.
          // This can be changed with more analysis.
          // In case of tail call optimization mark all arguments mutable.
          // Since they could be overwritten by lowering of arguments in case of
          // a tail call.
          if (Flags.isByVal()) {
            unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed();

            ByValStoreOffset = RoundUpToAlignment(ByValStoreOffset, Flags.getByValAlign());
            int FrameIndex = StoreByValRegs(
                CCInfo, DAG, dl, Chain, CurOrigArg,
                CurByValIndex,
                Ins[VA->getValNo()].PartOffset,
                VA->getLocMemOffset(),
                Flags.getByValSize(),
                true /*force mutable frames*/,
                ByValStoreOffset,
                TotalArgRegsSaveSize);
            ByValStoreOffset += Flags.getByValSize();
            ByValStoreOffset = std::min(ByValStoreOffset, 16U);
            InVals.push_back(DAG.getFrameIndex(FrameIndex, getPointerTy()));
            CCInfo.nextInRegsParam();
          } else {
            unsigned FIOffset = VA->getLocMemOffset();
            int FI = MFI->CreateFixedObject(VA->getLocVT().getSizeInBits()/8,
                                            FIOffset, true);

            // Create load nodes to retrieve arguments from the stack.
            SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
            InVals.push_back(DAG.getLoad(VA->getValVT(), dl, Chain, FIN,
                                         MachinePointerInfo::getFixedStack(FI),
                                         false, false, false, 0));
          }
          lastInsIndex = index;
        }
    }
  }

  // varargs
  if (isVarArg && MFI->hasVAStart())
    VarArgStyleRegisters(CCInfo, DAG, dl, Chain,
                         CCInfo.getNextStackOffset(),
                         TotalArgRegsSaveSize);

  AFI->setArgumentStackSize(CCInfo.getNextStackOffset());

  return Chain;
}

3227

3228

/// isFloatingPointZero - Return true if this is +0.0.

3229

static bool isFloatingPointZero(SDValue Op) {

3230

if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))

3231

return CFP->getValueAPF().isPosZero();

3232

else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {

3233

// Maybe this has already been legalized into the constant pool?

3234

if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {

3235

SDValue WrapperOp = Op.getOperand(1).getOperand(0);

3236

if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))

3237

if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))

3238

return CFP->getValueAPF().isPosZero();

3239

}

3240

} else if (Op->getOpcode() == ISD::BITCAST &&

3241

Op->getValueType(0) == MVT::f64) {

3242

// Handle (ISD::BITCAST (ARMISD::VMOVIMM (ISD::TargetConstant 0)) MVT::f64)

3243

// created by LowerConstantFP().

3244

SDValue BitcastOp = Op->getOperand(0);

3245

if (BitcastOp->getOpcode() == ARMISD::VMOVIMM) {

3246

SDValue MoveOp = BitcastOp->getOperand(0);

3247

if (MoveOp->getOpcode() == ISD::TargetConstant &&

3248

cast<ConstantSDNode>(MoveOp)->getZExtValue() == 0) {

3249

return true;

3250

}

3251

}

3252

}

3253

return false;

3254

}

3255

3256

/// Returns appropriate ARM CMP (cmp) and corresponding condition code for

3257

/// the given operands.

3258

SDValue

3259

ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,

3260

SDValue &ARMcc, SelectionDAG &DAG,

3261

SDLoc dl) const {

3262

if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {

3263

unsigned C = RHSC->getZExtValue();

3264

if (!isLegalICmpImmediate(C)) {

3265

// Constant does not fit, try adjusting it by one?

3266

switch (CC) {

3267

default: break;

3268

case ISD::SETLT:

3269

case ISD::SETGE:

3270

if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {

3271

CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;

3272

RHS = DAG.getConstant(C-1, MVT::i32);

3273

}

3274

break;

3275

case ISD::SETULT:

3276

case ISD::SETUGE:

3277

if (C != 0 && isLegalICmpImmediate(C-1)) {

3278

CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;

3279

RHS = DAG.getConstant(C-1, MVT::i32);

3280

}

3281

break;

3282

case ISD::SETLE:

3283

case ISD::SETGT:

3284

if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {

3285

CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;

3286

RHS = DAG.getConstant(C+1, MVT::i32);

3287

}

3288

break;

3289

case ISD::SETULE:

3290

case ISD::SETUGT:

3291

if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {

3292

CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;

3293

RHS = DAG.getConstant(C+1, MVT::i32);

3294

}

3295

break;

3296

}

3297

}

3298

}

3299

3300

ARMCC::CondCodes CondCode = IntCCToARMCC(CC);

3301

ARMISD::NodeType CompareType;

3302

switch (CondCode) {

3303

default:

3304

CompareType = ARMISD::CMP;

3305

break;

3306

case ARMCC::EQ:

3307

case ARMCC::NE:

3308

// Uses only Z Flag

3309

CompareType = ARMISD::CMPZ;

3310

break;

3311

}

3312

ARMcc = DAG.getConstant(CondCode, MVT::i32);

3313

return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS);

3314

}

3315

3316

/// Returns a appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.

3317

SDValue

3318

ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,

3319

SDLoc dl) const {

3320

assert(!Subtarget->isFPOnlySP() || RHS.getValueType() != MVT::f64)((!Subtarget->isFPOnlySP() || RHS.getValueType() != MVT::f64
) ? static_cast<void> (0) : __assert_fail ("!Subtarget->isFPOnlySP() || RHS.getValueType() != MVT::f64"
, "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 3320, __PRETTY_FUNCTION__));

3321

SDValue Cmp;

3322

if (!isFloatingPointZero(RHS))

3323

Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS);

3324

else

3325

Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS);

3326

return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);

3327

}

3328

3329

/// duplicateCmp - Glue values can have only one use, so this function

3330

/// duplicates a comparison node.

3331

SDValue

3332

ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {

3333

unsigned Opc = Cmp.getOpcode();

3334

SDLoc DL(Cmp);

3335

if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ)

3336

return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));

3337

3338

assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation")((Opc == ARMISD::FMSTAT && "unexpected comparison operation"
) ? static_cast<void> (0) : __assert_fail ("Opc == ARMISD::FMSTAT && \"unexpected comparison operation\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 3338, __PRETTY_FUNCTION__));

3339

Cmp = Cmp.getOperand(0);

3340

Opc = Cmp.getOpcode();

3341

if (Opc == ARMISD::CMPFP)

3342

Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));

3343

else {

3344

assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT")((Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT"
) ? static_cast<void> (0) : __assert_fail ("Opc == ARMISD::CMPFPw0 && \"unexpected operand of FMSTAT\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 3344, __PRETTY_FUNCTION__));

3345

Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0));

3346

}

3347

return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);

3348

}

3349

3350

std::pair<SDValue, SDValue>

3351

ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,

3352

SDValue &ARMcc) const {

3353

assert(Op.getValueType() == MVT::i32 && "Unsupported value type")((Op.getValueType() == MVT::i32 && "Unsupported value type"
) ? static_cast<void> (0) : __assert_fail ("Op.getValueType() == MVT::i32 && \"Unsupported value type\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 3353, __PRETTY_FUNCTION__));

3354

3355

SDValue Value, OverflowCmp;

3356

SDValue LHS = Op.getOperand(0);

3357

SDValue RHS = Op.getOperand(1);

3358

3359

3360

// FIXME: We are currently always generating CMPs because we don't support

3361

// generating CMN through the backend. This is not as good as the natural

3362

// CMP case because it causes a register dependency and cannot be folded

3363

// later.

3364

3365

switch (Op.getOpcode()) {

3366

default:

3367

llvm_unreachable("Unknown overflow instruction!")::llvm::llvm_unreachable_internal("Unknown overflow instruction!"
, "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 3367);

3368

case ISD::SADDO:

3369

ARMcc = DAG.getConstant(ARMCC::VC, MVT::i32);

3370

Value = DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(), LHS, RHS);

3371

OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, Value, LHS);

3372

break;

3373

case ISD::UADDO:

3374

ARMcc = DAG.getConstant(ARMCC::HS, MVT::i32);

3375

Value = DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(), LHS, RHS);

3376

OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, Value, LHS);

3377

break;

3378

case ISD::SSUBO:

3379

ARMcc = DAG.getConstant(ARMCC::VC, MVT::i32);

3380

Value = DAG.getNode(ISD::SUB, SDLoc(Op), Op.getValueType(), LHS, RHS);

3381

OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, LHS, RHS);

3382

break;

3383

case ISD::USUBO:

3384

ARMcc = DAG.getConstant(ARMCC::HS, MVT::i32);

3385

Value = DAG.getNode(ISD::SUB, SDLoc(Op), Op.getValueType(), LHS, RHS);

3386

OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, LHS, RHS);

3387

break;

3388

} // switch (...)

3389

3390

return std::make_pair(Value, OverflowCmp);

3391

}

3392

3393

3394

/// Lower an overflow-checking arithmetic node into MERGE_VALUES of the
/// arithmetic result and an overflow value produced by a CMOV over the
/// constants 1 and 0. Returns an empty SDValue when the value type is not yet
/// legal so that legalization can expand the node instead.
SDValue
ARMTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();

  // Let legalize expand this if it isn't a legal type yet.
  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
    return SDValue();

  SDValue ARMcc;
  std::pair<SDValue, SDValue> Expanded = getARMXALUOOp(Op, DAG, ARMcc);
  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);

  // We use 0 and 1 as false and true values.
  SDValue One = DAG.getConstant(1, MVT::i32);
  SDValue Zero = DAG.getConstant(0, MVT::i32);

  SDValue Overflow = DAG.getNode(ARMISD::CMOV, SDLoc(Op), VT, One, Zero,
                                 ARMcc, CCR, Expanded.second);

  SDVTList VTs = DAG.getVTList(VT, MVT::i32);
  return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), VTs, Expanded.first,
                     Overflow);
}

3415

3416

3417

/// Lower ISD::SELECT. Three strategies are tried in order:
///  1. The condition is the overflow result (result #1) of an [SU]ADDO/[SU]SUBO
///     node: expand it with getARMXALUOOp and select with a CMOV directly.
///  2. The condition is a single-use CMOV between the constants 0 and 1: fold
///     the select into a CMOV on a duplicate of that CMOV's compare.
///  3. Otherwise mask the condition to its low bit and emit a SELECT_CC that
///     tests it against zero.
SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
  SDValue Cond = Op.getOperand(0);
  SDValue SelectTrue = Op.getOperand(1);
  SDValue SelectFalse = Op.getOperand(2);
  SDLoc dl(Op);

  const unsigned CondOpc = Cond.getOpcode();
  const bool IsOverflowOp = CondOpc == ISD::SADDO || CondOpc == ISD::UADDO ||
                            CondOpc == ISD::SSUBO || CondOpc == ISD::USUBO;

  if (Cond.getResNo() == 1 && IsOverflowOp) {
    if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
      return SDValue();

    SDValue ARMcc;
    std::pair<SDValue, SDValue> Expanded = getARMXALUOOp(Cond, DAG, ARMcc);
    SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
    EVT VT = Op.getValueType();

    return getCMOV(SDLoc(Op), VT, SelectTrue, SelectFalse, ARMcc, CCR,
                   Expanded.second, DAG);
  }

  // Convert:
  //
  //   (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
  //   (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
  //
  if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
    const ConstantSDNode *FirstConst =
        dyn_cast<ConstantSDNode>(Cond.getOperand(0));
    const ConstantSDNode *SecondConst =
        dyn_cast<ConstantSDNode>(Cond.getOperand(1));

    if (FirstConst && SecondConst) {
      unsigned FirstVal = FirstConst->getZExtValue();
      unsigned SecondVal = SecondConst->getZExtValue();

      // Both stay empty unless the constants are exactly {1,0} or {0,1}.
      SDValue True;
      SDValue False;
      if (FirstVal == 1 && SecondVal == 0) {
        True = SelectTrue;
        False = SelectFalse;
      } else if (FirstVal == 0 && SecondVal == 1) {
        True = SelectFalse;
        False = SelectTrue;
      }

      if (True.getNode() && False.getNode()) {
        EVT VT = Op.getValueType();
        SDValue ARMcc = Cond.getOperand(2);
        SDValue CCR = Cond.getOperand(3);
        SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG);
        assert(True.getValueType() == VT);
        return getCMOV(dl, VT, True, False, ARMcc, CCR, Cmp, DAG);
      }
    }
  }

  // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the
  // undefined bits before doing a full-word comparison with zero.
  EVT CondVT = Cond.getValueType();
  SDValue LowBit = DAG.getConstant(1, CondVT);
  Cond = DAG.getNode(ISD::AND, dl, CondVT, Cond, LowBit);

  SDValue Zero = DAG.getConstant(0, Cond.getValueType());
  return DAG.getSelectCC(dl, Cond, Zero, SelectTrue, SelectFalse, ISD::SETNE);
}

3485

3486

/// Return the inverse of CC for VSEL lowering. SETNE is mapped straight to
/// SETEQ; every other code goes through ISD::getSetCCInverse.
static ISD::CondCode getInverseCCForVSEL(ISD::CondCode CC) {
  return CC == ISD::SETNE ? ISD::SETEQ : ISD::getSetCCInverse(CC, true);
}

3491

3492

/// Work out how to express the FP condition CC with the condition codes VSEL
/// can use. Updates CondCode (left untouched for codes not handled here) and
/// reports through swpCmpOps / swpVselOps whether the compare operands and/or
/// the VSEL operands need to be swapped.
static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
                                 bool &swpCmpOps, bool &swpVselOps) {
  switch (CC) {
  // Start by selecting the GE condition code for opcodes that return true for
  // 'equality'
  case ISD::SETUGE:
  case ISD::SETOGE:
  case ISD::SETOLE:
  case ISD::SETULE:
    CondCode = ARMCC::GE;
    break;
  // and GT for opcodes that return false for 'equality'.
  case ISD::SETUGT:
  case ISD::SETOGT:
  case ISD::SETOLT:
  case ISD::SETULT:
    CondCode = ARMCC::GT;
    break;
  default:
    break;
  }

  // Since we are constrained to GE/GT, if the opcode contains 'less', we need
  // to swap the compare operands.
  const bool ContainsLess = CC == ISD::SETOLE || CC == ISD::SETULE ||
                            CC == ISD::SETOLT || CC == ISD::SETULT;
  if (ContainsLess)
    swpCmpOps = true;

  // Both GT and GE are ordered comparisons, and return false for 'unordered'.
  // If we have an unordered opcode, we need to swap the operands to the VSEL
  // instruction (effectively negating the condition).
  //
  // This also has the effect of swapping which one of 'less' or 'greater'
  // returns true, so we also swap the compare operands. It also switches
  // whether we return true for 'equality', so we compensate by picking the
  // opposite condition code to our original choice.
  const bool IsUnorderedRelational = CC == ISD::SETULE || CC == ISD::SETULT ||
                                     CC == ISD::SETUGE || CC == ISD::SETUGT;
  if (IsUnorderedRelational) {
    swpCmpOps = !swpCmpOps;
    swpVselOps = !swpVselOps;
    CondCode = (CondCode == ARMCC::GT) ? ARMCC::GE : ARMCC::GT;
  }

  // 'ordered' is 'anything but unordered', so use the VS condition code and
  // swap the VSEL operands.
  if (CC == ISD::SETO) {
    CondCode = ARMCC::VS;
    swpVselOps = true;
  }

  // 'unordered or not equal' is 'anything but equal', so use the EQ condition
  // code and swap the VSEL operands.
  if (CC == ISD::SETUNE) {
    CondCode = ARMCC::EQ;
    swpVselOps = true;
  }
}

3540

3541

/// Build an ARMISD::CMOV with operands (FalseVal, TrueVal, ARMcc, CCR, Cmp).
/// For f64 on a subtarget that reports isFPOnlySP(), each value is first split
/// into two i32 halves with VMOVRRD, one CMOV is emitted per half, and the
/// halves are rejoined with VMOVDRR. The second CMOV is given a duplicate of
/// Cmp, created with duplicateCmp.
SDValue ARMTargetLowering::getCMOV(SDLoc dl, EVT VT, SDValue FalseVal,
                                   SDValue TrueVal, SDValue ARMcc, SDValue CCR,
                                   SDValue Cmp, SelectionDAG &DAG) const {
  // Common case: a single CMOV of the requested type.
  if (!Subtarget->isFPOnlySP() || VT != MVT::f64)
    return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,
                       Cmp);

  SDVTList HalvesVTs = DAG.getVTList(MVT::i32, MVT::i32);
  SDValue FalseParts = DAG.getNode(ARMISD::VMOVRRD, dl, HalvesVTs, FalseVal);
  HalvesVTs = DAG.getVTList(MVT::i32, MVT::i32);
  SDValue TrueParts = DAG.getNode(ARMISD::VMOVRRD, dl, HalvesVTs, TrueVal);

  SDValue Low = DAG.getNode(ARMISD::CMOV, dl, MVT::i32,
                            FalseParts.getValue(0), TrueParts.getValue(0),
                            ARMcc, CCR, Cmp);
  SDValue CmpCopy = duplicateCmp(Cmp, DAG);
  SDValue High = DAG.getNode(ARMISD::CMOV, dl, MVT::i32,
                             FalseParts.getValue(1), TrueParts.getValue(1),
                             ARMcc, CCR, CmpCopy);

  return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Low, High);
}

3566

3567

/// Lower ISD::SELECT_CC into a compare followed by one or two CMOVs.
/// f64 compares on FP-only-SP subtargets are softened first. i32 compares use
/// getARMCmp; floating-point compares use getVFPCmp and may need a second CMOV
/// when FPCCToARMCC reports a second condition code. When the subtarget has
/// FP-ARMv8 and the selected values are f32/f64, the condition and operands
/// are massaged so that VSEL can be used, and under unsafe-fp-math a matching
/// compare+select is turned into VMAXNM/VMINNM.
SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  EVT VT = Op.getValueType();
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue TrueVal = Op.getOperand(2);
  SDValue FalseVal = Op.getOperand(3);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();

  if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) {
    DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC,
                                                    dl);

    // If softenSetCCOperands only returned one value, we should compare it to
    // zero.
    if (!RHS.getNode()) {
      RHS = DAG.getConstant(0, LHS.getValueType());
      CC = ISD::SETNE;
    }
  }

  // VSEL is only considered on ARMv8 FP for f32/f64 results. This is computed
  // before any operand swapping below, as both paths test it up front.
  const bool CanUseVSEL =
      getSubtarget()->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
                                       TrueVal.getValueType() == MVT::f64);

  if (LHS.getValueType() == MVT::i32) {
    // Try to generate VSEL on ARMv8.
    // The VSEL instruction can't use all the usual ARM condition
    // codes: it only has two bits to select the condition code, so it's
    // constrained to use only GE, GT, VS and EQ.
    //
    // To implement all the various ISD::SETXXX opcodes, we sometimes need to
    // swap the operands of the previous compare instruction (effectively
    // inverting the compare condition, swapping 'less' and 'greater') and
    // sometimes need to swap the operands to the VSEL (which inverts the
    // condition in the sense of firing whenever the previous condition didn't)
    if (CanUseVSEL) {
      switch (IntCCToARMCC(CC)) {
      case ARMCC::LT:
      case ARMCC::LE:
      case ARMCC::VC:
      case ARMCC::NE:
        CC = getInverseCCForVSEL(CC);
        std::swap(TrueVal, FalseVal);
        break;
      default:
        break;
      }
    }

    SDValue ARMcc;
    SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
    SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
    return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
  }

  ARMCC::CondCodes CondCode, CondCode2;
  FPCCToARMCC(CC, CondCode, CondCode2);

  // Try to generate VSEL on ARMv8.
  if (CanUseVSEL) {
    // We can select VMAXNM/VMINNM from a compare followed by a select with the
    // same operands, as follows:
    //   c = fcmp [ogt, olt, ugt, ult] a, b
    //   select c, a, b
    // We only do this in unsafe-fp-math, because signed zeros and NaNs are
    // handled differently than the original code sequence.
    if (getTargetMachine().Options.UnsafeFPMath) {
      const bool IsGreater = CC == ISD::SETOGT || CC == ISD::SETUGT;
      const bool IsLess = CC == ISD::SETOLT || CC == ISD::SETULT;
      if (LHS == TrueVal && RHS == FalseVal) {
        if (IsGreater)
          return DAG.getNode(ARMISD::VMAXNM, dl, VT, TrueVal, FalseVal);
        if (IsLess)
          return DAG.getNode(ARMISD::VMINNM, dl, VT, TrueVal, FalseVal);
      } else if (LHS == FalseVal && RHS == TrueVal) {
        if (IsLess)
          return DAG.getNode(ARMISD::VMAXNM, dl, VT, TrueVal, FalseVal);
        if (IsGreater)
          return DAG.getNode(ARMISD::VMINNM, dl, VT, TrueVal, FalseVal);
      }
    }

    bool SwapCompare = false;
    bool SwapSelect = false;
    checkVSELConstraints(CC, CondCode, SwapCompare, SwapSelect);

    // Only apply the swaps when the resulting code is one VSEL supports.
    const bool VSELCompatible = CondCode == ARMCC::GT || CondCode == ARMCC::GE ||
                                CondCode == ARMCC::VS || CondCode == ARMCC::EQ;
    if (VSELCompatible) {
      if (SwapCompare)
        std::swap(LHS, RHS);
      if (SwapSelect)
        std::swap(TrueVal, FalseVal);
    }
  }

  SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
  SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
  SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
  if (CondCode2 == ARMCC::AL)
    return Result;

  SDValue ARMcc2 = DAG.getConstant(CondCode2, MVT::i32);
  // FIXME: Needs another CMP because flag can have but one use.
  SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
  return getCMOV(dl, VT, Result, TrueVal, ARMcc2, CCR, Cmp2, DAG);
}

3666

3667

/// canChangeToInt - Given the fp compare operand, return true if it is suitable

3668

/// to morph to an integer compare sequence.

3669

static bool canChangeToInt(SDValue Op, bool &SeenZero,

3670

const ARMSubtarget *Subtarget) {

3671

SDNode *N = Op.getNode();

3672

if (!N->hasOneUse())

3673

// Otherwise it requires moving the value from fp to integer registers.

3674

return false;

3675

if (!N->getNumValues())

3676

return false;

3677

EVT VT = Op.getValueType();

3678

if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())

3679

// f32 case is generally profitable. f64 case only makes sense when vcmpe +

3680

// vmrs are very slow, e.g. cortex-a8.

3681

return false;

3682

3683

if (isFloatingPointZero(Op)) {

3684

SeenZero = true;

3685

return true;

3686

}

3687

return ISD::isNormalLoad(N);

3688

}

3689

3690

/// Produce an i32 view of an f32 compare operand: a floating-point zero
/// becomes the i32 constant 0, and a load is re-issued as an i32 load from the
/// same chain, pointer and memory attributes. Any other operand is rejected
/// with llvm_unreachable.
static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
  if (isFloatingPointZero(Op))
    return DAG.getConstant(0, MVT::i32);

  LoadSDNode *Load = dyn_cast<LoadSDNode>(Op);
  if (!Load)
    llvm_unreachable("Unknown VFP cmp argument!");

  return DAG.getLoad(MVT::i32, SDLoc(Op),
                     Load->getChain(), Load->getBasePtr(),
                     Load->getPointerInfo(),
                     Load->isVolatile(), Load->isNonTemporal(),
                     Load->isInvariant(), Load->getAlignment());
}

3702

3703

static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,

3704

SDValue &RetVal1, SDValue &RetVal2) {

3705

if (isFloatingPointZero(Op)) {

3706

RetVal1 = DAG.getConstant(0, MVT::i32);

3707

RetVal2 = DAG.getConstant(0, MVT::i32);

3708

return;

3709

}

3710

3711

if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {

3712

SDValue Ptr = Ld->getBasePtr();

3713

RetVal1 = DAG.getLoad(MVT::i32, SDLoc(Op),

3714

Ld->getChain(), Ptr,

3715

Ld->getPointerInfo(),

3716

Ld->isVolatile(), Ld->isNonTemporal(),

3717

Ld->isInvariant(), Ld->getAlignment());

3718

3719

EVT PtrType = Ptr.getValueType();

3720

unsigned NewAlign = MinAlign(Ld->getAlignment(), 4);

3721

SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(Op),

3722

PtrType, Ptr, DAG.getConstant(4, PtrType));

3723

RetVal2 = DAG.getLoad(MVT::i32, SDLoc(Op),

3724

Ld->getChain(), NewPtr,

3725

Ld->getPointerInfo().getWithOffset(4),

3726

Ld->isVolatile(), Ld->isNonTemporal(),

3727

Ld->isInvariant(), NewAlign);

3728

return;

3729

}

3730

3731

3732

}

3733

3734

/// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some

3735

/// f32 and even f64 comparisons to integer ones.

3736

SDValue

3737

ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {

3738

SDValue Chain = Op.getOperand(0);

3739

ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();

3740

SDValue LHS = Op.getOperand(2);

3741

SDValue RHS = Op.getOperand(3);

3742

SDValue Dest = Op.getOperand(4);

3743

SDLoc dl(Op);

3744

3745

bool LHSSeenZero = false;

3746

bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget);

3747

bool RHSSeenZero = false;

3748

bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget);

3749

if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) {

3750

// If unsafe fp math optimization is enabled and there are no other uses of

3751

// the CMP operands, and the condition code is EQ or NE, we can optimize it

3752

// to an integer comparison.

3753

if (CC == ISD::SETOEQ)

3754

CC = ISD::SETEQ;

3755

else if (CC == ISD::SETUNE)

3756

CC = ISD::SETNE;

3757

3758

SDValue Mask = DAG.getConstant(0x7fffffff, MVT::i32);

3759

SDValue ARMcc;

3760

if (LHS.getValueType() == MVT::f32) {

3761

LHS = DAG.getNode(ISD::AND, dl, MVT::i32,

3762

bitcastf32Toi32(LHS, DAG), Mask);

3763

RHS = DAG.getNode(ISD::AND, dl, MVT::i32,

3764

bitcastf32Toi32(RHS, DAG), Mask);

3765

SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);

3766

SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);

3767

return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,

3768

Chain, Dest, ARMcc, CCR, Cmp);

3769

}

3770

3771

SDValue LHS1, LHS2;

3772

SDValue RHS1, RHS2;

3773

expandf64Toi32(LHS, DAG, LHS1, LHS2);

3774

expandf64Toi32(RHS, DAG, RHS1, RHS2);

3775

LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask);

3776

RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask);

3777

ARMCC::CondCodes CondCode = IntCCToARMCC(CC);

3778

ARMcc = DAG.getConstant(CondCode, MVT::i32);

3779

SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);

3780

SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };

3781

return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops);

3782

}

3783

3784

return SDValue();

3785

}

3786

3787

/// Lower ISD::BR_CC into ARMISD::BRCOND. f64 compares on FP-only-SP subtargets
/// are softened first. i32 compares use getARMCmp. f32/f64 compares first try
/// OptimizeVFPBrcond (unsafe-fp-math, equality-style conditions only) and
/// otherwise use getVFPCmp, chaining a second BRCOND when FPCCToARMCC reports
/// a second condition code.
SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  SDValue Chain = Op.getOperand(0);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
  SDValue LHS = Op.getOperand(2);
  SDValue RHS = Op.getOperand(3);
  SDValue Dest = Op.getOperand(4);

  if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) {
    DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC,
                                                    dl);

    // If softenSetCCOperands only returned one value, we should compare it to
    // zero.
    if (!RHS.getNode()) {
      RHS = DAG.getConstant(0, LHS.getValueType());
      CC = ISD::SETNE;
    }
  }

  if (LHS.getValueType() == MVT::i32) {
    SDValue IntCC;
    SDValue IntCmp = getARMCmp(LHS, RHS, CC, IntCC, DAG, dl);
    SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
    return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
                       Chain, Dest, IntCC, CCR, IntCmp);
  }

  assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);

  const bool IsEqualityCC = CC == ISD::SETEQ || CC == ISD::SETOEQ ||
                            CC == ISD::SETNE || CC == ISD::SETUNE;
  if (getTargetMachine().Options.UnsafeFPMath && IsEqualityCC) {
    SDValue Optimized = OptimizeVFPBrcond(Op, DAG);
    if (Optimized.getNode())
      return Optimized;
  }

  ARMCC::CondCodes CondCode, CondCode2;
  FPCCToARMCC(CC, CondCode, CondCode2);

  SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
  SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
  SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue FirstOps[] = { Chain, Dest, ARMcc, CCR, Cmp };
  SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, FirstOps);
  if (CondCode2 == ARMCC::AL)
    return Res;

  // A second branch on the other condition, chained and glued to the first.
  ARMcc = DAG.getConstant(CondCode2, MVT::i32);
  SDValue SecondOps[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
  return DAG.getNode(ARMISD::BRCOND, dl, VTList, SecondOps);
}

3841

3842

/// Lower ISD::BR_JT. The table entry address is computed as
/// Table + Index * 4. Thumb2 emits BR2_JT on that address directly; otherwise
/// the entry is loaded and, under the PIC relocation model, added to the table
/// address before emitting BR_JT.
SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Table = Op.getOperand(1);
  SDValue OrigIndex = Op.getOperand(2);

  EVT PTy = getPointerTy();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
  ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
  SDValue UId = DAG.getConstant(AFI->createJumpTableUId(), PTy);
  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
  Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI, UId);

  SDValue ScaledIndex =
      DAG.getNode(ISD::MUL, dl, PTy, OrigIndex, DAG.getConstant(4, PTy));
  SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, ScaledIndex, Table);

  if (Subtarget->isThumb2()) {
    // Thumb2 uses a two-level jump. That is, it jumps into the jump table
    // which does another jump to the destination. This also makes it easier
    // to translate it to TBB / TBH later.
    // FIXME: This might not work if the function is extremely large.
    return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
                       Addr, OrigIndex, JTI, UId);
  }

  if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
    // The loaded entry is added to the table address to form the target.
    Addr = DAG.getLoad(EVT(MVT::i32), dl, Chain, Addr,
                       MachinePointerInfo::getJumpTable(),
                       false, false, false, 0);
    Chain = Addr.getValue(1);
    Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table);
    return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
  }

  // Non-PIC: the loaded entry is used as the branch target as-is.
  Addr = DAG.getLoad(PTy, dl, Chain, Addr,
                     MachinePointerInfo::getJumpTable(),
                     false, false, false, 0);
  Chain = Addr.getValue(1);
  return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
}

3879

3880

/// Custom-lower a vector FP_TO_SINT/FP_TO_UINT. Conversions producing i32
/// elements are kept as-is when the source elements are f32 and unrolled
/// otherwise. For the remaining case (v4f32 source), a v4i16 result is done as
/// a conversion to v4i32 followed by TRUNCATE; anything else is unrolled.
static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
  SDLoc dl(Op);
  EVT ResVT = Op.getValueType();
  EVT SrcVT = Op.getOperand(0).getValueType();

  if (ResVT.getVectorElementType() == MVT::i32)
    return SrcVT.getVectorElementType() == MVT::f32
               ? Op
               : DAG.UnrollVectorOp(Op.getNode());

  assert(SrcVT == MVT::v4f32 && "Invalid type for custom lowering!");
  if (ResVT != MVT::v4i16)
    return DAG.UnrollVectorOp(Op.getNode());

  SDValue Wide = DAG.getNode(Op.getOpcode(), dl, MVT::v4i32, Op.getOperand(0));
  return DAG.getNode(ISD::TRUNCATE, dl, ResVT, Wide);
}

3898

3899

/// Lower FP_TO_SINT / FP_TO_UINT. Vector results are delegated to
/// LowerVectorFP_TO_INT. An f64 source on an FP-only-SP subtarget becomes a
/// runtime library call. Otherwise the conversion is emitted as FTOSI/FTOUI
/// with an f32-typed result that is then bitcast to i32.
SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  if (VT.isVector())
    return LowerVectorFP_TO_INT(Op, DAG);

  if (Subtarget->isFPOnlySP() && Op.getOperand(0).getValueType() == MVT::f64) {
    RTLIB::Libcall LC =
        (Op.getOpcode() == ISD::FP_TO_SINT)
            ? RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(),
                                 Op.getValueType())
            : RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(),
                                 Op.getValueType());
    return makeLibCall(DAG, LC, Op.getValueType(), &Op.getOperand(0), 1,
                       /*isSigned*/ false, SDLoc(Op)).first;
  }

  SDLoc dl(Op);
  unsigned Opc;
  const unsigned InOpc = Op.getOpcode();
  if (InOpc == ISD::FP_TO_SINT)
    Opc = ARMISD::FTOSI;
  else if (InOpc == ISD::FP_TO_UINT)
    Opc = ARMISD::FTOUI;
  else
    llvm_unreachable("Invalid opcode!");

  SDValue Converted = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0));
  return DAG.getNode(ISD::BITCAST, dl, MVT::i32, Converted);
}

3931

3932

/// Custom-lower a vector SINT_TO_FP/UINT_TO_FP. Conversions from i32 elements
/// are kept as-is when the result elements are f32 and unrolled otherwise.
/// For the remaining case (v4i16 source), a v4f32 result is done by sign- or
/// zero-extending the source to v4i32 and converting that; anything else is
/// unrolled.
static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
  SDLoc dl(Op);
  EVT ResVT = Op.getValueType();
  EVT SrcVT = Op.getOperand(0).getValueType();

  if (SrcVT.getVectorElementType() == MVT::i32)
    return ResVT.getVectorElementType() == MVT::f32
               ? Op
               : DAG.UnrollVectorOp(Op.getNode());

  assert(SrcVT == MVT::v4i16 && "Invalid type for custom lowering!");
  if (ResVT != MVT::v4f32)
    return DAG.UnrollVectorOp(Op.getNode());

  // The extension kind follows the signedness of the conversion.
  unsigned ExtOpc;
  unsigned ConvOpc;
  const unsigned InOpc = Op.getOpcode();
  if (InOpc == ISD::SINT_TO_FP) {
    ExtOpc = ISD::SIGN_EXTEND;
    ConvOpc = ISD::SINT_TO_FP;
  } else if (InOpc == ISD::UINT_TO_FP) {
    ExtOpc = ISD::ZERO_EXTEND;
    ConvOpc = ISD::UINT_TO_FP;
  } else {
    llvm_unreachable("Invalid opcode!");
  }

  SDValue Wide = DAG.getNode(ExtOpc, dl, MVT::v4i32, Op.getOperand(0));
  return DAG.getNode(ConvOpc, dl, ResVT, Wide);
}

3964

3965

/// Lower SINT_TO_FP / UINT_TO_FP. Vector results are delegated to
/// LowerVectorINT_TO_FP. An f64 result on an FP-only-SP subtarget becomes a
/// runtime library call. Otherwise the integer operand is bitcast to f32 and
/// fed to SITOF/UITOF producing the requested type.
SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  if (VT.isVector())
    return LowerVectorINT_TO_FP(Op, DAG);

  if (Subtarget->isFPOnlySP() && Op.getValueType() == MVT::f64) {
    RTLIB::Libcall LC =
        (Op.getOpcode() == ISD::SINT_TO_FP)
            ? RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(),
                                 Op.getValueType())
            : RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(),
                                 Op.getValueType());
    return makeLibCall(DAG, LC, Op.getValueType(), &Op.getOperand(0), 1,
                       /*isSigned*/ false, SDLoc(Op)).first;
  }

  SDLoc dl(Op);
  unsigned Opc;
  const unsigned InOpc = Op.getOpcode();
  if (InOpc == ISD::SINT_TO_FP)
    Opc = ARMISD::SITOF;
  else if (InOpc == ISD::UINT_TO_FP)
    Opc = ARMISD::UITOF;
  else
    llvm_unreachable("Invalid opcode!");

  SDValue AsFloatBits =
      DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op.getOperand(0));
  return DAG.getNode(Opc, dl, VT, AsFloatBits);
}

3998

3999

/// Lower FCOPYSIGN: the result takes its top (sign) bit from operand 1 and all
/// remaining bits from operand 0. Two strategies:
///  - NEON (when available and operand 0 is not produced by a BITCAST or
///    VMOVDRR): build a sign-bit vector mask and combine the operands with
///    AND/OR on v2i32 (f32 result) or v1i64 (f64 result).
///  - Otherwise: do the same masking with i32 integer operations, splitting
///    and rejoining f64 values with VMOVRRD / VMOVDRR.
SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
  // Implement fcopysign with a fabs and a conditional fneg.
  SDLoc dl(Op);
  SDValue Mag = Op.getOperand(0);
  SDValue Sign = Op.getOperand(1);
  EVT VT = Op.getValueType();
  EVT SrcVT = Sign.getValueType();

  const unsigned MagOpc = Mag.getOpcode();
  const bool InGPR = MagOpc == ISD::BITCAST || MagOpc == ARMISD::VMOVDRR;
  const bool UseNEON = !InGPR && Subtarget->hasNEON();

  if (UseNEON) {
    // Use VBSL to copy the sign bit.
    unsigned EncodedVal = ARM_AM::createNEONModImm(0x6, 0x80);
    SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32,
                               DAG.getTargetConstant(EncodedVal, MVT::i32));
    EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;

    if (VT == MVT::f64) {
      // Shift the mask left by 32 within the 64-bit lane.
      Mask = DAG.getNode(ARMISD::VSHL, dl, OpVT,
                         DAG.getNode(ISD::BITCAST, dl, OpVT, Mask),
                         DAG.getConstant(32, MVT::i32));
    } else { /*if (VT == MVT::f32)*/
      Mag = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Mag);
    }

    // Shift the sign source so its sign bit lines up with the mask.
    if (SrcVT == MVT::f32) {
      Sign = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Sign);
      if (VT == MVT::f64)
        Sign = DAG.getNode(ARMISD::VSHL, dl, OpVT,
                           DAG.getNode(ISD::BITCAST, dl, OpVT, Sign),
                           DAG.getConstant(32, MVT::i32));
    } else if (VT == MVT::f32) {
      Sign = DAG.getNode(ARMISD::VSHRu, dl, MVT::v1i64,
                         DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Sign),
                         DAG.getConstant(32, MVT::i32));
    }
    Mag = DAG.getNode(ISD::BITCAST, dl, OpVT, Mag);
    Sign = DAG.getNode(ISD::BITCAST, dl, OpVT, Sign);

    // MaskNot = Mask ^ all-ones.
    SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createNEONModImm(0xe, 0xff),
                                            MVT::i32);
    AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes);
    SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask,
                                  DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes));

    // (Sign & Mask) | (Mag & ~Mask)
    SDValue Res = DAG.getNode(ISD::OR, dl, OpVT,
                              DAG.getNode(ISD::AND, dl, OpVT, Sign, Mask),
                              DAG.getNode(ISD::AND, dl, OpVT, Mag, MaskNot));
    if (VT == MVT::f32) {
      Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res);
      Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
                        DAG.getConstant(0, MVT::i32));
    } else {
      Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res);
    }

    return Res;
  }

  // Bitcast operand 1 to i32.
  if (SrcVT == MVT::f64)
    Sign = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
                       Sign).getValue(1);
  Sign = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Sign);

  // Or in the signbit with integer operations.
  SDValue SignBitMask = DAG.getConstant(0x80000000, MVT::i32);
  SDValue MagBitsMask = DAG.getConstant(0x7fffffff, MVT::i32);
  Sign = DAG.getNode(ISD::AND, dl, MVT::i32, Sign, SignBitMask);

  if (VT == MVT::f32) {
    Mag = DAG.getNode(ISD::AND, dl, MVT::i32,
                      DAG.getNode(ISD::BITCAST, dl, MVT::i32, Mag),
                      MagBitsMask);
    return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
                       DAG.getNode(ISD::OR, dl, MVT::i32, Mag, Sign));
  }

  // f64: Or the high part with signbit and then combine two parts.
  Mag = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
                    Mag);
  SDValue Lo = Mag.getValue(0);
  SDValue Hi =
      DAG.getNode(ISD::AND, dl, MVT::i32, Mag.getValue(1), MagBitsMask);
  Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Sign);
  return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
}

4080

4081

/// Lower a return-address query. Depth 0 is answered from LR, which is
/// registered as a live-in of the function; any other depth loads the word
/// at offset 4 from the frame address that LowerFRAMEADDR produces for Op.
/// Returns an empty SDValue when verifyReturnAddressArgumentIsConstant
/// reports true.
SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
  MachineFunction &MF = DAG.getMachineFunction();
  MF.getFrameInfo()->setReturnAddressIsTaken(true);

  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  SDLoc dl(Op);
  EVT VT = Op.getValueType();
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();

  if (Depth == 0) {
    // Return LR, which contains the return address. Mark it an implicit
    // live-in.
    unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
    return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
  }

  // Outer frame: read the slot 4 bytes past that frame's address.
  SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
  SDValue Offset = DAG.getConstant(4, MVT::i32);
  SDValue SlotAddr = DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset);
  return DAG.getLoad(VT, dl, DAG.getEntryNode(), SlotAddr,
                     MachinePointerInfo(), false, false, false, 0);
}

4104

4105

/// Lower a frame-address query. Starts from a copy of the frame register
/// and then performs one load through the current address per requested
/// depth level.
SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MF.getFrameInfo()->setFrameAddressIsTaken(true);

  const ARMBaseRegisterInfo &ARI =
    *static_cast<const ARMBaseRegisterInfo*>(RegInfo);

  SDLoc dl(Op);  // FIXME probably not meaningful
  EVT VT = Op.getValueType();
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();

  unsigned FrameReg = ARI.getFrameRegister(MF);
  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);

  // Follow the chain once per level.
  for (unsigned Level = 0; Level != Depth; ++Level)
    FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
                            MachinePointerInfo(),
                            false, false, false, 0);
  return FrameAddr;
}

4123

4124

// FIXME? Maybe this could be a TableGen attribute on some registers and

4125

// this table could be generated automatically from RegInfo.

4126

unsigned ARMTargetLowering::getRegisterByName(const char* RegName,

4127

EVT VT) const {

4128

unsigned Reg = StringSwitch<unsigned>(RegName)

4129

.Case("sp", ARM::SP)

4130

.Default(0);

4131

if (Reg)

4132

return Reg;

4133

report_fatal_error("Invalid register name global variable");

4134

}

4135

4136

/// ExpandBITCAST - If the target supports VFP, this function is called to

4137

/// expand a bit convert where either the source or destination type is i64 to

4138

/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64

4139

/// operand type is illegal (e.g., v2f32 for a target that doesn't support

4140

/// vectors), since the legalizer won't know what to do with that.

4141

static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) {

4142

const TargetLowering &TLI = DAG.getTargetLoweringInfo();

4143

SDLoc dl(N);

4144

SDValue Op = N->getOperand(0);

4145

4146

// This function is only supposed to be called for i64 types, either as the

4147

// source or destination of the bit convert.

4148

EVT SrcVT = Op.getValueType();

4149

EVT DstVT = N->getValueType(0);

4150

assert((SrcVT == MVT::i64 || DstVT == MVT::i64) &&(((SrcVT == MVT::i64 || DstVT == MVT::i64) && "ExpandBITCAST called for non-i64 type"
) ? static_cast<void> (0) : __assert_fail ("(SrcVT == MVT::i64 || DstVT == MVT::i64) && \"ExpandBITCAST called for non-i64 type\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 4151, __PRETTY_FUNCTION__))

4151

"ExpandBITCAST called for non-i64 type")(((SrcVT == MVT::i64 || DstVT == MVT::i64) && "ExpandBITCAST called for non-i64 type"
) ? static_cast<void> (0) : __assert_fail ("(SrcVT == MVT::i64 || DstVT == MVT::i64) && \"ExpandBITCAST called for non-i64 type\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 4151, __PRETTY_FUNCTION__));

4152

4153

// Turn i64->f64 into VMOVDRR.

4154

if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) {

4155

SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,

4156

DAG.getConstant(0, MVT::i32));

4157

SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,

4158

DAG.getConstant(1, MVT::i32));

4159

return DAG.getNode(ISD::BITCAST, dl, DstVT,

4160

DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi));

4161

}

4162

4163

// Turn f64->i64 into VMOVRRD.

4164

if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) {

4165

SDValue Cvt;

4166

if (TLI.isBigEndian() && SrcVT.isVector() &&

4167

SrcVT.getVectorNumElements() > 1)

4168

Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,

4169

DAG.getVTList(MVT::i32, MVT::i32),

4170

DAG.getNode(ARMISD::VREV64, dl, SrcVT, Op));

4171

else

4172

Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,

4173

DAG.getVTList(MVT::i32, MVT::i32), Op);

4174

// Merge the pieces into a single i64 value.

4175

return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));

4176

}

4177

4178

return SDValue();

4179

}

4180

4181

/// getZeroVector - Returns a vector of specified type with all zero elements.

4182

/// Zero vectors are used to represent vector negation and in those cases

4183

/// will be implemented with the NEON VNEG instruction. However, VNEG does

4184

/// not support i64 elements, so sometimes the zero vectors will need to be

4185

/// explicitly constructed. Regardless, use a canonical VMOV to create the

4186

/// zero vector.

4187

static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, SDLoc dl) {

4188

assert(VT.isVector() && "Expected a vector type")((VT.isVector() && "Expected a vector type") ? static_cast
<void> (0) : __assert_fail ("VT.isVector() && \"Expected a vector type\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 4188, __PRETTY_FUNCTION__));

4189

// The canonical modified immediate encoding of a zero vector is....0!

4190

SDValue EncodedVal = DAG.getTargetConstant(0, MVT::i32);

4191

EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;

4192

SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);

4193

return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);

4194

}

4195

4196

/// LowerShiftRightParts - Lower SRA_PARTS, which returns two

4197

/// i32 values and take a 2 x i32 value to shift plus a shift amount.

4198

SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,

4199

SelectionDAG &DAG) const {

4200

assert(Op.getNumOperands() == 3 && "Not a double-shift!")((Op.getNumOperands() == 3 && "Not a double-shift!") ?
static_cast<void> (0) : __assert_fail ("Op.getNumOperands() == 3 && \"Not a double-shift!\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 4200, __PRETTY_FUNCTION__));

4201

EVT VT = Op.getValueType();

4202

unsigned VTBits = VT.getSizeInBits();

4203

SDLoc dl(Op);

4204

SDValue ShOpLo = Op.getOperand(0);

4205

SDValue ShOpHi = Op.getOperand(1);

4206

SDValue ShAmt = Op.getOperand(2);

4207

SDValue ARMcc;

4208

unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;

4209

4210

assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS)((Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::
SRL_PARTS) ? static_cast<void> (0) : __assert_fail ("Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS"
, "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 4210, __PRETTY_FUNCTION__));

4211

4212

SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,

4213

DAG.getConstant(VTBits, MVT::i32), ShAmt);

4214

SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);

4215

SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,

4216

DAG.getConstant(VTBits, MVT::i32));

4217

SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);

4218

SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);

4219

SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);

4220

4221

SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);

4222

SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,

4223

ARMcc, DAG, dl);

4224

SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);

4225

SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc,

4226

CCR, Cmp);

4227

4228

SDValue Ops[2] = { Lo, Hi };

4229

return DAG.getMergeValues(Ops, dl);

4230

}

4231

4232

/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two

4233

/// i32 values and take a 2 x i32 value to shift plus a shift amount.

4234

SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,

4235

SelectionDAG &DAG) const {

4236

4237

EVT VT = Op.getValueType();

4238

unsigned VTBits = VT.getSizeInBits();

4239

SDLoc dl(Op);

4240

SDValue ShOpLo = Op.getOperand(0);

4241

SDValue ShOpHi = Op.getOperand(1);

4242

SDValue ShAmt = Op.getOperand(2);

4243

SDValue ARMcc;

4244

4245

assert(Op.getOpcode() == ISD::SHL_PARTS)((Op.getOpcode() == ISD::SHL_PARTS) ? static_cast<void>
(0) : __assert_fail ("Op.getOpcode() == ISD::SHL_PARTS", "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 4245, __PRETTY_FUNCTION__));

4246

SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,

4247

DAG.getConstant(VTBits, MVT::i32), ShAmt);

4248

SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);

4249

SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,

4250

DAG.getConstant(VTBits, MVT::i32));

4251

SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);

4252

SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);

4253

4254

SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);

4255

SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);

4256

SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,

4257

ARMcc, DAG, dl);

4258

SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);

4259

SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMcc,

4260

CCR, Cmp);

4261

4262

SDValue Ops[2] = { Lo, Hi };

4263

return DAG.getMergeValues(Ops, dl);

4264

}

4265

4266

/// Lower FLT_ROUNDS_ by reading the FPSCR and remapping its rounding-mode
/// field.
SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
                                            SelectionDAG &DAG) const {
  // The rounding mode is in bits 23:22 of the FPSCR.
  // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3,
  // 3->0, i.e. "add one modulo four". It is computed as
  //   ((FPSCR + (1 << 22)) >> 22) & 3
  // so that the shift + and get folded into a bitfield extract.
  SDLoc dl(Op);

  SDValue GetFPSCR = DAG.getConstant(Intrinsic::arm_get_fpscr, MVT::i32);
  SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32, GetFPSCR);

  SDValue OneInField = DAG.getConstant(1U << 22, MVT::i32);
  SDValue Bumped = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR, OneInField);

  SDValue FieldShift = DAG.getConstant(22, MVT::i32);
  SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, Bumped, FieldShift);

  SDValue FieldMask = DAG.getConstant(3, MVT::i32);
  return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE, FieldMask);
}

4283

4284

/// Lower CTTZ as CTLZ of the bit-reversed operand (ARMISD::RBIT). Returns an
/// empty SDValue when the subtarget reports no V6T2 operations.
static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
                         const ARMSubtarget *ST) {
  if (!ST->hasV6T2Ops())
    return SDValue();

  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Reversed = DAG.getNode(ARMISD::RBIT, dl, VT, N->getOperand(0));
  return DAG.getNode(ISD::CTLZ, dl, VT, Reversed);
}

4295

4296

/// getCTPOP16BitCounts - Returns a v8i8/v16i8 vector containing the bit-count

4297

/// for each 16-bit element from operand, repeated. The basic idea is to

4298

/// leverage vcnt to get the 8-bit counts, gather and add the results.

4299

///

4300

/// Trace for v4i16:

4301

/// input = [v0 v1 v2 v3 ] (vi 16-bit element)

4302

/// cast: N0 = [w0 w1 w2 w3 w4 w5 w6 w7] (v0 = [w0 w1], wi 8-bit element)

4303

/// vcnt: N1 = [b0 b1 b2 b3 b4 b5 b6 b7] (bi = bit-count of 8-bit element wi)

4304

/// vrev: N2 = [b1 b0 b3 b2 b5 b4 b7 b6]

4305

/// [b0 b1 b2 b3 b4 b5 b6 b7]

4306

/// +[b1 b0 b3 b2 b5 b4 b7 b6]

4307

/// N3=N1+N2 = [k0 k0 k1 k1 k2 k2 k3 k3] (k0 = b0+b1 = bit-count of 16-bit v0,

4308

/// vuzp: = [k0 k1 k2 k3 k0 k1 k2 k3] each ki is 8-bits)

4309

static SDValue getCTPOP16BitCounts(SDNode *N, SelectionDAG &DAG) {

4310

EVT VT = N->getValueType(0);

4311

SDLoc DL(N);

4312

4313

EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;

4314

SDValue N0 = DAG.getNode(ISD::BITCAST, DL, VT8Bit, N->getOperand(0));

4315

SDValue N1 = DAG.getNode(ISD::CTPOP, DL, VT8Bit, N0);

4316

SDValue N2 = DAG.getNode(ARMISD::VREV16, DL, VT8Bit, N1);

4317

SDValue N3 = DAG.getNode(ISD::ADD, DL, VT8Bit, N1, N2);

4318

return DAG.getNode(ARMISD::VUZP, DL, VT8Bit, N3, N3);

4319

}

4320

4321

/// lowerCTPOP16BitElements - Returns a v4i16/v8i16 vector containing the

4322

/// bit-count for each 16-bit element from the operand. We need slightly

4323

/// different sequencing for v4i16 and v8i16 to stay within NEON's available

4324

/// 64/128-bit registers.

4325

///

4326

/// Trace for v4i16:

4327

/// input = [v0 v1 v2 v3 ] (vi 16-bit element)

4328

/// v8i8: BitCounts = [k0 k1 k2 k3 k0 k1 k2 k3 ] (ki is the bit-count of vi)

4329

/// v8i16:Extended = [k0 k1 k2 k3 k0 k1 k2 k3 ]

4330

/// v4i16:Extracted = [k0 k1 k2 k3 ]

4331

static SDValue lowerCTPOP16BitElements(SDNode *N, SelectionDAG &DAG) {

4332

EVT VT = N->getValueType(0);

4333

SDLoc DL(N);

4334

4335

SDValue BitCounts = getCTPOP16BitCounts(N, DAG);

4336

if (VT.is64BitVector()) {

4337

SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, BitCounts);

4338

return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, Extended,

4339

DAG.getIntPtrConstant(0));

4340

} else {

4341

SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8,

4342

BitCounts, DAG.getIntPtrConstant(0));

4343

return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, Extracted);

4344

}

4345

}

4346

4347

/// lowerCTPOP32BitElements - Returns a v2i32/v4i32 vector containing the

4348

/// bit-count for each 32-bit element from the operand. The idea here is

4349

/// to split the vector into 16-bit elements, leverage the 16-bit count

4350

/// routine, and then combine the results.

4351

///

4352

/// Trace for v2i32 (v4i32 similar with Extracted/Extended exchanged):

4353

/// input = [v0 v1 ] (vi: 32-bit elements)

4354

/// Bitcast = [w0 w1 w2 w3 ] (wi: 16-bit elements, v0 = [w0 w1])

4355

/// Counts16 = [k0 k1 k2 k3 ] (ki: 16-bit elements, bit-count of wi)

4356

/// vrev: N0 = [k1 k0 k3 k2 ]

4357

/// [k0 k1 k2 k3 ]

4358

/// N1 =+[k1 k0 k3 k2 ]

4359

/// [k0 k2 k1 k3 ]

4360

/// N2 =+[k1 k3 k0 k2 ]

4361

/// [k0 k2 k1 k3 ]

4362

/// Extended =+[k1 k3 k0 k2 ]

4363

/// [k0 k2 ]

4364

/// Extracted=+[k1 k3 ]

4365

///

4366

static SDValue lowerCTPOP32BitElements(SDNode *N, SelectionDAG &DAG) {

4367

EVT VT = N->getValueType(0);

4368

SDLoc DL(N);

4369

4370

EVT VT16Bit = VT.is64BitVector() ? MVT::v4i16 : MVT::v8i16;

4371

4372

SDValue Bitcast = DAG.getNode(ISD::BITCAST, DL, VT16Bit, N->getOperand(0));

4373

SDValue Counts16 = lowerCTPOP16BitElements(Bitcast.getNode(), DAG);

4374

SDValue N0 = DAG.getNode(ARMISD::VREV32, DL, VT16Bit, Counts16);

4375

SDValue N1 = DAG.getNode(ISD::ADD, DL, VT16Bit, Counts16, N0);

4376

SDValue N2 = DAG.getNode(ARMISD::VUZP, DL, VT16Bit, N1, N1);

4377

4378

if (VT.is64BitVector()) {

4379

SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, N2);

4380

return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i32, Extended,

4381

DAG.getIntPtrConstant(0));

4382

} else {

4383

SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, N2,

4384

DAG.getIntPtrConstant(0));

4385

return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, Extracted);

4386

}

4387

}

4388

4389

/// Custom lowering of vector CTPOP. Dispatches on the element type: i32
/// elements go to lowerCTPOP32BitElements, everything else to
/// lowerCTPOP16BitElements. Asserts that the subtarget has NEON and that the
/// type is one of v2i32, v4i32, v4i16 or v8i16.
static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG,
                          const ARMSubtarget *ST) {
  EVT VT = N->getValueType(0);

  assert(ST->hasNEON() && "Custom ctpop lowering requires NEON.");
  assert((VT == MVT::v2i32 || VT == MVT::v4i32 ||
          VT == MVT::v4i16 || VT == MVT::v8i16) &&
         "Unexpected type for custom ctpop lowering");

  const bool Has32BitElements = VT.getVectorElementType() == MVT::i32;
  return Has32BitElements ? lowerCTPOP32BitElements(N, DAG)
                          : lowerCTPOP16BitElements(N, DAG);
}

4403

4404

/// Lower a vector SHL/SRA/SRL to the NEON vshift intrinsics. Non-vector
/// shifts are left alone (an empty SDValue is returned).
static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
                          const ARMSubtarget *ST) {
  EVT VT = N->getValueType(0);
  if (!VT.isVector())
    return SDValue();

  // Lower vector shifts on NEON to use VSHL.
  assert(ST->hasNEON() && "unexpected vector shift");

  SDLoc dl(N);
  SDValue Value = N->getOperand(0);
  SDValue Count = N->getOperand(1);

  // Left shifts translate directly to the vshiftu intrinsic.
  if (N->getOpcode() == ISD::SHL) {
    SDValue IntID = DAG.getConstant(Intrinsic::arm_neon_vshiftu, MVT::i32);
    return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, IntID, Value, Count);
  }

  assert((N->getOpcode() == ISD::SRA ||
          N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode");

  // NEON uses the same intrinsics for both left and right shifts. For
  // right shifts, the shift amounts are negative, so negate the vector of
  // shift amounts.
  EVT ShiftVT = Count.getValueType();
  SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT,
                                     getZeroVector(ShiftVT, DAG, dl),
                                     Count);

  // Arithmetic right shifts use the signed intrinsic, logical the unsigned.
  Intrinsic::ID vshiftInt = Intrinsic::arm_neon_vshiftu;
  if (N->getOpcode() == ISD::SRA)
    vshiftInt = Intrinsic::arm_neon_vshifts;

  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
                     DAG.getConstant(vshiftInt, MVT::i32),
                     Value, NegatedCount);
}

4438

4439

/// Expand a 64-bit SRL/SRA by exactly one bit into a flag-setting shift of
/// the high word followed by an RRX of the low word. Every other case
/// (non-i64 result, non-constant or non-1 amount, Thumb1-only subtarget)
/// returns an empty SDValue.
static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
                                const ARMSubtarget *ST) {
  EVT VT = N->getValueType(0);

  // We can get here for a node like i32 = ISD::SHL i32, i64
  if (VT != MVT::i64)
    return SDValue();

  assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
         "Unknown shift to lower!");

  // We only lower SRA, SRL of 1 here, all others use generic lowering.
  ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N->getOperand(1));
  if (!ShAmt || ShAmt->getZExtValue() != 1)
    return SDValue();

  // If we are in thumb mode, we don't have RRX.
  if (ST->isThumb1Only())
    return SDValue();

  // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr.
  SDLoc dl(N);
  SDValue Wide = N->getOperand(0);
  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Wide,
                           DAG.getConstant(0, MVT::i32));
  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Wide,
                           DAG.getConstant(1, MVT::i32));

  // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and
  // captures the result into a carry flag.
  unsigned FlagOpc = ARMISD::SRA_FLAG;
  if (N->getOpcode() == ISD::SRL)
    FlagOpc = ARMISD::SRL_FLAG;
  Hi = DAG.getNode(FlagOpc, dl, DAG.getVTList(MVT::i32, MVT::Glue), Hi);

  // The low part is an ARMISD::RRX operand, which shifts the carry in.
  Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));

  // Merge the pieces into a single i64 value.
  return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
}

4476

4477

/// Lower a vector SETCC to the ARMISD vector-compare nodes. The condition
/// code is reduced to one base compare opcode plus two flags: Swap (exchange
/// the operands) and Invert (bitwise-NOT the result). Compares against an
/// all-zeros build vector use the dedicated compare-with-zero opcodes.
static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
  SDLoc dl(Op);
  EVT VT = Op.getValueType();
  SDValue Op0 = Op.getOperand(0);
  SDValue Op1 = Op.getOperand(1);
  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(Op.getOperand(2))->get();

  bool Invert = false;
  bool Swap = false;
  unsigned Opc = 0;

  if (Op1.getValueType().isFloatingPoint()) {
    switch (SetCCOpcode) {
    default:
      llvm_unreachable("Illegal FP comparison");
    case ISD::SETUNE:
    case ISD::SETNE:
      Invert = true; // Fallthrough
    case ISD::SETOEQ:
    case ISD::SETEQ:
      Opc = ARMISD::VCEQ;
      break;
    case ISD::SETOLT:
    case ISD::SETLT:
      Swap = true; // Fallthrough
    case ISD::SETOGT:
    case ISD::SETGT:
      Opc = ARMISD::VCGT;
      break;
    case ISD::SETOLE:
    case ISD::SETLE:
      Swap = true; // Fallthrough
    case ISD::SETOGE:
    case ISD::SETGE:
      Opc = ARMISD::VCGE;
      break;
    case ISD::SETUGE:
      Swap = true; // Fallthrough
    case ISD::SETULE:
      Invert = true;
      Opc = ARMISD::VCGT;
      break;
    case ISD::SETUGT:
      Swap = true; // Fallthrough
    case ISD::SETULT:
      Invert = true;
      Opc = ARMISD::VCGE;
      break;
    case ISD::SETUEQ:
      Invert = true; // Fallthrough
    case ISD::SETONE: {
      // Expand this to (OLT | OGT).
      SDValue Lhs = Op0;
      SDValue Rhs = Op1;
      Opc = ISD::OR;
      Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, Rhs, Lhs);
      Op1 = DAG.getNode(ARMISD::VCGT, dl, VT, Lhs, Rhs);
      break;
    }
    case ISD::SETUO:
      Invert = true; // Fallthrough
    case ISD::SETO: {
      // Expand this to (OLT | OGE).
      SDValue Lhs = Op0;
      SDValue Rhs = Op1;
      Opc = ISD::OR;
      Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, Rhs, Lhs);
      Op1 = DAG.getNode(ARMISD::VCGE, dl, VT, Lhs, Rhs);
      break;
    }
    }
  } else {
    // Integer comparisons.
    switch (SetCCOpcode) {
    default:
      llvm_unreachable("Illegal integer comparison");
    case ISD::SETNE:
      Invert = true; // Fallthrough
    case ISD::SETEQ:
      Opc = ARMISD::VCEQ;
      break;
    case ISD::SETLT:
      Swap = true; // Fallthrough
    case ISD::SETGT:
      Opc = ARMISD::VCGT;
      break;
    case ISD::SETLE:
      Swap = true; // Fallthrough
    case ISD::SETGE:
      Opc = ARMISD::VCGE;
      break;
    case ISD::SETULT:
      Swap = true; // Fallthrough
    case ISD::SETUGT:
      Opc = ARMISD::VCGTU;
      break;
    case ISD::SETULE:
      Swap = true; // Fallthrough
    case ISD::SETUGE:
      Opc = ARMISD::VCGEU;
      break;
    }

    // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero).
    if (Opc == ARMISD::VCEQ) {
      // Pick the operand that is compared against the all-zeros vector.
      SDValue AndOp;
      if (ISD::isBuildVectorAllZeros(Op1.getNode()))
        AndOp = Op0;
      else if (ISD::isBuildVectorAllZeros(Op0.getNode()))
        AndOp = Op1;

      if (AndOp.getNode()) {
        // Ignore bitconvert.
        if (AndOp.getOpcode() == ISD::BITCAST)
          AndOp = AndOp.getOperand(0);

        if (AndOp.getOpcode() == ISD::AND) {
          Opc = ARMISD::VTST;
          Op0 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(0));
          Op1 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(1));
          // VTST yields the "not equal to zero" sense, so flip the inversion.
          Invert = !Invert;
        }
      }
    }
  }

  if (Swap)
    std::swap(Op0, Op1);

  // If one of the operands is a constant vector zero, attempt to fold the
  // comparison to a specialized compare-against-zero form.
  SDValue SingleOp;
  if (ISD::isBuildVectorAllZeros(Op1.getNode())) {
    SingleOp = Op0;
  } else if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
    // Zero is on the left: mirror the ordered compares (0 >= x is x <= 0,
    // 0 > x is x < 0).
    if (Opc == ARMISD::VCGE)
      Opc = ARMISD::VCLEZ;
    else if (Opc == ARMISD::VCGT)
      Opc = ARMISD::VCLTZ;
    SingleOp = Op1;
  }

  // Choose the one-operand opcode, if this compare has one.
  bool UseZeroForm = false;
  unsigned ZeroFormOpc = Opc;
  if (SingleOp.getNode()) {
    UseZeroForm = true;
    switch (Opc) {
    case ARMISD::VCEQ:  ZeroFormOpc = ARMISD::VCEQZ; break;
    case ARMISD::VCGE:  ZeroFormOpc = ARMISD::VCGEZ; break;
    case ARMISD::VCLEZ: ZeroFormOpc = ARMISD::VCLEZ; break;
    case ARMISD::VCGT:  ZeroFormOpc = ARMISD::VCGTZ; break;
    case ARMISD::VCLTZ: ZeroFormOpc = ARMISD::VCLTZ; break;
    default:
      // No compare-against-zero form: keep the two-operand node.
      UseZeroForm = false;
      break;
    }
  }

  SDValue Result;
  if (UseZeroForm)
    Result = DAG.getNode(ZeroFormOpc, dl, VT, SingleOp);
  else
    Result = DAG.getNode(Opc, dl, VT, Op0, Op1);

  if (Invert)
    Result = DAG.getNOT(dl, Result, VT);

  return Result;
}

4607

4608

/// isNEONModifiedImm - Check if the specified splat value corresponds to a

4609

/// valid vector constant for a NEON instruction with a "modified immediate"

4610

/// operand (e.g., VMOV). If so, return the encoded value.

4611

static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,

4612

unsigned SplatBitSize, SelectionDAG &DAG,

4613

EVT &VT, bool is128Bits, NEONModImmType type) {

4614

unsigned OpCmode, Imm;

4615

4616

// SplatBitSize is set to the smallest size that splats the vector, so a

4617

// zero vector will always have SplatBitSize == 8. However, NEON modified

4618

// immediate instructions other than VMOV do not support the 8-bit encoding

4619

// of a zero vector, and the default encoding of zero is supposed to be the

4620

// 32-bit version.

4621

if (SplatBits == 0)

4622

SplatBitSize = 32;

4623

4624

switch (SplatBitSize) {

4625

case 8:

4626

if (type != VMOVModImm)

4627

return SDValue();

4628

// Any 1-byte value is OK. Op=0, Cmode=1110.

4629

assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big")(((SplatBits & ~0xff) == 0 && "one byte splat value is too big"
) ? static_cast<void> (0) : __assert_fail ("(SplatBits & ~0xff) == 0 && \"one byte splat value is too big\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 4629, __PRETTY_FUNCTION__));

4630

OpCmode = 0xe;

4631

Imm = SplatBits;

4632

VT = is128Bits ? MVT::v16i8 : MVT::v8i8;

4633

break;

4634

4635

case 16:

4636

// NEON's 16-bit VMOV supports splat values where only one byte is nonzero.

4637

VT = is128Bits ? MVT::v8i16 : MVT::v4i16;

4638

if ((SplatBits & ~0xff) == 0) {

4639

// Value = 0x00nn: Op=x, Cmode=100x.

4640

OpCmode = 0x8;

4641

Imm = SplatBits;

4642

break;

4643

}

4644

if ((SplatBits & ~0xff00) == 0) {

4645

// Value = 0xnn00: Op=x, Cmode=101x.

4646

OpCmode = 0xa;

4647

Imm = SplatBits >> 8;

4648

break;

4649

}

4650

return SDValue();

4651

4652

case 32:

4653

// NEON's 32-bit VMOV supports splat values where:

4654

// * only one byte is nonzero, or

4655

// * the least significant byte is 0xff and the second byte is nonzero, or

4656

// * the least significant 2 bytes are 0xff and the third is nonzero.

4657

VT = is128Bits ? MVT::v4i32 : MVT::v2i32;

4658

if ((SplatBits & ~0xff) == 0) {

4659

// Value = 0x000000nn: Op=x, Cmode=000x.

4660

OpCmode = 0;

4661

Imm = SplatBits;

4662

break;

4663

}

4664

if ((SplatBits & ~0xff00) == 0) {

4665

// Value = 0x0000nn00: Op=x, Cmode=001x.

4666

OpCmode = 0x2;

4667

Imm = SplatBits >> 8;

4668

break;

4669

}

4670

if ((SplatBits & ~0xff0000) == 0) {

4671

// Value = 0x00nn0000: Op=x, Cmode=010x.

4672

OpCmode = 0x4;

4673

Imm = SplatBits >> 16;

4674

break;

4675

}

4676

if ((SplatBits & ~0xff000000) == 0) {

4677

// Value = 0xnn000000: Op=x, Cmode=011x.

4678

OpCmode = 0x6;

4679

Imm = SplatBits >> 24;

4680

break;

4681

}

4682

4683

// cmode == 0b1100 and cmode == 0b1101 are not supported for VORR or VBIC

4684

if (type == OtherModImm) return SDValue();

4685

4686

if ((SplatBits & ~0xffff) == 0 &&

4687

((SplatBits | SplatUndef) & 0xff) == 0xff) {

4688

// Value = 0x0000nnff: Op=x, Cmode=1100.

4689

OpCmode = 0xc;

4690

Imm = SplatBits >> 8;

4691

break;

4692

}

4693

4694

if ((SplatBits & ~0xffffff) == 0 &&

4695

((SplatBits | SplatUndef) & 0xffff) == 0xffff) {

4696

// Value = 0x00nnffff: Op=x, Cmode=1101.

4697

OpCmode = 0xd;

4698

Imm = SplatBits >> 16;

4699

break;

4700

}

4701

4702

// Note: there are a few 32-bit splat values (specifically: 00ffff00,

4703

// ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not

4704

// VMOV.I32. A (very) minor optimization would be to replicate the value

4705

// and fall through here to test for a valid 64-bit splat. But, then the

4706

// caller would also need to check and handle the change in size.

4707

return SDValue();

4708

4709

case 64: {

4710

if (type != VMOVModImm)

4711

return SDValue();

4712

// NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.

4713

uint64_t BitMask = 0xff;

4714

uint64_t Val = 0;

4715

unsigned ImmMask = 1;

4716

Imm = 0;

4717

for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {

4718

if (((SplatBits | SplatUndef) & BitMask) == BitMask) {

4719

Val |= BitMask;

4720

Imm |= ImmMask;

4721

} else if ((SplatBits & BitMask) != 0) {

4722

return SDValue();

4723

}

4724

BitMask <<= 8;

4725

ImmMask <<= 1;

4726

}

4727

4728

if (DAG.getTargetLoweringInfo().isBigEndian())

4729

// swap higher and lower 32 bit word

4730

Imm = ((Imm & 0xf) << 4) | ((Imm & 0xf0) >> 4);

4731

4732

// Op=1, Cmode=1110.

4733

OpCmode = 0x1e;

4734

VT = is128Bits ? MVT::v2i64 : MVT::v1i64;

4735

break;

4736

}

4737

4738

default:

4739

llvm_unreachable("unexpected size for isNEONModifiedImm")::llvm::llvm_unreachable_internal("unexpected size for isNEONModifiedImm"
, "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 4739);

4740

}

4741

4742

unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm);

4743

return DAG.getTargetConstant(EncodedVal, MVT::i32);

4744

}

4745

4746

/// Lower a scalar ConstantFP node without a constant-pool load where possible.
///
/// Strategies are tried in this order: a floating-point immediate
/// (getFP64Imm/getFP32Imm), a NEON VMOV.i32 modified immediate, and finally a
/// NEON VMVN.i32 modified immediate of the complemented bits. An empty SDValue
/// is returned when none of them applies.
SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
                                           const ARMSubtarget *ST) const {
  if (!ST->hasVFP3())
    return SDValue();

  const bool IsDouble = Op.getValueType() == MVT::f64;
  ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);

  // Use the default (constant pool) lowering for double constants when the
  // FPU is single-precision only.
  if (IsDouble && Subtarget->isFPOnlySP())
    return SDValue();

  // First choice: the value is encodable as a floating-point immediate.
  APFloat FPVal = CFP->getValueAPF();
  const int ImmVal =
      IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal);
  if (ImmVal != -1) {
    // A valid ConstantFP is already selectable as-is; leave it alone unless
    // single-precision values are being routed through NEON.
    if (IsDouble || !ST->useNEONForSinglePrecisionFP())
      return Op;

    // float with NEON preferred: splat the immediate into a v2f32 and read
    // back lane 0.
    SDLoc DL(Op);
    SDValue Encoded = DAG.getTargetConstant(ImmVal, MVT::i32);
    SDValue Splat = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32, Encoded);
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Splat,
                       DAG.getConstant(0, MVT::i32));
  }

  // Everything below is NEON only, so make sure that is allowed.
  if (!ST->hasNEON() || (!IsDouble && !ST->useNEONForSinglePrecisionFP()))
    return SDValue();

  EVT VMovVT;
  const uint64_t Bits = FPVal.bitcastToAPInt().getZExtValue();

  // A 32-bit splat can only reproduce a double whose two 32-bit halves are
  // equal. The one important double that matches is 0.0.
  if (IsDouble && (Bits & 0xffffffff) != (Bits >> 32))
    return SDValue();

  // Materialise the integer vector immediate with VecOpcode, then convert it
  // to the scalar result: a plain bitcast for f64, or a bitcast to v2f32
  // followed by an extract of lane 0 for f32.
  auto WrapVectorImm = [&](unsigned VecOpcode, SDValue EncodedImm) -> SDValue {
    SDLoc DL(Op);
    SDValue IntVec = DAG.getNode(VecOpcode, DL, VMovVT, EncodedImm);
    if (IsDouble)
      return DAG.getNode(ISD::BITCAST, DL, MVT::f64, IntVec);

    SDValue FloatVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32, IntVec);
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, FloatVec,
                       DAG.getConstant(0, MVT::i32));
  };

  // Try a VMOV.i32 (FIXME: i8, i16, or i64 could work too).
  SDValue NewVal = isNEONModifiedImm(Bits & 0xffffffffU, 0, 32, DAG, VMovVT,
                                     false, VMOVModImm);
  if (NewVal != SDValue())
    return WrapVectorImm(ARMISD::VMOVIMM, NewVal);

  // Finally, try a VMVN.i32 on the complemented low word.
  NewVal = isNEONModifiedImm(~Bits & 0xffffffffU, 0, 32, DAG, VMovVT, false,
                             VMVNModImm);
  if (NewVal != SDValue())
    return WrapVectorImm(ARMISD::VMVNIMM, NewVal);

  return SDValue();
}

4830

4831

// check if an VEXT instruction can handle the shuffle mask when the

4832

// vector sources of the shuffle are the same.

4833

static bool isSingletonVEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {

4834

unsigned NumElts = VT.getVectorNumElements();

4835

4836

// Assume that the first shuffle index is not UNDEF. Fail if it is.

4837

if (M[0] < 0)

4838

return false;

4839

4840

Imm = M[0];

4841

4842

// If this is a VEXT shuffle, the immediate value is the index of the first

4843

// element. The other shuffle indices must be the successive elements after

4844

// the first one.

4845

unsigned ExpectedElt = Imm;

4846

for (unsigned i = 1; i < NumElts; ++i) {

4847

// Increment the expected index. If it wraps around, just follow it

4848

// back to index zero and keep going.

4849

++ExpectedElt;

4850

if (ExpectedElt == NumElts)

4851

ExpectedElt = 0;

4852

4853

if (M[i] < 0) continue; // ignore UNDEF indices

4854

if (ExpectedElt != static_cast<unsigned>(M[i]))

4855

return false;

4856

}

4857

4858

return true;

4859

}

4860

4861

4862

static bool isVEXTMask(ArrayRef<int> M, EVT VT,

4863

bool &ReverseVEXT, unsigned &Imm) {

4864

unsigned NumElts = VT.getVectorNumElements();

4865

ReverseVEXT = false;

4866

4867

// Assume that the first shuffle index is not UNDEF. Fail if it is.

4868

if (M[0] < 0)

4869

return false;

4870

4871

Imm = M[0];

4872

4873

// If this is a VEXT shuffle, the immediate value is the index of the first

4874

// element. The other shuffle indices must be the successive elements after

4875

// the first one.

4876

unsigned ExpectedElt = Imm;

4877

for (unsigned i = 1; i < NumElts; ++i) {

4878

// Increment the expected index. If it wraps around, it may still be

4879

// a VEXT but the source vectors must be swapped.

4880

ExpectedElt += 1;

4881

if (ExpectedElt == NumElts * 2) {

4882

ExpectedElt = 0;

4883

ReverseVEXT = true;

4884

}

4885

4886

if (M[i] < 0) continue; // ignore UNDEF indices

4887

if (ExpectedElt != static_cast<unsigned>(M[i]))

4888

return false;

4889

}

4890

4891

// Adjust the index value if the source operands will be swapped.

4892

if (ReverseVEXT)

4893

Imm -= NumElts;

4894

4895

return true;

4896

}

4897

4898

/// isVREVMask - Check if a vector shuffle corresponds to a VREV

4899

/// instruction with the specified blocksize. (The order of the elements

4900

/// within each block of the vector is reversed.)

4901

static bool isVREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {

4902

assert((BlockSize==16 || BlockSize==32 || BlockSize==64) &&(((BlockSize==16 || BlockSize==32 || BlockSize==64) &&
"Only possible block sizes for VREV are: 16, 32, 64") ? static_cast
<void> (0) : __assert_fail ("(BlockSize==16 || BlockSize==32 || BlockSize==64) && \"Only possible block sizes for VREV are: 16, 32, 64\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 4903, __PRETTY_FUNCTION__))

4903

"Only possible block sizes for VREV are: 16, 32, 64")(((BlockSize==16 || BlockSize==32 || BlockSize==64) &&
"Only possible block sizes for VREV are: 16, 32, 64") ? static_cast
<void> (0) : __assert_fail ("(BlockSize==16 || BlockSize==32 || BlockSize==64) && \"Only possible block sizes for VREV are: 16, 32, 64\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 4903, __PRETTY_FUNCTION__));

4904

4905

unsigned EltSz = VT.getVectorElementType().getSizeInBits();

4906

if (EltSz == 64)

4907

return false;

4908

4909

unsigned NumElts = VT.getVectorNumElements();

4910

unsigned BlockElts = M[0] + 1;

4911

// If the first shuffle index is UNDEF, be optimistic.

4912

if (M[0] < 0)

4913

BlockElts = BlockSize / EltSz;

4914

4915

if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)

4916

return false;

4917

4918

for (unsigned i = 0; i < NumElts; ++i) {

4919

if (M[i] < 0) continue; // ignore UNDEF indices

4920

if ((unsigned) M[i] != (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts))

4921

return false;

4922

}

4923

4924

return true;

4925

}

4926

4927

/// Check whether a shuffle can be handled as an <8 x i8> table lookup.
///
/// An out-of-range index in the mask places 0 into the resulting vector, so
/// pretty much any mask of 8 elements works; only the type and the mask
/// length are checked.
static bool isVTBLMask(ArrayRef<int> M, EVT VT) {
  if (VT == MVT::v8i8)
    return M.size() == 8;
  return false;
}

4933

4934

/// Check whether the mask has the two-source VTRN shape: for every even
/// position i, M[i] == i + WhichResult and M[i+1] == i + NumElts + WhichResult
/// (UNDEF entries are accepted).
///
/// \p WhichResult is set to 0 when M[0] == 0 and to 1 otherwise. Vectors with
/// 64-bit elements are rejected before it is written.
static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
  if (VT.getVectorElementType().getSizeInBits() == 64)
    return false;

  const unsigned NumElts = VT.getVectorNumElements();
  WhichResult = (M[0] == 0) ? 0 : 1;

  for (unsigned Pos = 0; Pos < NumElts; Pos += 2) {
    // Even slot comes from the first operand.
    if (M[Pos] >= 0 && static_cast<unsigned>(M[Pos]) != Pos + WhichResult)
      return false;
    // Odd slot comes from the matching lane of the second operand.
    if (M[Pos + 1] >= 0 &&
        static_cast<unsigned>(M[Pos + 1]) != Pos + NumElts + WhichResult)
      return false;
  }
  return true;
}

4948

4949

/// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of

4950

/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".

4951

/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.

4952

static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){

4953

unsigned EltSz = VT.getVectorElementType().getSizeInBits();

4954

if (EltSz == 64)

4955

return false;

4956

4957

unsigned NumElts = VT.getVectorNumElements();

4958

WhichResult = (M[0] == 0 ? 0 : 1);

4959

for (unsigned i = 0; i < NumElts; i += 2) {

4960

if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) ||

4961

(M[i+1] >= 0 && (unsigned) M[i+1] != i + WhichResult))

4962

return false;

4963

}

4964

return true;

4965

}

4966

4967

/// Check whether the mask has the two-source VUZP shape: every defined M[i]
/// equals 2 * i + WhichResult.
///
/// \p WhichResult is set to 0 when M[0] == 0 and to 1 otherwise. Vectors with
/// 64-bit elements, and 64-bit vectors of 32-bit elements, are rejected.
static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
  const unsigned EltSz = VT.getVectorElementType().getSizeInBits();
  if (EltSz == 64)
    return false;

  const unsigned NumElts = VT.getVectorNumElements();
  WhichResult = (M[0] == 0) ? 0 : 1;

  for (unsigned Lane = 0; Lane != NumElts; ++Lane) {
    const int Idx = M[Lane];
    // UNDEF indices match anything.
    if (Idx >= 0 && static_cast<unsigned>(Idx) != 2 * Lane + WhichResult)
      return false;
  }

  // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
  const bool IsVTRNAlias = VT.is64BitVector() && EltSz == 32;
  return !IsVTRNAlias;
}

4986

4987

/// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of

4988

/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".

4989

/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>,

4990

static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){

4991

unsigned EltSz = VT.getVectorElementType().getSizeInBits();

4992

if (EltSz == 64)

4993

return false;

4994

4995

unsigned Half = VT.getVectorNumElements() / 2;

4996

WhichResult = (M[0] == 0 ? 0 : 1);

4997

for (unsigned j = 0; j != 2; ++j) {

4998

unsigned Idx = WhichResult;

4999

for (unsigned i = 0; i != Half; ++i) {

5000

int MIdx = M[i + j * Half];

5001

if (MIdx >= 0 && (unsigned) MIdx != Idx)

5002

return false;

5003

Idx += 2;

5004

}

5005

}

5006

5007

// VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.

5008

if (VT.is64BitVector() && EltSz == 32)

5009

return false;

5010

5011

return true;

5012

}

5013

5014

/// Check whether the mask has the two-source VZIP shape: starting from
/// Idx = WhichResult * NumElts / 2, each pair must be <Idx, Idx + NumElts>
/// with Idx increasing by one per pair (UNDEF entries are accepted).
///
/// \p WhichResult is set to 0 when M[0] == 0 and to 1 otherwise. Vectors with
/// 64-bit elements, and 64-bit vectors of 32-bit elements, are rejected.
static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
  const unsigned EltSz = VT.getVectorElementType().getSizeInBits();
  if (EltSz == 64)
    return false;

  const unsigned NumElts = VT.getVectorNumElements();
  WhichResult = (M[0] == 0) ? 0 : 1;

  unsigned Want = WhichResult * NumElts / 2;
  for (unsigned Pos = 0; Pos != NumElts; Pos += 2, ++Want) {
    if (M[Pos] >= 0 && static_cast<unsigned>(M[Pos]) != Want)
      return false;
    if (M[Pos + 1] >= 0 &&
        static_cast<unsigned>(M[Pos + 1]) != Want + NumElts)
      return false;
  }

  // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
  const bool IsVTRNAlias = VT.is64BitVector() && EltSz == 32;
  return !IsVTRNAlias;
}

5035

5036

/// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of

5037

/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".

5038

/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.

5039

static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){

5040

unsigned EltSz = VT.getVectorElementType().getSizeInBits();

5041

if (EltSz == 64)

5042

return false;

5043

5044

unsigned NumElts = VT.getVectorNumElements();

5045

WhichResult = (M[0] == 0 ? 0 : 1);

5046

unsigned Idx = WhichResult * NumElts / 2;

5047

for (unsigned i = 0; i != NumElts; i += 2) {

5048

if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||

5049

(M[i+1] >= 0 && (unsigned) M[i+1] != Idx))

5050

return false;

5051

Idx += 1;

5052

}

5053

5054

// VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.

5055

if (VT.is64BitVector() && EltSz == 32)

5056

return false;

5057

5058

return true;

5059

}

5060

5061

/// \return true if this is a reverse operation on an vector.

5062

static bool isReverseMask(ArrayRef<int> M, EVT VT) {

5063

unsigned NumElts = VT.getVectorNumElements();

5064

// Make sure the mask has the right size.

5065

if (NumElts != M.size())

5066

return false;

5067

5068

// Look for <15, ..., 3, -1, 1, 0>.

5069

for (unsigned i = 0; i != NumElts; ++i)

5070

if (M[i] >= 0 && M[i] != (int) (NumElts - 1 - i))

5071

return false;

5072

5073

return true;

5074

}

5075

5076

// If N is an integer constant that can be moved into a register in one

5077

// instruction, return an SDValue of such a constant (will become a MOV

5078

// instruction). Otherwise return null.

5079

static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG,

5080

const ARMSubtarget *ST, SDLoc dl) {

5081

uint64_t Val;

5082

if (!isa<ConstantSDNode>(N))

5083

return SDValue();

5084

Val = cast<ConstantSDNode>(N)->getZExtValue();

5085

5086

if (ST->isThumb1Only()) {

5087

if (Val <= 255 || ~Val <= 255)

5088

return DAG.getConstant(Val, MVT::i32);

5089

} else {

5090

if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1)

5091

return DAG.getConstant(Val, MVT::i32);

5092

}

5093

return SDValue();

5094

}

5095

5096

// If this is a case we can't handle, return null and let the default

5097

// expansion code take care of it.

5098

SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,

5099

const ARMSubtarget *ST) const {

5100

BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());

5101

SDLoc dl(Op);

5102

EVT VT = Op.getValueType();

5103

5104

APInt SplatBits, SplatUndef;

5105

unsigned SplatBitSize;

5106

bool HasAnyUndefs;

5107

if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {

5108

if (SplatBitSize <= 64) {

5109

// Check if an immediate VMOV works.

5110

EVT VmovVT;

5111

SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),

5112

SplatUndef.getZExtValue(), SplatBitSize,

5113

DAG, VmovVT, VT.is128BitVector(),

5114

VMOVModImm);

5115

if (Val.getNode()) {

5116

SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);

5117

return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);

5118

}

5119

5120

// Try an immediate VMVN.

5121

uint64_t NegatedImm = (~SplatBits).getZExtValue();

5122

Val = isNEONModifiedImm(NegatedImm,

5123

SplatUndef.getZExtValue(), SplatBitSize,

5124

DAG, VmovVT, VT.is128BitVector(),

5125

VMVNModImm);

5126

if (Val.getNode()) {

5127

SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);

5128

return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);

5129

}

5130

5131

// Use vmov.f32 to materialize other v2f32 and v4f32 splats.

5132

if ((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) {

5133

int ImmVal = ARM_AM::getFP32Imm(SplatBits);

5134

if (ImmVal != -1) {

5135

SDValue Val = DAG.getTargetConstant(ImmVal, MVT::i32);

5136

return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val);

5137

}

5138

}

5139

}

5140

}

5141

5142

// Scan through the operands to see if only one value is used.

5143

5144

// As an optimisation, even if more than one value is used it may be more

5145

// profitable to splat with one value then change some lanes.

5146

5147

// Heuristically we decide to do this if the vector has a "dominant" value,

5148

// defined as splatted to more than half of the lanes.

5149

unsigned NumElts = VT.getVectorNumElements();

5150

bool isOnlyLowElement = true;

5151

bool usesOnlyOneValue = true;

5152

bool hasDominantValue = false;

5153

bool isConstant = true;

5154

5155

// Map of the number of times a particular SDValue appears in the

5156

// element list.

5157

DenseMap<SDValue, unsigned> ValueCounts;

5158

SDValue Value;

5159

for (unsigned i = 0; i < NumElts; ++i) {

5160

SDValue V = Op.getOperand(i);

5161

if (V.getOpcode() == ISD::UNDEF)

5162

continue;

5163

if (i > 0)

5164

isOnlyLowElement = false;

5165

if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))

5166

isConstant = false;

5167

5168

ValueCounts.insert(std::make_pair(V, 0));

5169

unsigned &Count = ValueCounts[V];

5170

5171

// Is this value dominant? (takes up more than half of the lanes)

5172

if (++Count > (NumElts / 2)) {

5173

hasDominantValue = true;

5174

Value = V;

5175

}

5176

}

5177

if (ValueCounts.size() != 1)

5178

usesOnlyOneValue = false;

5179

if (!Value.getNode() && ValueCounts.size() > 0)

5180

Value = ValueCounts.begin()->first;

5181

5182

if (ValueCounts.size() == 0)

5183

return DAG.getUNDEF(VT);

5184

5185

// Loads are better lowered with insert_vector_elt/ARMISD::BUILD_VECTOR.

5186

// Keep going if we are hitting this case.

5187

if (isOnlyLowElement && !ISD::isNormalLoad(Value.getNode()))

5188

return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);

5189

5190

unsigned EltSize = VT.getVectorElementType().getSizeInBits();

5191

5192

// Use VDUP for non-constant splats. For f32 constant splats, reduce to

5193

// i32 and try again.

5194

if (hasDominantValue && EltSize <= 32) {

5195

if (!isConstant) {

5196

SDValue N;

5197

5198

// If we are VDUPing a value that comes directly from a vector, that will

5199

// cause an unnecessary move to and from a GPR, where instead we could

5200

// just use VDUPLANE. We can only do this if the lane being extracted

5201

// is at a constant index, as the VDUP from lane instructions only have

5202

// constant-index forms.

5203

if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&

5204

isa<ConstantSDNode>(Value->getOperand(1))) {

5205

// We need to create a new undef vector to use for the VDUPLANE if the

5206

// size of the vector from which we get the value is different than the

5207

// size of the vector that we need to create. We will insert the element

5208

// such that the register coalescer will remove unnecessary copies.

5209

if (VT != Value->getOperand(0).getValueType()) {

5210

ConstantSDNode *constIndex;

5211

constIndex = dyn_cast<ConstantSDNode>(Value->getOperand(1));

5212

assert(constIndex && "The index is not a constant!")((constIndex && "The index is not a constant!") ? static_cast
<void> (0) : __assert_fail ("constIndex && \"The index is not a constant!\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 5212, __PRETTY_FUNCTION__));

5213

unsigned index = constIndex->getAPIntValue().getLimitedValue() %

5214

VT.getVectorNumElements();

5215

N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,

5216

DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DAG.getUNDEF(VT),

5217

Value, DAG.getConstant(index, MVT::i32)),

5218

DAG.getConstant(index, MVT::i32));

5219

} else

5220

N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,

5221

Value->getOperand(0), Value->getOperand(1));

5222

} else

5223

N = DAG.getNode(ARMISD::VDUP, dl, VT, Value);

5224

5225

if (!usesOnlyOneValue) {

5226

// The dominant value was splatted as 'N', but we now have to insert

5227

// all differing elements.

5228

for (unsigned I = 0; I < NumElts; ++I) {

5229

if (Op.getOperand(I) == Value)

5230

continue;

5231

SmallVector<SDValue, 3> Ops;

5232

Ops.push_back(N);

5233

Ops.push_back(Op.getOperand(I));

5234

Ops.push_back(DAG.getConstant(I, MVT::i32));

5235

N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ops);

5236

}

5237

}

5238

return N;

5239

}

5240

if (VT.getVectorElementType().isFloatingPoint()) {

5241

SmallVector<SDValue, 8> Ops;

5242

for (unsigned i = 0; i < NumElts; ++i)

5243

Ops.push_back(DAG.getNode(ISD::BITCAST, dl, MVT::i32,

5244

Op.getOperand(i)));

5245

EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);

5246

SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, Ops);

5247

Val = LowerBUILD_VECTOR(Val, DAG, ST);

5248

if (Val.getNode())

5249

return DAG.getNode(ISD::BITCAST, dl, VT, Val);

5250

}

5251

if (usesOnlyOneValue) {

5252

SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);

5253

if (isConstant && Val.getNode())

5254

return DAG.getNode(ARMISD::VDUP, dl, VT, Val);

5255

}

5256

}

5257

5258

// If all elements are constants and the case above didn't get hit, fall back

5259

// to the default expansion, which will generate a load from the constant

5260

// pool.

5261

if (isConstant)

5262

return SDValue();

5263

5264

// Empirical tests suggest this is rarely worth it for vectors of length <= 2.

5265

if (NumElts >= 4) {

5266

SDValue shuffle = ReconstructShuffle(Op, DAG);

5267

if (shuffle != SDValue())

5268

return shuffle;

5269

}

5270

5271

// Vectors with 32- or 64-bit elements can be built by directly assigning

5272

// the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands

5273

// will be legalized.

5274

if (EltSize >= 32) {

5275

// Do the expansion with floating-point types, since that is what the VFP

5276

// registers are defined to use, and since i64 is not legal.

5277

EVT EltVT = EVT::getFloatingPointVT(EltSize);

5278

EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);

5279

SmallVector<SDValue, 8> Ops;

5280

for (unsigned i = 0; i < NumElts; ++i)

5281

Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i)));

5282

SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);

5283

return DAG.getNode(ISD::BITCAST, dl, VT, Val);

5284

}

5285

5286

// If all else fails, just use a sequence of INSERT_VECTOR_ELT when we

5287

// know the default expansion would otherwise fall back on something even

5288

// worse. For a vector with one or two non-undef values, that's

5289

// scalar_to_vector for the elements followed by a shuffle (provided the

5290

// shuffle is valid for the target) and materialization element by element

5291

// on the stack followed by a load for everything else.

5292

if (!isConstant && !usesOnlyOneValue) {

5293

SDValue Vec = DAG.getUNDEF(VT);

5294

for (unsigned i = 0 ; i < NumElts; ++i) {

5295

SDValue V = Op.getOperand(i);

5296

if (V.getOpcode() == ISD::UNDEF)

5297

continue;

5298

SDValue LaneIdx = DAG.getConstant(i, MVT::i32);

5299

Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx);

5300

}

5301

return Vec;

5302

}

5303

5304

return SDValue();

5305

}

5306

5307

// Gather data to see if the operation can be modelled as a

5308

// shuffle in combination with VEXTs.

5309

SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,

5310

SelectionDAG &DAG) const {

5311

SDLoc dl(Op);

5312

EVT VT = Op.getValueType();

5313

unsigned NumElts = VT.getVectorNumElements();

5314

5315

SmallVector<SDValue, 2> SourceVecs;

5316

SmallVector<unsigned, 2> MinElts;

5317

SmallVector<unsigned, 2> MaxElts;

5318

5319

for (unsigned i = 0; i < NumElts; ++i) {

5320

SDValue V = Op.getOperand(i);

5321

if (V.getOpcode() == ISD::UNDEF)

5322

continue;

5323

else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) {

5324

// A shuffle can only come from building a vector from various

5325

// elements of other vectors.

5326

return SDValue();

5327

} else if (V.getOperand(0).getValueType().getVectorElementType() !=

5328

VT.getVectorElementType()) {

5329

// This code doesn't know how to handle shuffles where the vector

5330

// element types do not match (this happens because type legalization

5331

// promotes the return type of EXTRACT_VECTOR_ELT).

5332

// FIXME: It might be appropriate to extend this code to handle

5333

// mismatched types.

5334

return SDValue();

5335

}

5336

5337

// Record this extraction against the appropriate vector if possible...

5338

SDValue SourceVec = V.getOperand(0);

5339

// If the element number isn't a constant, we can't effectively

5340

// analyze what's going on.

5341

if (!isa<ConstantSDNode>(V.getOperand(1)))

5342

return SDValue();

5343

unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();

5344

bool FoundSource = false;

5345

for (unsigned j = 0; j < SourceVecs.size(); ++j) {

5346

if (SourceVecs[j] == SourceVec) {

5347

if (MinElts[j] > EltNo)

5348

MinElts[j] = EltNo;

5349

if (MaxElts[j] < EltNo)

5350

MaxElts[j] = EltNo;

5351

FoundSource = true;

5352

break;

5353

}

5354

}

5355

5356

// Or record a new source if not...

5357

if (!FoundSource) {

5358

SourceVecs.push_back(SourceVec);

5359

MinElts.push_back(EltNo);

5360

MaxElts.push_back(EltNo);

5361

}

5362

}

5363

5364

// Currently only do something sane when at most two source vectors

5365

// involved.

5366

if (SourceVecs.size() > 2)

5367

return SDValue();

5368

5369

SDValue ShuffleSrcs[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT) };

5370

int VEXTOffsets[2] = {0, 0};

5371

5372

// This loop extracts the usage patterns of the source vectors

5373

// and prepares appropriate SDValues for a shuffle if possible.

5374

for (unsigned i = 0; i < SourceVecs.size(); ++i) {

5375

if (SourceVecs[i].getValueType() == VT) {

5376

// No VEXT necessary

5377

ShuffleSrcs[i] = SourceVecs[i];

5378

VEXTOffsets[i] = 0;

5379

continue;

5380

} else if (SourceVecs[i].getValueType().getVectorNumElements() < NumElts) {

5381

// It probably isn't worth padding out a smaller vector just to

5382

// break it down again in a shuffle.

5383

return SDValue();

5384

}

5385

5386

// Since only 64-bit and 128-bit vectors are legal on ARM and

5387

// we've eliminated the other cases...

5388

assert(SourceVecs[i].getValueType().getVectorNumElements() == 2*NumElts &&((SourceVecs[i].getValueType().getVectorNumElements() == 2*NumElts
&& "unexpected vector sizes in ReconstructShuffle") ?
static_cast<void> (0) : __assert_fail ("SourceVecs[i].getValueType().getVectorNumElements() == 2*NumElts && \"unexpected vector sizes in ReconstructShuffle\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 5389, __PRETTY_FUNCTION__))

5389

"unexpected vector sizes in ReconstructShuffle")((SourceVecs[i].getValueType().getVectorNumElements() == 2*NumElts
&& "unexpected vector sizes in ReconstructShuffle") ?
static_cast<void> (0) : __assert_fail ("SourceVecs[i].getValueType().getVectorNumElements() == 2*NumElts && \"unexpected vector sizes in ReconstructShuffle\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 5389, __PRETTY_FUNCTION__));

5390

5391

if (MaxElts[i] - MinElts[i] >= NumElts) {

5392

// Span too large for a VEXT to cope

5393

return SDValue();

5394

}

5395

5396

if (MinElts[i] >= NumElts) {

5397

// The extraction can just take the second half

5398

VEXTOffsets[i] = NumElts;

5399

ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,

5400

SourceVecs[i],

5401

DAG.getIntPtrConstant(NumElts));

5402

} else if (MaxElts[i] < NumElts) {

5403

// The extraction can just take the first half

5404

VEXTOffsets[i] = 0;

5405

ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,

5406

SourceVecs[i],

5407

DAG.getIntPtrConstant(0));

5408

} else {

5409

// An actual VEXT is needed

5410

VEXTOffsets[i] = MinElts[i];

5411

SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,

5412

SourceVecs[i],

5413

DAG.getIntPtrConstant(0));

5414

SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,

5415

SourceVecs[i],

5416

DAG.getIntPtrConstant(NumElts));

5417

ShuffleSrcs[i] = DAG.getNode(ARMISD::VEXT, dl, VT, VEXTSrc1, VEXTSrc2,

5418

DAG.getConstant(VEXTOffsets[i], MVT::i32));

5419

}

5420

}

5421

5422

SmallVector<int, 8> Mask;

5423

5424

for (unsigned i = 0; i < NumElts; ++i) {

5425

SDValue Entry = Op.getOperand(i);

5426

if (Entry.getOpcode() == ISD::UNDEF) {

5427

Mask.push_back(-1);

5428

continue;

5429

}

5430

5431

SDValue ExtractVec = Entry.getOperand(0);

5432

int ExtractElt = cast<ConstantSDNode>(Op.getOperand(i)

5433

.getOperand(1))->getSExtValue();

5434

if (ExtractVec == SourceVecs[0]) {

5435

Mask.push_back(ExtractElt - VEXTOffsets[0]);

5436

} else {

5437

Mask.push_back(ExtractElt + NumElts - VEXTOffsets[1]);

5438

}

5439

}

5440

5441

// Final check before we try to produce nonsense...

5442

if (isShuffleMaskLegal(Mask, VT))

5443

return DAG.getVectorShuffle(VT, dl, ShuffleSrcs[0], ShuffleSrcs[1],

5444

&Mask[0]);

5445

5446

return SDValue();

5447

}

5448

5449

/// isShuffleMaskLegal - Targets can use this to indicate that they only

5450

/// support *some* VECTOR_SHUFFLE operations, those with specific masks.

5451

/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values

5452

/// are assumed to be legal.

5453

bool

5454

ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,

5455

EVT VT) const {

5456

if (VT.getVectorNumElements() == 4 &&

5457

(VT.is128BitVector() || VT.is64BitVector())) {

5458

unsigned PFIndexes[4];

5459

for (unsigned i = 0; i != 4; ++i) {

5460

if (M[i] < 0)

5461

PFIndexes[i] = 8;

5462

else

5463

PFIndexes[i] = M[i];

5464

}

5465

5466

// Compute the index in the perfect shuffle table.

5467

unsigned PFTableIndex =

5468

PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];

5469

unsigned PFEntry = PerfectShuffleTable[PFTableIndex];

5470

unsigned Cost = (PFEntry >> 30);

5471

5472

if (Cost <= 4)

5473

return true;

5474

}

5475

5476

bool ReverseVEXT;

5477

unsigned Imm, WhichResult;

5478

5479

unsigned EltSize = VT.getVectorElementType().getSizeInBits();

5480

return (EltSize >= 32 ||

5481

ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||

5482

isVREVMask(M, VT, 64) ||

5483

isVREVMask(M, VT, 32) ||

5484

isVREVMask(M, VT, 16) ||

5485

isVEXTMask(M, VT, ReverseVEXT, Imm) ||

5486

isVTBLMask(M, VT) ||

5487

isVTRNMask(M, VT, WhichResult) ||

5488

isVUZPMask(M, VT, WhichResult) ||

5489

isVZIPMask(M, VT, WhichResult) ||

5490

isVTRN_v_undef_Mask(M, VT, WhichResult) ||

5491

isVUZP_v_undef_Mask(M, VT, WhichResult) ||

5492

isVZIP_v_undef_Mask(M, VT, WhichResult) ||

5493

((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(M, VT)));

5494

}

5495

5496

/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit

5497

/// the specified operations to build the shuffle.

5498

static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,

5499

SDValue RHS, SelectionDAG &DAG,

5500

SDLoc dl) {

5501

unsigned OpNum = (PFEntry >> 26) & 0x0F;

5502

unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);

5503

unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);

5504

5505

enum {

5506

OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>

5507

OP_VREV,

5508

OP_VDUP0,

5509

OP_VDUP1,

5510

OP_VDUP2,

5511

OP_VDUP3,

5512

OP_VEXT1,

5513

OP_VEXT2,

5514

OP_VEXT3,

5515

OP_VUZPL, // VUZP, left result

5516

OP_VUZPR, // VUZP, right result

5517

OP_VZIPL, // VZIP, left result

5518

OP_VZIPR, // VZIP, right result

5519

OP_VTRNL, // VTRN, left result

5520

OP_VTRNR // VTRN, right result

5521

};

5522

5523

if (OpNum == OP_COPY) {

5524

if (LHSID == (1*9+2)*9+3) return LHS;

5525

assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!")((LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!") ?
static_cast<void> (0) : __assert_fail ("LHSID == ((4*9+5)*9+6)*9+7 && \"Illegal OP_COPY!\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 5525, __PRETTY_FUNCTION__));

5526

return RHS;

5527

}

5528

5529

SDValue OpLHS, OpRHS;

5530

OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);

5531

OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);

5532

EVT VT = OpLHS.getValueType();

5533

5534

switch (OpNum) {

5535

default: llvm_unreachable("Unknown shuffle opcode!")::llvm::llvm_unreachable_internal("Unknown shuffle opcode!", "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 5535);

5536

case OP_VREV:

5537

// VREV divides the vector in half and swaps within the half.

5538

if (VT.getVectorElementType() == MVT::i32 ||

5539

VT.getVectorElementType() == MVT::f32)

5540

return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS);

5541

// vrev <4 x i16> -> VREV32

5542

if (VT.getVectorElementType() == MVT::i16)

5543

return DAG.getNode(ARMISD::VREV32, dl, VT, OpLHS);

5544

// vrev <4 x i8> -> VREV16

5545

assert(VT.getVectorElementType() == MVT::i8)((VT.getVectorElementType() == MVT::i8) ? static_cast<void
> (0) : __assert_fail ("VT.getVectorElementType() == MVT::i8"
, "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 5545, __PRETTY_FUNCTION__));

5546

return DAG.getNode(ARMISD::VREV16, dl, VT, OpLHS);

5547

case OP_VDUP0:

5548

case OP_VDUP1:

5549

case OP_VDUP2:

5550

case OP_VDUP3:

5551

return DAG.getNode(ARMISD::VDUPLANE, dl, VT,

5552

OpLHS, DAG.getConstant(OpNum-OP_VDUP0, MVT::i32));

5553

case OP_VEXT1:

5554

case OP_VEXT2:

5555

case OP_VEXT3:

5556

return DAG.getNode(ARMISD::VEXT, dl, VT,

5557

OpLHS, OpRHS,

5558

DAG.getConstant(OpNum-OP_VEXT1+1, MVT::i32));

5559

case OP_VUZPL:

5560

case OP_VUZPR:

5561

return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),

5562

OpLHS, OpRHS).getValue(OpNum-OP_VUZPL);

5563

case OP_VZIPL:

5564

case OP_VZIPR:

5565

return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),

5566

OpLHS, OpRHS).getValue(OpNum-OP_VZIPL);

5567

case OP_VTRNL:

5568

case OP_VTRNR:

5569

return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),

5570

OpLHS, OpRHS).getValue(OpNum-OP_VTRNL);

5571

}

5572

}

5573

5574

/// LowerVECTOR_SHUFFLEv8i8 - Lower the shuffle Op to a VTBL node: the shuffle
/// mask becomes a v8i8 BUILD_VECTOR of i32 constants used as the table index
/// operand.  VTBL1 is emitted when the second shuffle input is undef, VTBL2
/// otherwise.
static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
                                       ArrayRef<int> ShuffleMask,
                                       SelectionDAG &DAG) {
  SDLoc DL(Op);
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);

  // Turn every mask entry into an i32 constant.
  SmallVector<SDValue, 8> VTBLMask;
  for (int MaskElt : ShuffleMask)
    VTBLMask.push_back(DAG.getConstant(MaskElt, MVT::i32));

  const bool SingleTable = V2.getNode()->getOpcode() == ISD::UNDEF;
  SDValue Indices = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, VTBLMask);

  if (SingleTable)
    return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1, Indices);
  return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2, Indices);
}

5594

5595

/// LowerReverse_VECTOR_SHUFFLEv16i8_v8i16 - Lower a shuffle of a v16i8 or
/// v8i16 vector as a VREV64 of the first operand followed by a VEXT of that
/// result with itself.
static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op,
                                                      SelectionDAG &DAG) {
  SDLoc DL(Op);
  SDValue Src = Op.getOperand(0);
  EVT VT = Src.getValueType();

  assert((VT == MVT::v8i16 || VT == MVT::v16i8) &&
         "Expect an v8i16/v16i8 type");

  SDValue Rev = DAG.getNode(ARMISD::VREV64, DL, VT, Src);

  // The VEXT offset is half the element count (8 of 16 bytes, or 4 of 8
  // halfwords), so the two halves of the VREV64 result trade places.
  // NOTE(review): presumably VREV64 has already reversed the elements inside
  // each 64-bit half, making the combination a full reverse - confirm against
  // the NEON VREV64/VEXT definitions.
  unsigned ExtractNum = 4;
  if (VT == MVT::v16i8)
    ExtractNum = 8;

  return DAG.getNode(ARMISD::VEXT, DL, VT, Rev, Rev,
                     DAG.getConstant(ExtractNum, MVT::i32));
}

5611

5612

/// LowerVECTOR_SHUFFLE - Custom-lower a VECTOR_SHUFFLE node.  Strategies are
/// tried in this order:
///   1. (elements <= 32 bits) direct mapping to VDUP/VDUPLANE, VEXT, VREV,
///      VTRN, VUZP or VZIP nodes;
///   2. (4-element vectors) synthesis from the perfect-shuffle table;
///   3. (elements >= 32 bits) element-wise extraction into an
///      ARMISD::BUILD_VECTOR;
///   4. full reverse of v8i16/v16i8;
///   5. VTBL for v8i8.
/// An empty SDValue is returned when none of them applies.
static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
  SDLoc dl(Op);
  EVT VT = Op.getValueType();
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());

  // Convert shuffles that are directly supported on NEON to target-specific
  // DAG nodes, instead of keeping them as shuffles and matching them again
  // during code selection.  This is more efficient and avoids the possibility
  // of inconsistencies between legalization and selection.
  // FIXME: floating-point vectors should be canonicalized to integer vectors
  // of the same type so that they get CSEd properly.
  ArrayRef<int> ShuffleMask = SVN->getMask();

  const unsigned EltSize = VT.getVectorElementType().getSizeInBits();
  if (EltSize <= 32) {
    if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) {
      int Lane = SVN->getSplatIndex();
      // If this is undef splat, generate it via "just" vdup, if possible.
      if (Lane == -1)
        Lane = 0;

      if (Lane == 0) {
        // V1 is a SCALAR_TO_VECTOR: duplicate the scalar directly.
        if (V1.getOpcode() == ISD::SCALAR_TO_VECTOR)
          return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));

        // V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR
        // (and probably will turn into a SCALAR_TO_VECTOR once legalization
        // reaches it): a non-constant first operand with every other operand
        // undef.
        if (V1.getOpcode() == ISD::BUILD_VECTOR &&
            !isa<ConstantSDNode>(V1.getOperand(0))) {
          unsigned Idx = 1;
          const unsigned NumOps = V1.getNumOperands();
          while (Idx != NumOps &&
                 V1.getOperand(Idx).getOpcode() == ISD::UNDEF)
            ++Idx;
          if (Idx == NumOps)
            return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
        }
      }

      return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
                         DAG.getConstant(Lane, MVT::i32));
    }

    bool ReverseVEXT;
    unsigned Imm;
    if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
      if (ReverseVEXT)
        std::swap(V1, V2);
      return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
                         DAG.getConstant(Imm, MVT::i32));
    }

    if (isVREVMask(ShuffleMask, VT, 64))
      return DAG.getNode(ARMISD::VREV64, dl, VT, V1);
    if (isVREVMask(ShuffleMask, VT, 32))
      return DAG.getNode(ARMISD::VREV32, dl, VT, V1);
    if (isVREVMask(ShuffleMask, VT, 16))
      return DAG.getNode(ARMISD::VREV16, dl, VT, V1);

    if (V2->getOpcode() == ISD::UNDEF &&
        isSingletonVEXTMask(ShuffleMask, VT, Imm)) {
      return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V1,
                         DAG.getConstant(Imm, MVT::i32));
    }

    // Check for Neon shuffles that modify both input vectors in place.
    // If both results are used, i.e., if there are two shuffles with the same
    // source operands and with masks corresponding to both results of one of
    // these operations, DAG memoization will ensure that a single node is
    // used for both shuffles.
    auto EmitTwoResultNode = [&](unsigned Opcode, SDValue A, SDValue B,
                                 unsigned ResNo) -> SDValue {
      return DAG.getNode(Opcode, dl, DAG.getVTList(VT, VT),
                         A, B).getValue(ResNo);
    };

    unsigned WhichResult;
    if (isVTRNMask(ShuffleMask, VT, WhichResult))
      return EmitTwoResultNode(ARMISD::VTRN, V1, V2, WhichResult);
    if (isVUZPMask(ShuffleMask, VT, WhichResult))
      return EmitTwoResultNode(ARMISD::VUZP, V1, V2, WhichResult);
    if (isVZIPMask(ShuffleMask, VT, WhichResult))
      return EmitTwoResultNode(ARMISD::VZIP, V1, V2, WhichResult);

    // Same operations, but with V1 supplying both inputs.
    if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
      return EmitTwoResultNode(ARMISD::VTRN, V1, V1, WhichResult);
    if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
      return EmitTwoResultNode(ARMISD::VUZP, V1, V1, WhichResult);
    if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
      return EmitTwoResultNode(ARMISD::VZIP, V1, V1, WhichResult);
  }

  // If the shuffle is not directly supported and it has 4 elements, use
  // the PerfectShuffle-generated table to synthesize it from other shuffles.
  const unsigned NumElts = VT.getVectorNumElements();
  if (NumElts == 4) {
    // The table index is a 4-digit base-9 number; undef lanes use digit 8.
    unsigned PFTableIndex = 0;
    for (unsigned Lane = 0; Lane != 4; ++Lane) {
      unsigned Digit = ShuffleMask[Lane] < 0 ? 8 : ShuffleMask[Lane];
      PFTableIndex = PFTableIndex * 9 + Digit;
    }

    const unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
    const unsigned Cost = PFEntry >> 30;
    if (Cost <= 4)
      return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
  }

  // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs.
  if (EltSize >= 32) {
    // Do the expansion with floating-point types, since that is what the VFP
    // registers are defined to use, and since i64 is not legal.
    EVT EltVT = EVT::getFloatingPointVT(EltSize);
    EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
    V1 = DAG.getNode(ISD::BITCAST, dl, VecVT, V1);
    V2 = DAG.getNode(ISD::BITCAST, dl, VecVT, V2);

    SmallVector<SDValue, 8> Ops;
    for (unsigned i = 0; i < NumElts; ++i) {
      const int MaskElt = ShuffleMask[i];
      if (MaskElt < 0) {
        Ops.push_back(DAG.getUNDEF(EltVT));
        continue;
      }
      // Mask values below NumElts index V1, the others index V2; the lane
      // within the chosen vector is the value masked with NumElts-1.
      SDValue Src = MaskElt < (int)NumElts ? V1 : V2;
      SDValue LaneIdx = DAG.getConstant(MaskElt & (NumElts-1), MVT::i32);
      Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
                                Src, LaneIdx));
    }
    SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
    return DAG.getNode(ISD::BITCAST, dl, VT, Val);
  }

  if ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(ShuffleMask, VT))
    return LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(Op, DAG);

  if (VT == MVT::v8i8) {
    SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG);
    if (NewOp.getNode())
      return NewOp;
  }

  return SDValue();
}

5760

5761

/// LowerINSERT_VECTOR_ELT - INSERT_VECTOR_ELT is legal only for immediate
/// indexes: return Op unchanged when the lane operand (operand 2) is a
/// constant, and an empty SDValue otherwise.
static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
  SDValue LaneIdx = Op.getOperand(2);
  if (isa<ConstantSDNode>(LaneIdx))
    return Op;
  return SDValue();
}

5769

5770

/// LowerEXTRACT_VECTOR_ELT - EXTRACT_VECTOR_ELT is legal only for immediate
/// indexes.  Returns an empty SDValue for a non-constant lane, a VGETLANEu
/// node when an i32 result is extracted from a vector whose elements are
/// narrower than 32 bits, and Op unchanged in every other case.
static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
  SDValue LaneIdx = Op.getOperand(1);
  if (!isa<ConstantSDNode>(LaneIdx))
    return SDValue();

  SDValue SrcVec = Op.getOperand(0);
  const bool ResultIsI32 = Op.getValueType() == MVT::i32;
  if (ResultIsI32 &&
      SrcVec.getValueType().getVectorElementType().getSizeInBits() < 32) {
    SDLoc dl(Op);
    return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, SrcVec, LaneIdx);
  }

  return Op;
}

5785

5786

/// LowerCONCAT_VECTORS - Lower the concatenation of two 64-bit vectors into a
/// 128-bit vector: each non-undef operand is bitcast to f64 and inserted into
/// the matching lane of a v2f64 value, which is finally bitcast to the result
/// type.
static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
  // The only time a CONCAT_VECTORS operation can have legal types is when
  // two 64-bit vectors are concatenated to a 128-bit vector.
  assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 &&
         "unexpected CONCAT_VECTORS");
  SDLoc dl(Op);

  // Start from an undef v2f64 so that undef halves need no insertion.
  SDValue Val = DAG.getUNDEF(MVT::v2f64);
  for (unsigned Half = 0; Half != 2; ++Half) {
    SDValue Part = Op.getOperand(Half);
    if (Part.getOpcode() == ISD::UNDEF)
      continue;
    Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
                      DAG.getNode(ISD::BITCAST, dl, MVT::f64, Part),
                      DAG.getIntPtrConstant(Half));
  }

  return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val);
}

5805

5806

/// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each

5807

/// element has been zero/sign-extended, depending on the isSigned parameter,

5808

/// from an integer type half its size.

5809

static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,

5810

bool isSigned) {

5811

// A v2i64 BUILD_VECTOR will have been legalized to a BITCAST from v4i32.

5812

EVT VT = N->getValueType(0);

5813

if (VT == MVT::v2i64 && N->getOpcode() == ISD::BITCAST) {

5814

SDNode *BVN = N->getOperand(0).getNode();

5815

if (BVN->getValueType(0) != MVT::v4i32 ||

5816

BVN->getOpcode() != ISD::BUILD_VECTOR)

5817

return false;

5818

unsigned LoElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0;

5819

unsigned HiElt = 1 - LoElt;

5820

ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt));

5821

ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt));

5822

ConstantSDNode *Lo1 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt+2));

5823

ConstantSDNode *Hi1 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt+2));

5824

if (!Lo0 || !Hi0 || !Lo1 || !Hi1)

5825

return false;

5826

if (isSigned) {

5827

if (Hi0->getSExtValue() == Lo0->getSExtValue() >> 32 &&

5828

Hi1->getSExtValue() == Lo1->getSExtValue() >> 32)

5829

return true;

5830

} else {

5831

if (Hi0->isNullValue() && Hi1->isNullValue())

5832

return true;

5833

}

5834

return false;

5835

}

5836

5837

if (N->getOpcode() != ISD::BUILD_VECTOR)

5838

return false;

5839

5840

for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {

5841

SDNode *Elt = N->getOperand(i).getNode();

5842

if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {

5843

unsigned EltSize = VT.getVectorElementType().getSizeInBits();

5844

unsigned HalfSize = EltSize / 2;

5845

if (isSigned) {

5846

if (!isIntN(HalfSize, C->getSExtValue()))

5847

return false;

5848

} else {

5849

if (!isUIntN(HalfSize, C->getZExtValue()))

5850

return false;

5851

}

5852

continue;

5853

}

5854

return false;

5855

}

5856

5857

return true;

5858

}

5859

5860

/// isSignExtended - Check if a node is a vector value that is sign-extended

5861

/// or a constant BUILD_VECTOR with sign-extended elements.

5862

static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {

5863

if (N->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N))

5864

return true;

5865

if (isExtendedBUILD_VECTOR(N, DAG, true))

5866

return true;

5867

return false;

5868

}

5869

5870

/// isZeroExtended - Check if a node is a vector value that is zero-extended

5871

/// or a constant BUILD_VECTOR with zero-extended elements.

5872

static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {

5873

if (N->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N))

5874

return true;

5875

if (isExtendedBUILD_VECTOR(N, DAG, false))

5876

return true;

5877

return false;

5878

}

5879

5880

/// getExtensionTo64Bits - Return the 64-bit vector type that OrigVT is widened
/// to.  Types that are already 64 bits or larger are returned unchanged;
/// v2i8 and v2i16 map to v2i32, and v4i8 maps to v4i16.  Any other type is
/// unexpected.
static EVT getExtensionTo64Bits(const EVT &OrigVT) {
  if (OrigVT.getSizeInBits() >= 64)
    return OrigVT;

  assert(OrigVT.isSimple() && "Expecting a simple value type");

  switch (OrigVT.getSimpleVT().SimpleTy) {
  case MVT::v4i8:
    return MVT::v4i16;
  case MVT::v2i8:
  case MVT::v2i16:
    return MVT::v2i32;
  default:
    llvm_unreachable("Unexpected Vector Type");
  }
}

5896

5897

/// AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total

5898

/// value size to 64 bits. We need a 64-bit D register as an operand to VMULL.

5899

/// We insert the required extension here to get the vector to fill a D register.

5900

static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG,

5901

const EVT &OrigTy,

5902

const EVT &ExtTy,

5903

unsigned ExtOpcode) {

5904

// The vector originally had a size of OrigTy. It was then extended to ExtTy.

5905

// We expect the ExtTy to be 128-bits total. If the OrigTy is less than

5906

// 64-bits we need to insert a new extension so that it will be 64-bits.

5907

assert(ExtTy.is128BitVector() && "Unexpected extension size")((ExtTy.is128BitVector() && "Unexpected extension size"
) ? static_cast<void> (0) : __assert_fail ("ExtTy.is128BitVector() && \"Unexpected extension size\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 5907, __PRETTY_FUNCTION__));

5908

if (OrigTy.getSizeInBits() >= 64)

5909

return N;

5910

5911

// Must extend size to at least 64 bits to be used as an operand for VMULL.

5912

EVT NewVT = getExtensionTo64Bits(OrigTy);

5913

5914

return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);

5915

}

5916

5917

/// SkipLoadExtensionForVMULL - return a load of the original vector size that

5918

/// does not do any sign/zero extension. If the original vector is less

5919

/// than 64 bits, an appropriate extension will be added after the load to

5920

/// reach a total size of 64 bits. We have to add the extension separately

5921

/// because ARM does not have a sign/zero extending load for vectors.

5922

static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) {

5923

EVT ExtendedTy = getExtensionTo64Bits(LD->getMemoryVT());

5924

5925

// The load already has the right type.

5926

if (ExtendedTy == LD->getMemoryVT())

5927

return DAG.getLoad(LD->getMemoryVT(), SDLoc(LD), LD->getChain(),

5928

LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(),

5929

LD->isNonTemporal(), LD->isInvariant(),

5930

LD->getAlignment());

5931

5932

// We need to create a zextload/sextload. We cannot just create a load

5933

// followed by a zext/zext node because LowerMUL is also run during normal

5934

// operation legalization where we can't create illegal types.

5935

return DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), ExtendedTy,

5936

LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(),

5937

LD->getMemoryVT(), LD->isVolatile(), LD->isInvariant(),

5938

LD->isNonTemporal(), LD->getAlignment());

5939

}

5940

5941

/// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND,

5942

/// extending load, or BUILD_VECTOR with extended elements, return the

5943

/// unextended value. The unextended vector should be 64 bits so that it can

5944

/// be used as an operand to a VMULL instruction. If the original vector size

5945

/// before extension is less than 64 bits we add a an extension to resize

5946

/// the vector to 64 bits.

5947

static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) {

5948

if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND)

5949

return AddRequiredExtensionForVMULL(N->getOperand(0), DAG,

5950

N->getOperand(0)->getValueType(0),

5951

N->getValueType(0),

5952

N->getOpcode());

5953

5954

if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N))

5955

return SkipLoadExtensionForVMULL(LD, DAG);

5956

5957

// Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will

5958

// have been legalized as a BITCAST from v4i32.

5959

if (N->getOpcode() == ISD::BITCAST) {

5960

SDNode *BVN = N->getOperand(0).getNode();

5961

assert(BVN->getOpcode() == ISD::BUILD_VECTOR &&((BVN->getOpcode() == ISD::BUILD_VECTOR && BVN->
getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR"
) ? static_cast<void> (0) : __assert_fail ("BVN->getOpcode() == ISD::BUILD_VECTOR && BVN->getValueType(0) == MVT::v4i32 && \"expected v4i32 BUILD_VECTOR\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 5962, __PRETTY_FUNCTION__))

5962

BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR")((BVN->getOpcode() == ISD::BUILD_VECTOR && BVN->
getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR"
) ? static_cast<void> (0) : __assert_fail ("BVN->getOpcode() == ISD::BUILD_VECTOR && BVN->getValueType(0) == MVT::v4i32 && \"expected v4i32 BUILD_VECTOR\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 5962, __PRETTY_FUNCTION__));

5963

unsigned LowElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0;

5964

return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), MVT::v2i32,

5965

BVN->getOperand(LowElt), BVN->getOperand(LowElt+2));

5966

}

5967

// Construct a new BUILD_VECTOR with elements truncated to half the size.

5968

assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR")((N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR"
) ? static_cast<void> (0) : __assert_fail ("N->getOpcode() == ISD::BUILD_VECTOR && \"expected BUILD_VECTOR\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 5968, __PRETTY_FUNCTION__));

5969

EVT VT = N->getValueType(0);

5970

unsigned EltSize = VT.getVectorElementType().getSizeInBits() / 2;

5971

unsigned NumElts = VT.getVectorNumElements();

5972

MVT TruncVT = MVT::getIntegerVT(EltSize);

5973

SmallVector<SDValue, 8> Ops;

5974

for (unsigned i = 0; i != NumElts; ++i) {

5975

ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));

5976

const APInt &CInt = C->getAPIntValue();

5977

// Element types smaller than 32 bits are not legal, so use i32 elements.

5978

// The values are implicitly truncated so sext vs. zext doesn't matter.

5979

Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), MVT::i32));

5980

}

5981

return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N),

5982

MVT::getVectorVT(TruncVT, NumElts), Ops);

5983

}

5984

5985

/// isAddSubSExt - Return true if N is an ADD or SUB whose two operands each
/// have a single use and are both sign-extended (see isSignExtended).
static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
  const unsigned Opc = N->getOpcode();
  if (Opc != ISD::ADD && Opc != ISD::SUB)
    return false;

  SDNode *LHS = N->getOperand(0).getNode();
  SDNode *RHS = N->getOperand(1).getNode();
  if (!LHS->hasOneUse() || !RHS->hasOneUse())
    return false;
  return isSignExtended(LHS, DAG) && isSignExtended(RHS, DAG);
}

5995

5996

/// isAddSubZExt - Return true if N is an ADD or SUB whose two operands each
/// have a single use and are both zero-extended (see isZeroExtended).
static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
  const unsigned Opc = N->getOpcode();
  if (Opc != ISD::ADD && Opc != ISD::SUB)
    return false;

  SDNode *LHS = N->getOperand(0).getNode();
  SDNode *RHS = N->getOperand(1).getNode();
  if (!LHS->hasOneUse() || !RHS->hasOneUse())
    return false;
  return isZeroExtended(LHS, DAG) && isZeroExtended(RHS, DAG);
}

6006

6007

/// LowerMUL - Custom-lower a 128-bit integer vector multiply.  When both
/// operands are sign- (or zero-) extended values the multiply becomes a
/// VMULLs (or VMULLu) of the unextended operands.  When one operand is an
/// extended value and the other an ADD/SUB of extended values, the multiply
/// is distributed over the ADD/SUB as two VMULLs.  Otherwise v2i64 multiplies
/// yield an empty SDValue (to be expanded) and all other types are returned
/// unchanged.
static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
  // Multiplications are only custom-lowered for 128-bit vectors so that
  // VMULL can be detected.  Otherwise v2i64 multiplications are not legal.
  EVT VT = Op.getValueType();
  assert(VT.is128BitVector() && VT.isInteger() &&
         "unexpected type for custom-lowering ISD::MUL");

  SDNode *N0 = Op.getOperand(0).getNode();
  SDNode *N1 = Op.getOperand(1).getNode();
  unsigned NewOpc = 0;  // 0 means "no VMULL form found".
  bool isMLA = false;   // true when the multiply is distributed over N0.

  const bool N0IsSExt = isSignExtended(N0, DAG);
  const bool N1IsSExt = isSignExtended(N1, DAG);
  if (N0IsSExt && N1IsSExt) {
    NewOpc = ARMISD::VMULLs;
  } else {
    const bool N0IsZExt = isZeroExtended(N0, DAG);
    const bool N1IsZExt = isZeroExtended(N1, DAG);
    if (N0IsZExt && N1IsZExt) {
      NewOpc = ARMISD::VMULLu;
    } else if (N1IsSExt || N1IsZExt) {
      // Look for (s/zext A + s/zext B) * (s/zext C).  We want to turn these
      // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
      if (N1IsSExt && isAddSubSExt(N0, DAG)) {
        NewOpc = ARMISD::VMULLs;
        isMLA = true;
      } else if (N1IsZExt && isAddSubZExt(N0, DAG)) {
        NewOpc = ARMISD::VMULLu;
        isMLA = true;
      } else if (N0IsZExt && isAddSubZExt(N1, DAG)) {
        // NOTE(review): this arm is only reached when N1 is itself
        // sign/zero-extended, yet isAddSubZExt requires N1 to be an ADD/SUB;
        // it looks unreachable as written - confirm the intended nesting.
        std::swap(N0, N1);
        NewOpc = ARMISD::VMULLu;
        isMLA = true;
      }
    }

    if (!NewOpc) {
      // v2i64: fall through to expand this.  It is not legal.
      if (VT == MVT::v2i64)
        return SDValue();
      // Other vector multiplications are legal.
      return Op;
    }
  }

  // Legalize to a VMULL instruction.
  SDLoc DL(Op);
  SDValue Op0;
  SDValue Op1 = SkipExtensionForVMULL(N1, DAG);
  if (!isMLA) {
    Op0 = SkipExtensionForVMULL(N0, DAG);
    assert(Op0.getValueType().is64BitVector() &&
           Op1.getValueType().is64BitVector() &&
           "unexpected types for extended operands to VMULL");
    return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
  }

  // Optimizing (zext A + zext B) * C, to (VMULL A, C) + (VMULL B, C) during
  // isel lowering to take advantage of no-stall back to back vmul + vmla.
  //   vmull q0, d4, d6
  //   vmlal q0, d5, d6
  // is faster than
  //   vaddl q0, d4, d5
  //   vmovl q1, d6
  //   vmul  q0, q0, q1
  SDValue N00 = SkipExtensionForVMULL(N0->getOperand(0).getNode(), DAG);
  SDValue N01 = SkipExtensionForVMULL(N0->getOperand(1).getNode(), DAG);
  EVT Op1VT = Op1.getValueType();
  // Recombine the two partial products with N0's own opcode (ADD or SUB).
  return DAG.getNode(N0->getOpcode(), DL, VT,
                     DAG.getNode(NewOpc, DL, VT,
                               DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
                     DAG.getNode(NewOpc, DL, VT,
                               DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
}

6081

6082

/// LowerSDIV_v4i8 - Build an approximate signed division of X by Y through
/// single-precision floating point: both inputs are sign-extended to v4i32
/// and converted to v4f32, X is multiplied by the reciprocal estimate of Y,
/// a bias of 0xb000 is added to the integer bit pattern of the product, and
/// the result is converted back to integers and truncated to v4i16.
static SDValue
LowerSDIV_v4i8(SDValue X, SDValue Y, SDLoc dl, SelectionDAG &DAG) {
  // Widen to 32-bit integer lanes, then convert to float.
  X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, X);
  Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Y);
  X = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, X);
  Y = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, Y);

  // Get reciprocal estimate.
  //   float4 recip = vrecpeq_f32(yf);
  SDValue Recip =
      DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
                  DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), Y);

  // Because char has a smaller range than uchar, we can actually get away
  // without any newton steps.  This requires that we use a weird bias
  // of 0xb000, however (again, this has been exhaustively tested).
  //   float4 result = as_float4(as_int4(xf*recip) + 0xb000);
  SDValue Quot = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Recip);
  Quot = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Quot);
  SDValue Bias = DAG.getConstant(0xb000, MVT::i32);
  Bias = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Bias, Bias, Bias, Bias);
  Quot = DAG.getNode(ISD::ADD, dl, MVT::v4i32, Quot, Bias);
  Quot = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, Quot);

  // Convert back to integer lanes and narrow to 16 bits.
  Quot = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, Quot);
  Quot = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, Quot);
  return Quot;
}

6110

6111

/// Build the DAG for a four-lane signed 16-bit division carried out in single
/// precision floating point.
///
/// Both operands are sign-extended to v4i32 and converted to v4f32.  The
/// reciprocal of the divisor is the NEON estimate (vrecpe) improved by one
/// vrecps refinement step.  The integer constant 0x89 is added to the bit
/// pattern of xf * recip before converting back; according to the original
/// author this combination was exhaustively tested for the short range.
///
/// \returns the quotient as a v4i16 value.
static SDValue
LowerSDIV_v4i16(SDValue N0, SDValue N1, SDLoc dl, SelectionDAG &DAG) {
  // float4 xf = vcvt_f32_s32(vmovl_s16(x));
  // float4 yf = vcvt_f32_s32(vmovl_s16(y));
  SDValue DividendI32 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N0);
  SDValue DivisorI32 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N1);
  SDValue DividendF32 =
      DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, DividendI32);
  SDValue DivisorF32 =
      DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, DivisorI32);

  // float4 recip = vrecpeq_f32(yf);
  // recip *= vrecpsq_f32(yf, recip);
  SDValue VRecpeID = DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32);
  SDValue Estimate = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
                                 VRecpeID, DivisorF32);
  SDValue VRecpsID = DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32);
  SDValue Step = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
                             VRecpsID, DivisorF32, Estimate);
  SDValue Recip = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, Step, Estimate);

  // float4 result = as_float4(as_int4(xf*recip) + 0x89);
  // The bias is added to the raw bits of the product, not to its value.
  SDValue Product = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, DividendF32, Recip);
  SDValue ProductBits = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Product);
  SDValue BiasLane = DAG.getConstant(0x89, MVT::i32);
  SDValue Bias = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                             BiasLane, BiasLane, BiasLane, BiasLane);
  SDValue BiasedBits = DAG.getNode(ISD::ADD, dl, MVT::v4i32, ProductBits, Bias);
  SDValue Biased = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, BiasedBits);

  // return vmovn_s32(vcvt_s32_f32(result));
  SDValue QuotientI32 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, Biased);
  return DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, QuotientI32);
}

6147

6148

/// Custom-lower a vector ISD::SDIV whose result type is v4i16 or v8i8.
///
/// v4i16 is handed straight to LowerSDIV_v4i16.  v8i8 is sign-extended to
/// v8i16, split into two v4i16 halves that are each divided with
/// LowerSDIV_v4i8, re-joined and truncated back to v8i8.
static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getValueType();
  assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
         "unexpected type for custom-lowering ISD::SDIV");

  SDLoc dl(Op);
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);

  if (VT != MVT::v8i8)
    return LowerSDIV_v4i16(LHS, RHS, dl, DAG);

  // Widen the eight byte lanes to 16 bits.
  SDValue WideLHS = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, LHS);
  SDValue WideRHS = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, RHS);

  // Split each widened operand at lane 4 (upper half) and lane 0 (lower half).
  SDValue HiLHS = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, WideLHS,
                              DAG.getIntPtrConstant(4));
  SDValue HiRHS = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, WideRHS,
                              DAG.getIntPtrConstant(4));
  SDValue LoLHS = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, WideLHS,
                              DAG.getIntPtrConstant(0));
  SDValue LoRHS = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, WideRHS,
                              DAG.getIntPtrConstant(0));

  // Divide each half; both results are v4i16.
  SDValue LoQuot = LowerSDIV_v4i8(LoLHS, LoRHS, dl, DAG);
  SDValue HiQuot = LowerSDIV_v4i8(HiLHS, HiRHS, dl, DAG);

  // Re-assemble the eight lanes and narrow them back to bytes.
  SDValue Joined =
      DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, LoQuot, HiQuot);
  Joined = LowerCONCAT_VECTORS(Joined, DAG);
  return DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, Joined);
}

6182

6183

/// Custom-lower a vector ISD::UDIV whose result type is v4i16 or v8i8.
///
/// v8i8 is zero-extended to v8i16, split into two v4i16 halves that are each
/// divided with LowerSDIV_v4i16, re-joined and narrowed with the
/// arm_neon_vqmovnsu intrinsic.  v4i16 is divided in single precision floating
/// point using a reciprocal estimate refined by two vrecps steps.
static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getValueType();
  assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
         "unexpected type for custom-lowering ISD::UDIV");

  SDLoc dl(Op);
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);

  if (VT == MVT::v8i8) {
    // Widen the eight byte lanes to 16 bits.
    SDValue WideLHS = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, LHS);
    SDValue WideRHS = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, RHS);

    // Split each widened operand at lane 4 (upper half) and lane 0 (lower).
    SDValue HiLHS = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16,
                                WideLHS, DAG.getIntPtrConstant(4));
    SDValue HiRHS = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16,
                                WideRHS, DAG.getIntPtrConstant(4));
    SDValue LoLHS = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16,
                                WideLHS, DAG.getIntPtrConstant(0));
    SDValue LoRHS = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16,
                                WideRHS, DAG.getIntPtrConstant(0));

    // The signed 16-bit helper is reused here.  NOTE(review): presumably this
    // is valid because zero-extended byte values are non-negative when read
    // as signed 16-bit lanes -- confirm.
    SDValue LoQuot = LowerSDIV_v4i16(LoLHS, LoRHS, dl, DAG); // v4i16
    SDValue HiQuot = LowerSDIV_v4i16(HiLHS, HiRHS, dl, DAG); // v4i16

    SDValue Joined =
        DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, LoQuot, HiQuot);
    Joined = LowerCONCAT_VECTORS(Joined, DAG);

    // Narrow the 16-bit lanes back to v8i8.
    SDValue VQMovnsuID =
        DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, MVT::i32);
    return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8, VQMovnsuID,
                       Joined);
  }

  // v4i16 udiv ... Convert to float.
  // float4 xf = vcvt_f32_s32(vmovl_u16(x));
  // float4 yf = vcvt_f32_s32(vmovl_u16(y));
  SDValue DividendI32 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, LHS);
  SDValue DivisorI32 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, RHS);
  SDValue DividendF32 =
      DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, DividendI32);
  SDValue DivisorF32 =
      DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, DivisorI32);

  // Use reciprocal estimate and two refinement steps.
  // float4 recip = vrecpeq_f32(yf);
  // recip *= vrecpsq_f32(yf, recip);
  // recip *= vrecpsq_f32(yf, recip);
  SDValue VRecpeID = DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32);
  SDValue Recip = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
                              VRecpeID, DivisorF32);

  SDValue FirstStepID = DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32);
  SDValue FirstStep = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
                                  FirstStepID, DivisorF32, Recip);
  Recip = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, FirstStep, Recip);

  SDValue SecondStepID = DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32);
  SDValue SecondStep = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
                                   SecondStepID, DivisorF32, Recip);
  Recip = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, SecondStep, Recip);

  // Simply multiplying by the reciprocal estimate can leave us a few ulps
  // too low, so we add 2 ulps (exhaustive testing shows that this is enough,
  // and that it will never cause us to return an answer too large).
  // float4 result = as_float4(as_int4(xf*recip) + 2);
  SDValue Product = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, DividendF32, Recip);
  SDValue ProductBits = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Product);
  SDValue BiasLane = DAG.getConstant(2, MVT::i32);
  SDValue Bias = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                             BiasLane, BiasLane, BiasLane, BiasLane);
  SDValue BiasedBits = DAG.getNode(ISD::ADD, dl, MVT::v4i32, ProductBits, Bias);
  SDValue Biased = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, BiasedBits);

  // Convert back to integer and return.
  // return vmovn_u32(vcvt_s32_f32(result));
  SDValue QuotientI32 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, Biased);
  return DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, QuotientI32);
}

6256

6257

/// Rewrite ISD::ADDC / ADDE / SUBC / SUBE as the ARMISD node of the same name.
///
/// The new node produces the original first result type plus an MVT::i32
/// second result.  ADDE and SUBE forward three operands, ADDC and SUBC two.
/// Any other opcode is a programming error.
static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getNode()->getValueType(0);
  SDVTList VTs = DAG.getVTList(VT, MVT::i32);

  unsigned ARMOpc;
  bool TakesThirdOperand = false;
  switch (Op.getOpcode()) {
  case ISD::ADDC:
    ARMOpc = ARMISD::ADDC;
    break;
  case ISD::ADDE:
    ARMOpc = ARMISD::ADDE;
    TakesThirdOperand = true;
    break;
  case ISD::SUBC:
    ARMOpc = ARMISD::SUBC;
    break;
  case ISD::SUBE:
    ARMOpc = ARMISD::SUBE;
    TakesThirdOperand = true;
    break;
  default:
    llvm_unreachable("Invalid code");
  }

  if (TakesThirdOperand)
    return DAG.getNode(ARMOpc, SDLoc(Op), VTs, Op.getOperand(0),
                       Op.getOperand(1), Op.getOperand(2));

  return DAG.getNode(ARMOpc, SDLoc(Op), VTs, Op.getOperand(0),
                     Op.getOperand(1));
}

6277

6278

/// Lower ISD::FSINCOS to a call to __sincos_stret (f64 argument) or
/// __sincosf_stret (any other argument type).  Only valid on Darwin targets.
///
/// A stack object holding a {T, T} struct is created and its address is
/// passed as the first (sret) argument; the value being evaluated is the
/// second argument.  After the call the two fields are loaded back, chained
/// after the call, and returned as a MERGE_VALUES node (first field, then
/// second field).
SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
  assert(Subtarget->isTargetDarwin());

  // For iOS, we want to call an alternative entry point: __sincos_stret,
  // return values are passed via sret.
  SDLoc dl(Op);
  SDValue Arg = Op.getOperand(0);
  EVT ArgVT = Arg.getValueType();
  Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());

  MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // Pair of floats / doubles used to pass the result.
  StructType *RetTy = StructType::get(ArgTy, ArgTy, nullptr);

  // Create stack object for sret.
  const uint64_t ByteSize = TLI.getDataLayout()->getTypeAllocSize(RetTy);
  const unsigned StackAlign = TLI.getDataLayout()->getPrefTypeAlignment(RetTy);
  int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false);
  SDValue SRet = DAG.getFrameIndex(FrameIdx, TLI.getPointerTy());

  ArgListTy Args;
  ArgListEntry Entry;

  // First argument: pointer to the result struct, marked sret.
  Entry.Node = SRet;
  Entry.Ty = RetTy->getPointerTo();
  Entry.isSExt = false;
  Entry.isZExt = false;
  Entry.isSRet = true;
  Args.push_back(Entry);

  // Second argument: the value itself.  The entry object is reused, so the
  // sret flag set above must be cleared; otherwise the plain value argument
  // would also be described as a struct-return pointer.
  Entry.Node = Arg;
  Entry.Ty = ArgTy;
  Entry.isSExt = false;
  Entry.isZExt = false;
  Entry.isSRet = false;
  Args.push_back(Entry);

  const char *LibcallName = (ArgVT == MVT::f64)
    ? "__sincos_stret" : "__sincosf_stret";
  SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy());

  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
    .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), Callee,
               std::move(Args), 0)
    .setDiscardResult();

  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);

  // First field of the struct; the load is chained after the call.
  SDValue LoadSin = DAG.getLoad(ArgVT, dl, CallResult.second, SRet,
                                MachinePointerInfo(), false, false, false, 0);

  // Address of cos field.
  SDValue Add = DAG.getNode(ISD::ADD, dl, getPointerTy(), SRet,
                            DAG.getIntPtrConstant(ArgVT.getStoreSize()));
  SDValue LoadCos = DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add,
                                MachinePointerInfo(), false, false, false, 0);

  SDVTList Tys = DAG.getVTList(ArgVT, ArgVT);
  return DAG.getNode(ISD::MERGE_VALUES, dl, Tys,
                     LoadSin.getValue(0), LoadCos.getValue(0));
}

6341

6342

/// Custom lowering hook for ISD::ATOMIC_LOAD / ISD::ATOMIC_STORE.
///
/// \returns \p Op unchanged when its ordering is Monotonic or weaker, and an
/// empty SDValue for any stronger ordering.
static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
  // Acquire/Release load/store is not legal for targets without a
  // dmb or equivalent available.
  const bool StrongerThanMonotonic =
      cast<AtomicSDNode>(Op)->getOrdering() > Monotonic;
  if (StrongerThanMonotonic)
    return SDValue();

  // Monotonic load/store is legal for all targets
  return Op;
}

6351

6352

static void ReplaceREADCYCLECOUNTER(SDNode *N,

6353

SmallVectorImpl<SDValue> &Results,

6354

SelectionDAG &DAG,

6355

const ARMSubtarget *Subtarget) {

6356

SDLoc DL(N);

6357

SDValue Cycles32, OutChain;

6358

6359

if (Subtarget->hasPerfMon()) {

6360

// Under Power Management extensions, the cycle-count is:

6361

// mrc p15, #0, <Rt>, c9, c13, #0

6362

SDValue Ops[] = { N->getOperand(0), // Chain

6363

DAG.getConstant(Intrinsic::arm_mrc, MVT::i32),

6364

DAG.getConstant(15, MVT::i32),

6365

DAG.getConstant(0, MVT::i32),

6366

DAG.getConstant(9, MVT::i32),

6367

DAG.getConstant(13, MVT::i32),

6368

DAG.getConstant(0, MVT::i32)

6369

};

6370

6371

Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,

6372

DAG.getVTList(MVT::i32, MVT::Other), Ops);

6373

OutChain = Cycles32.getValue(1);

6374

} else {

6375

// Intrinsic is defined to return 0 on unsupported platforms. Technically

6376

// there are older ARM CPUs that have implementation-specific ways of

6377

// obtaining this information (FIXME!).

6378

Cycles32 = DAG.getConstant(0, MVT::i32);

6379

OutChain = DAG.getEntryNode();

6380

}

6381

6382

6383

SDValue Cycles64 = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,

6384

Cycles32, DAG.getConstant(0, MVT::i32));

6385

Results.push_back(Cycles64);

6386

Results.push_back(OutChain);

6387

}

6388

6389

/// Dispatch a node that was marked for custom lowering to the routine that
/// handles its opcode and return that routine's result.
///
/// Reaching an opcode with no entry here is a programming error.  The same
/// applies to ISD::GlobalAddress with an object format other than COFF, ELF
/// or MachO, and to ISD::DYNAMIC_STACKALLOC outside the Windows Itanium
/// environment.
SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  // Addresses and constants.
  case ISD::ConstantPool:
    return LowerConstantPool(Op, DAG);
  case ISD::BlockAddress:
    return LowerBlockAddress(Op, DAG);
  case ISD::GlobalAddress:
    // The lowering depends on the object file format of the target triple.
    switch (Subtarget->getTargetTriple().getObjectFormat()) {
    case Triple::COFF:
      return LowerGlobalAddressWindows(Op, DAG);
    case Triple::ELF:
      return LowerGlobalAddressELF(Op, DAG);
    case Triple::MachO:
      return LowerGlobalAddressDarwin(Op, DAG);
    default:
      llvm_unreachable("unknown object format");
    }
  case ISD::GlobalTLSAddress:
    return LowerGlobalTLSAddress(Op, DAG);

  // Selects, branches and miscellaneous control.
  case ISD::SELECT:
    return LowerSELECT(Op, DAG);
  case ISD::SELECT_CC:
    return LowerSELECT_CC(Op, DAG);
  case ISD::BR_CC:
    return LowerBR_CC(Op, DAG);
  case ISD::BR_JT:
    return LowerBR_JT(Op, DAG);
  case ISD::VASTART:
    return LowerVASTART(Op, DAG);
  case ISD::ATOMIC_FENCE:
    return LowerATOMIC_FENCE(Op, DAG, Subtarget);
  case ISD::PREFETCH:
    return LowerPREFETCH(Op, DAG, Subtarget);

  // Integer <-> floating point conversions.
  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP:
    return LowerINT_TO_FP(Op, DAG);
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:
    return LowerFP_TO_INT(Op, DAG);
  case ISD::FCOPYSIGN:
    return LowerFCOPYSIGN(Op, DAG);

  // Frame, return address and exception handling support.
  case ISD::RETURNADDR:
    return LowerRETURNADDR(Op, DAG);
  case ISD::FRAMEADDR:
    return LowerFRAMEADDR(Op, DAG);
  case ISD::GLOBAL_OFFSET_TABLE:
    return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
  case ISD::EH_SJLJ_SETJMP:
    return LowerEH_SJLJ_SETJMP(Op, DAG);
  case ISD::EH_SJLJ_LONGJMP:
    return LowerEH_SJLJ_LONGJMP(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN:
    return LowerINTRINSIC_WO_CHAIN(Op, DAG, Subtarget);

  // Bitcasts, shifts and bit counting.
  case ISD::BITCAST:
    return ExpandBITCAST(Op.getNode(), DAG);
  case ISD::SHL:
  case ISD::SRL:
  case ISD::SRA:
    return LowerShift(Op.getNode(), DAG, Subtarget);
  case ISD::SHL_PARTS:
    return LowerShiftLeftParts(Op, DAG);
  case ISD::SRL_PARTS:
  case ISD::SRA_PARTS:
    return LowerShiftRightParts(Op, DAG);
  case ISD::CTTZ:
    return LowerCTTZ(Op.getNode(), DAG, Subtarget);
  case ISD::CTPOP:
    return LowerCTPOP(Op.getNode(), DAG, Subtarget);

  // Comparisons, constants and vector construction / access.
  case ISD::SETCC:
    return LowerVSETCC(Op, DAG);
  case ISD::ConstantFP:
    return LowerConstantFP(Op, DAG, Subtarget);
  case ISD::BUILD_VECTOR:
    return LowerBUILD_VECTOR(Op, DAG, Subtarget);
  case ISD::VECTOR_SHUFFLE:
    return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:
    return LowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::CONCAT_VECTORS:
    return LowerCONCAT_VECTORS(Op, DAG);
  case ISD::FLT_ROUNDS_:
    return LowerFLT_ROUNDS_(Op, DAG);

  // Arithmetic.
  case ISD::MUL:
    return LowerMUL(Op, DAG);
  case ISD::SDIV:
    return LowerSDIV(Op, DAG);
  case ISD::UDIV:
    return LowerUDIV(Op, DAG);
  case ISD::ADDC:
  case ISD::ADDE:
  case ISD::SUBC:
  case ISD::SUBE:
    return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
  case ISD::SADDO:
  case ISD::UADDO:
  case ISD::SSUBO:
  case ISD::USUBO:
    return LowerXALUO(Op, DAG);

  // Atomics, libcall-based operations and stack allocation.
  case ISD::ATOMIC_LOAD:
  case ISD::ATOMIC_STORE:
    return LowerAtomicLoadStore(Op, DAG);
  case ISD::FSINCOS:
    return LowerFSINCOS(Op, DAG);
  case ISD::SDIVREM:
  case ISD::UDIVREM:
    return LowerDivRem(Op, DAG);
  case ISD::DYNAMIC_STACKALLOC:
    if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment())
      return LowerDYNAMIC_STACKALLOC(Op, DAG);
    llvm_unreachable("Don't know how to custom lower this!");

  // Floating point width changes.
  case ISD::FP_ROUND:
    return LowerFP_ROUND(Op, DAG);
  case ISD::FP_EXTEND:
    return LowerFP_EXTEND(Op, DAG);

  default:
    llvm_unreachable("Don't know how to custom lower this!");
  }
}

6466

6467

/// ReplaceNodeResults - Replace the results of node with an illegal result

6468

/// type with new values built out of custom code.

6469

void ARMTargetLowering::ReplaceNodeResults(SDNode *N,

6470

SmallVectorImpl<SDValue>&Results,

6471

SelectionDAG &DAG) const {

6472

SDValue Res;

6473

switch (N->getOpcode()) {

6474

default:

6475

llvm_unreachable("Don't know how to custom expand this!")::llvm::llvm_unreachable_internal("Don't know how to custom expand this!"
, "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 6475);

6476

case ISD::BITCAST:

6477

Res = ExpandBITCAST(N, DAG);

6478

break;

6479

case ISD::SRL:

6480

case ISD::SRA:

6481

Res = Expand64BitShift(N, DAG, Subtarget);

6482

break;

6483

case ISD::READCYCLECOUNTER:

6484

ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget);

6485

return;

6486

}

6487

if (Res.getNode())

6488

Results.push_back(Res);

6489

}

6490

6491

//===----------------------------------------------------------------------===//

6492

// ARM Scheduler Hooks

6493

//===----------------------------------------------------------------------===//

6494

6495

/// SetupEntryBlockForSjLj - Insert code into the entry block that creates and

6496

/// registers the function context.

6497

void ARMTargetLowering::

6498

SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB,

6499

MachineBasicBlock *DispatchBB, int FI) const {

6500

const TargetInstrInfo *TII =

6501

getTargetMachine().getSubtargetImpl()->getInstrInfo();

6502

DebugLoc dl = MI->getDebugLoc();

6503

MachineFunction *MF = MBB->getParent();

6504

MachineRegisterInfo *MRI = &MF->getRegInfo();

6505

MachineConstantPool *MCP = MF->getConstantPool();

6506

ARMFunctionInfo *AFI = MF->getInfo<ARMFunctionInfo>();

6507

const Function *F = MF->getFunction();

6508

6509

bool isThumb = Subtarget->isThumb();

6510

bool isThumb2 = Subtarget->isThumb2();

6511

6512

unsigned PCLabelId = AFI->createPICLabelUId();

6513

unsigned PCAdj = (isThumb || isThumb2) ? 4 : 8;

6514

ARMConstantPoolValue *CPV =

6515

ARMConstantPoolMBB::Create(F->getContext(), DispatchBB, PCLabelId, PCAdj);

6516

unsigned CPI = MCP->getConstantPoolIndex(CPV, 4);

6517

6518

const TargetRegisterClass *TRC = isThumb ? &ARM::tGPRRegClass

6519

: &ARM::GPRRegClass;

6520

6521

// Grab constant pool and fixed stack memory operands.

6522

MachineMemOperand *CPMMO =

6523

MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(),

6524

MachineMemOperand::MOLoad, 4, 4);

6525

6526

MachineMemOperand *FIMMOSt =

6527

MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),

6528

MachineMemOperand::MOStore, 4, 4);

6529

6530

// Load the address of the dispatch MBB into the jump buffer.

6531

if (isThumb2) {

6532

// Incoming value: jbuf

6533

// ldr.n r5, LCPI1_1

6534

// orr r5, r5, #1

6535

// add r5, pc

6536

// str r5, [$jbuf, #+4] ; &jbuf[1]

6537

unsigned NewVReg1 = MRI->createVirtualRegister(TRC);

6538

AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2LDRpci), NewVReg1)

6539

.addConstantPoolIndex(CPI)

6540

.addMemOperand(CPMMO));

6541

// Set the low bit because of thumb mode.

6542

unsigned NewVReg2 = MRI->createVirtualRegister(TRC);

6543

AddDefaultCC(

6544

AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2ORRri), NewVReg2)

6545

.addReg(NewVReg1, RegState::Kill)

6546

.addImm(0x01)));

6547

unsigned NewVReg3 = MRI->createVirtualRegister(TRC);

6548

BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg3)

6549

.addReg(NewVReg2, RegState::Kill)

6550

.addImm(PCLabelId);

6551

AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2STRi12))

6552

.addReg(NewVReg3, RegState::Kill)

6553

.addFrameIndex(FI)

6554

.addImm(36) // &jbuf[1] :: pc

6555

.addMemOperand(FIMMOSt));

6556

} else if (isThumb) {

6557

// Incoming value: jbuf

6558

// ldr.n r1, LCPI1_4

6559

// add r1, pc

6560

// mov r2, #1

6561

// orrs r1, r2

6562

// add r2, $jbuf, #+4 ; &jbuf[1]

6563

// str r1, [r2]

6564

unsigned NewVReg1 = MRI->createVirtualRegister(TRC);

6565

AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tLDRpci), NewVReg1)

6566

.addConstantPoolIndex(CPI)

6567

.addMemOperand(CPMMO));

6568

unsigned NewVReg2 = MRI->createVirtualRegister(TRC);

6569

BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg2)

6570

.addReg(NewVReg1, RegState::Kill)

6571

.addImm(PCLabelId);

6572

// Set the low bit because of thumb mode.

6573

unsigned NewVReg3 = MRI->createVirtualRegister(TRC);

6574

AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tMOVi8), NewVReg3)

6575

.addReg(ARM::CPSR, RegState::Define)

6576

.addImm(1));

6577

unsigned NewVReg4 = MRI->createVirtualRegister(TRC);

6578

AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tORR), NewVReg4)

6579

.addReg(ARM::CPSR, RegState::Define)

6580

.addReg(NewVReg2, RegState::Kill)

6581

.addReg(NewVReg3, RegState::Kill));

6582

unsigned NewVReg5 = MRI->createVirtualRegister(TRC);

6583

BuildMI(*MBB, MI, dl, TII->get(ARM::tADDframe), NewVReg5)

6584

.addFrameIndex(FI)

6585

.addImm(36); // &jbuf[1] :: pc

6586

AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi))

6587

.addReg(NewVReg4, RegState::Kill)

6588

.addReg(NewVReg5, RegState::Kill)

6589

.addImm(0)

6590

.addMemOperand(FIMMOSt));

6591

} else {

6592

// Incoming value: jbuf

6593

// ldr r1, LCPI1_1

6594

// add r1, pc, r1

6595

// str r1, [$jbuf, #+4] ; &jbuf[1]

6596

unsigned NewVReg1 = MRI->createVirtualRegister(TRC);

6597

AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::LDRi12), NewVReg1)

6598

.addConstantPoolIndex(CPI)

6599

.addImm(0)

6600

.addMemOperand(CPMMO));

6601

unsigned NewVReg2 = MRI->createVirtualRegister(TRC);

6602

AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::PICADD), NewVReg2)

6603

.addReg(NewVReg1, RegState::Kill)

6604

.addImm(PCLabelId));

6605

AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::STRi12))

6606

.addReg(NewVReg2, RegState::Kill)

6607

.addFrameIndex(FI)

6608

.addImm(36) // &jbuf[1] :: pc

6609

.addMemOperand(FIMMOSt));

6610

}

6611

}

6612

6613

MachineBasicBlock *ARMTargetLowering::

6614

EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {

6615

const TargetInstrInfo *TII =

6616

getTargetMachine().getSubtargetImpl()->getInstrInfo();

6617

DebugLoc dl = MI->getDebugLoc();

6618

MachineFunction *MF = MBB->getParent();

6619

MachineRegisterInfo *MRI = &MF->getRegInfo();

6620

ARMFunctionInfo *AFI = MF->getInfo<ARMFunctionInfo>();

6621

MachineFrameInfo *MFI = MF->getFrameInfo();

6622

int FI = MFI->getFunctionContextIndex();

6623

6624

const TargetRegisterClass *TRC = Subtarget->isThumb() ? &ARM::tGPRRegClass

6625

: &ARM::GPRnopcRegClass;

6626

6627

// Get a mapping of the call site numbers to all of the landing pads they're

6628

// associated with.

6629

DenseMap<unsigned, SmallVector<MachineBasicBlock*, 2> > CallSiteNumToLPad;

6630

unsigned MaxCSNum = 0;

6631

MachineModuleInfo &MMI = MF->getMMI();

6632

for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E;

6633

++BB) {

6634

if (!BB->isLandingPad()) continue;

6635

6636

// FIXME: We should assert that the EH_LABEL is the first MI in the landing

6637

// pad.

6638

for (MachineBasicBlock::iterator

6639

II = BB->begin(), IE = BB->end(); II != IE; ++II) {

6640

if (!II->isEHLabel()) continue;

6641

6642

MCSymbol *Sym = II->getOperand(0).getMCSymbol();

6643

if (!MMI.hasCallSiteLandingPad(Sym)) continue;

6644

6645

SmallVectorImpl<unsigned> &CallSiteIdxs = MMI.getCallSiteLandingPad(Sym);

6646

for (SmallVectorImpl<unsigned>::iterator

6647

CSI = CallSiteIdxs.begin(), CSE = CallSiteIdxs.end();

6648

CSI != CSE; ++CSI) {

6649

CallSiteNumToLPad[*CSI].push_back(BB);

6650

MaxCSNum = std::max(MaxCSNum, *CSI);

6651

}

6652

break;

6653

}

6654

}

6655

6656

// Get an ordered list of the machine basic blocks for the jump table.

6657

std::vector<MachineBasicBlock*> LPadList;

6658

SmallPtrSet<MachineBasicBlock*, 64> InvokeBBs;

6659

LPadList.reserve(CallSiteNumToLPad.size());

6660

for (unsigned I = 1; I <= MaxCSNum; ++I) {

6661

SmallVectorImpl<MachineBasicBlock*> &MBBList = CallSiteNumToLPad[I];

6662

for (SmallVectorImpl<MachineBasicBlock*>::iterator

6663

II = MBBList.begin(), IE = MBBList.end(); II != IE; ++II) {

6664

LPadList.push_back(*II);

6665

InvokeBBs.insert((*II)->pred_begin(), (*II)->pred_end());

6666

}

6667

}

6668

6669

assert(!LPadList.empty() &&((!LPadList.empty() && "No landing pad destinations for the dispatch jump table!"
) ? static_cast<void> (0) : __assert_fail ("!LPadList.empty() && \"No landing pad destinations for the dispatch jump table!\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 6670, __PRETTY_FUNCTION__))

6670

"No landing pad destinations for the dispatch jump table!")((!LPadList.empty() && "No landing pad destinations for the dispatch jump table!"
) ? static_cast<void> (0) : __assert_fail ("!LPadList.empty() && \"No landing pad destinations for the dispatch jump table!\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 6670, __PRETTY_FUNCTION__));

6671

6672

// Create the jump table and associated information.

6673

MachineJumpTableInfo *JTI =

6674

MF->getOrCreateJumpTableInfo(MachineJumpTableInfo::EK_Inline);

6675

unsigned MJTI = JTI->createJumpTableIndex(LPadList);

6676

unsigned UId = AFI->createJumpTableUId();

6677

Reloc::Model RelocM = getTargetMachine().getRelocationModel();

6678

6679

// Create the MBBs for the dispatch code.

6680

6681

// Shove the dispatch's address into the return slot in the function context.

6682

MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();

6683

DispatchBB->setIsLandingPad();

6684

6685

MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();

6686

unsigned trap_opcode;

6687

if (Subtarget->isThumb())

6688

trap_opcode = ARM::tTRAP;

6689

else

6690

trap_opcode = Subtarget->useNaClTrap() ? ARM::TRAPNaCl : ARM::TRAP;

6691

6692

BuildMI(TrapBB, dl, TII->get(trap_opcode));

6693

DispatchBB->addSuccessor(TrapBB);

6694

6695

MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();

6696

DispatchBB->addSuccessor(DispContBB);

6697

6698

// Insert the new MBBs.

6699

MF->insert(MF->end(), DispatchBB);

6700

MF->insert(MF->end(), DispContBB);

6701

MF->insert(MF->end(), TrapBB);

6702

6703

// Insert code into the entry block that creates and registers the function

6704

// context.

6705

SetupEntryBlockForSjLj(MI, MBB, DispatchBB, FI);

6706

6707

MachineMemOperand *FIMMOLd =

6708

MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),

6709

MachineMemOperand::MOLoad |

6710

MachineMemOperand::MOVolatile, 4, 4);

6711

6712

MachineInstrBuilder MIB;

6713

MIB = BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup));

6714

6715

const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII);

6716

const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();

6717

6718

// Add a register mask with no preserved registers. This results in all

6719

// registers being marked as clobbered.

6720

MIB.addRegMask(RI.getNoPreservedMask());

6721

6722

unsigned NumLPads = LPadList.size();

6723

if (Subtarget->isThumb2()) {

6724

unsigned NewVReg1 = MRI->createVirtualRegister(TRC);

6725

AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1)

6726

.addFrameIndex(FI)

6727

.addImm(4)

6728

.addMemOperand(FIMMOLd));

6729

6730

if (NumLPads < 256) {

6731

AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri))

6732

.addReg(NewVReg1)

6733

.addImm(LPadList.size()));

6734

} else {

6735

unsigned VReg1 = MRI->createVirtualRegister(TRC);

6736

AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVi16), VReg1)

6737

.addImm(NumLPads & 0xFFFF));

6738

6739

unsigned VReg2 = VReg1;

6740

if ((NumLPads & 0xFFFF0000) != 0) {

6741

VReg2 = MRI->createVirtualRegister(TRC);

6742

AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVTi16), VReg2)

6743

.addReg(VReg1)

6744

.addImm(NumLPads >> 16));

6745

}

6746

6747

AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPrr))

6748

.addReg(NewVReg1)

6749

.addReg(VReg2));

6750

}

6751

6752

BuildMI(DispatchBB, dl, TII->get(ARM::t2Bcc))

6753

.addMBB(TrapBB)

6754

.addImm(ARMCC::HI)

6755

.addReg(ARM::CPSR);

6756

6757

unsigned NewVReg3 = MRI->createVirtualRegister(TRC);

6758

AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT),NewVReg3)

6759

.addJumpTableIndex(MJTI)

6760

.addImm(UId));

6761

6762

unsigned NewVReg4 = MRI->createVirtualRegister(TRC);

6763

AddDefaultCC(

6764

AddDefaultPred(

6765

BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg4)

6766

.addReg(NewVReg3, RegState::Kill)

6767

.addReg(NewVReg1)

6768

.addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))));

6769

6770

BuildMI(DispContBB, dl, TII->get(ARM::t2BR_JT))

6771

.addReg(NewVReg4, RegState::Kill)

6772

.addReg(NewVReg1)

6773

.addJumpTableIndex(MJTI)

6774

.addImm(UId);

6775

} else if (Subtarget->isThumb()) {

6776

unsigned NewVReg1 = MRI->createVirtualRegister(TRC);

6777

AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tLDRspi), NewVReg1)

6778

.addFrameIndex(FI)

6779

.addImm(1)

6780

.addMemOperand(FIMMOLd));

6781

6782

if (NumLPads < 256) {

6783

AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8))

6784

.addReg(NewVReg1)

6785

.addImm(NumLPads));

6786

} else {

6787

MachineConstantPool *ConstantPool = MF->getConstantPool();

6788

Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());

6789

const Constant *C = ConstantInt::get(Int32Ty, NumLPads);

6790

6791

// MachineConstantPool wants an explicit alignment.

6792

unsigned Align = getDataLayout()->getPrefTypeAlignment(Int32Ty);

6793

if (Align == 0)

6794

Align = getDataLayout()->getTypeAllocSize(C->getType());

6795

unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);

6796

6797

unsigned VReg1 = MRI->createVirtualRegister(TRC);

6798

AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tLDRpci))

6799

.addReg(VReg1, RegState::Define)

6800

.addConstantPoolIndex(Idx));

6801

AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPr))

6802

.addReg(NewVReg1)

6803

.addReg(VReg1));

6804

}

6805

6806

BuildMI(DispatchBB, dl, TII->get(ARM::tBcc))

6807

.addMBB(TrapBB)

6808

.addImm(ARMCC::HI)

6809

.addReg(ARM::CPSR);

6810

6811

unsigned NewVReg2 = MRI->createVirtualRegister(TRC);

6812

AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLSLri), NewVReg2)

6813

.addReg(ARM::CPSR, RegState::Define)

6814

.addReg(NewVReg1)

6815

.addImm(2));

6816

6817

unsigned NewVReg3 = MRI->createVirtualRegister(TRC);

6818

AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLEApcrelJT), NewVReg3)

6819

.addJumpTableIndex(MJTI)

6820

.addImm(UId));

6821

6822

unsigned NewVReg4 = MRI->createVirtualRegister(TRC);

6823

AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg4)

6824

.addReg(ARM::CPSR, RegState::Define)

6825

.addReg(NewVReg2, RegState::Kill)

6826

.addReg(NewVReg3));

6827

6828

MachineMemOperand *JTMMOLd =

6829

MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(),

6830

MachineMemOperand::MOLoad, 4, 4);

6831

6832

unsigned NewVReg5 = MRI->createVirtualRegister(TRC);

6833

AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5)

6834

.addReg(NewVReg4, RegState::Kill)

6835

.addImm(0)

6836

.addMemOperand(JTMMOLd));

6837

6838

unsigned NewVReg6 = NewVReg5;

6839

if (RelocM == Reloc::PIC_) {

6840

NewVReg6 = MRI->createVirtualRegister(TRC);

6841

AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6)

6842

.addReg(ARM::CPSR, RegState::Define)

6843

.addReg(NewVReg5, RegState::Kill)

6844

.addReg(NewVReg3));

6845

}

6846

6847

BuildMI(DispContBB, dl, TII->get(ARM::tBR_JTr))

6848

.addReg(NewVReg6, RegState::Kill)

6849

.addJumpTableIndex(MJTI)

6850

.addImm(UId);

6851

} else {

6852

unsigned NewVReg1 = MRI->createVirtualRegister(TRC);

6853

AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::LDRi12), NewVReg1)

6854

.addFrameIndex(FI)

6855

.addImm(4)

6856

.addMemOperand(FIMMOLd));

6857

6858

if (NumLPads < 256) {

6859

AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPri))

6860

.addReg(NewVReg1)

6861

.addImm(NumLPads));

6862

} else if (Subtarget->hasV6T2Ops() && isUInt<16>(NumLPads)) {

6863

unsigned VReg1 = MRI->createVirtualRegister(TRC);

6864

AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::MOVi16), VReg1)

6865

.addImm(NumLPads & 0xFFFF));

6866

6867

unsigned VReg2 = VReg1;

6868

if ((NumLPads & 0xFFFF0000) != 0) {

6869

VReg2 = MRI->createVirtualRegister(TRC);

6870

AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::MOVTi16), VReg2)

6871

.addReg(VReg1)

6872

.addImm(NumLPads >> 16));

6873

}

6874

6875

AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))

6876

.addReg(NewVReg1)

6877

.addReg(VReg2));

6878

} else {

6879

MachineConstantPool *ConstantPool = MF->getConstantPool();

6880

Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());

6881

const Constant *C = ConstantInt::get(Int32Ty, NumLPads);

6882

6883

// MachineConstantPool wants an explicit alignment.

6884

unsigned Align = getDataLayout()->getPrefTypeAlignment(Int32Ty);

6885

if (Align == 0)

6886

Align = getDataLayout()->getTypeAllocSize(C->getType());

6887

unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);

6888

6889

unsigned VReg1 = MRI->createVirtualRegister(TRC);

6890

AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::LDRcp))

6891

.addReg(VReg1, RegState::Define)

6892

.addConstantPoolIndex(Idx)

6893

.addImm(0));

6894

AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))

6895

.addReg(NewVReg1)

6896

.addReg(VReg1, RegState::Kill));

6897

}

6898

6899

BuildMI(DispatchBB, dl, TII->get(ARM::Bcc))

6900

.addMBB(TrapBB)

6901

.addImm(ARMCC::HI)

6902

.addReg(ARM::CPSR);

6903

6904

unsigned NewVReg3 = MRI->createVirtualRegister(TRC);

6905

AddDefaultCC(

6906

AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg3)

6907

.addReg(NewVReg1)

6908

.addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))));

6909

unsigned NewVReg4 = MRI->createVirtualRegister(TRC);

6910

AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4)

6911

.addJumpTableIndex(MJTI)

6912

.addImm(UId));

6913

6914

MachineMemOperand *JTMMOLd =

6915

MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(),

6916

MachineMemOperand::MOLoad, 4, 4);

6917

unsigned NewVReg5 = MRI->createVirtualRegister(TRC);

6918

AddDefaultPred(

6919

BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5)

6920

.addReg(NewVReg3, RegState::Kill)

6921

.addReg(NewVReg4)

6922

.addImm(0)

6923

.addMemOperand(JTMMOLd));

6924

6925

if (RelocM == Reloc::PIC_) {

6926

BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd))

6927

.addReg(NewVReg5, RegState::Kill)

6928

.addReg(NewVReg4)

6929

.addJumpTableIndex(MJTI)

6930

.addImm(UId);

6931

} else {

6932

BuildMI(DispContBB, dl, TII->get(ARM::BR_JTr))

6933

.addReg(NewVReg5, RegState::Kill)

6934

.addJumpTableIndex(MJTI)

6935

.addImm(UId);

6936

}

6937

}

6938

6939

// Add the jump table entries as successors to the MBB.

6940

SmallPtrSet<MachineBasicBlock*, 8> SeenMBBs;

6941

for (std::vector<MachineBasicBlock*>::iterator

6942

I = LPadList.begin(), E = LPadList.end(); I != E; ++I) {

6943

MachineBasicBlock *CurMBB = *I;

6944

if (SeenMBBs.insert(CurMBB).second)

6945

DispContBB->addSuccessor(CurMBB);

6946

}

6947

6948

// N.B. the order the invoke BBs are processed in doesn't matter here.

6949

const MCPhysReg *SavedRegs = RI.getCalleeSavedRegs(MF);

6950

SmallVector<MachineBasicBlock*, 64> MBBLPads;

6951

for (MachineBasicBlock *BB : InvokeBBs) {

6952

6953

// Remove the landing pad successor from the invoke block and replace it

6954

// with the new dispatch block.

6955

SmallVector<MachineBasicBlock*, 4> Successors(BB->succ_begin(),

6956

BB->succ_end());

6957

while (!Successors.empty()) {

6958

MachineBasicBlock *SMBB = Successors.pop_back_val();

6959

if (SMBB->isLandingPad()) {

6960

BB->removeSuccessor(SMBB);

6961

MBBLPads.push_back(SMBB);

6962

}

6963

}

6964

6965

BB->addSuccessor(DispatchBB);

6966

6967

// Find the invoke call and mark all of the callee-saved registers as

6968

// 'implicit defined' so that they're spilled. This prevents code from

6969

// moving instructions to before the EH block, where they will never be

6970

// executed.

6971

for (MachineBasicBlock::reverse_iterator

6972

II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) {

6973

if (!II->isCall()) continue;

6974

6975

DenseMap<unsigned, bool> DefRegs;

6976

for (MachineInstr::mop_iterator

6977

OI = II->operands_begin(), OE = II->operands_end();

6978

OI != OE; ++OI) {

6979

if (!OI->isReg()) continue;

6980

DefRegs[OI->getReg()] = true;

6981

}

6982

6983

MachineInstrBuilder MIB(*MF, &*II);

6984

6985

for (unsigned i = 0; SavedRegs[i] != 0; ++i) {

6986

unsigned Reg = SavedRegs[i];

6987

if (Subtarget->isThumb2() &&

6988

!ARM::tGPRRegClass.contains(Reg) &&

6989

!ARM::hGPRRegClass.contains(Reg))

6990

continue;

6991

if (Subtarget->isThumb1Only() && !ARM::tGPRRegClass.contains(Reg))

6992

continue;

6993

if (!Subtarget->isThumb() && !ARM::GPRRegClass.contains(Reg))

6994

continue;

6995

if (!DefRegs[Reg])

6996

MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead);

6997

}

6998

6999

break;

7000

}

7001

}

7002

7003

// Mark all former landing pads as non-landing pads. The dispatch is the only

7004

// landing pad now.

7005

for (SmallVectorImpl<MachineBasicBlock*>::iterator

7006

I = MBBLPads.begin(), E = MBBLPads.end(); I != E; ++I)

7007

(*I)->setIsLandingPad(false);

7008

7009

// The instruction is gone now.

7010

MI->eraseFromParent();

7011

7012

return MBB;

7013

}

7014

7015

static

7016

MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {

7017

for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),

7018

E = MBB->succ_end(); I != E; ++I)

7019

if (*I != Succ)

7020

return *I;

7021

llvm_unreachable("Expecting a BB with two successors!")::llvm::llvm_unreachable_internal("Expecting a BB with two successors!"
, "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 7021);

7022

}

7023

7024

/// Return the load opcode for a given load size. If load size >= 8,

7025

/// neon opcode will be returned.

7026

static unsigned getLdOpcode(unsigned LdSize, bool IsThumb1, bool IsThumb2) {

7027

if (LdSize >= 8)

7028

return LdSize == 16 ? ARM::VLD1q32wb_fixed

7029

: LdSize == 8 ? ARM::VLD1d32wb_fixed : 0;

7030

if (IsThumb1)

7031

return LdSize == 4 ? ARM::tLDRi

7032

: LdSize == 2 ? ARM::tLDRHi

7033

: LdSize == 1 ? ARM::tLDRBi : 0;

7034

if (IsThumb2)

7035

return LdSize == 4 ? ARM::t2LDR_POST

7036

: LdSize == 2 ? ARM::t2LDRH_POST

7037

: LdSize == 1 ? ARM::t2LDRB_POST : 0;

7038

return LdSize == 4 ? ARM::LDR_POST_IMM

7039

: LdSize == 2 ? ARM::LDRH_POST

7040

: LdSize == 1 ? ARM::LDRB_POST_IMM : 0;

7041

}

7042

7043

/// Return the store opcode for a given store size. If store size >= 8,

7044

/// neon opcode will be returned.

7045

static unsigned getStOpcode(unsigned StSize, bool IsThumb1, bool IsThumb2) {

7046

if (StSize >= 8)

7047

return StSize == 16 ? ARM::VST1q32wb_fixed

7048

: StSize == 8 ? ARM::VST1d32wb_fixed : 0;

7049

if (IsThumb1)

7050

return StSize == 4 ? ARM::tSTRi

7051

: StSize == 2 ? ARM::tSTRHi

7052

: StSize == 1 ? ARM::tSTRBi : 0;

7053

if (IsThumb2)

7054

return StSize == 4 ? ARM::t2STR_POST

7055

: StSize == 2 ? ARM::t2STRH_POST

7056

: StSize == 1 ? ARM::t2STRB_POST : 0;

7057

return StSize == 4 ? ARM::STR_POST_IMM

7058

: StSize == 2 ? ARM::STRH_POST

7059

: StSize == 1 ? ARM::STRB_POST_IMM : 0;

7060

}

7061

7062

/// Emit a post-increment load operation with given size. The instructions

7063

/// will be added to BB at Pos.

7064

static void emitPostLd(MachineBasicBlock *BB, MachineInstr *Pos,

7065

const TargetInstrInfo *TII, DebugLoc dl,

7066

unsigned LdSize, unsigned Data, unsigned AddrIn,

7067

unsigned AddrOut, bool IsThumb1, bool IsThumb2) {

7068

unsigned LdOpc = getLdOpcode(LdSize, IsThumb1, IsThumb2);

7069

assert(LdOpc != 0 && "Should have a load opcode")((LdOpc != 0 && "Should have a load opcode") ? static_cast
<void> (0) : __assert_fail ("LdOpc != 0 && \"Should have a load opcode\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 7069, __PRETTY_FUNCTION__));

7070

if (LdSize >= 8) {

7071

AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)

7072

.addReg(AddrOut, RegState::Define).addReg(AddrIn)

7073

.addImm(0));

7074

} else if (IsThumb1) {

7075

// load + update AddrIn

7076

AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)

7077

.addReg(AddrIn).addImm(0));

7078

MachineInstrBuilder MIB =

7079

BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut);

7080

MIB = AddDefaultT1CC(MIB);

7081

MIB.addReg(AddrIn).addImm(LdSize);

7082

AddDefaultPred(MIB);

7083

} else if (IsThumb2) {

7084

AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)

7085

.addReg(AddrOut, RegState::Define).addReg(AddrIn)

7086

.addImm(LdSize));

7087

} else { // arm

7088

AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)

7089

.addReg(AddrOut, RegState::Define).addReg(AddrIn)

7090

.addReg(0).addImm(LdSize));

7091

}

7092

}

7093

7094

/// Emit a post-increment store operation with given size. The instructions

7095

/// will be added to BB at Pos.

7096

static void emitPostSt(MachineBasicBlock *BB, MachineInstr *Pos,

7097

const TargetInstrInfo *TII, DebugLoc dl,

7098

unsigned StSize, unsigned Data, unsigned AddrIn,

7099

unsigned AddrOut, bool IsThumb1, bool IsThumb2) {

7100

unsigned StOpc = getStOpcode(StSize, IsThumb1, IsThumb2);

7101

assert(StOpc != 0 && "Should have a store opcode")((StOpc != 0 && "Should have a store opcode") ? static_cast
<void> (0) : __assert_fail ("StOpc != 0 && \"Should have a store opcode\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 7101, __PRETTY_FUNCTION__));

7102

if (StSize >= 8) {

7103

AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)

7104

.addReg(AddrIn).addImm(0).addReg(Data));

7105

} else if (IsThumb1) {

7106

// store + update AddrIn

7107

AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc)).addReg(Data)

7108

.addReg(AddrIn).addImm(0));

7109

MachineInstrBuilder MIB =

7110

BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut);

7111

MIB = AddDefaultT1CC(MIB);

7112

MIB.addReg(AddrIn).addImm(StSize);

7113

AddDefaultPred(MIB);

7114

} else if (IsThumb2) {

7115

AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)

7116

.addReg(Data).addReg(AddrIn).addImm(StSize));

7117

} else { // arm

7118

AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)

7119

.addReg(Data).addReg(AddrIn).addReg(0)

7120

.addImm(StSize));

7121

}

7122

}

7123

7124

/// Expand the struct-byval copy pseudo instruction \p MI located in \p BB.
///
/// The pseudo's operands are read as: destination address register, source
/// address register, size in bytes and alignment. When the size does not
/// exceed Subtarget->getMaxInlineSizeThreshold() the copy is emitted as a
/// straight-line sequence of load/store pairs in \p BB; otherwise a copy loop
/// is emitted in a new block, followed by a new exit block that receives the
/// instructions that came after \p MI. In both cases leftover bytes (size
/// modulo the copy unit) are copied one byte at a time and \p MI is erased.
///
/// \returns \p BB for the inline expansion, or the new exit block for the
/// loop expansion.
MachineBasicBlock *

7125

ARMTargetLowering::EmitStructByval(MachineInstr *MI,

7126

MachineBasicBlock *BB) const {

7127

// This pseudo instruction has 4 operands: dst, src, size, alignment

7128

// We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold().

7129

// Otherwise, we will generate unrolled scalar copies.

7130

const TargetInstrInfo *TII =

7131

getTargetMachine().getSubtargetImpl()->getInstrInfo();

7132

const BasicBlock *LLVM_BB = BB->getBasicBlock();

7133

// It points at the block following BB; new blocks are inserted before it.
MachineFunction::iterator It = BB;

7134

++It;

7135

7136

unsigned dest = MI->getOperand(0).getReg();

7137

unsigned src = MI->getOperand(1).getReg();

7138

unsigned SizeVal = MI->getOperand(2).getImm();

7139

unsigned Align = MI->getOperand(3).getImm();

7140

DebugLoc dl = MI->getDebugLoc();

7141

7142

MachineFunction *MF = BB->getParent();

7143

MachineRegisterInfo &MRI = MF->getRegInfo();

7144

unsigned UnitSize = 0;

7145

const TargetRegisterClass *TRC = nullptr;

7146

const TargetRegisterClass *VecTRC = nullptr;

7147

7148

bool IsThumb1 = Subtarget->isThumb1Only();

7149

bool IsThumb2 = Subtarget->isThumb2();

7150

7151

// Choose the copy unit from the alignment: an odd alignment forces byte
// copies, an alignment with bit 1 set forces halfword copies, anything else
// uses words or (when permitted below) 8/16-byte NEON copies.
if (Align & 1) {

7152

UnitSize = 1;

7153

} else if (Align & 2) {

7154

UnitSize = 2;

7155

} else {

7156

// Check whether we can use NEON instructions.

7157

if (!MF->getFunction()->getAttributes().

7158

hasAttribute(AttributeSet::FunctionIndex,

7159

Attribute::NoImplicitFloat) &&

7160

Subtarget->hasNEON()) {

7161

if ((Align % 16 == 0) && SizeVal >= 16)

7162

UnitSize = 16;

7163

else if ((Align % 8 == 0) && SizeVal >= 8)

7164

UnitSize = 8;

7165

}

7166

// Can't use NEON instructions.

7167

if (UnitSize == 0)

7168

UnitSize = 4;

7169

}

7170

7171

// Select the correct opcode and register class for unit size load/store

7172

bool IsNeon = UnitSize >= 8;

7173

TRC = (IsThumb1 || IsThumb2) ? &ARM::tGPRRegClass : &ARM::GPRRegClass;

7174

if (IsNeon)

7175

VecTRC = UnitSize == 16 ? &ARM::DPairRegClass

7176

: UnitSize == 8 ? &ARM::DPRRegClass

7177

: nullptr;

7178

7179

// LoopSize bytes are copied UnitSize at a time; the remaining BytesLeft
// bytes are copied individually afterwards.
unsigned BytesLeft = SizeVal % UnitSize;

7180

unsigned LoopSize = SizeVal - BytesLeft;

7181

7182

if (SizeVal <= Subtarget->getMaxInlineSizeThreshold()) {

7183

// Use LDR and STR to copy.

7184

// [scratch, srcOut] = LDR_POST(srcIn, UnitSize)

7185

// [destOut] = STR_POST(scratch, destIn, UnitSize)

7186

unsigned srcIn = src;

7187

unsigned destIn = dest;

7188

for (unsigned i = 0; i < LoopSize; i+=UnitSize) {

7189

unsigned srcOut = MRI.createVirtualRegister(TRC);

7190

unsigned destOut = MRI.createVirtualRegister(TRC);

7191

unsigned scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);

7192

emitPostLd(BB, MI, TII, dl, UnitSize, scratch, srcIn, srcOut,

7193

IsThumb1, IsThumb2);

7194

emitPostSt(BB, MI, TII, dl, UnitSize, scratch, destIn, destOut,

7195

IsThumb1, IsThumb2);

7196

// Chain the updated addresses into the next iteration.
srcIn = srcOut;

7197

destIn = destOut;

7198

}

7199

7200

// Handle the leftover bytes with LDRB and STRB.

7201

// [scratch, srcOut] = LDRB_POST(srcIn, 1)

7202

// [destOut] = STRB_POST(scratch, destIn, 1)

7203

for (unsigned i = 0; i < BytesLeft; i++) {

7204

unsigned srcOut = MRI.createVirtualRegister(TRC);

7205

unsigned destOut = MRI.createVirtualRegister(TRC);

7206

unsigned scratch = MRI.createVirtualRegister(TRC);

7207

emitPostLd(BB, MI, TII, dl, 1, scratch, srcIn, srcOut,

7208

IsThumb1, IsThumb2);

7209

emitPostSt(BB, MI, TII, dl, 1, scratch, destIn, destOut,

7210

IsThumb1, IsThumb2);

7211

srcIn = srcOut;

7212

destIn = destOut;

7213

}

7214

MI->eraseFromParent(); // The instruction is gone now.

7215

return BB;

7216

}

7217

7218

// Expand the pseudo op to a loop.

7219

// thisMBB:

7220

// ...

7221

// movw varEnd, # --> with thumb2

7222

// movt varEnd, #

7223

// ldrcp varEnd, idx --> without thumb2

7224

// fallthrough --> loopMBB

7225

// loopMBB:

7226

// PHI varPhi, varEnd, varLoop

7227

// PHI srcPhi, src, srcLoop

7228

// PHI destPhi, dst, destLoop

7229

// [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)

7230

// [destLoop] = STR_POST(scratch, destPhi, UnitSize)

7231

// subs varLoop, varPhi, #UnitSize

7232

// bne loopMBB

7233

// fallthrough --> exitMBB

7234

// exitMBB:

7235

// epilogue to handle left-over bytes

7236

// [scratch, srcOut] = LDRB_POST(srcLoop, 1)

7237

// [destOut] = STRB_POST(scratch, destLoop, 1)

7238

MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);

7239

MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);

7240

MF->insert(It, loopMBB);

7241

MF->insert(It, exitMBB);

7242

7243

// Transfer the remainder of BB and its successor edges to exitMBB.

7244

exitMBB->splice(exitMBB->begin(), BB,

7245

std::next(MachineBasicBlock::iterator(MI)), BB->end());

7246

exitMBB->transferSuccessorsAndUpdatePHIs(BB);

7247

7248

// Load an immediate to varEnd.

7249

unsigned varEnd = MRI.createVirtualRegister(TRC);

7250

if (IsThumb2) {

7251

// If LoopSize fits in 16 bits the move below writes varEnd directly;
// otherwise it writes a temporary and a second instruction supplies the
// upper 16 bits while defining varEnd.
unsigned Vtmp = varEnd;

7252

if ((LoopSize & 0xFFFF0000) != 0)

7253

Vtmp = MRI.createVirtualRegister(TRC);

7254

AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVi16), Vtmp)

7255

.addImm(LoopSize & 0xFFFF));

7256

7257

if ((LoopSize & 0xFFFF0000) != 0)

7258

AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVTi16), varEnd)

7259

.addReg(Vtmp).addImm(LoopSize >> 16));

7260

} else {

7261

// Without Thumb2, LoopSize is placed in the constant pool and loaded.
MachineConstantPool *ConstantPool = MF->getConstantPool();

7262

Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());

7263

const Constant *C = ConstantInt::get(Int32Ty, LoopSize);

7264

7265

// MachineConstantPool wants an explicit alignment.

7266

// NOTE: this local shadows the byval alignment read from operand 3 above.
unsigned Align = getDataLayout()->getPrefTypeAlignment(Int32Ty);

7267

if (Align == 0)

7268

Align = getDataLayout()->getTypeAllocSize(C->getType());

7269

unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);

7270

7271

if (IsThumb1)

7272

AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::tLDRpci)).addReg(

7273

varEnd, RegState::Define).addConstantPoolIndex(Idx));

7274

else

7275

AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::LDRcp)).addReg(

7276

varEnd, RegState::Define).addConstantPoolIndex(Idx).addImm(0));

7277

}

7278

BB->addSuccessor(loopMBB);

7279

7280

// Generate the loop body:

7281

// varPhi = PHI(varLoop, varEnd)

7282

// srcPhi = PHI(srcLoop, src)

7283

// destPhi = PHI(destLoop, dst)

7284

MachineBasicBlock *entryBB = BB;

7285

BB = loopMBB;

7286

unsigned varLoop = MRI.createVirtualRegister(TRC);

7287

unsigned varPhi = MRI.createVirtualRegister(TRC);

7288

unsigned srcLoop = MRI.createVirtualRegister(TRC);

7289

unsigned srcPhi = MRI.createVirtualRegister(TRC);

7290

unsigned destLoop = MRI.createVirtualRegister(TRC);

7291

unsigned destPhi = MRI.createVirtualRegister(TRC);

7292

7293

// Each PHI takes the loop-carried value from loopMBB and the initial value
// from entryBB.
BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), varPhi)

7294

.addReg(varLoop).addMBB(loopMBB)

7295

.addReg(varEnd).addMBB(entryBB);

7296

BuildMI(BB, dl, TII->get(ARM::PHI), srcPhi)

7297

.addReg(srcLoop).addMBB(loopMBB)

7298

.addReg(src).addMBB(entryBB);

7299

BuildMI(BB, dl, TII->get(ARM::PHI), destPhi)

7300

.addReg(destLoop).addMBB(loopMBB)

7301

.addReg(dest).addMBB(entryBB);

7302

7303

// [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)

7304

// [destLoop] = STR_POST(scratch, destPhi, UnitSize)

7305

unsigned scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);

7306

emitPostLd(BB, BB->end(), TII, dl, UnitSize, scratch, srcPhi, srcLoop,

7307

IsThumb1, IsThumb2);

7308

emitPostSt(BB, BB->end(), TII, dl, UnitSize, scratch, destPhi, destLoop,

7309

IsThumb1, IsThumb2);

7310

7311

// Decrement loop variable by UnitSize.

7312

if (IsThumb1) {

7313

MachineInstrBuilder MIB =

7314

BuildMI(*BB, BB->end(), dl, TII->get(ARM::tSUBi8), varLoop);

7315

MIB = AddDefaultT1CC(MIB);

7316

MIB.addReg(varPhi).addImm(UnitSize);

7317

AddDefaultPred(MIB);

7318

} else {

7319

MachineInstrBuilder MIB =

7320

BuildMI(*BB, BB->end(), dl,

7321

TII->get(IsThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop);

7322

AddDefaultCC(AddDefaultPred(MIB.addReg(varPhi).addImm(UnitSize)));

7323

// Turn operand 5 into a definition of CPSR so the conditional branch
// below has flags to test. NOTE(review): operand 5 is presumably the
// operand appended by AddDefaultCC -- confirm against the instruction
// definition.
MIB->getOperand(5).setReg(ARM::CPSR);

7324

MIB->getOperand(5).setIsDef(true);

7325

}

7326

BuildMI(*BB, BB->end(), dl,

7327

TII->get(IsThumb1 ? ARM::tBcc : IsThumb2 ? ARM::t2Bcc : ARM::Bcc))

7328

.addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);

7329

7330

// loopMBB can loop back to loopMBB or fall through to exitMBB.

7331

BB->addSuccessor(loopMBB);

7332

BB->addSuccessor(exitMBB);

7333

7334

// Add epilogue to handle BytesLeft.

7335

BB = exitMBB;

7336

// The epilogue copies are inserted in front of the instructions that were
// spliced into exitMBB above.
MachineInstr *StartOfExit = exitMBB->begin();

7337

7338

// [scratch, srcOut] = LDRB_POST(srcLoop, 1)

7339

// [destOut] = STRB_POST(scratch, destLoop, 1)

7340

unsigned srcIn = srcLoop;

7341

unsigned destIn = destLoop;

7342

for (unsigned i = 0; i < BytesLeft; i++) {

7343

unsigned srcOut = MRI.createVirtualRegister(TRC);

7344

unsigned destOut = MRI.createVirtualRegister(TRC);

7345

unsigned scratch = MRI.createVirtualRegister(TRC);

7346

emitPostLd(BB, StartOfExit, TII, dl, 1, scratch, srcIn, srcOut,

7347

IsThumb1, IsThumb2);

7348

emitPostSt(BB, StartOfExit, TII, dl, 1, scratch, destIn, destOut,

7349

IsThumb1, IsThumb2);

7350

srcIn = srcOut;

7351

destIn = destOut;

7352

}

7353

7354

MI->eraseFromParent(); // The instruction is gone now.

7355

return BB;

7356

}

7357

7358

/// Replace the __chkstk pseudo instruction \p MI with a call to the
/// "__chkstk" external symbol followed by a subtraction of R4 from SP.
///
/// The call form depends on the target machine's code model: a direct call
/// for the small/medium/default/kernel models, and an indirect call through
/// a register holding the symbol address for the large/JIT-default models.
/// \p MI is erased and \p MBB is returned.
MachineBasicBlock *
ARMTargetLowering::EmitLowered__chkstk(MachineInstr *MI,
                                       MachineBasicBlock *MBB) const {
  const TargetMachine &Machine = getTargetMachine();
  const TargetInstrInfo &InstrInfo =
      *Machine.getSubtargetImpl()->getInstrInfo();
  DebugLoc Loc = MI->getDebugLoc();

  assert(Subtarget->isTargetWindows() &&
         "__chkstk is only supported on Windows");
  assert(Subtarget->isThumb2() && "Windows on ARM requires Thumb-2 mode");

  // __chkstk receives the number of words to allocate on the stack in R4 and
  // hands back the stack adjustment, in bytes, in R4. It does not clobber
  // any other register (apart from the obvious lr).
  //
  // Strictly speaking IP is a register that may be clobbered, but the call
  // itself does not touch it. Windows on ARM is a pure Thumb-2 environment,
  // so no interworking is needed and the linker is not expected to emit a
  // veneer that would clobber IP.
  //
  // Every module gets its own copy of __chkstk, so no import thunk is needed
  // either, which again keeps IP intact.
  //
  // Lastly, some linkers could in theory insert a trampoline for out-of-range
  // calls (quite common given the 32M branch range limit for Thumb). The
  // long-call form selected by -mcmodel=large avoids the need for such a
  // trampoline, which might clobber IP.

  // Register operand flags shared by both call forms.
  const unsigned ImplicitKill = RegState::Implicit | RegState::Kill;
  const unsigned ImplicitDef = RegState::Implicit | RegState::Define;
  const unsigned ImplicitDeadDef =
      RegState::Implicit | RegState::Define | RegState::Dead;

  switch (Machine.getCodeModel()) {
  case CodeModel::Large:
  case CodeModel::JITDefault: {
    // Materialize the symbol address in a fresh register and call through
    // it.
    MachineRegisterInfo &RegInfo = MBB->getParent()->getRegInfo();
    unsigned TargetReg = RegInfo.createVirtualRegister(&ARM::rGPRRegClass);

    BuildMI(*MBB, MI, Loc, InstrInfo.get(ARM::t2MOVi32imm), TargetReg)
        .addExternalSymbol("__chkstk");

    MachineInstrBuilder Call =
        BuildMI(*MBB, MI, Loc, InstrInfo.get(ARM::tBLXr));
    Call.addImm((unsigned)ARMCC::AL);
    Call.addReg(0);
    Call.addReg(TargetReg, RegState::Kill);
    Call.addReg(ARM::R4, ImplicitKill);
    Call.addReg(ARM::R4, ImplicitDef);
    Call.addReg(ARM::R12, ImplicitDeadDef);
    break;
  }
  case CodeModel::Small:
  case CodeModel::Medium:
  case CodeModel::Default:
  case CodeModel::Kernel: {
    // Direct call to the symbol.
    MachineInstrBuilder Call =
        BuildMI(*MBB, MI, Loc, InstrInfo.get(ARM::tBL));
    Call.addImm((unsigned)ARMCC::AL);
    Call.addReg(0);
    Call.addExternalSymbol("__chkstk");
    Call.addReg(ARM::R4, ImplicitKill);
    Call.addReg(ARM::R4, ImplicitDef);
    Call.addReg(ARM::R12, ImplicitDeadDef);
    break;
  }
  }

  // SP = SP - R4: apply the adjustment returned by the call.
  MachineInstrBuilder Adjust =
      BuildMI(*MBB, MI, Loc, InstrInfo.get(ARM::t2SUBrr), ARM::SP);
  Adjust.addReg(ARM::SP);
  Adjust.addReg(ARM::R4);
  AddDefaultCC(AddDefaultPred(Adjust));

  MI->eraseFromParent();
  return MBB;
}

7423

7424

MachineBasicBlock *

7425

ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,

7426

MachineBasicBlock *BB) const {

7427

const TargetInstrInfo *TII =

7428

getTargetMachine().getSubtargetImpl()->getInstrInfo();

7429

DebugLoc dl = MI->getDebugLoc();

7430

bool isThumb2 = Subtarget->isThumb2();

7431

switch (MI->getOpcode()) {

7432

default: {

7433

MI->dump();

7434

llvm_unreachable("Unexpected instr type to insert")::llvm::llvm_unreachable_internal("Unexpected instr type to insert"
, "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 7434);

7435

}

7436

// The Thumb2 pre-indexed stores have the same MI operands, they just

7437

// define them differently in the .td files from the isel patterns, so

7438

// they need pseudos.

7439

case ARM::t2STR_preidx:

7440

MI->setDesc(TII->get(ARM::t2STR_PRE));

7441

return BB;

7442

case ARM::t2STRB_preidx:

7443

MI->setDesc(TII->get(ARM::t2STRB_PRE));

7444

return BB;

7445

case ARM::t2STRH_preidx:

7446

MI->setDesc(TII->get(ARM::t2STRH_PRE));

7447

return BB;

7448

7449

case ARM::STRi_preidx:

7450

case ARM::STRBi_preidx: {

7451

unsigned NewOpc = MI->getOpcode() == ARM::STRi_preidx ?

7452

ARM::STR_PRE_IMM : ARM::STRB_PRE_IMM;

7453

// Decode the offset.

7454

unsigned Offset = MI->getOperand(4).getImm();

7455

bool isSub = ARM_AM::getAM2Op(Offset) == ARM_AM::sub;

7456

Offset = ARM_AM::getAM2Offset(Offset);

7457

if (isSub)

7458

Offset = -Offset;

7459

7460

MachineMemOperand *MMO = *MI->memoperands_begin();

7461

BuildMI(*BB, MI, dl, TII->get(NewOpc))

7462

.addOperand(MI->getOperand(0)) // Rn_wb

7463

.addOperand(MI->getOperand(1)) // Rt

7464

.addOperand(MI->getOperand(2)) // Rn

7465

.addImm(Offset) // offset (skip GPR==zero_reg)

7466

.addOperand(MI->getOperand(5)) // pred

7467

.addOperand(MI->getOperand(6))

7468

.addMemOperand(MMO);

7469

MI->eraseFromParent();

7470

return BB;

7471

}

7472

case ARM::STRr_preidx:

7473

case ARM::STRBr_preidx:

7474

case ARM::STRH_preidx: {

7475

unsigned NewOpc;

7476

switch (MI->getOpcode()) {

7477

default: llvm_unreachable("unexpected opcode!")::llvm::llvm_unreachable_internal("unexpected opcode!", "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 7477);

7478

case ARM::STRr_preidx: NewOpc = ARM::STR_PRE_REG; break;

7479

case ARM::STRBr_preidx: NewOpc = ARM::STRB_PRE_REG; break;

7480

case ARM::STRH_preidx: NewOpc = ARM::STRH_PRE; break;

7481

}

7482

MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc));

7483

for (unsigned i = 0; i < MI->getNumOperands(); ++i)

7484

MIB.addOperand(MI->getOperand(i));

7485

MI->eraseFromParent();

7486

return BB;

7487

}

7488

7489

case ARM::tMOVCCr_pseudo: {

7490

// To "insert" a SELECT_CC instruction, we actually have to insert the

7491

// diamond control-flow pattern. The incoming instruction knows the

7492

// destination vreg to set, the condition code register to branch on, the

7493

// true/false values to select between, and a branch opcode to use.

7494

const BasicBlock *LLVM_BB = BB->getBasicBlock();

7495

MachineFunction::iterator It = BB;

7496

++It;

7497

7498

// thisMBB:

7499

// ...

7500

// TrueVal = ...

7501

// cmpTY ccX, r1, r2

7502

// bCC copy1MBB

7503

// fallthrough --> copy0MBB

7504

MachineBasicBlock *thisMBB = BB;

7505

MachineFunction *F = BB->getParent();

7506

MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);

7507

MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);

7508

F->insert(It, copy0MBB);

7509

F->insert(It, sinkMBB);

7510

7511

// Transfer the remainder of BB and its successor edges to sinkMBB.

7512

sinkMBB->splice(sinkMBB->begin(), BB,

7513

std::next(MachineBasicBlock::iterator(MI)), BB->end());

7514

sinkMBB->transferSuccessorsAndUpdatePHIs(BB);

7515

7516

BB->addSuccessor(copy0MBB);

7517

BB->addSuccessor(sinkMBB);

7518

7519

BuildMI(BB, dl, TII->get(ARM::tBcc)).addMBB(sinkMBB)

7520

.addImm(MI->getOperand(3).getImm()).addReg(MI->getOperand(4).getReg());

7521

7522

// copy0MBB:

7523

// %FalseValue = ...

7524

// # fallthrough to sinkMBB

7525

BB = copy0MBB;

7526

7527

// Update machine-CFG edges

7528

BB->addSuccessor(sinkMBB);

7529

7530

// sinkMBB:

7531

// %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]

7532

// ...

7533

BB = sinkMBB;

7534

BuildMI(*BB, BB->begin(), dl,

7535

TII->get(ARM::PHI), MI->getOperand(0).getReg())

7536

.addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)

7537

.addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);

7538

7539

MI->eraseFromParent(); // The pseudo instruction is gone now.

7540

return BB;

7541

}

7542

7543

case ARM::BCCi64:

7544

case ARM::BCCZi64: {

7545

// If there is an unconditional branch to the other successor, remove it.

7546

BB->erase(std::next(MachineBasicBlock::iterator(MI)), BB->end());

7547

7548

// Compare both parts that make up the double comparison separately for

7549

// equality.

7550

bool RHSisZero = MI->getOpcode() == ARM::BCCZi64;

7551

7552

unsigned LHS1 = MI->getOperand(1).getReg();

7553

unsigned LHS2 = MI->getOperand(2).getReg();

7554

if (RHSisZero) {

7555

AddDefaultPred(BuildMI(BB, dl,

7556

TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))

7557

.addReg(LHS1).addImm(0));

7558

BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))

7559

.addReg(LHS2).addImm(0)

7560

.addImm(ARMCC::EQ).addReg(ARM::CPSR);

7561

} else {

7562

unsigned RHS1 = MI->getOperand(3).getReg();

7563

unsigned RHS2 = MI->getOperand(4).getReg();

7564

AddDefaultPred(BuildMI(BB, dl,

7565

TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))

7566

.addReg(LHS1).addReg(RHS1));

7567

BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))

7568

.addReg(LHS2).addReg(RHS2)

7569

.addImm(ARMCC::EQ).addReg(ARM::CPSR);

7570

}

7571

7572

MachineBasicBlock *destMBB = MI->getOperand(RHSisZero ? 3 : 5).getMBB();

7573

MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB);

7574

if (MI->getOperand(0).getImm() == ARMCC::NE)

7575

std::swap(destMBB, exitMBB);

7576

7577

BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))

7578

.addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR);

7579

if (isThumb2)

7580

AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2B)).addMBB(exitMBB));

7581

else

7582

BuildMI(BB, dl, TII->get(ARM::B)) .addMBB(exitMBB);

7583

7584

MI->eraseFromParent(); // The pseudo instruction is gone now.

7585

return BB;

7586

}

7587

7588

case ARM::Int_eh_sjlj_setjmp:

7589

case ARM::Int_eh_sjlj_setjmp_nofp:

7590

case ARM::tInt_eh_sjlj_setjmp:

7591

case ARM::t2Int_eh_sjlj_setjmp:

7592

case ARM::t2Int_eh_sjlj_setjmp_nofp:

7593

EmitSjLjDispatchBlock(MI, BB);

7594

return BB;

7595

7596

case ARM::ABS:

7597

case ARM::t2ABS: {

7598

// To insert an ABS instruction, we have to insert the

7599

// diamond control-flow pattern. The incoming instruction knows the

7600

// source vreg to test against 0, the destination vreg to set,

7601

// the condition code register to branch on, the

7602

// true/false values to select between, and a branch opcode to use.

7603

// It transforms

7604

// V1 = ABS V0

7605

// into

7606

// V2 = MOVS V0

7607

// BCC (branch to SinkBB if V0 >= 0)

7608

// RSBBB: V3 = RSBri V2, 0 (compute ABS if V2 < 0)

7609

// SinkBB: V1 = PHI(V2, V3)

7610

const BasicBlock *LLVM_BB = BB->getBasicBlock();

7611

MachineFunction::iterator BBI = BB;

7612

++BBI;

7613

MachineFunction *Fn = BB->getParent();

7614

MachineBasicBlock *RSBBB = Fn->CreateMachineBasicBlock(LLVM_BB);

7615

MachineBasicBlock *SinkBB = Fn->CreateMachineBasicBlock(LLVM_BB);

7616

Fn->insert(BBI, RSBBB);

7617

Fn->insert(BBI, SinkBB);

7618

7619

unsigned int ABSSrcReg = MI->getOperand(1).getReg();

7620

unsigned int ABSDstReg = MI->getOperand(0).getReg();

7621

bool isThumb2 = Subtarget->isThumb2();

7622

MachineRegisterInfo &MRI = Fn->getRegInfo();

7623

// In Thumb mode S must not be specified if source register is the SP or

7624

// PC and if destination register is the SP, so restrict register class

7625

unsigned NewRsbDstReg =

7626

MRI.createVirtualRegister(isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass);

7627

7628

// Transfer the remainder of BB and its successor edges to sinkMBB.

7629

SinkBB->splice(SinkBB->begin(), BB,

7630

std::next(MachineBasicBlock::iterator(MI)), BB->end());

7631

SinkBB->transferSuccessorsAndUpdatePHIs(BB);

7632

7633

BB->addSuccessor(RSBBB);

7634

BB->addSuccessor(SinkBB);

7635

7636

// fall through to SinkMBB

7637

RSBBB->addSuccessor(SinkBB);

7638

7639

// insert a cmp at the end of BB

7640

AddDefaultPred(BuildMI(BB, dl,

7641

TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))

7642

.addReg(ABSSrcReg).addImm(0));

7643

7644

// insert a bcc with opposite CC to ARMCC::MI at the end of BB

7645

BuildMI(BB, dl,

7646

TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)).addMBB(SinkBB)

7647

.addImm(ARMCC::getOppositeCondition(ARMCC::MI)).addReg(ARM::CPSR);

7648

7649

// insert rsbri in RSBBB

7650

// Note: BCC and rsbri will be converted into predicated rsbmi

7651

// by if-conversion pass

7652

BuildMI(*RSBBB, RSBBB->begin(), dl,

7653

TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg)

7654

.addReg(ABSSrcReg, RegState::Kill)

7655

.addImm(0).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);

7656

7657

// insert PHI in SinkBB,

7658

// reuse ABSDstReg to not change uses of ABS instruction

7659

BuildMI(*SinkBB, SinkBB->begin(), dl,

7660

TII->get(ARM::PHI), ABSDstReg)

7661

.addReg(NewRsbDstReg).addMBB(RSBBB)

7662

.addReg(ABSSrcReg).addMBB(BB);

7663

7664

// remove ABS instruction

7665

MI->eraseFromParent();

7666

7667

// return last added BB

7668

return SinkBB;

7669

}

7670

case ARM::COPY_STRUCT_BYVAL_I32:

7671

++NumLoopByVals;

7672

return EmitStructByval(MI, BB);

7673

case ARM::WIN__CHKSTK:

7674

return EmitLowered__chkstk(MI, BB);

7675

}

7676

}

7677

7678

void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,

7679

SDNode *Node) const {

7680

const MCInstrDesc *MCID = &MI->getDesc();

7681

// Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB,

7682

// RSC. Coming out of isel, they have an implicit CPSR def, but the optional

7683

// operand is still set to noreg. If needed, set the optional operand's

7684

// register to CPSR, and remove the redundant implicit def.

7685

7686

// e.g. ADCS (..., CPSR<imp-def>) -> ADC (... opt:CPSR<def>).

7687

7688

// Rename pseudo opcodes.

7689

unsigned NewOpc = convertAddSubFlagsOpcode(MI->getOpcode());

7690

if (NewOpc) {

7691

const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(

7692

getTargetMachine().getSubtargetImpl()->getInstrInfo());

7693

MCID = &TII->get(NewOpc);

7694

7695

assert(MCID->getNumOperands() == MI->getDesc().getNumOperands() + 1 &&((MCID->getNumOperands() == MI->getDesc().getNumOperands
() + 1 && "converted opcode should be the same except for cc_out"
) ? static_cast<void> (0) : __assert_fail ("MCID->getNumOperands() == MI->getDesc().getNumOperands() + 1 && \"converted opcode should be the same except for cc_out\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 7696, __PRETTY_FUNCTION__))

7696

"converted opcode should be the same except for cc_out")((MCID->getNumOperands() == MI->getDesc().getNumOperands
() + 1 && "converted opcode should be the same except for cc_out"
) ? static_cast<void> (0) : __assert_fail ("MCID->getNumOperands() == MI->getDesc().getNumOperands() + 1 && \"converted opcode should be the same except for cc_out\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 7696, __PRETTY_FUNCTION__));

7697

7698

MI->setDesc(*MCID);

7699

7700

// Add the optional cc_out operand

7701

MI->addOperand(MachineOperand::CreateReg(0, /*isDef=*/true));

7702

}

7703

unsigned ccOutIdx = MCID->getNumOperands() - 1;

7704

7705

// Any ARM instruction that sets the 's' bit should specify an optional

7706

// "cc_out" operand in the last operand position.

7707

if (!MI->hasOptionalDef() || !MCID->OpInfo[ccOutIdx].isOptionalDef()) {

7708

assert(!NewOpc && "Optional cc_out operand required")((!NewOpc && "Optional cc_out operand required") ? static_cast
<void> (0) : __assert_fail ("!NewOpc && \"Optional cc_out operand required\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 7708, __PRETTY_FUNCTION__));

7709

return;

7710

}

7711

// Look for an implicit def of CPSR added by MachineInstr ctor. Remove it

7712

// since we already have an optional CPSR def.

7713

bool definesCPSR = false;

7714

bool deadCPSR = false;

7715

for (unsigned i = MCID->getNumOperands(), e = MI->getNumOperands();

7716

i != e; ++i) {

7717

const MachineOperand &MO = MI->getOperand(i);

7718

if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) {

7719

definesCPSR = true;

7720

if (MO.isDead())

7721

deadCPSR = true;

7722

MI->RemoveOperand(i);

7723

break;

7724

}

7725

}

7726

if (!definesCPSR) {

7727

7728

return;

7729

}

7730

assert(deadCPSR == !Node->hasAnyUseOfValue(1) && "inconsistent dead flag")((deadCPSR == !Node->hasAnyUseOfValue(1) && "inconsistent dead flag"
) ? static_cast<void> (0) : __assert_fail ("deadCPSR == !Node->hasAnyUseOfValue(1) && \"inconsistent dead flag\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 7730, __PRETTY_FUNCTION__));

7731

if (deadCPSR) {

7732

assert(!MI->getOperand(ccOutIdx).getReg() &&((!MI->getOperand(ccOutIdx).getReg() && "expect uninitialized optional cc_out operand"
) ? static_cast<void> (0) : __assert_fail ("!MI->getOperand(ccOutIdx).getReg() && \"expect uninitialized optional cc_out operand\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 7733, __PRETTY_FUNCTION__))

7733

"expect uninitialized optional cc_out operand")((!MI->getOperand(ccOutIdx).getReg() && "expect uninitialized optional cc_out operand"
) ? static_cast<void> (0) : __assert_fail ("!MI->getOperand(ccOutIdx).getReg() && \"expect uninitialized optional cc_out operand\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 7733, __PRETTY_FUNCTION__));

7734

return;

7735

}

7736

7737

// If this instruction was defined with an optional CPSR def and its dag node

7738

// had a live implicit CPSR def, then activate the optional CPSR def.

7739

MachineOperand &MO = MI->getOperand(ccOutIdx);

7740

MO.setReg(ARM::CPSR);

7741

MO.setIsDef(true);

7742

}

7743

7744

//===----------------------------------------------------------------------===//

7745

// ARM Optimization Hooks

7746

//===----------------------------------------------------------------------===//

7747

7748

// Helper function that checks if N is a null or all ones constant.

7749

static inline bool isZeroOrAllOnes(SDValue N, bool AllOnes) {

7750

ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);

7751

if (!C)

7752

return false;

7753

return AllOnes ? C->isAllOnesValue() : C->isNullValue();

7754

}

7755

7756

// Return true if N is conditionally 0 or all ones.

7757

// Detects these expressions where cc is an i1 value:

7758

7759

// (select cc 0, y) [AllOnes=0]

7760

// (select cc y, 0) [AllOnes=0]

7761

// (zext cc) [AllOnes=0]

7762

// (sext cc) [AllOnes=0/1]

7763

// (select cc -1, y) [AllOnes=1]

7764

// (select cc y, -1) [AllOnes=1]

7765

7766

// Invert is set when N is the null/all ones constant when CC is false.

7767

// OtherOp is set to the alternative value of N.

7768

static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes,

7769

SDValue &CC, bool &Invert,

7770

SDValue &OtherOp,

7771

SelectionDAG &DAG) {

7772

switch (N->getOpcode()) {

7773

default: return false;

7774

case ISD::SELECT: {

7775

CC = N->getOperand(0);

7776

SDValue N1 = N->getOperand(1);

7777

SDValue N2 = N->getOperand(2);

7778

if (isZeroOrAllOnes(N1, AllOnes)) {

7779

Invert = false;

7780

OtherOp = N2;

7781

return true;

7782

}

7783

if (isZeroOrAllOnes(N2, AllOnes)) {

7784

Invert = true;

7785

OtherOp = N1;

7786

return true;

7787

}

7788

return false;

7789

}

7790

case ISD::ZERO_EXTEND:

7791

// (zext cc) can never be the all ones value.

7792

if (AllOnes)

7793

return false;

7794

// Fall through.

7795

case ISD::SIGN_EXTEND: {

7796

EVT VT = N->getValueType(0);

7797

CC = N->getOperand(0);

7798

if (CC.getValueType() != MVT::i1)

7799

return false;

7800

Invert = !AllOnes;

7801

if (AllOnes)

7802

// When looking for an AllOnes constant, N is an sext, and the 'other'

7803

// value is 0.

7804

OtherOp = DAG.getConstant(0, VT);

7805

else if (N->getOpcode() == ISD::ZERO_EXTEND)

7806

// When looking for a 0 constant, N can be zext or sext.

7807

OtherOp = DAG.getConstant(1, VT);

7808

else

7809

OtherOp = DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT);

7810

return true;

7811

}

7812

}

7813

}

7814

7815

// Combine a constant select operand into its use:

7816

7817

// (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))

7818

// (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))

7819

// (and (select cc, -1, c), x) -> (select cc, x, (and, x, c)) [AllOnes=1]

7820

// (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))

7821

// (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c))

7822

7823

// The transform is rejected if the select doesn't have a constant operand that

7824

// is null, or all ones when AllOnes is set.

7825

7826

// Also recognize sext/zext from i1:

7827

7828

// (add (zext cc), x) -> (select cc (add x, 1), x)

7829

// (add (sext cc), x) -> (select cc (add x, -1), x)

7830

7831

// These transformations eventually create predicated instructions.

7832

7833

// @param N The node to transform.

7834

// @param Slct The N operand that is a select.

7835

// @param OtherOp The other N operand (x above).

7836

// @param DCI Context.

7837

// @param AllOnes Require the select constant to be all ones instead of null.

7838

// @returns The new node, or SDValue() on failure.

7839

static

7840

SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,

7841

TargetLowering::DAGCombinerInfo &DCI,

7842

bool AllOnes = false) {

7843

SelectionDAG &DAG = DCI.DAG;

7844

EVT VT = N->getValueType(0);

7845

SDValue NonConstantVal;

7846

SDValue CCOp;

7847

bool SwapSelectOps;

7848

if (!isConditionalZeroOrAllOnes(Slct.getNode(), AllOnes, CCOp, SwapSelectOps,

7849

NonConstantVal, DAG))

7850

return SDValue();

7851

7852

// Slct is now know to be the desired identity constant when CC is true.

7853

SDValue TrueVal = OtherOp;

7854

SDValue FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT,

7855

OtherOp, NonConstantVal);

7856

// Unless SwapSelectOps says CC should be false.

7857

if (SwapSelectOps)

7858

std::swap(TrueVal, FalseVal);

7859

7860

return DAG.getNode(ISD::SELECT, SDLoc(N), VT,

7861

CCOp, TrueVal, FalseVal);

7862

}

7863

7864

// Attempt combineSelectAndUse on each operand of a commutative operator N.

7865

static

7866

SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes,

7867

TargetLowering::DAGCombinerInfo &DCI) {

7868

SDValue N0 = N->getOperand(0);

7869

SDValue N1 = N->getOperand(1);

7870

if (N0.getNode()->hasOneUse()) {

7871

SDValue Result = combineSelectAndUse(N, N0, N1, DCI, AllOnes);

7872

if (Result.getNode())

7873

return Result;

7874

}

7875

if (N1.getNode()->hasOneUse()) {

7876

SDValue Result = combineSelectAndUse(N, N1, N0, DCI, AllOnes);

7877

if (Result.getNode())

7878

return Result;

7879

}

7880

return SDValue();

7881

}

7882

7883

// AddCombineToVPADDL- For pair-wise add on neon, use the vpaddl instruction

7884

// (only after legalization).

7885

static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1,

7886

TargetLowering::DAGCombinerInfo &DCI,

7887

const ARMSubtarget *Subtarget) {

7888

7889

// Only perform optimization if after legalize, and if NEON is available. We

7890

// also expected both operands to be BUILD_VECTORs.

7891

if (DCI.isBeforeLegalize() || !Subtarget->hasNEON()

7892

|| N0.getOpcode() != ISD::BUILD_VECTOR

7893

|| N1.getOpcode() != ISD::BUILD_VECTOR)

7894

return SDValue();

7895

7896

// Check output type since VPADDL operand elements can only be 8, 16, or 32.

7897

EVT VT = N->getValueType(0);

7898

if (!VT.isInteger() || VT.getVectorElementType() == MVT::i64)

7899

return SDValue();

7900

7901

// Check that the vector operands are of the right form.

7902

// N0 and N1 are BUILD_VECTOR nodes with N number of EXTRACT_VECTOR

7903

// operands, where N is the size of the formed vector.

7904

// Each EXTRACT_VECTOR should have the same input vector and odd or even

7905

// index such that we have a pair wise add pattern.

7906

7907

// Grab the vector that all EXTRACT_VECTOR nodes should be referencing.

7908

if (N0->getOperand(0)->getOpcode() != ISD::EXTRACT_VECTOR_ELT)

7909

return SDValue();

7910

SDValue Vec = N0->getOperand(0)->getOperand(0);

7911

SDNode *V = Vec.getNode();

7912

unsigned nextIndex = 0;

7913

7914

// For each operands to the ADD which are BUILD_VECTORs,

7915

// check to see if each of their operands are an EXTRACT_VECTOR with

7916

// the same vector and appropriate index.

7917

for (unsigned i = 0, e = N0->getNumOperands(); i != e; ++i) {

7918

if (N0->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT

7919

&& N1->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {

7920

7921

SDValue ExtVec0 = N0->getOperand(i);

7922

SDValue ExtVec1 = N1->getOperand(i);

7923

7924

// First operand is the vector, verify its the same.

7925

if (V != ExtVec0->getOperand(0).getNode() ||

7926

V != ExtVec1->getOperand(0).getNode())

7927

return SDValue();

7928

7929

// Second is the constant, verify its correct.

7930

ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(ExtVec0->getOperand(1));

7931

ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(ExtVec1->getOperand(1));

7932

7933

// For the constant, we want to see all the even or all the odd.

7934

if (!C0 || !C1 || C0->getZExtValue() != nextIndex

7935

|| C1->getZExtValue() != nextIndex+1)

7936

return SDValue();

7937

7938

// Increment index.

7939

nextIndex+=2;

7940

} else

7941

return SDValue();

7942

}

7943

7944

// Create VPADDL node.

7945

SelectionDAG &DAG = DCI.DAG;

7946

const TargetLowering &TLI = DAG.getTargetLoweringInfo();

7947

7948

// Build operand list.

7949

SmallVector<SDValue, 8> Ops;

7950

Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddls,

7951

TLI.getPointerTy()));

7952

7953

// Input is the vector.

7954

Ops.push_back(Vec);

7955

7956

// Get widened type and narrowed type.

7957

MVT widenType;

7958

unsigned numElem = VT.getVectorNumElements();

7959

7960

EVT inputLaneType = Vec.getValueType().getVectorElementType();

7961

switch (inputLaneType.getSimpleVT().SimpleTy) {

7962

case MVT::i8: widenType = MVT::getVectorVT(MVT::i16, numElem); break;

7963

case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break;

7964

case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break;

7965

default:

7966

llvm_unreachable("Invalid vector element type for padd optimization.")::llvm::llvm_unreachable_internal("Invalid vector element type for padd optimization."
, "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 7966);

7967

}

7968

7969

SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), widenType, Ops);

7970

unsigned ExtOp = VT.bitsGT(tmp.getValueType()) ? ISD::ANY_EXTEND : ISD::TRUNCATE;

7971

return DAG.getNode(ExtOp, SDLoc(N), VT, tmp);

7972

}

7973

7974

// Returns V itself when it is a result of an ISD::UMUL_LOHI or
// ISD::SMUL_LOHI node, and an empty SDValue otherwise.
static SDValue findMUL_LOHI(SDValue V) {
  switch (V->getOpcode()) {
  case ISD::UMUL_LOHI:
  case ISD::SMUL_LOHI:
    return V;
  default:
    return SDValue();
  }
}

7980

7981

/// Try to fold a 64-bit multiply-accumulate, expressed as S/UMUL_LOHI
/// followed by a glued ADDC/ADDE pair, into a single ARMISD::SMLAL/UMLAL.
///
/// \param AddcNode  The ISD::ADDC node that starts the pattern.
/// \param DCI       Combiner context.
/// \param Subtarget Used to skip the fold on Thumb1-only targets.
/// \returns SDValue(AddcNode, 0) after the uses of the ADDC and ADDE results
///          have been redirected to the new MLAL node, or SDValue() when the
///          pattern does not match.
static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode,
                                     TargetLowering::DAGCombinerInfo &DCI,
                                     const ARMSubtarget *Subtarget) {

  if (Subtarget->isThumb1Only()) return SDValue();

  // Only perform the checks after legalize when the pattern is available.
  if (DCI.isBeforeLegalize()) return SDValue();

  // Look for multiply add opportunities.
  // The pattern is a ISD::UMUL_LOHI followed by two add nodes, where
  // each add nodes consumes a value from ISD::UMUL_LOHI and there is
  // a glue link from the first add to the second add.
  // If we find this pattern, we can replace the U/SMUL_LOHI, ADDC, and ADDE by
  // a S/UMLAL instruction.
  //   loAdd   UMUL_LOHI
  //     \    / :lo    \ :hi
  //      \  /          \          [no multiline comment]
  //       ADDC         |  hiAdd
  //          \ :glue  /  /
  //           \      /  /
  //             ADDE

  assert(AddcNode->getOpcode() == ISD::ADDC && "Expect an ADDC");
  SDValue AddcOp0 = AddcNode->getOperand(0);
  SDValue AddcOp1 = AddcNode->getOperand(1);

  // Check if the two operands are from the same mul_lohi node.
  if (AddcOp0.getNode() == AddcOp1.getNode())
    return SDValue();

  assert(AddcNode->getNumValues() == 2 &&
         AddcNode->getValueType(0) == MVT::i32 &&
         "Expect ADDC with two result values. First: i32");

  // Check that we have a glued ADDC node.
  if (AddcNode->getValueType(1) != MVT::Glue)
    return SDValue();

  // At least one ADDC operand has to come from a S/UMUL_LOHI.
  if (AddcOp0->getOpcode() != ISD::UMUL_LOHI &&
      AddcOp0->getOpcode() != ISD::SMUL_LOHI &&
      AddcOp1->getOpcode() != ISD::UMUL_LOHI &&
      AddcOp1->getOpcode() != ISD::SMUL_LOHI)
    return SDValue();

  // Look for the glued ADDE.
  SDNode* AddeNode = AddcNode->getGluedUser();
  if (!AddeNode)
    return SDValue();

  // Make sure it is really an ADDE.
  if (AddeNode->getOpcode() != ISD::ADDE)
    return SDValue();

  assert(AddeNode->getNumOperands() == 3 &&
         AddeNode->getOperand(2).getValueType() == MVT::Glue &&
         "ADDE node has the wrong inputs");

  // Check for the triangle shape.
  SDValue AddeOp0 = AddeNode->getOperand(0);
  SDValue AddeOp1 = AddeNode->getOperand(1);

  // Make sure that the ADDE operands are not coming from the same node.
  if (AddeOp0.getNode() == AddeOp1.getNode())
    return SDValue();

  // Find the MUL_LOHI node walking up ADDE's operands.
  bool IsLeftOperandMUL = false;
  SDValue MULOp = findMUL_LOHI(AddeOp0);
  if (MULOp == SDValue())
    MULOp = findMUL_LOHI(AddeOp1);
  else
    IsLeftOperandMUL = true;
  if (MULOp == SDValue())
    return SDValue();

  // Figure out the right opcode.
  unsigned Opc = MULOp->getOpcode();
  unsigned FinalOpc = (Opc == ISD::SMUL_LOHI) ? ARMISD::SMLAL : ARMISD::UMLAL;

  // Figure out the high and low input values to the MLAL node.
  SDValue* HiMul = &MULOp;
  SDValue* HiAdd = nullptr;
  SDValue* LoMul = nullptr;
  SDValue* LowAdd = nullptr;

  // The ADDE operand that is not the multiply is the high addend.
  if (IsLeftOperandMUL)
    HiAdd = &AddeOp1;
  else
    HiAdd = &AddeOp0;

  // Likewise split the ADDC operands into the multiply and the low addend.
  if (AddcOp0->getOpcode() == Opc) {
    LoMul = &AddcOp0;
    LowAdd = &AddcOp1;
  }
  if (AddcOp1->getOpcode() == Opc) {
    LoMul = &AddcOp1;
    LowAdd = &AddcOp0;
  }

  if (!LoMul)
    return SDValue();

  // Both adds have to consume the very same multiply ...
  if (LoMul->getNode() != HiMul->getNode())
    return SDValue();

  // ... and each has to consume the matching half of it: the ADDC the low
  // result (#0) and the ADDE the high result (#1). Without this check an
  // ADDC fed by the high half (or an ADDE fed by the low half) would be
  // folded into an MLAL that computes a different value.
  if (LoMul->getResNo() != 0 || HiMul->getResNo() != 1)
    return SDValue();

  // Create the merged node.
  SelectionDAG &DAG = DCI.DAG;

  // Build operand list: the two multiplicands, then the low and high addends.
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(LoMul->getOperand(0));
  Ops.push_back(LoMul->getOperand(1));
  Ops.push_back(*LowAdd);
  Ops.push_back(*HiAdd);

  SDValue MLALNode =  DAG.getNode(FinalOpc, SDLoc(AddcNode),
                                 DAG.getVTList(MVT::i32, MVT::i32), Ops);

  // Replace the ADDs' nodes uses by the MLA node's values.
  SDValue HiMLALResult(MLALNode.getNode(), 1);
  DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), HiMLALResult);

  SDValue LoMLALResult(MLALNode.getNode(), 0);
  DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), LoMLALResult);

  // Return original node to notify the driver to stop replacing.
  SDValue resNode(AddcNode, 0);
  return resNode;
}

8113

8114

/// PerformADDCCombine - Target-specific dag combine transform from

8115

/// ISD::ADDC, ISD::ADDE, and ISD::MUL_LOHI to MLAL.

8116

static SDValue PerformADDCCombine(SDNode *N,

8117

TargetLowering::DAGCombinerInfo &DCI,

8118

const ARMSubtarget *Subtarget) {

8119

8120

return AddCombineTo64bitMLAL(N, DCI, Subtarget);

8121

8122

}

8123

8124

/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with

8125

/// operands N0 and N1. This is a helper for PerformADDCombine that is

8126

/// called with the default operands, and if that fails, with commuted

8127

/// operands.

8128

static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,

8129

TargetLowering::DAGCombinerInfo &DCI,

8130

const ARMSubtarget *Subtarget){

8131

8132

// Attempt to create vpaddl for this add.

8133

SDValue Result = AddCombineToVPADDL(N, N0, N1, DCI, Subtarget);

8134

if (Result.getNode())

8135

return Result;

8136

8137

// fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))

8138

if (N0.getNode()->hasOneUse()) {

8139

SDValue Result = combineSelectAndUse(N, N0, N1, DCI);

8140

if (Result.getNode()) return Result;

8141

}

8142

return SDValue();

8143

}

8144

8145

/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.

8146

///

8147

static SDValue PerformADDCombine(SDNode *N,

8148

TargetLowering::DAGCombinerInfo &DCI,

8149

const ARMSubtarget *Subtarget) {

8150

SDValue N0 = N->getOperand(0);

8151

SDValue N1 = N->getOperand(1);

8152

8153

// First try with the default operand order.

8154

SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget);

8155

if (Result.getNode())

8156

return Result;

8157

8158

// If that didn't work, try again with the operands commuted.

8159

return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget);

8160

}

8161

8162

/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.

8163

///

8164

static SDValue PerformSUBCombine(SDNode *N,

8165

TargetLowering::DAGCombinerInfo &DCI) {

8166

SDValue N0 = N->getOperand(0);

8167

SDValue N1 = N->getOperand(1);

8168

8169

// fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))

8170

if (N1.getNode()->hasOneUse()) {

8171

SDValue Result = combineSelectAndUse(N, N1, N0, DCI);

8172

if (Result.getNode()) return Result;

8173

}

8174

8175

return SDValue();

8176

}

8177

8178

/// PerformVMULCombine

8179

/// Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the

8180

/// special multiplier accumulator forwarding.

8181

/// vmul d3, d0, d2

8182

/// vmla d3, d1, d2

8183

/// is faster than

8184

/// vadd d3, d0, d1

8185

/// vmul d3, d3, d2

8186

// However, for (A + B) * (A + B),

8187

// vadd d2, d0, d1

8188

// vmul d3, d0, d2

8189

// vmla d3, d1, d2

8190

// is slower than

8191

// vadd d2, d0, d1

8192

// vmul d3, d2, d2

8193

static SDValue PerformVMULCombine(SDNode *N,

8194

TargetLowering::DAGCombinerInfo &DCI,

8195

const ARMSubtarget *Subtarget) {

8196

if (!Subtarget->hasVMLxForwarding())

8197

return SDValue();

8198

8199

SelectionDAG &DAG = DCI.DAG;

8200

SDValue N0 = N->getOperand(0);

8201

SDValue N1 = N->getOperand(1);

8202

unsigned Opcode = N0.getOpcode();

8203

if (Opcode != ISD::ADD && Opcode != ISD::SUB &&

8204

Opcode != ISD::FADD && Opcode != ISD::FSUB) {

8205

Opcode = N1.getOpcode();

8206

if (Opcode != ISD::ADD && Opcode != ISD::SUB &&

8207

Opcode != ISD::FADD && Opcode != ISD::FSUB)

8208

return SDValue();

8209

std::swap(N0, N1);

8210

}

8211

8212

if (N0 == N1)

8213

return SDValue();

8214

8215

EVT VT = N->getValueType(0);

8216

SDLoc DL(N);

8217

SDValue N00 = N0->getOperand(0);

8218

SDValue N01 = N0->getOperand(1);

8219

return DAG.getNode(Opcode, DL, VT,

8220

DAG.getNode(ISD::MUL, DL, VT, N00, N1),

8221

DAG.getNode(ISD::MUL, DL, VT, N01, N1));

8222

}

8223

8224

/// PerformMULCombine - Target-specific dag combine xforms for ISD::MUL.
///
/// 64- and 128-bit vector multiplies are forwarded to PerformVMULCombine.
/// For an i32 multiply by a constant c, write c = m << s with s the number of
/// trailing zero bits of c (masked to 0..31); when |m| is of the form 2^N + 1
/// or 2^N - 1 the multiply is rewritten as a shift plus an add/sub of x,
/// followed by a final shift left by s when s != 0.
///
/// Nothing is done on Thumb1-only targets, before legalization, or when
/// called by the legalizer. On success the replacement is installed through
/// DCI.CombineTo and SDValue() is returned, so the return value is an empty
/// SDValue on every scalar path.
static SDValue PerformMULCombine(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const ARMSubtarget *Subtarget) {
  SelectionDAG &DAG = DCI.DAG;

  if (Subtarget->isThumb1Only())
    return SDValue();

  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
    return SDValue();

  EVT VT = N->getValueType(0);
  if (VT.is64BitVector() || VT.is128BitVector())
    return PerformVMULCombine(N, DCI, Subtarget);
  if (VT != MVT::i32)
    return SDValue();

  // Only a multiply by a constant right-hand side is handled.
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
  if (!C)
    return SDValue();

  int64_t MulAmt = C->getSExtValue();
  unsigned ShiftAmt = countTrailingZeros<uint64_t>(MulAmt);

  // Keep the shift amount within the 32-bit value range.
  ShiftAmt = ShiftAmt & (32 - 1);
  SDValue V = N->getOperand(0);
  SDLoc DL(N);

  SDValue Res;
  // Strip the power-of-two factor; it is re-applied as a final shift below.
  MulAmt >>= ShiftAmt;

  if (MulAmt >= 0) {
    if (isPowerOf2_32(MulAmt - 1)) {
      // (mul x, 2^N + 1) => (add (shl x, N), x)
      Res = DAG.getNode(ISD::ADD, DL, VT,
                        V,
                        DAG.getNode(ISD::SHL, DL, VT,
                                    V,
                                    DAG.getConstant(Log2_32(MulAmt - 1),
                                                    MVT::i32)));
    } else if (isPowerOf2_32(MulAmt + 1)) {
      // (mul x, 2^N - 1) => (sub (shl x, N), x)
      Res = DAG.getNode(ISD::SUB, DL, VT,
                        DAG.getNode(ISD::SHL, DL, VT,
                                    V,
                                    DAG.getConstant(Log2_32(MulAmt + 1),
                                                    MVT::i32)),
                        V);
    } else
      return SDValue();
  } else {
    uint64_t MulAmtAbs = -MulAmt;
    if (isPowerOf2_32(MulAmtAbs + 1)) {
      // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
      Res = DAG.getNode(ISD::SUB, DL, VT,
                        V,
                        DAG.getNode(ISD::SHL, DL, VT,
                                    V,
                                    DAG.getConstant(Log2_32(MulAmtAbs + 1),
                                                    MVT::i32)));
    } else if (isPowerOf2_32(MulAmtAbs - 1)) {
      // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
      Res = DAG.getNode(ISD::ADD, DL, VT,
                        V,
                        DAG.getNode(ISD::SHL, DL, VT,
                                    V,
                                    DAG.getConstant(Log2_32(MulAmtAbs-1),
                                                    MVT::i32)));
      // Negate the sum by subtracting it from zero.
      Res = DAG.getNode(ISD::SUB, DL, VT,
                        DAG.getConstant(0, MVT::i32),Res);

    } else
      return SDValue();
  }

  // Re-apply the power-of-two factor that was stripped from the constant.
  if (ShiftAmt != 0)
    Res = DAG.getNode(ISD::SHL, DL, VT,
                      Res, DAG.getConstant(ShiftAmt, MVT::i32));

  // Do not add new nodes to DAG combiner worklist.
  DCI.CombineTo(N, Res, false);
  return SDValue();
}

8307

8308

/// PerformANDCombine - Target-specific dag combine xforms for ISD::AND.
///
/// Tries, in order:
///  1. an immediate-form VBIC when operand 1 is a constant splat whose
///     complement isNEONModifiedImm accepts;
///  2. on non-Thumb1 subtargets, folding a select into the AND via
///     combineSelectAndUseCommutative.
/// Returns an empty SDValue when the result type is not legal or when neither
/// rewrite applies.
static SDValue PerformANDCombine(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const ARMSubtarget *Subtarget) {
  SelectionDAG &DAG = DCI.DAG;
  EVT VT = N->getValueType(0);

  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
    return SDValue();

  // Attempt to use immediate-form VBIC
  SDLoc Loc(N);
  BuildVectorSDNode *SplatSrc = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
  APInt SplatBits, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  if (SplatSrc &&
      SplatSrc->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
                                HasAnyUndefs) &&
      SplatBitSize <= 64) {
    EVT VbicVT;
    // VBIC clears the bits set in its immediate, so encode the complement of
    // the AND mask.
    SDValue Imm = isNEONModifiedImm((~SplatBits).getZExtValue(),
                                    SplatUndef.getZExtValue(), SplatBitSize,
                                    DAG, VbicVT, VT.is128BitVector(),
                                    OtherModImm);
    if (Imm.getNode()) {
      SDValue Src = DAG.getNode(ISD::BITCAST, Loc, VbicVT, N->getOperand(0));
      SDValue Vbic = DAG.getNode(ARMISD::VBICIMM, Loc, VbicVT, Src, Imm);
      return DAG.getNode(ISD::BITCAST, Loc, VT, Vbic);
    }
  }

  if (Subtarget->isThumb1Only())
    return SDValue();

  // fold (and (select cc, -1, c), x) -> (select cc, x, (and, x, c))
  SDValue Folded = combineSelectAndUseCommutative(N, true, DCI);
  return Folded.getNode() ? Folded : SDValue();
}

8350

8351

/// PerformORCombine - Target-specific dag combine xforms for ISD::OR

8352

static SDValue PerformORCombine(SDNode *N,

8353

TargetLowering::DAGCombinerInfo &DCI,

8354

const ARMSubtarget *Subtarget) {

8355

// Attempt to use immediate-form VORR

8356

BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));

8357

SDLoc dl(N);

8358

EVT VT = N->getValueType(0);

8359

SelectionDAG &DAG = DCI.DAG;

8360

8361

if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))

8362

return SDValue();

8363

8364

APInt SplatBits, SplatUndef;

8365

unsigned SplatBitSize;

8366

bool HasAnyUndefs;

8367

if (BVN && Subtarget->hasNEON() &&

8368

BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {

8369

if (SplatBitSize <= 64) {

8370

EVT VorrVT;

8371

SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),

8372

SplatUndef.getZExtValue(), SplatBitSize,

8373

DAG, VorrVT, VT.is128BitVector(),

8374

OtherModImm);

8375

if (Val.getNode()) {

8376

SDValue Input =

8377

DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0));

8378

SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val);

8379

return DAG.getNode(ISD::BITCAST, dl, VT, Vorr);

8380

}

8381

}

8382

}

8383

8384

if (!Subtarget->isThumb1Only()) {

8385

// fold (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))

8386

SDValue Result = combineSelectAndUseCommutative(N, false, DCI);

8387

if (Result.getNode())

8388

return Result;

8389

}

8390

8391

// The code below optimizes (or (and X, Y), Z).

8392

// The AND operand needs to have a single user to make these optimizations

8393

// profitable.

8394

SDValue N0 = N->getOperand(0);

8395

if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())

8396

return SDValue();

8397

SDValue N1 = N->getOperand(1);

8398

8399

// (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant.

8400

if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() &&

8401

DAG.getTargetLoweringInfo().isTypeLegal(VT)) {

8402

APInt SplatUndef;

8403

unsigned SplatBitSize;

8404

bool HasAnyUndefs;

8405

8406

APInt SplatBits0, SplatBits1;

8407

BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1));

8408

BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1));

8409

// Ensure that the second operand of both ands are constants

8410

if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize,

8411

HasAnyUndefs) && !HasAnyUndefs) {

8412

if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,

8413

HasAnyUndefs) && !HasAnyUndefs) {

8414

// Ensure that the bit width of the constants are the same and that

8415

// the splat arguments are logical inverses as per the pattern we

8416

// are trying to simplify.

8417

if (SplatBits0.getBitWidth() == SplatBits1.getBitWidth() &&

8418

SplatBits0 == ~SplatBits1) {

8419

// Canonicalize the vector type to make instruction selection

8420

// simpler.

8421

EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;

8422

SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT,

8423

N0->getOperand(1),

8424

N0->getOperand(0),

8425

N1->getOperand(0));

8426

return DAG.getNode(ISD::BITCAST, dl, VT, Result);

8427

}

8428

}

8429

}

8430

}

8431

8432

// Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when

8433

// reasonable.

8434

8435

// BFI is only available on V6T2+

8436

if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops())

8437

return SDValue();

8438

8439

SDLoc DL(N);

8440

// 1) or (and A, mask), val => ARMbfi A, val, mask

8441

// iff (val & mask) == val

8442

8443

// 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask

8444

// 2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2)

8445

// && mask == ~mask2

8446

// 2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2)

8447

// && ~mask == mask2

8448

// (i.e., copy a bitfield value into another bitfield of the same width)

8449

8450

if (VT != MVT::i32)

8451

return SDValue();

8452

8453

SDValue N00 = N0.getOperand(0);

8454

8455

// The value and the mask need to be constants so we can verify this is

8456

// actually a bitfield set. If the mask is 0xffff, we can do better

8457

// via a movt instruction, so don't use BFI in that case.

8458

SDValue MaskOp = N0.getOperand(1);

8459

ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(MaskOp);

8460

if (!MaskC)

8461

return SDValue();

8462

unsigned Mask = MaskC->getZExtValue();

8463

if (Mask == 0xffff)

8464

return SDValue();

8465

SDValue Res;

8466

// Case (1): or (and A, mask), val => ARMbfi A, val, mask

8467

ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);

8468

if (N1C) {

8469

unsigned Val = N1C->getZExtValue();

8470

if ((Val & ~Mask) != Val)

8471

return SDValue();

8472

8473

if (ARM::isBitFieldInvertedMask(Mask)) {

8474

Val >>= countTrailingZeros(~Mask);

8475

8476

Res = DAG.getNode(ARMISD::BFI, DL, VT, N00,

8477

DAG.getConstant(Val, MVT::i32),

8478

DAG.getConstant(Mask, MVT::i32));

8479

8480

// Do not add new nodes to DAG combiner worklist.

8481

DCI.CombineTo(N, Res, false);

8482

return SDValue();

8483

}

8484

} else if (N1.getOpcode() == ISD::AND) {

8485

// case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask

8486

ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));

8487

if (!N11C)

8488

return SDValue();

8489

unsigned Mask2 = N11C->getZExtValue();

8490

8491

// Mask and ~Mask2 (or reverse) must be equivalent for the BFI pattern

8492

// as is to match.

8493

if (ARM::isBitFieldInvertedMask(Mask) &&

8494

(Mask == ~Mask2)) {

8495

// The pack halfword instruction works better for masks that fit it,

8496

// so use that when it's available.

8497

if (Subtarget->hasT2ExtractPack() &&

8498

(Mask == 0xffff || Mask == 0xffff0000))

8499

return SDValue();

8500

// 2a

8501

unsigned amt = countTrailingZeros(Mask2);

8502

Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0),

8503

DAG.getConstant(amt, MVT::i32));

8504

Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res,

8505

DAG.getConstant(Mask, MVT::i32));

8506

// Do not add new nodes to DAG combiner worklist.

8507

DCI.CombineTo(N, Res, false);

8508

return SDValue();

8509

} else if (ARM::isBitFieldInvertedMask(~Mask) &&

8510

(~Mask == Mask2)) {

8511

// The pack halfword instruction works better for masks that fit it,

8512

// so use that when it's available.

8513

if (Subtarget->hasT2ExtractPack() &&

8514

(Mask2 == 0xffff || Mask2 == 0xffff0000))

8515

return SDValue();

8516

// 2b

8517

unsigned lsb = countTrailingZeros(Mask);

8518

Res = DAG.getNode(ISD::SRL, DL, VT, N00,

8519

DAG.getConstant(lsb, MVT::i32));

8520

Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res,

8521

DAG.getConstant(Mask2, MVT::i32));

8522

// Do not add new nodes to DAG combiner worklist.

8523

DCI.CombineTo(N, Res, false);

8524

return SDValue();

8525

}

8526

}

8527

8528

if (DAG.MaskedValueIsZero(N1, MaskC->getAPIntValue()) &&

8529

N00.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N00.getOperand(1)) &&

8530

ARM::isBitFieldInvertedMask(~Mask)) {

8531

// Case (3): or (and (shl A, #shamt), mask), B => ARMbfi B, A, ~mask

8532

// where lsb(mask) == #shamt and masked bits of B are known zero.

8533

SDValue ShAmt = N00.getOperand(1);

8534

unsigned ShAmtC = cast<ConstantSDNode>(ShAmt)->getZExtValue();

8535

unsigned LSB = countTrailingZeros(Mask);

8536

if (ShAmtC != LSB)

8537

return SDValue();

8538

8539

Res = DAG.getNode(ARMISD::BFI, DL, VT, N1, N00.getOperand(0),

8540

DAG.getConstant(~Mask, MVT::i32));

8541

8542

// Do not add new nodes to DAG combiner worklist.

8543

DCI.CombineTo(N, Res, false);

8544

}

8545

8546

return SDValue();

8547

}

8548

8549

/// PerformXORCombine - Target-specific dag combine xforms for ISD::XOR.
///
/// On non-Thumb1 subtargets with a legal result type, tries to fold a select
/// into the XOR via combineSelectAndUseCommutative. Returns an empty SDValue
/// when no rewrite applies.
static SDValue PerformXORCombine(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const ARMSubtarget *Subtarget) {
  SelectionDAG &DAG = DCI.DAG;
  EVT VT = N->getValueType(0);

  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
    return SDValue();

  if (Subtarget->isThumb1Only())
    return SDValue();

  // fold (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c))
  SDValue Folded = combineSelectAndUseCommutative(N, false, DCI);
  return Folded.getNode() ? Folded : SDValue();
}

8567

8568

/// PerformBFICombine - (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff

8569

/// the bits being cleared by the AND are not demanded by the BFI.

8570

static SDValue PerformBFICombine(SDNode *N,

8571

TargetLowering::DAGCombinerInfo &DCI) {

8572

SDValue N1 = N->getOperand(1);

8573

if (N1.getOpcode() == ISD::AND) {

Taking true branch

→

8574

ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));

8575

if (!N11C)

←

Assuming 'N11C' is non-null

→

←

Taking false branch

→

8576

return SDValue();

8577

unsigned InvMask = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();

8578

unsigned LSB = countTrailingZeros(~InvMask);

8579

unsigned Width = (32 - countLeadingZeros(~InvMask)) - LSB;

8580

unsigned Mask = (1 << Width)-1;

←

The result of the '<<' expression is undefined

8581

unsigned Mask2 = N11C->getZExtValue();

8582

if ((Mask & (~Mask2)) == 0)

8583

return DCI.DAG.getNode(ARMISD::BFI, SDLoc(N), N->getValueType(0),

8584

N->getOperand(0), N1.getOperand(0),

8585

N->getOperand(2));

8586

}

8587

return SDValue();

8588

}

8589

8590

/// PerformVMOVRRDCombine - Target-specific dag combine xforms for

8591

/// ARMISD::VMOVRRD.

8592

static SDValue PerformVMOVRRDCombine(SDNode *N,

8593

TargetLowering::DAGCombinerInfo &DCI,

8594

const ARMSubtarget *Subtarget) {

8595

// vmovrrd(vmovdrr x, y) -> x,y

8596

SDValue InDouble = N->getOperand(0);

8597

if (InDouble.getOpcode() == ARMISD::VMOVDRR && !Subtarget->isFPOnlySP())

8598

return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));

8599

8600

// vmovrrd(load f64) -> (load i32), (load i32)

8601

SDNode *InNode = InDouble.getNode();

8602

if (ISD::isNormalLoad(InNode) && InNode->hasOneUse() &&

8603

InNode->getValueType(0) == MVT::f64 &&

8604

InNode->getOperand(1).getOpcode() == ISD::FrameIndex &&

8605

!cast<LoadSDNode>(InNode)->isVolatile()) {

8606

// TODO: Should this be done for non-FrameIndex operands?

8607

LoadSDNode *LD = cast<LoadSDNode>(InNode);

8608

8609

SelectionDAG &DAG = DCI.DAG;

8610

SDLoc DL(LD);

8611

SDValue BasePtr = LD->getBasePtr();

8612

SDValue NewLD1 = DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr,

8613

LD->getPointerInfo(), LD->isVolatile(),

8614

LD->isNonTemporal(), LD->isInvariant(),

8615

LD->getAlignment());

8616

8617

SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,

8618

DAG.getConstant(4, MVT::i32));

8619

SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, NewLD1.getValue(1), OffsetPtr,

8620

LD->getPointerInfo(), LD->isVolatile(),

8621

LD->isNonTemporal(), LD->isInvariant(),

8622

std::min(4U, LD->getAlignment() / 2));

8623

8624

DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1));

8625

if (DCI.DAG.getTargetLoweringInfo().isBigEndian())

8626

std::swap (NewLD1, NewLD2);

8627

SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2);

8628

return Result;

8629

}

8630

8631

return SDValue();

8632

}

8633

8634

/// PerformVMOVDRRCombine - Target-specific dag combine xforms for

8635

/// ARMISD::VMOVDRR. This is also used for BUILD_VECTORs with 2 operands.

8636

static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) {

8637

// N=vmovrrd(X); vmovdrr(N:0, N:1) -> bit_convert(X)

8638

SDValue Op0 = N->getOperand(0);

8639

SDValue Op1 = N->getOperand(1);

8640

if (Op0.getOpcode() == ISD::BITCAST)

8641

Op0 = Op0.getOperand(0);

8642

if (Op1.getOpcode() == ISD::BITCAST)

8643

Op1 = Op1.getOperand(0);

8644

if (Op0.getOpcode() == ARMISD::VMOVRRD &&

8645

Op0.getNode() == Op1.getNode() &&

8646

Op0.getResNo() == 0 && Op1.getResNo() == 1)

8647

return DAG.getNode(ISD::BITCAST, SDLoc(N),

8648

N->getValueType(0), Op0.getOperand(0));

8649

return SDValue();

8650

}

8651

8652

/// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node

8653

/// are normal, non-volatile loads. If so, it is profitable to bitcast an

8654

/// i64 vector to have f64 elements, since the value can then be loaded

8655

/// directly into a VFP register.

8656

static bool hasNormalLoadOperand(SDNode *N) {

8657

unsigned NumElts = N->getValueType(0).getVectorNumElements();

8658

for (unsigned i = 0; i < NumElts; ++i) {

8659

SDNode *Elt = N->getOperand(i).getNode();

8660

if (ISD::isNormalLoad(Elt) && !cast<LoadSDNode>(Elt)->isVolatile())

8661

return true;

8662

}

8663

return false;

8664

}

8665

8666

/// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for

8667

/// ISD::BUILD_VECTOR.

8668

static SDValue PerformBUILD_VECTORCombine(SDNode *N,

8669

TargetLowering::DAGCombinerInfo &DCI,

8670

const ARMSubtarget *Subtarget) {

8671

// build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X):

8672

// VMOVRRD is introduced when legalizing i64 types. It forces the i64 value

8673

// into a pair of GPRs, which is fine when the value is used as a scalar,

8674

// but if the i64 value is converted to a vector, we need to undo the VMOVRRD.

8675

SelectionDAG &DAG = DCI.DAG;

8676

if (N->getNumOperands() == 2) {

8677

SDValue RV = PerformVMOVDRRCombine(N, DAG);

8678

if (RV.getNode())

8679

return RV;

8680

}

8681

8682

// Load i64 elements as f64 values so that type legalization does not split

8683

// them up into i32 values.

8684

EVT VT = N->getValueType(0);

8685

if (VT.getVectorElementType() != MVT::i64 || !hasNormalLoadOperand(N))

8686

return SDValue();

8687

SDLoc dl(N);

8688

SmallVector<SDValue, 8> Ops;

8689

unsigned NumElts = VT.getVectorNumElements();

8690

for (unsigned i = 0; i < NumElts; ++i) {

8691

SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(i));

8692

Ops.push_back(V);

8693

// Make the DAGCombiner fold the bitcast.

8694

DCI.AddToWorklist(V.getNode());

8695

}

8696

EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts);

8697

SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, FloatVT, Ops);

8698

return DAG.getNode(ISD::BITCAST, dl, VT, BV);

8699

}

8700

8701

/// \brief Target-specific dag combine xforms for ARMISD::BUILD_VECTOR.

8702

static SDValue

8703

PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {

8704

// ARMISD::BUILD_VECTOR is introduced when legalizing ISD::BUILD_VECTOR.

8705

// At that time, we may have inserted bitcasts from integer to float.

8706

// If these bitcasts have survived DAGCombine, change the lowering of this

8707

// BUILD_VECTOR in something more vector friendly, i.e., that does not

8708

// force to use floating point types.

8709

8710

// Make sure we can change the type of the vector.

8711

// This is possible iff:

8712

// 1. The vector is only used in a bitcast to a integer type. I.e.,

8713

// 1.1. Vector is used only once.

8714

// 1.2. Use is a bit convert to an integer type.

8715

// 2. The size of its operands are 32-bits (64-bits are not legal).

8716

EVT VT = N->getValueType(0);

8717

EVT EltVT = VT.getVectorElementType();

8718

8719

// Check 1.1. and 2.

8720

if (EltVT.getSizeInBits() != 32 || !N->hasOneUse())

8721

return SDValue();

8722

8723

// By construction, the input type must be float.

8724

assert(EltVT == MVT::f32 && "Unexpected type!")((EltVT == MVT::f32 && "Unexpected type!") ? static_cast
<void> (0) : __assert_fail ("EltVT == MVT::f32 && \"Unexpected type!\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 8724, __PRETTY_FUNCTION__));

8725

8726

// Check 1.2.

8727

SDNode *Use = *N->use_begin();

8728

if (Use->getOpcode() != ISD::BITCAST ||

8729

Use->getValueType(0).isFloatingPoint())

8730

return SDValue();

8731

8732

// Check profitability.

8733

// Model is, if more than half of the relevant operands are bitcast from

8734

// i32, turn the build_vector into a sequence of insert_vector_elt.

8735

// Relevant operands are everything that is not statically

8736

// (i.e., at compile time) bitcasted.

8737

unsigned NumOfBitCastedElts = 0;

8738

unsigned NumElts = VT.getVectorNumElements();

8739

unsigned NumOfRelevantElts = NumElts;

8740

for (unsigned Idx = 0; Idx < NumElts; ++Idx) {

8741

SDValue Elt = N->getOperand(Idx);

8742

if (Elt->getOpcode() == ISD::BITCAST) {

8743

// Assume only bit cast to i32 will go away.

8744

if (Elt->getOperand(0).getValueType() == MVT::i32)

8745

++NumOfBitCastedElts;

8746

} else if (Elt.getOpcode() == ISD::UNDEF || isa<ConstantSDNode>(Elt))

8747

// Constants are statically casted, thus do not count them as

8748

// relevant operands.

8749

--NumOfRelevantElts;

8750

}

8751

8752

// Check if more than half of the elements require a non-free bitcast.

8753

if (NumOfBitCastedElts <= NumOfRelevantElts / 2)

8754

return SDValue();

8755

8756

SelectionDAG &DAG = DCI.DAG;

8757

// Create the new vector type.

8758

EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);

8759

// Check if the type is legal.

8760

const TargetLowering &TLI = DAG.getTargetLoweringInfo();

8761

if (!TLI.isTypeLegal(VecVT))

8762

return SDValue();

8763

8764

// Combine:

8765

// ARMISD::BUILD_VECTOR E1, E2, ..., EN.

8766

// => BITCAST INSERT_VECTOR_ELT

8767

// (INSERT_VECTOR_ELT (...), (BITCAST EN-1), N-1),

8768

// (BITCAST EN), N.

8769

SDValue Vec = DAG.getUNDEF(VecVT);

8770

SDLoc dl(N);

8771

for (unsigned Idx = 0 ; Idx < NumElts; ++Idx) {

8772

SDValue V = N->getOperand(Idx);

8773

if (V.getOpcode() == ISD::UNDEF)

8774

continue;

8775

if (V.getOpcode() == ISD::BITCAST &&

8776

V->getOperand(0).getValueType() == MVT::i32)

8777

// Fold obvious case.

8778

V = V.getOperand(0);

8779

else {

8780

V = DAG.getNode(ISD::BITCAST, SDLoc(V), MVT::i32, V);

8781

// Make the DAGCombiner fold the bitcasts.

8782

DCI.AddToWorklist(V.getNode());

8783

}

8784

SDValue LaneIdx = DAG.getConstant(Idx, MVT::i32);

8785

Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Vec, V, LaneIdx);

8786

}

8787

Vec = DAG.getNode(ISD::BITCAST, dl, VT, Vec);

8788

// Make the DAGCombiner fold the bitcasts.

8789

DCI.AddToWorklist(Vec.getNode());

8790

return Vec;

8791

}

8792

8793

/// PerformInsertEltCombine - Target-specific dag combine xforms for

8794

/// ISD::INSERT_VECTOR_ELT.

8795

static SDValue PerformInsertEltCombine(SDNode *N,

8796

TargetLowering::DAGCombinerInfo &DCI) {

8797

// Bitcast an i64 load inserted into a vector to f64.

8798

// Otherwise, the i64 value will be legalized to a pair of i32 values.

8799

EVT VT = N->getValueType(0);

8800

SDNode *Elt = N->getOperand(1).getNode();

8801

if (VT.getVectorElementType() != MVT::i64 ||

8802

!ISD::isNormalLoad(Elt) || cast<LoadSDNode>(Elt)->isVolatile())

8803

return SDValue();

8804

8805

SelectionDAG &DAG = DCI.DAG;

8806

SDLoc dl(N);

8807

EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,

8808

VT.getVectorNumElements());

8809

SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, N->getOperand(0));

8810

SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(1));

8811

// Make the DAGCombiner fold the bitcasts.

8812

DCI.AddToWorklist(Vec.getNode());

8813

DCI.AddToWorklist(V.getNode());

8814

SDValue InsElt = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, FloatVT,

8815

Vec, V, N->getOperand(2));

8816

return DAG.getNode(ISD::BITCAST, dl, VT, InsElt);

8817

}

8818

8819

/// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for

8820

/// ISD::VECTOR_SHUFFLE.

8821

static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {

8822

// The LLVM shufflevector instruction does not require the shuffle mask

8823

// length to match the operand vector length, but ISD::VECTOR_SHUFFLE does

8824

// have that requirement. When translating to ISD::VECTOR_SHUFFLE, if the

8825

// operands do not match the mask length, they are extended by concatenating

8826

// them with undef vectors. That is probably the right thing for other

8827

// targets, but for NEON it is better to concatenate two double-register

8828

// size vector operands into a single quad-register size vector. Do that

8829

// transformation here:

8830

// shuffle(concat(v1, undef), concat(v2, undef)) ->

8831

// shuffle(concat(v1, v2), undef)

8832

SDValue Op0 = N->getOperand(0);

8833

SDValue Op1 = N->getOperand(1);

8834

if (Op0.getOpcode() != ISD::CONCAT_VECTORS ||

8835

Op1.getOpcode() != ISD::CONCAT_VECTORS ||

8836

Op0.getNumOperands() != 2 ||

8837

Op1.getNumOperands() != 2)

8838

return SDValue();

8839

SDValue Concat0Op1 = Op0.getOperand(1);

8840

SDValue Concat1Op1 = Op1.getOperand(1);

8841

if (Concat0Op1.getOpcode() != ISD::UNDEF ||

8842

Concat1Op1.getOpcode() != ISD::UNDEF)

8843

return SDValue();

8844

// Skip the transformation if any of the types are illegal.

8845

const TargetLowering &TLI = DAG.getTargetLoweringInfo();

8846

EVT VT = N->getValueType(0);

8847

if (!TLI.isTypeLegal(VT) ||

8848

!TLI.isTypeLegal(Concat0Op1.getValueType()) ||

8849

!TLI.isTypeLegal(Concat1Op1.getValueType()))

8850

return SDValue();

8851

8852

SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,

8853

Op0.getOperand(0), Op1.getOperand(0));

8854

// Translate the shuffle mask.

8855

SmallVector<int, 16> NewMask;

8856

unsigned NumElts = VT.getVectorNumElements();

8857

unsigned HalfElts = NumElts/2;

8858

ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);

8859

for (unsigned n = 0; n < NumElts; ++n) {

8860

int MaskElt = SVN->getMaskElt(n);

8861

int NewElt = -1;

8862

if (MaskElt < (int)HalfElts)

8863

NewElt = MaskElt;

8864

else if (MaskElt >= (int)NumElts && MaskElt < (int)(NumElts + HalfElts))

8865

NewElt = HalfElts + MaskElt - NumElts;

8866

NewMask.push_back(NewElt);

8867

}

8868

return DAG.getVectorShuffle(VT, SDLoc(N), NewConcat,

8869

DAG.getUNDEF(VT), NewMask.data());

8870

}

8871

8872

/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP,

8873

/// NEON load/store intrinsics, and generic vector load/stores, to merge

8874

/// base address updates.

8875

/// For generic load/stores, the memory type is assumed to be a vector.

8876

/// The caller is assumed to have checked legality.

8877

static SDValue CombineBaseUpdate(SDNode *N,

8878

TargetLowering::DAGCombinerInfo &DCI) {

8879

SelectionDAG &DAG = DCI.DAG;

8880

bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID ||

8881

N->getOpcode() == ISD::INTRINSIC_W_CHAIN);

8882

bool isStore = N->getOpcode() == ISD::STORE;

8883

unsigned AddrOpIdx = ((isIntrinsic || isStore) ? 2 : 1);

8884

SDValue Addr = N->getOperand(AddrOpIdx);

8885

8886

// Search for a use of the address operand that is an increment.

8887

for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),

8888

UE = Addr.getNode()->use_end(); UI != UE; ++UI) {

8889

SDNode *User = *UI;

8890

if (User->getOpcode() != ISD::ADD ||

8891

UI.getUse().getResNo() != Addr.getResNo())

8892

continue;

8893

8894

// Check that the add is independent of the load/store. Otherwise, folding

8895

// it would create a cycle.

8896

if (User->isPredecessorOf(N) || N->isPredecessorOf(User))

8897

continue;

8898

8899

// Find the new opcode for the updating load/store.

8900

bool isLoad = true;

8901

bool isLaneOp = false;

8902

unsigned NewOpc = 0;

8903

unsigned NumVecs = 0;

8904

if (isIntrinsic) {

8905

unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();

8906

switch (IntNo) {

8907

default: llvm_unreachable("unexpected intrinsic for Neon base update")::llvm::llvm_unreachable_internal("unexpected intrinsic for Neon base update"
, "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 8907);

8908

case Intrinsic::arm_neon_vld1: NewOpc = ARMISD::VLD1_UPD;

8909

NumVecs = 1; break;

8910

case Intrinsic::arm_neon_vld2: NewOpc = ARMISD::VLD2_UPD;

8911

NumVecs = 2; break;

8912

case Intrinsic::arm_neon_vld3: NewOpc = ARMISD::VLD3_UPD;

8913

NumVecs = 3; break;

8914

case Intrinsic::arm_neon_vld4: NewOpc = ARMISD::VLD4_UPD;

8915

NumVecs = 4; break;

8916

case Intrinsic::arm_neon_vld2lane: NewOpc = ARMISD::VLD2LN_UPD;

8917

NumVecs = 2; isLaneOp = true; break;

8918

case Intrinsic::arm_neon_vld3lane: NewOpc = ARMISD::VLD3LN_UPD;

8919

NumVecs = 3; isLaneOp = true; break;

8920

case Intrinsic::arm_neon_vld4lane: NewOpc = ARMISD::VLD4LN_UPD;

8921

NumVecs = 4; isLaneOp = true; break;

8922

case Intrinsic::arm_neon_vst1: NewOpc = ARMISD::VST1_UPD;

8923

NumVecs = 1; isLoad = false; break;

8924

case Intrinsic::arm_neon_vst2: NewOpc = ARMISD::VST2_UPD;

8925

NumVecs = 2; isLoad = false; break;

8926

case Intrinsic::arm_neon_vst3: NewOpc = ARMISD::VST3_UPD;

8927

NumVecs = 3; isLoad = false; break;

8928

case Intrinsic::arm_neon_vst4: NewOpc = ARMISD::VST4_UPD;

8929

NumVecs = 4; isLoad = false; break;

8930

case Intrinsic::arm_neon_vst2lane: NewOpc = ARMISD::VST2LN_UPD;

8931

NumVecs = 2; isLoad = false; isLaneOp = true; break;

8932

case Intrinsic::arm_neon_vst3lane: NewOpc = ARMISD::VST3LN_UPD;

8933

NumVecs = 3; isLoad = false; isLaneOp = true; break;

8934

case Intrinsic::arm_neon_vst4lane: NewOpc = ARMISD::VST4LN_UPD;

8935

NumVecs = 4; isLoad = false; isLaneOp = true; break;

8936

}

8937

} else {

8938

isLaneOp = true;

8939

switch (N->getOpcode()) {

8940

default: llvm_unreachable("unexpected opcode for Neon base update")::llvm::llvm_unreachable_internal("unexpected opcode for Neon base update"
, "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 8940);

8941

case ARMISD::VLD2DUP: NewOpc = ARMISD::VLD2DUP_UPD; NumVecs = 2; break;

8942

case ARMISD::VLD3DUP: NewOpc = ARMISD::VLD3DUP_UPD; NumVecs = 3; break;

8943

case ARMISD::VLD4DUP: NewOpc = ARMISD::VLD4DUP_UPD; NumVecs = 4; break;

8944

case ISD::LOAD: NewOpc = ARMISD::VLD1_UPD;

8945

NumVecs = 1; isLaneOp = false; break;

8946

case ISD::STORE: NewOpc = ARMISD::VST1_UPD;

8947

NumVecs = 1; isLoad = false; isLaneOp = false; break;

8948

}

8949

}

8950

8951

// Find the size of memory referenced by the load/store.

8952

EVT VecTy;

8953

if (isLoad)

8954

VecTy = N->getValueType(0);

8955

else if (isIntrinsic)

8956

VecTy = N->getOperand(AddrOpIdx+1).getValueType();

8957

else

8958

VecTy = N->getOperand(1).getValueType();

8959

8960

unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;

8961

if (isLaneOp)

8962

NumBytes /= VecTy.getVectorNumElements();

8963

8964

// If the increment is a constant, it must match the memory ref size.

8965

SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);

8966

if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {

8967

uint64_t IncVal = CInc->getZExtValue();

8968

if (IncVal != NumBytes)

8969

continue;

8970

} else if (NumBytes >= 3 * 16) {

8971

// VLD3/4 and VST3/4 for 128-bit vectors are implemented with two

8972

// separate instructions that make it harder to use a non-constant update.

8973

continue;

8974

}

8975

8976

// Create the new updating load/store node.

8977

// First, create an SDVTList for the new updating node's results.

8978

EVT Tys[6];

8979

unsigned NumResultVecs = (isLoad ? NumVecs : 0);

8980

unsigned n;

8981

for (n = 0; n < NumResultVecs; ++n)

8982

Tys[n] = VecTy;

8983

Tys[n++] = MVT::i32;

8984

Tys[n] = MVT::Other;

8985

SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumResultVecs+2));

8986

8987

// Then, gather the new node's operands.

8988

SmallVector<SDValue, 8> Ops;

8989

Ops.push_back(N->getOperand(0)); // incoming chain

8990

Ops.push_back(N->getOperand(AddrOpIdx));

8991

Ops.push_back(Inc);

8992

if (StoreSDNode *StN = dyn_cast<StoreSDNode>(N)) {

8993

// Try to match the intrinsic's signature

8994

Ops.push_back(StN->getValue());

8995

Ops.push_back(DAG.getConstant(StN->getAlignment(), MVT::i32));

8996

} else {

8997

for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands(); ++i)

8998

Ops.push_back(N->getOperand(i));

8999

}

9000

MemSDNode *MemInt = cast<MemSDNode>(N);

9001

SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys,

9002

Ops, MemInt->getMemoryVT(),

9003

MemInt->getMemOperand());

9004

9005

// Update the uses.

9006

std::vector<SDValue> NewResults;

9007

for (unsigned i = 0; i < NumResultVecs; ++i) {

9008

NewResults.push_back(SDValue(UpdN.getNode(), i));

9009

}

9010

NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs+1)); // chain

9011

DCI.CombineTo(N, NewResults);

9012

DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));

9013

9014

break;

9015

}

9016

return SDValue();

9017

}

9018

9019

/// PerformVLDCombine - Forward to CombineBaseUpdate, except before
/// legalization or when called by the legalizer, where nothing is done and an
/// empty SDValue is returned.
static SDValue PerformVLDCombine(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI) {
  const bool TooEarly = DCI.isBeforeLegalize() || DCI.isCalledByLegalizer();
  if (TooEarly)
    return SDValue();

  return CombineBaseUpdate(N, DCI);
}

9026

9027

/// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a

9028

/// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic

9029

/// are also VDUPLANEs. If so, combine them to a vldN-dup operation and

9030

/// return true.

9031

static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {

9032

SelectionDAG &DAG = DCI.DAG;

9033

EVT VT = N->getValueType(0);

9034

// vldN-dup instructions only support 64-bit vectors for N > 1.

9035

if (!VT.is64BitVector())

9036

return false;

9037

9038

// Check if the VDUPLANE operand is a vldN-dup intrinsic.

9039

SDNode *VLD = N->getOperand(0).getNode();

9040

if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN)

9041

return false;

9042

unsigned NumVecs = 0;

9043

unsigned NewOpc = 0;

9044

unsigned IntNo = cast<ConstantSDNode>(VLD->getOperand(1))->getZExtValue();

9045

if (IntNo == Intrinsic::arm_neon_vld2lane) {

9046

NumVecs = 2;

9047

NewOpc = ARMISD::VLD2DUP;

9048

} else if (IntNo == Intrinsic::arm_neon_vld3lane) {

9049

NumVecs = 3;

9050

NewOpc = ARMISD::VLD3DUP;

9051

} else if (IntNo == Intrinsic::arm_neon_vld4lane) {

9052

NumVecs = 4;

9053

NewOpc = ARMISD::VLD4DUP;

9054

} else {

9055

return false;

9056

}

9057

9058

// First check that all the vldN-lane uses are VDUPLANEs and that the lane

9059

// numbers match the load.

9060

unsigned VLDLaneNo =

9061

cast<ConstantSDNode>(VLD->getOperand(NumVecs+3))->getZExtValue();

9062

for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();

9063

UI != UE; ++UI) {

9064

// Ignore uses of the chain result.

9065

if (UI.getUse().getResNo() == NumVecs)

9066

continue;

9067

SDNode *User = *UI;

9068

if (User->getOpcode() != ARMISD::VDUPLANE ||

9069

VLDLaneNo != cast<ConstantSDNode>(User->getOperand(1))->getZExtValue())

9070

return false;

9071

}

9072

9073

// Create the vldN-dup node.

9074

EVT Tys[5];

9075

unsigned n;

9076

for (n = 0; n < NumVecs; ++n)

9077

Tys[n] = VT;

9078

Tys[n] = MVT::Other;

9079

SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumVecs+1));

9080

SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) };

9081

MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD);

9082

SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, SDLoc(VLD), SDTys,

9083

Ops, VLDMemInt->getMemoryVT(),

9084

VLDMemInt->getMemOperand());

9085

9086

// Update the uses.

9087

for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();

9088

UI != UE; ++UI) {

9089

unsigned ResNo = UI.getUse().getResNo();

9090

// Ignore uses of the chain result.

9091

if (ResNo == NumVecs)

9092

continue;

9093

SDNode *User = *UI;

9094

DCI.CombineTo(User, SDValue(VLDDup.getNode(), ResNo));

9095

}

9096

9097

// Now the vldN-lane intrinsic is dead except for its chain result.

9098

// Update uses of the chain.

9099

std::vector<SDValue> VLDDupResults;

9100

for (unsigned n = 0; n < NumVecs; ++n)

9101

VLDDupResults.push_back(SDValue(VLDDup.getNode(), n));

9102

VLDDupResults.push_back(SDValue(VLDDup.getNode(), NumVecs));

9103

DCI.CombineTo(VLD, VLDDupResults);

9104

9105

return true;

9106

}

9107

9108

/// PerformVDUPLANECombine - Target-specific dag combine xforms for

9109

/// ARMISD::VDUPLANE.

9110

static SDValue PerformVDUPLANECombine(SDNode *N,

9111

TargetLowering::DAGCombinerInfo &DCI) {

9112

SDValue Op = N->getOperand(0);

9113

9114

// If the source is a vldN-lane (N > 1) intrinsic, and all the other uses

9115

// of that intrinsic are also VDUPLANEs, combine them to a vldN-dup operation.

9116

if (CombineVLDDUP(N, DCI))

9117

return SDValue(N, 0);

9118

9119

// If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is

9120

// redundant. Ignore bit_converts for now; element sizes are checked below.

9121

while (Op.getOpcode() == ISD::BITCAST)

9122

Op = Op.getOperand(0);

9123

if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM)

9124

return SDValue();

9125

9126

// Make sure the VMOV element size is not bigger than the VDUPLANE elements.

9127

unsigned EltSize = Op.getValueType().getVectorElementType().getSizeInBits();

9128

// The canonical VMOV for a zero vector uses a 32-bit element size.

9129

unsigned Imm = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();

9130

unsigned EltBits;

9131

if (ARM_AM::decodeNEONModImm(Imm, EltBits) == 0)

9132

EltSize = 8;

9133

EVT VT = N->getValueType(0);

9134

if (EltSize > VT.getVectorElementType().getSizeInBits())

9135

return SDValue();

9136

9137

return DCI.DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);

9138

}

9139

9140

/// PerformLOADCombine - If N is a normal load of a legal vector type, try to
/// combine it into a VLD1_UPD via CombineBaseUpdate. Any other load is left
/// alone (empty SDValue).
static SDValue PerformLOADCombine(SDNode *N,
                                  TargetLowering::DAGCombinerInfo &DCI) {
  const EVT VT = N->getValueType(0);

  if (!ISD::isNormalLoad(N) || !VT.isVector())
    return SDValue();

  if (!DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
    return SDValue();

  return CombineBaseUpdate(N, DCI);
}

9151

9152

/// PerformSTORECombine - Target-specific dag combine xforms for

9153

/// ISD::STORE.

9154

static SDValue PerformSTORECombine(SDNode *N,

9155

TargetLowering::DAGCombinerInfo &DCI) {

9156

StoreSDNode *St = cast<StoreSDNode>(N);

9157

if (St->isVolatile())

9158

return SDValue();

9159

9160

// Optimize trunc store (of multiple scalars) to shuffle and store. First,

9161

// pack all of the elements in one place. Next, store to memory in fewer

9162

// chunks.

9163

SDValue StVal = St->getValue();

9164

EVT VT = StVal.getValueType();

9165

if (St->isTruncatingStore() && VT.isVector()) {

9166

SelectionDAG &DAG = DCI.DAG;

9167

const TargetLowering &TLI = DAG.getTargetLoweringInfo();

9168

EVT StVT = St->getMemoryVT();

9169

unsigned NumElems = VT.getVectorNumElements();

9170

assert(StVT != VT && "Cannot truncate to the same type")((StVT != VT && "Cannot truncate to the same type") ?
static_cast<void> (0) : __assert_fail ("StVT != VT && \"Cannot truncate to the same type\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 9170, __PRETTY_FUNCTION__));

9171

unsigned FromEltSz = VT.getVectorElementType().getSizeInBits();

9172

unsigned ToEltSz = StVT.getVectorElementType().getSizeInBits();

9173

9174

// From, To sizes and ElemCount must be pow of two

9175

if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz)) return SDValue();

9176

9177

// We are going to use the original vector elt for storing.

9178

// Accumulated smaller vector elements must be a multiple of the store size.

9179

if (0 != (NumElems * FromEltSz) % ToEltSz) return SDValue();

9180

9181

unsigned SizeRatio = FromEltSz / ToEltSz;

9182

assert(SizeRatio * NumElems * ToEltSz == VT.getSizeInBits())((SizeRatio * NumElems * ToEltSz == VT.getSizeInBits()) ? static_cast
<void> (0) : __assert_fail ("SizeRatio * NumElems * ToEltSz == VT.getSizeInBits()"
, "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 9182, __PRETTY_FUNCTION__));

9183

9184

// Create a type on which we perform the shuffle.

9185

EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(),

9186

NumElems*SizeRatio);

9187

assert(WideVecVT.getSizeInBits() == VT.getSizeInBits())((WideVecVT.getSizeInBits() == VT.getSizeInBits()) ? static_cast
<void> (0) : __assert_fail ("WideVecVT.getSizeInBits() == VT.getSizeInBits()"
, "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 9187, __PRETTY_FUNCTION__));

9188

9189

SDLoc DL(St);

9190

SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal);

9191

SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);

9192

for (unsigned i = 0; i < NumElems; ++i)

9193

ShuffleVec[i] = TLI.isBigEndian() ? (i+1) * SizeRatio - 1 : i * SizeRatio;

9194

9195

// Can't shuffle using an illegal type.

9196

if (!TLI.isTypeLegal(WideVecVT)) return SDValue();

9197

9198

SDValue Shuff = DAG.getVectorShuffle(WideVecVT, DL, WideVec,

9199

DAG.getUNDEF(WideVec.getValueType()),

9200

ShuffleVec.data());

9201

// At this point all of the data is stored at the bottom of the

9202

// register. We now need to save it to mem.

9203

9204

// Find the largest store unit

9205

MVT StoreType = MVT::i8;

9206

for (unsigned tp = MVT::FIRST_INTEGER_VALUETYPE;

9207

tp < MVT::LAST_INTEGER_VALUETYPE; ++tp) {

9208

MVT Tp = (MVT::SimpleValueType)tp;

9209

if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz)

9210

StoreType = Tp;

9211

}

9212

// Didn't find a legal store type.

9213

if (!TLI.isTypeLegal(StoreType))

9214

return SDValue();

9215

9216

// Bitcast the original vector into a vector of store-size units

9217

EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(),

9218

StoreType, VT.getSizeInBits()/EVT(StoreType).getSizeInBits());

9219

assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits())((StoreVecVT.getSizeInBits() == VT.getSizeInBits()) ? static_cast
<void> (0) : __assert_fail ("StoreVecVT.getSizeInBits() == VT.getSizeInBits()"
, "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 9219, __PRETTY_FUNCTION__));

9220

SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff);

9221

SmallVector<SDValue, 8> Chains;

9222

SDValue Increment = DAG.getConstant(StoreType.getSizeInBits()/8,

9223

TLI.getPointerTy());

9224

SDValue BasePtr = St->getBasePtr();

9225

9226

// Perform one or more big stores into memory.

9227

unsigned E = (ToEltSz*NumElems)/StoreType.getSizeInBits();

9228

for (unsigned I = 0; I < E; I++) {

9229

SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,

9230

StoreType, ShuffWide,

9231

DAG.getIntPtrConstant(I));

9232

SDValue Ch = DAG.getStore(St->getChain(), DL, SubVec, BasePtr,

9233

St->getPointerInfo(), St->isVolatile(),

9234

St->isNonTemporal(), St->getAlignment());

9235

BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,

9236

Increment);

9237

Chains.push_back(Ch);

9238

}

9239

return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);

9240

}

9241

9242

if (!ISD::isNormalStore(St))

9243

return SDValue();

9244

9245

// Split a store of a VMOVDRR into two integer stores to avoid mixing NEON and

9246

// ARM stores of arguments in the same cache line.

9247

if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR &&

9248

StVal.getNode()->hasOneUse()) {

9249

SelectionDAG &DAG = DCI.DAG;

9250

bool isBigEndian = DAG.getTargetLoweringInfo().isBigEndian();

9251

SDLoc DL(St);

9252

SDValue BasePtr = St->getBasePtr();

9253

SDValue NewST1 = DAG.getStore(St->getChain(), DL,

9254

StVal.getNode()->getOperand(isBigEndian ? 1 : 0 ),

9255

BasePtr, St->getPointerInfo(), St->isVolatile(),

9256

St->isNonTemporal(), St->getAlignment());

9257

9258

SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,

9259

DAG.getConstant(4, MVT::i32));

9260

return DAG.getStore(NewST1.getValue(0), DL,

9261

StVal.getNode()->getOperand(isBigEndian ? 0 : 1),

9262

OffsetPtr, St->getPointerInfo(), St->isVolatile(),

9263

St->isNonTemporal(),

9264

std::min(4U, St->getAlignment() / 2));

9265

}

9266

9267

if (StVal.getValueType() == MVT::i64 &&

9268

StVal.getNode()->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {

9269

9270

// Bitcast an i64 store extracted from a vector to f64.

9271

// Otherwise, the i64 value will be legalized to a pair of i32 values.

9272

SelectionDAG &DAG = DCI.DAG;

9273

SDLoc dl(StVal);

9274

SDValue IntVec = StVal.getOperand(0);

9275

EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,

9276

IntVec.getValueType().getVectorNumElements());

9277

SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec);

9278

SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,

9279

Vec, StVal.getOperand(1));

9280

dl = SDLoc(N);

9281

SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt);

9282

// Make the DAGCombiner fold the bitcasts.

9283

DCI.AddToWorklist(Vec.getNode());

9284

DCI.AddToWorklist(ExtElt.getNode());

9285

DCI.AddToWorklist(V.getNode());

9286

return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(),

9287

St->getPointerInfo(), St->isVolatile(),

9288

St->isNonTemporal(), St->getAlignment(),

9289

St->getAAInfo());

9290

}

9291

9292

// If this is a legal vector store, try to combine it into a VST1_UPD.

9293

if (ISD::isNormalStore(N) && VT.isVector() &&

9294

DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))

9295

return CombineBaseUpdate(N, DCI);

9296

9297

return SDValue();

9298

}

9299

9300

// isConstVecPow2 - Return true if each vector element is a power of 2, all

9301

// elements are the same constant, C, and Log2(C) ranges from 1 to 32.

9302

static bool isConstVecPow2(SDValue ConstVec, bool isSigned, uint64_t &C)

9303

{

9304

integerPart cN;

9305

integerPart c0 = 0;

9306

for (unsigned I = 0, E = ConstVec.getValueType().getVectorNumElements();

9307

I != E; I++) {

9308

ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(ConstVec.getOperand(I));

9309

if (!C)

9310

return false;

9311

9312

bool isExact;

9313

APFloat APF = C->getValueAPF();

9314

if (APF.convertToInteger(&cN, 64, isSigned, APFloat::rmTowardZero, &isExact)

9315

!= APFloat::opOK || !isExact)

9316

return false;

9317

9318

c0 = (I == 0) ? cN : c0;

9319

if (!isPowerOf2_64(cN) || c0 != cN || Log2_64(c0) < 1 || Log2_64(c0) > 32)

9320

return false;

9321

}

9322

C = c0;

9323

return true;

9324

}

9325

9326

/// PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD)

9327

/// can replace combinations of VMUL and VCVT (floating-point to integer)

9328

/// when the VMUL has a constant operand that is a power of 2.

9329

///

9330

/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):

9331

/// vmul.f32 d16, d17, d16

9332

/// vcvt.s32.f32 d16, d16

9333

/// becomes:

9334

/// vcvt.s32.f32 d16, d16, #3

9335

static SDValue PerformVCVTCombine(SDNode *N,

9336

TargetLowering::DAGCombinerInfo &DCI,

9337

const ARMSubtarget *Subtarget) {

9338

SelectionDAG &DAG = DCI.DAG;

9339

SDValue Op = N->getOperand(0);

9340

9341

if (!Subtarget->hasNEON() || !Op.getValueType().isVector() ||

9342

Op.getOpcode() != ISD::FMUL)

9343

return SDValue();

9344

9345

uint64_t C;

9346

SDValue N0 = Op->getOperand(0);

9347

SDValue ConstVec = Op->getOperand(1);

9348

bool isSigned = N->getOpcode() == ISD::FP_TO_SINT;

9349

9350

if (ConstVec.getOpcode() != ISD::BUILD_VECTOR ||

9351

!isConstVecPow2(ConstVec, isSigned, C))

9352

return SDValue();

9353

9354

MVT FloatTy = Op.getSimpleValueType().getVectorElementType();

9355

MVT IntTy = N->getSimpleValueType(0).getVectorElementType();

9356

if (FloatTy.getSizeInBits() != 32 || IntTy.getSizeInBits() > 32) {

9357

// These instructions only exist converting from f32 to i32. We can handle

9358

// smaller integers by generating an extra truncate, but larger ones would

9359

// be lossy.

9360

return SDValue();

9361

}

9362

9363

unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs :

9364

Intrinsic::arm_neon_vcvtfp2fxu;

9365

unsigned NumLanes = Op.getValueType().getVectorNumElements();

9366

SDValue FixConv = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N),

9367

NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,

9368

DAG.getConstant(IntrinsicOpcode, MVT::i32), N0,

9369

DAG.getConstant(Log2_64(C), MVT::i32));

9370

9371

if (IntTy.getSizeInBits() < FloatTy.getSizeInBits())

9372

FixConv = DAG.getNode(ISD::TRUNCATE, SDLoc(N), N->getValueType(0), FixConv);

9373

9374

return FixConv;

9375

}

9376

9377

/// PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD)

9378

/// can replace combinations of VCVT (integer to floating-point) and VDIV

9379

/// when the VDIV has a constant operand that is a power of 2.

9380

///

9381

/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):

9382

/// vcvt.f32.s32 d16, d16

9383

/// vdiv.f32 d16, d17, d16

9384

/// becomes:

9385

/// vcvt.f32.s32 d16, d16, #3

9386

static SDValue PerformVDIVCombine(SDNode *N,

9387

TargetLowering::DAGCombinerInfo &DCI,

9388

const ARMSubtarget *Subtarget) {

9389

SelectionDAG &DAG = DCI.DAG;

9390

SDValue Op = N->getOperand(0);

9391

unsigned OpOpcode = Op.getNode()->getOpcode();

9392

9393

if (!Subtarget->hasNEON() || !N->getValueType(0).isVector() ||

9394

(OpOpcode != ISD::SINT_TO_FP && OpOpcode != ISD::UINT_TO_FP))

9395

return SDValue();

9396

9397

uint64_t C;

9398

SDValue ConstVec = N->getOperand(1);

9399

bool isSigned = OpOpcode == ISD::SINT_TO_FP;

9400

9401

if (ConstVec.getOpcode() != ISD::BUILD_VECTOR ||

9402

!isConstVecPow2(ConstVec, isSigned, C))

9403

return SDValue();

9404

9405

MVT FloatTy = N->getSimpleValueType(0).getVectorElementType();

9406

MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType();

9407

if (FloatTy.getSizeInBits() != 32 || IntTy.getSizeInBits() > 32) {

9408

// These instructions only exist converting from i32 to f32. We can handle

9409

// smaller integers by generating an extra extend, but larger ones would

9410

// be lossy.

9411

return SDValue();

9412

}

9413

9414

SDValue ConvInput = Op.getOperand(0);

9415

unsigned NumLanes = Op.getValueType().getVectorNumElements();

9416

if (IntTy.getSizeInBits() < FloatTy.getSizeInBits())

9417

ConvInput = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,

9418

SDLoc(N), NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,

9419

ConvInput);

9420

9421

unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfxs2fp :

9422

Intrinsic::arm_neon_vcvtfxu2fp;

9423

return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N),

9424

Op.getValueType(),

9425

DAG.getConstant(IntrinsicOpcode, MVT::i32),

9426

ConvInput, DAG.getConstant(Log2_64(C), MVT::i32));

9427

}

9428

9429

/// Getvshiftimm - Check if this is a valid build_vector for the immediate

9430

/// operand of a vector shift operation, where all the elements of the

9431

/// build_vector must have the same constant integer value.

9432

static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {

9433

// Ignore bit_converts.

9434

while (Op.getOpcode() == ISD::BITCAST)

9435

Op = Op.getOperand(0);

9436

BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());

9437

APInt SplatBits, SplatUndef;

9438

unsigned SplatBitSize;

9439

bool HasAnyUndefs;

9440

if (! BVN || ! BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,

9441

HasAnyUndefs, ElementBits) ||

9442

SplatBitSize > ElementBits)

9443

return false;

9444

Cnt = SplatBits.getSExtValue();

9445

return true;

9446

}

9447

9448

/// isVShiftLImm - Check if this is a valid build_vector for the immediate

9449

/// operand of a vector shift left operation. That value must be in the range:

9450

/// 0 <= Value < ElementBits for a left shift; or

9451

/// 0 <= Value <= ElementBits for a long left shift.

9452

static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {

9453

assert(VT.isVector() && "vector shift count is not a vector type")((VT.isVector() && "vector shift count is not a vector type"
) ? static_cast<void> (0) : __assert_fail ("VT.isVector() && \"vector shift count is not a vector type\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 9453, __PRETTY_FUNCTION__));

9454

unsigned ElementBits = VT.getVectorElementType().getSizeInBits();

9455

if (! getVShiftImm(Op, ElementBits, Cnt))

9456

return false;

9457

return (Cnt >= 0 && (isLong ? Cnt-1 : Cnt) < ElementBits);

9458

}

9459

9460

/// isVShiftRImm - Check if this is a valid build_vector for the immediate

9461

/// operand of a vector shift right operation. For a shift opcode, the value

9462

/// is positive, but for an intrinsic the value count must be negative. The

9463

/// absolute value must be in the range:

9464

/// 1 <= |Value| <= ElementBits for a right shift; or

9465

/// 1 <= |Value| <= ElementBits/2 for a narrow right shift.

9466

static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,

9467

int64_t &Cnt) {

9468

9469

unsigned ElementBits = VT.getVectorElementType().getSizeInBits();

9470

if (! getVShiftImm(Op, ElementBits, Cnt))

9471

return false;

9472

if (isIntrinsic)

9473

Cnt = -Cnt;

9474

return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits/2 : ElementBits));

9475

}

9476

9477

/// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.

9478

static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {

9479

unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();

9480

switch (IntNo) {

9481

default:

9482

// Don't do anything for most intrinsics.

9483

break;

9484

9485

// Vector shifts: check for immediate versions and lower them.

9486

// Note: This is done during DAG combining instead of DAG legalizing because

9487

// the build_vectors for 64-bit vector element shift counts are generally

9488

// not legal, and it is hard to see their values after they get legalized to

9489

// loads from a constant pool.

9490

case Intrinsic::arm_neon_vshifts:

9491

case Intrinsic::arm_neon_vshiftu:

9492

case Intrinsic::arm_neon_vrshifts:

9493

case Intrinsic::arm_neon_vrshiftu:

9494

case Intrinsic::arm_neon_vrshiftn:

9495

case Intrinsic::arm_neon_vqshifts:

9496

case Intrinsic::arm_neon_vqshiftu:

9497

case Intrinsic::arm_neon_vqshiftsu:

9498

case Intrinsic::arm_neon_vqshiftns:

9499

case Intrinsic::arm_neon_vqshiftnu:

9500

case Intrinsic::arm_neon_vqshiftnsu:

9501

case Intrinsic::arm_neon_vqrshiftns:

9502

case Intrinsic::arm_neon_vqrshiftnu:

9503

case Intrinsic::arm_neon_vqrshiftnsu: {

9504

EVT VT = N->getOperand(1).getValueType();

9505

int64_t Cnt;

9506

unsigned VShiftOpc = 0;

9507

9508

switch (IntNo) {

9509

case Intrinsic::arm_neon_vshifts:

9510

case Intrinsic::arm_neon_vshiftu:

9511

if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) {

9512

VShiftOpc = ARMISD::VSHL;

9513

break;

9514

}

9515

if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) {

9516

VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ?

9517

ARMISD::VSHRs : ARMISD::VSHRu);

9518

break;

9519

}

9520

return SDValue();

9521

9522

case Intrinsic::arm_neon_vrshifts:

9523

case Intrinsic::arm_neon_vrshiftu:

9524

if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt))

9525

break;

9526

return SDValue();

9527

9528

case Intrinsic::arm_neon_vqshifts:

9529

case Intrinsic::arm_neon_vqshiftu:

9530

if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))

9531

break;

9532

return SDValue();

9533

9534

case Intrinsic::arm_neon_vqshiftsu:

9535

if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))

9536

break;

9537

llvm_unreachable("invalid shift count for vqshlu intrinsic")::llvm::llvm_unreachable_internal("invalid shift count for vqshlu intrinsic"
, "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 9537);

9538

9539

case Intrinsic::arm_neon_vrshiftn:

9540

case Intrinsic::arm_neon_vqshiftns:

9541

case Intrinsic::arm_neon_vqshiftnu:

9542

case Intrinsic::arm_neon_vqshiftnsu:

9543

case Intrinsic::arm_neon_vqrshiftns:

9544

case Intrinsic::arm_neon_vqrshiftnu:

9545

case Intrinsic::arm_neon_vqrshiftnsu:

9546

// Narrowing shifts require an immediate right shift.

9547

if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt))

9548

break;

9549

llvm_unreachable("invalid shift count for narrowing vector shift "::llvm::llvm_unreachable_internal("invalid shift count for narrowing vector shift "
"intrinsic", "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 9550)

9550

"intrinsic")::llvm::llvm_unreachable_internal("invalid shift count for narrowing vector shift "
"intrinsic", "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 9550);

9551

9552

default:

9553

llvm_unreachable("unhandled vector shift")::llvm::llvm_unreachable_internal("unhandled vector shift", "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 9553);

9554

}

9555

9556

switch (IntNo) {

9557

case Intrinsic::arm_neon_vshifts:

9558

case Intrinsic::arm_neon_vshiftu:

9559

// Opcode already set above.

9560

break;

9561

case Intrinsic::arm_neon_vrshifts:

9562

VShiftOpc = ARMISD::VRSHRs; break;

9563

case Intrinsic::arm_neon_vrshiftu:

9564

VShiftOpc = ARMISD::VRSHRu; break;

9565

case Intrinsic::arm_neon_vrshiftn:

9566

VShiftOpc = ARMISD::VRSHRN; break;

9567

case Intrinsic::arm_neon_vqshifts:

9568

VShiftOpc = ARMISD::VQSHLs; break;

9569

case Intrinsic::arm_neon_vqshiftu:

9570

VShiftOpc = ARMISD::VQSHLu; break;

9571

case Intrinsic::arm_neon_vqshiftsu:

9572

VShiftOpc = ARMISD::VQSHLsu; break;

9573

case Intrinsic::arm_neon_vqshiftns:

9574

VShiftOpc = ARMISD::VQSHRNs; break;

9575

case Intrinsic::arm_neon_vqshiftnu:

9576

VShiftOpc = ARMISD::VQSHRNu; break;

9577

case Intrinsic::arm_neon_vqshiftnsu:

9578

VShiftOpc = ARMISD::VQSHRNsu; break;

9579

case Intrinsic::arm_neon_vqrshiftns:

9580

VShiftOpc = ARMISD::VQRSHRNs; break;

9581

case Intrinsic::arm_neon_vqrshiftnu:

9582

VShiftOpc = ARMISD::VQRSHRNu; break;

9583

case Intrinsic::arm_neon_vqrshiftnsu:

9584

VShiftOpc = ARMISD::VQRSHRNsu; break;

9585

}

9586

9587

return DAG.getNode(VShiftOpc, SDLoc(N), N->getValueType(0),

9588

N->getOperand(1), DAG.getConstant(Cnt, MVT::i32));

9589

}

9590

9591

case Intrinsic::arm_neon_vshiftins: {

9592

EVT VT = N->getOperand(1).getValueType();

9593

int64_t Cnt;

9594

unsigned VShiftOpc = 0;

9595

9596

if (isVShiftLImm(N->getOperand(3), VT, false, Cnt))

9597

VShiftOpc = ARMISD::VSLI;

9598

else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt))

9599

VShiftOpc = ARMISD::VSRI;

9600

else {

9601

llvm_unreachable("invalid shift count for vsli/vsri intrinsic")::llvm::llvm_unreachable_internal("invalid shift count for vsli/vsri intrinsic"
, "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 9601);

9602

}

9603

9604

return DAG.getNode(VShiftOpc, SDLoc(N), N->getValueType(0),

9605

N->getOperand(1), N->getOperand(2),

9606

DAG.getConstant(Cnt, MVT::i32));

9607

}

9608

9609

case Intrinsic::arm_neon_vqrshifts:

9610

case Intrinsic::arm_neon_vqrshiftu:

9611

// No immediate versions of these to check for.

9612

break;

9613

}

9614

9615

return SDValue();

9616

}

9617

9618

/// PerformShiftCombine - Checks for immediate versions of vector shifts and

9619

/// lowers them. As with the vector shift intrinsics, this is done during DAG

9620

/// combining instead of DAG legalizing because the build_vectors for 64-bit

9621

/// vector element shift counts are generally not legal, and it is hard to see

9622

/// their values after they get legalized to loads from a constant pool.

9623

static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG,

9624

const ARMSubtarget *ST) {

9625

EVT VT = N->getValueType(0);

9626

if (N->getOpcode() == ISD::SRL && VT == MVT::i32 && ST->hasV6Ops()) {

9627

// Canonicalize (srl (bswap x), 16) to (rotr (bswap x), 16) if the high

9628

// 16-bits of x is zero. This optimizes rev + lsr 16 to rev16.

9629

SDValue N1 = N->getOperand(1);

9630

if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {

9631

SDValue N0 = N->getOperand(0);

9632

if (C->getZExtValue() == 16 && N0.getOpcode() == ISD::BSWAP &&

9633

DAG.MaskedValueIsZero(N0.getOperand(0),

9634

APInt::getHighBitsSet(32, 16)))

9635

return DAG.getNode(ISD::ROTR, SDLoc(N), VT, N0, N1);

9636

}

9637

}

9638

9639

// Nothing to be done for scalar shifts.

9640

const TargetLowering &TLI = DAG.getTargetLoweringInfo();

9641

if (!VT.isVector() || !TLI.isTypeLegal(VT))

9642

return SDValue();

9643

9644

9645

int64_t Cnt;

9646

9647

switch (N->getOpcode()) {

9648

default: llvm_unreachable("unexpected shift opcode")::llvm::llvm_unreachable_internal("unexpected shift opcode", "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 9648);

9649

9650

case ISD::SHL:

9651

if (isVShiftLImm(N->getOperand(1), VT, false, Cnt))

9652

return DAG.getNode(ARMISD::VSHL, SDLoc(N), VT, N->getOperand(0),

9653

DAG.getConstant(Cnt, MVT::i32));

9654

break;

9655

9656

case ISD::SRA:

9657

case ISD::SRL:

9658

if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {

9659

unsigned VShiftOpc = (N->getOpcode() == ISD::SRA ?

9660

ARMISD::VSHRs : ARMISD::VSHRu);

9661

return DAG.getNode(VShiftOpc, SDLoc(N), VT, N->getOperand(0),

9662

DAG.getConstant(Cnt, MVT::i32));

9663

}

9664

}

9665

return SDValue();

9666

}

9667

9668

/// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND,

9669

/// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND.

9670

static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,

9671

const ARMSubtarget *ST) {

9672

SDValue N0 = N->getOperand(0);

9673

9674

// Check for sign- and zero-extensions of vector extract operations of 8-

9675

// and 16-bit vector elements. NEON supports these directly. They are

9676

// handled during DAG combining because type legalization will promote them

9677

// to 32-bit types and it is messy to recognize the operations after that.

9678

if (ST->hasNEON() && N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {

9679

SDValue Vec = N0.getOperand(0);

9680

SDValue Lane = N0.getOperand(1);

9681

EVT VT = N->getValueType(0);

9682

EVT EltVT = N0.getValueType();

9683

const TargetLowering &TLI = DAG.getTargetLoweringInfo();

9684

9685

if (VT == MVT::i32 &&

9686

(EltVT == MVT::i8 || EltVT == MVT::i16) &&

9687

TLI.isTypeLegal(Vec.getValueType()) &&

9688

isa<ConstantSDNode>(Lane)) {

9689

9690

unsigned Opc = 0;

9691

switch (N->getOpcode()) {

9692

default: llvm_unreachable("unexpected opcode")::llvm::llvm_unreachable_internal("unexpected opcode", "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 9692);

9693

case ISD::SIGN_EXTEND:

9694

Opc = ARMISD::VGETLANEs;

9695

break;

9696

case ISD::ZERO_EXTEND:

9697

case ISD::ANY_EXTEND:

9698

Opc = ARMISD::VGETLANEu;

9699

break;

9700

}

9701

return DAG.getNode(Opc, SDLoc(N), VT, Vec, Lane);

9702

}

9703

}

9704

9705

return SDValue();

9706

}

9707

9708

/// PerformSELECT_CCCombine - Target-specific DAG combining for ISD::SELECT_CC

9709

/// to match f32 max/min patterns to use NEON vmax/vmin instructions.

9710

static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,

9711

const ARMSubtarget *ST) {

9712

// If the target supports NEON, try to use vmax/vmin instructions for f32

9713

// selects like "x < y ? x : y". Unless the NoNaNsFPMath option is set,

9714

// be careful about NaNs: NEON's vmax/vmin return NaN if either operand is

9715

// a NaN; only do the transformation when it matches that behavior.

9716

9717

// For now only do this when using NEON for FP operations; if using VFP, it

9718

// is not obvious that the benefit outweighs the cost of switching to the

9719

// NEON pipeline.

9720

if (!ST->hasNEON() || !ST->useNEONForSinglePrecisionFP() ||

9721

N->getValueType(0) != MVT::f32)

9722

return SDValue();

9723

9724

SDValue CondLHS = N->getOperand(0);

9725

SDValue CondRHS = N->getOperand(1);

9726

SDValue LHS = N->getOperand(2);

9727

SDValue RHS = N->getOperand(3);

9728

ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();

9729

9730

unsigned Opcode = 0;

9731

bool IsReversed;

9732

if (DAG.isEqualTo(LHS, CondLHS) && DAG.isEqualTo(RHS, CondRHS)) {

9733

IsReversed = false; // x CC y ? x : y

9734

} else if (DAG.isEqualTo(LHS, CondRHS) && DAG.isEqualTo(RHS, CondLHS)) {

9735

IsReversed = true ; // x CC y ? y : x

9736

} else {

9737

return SDValue();

9738

}

9739

9740

bool IsUnordered;

9741

switch (CC) {

9742

default: break;

9743

case ISD::SETOLT:

9744

case ISD::SETOLE:

9745

case ISD::SETLT:

9746

case ISD::SETLE:

9747

case ISD::SETULT:

9748

case ISD::SETULE:

9749

// If LHS is NaN, an ordered comparison will be false and the result will

9750

// be the RHS, but vmin(NaN, RHS) = NaN. Avoid this by checking that LHS

9751

// != NaN. Likewise, for unordered comparisons, check for RHS != NaN.

9752

IsUnordered = (CC == ISD::SETULT || CC == ISD::SETULE);

9753

if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))

9754

break;

9755

// For less-than-or-equal comparisons, "+0 <= -0" will be true but vmin

9756

// will return -0, so vmin can only be used for unsafe math or if one of

9757

// the operands is known to be nonzero.

9758

if ((CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE) &&

9759

!DAG.getTarget().Options.UnsafeFPMath &&

9760

!(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))

9761

break;

9762

Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN;

9763

break;

9764

9765

case ISD::SETOGT:

9766

case ISD::SETOGE:

9767

case ISD::SETGT:

9768

case ISD::SETGE:

9769

case ISD::SETUGT:

9770

case ISD::SETUGE:

9771

// If LHS is NaN, an ordered comparison will be false and the result will

9772

// be the RHS, but vmax(NaN, RHS) = NaN. Avoid this by checking that LHS

9773

// != NaN. Likewise, for unordered comparisons, check for RHS != NaN.

9774

IsUnordered = (CC == ISD::SETUGT || CC == ISD::SETUGE);

9775

if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))

9776

break;

9777

// For greater-than-or-equal comparisons, "-0 >= +0" will be true but vmax

9778

// will return +0, so vmax can only be used for unsafe math or if one of

9779

// the operands is known to be nonzero.

9780

if ((CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE) &&

9781

!DAG.getTarget().Options.UnsafeFPMath &&

9782

!(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))

9783

break;

9784

Opcode = IsReversed ? ARMISD::FMIN : ARMISD::FMAX;

9785

break;

9786

}

9787

9788

if (!Opcode)

9789

return SDValue();

9790

return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), LHS, RHS);

9791

}

9792

9793

/// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.

9794

SDValue

9795

ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {

9796

SDValue Cmp = N->getOperand(4);

9797

if (Cmp.getOpcode() != ARMISD::CMPZ)

9798

// Only looking at EQ and NE cases.

9799

return SDValue();

9800

9801

EVT VT = N->getValueType(0);

9802

SDLoc dl(N);

9803

SDValue LHS = Cmp.getOperand(0);

9804

SDValue RHS = Cmp.getOperand(1);

9805

SDValue FalseVal = N->getOperand(0);

9806

SDValue TrueVal = N->getOperand(1);

9807

SDValue ARMcc = N->getOperand(2);

9808

ARMCC::CondCodes CC =

9809

(ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();

9810

9811

// Simplify

9812

// mov r1, r0

9813

// cmp r1, x

9814

// mov r0, y

9815

// moveq r0, x

9816

// to

9817

// cmp r0, x

9818

// movne r0, y

9819

9820

// mov r1, r0

9821

// cmp r1, x

9822

// mov r0, x

9823

// movne r0, y

9824

// to

9825

// cmp r0, x

9826

// movne r0, y

9827

/// FIXME: Turn this into a target neutral optimization?

9828

SDValue Res;

9829

if (CC == ARMCC::NE && FalseVal == RHS && FalseVal != LHS) {

9830

Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc,

9831

N->getOperand(3), Cmp);

9832

} else if (CC == ARMCC::EQ && TrueVal == RHS) {

9833

SDValue ARMcc;

9834

SDValue NewCmp = getARMCmp(LHS, RHS, ISD::SETNE, ARMcc, DAG, dl);

9835

Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, FalseVal, ARMcc,

9836

N->getOperand(3), NewCmp);

9837

}

9838

9839

if (Res.getNode()) {

9840

APInt KnownZero, KnownOne;

9841

DAG.computeKnownBits(SDValue(N,0), KnownZero, KnownOne);

9842

// Capture demanded bits information that would be otherwise lost.

9843

if (KnownZero == 0xfffffffe)

9844

Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,

9845

DAG.getValueType(MVT::i1));

9846

else if (KnownZero == 0xffffff00)

9847

Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,

9848

DAG.getValueType(MVT::i8));

9849

else if (KnownZero == 0xffff0000)

9850

Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,

9851

DAG.getValueType(MVT::i16));

9852

}

9853

9854

return Res;

9855

}

9856

9857

/// PerformDAGCombine - Dispatch a node to the ARM-specific combine routine
/// that handles its opcode and return that routine's result.  Nodes with no
/// ARM-specific combine yield an empty SDValue.
SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
                                             DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  switch (N->getOpcode()) {
  default:
    break;

  // Integer arithmetic and logic.
  case ISD::ADDC:
    return PerformADDCCombine(N, DCI, Subtarget);
  case ISD::ADD:
    return PerformADDCombine(N, DCI, Subtarget);
  case ISD::SUB:
    return PerformSUBCombine(N, DCI);
  case ISD::MUL:
    return PerformMULCombine(N, DCI, Subtarget);
  case ISD::OR:
    return PerformORCombine(N, DCI, Subtarget);
  case ISD::XOR:
    return PerformXORCombine(N, DCI, Subtarget);
  case ISD::AND:
    return PerformANDCombine(N, DCI, Subtarget);
  case ARMISD::BFI:
    return PerformBFICombine(N, DCI);

  // Register moves, memory and vector construction.
  case ARMISD::VMOVRRD:
    return PerformVMOVRRDCombine(N, DCI, Subtarget);
  case ARMISD::VMOVDRR:
    return PerformVMOVDRRCombine(N, DAG);
  case ISD::STORE:
    return PerformSTORECombine(N, DCI);
  case ISD::BUILD_VECTOR:
    return PerformBUILD_VECTORCombine(N, DCI, Subtarget);
  case ISD::INSERT_VECTOR_ELT:
    return PerformInsertEltCombine(N, DCI);
  case ISD::VECTOR_SHUFFLE:
    return PerformVECTOR_SHUFFLECombine(N, DAG);
  case ARMISD::VDUPLANE:
    return PerformVDUPLANECombine(N, DCI);

  // Conversions, division and intrinsics without a chain.
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:
    return PerformVCVTCombine(N, DCI, Subtarget);
  case ISD::FDIV:
    return PerformVDIVCombine(N, DCI, Subtarget);
  case ISD::INTRINSIC_WO_CHAIN:
    return PerformIntrinsicCombine(N, DAG);

  // Shifts and extensions.
  case ISD::SHL:
  case ISD::SRA:
  case ISD::SRL:
    return PerformShiftCombine(N, DAG, Subtarget);
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
  case ISD::ANY_EXTEND:
    return PerformExtendCombine(N, DAG, Subtarget);

  // Selects and loads.
  case ISD::SELECT_CC:
    return PerformSELECT_CCCombine(N, DAG, Subtarget);
  case ARMISD::CMOV:
    return PerformCMOVCombine(N, DAG);
  case ISD::LOAD:
    return PerformLOADCombine(N, DCI);

  case ARMISD::VLD2DUP:
  case ARMISD::VLD3DUP:
  case ARMISD::VLD4DUP:
    return PerformVLDCombine(N, DCI);
  case ARMISD::BUILD_VECTOR:
    return PerformARMBUILD_VECTORCombine(N, DCI);

  // Chained intrinsics: operand 1 holds the intrinsic ID.  Only the NEON
  // structured load/store intrinsics have a combine.
  case ISD::INTRINSIC_VOID:
  case ISD::INTRINSIC_W_CHAIN: {
    const uint64_t IntNo =
        cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
    switch (IntNo) {
    case Intrinsic::arm_neon_vld1:
    case Intrinsic::arm_neon_vld2:
    case Intrinsic::arm_neon_vld3:
    case Intrinsic::arm_neon_vld4:
    case Intrinsic::arm_neon_vld2lane:
    case Intrinsic::arm_neon_vld3lane:
    case Intrinsic::arm_neon_vld4lane:
    case Intrinsic::arm_neon_vst1:
    case Intrinsic::arm_neon_vst2:
    case Intrinsic::arm_neon_vst3:
    case Intrinsic::arm_neon_vst4:
    case Intrinsic::arm_neon_vst2lane:
    case Intrinsic::arm_neon_vst3lane:
    case Intrinsic::arm_neon_vst4lane:
      return PerformVLDCombine(N, DCI);
    default:
      return SDValue();
    }
  }
  }

  return SDValue();
}

9919

9920

bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc,

9921

EVT VT) const {

9922

return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE);

9923

}

9924

9925

bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,

9926

unsigned,

9927

unsigned,

9928

bool *Fast) const {

9929

// The AllowsUnaliged flag models the SCTLR.A setting in ARM cpus

9930

bool AllowsUnaligned = Subtarget->allowsUnalignedMem();

9931

9932

switch (VT.getSimpleVT().SimpleTy) {

9933

default:

9934

return false;

9935

case MVT::i8:

9936

case MVT::i16:

9937

case MVT::i32: {

9938

// Unaligned access can use (for example) LRDB, LRDH, LDR

9939

if (AllowsUnaligned) {

9940

if (Fast)

9941

*Fast = Subtarget->hasV7Ops();

9942

return true;

9943

}

9944

return false;

9945

}

9946

case MVT::f64:

9947

case MVT::v2f64: {

9948

// For any little-endian targets with neon, we can support unaligned ld/st

9949

// of D and Q (e.g. {D0,D1}) registers by using vld1.i8/vst1.i8.

9950

// A big-endian target may also explicitly support unaligned accesses

9951

if (Subtarget->hasNEON() && (AllowsUnaligned || isLittleEndian())) {

9952

if (Fast)

9953

*Fast = true;

9954

return true;

9955

}

9956

return false;

9957

}

9958

}

9959

}

9960

9961

/// memOpAlign - Return true if both the source and the destination alignment
/// satisfy AlignCheck.
///
/// An alignment of 0 is accepted for either operand regardless of AlignCheck.
/// An AlignCheck of 0 imposes no requirement and returns true; it is handled
/// up front so the modulo below never divides by zero.
static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign,
                       unsigned AlignCheck) {
  if (AlignCheck == 0)
    return true;

  const bool SrcOK = SrcAlign == 0 || SrcAlign % AlignCheck == 0;
  const bool DstOK = DstAlign == 0 || DstAlign % AlignCheck == 0;
  return SrcOK && DstOK;
}

9966

9967

/// getOptimalMemOpType - Pick the value type to use when expanding a memory
/// operation of the given size and alignments.
///
/// Returns v2f64 or f64 when NEON may be used (not a non-zero memset, NEON
/// available, function not marked NoImplicitFloat) and the operation is
/// either suitably aligned or a fast misaligned access of that type is
/// allowed.  Otherwise returns i32 for Size >= 4, i16 for Size >= 2, and
/// MVT::Other to let the target-independent logic decide.  MemcpyStrSrc is
/// not consulted.
EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size,
                                           unsigned DstAlign, unsigned SrcAlign,
                                           bool IsMemset, bool ZeroMemset,
                                           bool MemcpyStrSrc,
                                           MachineFunction &MF) const {
  const Function *F = MF.getFunction();

  // See if we can use NEON instructions for this...
  if ((!IsMemset || ZeroMemset) &&
      Subtarget->hasNEON() &&
      !F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
                                       Attribute::NoImplicitFloat)) {
    // Each flag is only meaningful when the query returns true; initialize
    // it anyway so it is never read indeterminate.
    bool FastQ = false;
    if (Size >= 16 &&
        (memOpAlign(DstAlign, SrcAlign, 16) ||
         (allowsMisalignedMemoryAccesses(MVT::v2f64, 0, 1, &FastQ) && FastQ)))
      return MVT::v2f64;

    bool FastD = false;
    if (Size >= 8 &&
        (memOpAlign(DstAlign, SrcAlign, 8) ||
         (allowsMisalignedMemoryAccesses(MVT::f64, 0, 1, &FastD) && FastD)))
      return MVT::f64;
  }

  // Lowering to i32/i16 if the size permits.
  if (Size >= 4)
    return MVT::i32;
  if (Size >= 2)
    return MVT::i16;

  // Let the target-independent logic figure it out.
  return MVT::Other;
}

10001

10002

/// isZExtFree - Return true when Val is a load of a simple i1, i8 or i16
/// value and VT2 is a simple integer type.
bool ARMTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
  if (Val.getOpcode() != ISD::LOAD)
    return false;

  EVT VT1 = Val.getValueType();
  const bool BothSimpleInts = VT1.isSimple() && VT1.isInteger() &&
                              VT2.isSimple() && VT2.isInteger();
  if (!BothSimpleInts)
    return false;

  // 8-bit and 16-bit loads implicitly zero-extend to 32-bits.
  const MVT::SimpleValueType SrcTy = VT1.getSimpleVT().SimpleTy;
  return SrcTy == MVT::i1 || SrcTy == MVT::i8 || SrcTy == MVT::i16;
}

10022

10023

/// allowTruncateForTailCall - Return true when both types are integer types
/// and Ty1 is legal for this target.
bool ARMTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
  const bool BothIntegers = Ty1->isIntegerTy() && Ty2->isIntegerTy();
  if (!BothIntegers || !isTypeLegal(EVT::getEVT(Ty1)))
    return false;

  assert(Ty1->getPrimitiveSizeInBits() <= 64 && "i128 is probably not a noop");

  // Assuming the caller doesn't have a zeroext or signext return parameter,
  // truncation all the way down to i1 is valid.
  return true;
}

10036

10037

10038

/// isLegalT1AddressImmediate - Return true if V is a legal Thumb1 load/store
/// offset for type VT: non-negative, a multiple of the access size (1 for
/// i1/i8, 2 for i16, 4 for i32), and at most 31 once divided by that size.
/// Any other type returns false.  VT must be a simple type.
static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
  if (V < 0)
    return false;

  unsigned Scale;
  switch (VT.getSimpleVT().SimpleTy) {
  default:
    return false;
  case MVT::i1:
  case MVT::i8:
    Scale = 1;
    break;
  case MVT::i16:
    Scale = 2;
    break;
  case MVT::i32:
    Scale = 4;
    break;
  }

  // The offset must be a whole number of elements...
  if ((V & (Scale - 1)) != 0)
    return false;

  // ...and the element count must fit in 5 unsigned bits.
  const int64_t Scaled = V / Scale;
  return Scaled == (Scaled & ((1LL << 5) - 1));
}

10064

10065

static bool isLegalT2AddressImmediate(int64_t V, EVT VT,

10066

const ARMSubtarget *Subtarget) {

10067

bool isNeg = false;

10068

if (V < 0) {

10069

isNeg = true;

10070

V = - V;

10071

}

10072

10073

switch (VT.getSimpleVT().SimpleTy) {

10074

default: return false;

10075

case MVT::i1:

10076

case MVT::i8:

10077

case MVT::i16:

10078

case MVT::i32:

10079

// + imm12 or - imm8

10080

if (isNeg)

10081

return V == (V & ((1LL << 8) - 1));

10082

return V == (V & ((1LL << 12) - 1));

10083

case MVT::f32:

10084

case MVT::f64:

10085

// Same as ARM mode. FIXME: NEON?

10086

if (!Subtarget->hasVFP2())

10087

return false;

10088

if ((V & 3) != 0)

10089

return false;

10090

V >>= 2;

10091

return V == (V & ((1LL << 8) - 1));

10092

}

10093

}

10094

10095

/// isLegalAddressImmediate - Return true if the integer value can be used

10096

/// as the offset of the target addressing mode for load / store of the

10097

/// given type.

10098

static bool isLegalAddressImmediate(int64_t V, EVT VT,

10099

const ARMSubtarget *Subtarget) {

10100

if (V == 0)

10101

return true;

10102

10103

if (!VT.isSimple())

10104

return false;

10105

10106

if (Subtarget->isThumb1Only())

10107

return isLegalT1AddressImmediate(V, VT);

10108

else if (Subtarget->isThumb2())

10109

return isLegalT2AddressImmediate(V, VT, Subtarget);

10110

10111

// ARM mode.

10112

if (V < 0)

10113

V = - V;

10114

switch (VT.getSimpleVT().SimpleTy) {

10115

default: return false;

10116

case MVT::i1:

10117

case MVT::i8:

10118

case MVT::i32:

10119

// +- imm12

10120

return V == (V & ((1LL << 12) - 1));

10121

case MVT::i16:

10122

// +- imm8

10123

return V == (V & ((1LL << 8) - 1));

10124

case MVT::f32:

10125

case MVT::f64:

10126

if (!Subtarget->hasVFP2()) // FIXME: NEON?

10127

return false;

10128

if ((V & 3) != 0)

10129

return false;

10130

V >>= 2;

10131

return V == (V & ((1LL << 8) - 1));

10132

}

10133

}

10134

10135

bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM,

10136

EVT VT) const {

10137

int Scale = AM.Scale;

10138

if (Scale < 0)

10139

return false;

10140

10141

switch (VT.getSimpleVT().SimpleTy) {

10142

default: return false;

10143

case MVT::i1:

10144

case MVT::i8:

10145

case MVT::i16:

10146

case MVT::i32:

10147

if (Scale == 1)

10148

return true;

10149

// r + r << imm

10150

Scale = Scale & ~1;

10151

return Scale == 2 || Scale == 4 || Scale == 8;

10152

case MVT::i64:

10153

// r + r

10154

if (((unsigned)AM.HasBaseReg + Scale) <= 2)

10155

return true;

10156

return false;

10157

case MVT::isVoid:

10158

// Note, we allow "void" uses (basically, uses that aren't loads or

10159

// stores), because arm allows folding a scale into many arithmetic

10160

// operations. This should be made more precise and revisited later.

10161

10162

// Allow r << imm, but the imm has to be a multiple of two.

10163

if (Scale & 1) return false;

10164

return isPowerOf2_32(Scale);

10165

}

10166

}

10167

10168

/// isLegalAddressingMode - Return true if the addressing mode represented

10169

/// by AM is legal for this target, for a load/store of the specified type.

10170

bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM,

10171

Type *Ty) const {

10172

EVT VT = getValueType(Ty, true);

10173

if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget))

10174

return false;

10175

10176

// Can never fold addr of global into load/store.

10177

if (AM.BaseGV)

10178

return false;

10179

10180

switch (AM.Scale) {

10181

case 0: // no scale reg, must be "r+i" or "r", or "i".

10182

break;

10183

case 1:

10184

if (Subtarget->isThumb1Only())

10185

return false;

10186

// FALL THROUGH.

10187

default:

10188

// ARM doesn't support any R+R*scale+imm addr modes.

10189

if (AM.BaseOffs)

10190

return false;

10191

10192

if (!VT.isSimple())

10193

return false;

10194

10195

if (Subtarget->isThumb2())

10196

return isLegalT2ScaledAddressingMode(AM, VT);

10197

10198

int Scale = AM.Scale;

10199

switch (VT.getSimpleVT().SimpleTy) {

10200

default: return false;

10201

case MVT::i1:

10202

case MVT::i8:

10203

case MVT::i32:

10204

if (Scale < 0) Scale = -Scale;

10205

if (Scale == 1)

10206

return true;

10207

// r + r << imm

10208

return isPowerOf2_32(Scale & ~1);

10209

case MVT::i16:

10210

case MVT::i64:

10211

// r + r

10212

if (((unsigned)AM.HasBaseReg + Scale) <= 2)

10213

return true;

10214

return false;

10215

10216

case MVT::isVoid:

10217

// Note, we allow "void" uses (basically, uses that aren't loads or

10218

// stores), because arm allows folding a scale into many arithmetic

10219

// operations. This should be made more precise and revisited later.

10220

10221

// Allow r << imm, but the imm has to be a multiple of two.

10222

if (Scale & 1) return false;

10223

return isPowerOf2_32(Scale);

10224

}

10225

}

10226

return true;

10227

}

10228

10229

/// isLegalICmpImmediate - Return true if the specified immediate is legal

10230

/// icmp immediate, that is the target has icmp instructions which can compare

10231

/// a register against the immediate without having to materialize the

10232

/// immediate into a register.

10233

bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const {

10234

// Thumb2 and ARM modes can use cmn for negative immediates.

10235

if (!Subtarget->isThumb())

10236

return ARM_AM::getSOImmVal(llvm::abs64(Imm)) != -1;

10237

if (Subtarget->isThumb2())

10238

return ARM_AM::getT2SOImmVal(llvm::abs64(Imm)) != -1;

10239

// Thumb1 doesn't have cmn, and only 8-bit immediates.

10240

return Imm >= 0 && Imm <= 255;

10241

}

10242

10243

/// isLegalAddImmediate - Return true if the specified immediate is a legal add

10244

/// *or sub* immediate, that is the target has add or sub instructions which can

10245

/// add a register with the immediate without having to materialize the

10246

/// immediate into a register.

10247

bool ARMTargetLowering::isLegalAddImmediate(int64_t Imm) const {

10248

// Same encoding for add/sub, just flip the sign.

10249

int64_t AbsImm = llvm::abs64(Imm);

10250

if (!Subtarget->isThumb())

10251

return ARM_AM::getSOImmVal(AbsImm) != -1;

10252

if (Subtarget->isThumb2())

10253

return ARM_AM::getT2SOImmVal(AbsImm) != -1;

10254

// Thumb1 only has 8-bit unsigned immediate.

10255

return AbsImm >= 0 && AbsImm <= 255;

10256

}

10257

10258

static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT,

10259

bool isSEXTLoad, SDValue &Base,

10260

SDValue &Offset, bool &isInc,

10261

SelectionDAG &DAG) {

10262

if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)

10263

return false;

10264

10265

if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) {

10266

// AddressingMode 3

10267

Base = Ptr->getOperand(0);

10268

if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {

10269

int RHSC = (int)RHS->getZExtValue();

10270

if (RHSC < 0 && RHSC > -256) {

10271

assert(Ptr->getOpcode() == ISD::ADD)((Ptr->getOpcode() == ISD::ADD) ? static_cast<void> (
0) : __assert_fail ("Ptr->getOpcode() == ISD::ADD", "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 10271, __PRETTY_FUNCTION__));

10272

isInc = false;

10273

Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));

10274

return true;

10275

}

10276

}

10277

isInc = (Ptr->getOpcode() == ISD::ADD);

10278

Offset = Ptr->getOperand(1);

10279

return true;

10280

} else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) {

10281

// AddressingMode 2

10282

if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {

10283

int RHSC = (int)RHS->getZExtValue();

10284

if (RHSC < 0 && RHSC > -0x1000) {

10285

10286

isInc = false;

10287

Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));

10288

Base = Ptr->getOperand(0);

10289

return true;

10290

}

10291

}

10292

10293

if (Ptr->getOpcode() == ISD::ADD) {

10294

isInc = true;

10295

ARM_AM::ShiftOpc ShOpcVal=

10296

ARM_AM::getShiftOpcForNode(Ptr->getOperand(0).getOpcode());

10297

if (ShOpcVal != ARM_AM::no_shift) {

10298

Base = Ptr->getOperand(1);

10299

Offset = Ptr->getOperand(0);

10300

} else {

10301

Base = Ptr->getOperand(0);

10302

Offset = Ptr->getOperand(1);

10303

}

10304

return true;

10305

}

10306

10307

isInc = (Ptr->getOpcode() == ISD::ADD);

10308

Base = Ptr->getOperand(0);

10309

Offset = Ptr->getOperand(1);

10310

return true;

10311

}

10312

10313

// FIXME: Use VLDM / VSTM to emulate indexed FP load / store.

10314

return false;

10315

}

10316

10317

static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT,

10318

bool isSEXTLoad, SDValue &Base,

10319

SDValue &Offset, bool &isInc,

10320

SelectionDAG &DAG) {

10321

if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)

10322

return false;

10323

10324

Base = Ptr->getOperand(0);

10325

if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {

10326

int RHSC = (int)RHS->getZExtValue();

10327

if (RHSC < 0 && RHSC > -0x100) { // 8 bits.

10328

10329

isInc = false;

10330

Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));

10331

return true;

10332

} else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero.

10333

isInc = Ptr->getOpcode() == ISD::ADD;

10334

Offset = DAG.getConstant(RHSC, RHS->getValueType(0));

10335

return true;

10336

}

10337

}

10338

10339

return false;

10340

}

10341

10342

/// getPreIndexedAddressParts - returns true by value, base pointer and

10343

/// offset pointer and addressing mode by reference if the node's address

10344

/// can be legally represented as pre-indexed load / store address.

10345

bool

10346

ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,

10347

SDValue &Offset,

10348

ISD::MemIndexedMode &AM,

10349

SelectionDAG &DAG) const {

10350

if (Subtarget->isThumb1Only())

10351

return false;

10352

10353

EVT VT;

10354

SDValue Ptr;

10355

bool isSEXTLoad = false;

10356

if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {

10357

Ptr = LD->getBasePtr();

10358

VT = LD->getMemoryVT();

10359

isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;

10360

} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {

10361

Ptr = ST->getBasePtr();

10362

VT = ST->getMemoryVT();

10363

} else

10364

return false;

10365

10366

bool isInc;

10367

bool isLegal = false;

10368

if (Subtarget->isThumb2())

10369

isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,

10370

Offset, isInc, DAG);

10371

else

10372

isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,

10373

Offset, isInc, DAG);

10374

if (!isLegal)

10375

return false;

10376

10377

AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC;

10378

return true;

10379

}

10380

10381

/// getPostIndexedAddressParts - returns true by value, base pointer and

10382

/// offset pointer and addressing mode by reference if this node can be

10383

/// combined with a load / store to form a post-indexed load / store.

10384

bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,

10385

SDValue &Base,

10386

SDValue &Offset,

10387

ISD::MemIndexedMode &AM,

10388

SelectionDAG &DAG) const {

10389

if (Subtarget->isThumb1Only())

10390

return false;

10391

10392

EVT VT;

10393

SDValue Ptr;

10394

bool isSEXTLoad = false;

10395

if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {

10396

VT = LD->getMemoryVT();

10397

Ptr = LD->getBasePtr();

10398

isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;

10399

} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {

10400

VT = ST->getMemoryVT();

10401

Ptr = ST->getBasePtr();

10402

} else

10403

return false;

10404

10405

bool isInc;

10406

bool isLegal = false;

10407

if (Subtarget->isThumb2())

10408

isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,

10409

isInc, DAG);

10410

else

10411

isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,

10412

isInc, DAG);

10413

if (!isLegal)

10414

return false;

10415

10416

if (Ptr != Base) {

10417

// Swap base ptr and offset to catch more post-index load / store when

10418

// it's legal. In Thumb2 mode, offset must be an immediate.

10419

if (Ptr == Offset && Op->getOpcode() == ISD::ADD &&

10420

!Subtarget->isThumb2())

10421

std::swap(Base, Offset);

10422

10423

// Post-indexed load / store update the base pointer.

10424

if (Ptr != Base)

10425

return false;

10426

}

10427

10428

AM = isInc ? ISD::POST_INC : ISD::POST_DEC;

10429

return true;

10430

}

10431

10432

void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,

10433

APInt &KnownZero,

10434

APInt &KnownOne,

10435

const SelectionDAG &DAG,

10436

unsigned Depth) const {

10437

unsigned BitWidth = KnownOne.getBitWidth();

10438

KnownZero = KnownOne = APInt(BitWidth, 0);

10439

switch (Op.getOpcode()) {

10440

default: break;

10441

case ARMISD::ADDC:

10442

case ARMISD::ADDE:

10443

case ARMISD::SUBC:

10444

case ARMISD::SUBE:

10445

// These nodes' second result is a boolean

10446

if (Op.getResNo() == 0)

10447

break;

10448

KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);

10449

break;

10450

case ARMISD::CMOV: {

10451

// Bits are known zero/one if known on the LHS and RHS.

10452

DAG.computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);

10453

if (KnownZero == 0 && KnownOne == 0) return;

10454

10455

APInt KnownZeroRHS, KnownOneRHS;

10456

DAG.computeKnownBits(Op.getOperand(1), KnownZeroRHS, KnownOneRHS, Depth+1);

10457

KnownZero &= KnownZeroRHS;

10458

KnownOne &= KnownOneRHS;

10459

return;

10460

}

10461

case ISD::INTRINSIC_W_CHAIN: {

10462

ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1));

10463

Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());

10464

switch (IntID) {

10465

default: return;

10466

case Intrinsic::arm_ldaex:

10467

case Intrinsic::arm_ldrex: {

10468

EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();

10469

unsigned MemBits = VT.getScalarType().getSizeInBits();

10470

KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);

10471

return;

10472

}

10473

}

10474

}

10475

}

10476

}

10477

10478

//===----------------------------------------------------------------------===//

10479

// ARM Inline Assembly Support

10480

//===----------------------------------------------------------------------===//

10481

10482

/// ExpandInlineAsm - Recognize the single inline-asm statement "rev $0, $1"
/// with a constraint string starting with "=l,l" on a 32-bit integer call,
/// and hand it to IntrinsicLowering::LowerToByteSwap.  Returns that call's
/// result, or false when the asm is not recognized.
bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const {
  // Looking for "rev" which is V6+.
  if (!Subtarget->hasV6Ops())
    return false;

  InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue());
  std::string AsmStr = IA->getAsmString();

  // Split into statements; only a single statement is handled.
  SmallVector<StringRef, 4> AsmPieces;
  SplitString(AsmStr, AsmPieces, ";\n");
  if (AsmPieces.size() != 1)
    return false;

  // Re-split that statement into mnemonic and operands.
  AsmStr = AsmPieces[0];
  AsmPieces.clear();
  SplitString(AsmStr, AsmPieces, " \t,");

  // rev $0, $1
  const bool IsRev = AsmPieces.size() == 3 && AsmPieces[0] == "rev" &&
                     AsmPieces[1] == "$0" && AsmPieces[2] == "$1" &&
                     IA->getConstraintString().compare(0, 4, "=l,l") == 0;
  if (!IsRev)
    return false;

  IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
  if (!Ty || Ty->getBitWidth() != 32)
    return false;

  return IntrinsicLowering::LowerToByteSwap(CI);
}

10512

10513

/// getConstraintType - Given a constraint letter, return the type of

10514

/// constraint it is for this target.

10515

ARMTargetLowering::ConstraintType

10516

ARMTargetLowering::getConstraintType(const std::string &Constraint) const {

10517

if (Constraint.size() == 1) {

10518

switch (Constraint[0]) {

10519

default: break;

10520

case 'l': return C_RegisterClass;

10521

case 'w': return C_RegisterClass;

10522

case 'h': return C_RegisterClass;

10523

case 'x': return C_RegisterClass;

10524

case 't': return C_RegisterClass;

10525

case 'j': return C_Other; // Constant for movw.

10526

// An address with a single base register. Due to the way we

10527

// currently handle addresses it is the same as an 'r' memory constraint.

10528

case 'Q': return C_Memory;

10529

}

10530

} else if (Constraint.size() == 2) {

10531

switch (Constraint[0]) {

10532

default: break;

10533

// All 'U+' constraints are addresses.

10534

case 'U': return C_Memory;

10535

}

10536

}

10537

return TargetLowering::getConstraintType(Constraint);

10538

}

10539

10540

/// Examine constraint type and operand type and determine a weight value.

10541

/// This object must already have been set up with the operand type

10542

/// and the current alternative constraint selected.

10543

TargetLowering::ConstraintWeight

10544

ARMTargetLowering::getSingleConstraintMatchWeight(

10545

AsmOperandInfo &info, const char *constraint) const {

10546

ConstraintWeight weight = CW_Invalid;

10547

Value *CallOperandVal = info.CallOperandVal;

10548

// If we don't have a value, we can't do a match,

10549

// but allow it at the lowest weight.

10550

if (!CallOperandVal)

10551

return CW_Default;

10552

Type *type = CallOperandVal->getType();

10553

// Look at the constraint type.

10554

switch (*constraint) {

10555

default:

10556

weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);

10557

break;

10558

case 'l':

10559

if (type->isIntegerTy()) {

10560

if (Subtarget->isThumb())

10561

weight = CW_SpecificReg;

10562

else

10563

weight = CW_Register;

10564

}

10565

break;

10566

case 'w':

10567

if (type->isFloatingPointTy())

10568

weight = CW_Register;

10569

break;

10570

}

10571

return weight;

10572

}

10573

10574

typedef std::pair<unsigned, const TargetRegisterClass*> RCPair;

10575

RCPair

10576

ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,

10577

MVT VT) const {

10578

if (Constraint.size() == 1) {

10579

// GCC ARM Constraint Letters

10580

switch (Constraint[0]) {

10581

case 'l': // Low regs or general regs.

10582

if (Subtarget->isThumb())

10583

return RCPair(0U, &ARM::tGPRRegClass);

10584

return RCPair(0U, &ARM::GPRRegClass);

10585

case 'h': // High regs or no regs.

10586

if (Subtarget->isThumb())

10587

return RCPair(0U, &ARM::hGPRRegClass);

10588

break;

10589

case 'r':

10590

if (Subtarget->isThumb1Only())

10591

return RCPair(0U, &ARM::tGPRRegClass);

10592

return RCPair(0U, &ARM::GPRRegClass);

10593

case 'w':

10594

if (VT == MVT::Other)

10595

break;

10596

if (VT == MVT::f32)

10597

return RCPair(0U, &ARM::SPRRegClass);

10598

if (VT.getSizeInBits() == 64)

10599

return RCPair(0U, &ARM::DPRRegClass);

10600

if (VT.getSizeInBits() == 128)

10601

return RCPair(0U, &ARM::QPRRegClass);

10602

break;

10603

case 'x':

10604

if (VT == MVT::Other)

10605

break;

10606

if (VT == MVT::f32)

10607

return RCPair(0U, &ARM::SPR_8RegClass);

10608

if (VT.getSizeInBits() == 64)

10609

return RCPair(0U, &ARM::DPR_8RegClass);

10610

if (VT.getSizeInBits() == 128)

10611

return RCPair(0U, &ARM::QPR_8RegClass);

10612

break;

10613

case 't':

10614

if (VT == MVT::f32)

10615

return RCPair(0U, &ARM::SPRRegClass);

10616

break;

10617

}

10618

}

10619

if (StringRef("{cc}").equals_lower(Constraint))

10620

return std::make_pair(unsigned(ARM::CPSR), &ARM::CCRRegClass);

10621

10622

return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);

10623

}

10624

10625

/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops

10626

/// vector. If it is invalid, don't add anything to Ops.

10627

void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,

10628

std::string &Constraint,

10629

std::vector<SDValue>&Ops,

10630

SelectionDAG &DAG) const {

10631

SDValue Result;

10632

10633

// Currently only support length 1 constraints.

10634

if (Constraint.length() != 1) return;

10635

10636

char ConstraintLetter = Constraint[0];

10637

switch (ConstraintLetter) {

10638

default: break;

10639

case 'j':

10640

case 'I': case 'J': case 'K': case 'L':

10641

case 'M': case 'N': case 'O':

10642

ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);

10643

if (!C)

10644

return;

10645

10646

int64_t CVal64 = C->getSExtValue();

10647

int CVal = (int) CVal64;

10648

// None of these constraints allow values larger than 32 bits. Check

10649

// that the value fits in an int.

10650

if (CVal != CVal64)

10651

return;

10652

10653

switch (ConstraintLetter) {

10654

case 'j':

10655

// Constant suitable for movw, must be between 0 and

10656

// 65535.

10657

if (Subtarget->hasV6T2Ops())

10658

if (CVal >= 0 && CVal <= 65535)

10659

break;

10660

return;

10661

case 'I':

10662

if (Subtarget->isThumb1Only()) {

10663

// This must be a constant between 0 and 255, for ADD

10664

// immediates.

10665

if (CVal >= 0 && CVal <= 255)

10666

break;

10667

} else if (Subtarget->isThumb2()) {

10668

// A constant that can be used as an immediate value in a

10669

// data-processing instruction.

10670

if (ARM_AM::getT2SOImmVal(CVal) != -1)

10671

break;

10672

} else {

10673

// A constant that can be used as an immediate value in a

10674

// data-processing instruction.

10675

if (ARM_AM::getSOImmVal(CVal) != -1)

10676

break;

10677

}

10678

return;

10679

10680

case 'J':

10681

if (Subtarget->isThumb()) { // FIXME thumb2

10682

// This must be a constant between -255 and -1, for negated ADD

10683

// immediates. This can be used in GCC with an "n" modifier that

10684

// prints the negated value, for use with SUB instructions. It is

10685

// not useful otherwise but is implemented for compatibility.

10686

if (CVal >= -255 && CVal <= -1)

10687

break;

10688

} else {

10689

// This must be a constant between -4095 and 4095. It is not clear

10690

// what this constraint is intended for. Implemented for

10691

// compatibility with GCC.

10692

if (CVal >= -4095 && CVal <= 4095)

10693

break;

10694

}

10695

return;

10696

10697

case 'K':

10698

if (Subtarget->isThumb1Only()) {

10699

// A 32-bit value where only one byte has a nonzero value. Exclude

10700

// zero to match GCC. This constraint is used by GCC internally for

10701

// constants that can be loaded with a move/shift combination.

10702

// It is not useful otherwise but is implemented for compatibility.

10703

if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal))

10704

break;

10705

} else if (Subtarget->isThumb2()) {

10706

// A constant whose bitwise inverse can be used as an immediate

10707

// value in a data-processing instruction. This can be used in GCC

10708

// with a "B" modifier that prints the inverted value, for use with

10709

// BIC and MVN instructions. It is not useful otherwise but is

10710

// implemented for compatibility.

10711

if (ARM_AM::getT2SOImmVal(~CVal) != -1)

10712

break;

10713

} else {

10714

// A constant whose bitwise inverse can be used as an immediate

10715

// value in a data-processing instruction. This can be used in GCC

10716

// with a "B" modifier that prints the inverted value, for use with

10717

// BIC and MVN instructions. It is not useful otherwise but is

10718

// implemented for compatibility.

10719

if (ARM_AM::getSOImmVal(~CVal) != -1)

10720

break;

10721

}

10722

return;

10723

10724

case 'L':

10725

if (Subtarget->isThumb1Only()) {

10726

// This must be a constant between -7 and 7,

10727

// for 3-operand ADD/SUB immediate instructions.

10728

if (CVal >= -7 && CVal < 7)

10729

break;

10730

} else if (Subtarget->isThumb2()) {

10731

// A constant whose negation can be used as an immediate value in a

10732

// data-processing instruction. This can be used in GCC with an "n"

10733

// modifier that prints the negated value, for use with SUB

10734

// instructions. It is not useful otherwise but is implemented for

10735

// compatibility.

10736

if (ARM_AM::getT2SOImmVal(-CVal) != -1)

10737

break;

10738

} else {

10739

// A constant whose negation can be used as an immediate value in a

10740

// data-processing instruction. This can be used in GCC with an "n"

10741

// modifier that prints the negated value, for use with SUB

10742

// instructions. It is not useful otherwise but is implemented for

10743

// compatibility.

10744

if (ARM_AM::getSOImmVal(-CVal) != -1)

10745

break;

10746

}

10747

return;

10748

10749

case 'M':

10750

if (Subtarget->isThumb()) { // FIXME thumb2

10751

// This must be a multiple of 4 between 0 and 1020, for

10752

// ADD sp + immediate.

10753

if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0))

10754

break;

10755

} else {

10756

// A power of two or a constant between 0 and 32. This is used in

10757

// GCC for the shift amount on shifted register operands, but it is

10758

// useful in general for any shift amounts.

10759

if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0))

10760

break;

10761

}

10762

return;

10763

10764

case 'N':

10765

if (Subtarget->isThumb()) { // FIXME thumb2

10766

// This must be a constant between 0 and 31, for shift amounts.

10767

if (CVal >= 0 && CVal <= 31)

10768

break;

10769

}

10770

return;

10771

10772

case 'O':

10773

if (Subtarget->isThumb()) { // FIXME thumb2

10774

// This must be a multiple of 4 between -508 and 508, for

10775

// ADD/SUB sp = sp + immediate.

10776

if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0))

10777

break;

10778

}

10779

return;

10780

}

10781

Result = DAG.getTargetConstant(CVal, Op.getValueType());

10782

break;

10783

}

10784

10785

if (Result.getNode()) {

10786

Ops.push_back(Result);

10787

return;

10788

}

10789

return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);

10790

}

10791

10792

/// Lower an SDIVREM/UDIVREM node to a call to the matching divrem runtime
/// library routine.  Every operand of \p Op is passed as an argument, sign- or
/// zero-extended according to the opcode, and the call is declared to return
/// a two-element struct of the node's value type.  The first value of the
/// pair produced by LowerCallTo is returned.
SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
  assert(Subtarget->isTargetAEABI() && "Register-based DivRem lowering only");
  unsigned Opcode = Op->getOpcode();
  assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) &&
         "Invalid opcode for Div/Rem lowering");
  const bool Signed = (Opcode == ISD::SDIVREM);
  EVT ResVT = Op->getValueType(0);
  Type *ResTy = ResVT.getTypeForEVT(*DAG.getContext());

  // Pick the libcall that matches the signedness and the integer width.
  RTLIB::Libcall Call;
  switch (ResVT.getSimpleVT().SimpleTy) {
  default:
    llvm_unreachable("Unexpected request for libcall!");
  case MVT::i8:
    Call = Signed ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8;
    break;
  case MVT::i16:
    Call = Signed ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16;
    break;
  case MVT::i32:
    Call = Signed ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32;
    break;
  case MVT::i64:
    Call = Signed ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64;
    break;
  }

  SDValue EntryChain = DAG.getEntryNode();

  // Forward each operand of the node as a call argument.
  TargetLowering::ArgListTy CallArgs;
  TargetLowering::ArgListEntry Arg;
  const unsigned NumOps = Op->getNumOperands();
  for (unsigned Idx = 0; Idx != NumOps; ++Idx) {
    SDValue Operand = Op->getOperand(Idx);
    Arg.Node = Operand;
    Arg.Ty = Operand.getValueType().getTypeForEVT(*DAG.getContext());
    Arg.isSExt = Signed;
    Arg.isZExt = !Signed;
    CallArgs.push_back(Arg);
  }

  SDValue Target = DAG.getExternalSymbol(getLibcallName(Call), getPointerTy());

  // The routine yields two values of the result type, modelled as a struct.
  Type *PairTy = static_cast<Type *>(StructType::get(ResTy, ResTy, nullptr));

  SDLoc Loc(Op);
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(Loc)
      .setChain(EntryChain)
      .setCallee(getLibcallCallingConv(Call), PairTy, Target,
                 std::move(CallArgs), 0)
      .setInRegister()
      .setSExtResult(Signed)
      .setZExtResult(!Signed);

  return LowerCallTo(CLI).first;
}

10838

10839

/// Lower a dynamic stack allocation for Windows targets.  The requested size
/// (operand 1) is shifted right by two, copied into R4, and an
/// ARMISD::WIN__CHKSTK node is emitted; the stack pointer read back afterwards
/// is returned together with the updated chain.
SDValue
ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
  assert(Subtarget->isTargetWindows() && "unsupported target platform");
  SDLoc Loc(Op);

  // Operand 0 is the incoming chain, operand 1 the requested size.
  SDValue Ch = Op.getOperand(0);
  SDValue ByteCount = Op.getOperand(1);

  // Size >> 2.
  SDValue ShiftAmt = DAG.getConstant(2, MVT::i32);
  SDValue WordCount = DAG.getNode(ISD::SRL, Loc, MVT::i32, ByteCount, ShiftAmt);

  // Place the shifted size in R4 and glue the copy to the check node.
  Ch = DAG.getCopyToReg(Ch, Loc, ARM::R4, WordCount, SDValue());
  SDValue Glue = Ch.getValue(1);

  SDVTList ChkStkVTs = DAG.getVTList(MVT::Other, MVT::Glue);
  Ch = DAG.getNode(ARMISD::WIN__CHKSTK, Loc, ChkStkVTs, Ch, Glue);

  // Read SP after the check node; value #1 of the copy is its chain.
  SDValue StackPtr = DAG.getCopyFromReg(Ch, Loc, ARM::SP, MVT::i32);
  Ch = StackPtr.getValue(1);

  SDValue Results[2] = { StackPtr, Ch };
  return DAG.getMergeValues(Results, Loc);
}

10864

10865

/// Custom-lower an FP_EXTEND producing f64 on single-precision-only FPUs by
/// calling the runtime library routine selected by RTLIB::getFPEXT.
SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
  assert(Op.getValueType() == MVT::f64 && Subtarget->isFPOnlySP() &&
         "Unexpected type for custom-lowering FP_EXTEND");

  SDValue Source = Op.getOperand(0);
  EVT DestVT = Op.getValueType();

  // Look up the extension libcall for this source/destination type pair.
  RTLIB::Libcall ExtCall = RTLIB::getFPEXT(Source.getValueType(), DestVT);

  // Only the first element of the pair returned by makeLibCall is wanted.
  return makeLibCall(DAG, ExtCall, DestVT, &Source, 1,
                     /*isSigned*/ false, SDLoc(Op)).first;
}

10876

10877

/// Custom-lower an FP_ROUND whose source is f64 on single-precision-only FPUs
/// by calling the runtime library routine selected by RTLIB::getFPROUND.
SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
  assert(Op.getOperand(0).getValueType() == MVT::f64 &&
         Subtarget->isFPOnlySP() &&
         "Unexpected type for custom-lowering FP_ROUND");

  SDValue Source = Op.getOperand(0);
  EVT DestVT = Op.getValueType();

  // Look up the rounding libcall for this source/destination type pair.
  RTLIB::Libcall RoundCall = RTLIB::getFPROUND(Source.getValueType(), DestVT);

  // Only the first element of the pair returned by makeLibCall is wanted.
  return makeLibCall(DAG, RoundCall, DestVT, &Source, 1,
                     /*isSigned*/ false, SDLoc(Op)).first;
}

10889

10890

bool

10891

ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {

10892

// The ARM target isn't yet aware of offsets.

10893

return false;

10894

}

10895

10896

bool ARM::isBitFieldInvertedMask(unsigned v) {

10897

if (v == 0xffffffff)

10898

return false;

10899

10900

// there can be 1's on either or both "outsides", all the "inside"

10901

// bits must be 0's

10902

unsigned TO = CountTrailingOnes_32(v);

10903

unsigned LO = CountLeadingOnes_32(v);

10904

v = (v >> TO) << TO;

10905

v = (v << LO) >> LO;

10906

return v == 0;

10907

}

10908

10909

/// isFPImmLegal - Returns true if the target can instruction select the

10910

/// specified FP immediate natively. If false, the legalizer will

10911

/// materialize the FP immediate as a load from a constant pool.

10912

bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {

10913

if (!Subtarget->hasVFP3())

10914

return false;

10915

if (VT == MVT::f32)

10916

return ARM_AM::getFP32Imm(Imm) != -1;

10917

if (VT == MVT::f64 && !Subtarget->isFPOnlySP())

10918

return ARM_AM::getFP64Imm(Imm) != -1;

10919

return false;

10920

}

10921

10922

/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as

10923

/// MemIntrinsicNodes. The associated MachineMemOperands record the alignment

10924

/// specified in the intrinsic calls.

10925

bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,

10926

const CallInst &I,

10927

unsigned Intrinsic) const {

10928

switch (Intrinsic) {

10929

case Intrinsic::arm_neon_vld1:

10930

case Intrinsic::arm_neon_vld2:

10931

case Intrinsic::arm_neon_vld3:

10932

case Intrinsic::arm_neon_vld4:

10933

case Intrinsic::arm_neon_vld2lane:

10934

case Intrinsic::arm_neon_vld3lane:

10935

case Intrinsic::arm_neon_vld4lane: {

10936

Info.opc = ISD::INTRINSIC_W_CHAIN;

10937

// Conservatively set memVT to the entire set of vectors loaded.

10938

uint64_t NumElts = getDataLayout()->getTypeAllocSize(I.getType()) / 8;

10939

Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);

10940

Info.ptrVal = I.getArgOperand(0);

10941

Info.offset = 0;

10942

Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);

10943

Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();

10944

Info.vol = false; // volatile loads with NEON intrinsics not supported

10945

Info.readMem = true;

10946

Info.writeMem = false;

10947

return true;

10948

}

10949

case Intrinsic::arm_neon_vst1:

10950

case Intrinsic::arm_neon_vst2:

10951

case Intrinsic::arm_neon_vst3:

10952

case Intrinsic::arm_neon_vst4:

10953

case Intrinsic::arm_neon_vst2lane:

10954

case Intrinsic::arm_neon_vst3lane:

10955

case Intrinsic::arm_neon_vst4lane: {

10956

Info.opc = ISD::INTRINSIC_VOID;

10957

// Conservatively set memVT to the entire set of vectors stored.

10958

unsigned NumElts = 0;

10959

for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {

10960

Type *ArgTy = I.getArgOperand(ArgI)->getType();

10961

if (!ArgTy->isVectorTy())

10962

break;

10963

NumElts += getDataLayout()->getTypeAllocSize(ArgTy) / 8;

10964

}

10965

Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);

10966

Info.ptrVal = I.getArgOperand(0);

10967

Info.offset = 0;

10968

Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);

10969

Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();

10970

Info.vol = false; // volatile stores with NEON intrinsics not supported

10971

Info.readMem = false;

10972

Info.writeMem = true;

10973

return true;

10974

}

10975

case Intrinsic::arm_ldaex:

10976

case Intrinsic::arm_ldrex: {

10977

PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());

10978

Info.opc = ISD::INTRINSIC_W_CHAIN;

10979

Info.memVT = MVT::getVT(PtrTy->getElementType());

10980

Info.ptrVal = I.getArgOperand(0);

10981

Info.offset = 0;

10982

Info.align = getDataLayout()->getABITypeAlignment(PtrTy->getElementType());

10983

Info.vol = true;

10984

Info.readMem = true;

10985

Info.writeMem = false;

10986

return true;

10987

}

10988

case Intrinsic::arm_stlex:

10989

case Intrinsic::arm_strex: {

10990

PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType());

10991

Info.opc = ISD::INTRINSIC_W_CHAIN;

10992

Info.memVT = MVT::getVT(PtrTy->getElementType());

10993

Info.ptrVal = I.getArgOperand(1);

10994

Info.offset = 0;

10995

Info.align = getDataLayout()->getABITypeAlignment(PtrTy->getElementType());

10996

Info.vol = true;

10997

Info.readMem = false;

10998

Info.writeMem = true;

10999

return true;

11000

}

11001

case Intrinsic::arm_stlexd:

11002

case Intrinsic::arm_strexd: {

11003

Info.opc = ISD::INTRINSIC_W_CHAIN;

11004

Info.memVT = MVT::i64;

11005

Info.ptrVal = I.getArgOperand(2);

11006

Info.offset = 0;

11007

Info.align = 8;

11008

Info.vol = true;

11009

Info.readMem = false;

11010

Info.writeMem = true;

11011

return true;

11012

}

11013

case Intrinsic::arm_ldaexd:

11014

case Intrinsic::arm_ldrexd: {

11015

Info.opc = ISD::INTRINSIC_W_CHAIN;

11016

Info.memVT = MVT::i64;

11017

Info.ptrVal = I.getArgOperand(0);

11018

Info.offset = 0;

11019

Info.align = 8;

11020

Info.vol = true;

11021

Info.readMem = true;

11022

Info.writeMem = false;

11023

return true;

11024

}

11025

default:

11026

break;

11027

}

11028

11029

return false;

11030

}

11031

11032

/// \brief Returns true if it is beneficial to convert a load of a constant

11033

/// to just the constant itself.

11034

bool ARMTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,

11035

Type *Ty) const {

11036

assert(Ty->isIntegerTy())((Ty->isIntegerTy()) ? static_cast<void> (0) : __assert_fail
("Ty->isIntegerTy()", "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 11036, __PRETTY_FUNCTION__));

11037

11038

unsigned Bits = Ty->getPrimitiveSizeInBits();

11039

if (Bits == 0 || Bits > 32)

11040

return false;

11041

return true;

11042

}

11043

11044

/// Always reports that load-linked/store-conditional is available.
bool ARMTargetLowering::hasLoadLinkedStoreConditional() const {
  return true;
}

11045

11046

/// Emit a data memory barrier at the builder's insertion point.  Targets with
/// a data barrier get a call to the arm_dmb intrinsic (always ARM_MB::SY on
/// M-class); v6 targets in ARM mode without one get the equivalent arm_mcr
/// call.  Any other target must not reach this function.
Instruction* ARMTargetLowering::makeDMB(IRBuilder<> &Builder,
                                         ARM_MB::MemBOpt Domain) const {
  Module *M = Builder.GetInsertBlock()->getParent()->getParent();

  if (Subtarget->hasDataBarrier()) {
    Function *DMB = llvm::Intrinsic::getDeclaration(M, Intrinsic::arm_dmb);
    // Only a full system barrier exists in the M-class architectures.
    if (Subtarget->isMClass())
      Domain = ARM_MB::SY;
    Constant *DomainOperand = Builder.getInt32(Domain);
    return Builder.CreateCall(DMB, DomainOperand);
  }

  // The target has no DMB, so see what fallback we can use.  Thumb1 and
  // pre-v6 ARM mode use libcalls for atomic accesses instead of barriers and
  // should never get here.
  if (!Subtarget->hasV6Ops() || Subtarget->isThumb())
    llvm_unreachable("makeDMB on a target so old that it has no barriers");

  // Some ARMv6 cpus can support data barriers with an mcr instruction.
  Function *MCR = llvm::Intrinsic::getDeclaration(M, Intrinsic::arm_mcr);
  Value *McrOperands[6] = {Builder.getInt32(15), Builder.getInt32(0),
                           Builder.getInt32(0),  Builder.getInt32(7),
                           Builder.getInt32(10), Builder.getInt32(5)};
  return Builder.CreateCall(MCR, McrOperands);
}

11074

11075

// Based on http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html

11076

Instruction* ARMTargetLowering::emitLeadingFence(IRBuilder<> &Builder,

11077

AtomicOrdering Ord, bool IsStore,

11078

bool IsLoad) const {

11079

if (!getInsertFencesForAtomic())

11080

return nullptr;

11081

11082

switch (Ord) {

11083

case NotAtomic:

11084

case Unordered:

11085

llvm_unreachable("Invalid fence: unordered/non-atomic")::llvm::llvm_unreachable_internal("Invalid fence: unordered/non-atomic"
, "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 11085);

11086

case Monotonic:

11087

case Acquire:

11088

return nullptr; // Nothing to do

11089

case SequentiallyConsistent:

11090

if (!IsStore)

11091

return nullptr; // Nothing to do

11092

/*FALLTHROUGH*/

11093

case Release:

11094

case AcquireRelease:

11095

if (Subtarget->isSwift())

11096

return makeDMB(Builder, ARM_MB::ISHST);

11097

// FIXME: add a comment with a link to documentation justifying this.

11098

else

11099

return makeDMB(Builder, ARM_MB::ISH);

11100

}

11101

llvm_unreachable("Unknown fence ordering in emitLeadingFence")::llvm::llvm_unreachable_internal("Unknown fence ordering in emitLeadingFence"
, "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 11101);

11102

}

11103

11104

// Emits the barrier that must follow an atomic access with ordering Ord, or
// returns nullptr when no barrier is needed or fences are not being inserted.
// IsStore and IsLoad are not consulted.
Instruction* ARMTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
                                          AtomicOrdering Ord, bool IsStore,
                                          bool IsLoad) const {
  if (!getInsertFencesForAtomic())
    return nullptr;

  switch (Ord) {
  case NotAtomic:
  case Unordered:
    llvm_unreachable("Invalid fence: unordered/not-atomic");

  case Acquire:
  case AcquireRelease:
  case SequentiallyConsistent:
    return makeDMB(Builder, ARM_MB::ISH);

  case Monotonic:
  case Release:
    // Nothing to do
    return nullptr;
  }
  llvm_unreachable("Unknown fence ordering in emitTrailingFence");
}

11124

11125

// Loads and stores less than 64-bits are already atomic; ones above that

11126

// are doomed anyway, so defer to the default libcall and blame the OS when

11127

// things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit

11128

// anything for those.

11129

bool ARMTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {

11130

unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();

11131

return (Size == 64) && !Subtarget->isMClass();

11132

}

11133

11134

// Loads and stores less than 64-bits are already atomic; ones above that

11135

// are doomed anyway, so defer to the default libcall and blame the OS when

11136

// things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit

11137

// anything for those.

11138

// FIXME: ldrd and strd are atomic if the CPU has LPAE (e.g. A15 has that

11139

// guarantee, see DDI0406C ARM architecture reference manual,

11140

// sections A8.8.72-74 LDRD)

11141

bool ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {

11142

unsigned Size = LI->getType()->getPrimitiveSizeInBits();

11143

return (Size == 64) && !Subtarget->isMClass();

11144

}

11145

11146

// For the real atomic operations, we have ldrex/strex up to 32 bits,

11147

// and up to 64 bits on the non-M profiles

11148

bool ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {

11149

unsigned Size = AI->getType()->getPrimitiveSizeInBits();

11150

return Size <= (Subtarget->isMClass() ? 32U : 64U);

11151

}

11152

11153

// This has so far only been implemented for MachO.

11154

bool ARMTargetLowering::useLoadStackGuardNode() const {

11155

return Subtarget->isTargetMachO();

11156

}

11157

11158

bool ARMTargetLowering::canCombineStoreAndExtract(Type *VectorTy, Value *Idx,

11159

unsigned &Cost) const {

11160

// If we do not have NEON, vector types are not natively supported.

11161

if (!Subtarget->hasNEON())

11162

return false;

11163

11164

// Floating point values and vector values map to the same register file.

11165

// Therefore, althought we could do a store extract of a vector type, this is

11166

// better to leave at float as we have more freedom in the addressing mode for

11167

// those.

11168

if (VectorTy->isFPOrFPVectorTy())

11169

return false;

11170

11171

// If the index is unknown at compile time, this is very expensive to lower

11172

// and it is not possible to combine the store with the extract.

11173

if (!isa<ConstantInt>(Idx))

11174

return false;

11175

11176

assert(VectorTy->isVectorTy() && "VectorTy is not a vector type")((VectorTy->isVectorTy() && "VectorTy is not a vector type"
) ? static_cast<void> (0) : __assert_fail ("VectorTy->isVectorTy() && \"VectorTy is not a vector type\""
, "/tmp/buildd/llvm-toolchain-snapshot-3.6~svn224240/lib/Target/ARM/ARMISelLowering.cpp"
, 11176, __PRETTY_FUNCTION__));

11177

unsigned BitWidth = cast<VectorType>(VectorTy)->getBitWidth();

11178

// We can do a store + vector extract on any vector that fits perfectly in a D

11179

// or Q register.

11180

if (BitWidth == 64 || BitWidth == 128) {

11181

Cost = 0;

11182

return true;

11183

}

11184

return false;

11185

}

11186

11187

/// Emit a call to the exclusive-load intrinsic for \p Addr and return the
/// loaded value with the pointee type of \p Addr.  The acquire variants
/// (ldaex/ldaexd) are used when \p Ord is at least acquire, otherwise
/// ldrex/ldrexd.
Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
                                         AtomicOrdering Ord) const {
  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
  Type *ValTy = cast<PointerType>(Addr->getType())->getElementType();
  const bool Acquire = isAtLeastAcquire(Ord);

  if (ValTy->getPrimitiveSizeInBits() != 64) {
    // The intrinsic is overloaded on the pointer type; its result is
    // truncated or bitcast to the pointee type.
    Type *OverloadTys[] = { Addr->getType() };
    Intrinsic::ID LoadID =
        Acquire ? Intrinsic::arm_ldaex : Intrinsic::arm_ldrex;
    Function *LoadFn = llvm::Intrinsic::getDeclaration(M, LoadID, OverloadTys);

    Value *Loaded = Builder.CreateCall(LoadFn, Addr);
    return Builder.CreateTruncOrBitCast(Loaded, ValTy);
  }

  // Since i64 isn't legal and intrinsics don't get type-lowered, the ldrexd
  // intrinsic must return {i32, i32} and the halves have to be recombined
  // into a single i64 here.
  Intrinsic::ID PairID =
      Acquire ? Intrinsic::arm_ldaexd : Intrinsic::arm_ldrexd;
  Function *LoadPairFn = llvm::Intrinsic::getDeclaration(M, PairID);

  Value *BytePtr =
      Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
  Value *Pair = Builder.CreateCall(LoadPairFn, BytePtr, "lohi");

  Value *Lo = Builder.CreateExtractValue(Pair, 0, "lo");
  Value *Hi = Builder.CreateExtractValue(Pair, 1, "hi");
  // The two halves trade places on big-endian subtargets.
  if (!Subtarget->isLittle())
    std::swap(Lo, Hi);

  Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
  Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
  Value *HiShifted = Builder.CreateShl(Hi, ConstantInt::get(ValTy, 32));
  return Builder.CreateOr(Lo, HiShifted, "val64");
}

11222

11223

/// Emit a call to the exclusive-store intrinsic storing \p Val to \p Addr and
/// return the call.  The release variants (stlex/stlexd) are used when \p Ord
/// is at least release, otherwise strex/strexd.
Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val,
                                               Value *Addr,
                                               AtomicOrdering Ord) const {
  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
  const bool Release = isAtLeastRelease(Ord);

  if (Val->getType()->getPrimitiveSizeInBits() != 64) {
    // The intrinsic is overloaded on the pointer type; the value is
    // zero-extended or bitcast to the intrinsic's first parameter type.
    Intrinsic::ID StoreID =
        Release ? Intrinsic::arm_stlex : Intrinsic::arm_strex;
    Type *OverloadTys[] = { Addr->getType() };
    Function *StoreFn = Intrinsic::getDeclaration(M, StoreID, OverloadTys);

    Type *ParamTy = StoreFn->getFunctionType()->getParamType(0);
    Value *Widened = Builder.CreateZExtOrBitCast(Val, ParamTy);
    return Builder.CreateCall2(StoreFn, Widened, Addr);
  }

  // Since the intrinsics must have legal type, the i64 intrinsics take two
  // parameters: "i32, i32". Val is split into that form before the call.
  Intrinsic::ID PairID =
      Release ? Intrinsic::arm_stlexd : Intrinsic::arm_strexd;
  Function *StorePairFn = Intrinsic::getDeclaration(M, PairID);
  Type *I32Ty = Type::getInt32Ty(M->getContext());

  Value *Lo = Builder.CreateTrunc(Val, I32Ty, "lo");
  Value *Shifted = Builder.CreateLShr(Val, 32);
  Value *Hi = Builder.CreateTrunc(Shifted, I32Ty, "hi");
  // The two halves trade places on big-endian subtargets.
  if (!Subtarget->isLittle())
    std::swap(Lo, Hi);

  Value *BytePtr =
      Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
  return Builder.CreateCall3(StorePairFn, Lo, Hi, BytePtr);
}

11255

11256

/// Base element kind seen so far while classifying a type as a homogeneous
/// aggregate.
enum HABaseType {
  HA_UNKNOWN = 0, // no base element seen yet
  HA_FLOAT = 1,   // float elements
  HA_DOUBLE = 2,  // double elements
  HA_VECT64 = 3,  // vectors with a bit width of 64
  HA_VECT128 = 4  // vectors with a bit width of 128
};

11263

11264

static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base,

11265

uint64_t &Members) {

11266

if (const StructType *ST = dyn_cast<StructType>(Ty)) {

11267

for (unsigned i = 0; i < ST->getNumElements(); ++i) {

11268

uint64_t SubMembers = 0;

11269

if (!isHomogeneousAggregate(ST->getElementType(i), Base, SubMembers))

11270

return false;

11271

Members += SubMembers;

11272

}

11273

} else if (const ArrayType *AT = dyn_cast<ArrayType>(Ty)) {

11274

uint64_t SubMembers = 0;

11275

if (!isHomogeneousAggregate(AT->getElementType(), Base, SubMembers))

11276

return false;

11277

Members += SubMembers * AT->getNumElements();

11278

} else if (Ty->isFloatTy()) {

11279

if (Base != HA_UNKNOWN && Base != HA_FLOAT)

11280

return false;

11281

Members = 1;

11282

Base = HA_FLOAT;

11283

} else if (Ty->isDoubleTy()) {

11284

if (Base != HA_UNKNOWN && Base != HA_DOUBLE)

11285

return false;

11286

Members = 1;

11287

Base = HA_DOUBLE;

11288

} else if (const VectorType *VT = dyn_cast<VectorType>(Ty)) {

11289

Members = 1;

11290

switch (Base) {

11291

case HA_FLOAT:

11292

case HA_DOUBLE:

11293

return false;

11294

case HA_VECT64:

11295

return VT->getBitWidth() == 64;

11296

case HA_VECT128:

11297

return VT->getBitWidth() == 128;

11298

case HA_UNKNOWN:

11299

switch (VT->getBitWidth()) {

11300

case 64:

11301

Base = HA_VECT64;

11302

return true;

11303

case 128:

11304

Base = HA_VECT128;

11305

return true;

11306

default:

11307

return false;

11308

}

11309

}

11310

}

11311

11312

return (Members > 0 && Members <= 4);

11313

}

11314

11315

/// \brief Return true if a type is an AAPCS-VFP homogeneous aggregate.

11316

bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters(

11317

Type *Ty, CallingConv::ID CallConv, bool isVarArg) const {

11318

if (getEffectiveCallingConv(CallConv, isVarArg) !=

11319

CallingConv::ARM_AAPCS_VFP)

11320

return false;

11321

11322

HABaseType Base = HA_UNKNOWN;

11323

uint64_t Members = 0;

11324

bool result = isHomogeneousAggregate(Ty, Base, Members);

11325

DEBUG(dbgs() << "isHA: " << result << " "; Ty->dump())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("arm-isel")) { dbgs() << "isHA: " << result <<
" "; Ty->dump(); } } while (0);

11326

return result;

11327

}