File: lib/Target/ARM/ARMFrameLowering.cpp
Location: line 1756, column 9
Description: Called C++ object pointer is null
1 | //===-- ARMFrameLowering.cpp - ARM Frame Information ----------------------===// | |||
2 | // | |||
3 | // The LLVM Compiler Infrastructure | |||
4 | // | |||
5 | // This file is distributed under the University of Illinois Open Source | |||
6 | // License. See LICENSE.TXT for details. | |||
7 | // | |||
8 | //===----------------------------------------------------------------------===// | |||
9 | // | |||
10 | // This file contains the ARM implementation of TargetFrameLowering class. | |||
11 | // | |||
12 | //===----------------------------------------------------------------------===// | |||
13 | ||||
14 | #include "ARMFrameLowering.h" | |||
15 | #include "ARMBaseInstrInfo.h" | |||
16 | #include "ARMBaseRegisterInfo.h" | |||
17 | #include "ARMConstantPoolValue.h" | |||
18 | #include "ARMMachineFunctionInfo.h" | |||
19 | #include "MCTargetDesc/ARMAddressingModes.h" | |||
20 | #include "llvm/CodeGen/MachineFrameInfo.h" | |||
21 | #include "llvm/CodeGen/MachineFunction.h" | |||
22 | #include "llvm/CodeGen/MachineInstrBuilder.h" | |||
23 | #include "llvm/CodeGen/MachineModuleInfo.h" | |||
24 | #include "llvm/CodeGen/MachineRegisterInfo.h" | |||
25 | #include "llvm/CodeGen/RegisterScavenging.h" | |||
26 | #include "llvm/IR/CallingConv.h" | |||
27 | #include "llvm/IR/Function.h" | |||
28 | #include "llvm/MC/MCContext.h" | |||
29 | #include "llvm/Support/CommandLine.h" | |||
30 | #include "llvm/Target/TargetOptions.h" | |||
31 | ||||
32 | using namespace llvm; | |||
33 | ||||
34 | static cl::opt<bool> | |||
35 | SpillAlignedNEONRegs("align-neon-spills", cl::Hidden, cl::init(true), | |||
36 | cl::desc("Align ARM NEON spills in prolog and epilog")); | |||
37 | ||||
38 | static MachineBasicBlock::iterator | |||
39 | skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI, | |||
40 | unsigned NumAlignedDPRCS2Regs); | |||
41 | ||||
42 | ARMFrameLowering::ARMFrameLowering(const ARMSubtarget &sti) | |||
43 | : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, 4), | |||
44 | STI(sti) {} | |||
45 | ||||
46 | bool ARMFrameLowering::noFramePointerElim(const MachineFunction &MF) const { | |||
47 | // iOS always has a FP for backtracking, force other targets to keep their FP | |||
48 | // when doing FastISel. The emitted code is currently superior, and in cases | |||
49 | // like test-suite's lencod FastISel isn't quite correct when FP is eliminated. | |||
50 | return TargetFrameLowering::noFramePointerElim(MF) || | |||
51 | MF.getSubtarget<ARMSubtarget>().useFastISel(); | |||
52 | } | |||
53 | ||||
54 | /// hasFP - Return true if the specified function should have a dedicated frame | |||
55 | /// pointer register. This is true if the function has variable sized allocas | |||
56 | /// or if frame pointer elimination is disabled. | |||
57 | bool ARMFrameLowering::hasFP(const MachineFunction &MF) const { | |||
58 | const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); | |||
59 | ||||
60 | // iOS requires FP not to be clobbered for backtracing purpose. | |||
61 | if (STI.isTargetIOS()) | |||
62 | return true; | |||
63 | ||||
64 | const MachineFrameInfo *MFI = MF.getFrameInfo(); | |||
65 | // Always eliminate non-leaf frame pointers. | |||
66 | return ((MF.getTarget().Options.DisableFramePointerElim(MF) && | |||
67 | MFI->hasCalls()) || | |||
68 | RegInfo->needsStackRealignment(MF) || | |||
69 | MFI->hasVarSizedObjects() || | |||
70 | MFI->isFrameAddressTaken()); | |||
71 | } | |||
72 | ||||
73 | /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is | |||
74 | /// not required, we reserve argument space for call sites in the function | |||
75 | /// immediately on entry to the current function. This eliminates the need for | |||
76 | /// add/sub sp brackets around call sites. Returns true if the call frame is | |||
77 | /// included as part of the stack frame. | |||
78 | bool ARMFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { | |||
79 | const MachineFrameInfo *FFI = MF.getFrameInfo(); | |||
80 | unsigned CFSize = FFI->getMaxCallFrameSize(); | |||
81 | // It's not always a good idea to include the call frame as part of the | |||
82 | // stack frame. ARM (especially Thumb) has small immediate offset to | |||
83 | // address the stack frame. So a large call frame can cause poor codegen | |||
84 | // and may even makes it impossible to scavenge a register. | |||
85 | if (CFSize >= ((1 << 12) - 1) / 2) // Half of imm12 | |||
86 | return false; | |||
87 | ||||
88 | return !MF.getFrameInfo()->hasVarSizedObjects(); | |||
89 | } | |||
90 | ||||
91 | /// canSimplifyCallFramePseudos - If there is a reserved call frame, the | |||
92 | /// call frame pseudos can be simplified. Unlike most targets, having a FP | |||
93 | /// is not sufficient here since we still may reference some objects via SP | |||
94 | /// even when FP is available in Thumb2 mode. | |||
95 | bool | |||
96 | ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const { | |||
97 | return hasReservedCallFrame(MF) || MF.getFrameInfo()->hasVarSizedObjects(); | |||
98 | } | |||
99 | ||||
100 | static bool isCSRestore(MachineInstr *MI, | |||
101 | const ARMBaseInstrInfo &TII, | |||
102 | const MCPhysReg *CSRegs) { | |||
103 | // Integer spill area is handled with "pop". | |||
104 | if (isPopOpcode(MI->getOpcode())) { | |||
105 | // The first two operands are predicates. The last two are | |||
106 | // imp-def and imp-use of SP. Check everything in between. | |||
107 | for (int i = 5, e = MI->getNumOperands(); i != e; ++i) | |||
108 | if (!isCalleeSavedRegister(MI->getOperand(i).getReg(), CSRegs)) | |||
109 | return false; | |||
110 | return true; | |||
111 | } | |||
112 | if ((MI->getOpcode() == ARM::LDR_POST_IMM || | |||
113 | MI->getOpcode() == ARM::LDR_POST_REG || | |||
114 | MI->getOpcode() == ARM::t2LDR_POST) && | |||
115 | isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs) && | |||
116 | MI->getOperand(1).getReg() == ARM::SP) | |||
117 | return true; | |||
118 | ||||
119 | return false; | |||
120 | } | |||
121 | ||||
122 | static void emitRegPlusImmediate(bool isARM, MachineBasicBlock &MBB, | |||
123 | MachineBasicBlock::iterator &MBBI, DebugLoc dl, | |||
124 | const ARMBaseInstrInfo &TII, unsigned DestReg, | |||
125 | unsigned SrcReg, int NumBytes, | |||
126 | unsigned MIFlags = MachineInstr::NoFlags, | |||
127 | ARMCC::CondCodes Pred = ARMCC::AL, | |||
128 | unsigned PredReg = 0) { | |||
129 | if (isARM) | |||
130 | emitARMRegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes, | |||
131 | Pred, PredReg, TII, MIFlags); | |||
132 | else | |||
133 | emitT2RegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes, | |||
134 | Pred, PredReg, TII, MIFlags); | |||
135 | } | |||
136 | ||||
137 | static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB, | |||
138 | MachineBasicBlock::iterator &MBBI, DebugLoc dl, | |||
139 | const ARMBaseInstrInfo &TII, int NumBytes, | |||
140 | unsigned MIFlags = MachineInstr::NoFlags, | |||
141 | ARMCC::CondCodes Pred = ARMCC::AL, | |||
142 | unsigned PredReg = 0) { | |||
143 | emitRegPlusImmediate(isARM, MBB, MBBI, dl, TII, ARM::SP, ARM::SP, NumBytes, | |||
144 | MIFlags, Pred, PredReg); | |||
145 | } | |||
146 | ||||
147 | static int sizeOfSPAdjustment(const MachineInstr *MI) { | |||
148 | int RegSize; | |||
149 | switch (MI->getOpcode()) { | |||
150 | case ARM::VSTMDDB_UPD: | |||
151 | RegSize = 8; | |||
152 | break; | |||
153 | case ARM::STMDB_UPD: | |||
154 | case ARM::t2STMDB_UPD: | |||
155 | RegSize = 4; | |||
156 | break; | |||
157 | case ARM::t2STR_PRE: | |||
158 | case ARM::STR_PRE_IMM: | |||
159 | return 4; | |||
160 | default: | |||
161 | llvm_unreachable("Unknown push or pop like instruction")::llvm::llvm_unreachable_internal("Unknown push or pop like instruction" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/ARM/ARMFrameLowering.cpp" , 161); | |||
162 | } | |||
163 | ||||
164 | int count = 0; | |||
165 | // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+ | |||
166 | // pred) so the list starts at 4. | |||
167 | for (int i = MI->getNumOperands() - 1; i >= 4; --i) | |||
168 | count += RegSize; | |||
169 | return count; | |||
170 | } | |||
171 | ||||
172 | static bool WindowsRequiresStackProbe(const MachineFunction &MF, | |||
173 | size_t StackSizeInBytes) { | |||
174 | const MachineFrameInfo *MFI = MF.getFrameInfo(); | |||
175 | const Function *F = MF.getFunction(); | |||
176 | unsigned StackProbeSize = (MFI->getStackProtectorIndex() > 0) ? 4080 : 4096; | |||
177 | if (F->hasFnAttribute("stack-probe-size")) | |||
178 | F->getFnAttribute("stack-probe-size") | |||
179 | .getValueAsString() | |||
180 | .getAsInteger(0, StackProbeSize); | |||
181 | return StackSizeInBytes >= StackProbeSize; | |||
182 | } | |||
183 | ||||
184 | namespace { | |||
185 | struct StackAdjustingInsts { | |||
186 | struct InstInfo { | |||
187 | MachineBasicBlock::iterator I; | |||
188 | unsigned SPAdjust; | |||
189 | bool BeforeFPSet; | |||
190 | }; | |||
191 | ||||
192 | SmallVector<InstInfo, 4> Insts; | |||
193 | ||||
194 | void addInst(MachineBasicBlock::iterator I, unsigned SPAdjust, | |||
195 | bool BeforeFPSet = false) { | |||
196 | InstInfo Info = {I, SPAdjust, BeforeFPSet}; | |||
197 | Insts.push_back(Info); | |||
198 | } | |||
199 | ||||
200 | void addExtraBytes(const MachineBasicBlock::iterator I, unsigned ExtraBytes) { | |||
201 | auto Info = std::find_if(Insts.begin(), Insts.end(), | |||
202 | [&](InstInfo &Info) { return Info.I == I; }); | |||
203 | assert(Info != Insts.end() && "invalid sp adjusting instruction")((Info != Insts.end() && "invalid sp adjusting instruction" ) ? static_cast<void> (0) : __assert_fail ("Info != Insts.end() && \"invalid sp adjusting instruction\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/ARM/ARMFrameLowering.cpp" , 203, __PRETTY_FUNCTION__)); | |||
204 | Info->SPAdjust += ExtraBytes; | |||
205 | } | |||
206 | ||||
207 | void emitDefCFAOffsets(MachineModuleInfo &MMI, MachineBasicBlock &MBB, | |||
208 | DebugLoc dl, const ARMBaseInstrInfo &TII, bool HasFP) { | |||
209 | unsigned CFAOffset = 0; | |||
210 | for (auto &Info : Insts) { | |||
211 | if (HasFP && !Info.BeforeFPSet) | |||
212 | return; | |||
213 | ||||
214 | CFAOffset -= Info.SPAdjust; | |||
215 | unsigned CFIIndex = MMI.addFrameInst( | |||
216 | MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); | |||
217 | BuildMI(MBB, std::next(Info.I), dl, | |||
218 | TII.get(TargetOpcode::CFI_INSTRUCTION)) | |||
219 | .addCFIIndex(CFIIndex) | |||
220 | .setMIFlags(MachineInstr::FrameSetup); | |||
221 | } | |||
222 | } | |||
223 | }; | |||
224 | } | |||
225 | ||||
226 | /// Emit an instruction sequence that will align the address in | |||
227 | /// register Reg by zero-ing out the lower bits. For versions of the | |||
228 | /// architecture that support Neon, this must be done in a single | |||
229 | /// instruction, since skipAlignedDPRCS2Spills assumes it is done in a | |||
230 | /// single instruction. That function only gets called when optimizing | |||
231 | /// spilling of D registers on a core with the Neon instruction set | |||
232 | /// present. | |||
233 | static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI, | |||
234 | const TargetInstrInfo &TII, | |||
235 | MachineBasicBlock &MBB, | |||
236 | MachineBasicBlock::iterator MBBI, | |||
237 | DebugLoc DL, const unsigned Reg, | |||
238 | const unsigned Alignment, | |||
239 | const bool MustBeSingleInstruction) { | |||
240 | const ARMSubtarget &AST = | |||
241 | static_cast<const ARMSubtarget &>(MF.getSubtarget()); | |||
242 | const bool CanUseBFC = AST.hasV6T2Ops() || AST.hasV7Ops(); | |||
243 | const unsigned AlignMask = Alignment - 1; | |||
244 | const unsigned NrBitsToZero = countTrailingZeros(Alignment); | |||
245 | assert(!AFI->isThumb1OnlyFunction() && "Thumb1 not supported")((!AFI->isThumb1OnlyFunction() && "Thumb1 not supported" ) ? static_cast<void> (0) : __assert_fail ("!AFI->isThumb1OnlyFunction() && \"Thumb1 not supported\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/ARM/ARMFrameLowering.cpp" , 245, __PRETTY_FUNCTION__)); | |||
246 | if (!AFI->isThumbFunction()) { | |||
247 | // if the BFC instruction is available, use that to zero the lower | |||
248 | // bits: | |||
249 | // bfc Reg, #0, log2(Alignment) | |||
250 | // otherwise use BIC, if the mask to zero the required number of bits | |||
251 | // can be encoded in the bic immediate field | |||
252 | // bic Reg, Reg, Alignment-1 | |||
253 | // otherwise, emit | |||
254 | // lsr Reg, Reg, log2(Alignment) | |||
255 | // lsl Reg, Reg, log2(Alignment) | |||
256 | if (CanUseBFC) { | |||
257 | AddDefaultPred(BuildMI(MBB, MBBI, DL, TII.get(ARM::BFC), Reg) | |||
258 | .addReg(Reg, RegState::Kill) | |||
259 | .addImm(~AlignMask)); | |||
260 | } else if (AlignMask <= 255) { | |||
261 | AddDefaultCC( | |||
262 | AddDefaultPred(BuildMI(MBB, MBBI, DL, TII.get(ARM::BICri), Reg) | |||
263 | .addReg(Reg, RegState::Kill) | |||
264 | .addImm(AlignMask))); | |||
265 | } else { | |||
266 | assert(!MustBeSingleInstruction &&((!MustBeSingleInstruction && "Shouldn't call emitAligningInstructions demanding a single " "instruction to be emitted for large stack alignment for a target " "without BFC.") ? static_cast<void> (0) : __assert_fail ("!MustBeSingleInstruction && \"Shouldn't call emitAligningInstructions demanding a single \" \"instruction to be emitted for large stack alignment for a target \" \"without BFC.\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/ARM/ARMFrameLowering.cpp" , 269, __PRETTY_FUNCTION__)) | |||
267 | "Shouldn't call emitAligningInstructions demanding a single "((!MustBeSingleInstruction && "Shouldn't call emitAligningInstructions demanding a single " "instruction to be emitted for large stack alignment for a target " "without BFC.") ? static_cast<void> (0) : __assert_fail ("!MustBeSingleInstruction && \"Shouldn't call emitAligningInstructions demanding a single \" \"instruction to be emitted for large stack alignment for a target \" \"without BFC.\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/ARM/ARMFrameLowering.cpp" , 269, __PRETTY_FUNCTION__)) | |||
268 | "instruction to be emitted for large stack alignment for a target "((!MustBeSingleInstruction && "Shouldn't call emitAligningInstructions demanding a single " "instruction to be emitted for large stack alignment for a target " "without BFC.") ? static_cast<void> (0) : __assert_fail ("!MustBeSingleInstruction && \"Shouldn't call emitAligningInstructions demanding a single \" \"instruction to be emitted for large stack alignment for a target \" \"without BFC.\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/ARM/ARMFrameLowering.cpp" , 269, __PRETTY_FUNCTION__)) | |||
269 | "without BFC.")((!MustBeSingleInstruction && "Shouldn't call emitAligningInstructions demanding a single " "instruction to be emitted for large stack alignment for a target " "without BFC.") ? static_cast<void> (0) : __assert_fail ("!MustBeSingleInstruction && \"Shouldn't call emitAligningInstructions demanding a single \" \"instruction to be emitted for large stack alignment for a target \" \"without BFC.\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/ARM/ARMFrameLowering.cpp" , 269, __PRETTY_FUNCTION__)); | |||
270 | AddDefaultCC(AddDefaultPred( | |||
271 | BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg) | |||
272 | .addReg(Reg, RegState::Kill) | |||
273 | .addImm(ARM_AM::getSORegOpc(ARM_AM::lsr, NrBitsToZero)))); | |||
274 | AddDefaultCC(AddDefaultPred( | |||
275 | BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg) | |||
276 | .addReg(Reg, RegState::Kill) | |||
277 | .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, NrBitsToZero)))); | |||
278 | } | |||
279 | } else { | |||
280 | // Since this is only reached for Thumb-2 targets, the BFC instruction | |||
281 | // should always be available. | |||
282 | assert(CanUseBFC)((CanUseBFC) ? static_cast<void> (0) : __assert_fail ("CanUseBFC" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/ARM/ARMFrameLowering.cpp" , 282, __PRETTY_FUNCTION__)); | |||
283 | AddDefaultPred(BuildMI(MBB, MBBI, DL, TII.get(ARM::t2BFC), Reg) | |||
284 | .addReg(Reg, RegState::Kill) | |||
285 | .addImm(~AlignMask)); | |||
286 | } | |||
287 | } | |||
288 | ||||
289 | void ARMFrameLowering::emitPrologue(MachineFunction &MF, | |||
290 | MachineBasicBlock &MBB) const { | |||
291 | assert(&MBB == &MF.front() && "Shrink-wrapping not yet implemented")((&MBB == &MF.front() && "Shrink-wrapping not yet implemented" ) ? static_cast<void> (0) : __assert_fail ("&MBB == &MF.front() && \"Shrink-wrapping not yet implemented\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/ARM/ARMFrameLowering.cpp" , 291, __PRETTY_FUNCTION__)); | |||
292 | MachineBasicBlock::iterator MBBI = MBB.begin(); | |||
293 | MachineFrameInfo *MFI = MF.getFrameInfo(); | |||
294 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); | |||
295 | MachineModuleInfo &MMI = MF.getMMI(); | |||
296 | MCContext &Context = MMI.getContext(); | |||
297 | const TargetMachine &TM = MF.getTarget(); | |||
298 | const MCRegisterInfo *MRI = Context.getRegisterInfo(); | |||
299 | const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo(); | |||
300 | const ARMBaseInstrInfo &TII = *STI.getInstrInfo(); | |||
301 | assert(!AFI->isThumb1OnlyFunction() &&((!AFI->isThumb1OnlyFunction() && "This emitPrologue does not support Thumb1!" ) ? static_cast<void> (0) : __assert_fail ("!AFI->isThumb1OnlyFunction() && \"This emitPrologue does not support Thumb1!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/ARM/ARMFrameLowering.cpp" , 302, __PRETTY_FUNCTION__)) | |||
302 | "This emitPrologue does not support Thumb1!")((!AFI->isThumb1OnlyFunction() && "This emitPrologue does not support Thumb1!" ) ? static_cast<void> (0) : __assert_fail ("!AFI->isThumb1OnlyFunction() && \"This emitPrologue does not support Thumb1!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/ARM/ARMFrameLowering.cpp" , 302, __PRETTY_FUNCTION__)); | |||
303 | bool isARM = !AFI->isThumbFunction(); | |||
304 | unsigned Align = STI.getFrameLowering()->getStackAlignment(); | |||
305 | unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); | |||
306 | unsigned NumBytes = MFI->getStackSize(); | |||
307 | const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); | |||
308 | DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); | |||
309 | unsigned FramePtr = RegInfo->getFrameRegister(MF); | |||
310 | ||||
311 | // Determine the sizes of each callee-save spill areas and record which frame | |||
312 | // belongs to which callee-save spill areas. | |||
313 | unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0; | |||
314 | int FramePtrSpillFI = 0; | |||
315 | int D8SpillFI = 0; | |||
316 | ||||
317 | // All calls are tail calls in GHC calling conv, and functions have no | |||
318 | // prologue/epilogue. | |||
319 | if (MF.getFunction()->getCallingConv() == CallingConv::GHC) | |||
320 | return; | |||
321 | ||||
322 | StackAdjustingInsts DefCFAOffsetCandidates; | |||
323 | bool HasFP = hasFP(MF); | |||
324 | ||||
325 | // Allocate the vararg register save area. | |||
326 | if (ArgRegsSaveSize) { | |||
327 | emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize, | |||
328 | MachineInstr::FrameSetup); | |||
329 | DefCFAOffsetCandidates.addInst(std::prev(MBBI), ArgRegsSaveSize, true); | |||
330 | } | |||
331 | ||||
332 | if (!AFI->hasStackFrame() && | |||
333 | (!STI.isTargetWindows() || !WindowsRequiresStackProbe(MF, NumBytes))) { | |||
334 | if (NumBytes - ArgRegsSaveSize != 0) { | |||
335 | emitSPUpdate(isARM, MBB, MBBI, dl, TII, -(NumBytes - ArgRegsSaveSize), | |||
336 | MachineInstr::FrameSetup); | |||
337 | DefCFAOffsetCandidates.addInst(std::prev(MBBI), | |||
338 | NumBytes - ArgRegsSaveSize, true); | |||
339 | } | |||
340 | DefCFAOffsetCandidates.emitDefCFAOffsets(MMI, MBB, dl, TII, HasFP); | |||
341 | return; | |||
342 | } | |||
343 | ||||
344 | // Determine spill area sizes. | |||
345 | for (unsigned i = 0, e = CSI.size(); i != e; ++i) { | |||
346 | unsigned Reg = CSI[i].getReg(); | |||
347 | int FI = CSI[i].getFrameIdx(); | |||
348 | switch (Reg) { | |||
349 | case ARM::R8: | |||
350 | case ARM::R9: | |||
351 | case ARM::R10: | |||
352 | case ARM::R11: | |||
353 | case ARM::R12: | |||
354 | if (STI.isTargetDarwin()) { | |||
355 | GPRCS2Size += 4; | |||
356 | break; | |||
357 | } | |||
358 | // fallthrough | |||
359 | case ARM::R0: | |||
360 | case ARM::R1: | |||
361 | case ARM::R2: | |||
362 | case ARM::R3: | |||
363 | case ARM::R4: | |||
364 | case ARM::R5: | |||
365 | case ARM::R6: | |||
366 | case ARM::R7: | |||
367 | case ARM::LR: | |||
368 | if (Reg == FramePtr) | |||
369 | FramePtrSpillFI = FI; | |||
370 | GPRCS1Size += 4; | |||
371 | break; | |||
372 | default: | |||
373 | // This is a DPR. Exclude the aligned DPRCS2 spills. | |||
374 | if (Reg == ARM::D8) | |||
375 | D8SpillFI = FI; | |||
376 | if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs()) | |||
377 | DPRCSSize += 8; | |||
378 | } | |||
379 | } | |||
380 | ||||
381 | // Move past area 1. | |||
382 | MachineBasicBlock::iterator LastPush = MBB.end(), GPRCS1Push, GPRCS2Push; | |||
383 | if (GPRCS1Size > 0) { | |||
384 | GPRCS1Push = LastPush = MBBI++; | |||
385 | DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, true); | |||
386 | } | |||
387 | ||||
388 | // Determine starting offsets of spill areas. | |||
389 | unsigned GPRCS1Offset = NumBytes - ArgRegsSaveSize - GPRCS1Size; | |||
390 | unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size; | |||
391 | unsigned DPRAlign = DPRCSSize ? std::min(8U, Align) : 4U; | |||
392 | unsigned DPRGapSize = (GPRCS1Size + GPRCS2Size + ArgRegsSaveSize) % DPRAlign; | |||
393 | unsigned DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize; | |||
394 | int FramePtrOffsetInPush = 0; | |||
395 | if (HasFP) { | |||
396 | FramePtrOffsetInPush = | |||
397 | MFI->getObjectOffset(FramePtrSpillFI) + ArgRegsSaveSize; | |||
398 | AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + | |||
399 | NumBytes); | |||
400 | } | |||
401 | AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset); | |||
402 | AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset); | |||
403 | AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset); | |||
404 | ||||
405 | // Move past area 2. | |||
406 | if (GPRCS2Size > 0) { | |||
407 | GPRCS2Push = LastPush = MBBI++; | |||
408 | DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size); | |||
409 | } | |||
410 | ||||
411 | // Prolog/epilog inserter assumes we correctly align DPRs on the stack, so our | |||
412 | // .cfi_offset operations will reflect that. | |||
413 | if (DPRGapSize) { | |||
414 | assert(DPRGapSize == 4 && "unexpected alignment requirements for DPRs")((DPRGapSize == 4 && "unexpected alignment requirements for DPRs" ) ? static_cast<void> (0) : __assert_fail ("DPRGapSize == 4 && \"unexpected alignment requirements for DPRs\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/ARM/ARMFrameLowering.cpp" , 414, __PRETTY_FUNCTION__)); | |||
415 | if (tryFoldSPUpdateIntoPushPop(STI, MF, LastPush, DPRGapSize)) | |||
416 | DefCFAOffsetCandidates.addExtraBytes(LastPush, DPRGapSize); | |||
417 | else { | |||
418 | emitSPUpdate(isARM, MBB, MBBI, dl, TII, -DPRGapSize, | |||
419 | MachineInstr::FrameSetup); | |||
420 | DefCFAOffsetCandidates.addInst(std::prev(MBBI), DPRGapSize); | |||
421 | } | |||
422 | } | |||
423 | ||||
424 | // Move past area 3. | |||
425 | if (DPRCSSize > 0) { | |||
426 | // Since vpush register list cannot have gaps, there may be multiple vpush | |||
427 | // instructions in the prologue. | |||
428 | while (MBBI->getOpcode() == ARM::VSTMDDB_UPD) { | |||
429 | DefCFAOffsetCandidates.addInst(MBBI, sizeOfSPAdjustment(MBBI)); | |||
430 | LastPush = MBBI++; | |||
431 | } | |||
432 | } | |||
433 | ||||
434 | // Move past the aligned DPRCS2 area. | |||
435 | if (AFI->getNumAlignedDPRCS2Regs() > 0) { | |||
436 | MBBI = skipAlignedDPRCS2Spills(MBBI, AFI->getNumAlignedDPRCS2Regs()); | |||
437 | // The code inserted by emitAlignedDPRCS2Spills realigns the stack, and | |||
438 | // leaves the stack pointer pointing to the DPRCS2 area. | |||
439 | // | |||
440 | // Adjust NumBytes to represent the stack slots below the DPRCS2 area. | |||
441 | NumBytes += MFI->getObjectOffset(D8SpillFI); | |||
442 | } else | |||
443 | NumBytes = DPRCSOffset; | |||
444 | ||||
445 | if (STI.isTargetWindows() && WindowsRequiresStackProbe(MF, NumBytes)) { | |||
446 | uint32_t NumWords = NumBytes >> 2; | |||
447 | ||||
448 | if (NumWords < 65536) | |||
449 | AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4) | |||
450 | .addImm(NumWords) | |||
451 | .setMIFlags(MachineInstr::FrameSetup)); | |||
452 | else | |||
453 | BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R4) | |||
454 | .addImm(NumWords) | |||
455 | .setMIFlags(MachineInstr::FrameSetup); | |||
456 | ||||
457 | switch (TM.getCodeModel()) { | |||
458 | case CodeModel::Small: | |||
459 | case CodeModel::Medium: | |||
460 | case CodeModel::Default: | |||
461 | case CodeModel::Kernel: | |||
462 | BuildMI(MBB, MBBI, dl, TII.get(ARM::tBL)) | |||
463 | .addImm((unsigned)ARMCC::AL).addReg(0) | |||
464 | .addExternalSymbol("__chkstk") | |||
465 | .addReg(ARM::R4, RegState::Implicit) | |||
466 | .setMIFlags(MachineInstr::FrameSetup); | |||
467 | break; | |||
468 | case CodeModel::Large: | |||
469 | case CodeModel::JITDefault: | |||
470 | BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R12) | |||
471 | .addExternalSymbol("__chkstk") | |||
472 | .setMIFlags(MachineInstr::FrameSetup); | |||
473 | ||||
474 | BuildMI(MBB, MBBI, dl, TII.get(ARM::tBLXr)) | |||
475 | .addImm((unsigned)ARMCC::AL).addReg(0) | |||
476 | .addReg(ARM::R12, RegState::Kill) | |||
477 | .addReg(ARM::R4, RegState::Implicit) | |||
478 | .setMIFlags(MachineInstr::FrameSetup); | |||
479 | break; | |||
480 | } | |||
481 | ||||
482 | AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), | |||
483 | ARM::SP) | |||
484 | .addReg(ARM::SP, RegState::Define) | |||
485 | .addReg(ARM::R4, RegState::Kill) | |||
486 | .setMIFlags(MachineInstr::FrameSetup))); | |||
487 | NumBytes = 0; | |||
488 | } | |||
489 | ||||
490 | if (NumBytes) { | |||
491 | // Adjust SP after all the callee-save spills. | |||
492 | if (tryFoldSPUpdateIntoPushPop(STI, MF, LastPush, NumBytes)) | |||
493 | DefCFAOffsetCandidates.addExtraBytes(LastPush, NumBytes); | |||
494 | else { | |||
495 | emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes, | |||
496 | MachineInstr::FrameSetup); | |||
497 | DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes); | |||
498 | } | |||
499 | ||||
500 | if (HasFP && isARM) | |||
501 | // Restore from fp only in ARM mode: e.g. sub sp, r7, #24 | |||
502 | // Note it's not safe to do this in Thumb2 mode because it would have | |||
503 | // taken two instructions: | |||
504 | // mov sp, r7 | |||
505 | // sub sp, #24 | |||
506 | // If an interrupt is taken between the two instructions, then sp is in | |||
507 | // an inconsistent state (pointing to the middle of callee-saved area). | |||
508 | // The interrupt handler can end up clobbering the registers. | |||
509 | AFI->setShouldRestoreSPFromFP(true); | |||
510 | } | |||
511 | ||||
512 | // Set FP to point to the stack slot that contains the previous FP. | |||
513 | // For iOS, FP is R7, which has now been stored in spill area 1. | |||
514 | // Otherwise, if this is not iOS, all the callee-saved registers go | |||
515 | // into spill area 1, including the FP in R11. In either case, it | |||
516 | // is in area one and the adjustment needs to take place just after | |||
517 | // that push. | |||
518 | if (HasFP) { | |||
519 | MachineBasicBlock::iterator AfterPush = std::next(GPRCS1Push); | |||
520 | unsigned PushSize = sizeOfSPAdjustment(GPRCS1Push); | |||
521 | emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, | |||
522 | dl, TII, FramePtr, ARM::SP, | |||
523 | PushSize + FramePtrOffsetInPush, | |||
524 | MachineInstr::FrameSetup); | |||
525 | if (FramePtrOffsetInPush + PushSize != 0) { | |||
526 | unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfa( | |||
527 | nullptr, MRI->getDwarfRegNum(FramePtr, true), | |||
528 | -(ArgRegsSaveSize - FramePtrOffsetInPush))); | |||
529 | BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) | |||
530 | .addCFIIndex(CFIIndex) | |||
531 | .setMIFlags(MachineInstr::FrameSetup); | |||
532 | } else { | |||
533 | unsigned CFIIndex = | |||
534 | MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister( | |||
535 | nullptr, MRI->getDwarfRegNum(FramePtr, true))); | |||
536 | BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) | |||
537 | .addCFIIndex(CFIIndex) | |||
538 | .setMIFlags(MachineInstr::FrameSetup); | |||
539 | } | |||
540 | } | |||
541 | ||||
542 | // Now that the prologue's actual instructions are finalised, we can insert | |||
543 | // the necessary DWARF cf instructions to describe the situation. Start by | |||
544 | // recording where each register ended up: | |||
545 | if (GPRCS1Size > 0) { | |||
546 | MachineBasicBlock::iterator Pos = std::next(GPRCS1Push); | |||
547 | int CFIIndex; | |||
548 | for (const auto &Entry : CSI) { | |||
549 | unsigned Reg = Entry.getReg(); | |||
550 | int FI = Entry.getFrameIdx(); | |||
551 | switch (Reg) { | |||
552 | case ARM::R8: | |||
553 | case ARM::R9: | |||
554 | case ARM::R10: | |||
555 | case ARM::R11: | |||
556 | case ARM::R12: | |||
557 | if (STI.isTargetDarwin()) | |||
558 | break; | |||
559 | // fallthrough | |||
560 | case ARM::R0: | |||
561 | case ARM::R1: | |||
562 | case ARM::R2: | |||
563 | case ARM::R3: | |||
564 | case ARM::R4: | |||
565 | case ARM::R5: | |||
566 | case ARM::R6: | |||
567 | case ARM::R7: | |||
568 | case ARM::LR: | |||
569 | CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( | |||
570 | nullptr, MRI->getDwarfRegNum(Reg, true), MFI->getObjectOffset(FI))); | |||
571 | BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) | |||
572 | .addCFIIndex(CFIIndex) | |||
573 | .setMIFlags(MachineInstr::FrameSetup); | |||
574 | break; | |||
575 | } | |||
576 | } | |||
577 | } | |||
578 | ||||
579 | if (GPRCS2Size > 0) { | |||
580 | MachineBasicBlock::iterator Pos = std::next(GPRCS2Push); | |||
581 | for (const auto &Entry : CSI) { | |||
582 | unsigned Reg = Entry.getReg(); | |||
583 | int FI = Entry.getFrameIdx(); | |||
584 | switch (Reg) { | |||
585 | case ARM::R8: | |||
586 | case ARM::R9: | |||
587 | case ARM::R10: | |||
588 | case ARM::R11: | |||
589 | case ARM::R12: | |||
590 | if (STI.isTargetDarwin()) { | |||
591 | unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true); | |||
592 | unsigned Offset = MFI->getObjectOffset(FI); | |||
593 | unsigned CFIIndex = MMI.addFrameInst( | |||
594 | MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset)); | |||
595 | BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) | |||
596 | .addCFIIndex(CFIIndex) | |||
597 | .setMIFlags(MachineInstr::FrameSetup); | |||
598 | } | |||
599 | break; | |||
600 | } | |||
601 | } | |||
602 | } | |||
603 | ||||
604 | if (DPRCSSize > 0) { | |||
605 | // Since vpush register list cannot have gaps, there may be multiple vpush | |||
606 | // instructions in the prologue. | |||
607 | MachineBasicBlock::iterator Pos = std::next(LastPush); | |||
608 | for (const auto &Entry : CSI) { | |||
609 | unsigned Reg = Entry.getReg(); | |||
610 | int FI = Entry.getFrameIdx(); | |||
611 | if ((Reg >= ARM::D0 && Reg <= ARM::D31) && | |||
612 | (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())) { | |||
613 | unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true); | |||
614 | unsigned Offset = MFI->getObjectOffset(FI); | |||
615 | unsigned CFIIndex = MMI.addFrameInst( | |||
616 | MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset)); | |||
617 | BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) | |||
618 | .addCFIIndex(CFIIndex) | |||
619 | .setMIFlags(MachineInstr::FrameSetup); | |||
620 | } | |||
621 | } | |||
622 | } | |||
623 | ||||
624 | // Now we can emit descriptions of where the canonical frame address was | |||
625 | // throughout the process. If we have a frame pointer, it takes over the job | |||
626 | // half-way through, so only the first few .cfi_def_cfa_offset instructions | |||
627 | // actually get emitted. | |||
628 | DefCFAOffsetCandidates.emitDefCFAOffsets(MMI, MBB, dl, TII, HasFP); | |||
629 | ||||
630 | if (STI.isTargetELF() && hasFP(MF)) | |||
631 | MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() - | |||
632 | AFI->getFramePtrSpillOffset()); | |||
633 | ||||
634 | AFI->setGPRCalleeSavedArea1Size(GPRCS1Size); | |||
635 | AFI->setGPRCalleeSavedArea2Size(GPRCS2Size); | |||
636 | AFI->setDPRCalleeSavedGapSize(DPRGapSize); | |||
637 | AFI->setDPRCalleeSavedAreaSize(DPRCSSize); | |||
638 | ||||
639 | // If we need dynamic stack realignment, do it here. Be paranoid and make | |||
640 | // sure if we also have VLAs, we have a base pointer for frame access. | |||
641 | // If aligned NEON registers were spilled, the stack has already been | |||
642 | // realigned. | |||
643 | if (!AFI->getNumAlignedDPRCS2Regs() && RegInfo->needsStackRealignment(MF)) { | |||
644 | unsigned MaxAlign = MFI->getMaxAlignment(); | |||
645 | assert(!AFI->isThumb1OnlyFunction())((!AFI->isThumb1OnlyFunction()) ? static_cast<void> ( 0) : __assert_fail ("!AFI->isThumb1OnlyFunction()", "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/ARM/ARMFrameLowering.cpp" , 645, __PRETTY_FUNCTION__)); | |||
646 | if (!AFI->isThumbFunction()) { | |||
647 | emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::SP, MaxAlign, | |||
648 | false); | |||
649 | } else { | |||
650 | // We cannot use sp as source/dest register here, thus we're using r4 to | |||
651 | // perform the calculations. We're emitting the following sequence: | |||
652 | // mov r4, sp | |||
653 | // -- use emitAligningInstructions to produce best sequence to zero | |||
654 | // -- out lower bits in r4 | |||
655 | // mov sp, r4 | |||
656 | // FIXME: It will be better just to find spare register here. | |||
657 | AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4) | |||
658 | .addReg(ARM::SP, RegState::Kill)); | |||
659 | emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::R4, MaxAlign, | |||
660 | false); | |||
661 | AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP) | |||
662 | .addReg(ARM::R4, RegState::Kill)); | |||
663 | } | |||
664 | ||||
665 | AFI->setShouldRestoreSPFromFP(true); | |||
666 | } | |||
667 | ||||
668 | // If we need a base pointer, set it up here. It's whatever the value | |||
669 | // of the stack pointer is at this point. Any variable size objects | |||
670 | // will be allocated after this, so we can still use the base pointer | |||
671 | // to reference locals. | |||
672 | // FIXME: Clarify FrameSetup flags here. | |||
673 | if (RegInfo->hasBasePointer(MF)) { | |||
674 | if (isARM) | |||
675 | BuildMI(MBB, MBBI, dl, | |||
676 | TII.get(ARM::MOVr), RegInfo->getBaseRegister()) | |||
677 | .addReg(ARM::SP) | |||
678 | .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); | |||
679 | else | |||
680 | AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), | |||
681 | RegInfo->getBaseRegister()) | |||
682 | .addReg(ARM::SP)); | |||
683 | } | |||
684 | ||||
685 | // If the frame has variable sized objects then the epilogue must restore | |||
686 | // the sp from fp. We can assume there's an FP here since hasFP already | |||
687 | // checks for hasVarSizedObjects. | |||
688 | if (MFI->hasVarSizedObjects()) | |||
689 | AFI->setShouldRestoreSPFromFP(true); | |||
690 | } | |||
691 | ||||
692 | // Resolve TCReturn pseudo-instruction | |||
693 | void ARMFrameLowering::fixTCReturn(MachineFunction &MF, | |||
694 | MachineBasicBlock &MBB) const { | |||
695 | MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); | |||
696 | assert(MBBI->isReturn() && "Can only insert epilog into returning blocks")((MBBI->isReturn() && "Can only insert epilog into returning blocks" ) ? static_cast<void> (0) : __assert_fail ("MBBI->isReturn() && \"Can only insert epilog into returning blocks\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/ARM/ARMFrameLowering.cpp" , 696, __PRETTY_FUNCTION__)); | |||
697 | unsigned RetOpcode = MBBI->getOpcode(); | |||
698 | DebugLoc dl = MBBI->getDebugLoc(); | |||
699 | const ARMBaseInstrInfo &TII = | |||
700 | *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo()); | |||
701 | ||||
702 | if (!(RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri)) | |||
703 | return; | |||
704 | ||||
705 | // Tail call return: adjust the stack pointer and jump to callee. | |||
706 | MBBI = MBB.getLastNonDebugInstr(); | |||
707 | MachineOperand &JumpTarget = MBBI->getOperand(0); | |||
708 | ||||
709 | // Jump to label or value in register. | |||
710 | if (RetOpcode == ARM::TCRETURNdi) { | |||
711 | unsigned TCOpcode = STI.isThumb() ? | |||
712 | (STI.isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND) : | |||
713 | ARM::TAILJMPd; | |||
714 | MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode)); | |||
715 | if (JumpTarget.isGlobal()) | |||
716 | MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(), | |||
717 | JumpTarget.getTargetFlags()); | |||
718 | else { | |||
719 | assert(JumpTarget.isSymbol())((JumpTarget.isSymbol()) ? static_cast<void> (0) : __assert_fail ("JumpTarget.isSymbol()", "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/ARM/ARMFrameLowering.cpp" , 719, __PRETTY_FUNCTION__)); | |||
720 | MIB.addExternalSymbol(JumpTarget.getSymbolName(), | |||
721 | JumpTarget.getTargetFlags()); | |||
722 | } | |||
723 | ||||
724 | // Add the default predicate in Thumb mode. | |||
725 | if (STI.isThumb()) MIB.addImm(ARMCC::AL).addReg(0); | |||
726 | } else if (RetOpcode == ARM::TCRETURNri) { | |||
727 | BuildMI(MBB, MBBI, dl, | |||
728 | TII.get(STI.isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr)). | |||
729 | addReg(JumpTarget.getReg(), RegState::Kill); | |||
730 | } | |||
731 | ||||
732 | MachineInstr *NewMI = std::prev(MBBI); | |||
733 | for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i) | |||
734 | NewMI->addOperand(MBBI->getOperand(i)); | |||
735 | ||||
736 | // Delete the pseudo instruction TCRETURN. | |||
737 | MBB.erase(MBBI); | |||
738 | MBBI = NewMI; | |||
739 | } | |||
740 | ||||
/// Emit the function epilogue into MBB: undo the stack adjustments made by
/// the prologue, restore SP (directly, or from FP when the frame requires
/// it), step past the callee-saved restore instructions, and resolve any
/// tail-call return pseudo. Thumb1 functions are handled by a subclass, not
/// here (see the assert below).
void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  assert(MBBI->isReturn() && "Can only insert epilog into returning blocks")((MBBI->isReturn() && "Can only insert epilog into returning blocks" ) ? static_cast<void> (0) : __assert_fail ("MBBI->isReturn() && \"Can only insert epilog into returning blocks\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/ARM/ARMFrameLowering.cpp" , 744, __PRETTY_FUNCTION__));
  DebugLoc dl = MBBI->getDebugLoc();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
  const ARMBaseInstrInfo &TII =
      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
  assert(!AFI->isThumb1OnlyFunction() &&((!AFI->isThumb1OnlyFunction() && "This emitEpilogue does not support Thumb1!" ) ? static_cast<void> (0) : __assert_fail ("!AFI->isThumb1OnlyFunction() && \"This emitEpilogue does not support Thumb1!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/ARM/ARMFrameLowering.cpp" , 752, __PRETTY_FUNCTION__))
         "This emitEpilogue does not support Thumb1!")((!AFI->isThumb1OnlyFunction() && "This emitEpilogue does not support Thumb1!" ) ? static_cast<void> (0) : __assert_fail ("!AFI->isThumb1OnlyFunction() && \"This emitEpilogue does not support Thumb1!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/ARM/ARMFrameLowering.cpp" , 752, __PRETTY_FUNCTION__));
  bool isARM = !AFI->isThumbFunction();

  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
  int NumBytes = (int)MFI->getStackSize();
  unsigned FramePtr = RegInfo->getFrameRegister(MF);

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction()->getCallingConv() == CallingConv::GHC) {
    fixTCReturn(MF, MBB);
    return;
  }

  // No callee-saved spills: the only thing to undo is the local-area
  // allocation (the argument-register save area is popped separately below).
  if (!AFI->hasStackFrame()) {
    if (NumBytes - ArgRegsSaveSize != 0)
      emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes - ArgRegsSaveSize);
  } else {
    // Unwind MBBI to point to first LDR / VLDRD.
    const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
    if (MBBI != MBB.begin()) {
      do {
        --MBBI;
      } while (MBBI != MBB.begin() && isCSRestore(MBBI, TII, CSRegs));
      // The loop may stop on MBB.begin() on a non-restore instruction;
      // step forward so MBBI points at the first restore (or the return).
      if (!isCSRestore(MBBI, TII, CSRegs))
        ++MBBI;
    }

    // Move SP to start of FP callee save spill area.
    NumBytes -= (ArgRegsSaveSize +
                 AFI->getGPRCalleeSavedArea1Size() +
                 AFI->getGPRCalleeSavedArea2Size() +
                 AFI->getDPRCalleeSavedGapSize() +
                 AFI->getDPRCalleeSavedAreaSize());

    // Reset SP based on frame pointer only if the stack frame extends beyond
    // frame pointer stack slot or target is ELF and the function has FP.
    if (AFI->shouldRestoreSPFromFP()) {
      NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
      if (NumBytes) {
        if (isARM)
          emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
                                  ARMCC::AL, 0, TII);
        else {
          // It's not possible to restore SP from FP in a single instruction.
          // For iOS, this looks like:
          // mov sp, r7
          // sub sp, #24
          // This is bad, if an interrupt is taken after the mov, sp is in an
          // inconsistent state.
          // Use the first callee-saved register as a scratch register.
          assert(MF.getRegInfo().isPhysRegUsed(ARM::R4) &&((MF.getRegInfo().isPhysRegUsed(ARM::R4) && "No scratch register to restore SP from FP!" ) ? static_cast<void> (0) : __assert_fail ("MF.getRegInfo().isPhysRegUsed(ARM::R4) && \"No scratch register to restore SP from FP!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/ARM/ARMFrameLowering.cpp" , 804, __PRETTY_FUNCTION__))
                 "No scratch register to restore SP from FP!")((MF.getRegInfo().isPhysRegUsed(ARM::R4) && "No scratch register to restore SP from FP!" ) ? static_cast<void> (0) : __assert_fail ("MF.getRegInfo().isPhysRegUsed(ARM::R4) && \"No scratch register to restore SP from FP!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/ARM/ARMFrameLowering.cpp" , 804, __PRETTY_FUNCTION__));
          // Compute the target SP into r4, then move it into SP atomically.
          emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
                                 ARMCC::AL, 0, TII);
          AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
                                 ARM::SP)
                         .addReg(ARM::R4));
        }
      } else {
        // Thumb2 or ARM.
        if (isARM)
          BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP)
            .addReg(FramePtr).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
        else
          AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
                                 ARM::SP)
                         .addReg(FramePtr));
      }
    } else if (NumBytes &&
               !tryFoldSPUpdateIntoPushPop(STI, MF, MBBI, NumBytes))
        emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);

    // Increment past our save areas.
    if (AFI->getDPRCalleeSavedAreaSize()) {
      MBBI++;
      // Since vpop register list cannot have gaps, there may be multiple vpop
      // instructions in the epilogue.
      while (MBBI->getOpcode() == ARM::VLDMDIA_UPD)
        MBBI++;
    }
    if (AFI->getDPRCalleeSavedGapSize()) {
      assert(AFI->getDPRCalleeSavedGapSize() == 4 &&((AFI->getDPRCalleeSavedGapSize() == 4 && "unexpected DPR alignment gap" ) ? static_cast<void> (0) : __assert_fail ("AFI->getDPRCalleeSavedGapSize() == 4 && \"unexpected DPR alignment gap\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/ARM/ARMFrameLowering.cpp" , 835, __PRETTY_FUNCTION__))
             "unexpected DPR alignment gap")((AFI->getDPRCalleeSavedGapSize() == 4 && "unexpected DPR alignment gap" ) ? static_cast<void> (0) : __assert_fail ("AFI->getDPRCalleeSavedGapSize() == 4 && \"unexpected DPR alignment gap\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/ARM/ARMFrameLowering.cpp" , 835, __PRETTY_FUNCTION__));
      // Pop the alignment padding word inserted between the DPR and GPR
      // save areas.
      emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getDPRCalleeSavedGapSize());
    }

    if (AFI->getGPRCalleeSavedArea2Size()) MBBI++;
    if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
  }

  // Rewrite a TCRETURN pseudo (if present) into a real tail jump.
  fixTCReturn(MF, MBB);

  // Finally deallocate the byval argument-register save area, if any.
  if (ArgRegsSaveSize)
    emitSPUpdate(isARM, MBB, MBBI, dl, TII, ArgRegsSaveSize);
}
848 | ||||
849 | /// getFrameIndexReference - Provide a base+offset reference to an FI slot for | |||
850 | /// debug info. It's the same as what we use for resolving the code-gen | |||
851 | /// references for now. FIXME: This can go wrong when references are | |||
852 | /// SP-relative and simple call frames aren't used. | |||
853 | int | |||
854 | ARMFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, | |||
855 | unsigned &FrameReg) const { | |||
856 | return ResolveFrameIndexReference(MF, FI, FrameReg, 0); | |||
857 | } | |||
858 | ||||
/// ResolveFrameIndexReference - Compute how frame index FI should be
/// addressed: on return, FrameReg names the base register to use (SP, the
/// frame pointer, or the base pointer) and the returned value is the byte
/// offset from that register. SPAdj is an extra adjustment folded into
/// SP-relative offsets (presumably the current delta of SP from its
/// entry value — TODO confirm against callers).
int
ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
                                             int FI, unsigned &FrameReg,
                                             int SPAdj) const {
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  // Offset: the slot's distance from the incoming SP. FPOffset: the same
  // slot measured from the frame-pointer spill location.
  int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize();
  int FPOffset = Offset - AFI->getFramePtrSpillOffset();
  bool isFixed = MFI->isFixedObjectIndex(FI);

  // Default answer: SP-relative, including the caller-supplied adjustment.
  FrameReg = ARM::SP;
  Offset += SPAdj;

  // SP can move around if there are allocas. We may also lose track of SP
  // when emergency spilling inside a non-reserved call frame setup.
  bool hasMovingSP = !hasReservedCallFrame(MF);

  // When dynamically realigning the stack, use the frame pointer for
  // parameters, and the stack/base pointer for locals.
  if (RegInfo->needsStackRealignment(MF)) {
    assert (hasFP(MF) && "dynamic stack realignment without a FP!")((hasFP(MF) && "dynamic stack realignment without a FP!" ) ? static_cast<void> (0) : __assert_fail ("hasFP(MF) && \"dynamic stack realignment without a FP!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/ARM/ARMFrameLowering.cpp" , 881, __PRETTY_FUNCTION__));
    if (isFixed) {
      FrameReg = RegInfo->getFrameRegister(MF);
      Offset = FPOffset;
    } else if (hasMovingSP) {
      assert(RegInfo->hasBasePointer(MF) &&((RegInfo->hasBasePointer(MF) && "VLAs and dynamic stack alignment, but missing base pointer!" ) ? static_cast<void> (0) : __assert_fail ("RegInfo->hasBasePointer(MF) && \"VLAs and dynamic stack alignment, but missing base pointer!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/ARM/ARMFrameLowering.cpp" , 887, __PRETTY_FUNCTION__))
             "VLAs and dynamic stack alignment, but missing base pointer!")((RegInfo->hasBasePointer(MF) && "VLAs and dynamic stack alignment, but missing base pointer!" ) ? static_cast<void> (0) : __assert_fail ("RegInfo->hasBasePointer(MF) && \"VLAs and dynamic stack alignment, but missing base pointer!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/ARM/ARMFrameLowering.cpp" , 887, __PRETTY_FUNCTION__));
      FrameReg = RegInfo->getBaseRegister();
    }
    return Offset;
  }

  // If there is a frame pointer, use it when we can.
  if (hasFP(MF) && AFI->hasStackFrame()) {
    // Use frame pointer to reference fixed objects. Use it for locals if
    // there are VLAs (and thus the SP isn't reliable as a base).
    if (isFixed || (hasMovingSP && !RegInfo->hasBasePointer(MF))) {
      FrameReg = RegInfo->getFrameRegister(MF);
      return FPOffset;
    } else if (hasMovingSP) {
      assert(RegInfo->hasBasePointer(MF) && "missing base pointer!")((RegInfo->hasBasePointer(MF) && "missing base pointer!" ) ? static_cast<void> (0) : __assert_fail ("RegInfo->hasBasePointer(MF) && \"missing base pointer!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/ARM/ARMFrameLowering.cpp" , 901, __PRETTY_FUNCTION__));
      if (AFI->isThumb2Function()) {
        // Try to use the frame pointer if we can, else use the base pointer
        // since it's available. This is handy for the emergency spill slot, in
        // particular.
        if (FPOffset >= -255 && FPOffset < 0) {
          FrameReg = RegInfo->getFrameRegister(MF);
          return FPOffset;
        }
      }
    } else if (AFI->isThumb2Function()) {
      // Use  add <rd>, sp, #<imm8>
      //      ldr <rd>, [sp, #<imm8>]
      // if at all possible to save space.
      if (Offset >= 0 && (Offset & 3) == 0 && Offset <= 1020)
        return Offset;
      // In Thumb2 mode, the negative offset is very limited. Try to avoid
      // out of range references. ldr <rt>,[<rn>, #-<imm8>]
      if (FPOffset >= -255 && FPOffset < 0) {
        FrameReg = RegInfo->getFrameRegister(MF);
        return FPOffset;
      }
    } else if (Offset > (FPOffset < 0 ? -FPOffset : FPOffset)) {
      // Otherwise, use SP or FP, whichever is closer to the stack slot.
      FrameReg = RegInfo->getFrameRegister(MF);
      return FPOffset;
    }
  }
  // Use the base pointer if we have one.
  if (RegInfo->hasBasePointer(MF))
    FrameReg = RegInfo->getBaseRegister();
  return Offset;
}
934 | ||||
935 | int ARMFrameLowering::getFrameIndexOffset(const MachineFunction &MF, | |||
936 | int FI) const { | |||
937 | unsigned FrameReg; | |||
938 | return getFrameIndexReference(MF, FI, FrameReg); | |||
939 | } | |||
940 | ||||
/// emitPushInst - Spill the callee-saved registers selected by Func before
/// MI, walking CSI backwards and grouping registers into store-multiple
/// instructions.
///
/// \param StmOpc  opcode for the multi-register SP-updating store.
/// \param StrOpc  opcode for a single-register store; 0 if unavailable,
///                forcing the multi-register form even for one register.
/// \param NoGap   if true, each emitted instruction may only contain
///                consecutive registers (required for vpush).
/// \param Func    predicate choosing which CSRs this invocation handles.
/// \param NumAlignedDPRCS2Regs  d8..d8+N-1 are handled by the aligned
///                DPRCS2 path and skipped here.
/// \param MIFlags flags (e.g. FrameSetup) attached to emitted instructions.
void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MI,
                                    const std::vector<CalleeSavedInfo> &CSI,
                                    unsigned StmOpc, unsigned StrOpc,
                                    bool NoGap,
                                    bool(*Func)(unsigned, bool),
                                    unsigned NumAlignedDPRCS2Regs,
                                    unsigned MIFlags) const {
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();

  DebugLoc DL;
  if (MI != MBB.end()) DL = MI->getDebugLoc();

  // Walk CSI from highest register down, collecting one run of registers
  // per outer iteration; each run becomes a single push/store instruction.
  SmallVector<std::pair<unsigned,bool>, 4> Regs;
  unsigned i = CSI.size();
  while (i != 0) {
    unsigned LastReg = 0;
    for (; i != 0; --i) {
      unsigned Reg = CSI[i-1].getReg();
      if (!(Func)(Reg, STI.isTargetDarwin())) continue;

      // D-registers in the aligned area DPRCS2 are NOT spilled here.
      if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
        continue;

      // Add the callee-saved register as live-in unless it's LR and
      // @llvm.returnaddress is called. If LR is returned for
      // @llvm.returnaddress then it's already added to the function and
      // entry block live-in sets.
      bool isKill = true;
      if (Reg == ARM::LR) {
        if (MF.getFrameInfo()->isReturnAddressTaken() &&
            MF.getRegInfo().isLiveIn(Reg))
          isKill = false;
      }

      if (isKill)
        MBB.addLiveIn(Reg);

      // If NoGap is true, push consecutive registers and then leave the rest
      // for other instructions. e.g.
      // vpush {d8, d10, d11} -> vpush {d8}, vpush {d10, d11}
      if (NoGap && LastReg && LastReg != Reg-1)
        break;
      LastReg = Reg;
      Regs.push_back(std::make_pair(Reg, isKill));
    }

    if (Regs.empty())
      continue;
    if (Regs.size() > 1 || StrOpc== 0) {
      // Multi-register form: SP-writeback store of the whole run.
      MachineInstrBuilder MIB =
        AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP)
                       .addReg(ARM::SP).setMIFlags(MIFlags));
      for (unsigned i = 0, e = Regs.size(); i < e; ++i)
        MIB.addReg(Regs[i].first, getKillRegState(Regs[i].second));
    } else if (Regs.size() == 1) {
      // Single register: pre-decrement store of 4 bytes.
      MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc),
                                        ARM::SP)
        .addReg(Regs[0].first, getKillRegState(Regs[0].second))
        .addReg(ARM::SP).setMIFlags(MIFlags)
        .addImm(-4);
      AddDefaultPred(MIB);
    }
    Regs.clear();

    // Put any subsequent vpush instructions before this one: they will refer to
    // higher register numbers so need to be pushed first in order to preserve
    // monotonicity.
    --MI;
  }
}
1014 | ||||
/// emitPopInst - Reload the callee-saved registers selected by Func before
/// MI (the return instruction), walking CSI backwards and grouping registers
/// into load-multiple instructions. When profitable, LR is reloaded directly
/// into PC and the return instruction is folded into the LDM.
///
/// \param LdmOpc  opcode for the multi-register SP-updating load.
/// \param LdrOpc  opcode for a single-register post-increment load; 0 if
///                unavailable, forcing the multi-register form.
/// \param isVarArg  vararg functions must return via LR, so the LR->PC
///                fold is disabled.
/// \param NoGap   if true, each emitted instruction may only contain
///                consecutive registers (required for vpop).
/// \param Func    predicate choosing which CSRs this invocation handles.
/// \param NumAlignedDPRCS2Regs  d8..d8+N-1 are reloaded by the aligned
///                DPRCS2 path and skipped here.
void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MI,
                                   const std::vector<CalleeSavedInfo> &CSI,
                                   unsigned LdmOpc, unsigned LdrOpc,
                                   bool isVarArg, bool NoGap,
                                   bool(*Func)(unsigned, bool),
                                   unsigned NumAlignedDPRCS2Regs) const {
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  DebugLoc DL = MI->getDebugLoc();
  unsigned RetOpcode = MI->getOpcode();
  bool isTailCall = (RetOpcode == ARM::TCRETURNdi ||
                     RetOpcode == ARM::TCRETURNri);
  bool isInterrupt =
      RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR;

  // Walk CSI from highest register down, one run of registers per outer
  // iteration; each run becomes a single pop/load instruction.
  SmallVector<unsigned, 4> Regs;
  unsigned i = CSI.size();
  while (i != 0) {
    unsigned LastReg = 0;
    bool DeleteRet = false;
    for (; i != 0; --i) {
      unsigned Reg = CSI[i-1].getReg();
      if (!(Func)(Reg, STI.isTargetDarwin())) continue;

      // The aligned reloads from area DPRCS2 are not inserted here.
      if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
        continue;

      // Reload LR straight into PC (returning in the same instruction),
      // unless the return must stay separate (tail call, vararg return,
      // interrupt return) or the target lacks v5T's "pop {pc}" support.
      if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
          STI.hasV5TOps()) {
        Reg = ARM::PC;
        LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
        // Fold the return instruction into the LDM.
        DeleteRet = true;
      }

      // If NoGap is true, pop consecutive registers and then leave the rest
      // for other instructions. e.g.
      // vpop {d8, d10, d11} -> vpop {d8}, vpop {d10, d11}
      if (NoGap && LastReg && LastReg != Reg-1)
        break;

      LastReg = Reg;
      Regs.push_back(Reg);
    }

    if (Regs.empty())
      continue;
    if (Regs.size() > 1 || LdrOpc == 0) {
      MachineInstrBuilder MIB =
        AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP)
                       .addReg(ARM::SP));
      for (unsigned i = 0, e = Regs.size(); i < e; ++i)
        MIB.addReg(Regs[i], getDefRegState(true));
      if (DeleteRet) {
        // The LDM now returns; carry over the return's implicit operands
        // and erase it. MI is re-anchored on the LDM below.
        MIB.copyImplicitOps(&*MI);
        MI->eraseFromParent();
      }
      MI = MIB;
    } else if (Regs.size() == 1) {
      // If we adjusted the reg to PC from LR above, switch it back here. We
      // only do that for LDM.
      if (Regs[0] == ARM::PC)
        Regs[0] = ARM::LR;
      MachineInstrBuilder MIB =
        BuildMI(MBB, MI, DL, TII.get(LdrOpc), Regs[0])
          .addReg(ARM::SP, RegState::Define)
          .addReg(ARM::SP);
      // ARM mode needs an extra reg0 here due to addrmode2. Will go away once
      // that refactoring is complete (eventually).
      if (LdrOpc == ARM::LDR_POST_REG || LdrOpc == ARM::LDR_POST_IMM) {
        MIB.addReg(0);
        MIB.addImm(ARM_AM::getAM2Opc(ARM_AM::add, 4, ARM_AM::no_shift));
      } else
        MIB.addImm(4);
      AddDefaultPred(MIB);
    }
    Regs.clear();

    // Put any subsequent vpop instructions after this one: they will refer to
    // higher register numbers so need to be popped afterwards.
    ++MI;
  }
}
1101 | ||||
/// Emit aligned spill instructions for NumAlignedDPRCS2Regs D-registers
/// starting from d8. Also insert stack realignment code and leave the stack
/// pointer pointing to the d8 spill slot.
///
/// The realignment sequence is exactly three instructions (sub/bic/mov);
/// skipAlignedDPRCS2Spills relies on that count, so any change here must be
/// mirrored there. r4 is used as the scratch base register throughout and is
/// killed by the last spill emitted.
static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MI,
                                    unsigned NumAlignedDPRCS2Regs,
                                    const std::vector<CalleeSavedInfo> &CSI,
                                    const TargetRegisterInfo *TRI) {
  MachineFunction &MF = *MBB.getParent();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  DebugLoc DL = MI->getDebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  MachineFrameInfo &MFI = *MF.getFrameInfo();

  // Mark the D-register spill slots as properly aligned. Since MFI computes
  // stack slot layout backwards, this can actually mean that the d-reg stack
  // slot offsets can be wrong. The offset for d8 will always be correct.
  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    unsigned DNum = CSI[i].getReg() - ARM::D8;
    // Skip registers outside d8..d15 (DNum is unsigned, so registers below
    // d8 wrap around to large values and are skipped too).
    if (DNum >= 8)
      continue;
    int FI = CSI[i].getFrameIdx();
    // The even-numbered registers will be 16-byte aligned, the odd-numbered
    // registers will be 8-byte aligned.
    MFI.setObjectAlignment(FI, DNum % 2 ? 8 : 16);

    // The stack slot for D8 needs to be maximally aligned because this is
    // actually the point where we align the stack pointer.  MachineFrameInfo
    // computes all offsets relative to the incoming stack pointer which is a
    // bit weird when realigning the stack.  Any extra padding for this
    // over-alignment is not realized because the code inserted below adjusts
    // the stack pointer by numregs * 8 before aligning the stack pointer.
    if (DNum == 0)
      MFI.setObjectAlignment(FI, MFI.getMaxAlignment());
  }

  // Move the stack pointer to the d8 spill slot, and align it at the same
  // time. Leave the stack slot address in the scratch register r4.
  //
  //   sub r4, sp, #numregs * 8
  //   bic r4, r4, #align - 1
  //   mov sp, r4
  //
  bool isThumb = AFI->isThumbFunction();
  assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1")((!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1" ) ? static_cast<void> (0) : __assert_fail ("!AFI->isThumb1OnlyFunction() && \"Can't realign stack for thumb1\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.7~svn240924/lib/Target/ARM/ARMFrameLowering.cpp" , 1146, __PRETTY_FUNCTION__));
  // SP no longer has a known relationship to the frame after realignment,
  // so the epilogue must restore it from FP.
  AFI->setShouldRestoreSPFromFP(true);

  // sub r4, sp, #numregs * 8
  // The immediate is <= 64, so it doesn't need any special encoding.
  unsigned Opc = isThumb ? ARM::t2SUBri : ARM::SUBri;
  AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
                              .addReg(ARM::SP)
                              .addImm(8 * NumAlignedDPRCS2Regs)));

  unsigned MaxAlign = MF.getFrameInfo()->getMaxAlignment();
  // We must set parameter MustBeSingleInstruction to true, since
  // skipAlignedDPRCS2Spills expects exactly 3 instructions to perform
  // stack alignment. Luckily, this can always be done since all ARM
  // architecture versions that support Neon also support the BFC
  // instruction.
  emitAligningInstructions(MF, AFI, TII, MBB, MI, DL, ARM::R4, MaxAlign, true);

  // mov sp, r4
  // The stack pointer must be adjusted before spilling anything, otherwise
  // the stack slots could be clobbered by an interrupt handler.
  // Leave r4 live, it is used below.
  Opc = isThumb ? ARM::tMOVr : ARM::MOVr;
  MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(Opc), ARM::SP)
                              .addReg(ARM::R4);
  MIB = AddDefaultPred(MIB);
  if (!isThumb)
    AddDefaultCC(MIB);

  // Now spill NumAlignedDPRCS2Regs registers starting from d8.
  // r4 holds the stack slot address.
  unsigned NextReg = ARM::D8;

  // 16-byte aligned vst1.64 with 4 d-regs and address writeback.
  // The writeback is only needed when emitting two vst1.64 instructions.
  if (NumAlignedDPRCS2Regs >= 6) {
    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                               &ARM::QQPRRegClass);
    MBB.addLiveIn(SupReg);
    AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Qwb_fixed),
                           ARM::R4)
                   .addReg(ARM::R4, RegState::Kill).addImm(16)
                   .addReg(NextReg)
                   .addReg(SupReg, RegState::ImplicitKill));
    NextReg += 4;
    NumAlignedDPRCS2Regs -= 4;
  }

  // We won't modify r4 beyond this point. It currently points to the next
  // register to be spilled.
  unsigned R4BaseReg = NextReg;

  // 16-byte aligned vst1.64 with 4 d-regs, no writeback.
  if (NumAlignedDPRCS2Regs >= 4) {
    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                               &ARM::QQPRRegClass);
    MBB.addLiveIn(SupReg);
    AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Q))
                   .addReg(ARM::R4).addImm(16).addReg(NextReg)
                   .addReg(SupReg, RegState::ImplicitKill));
    NextReg += 4;
    NumAlignedDPRCS2Regs -= 4;
  }

  // 16-byte aligned vst1.64 with 2 d-regs.
  if (NumAlignedDPRCS2Regs >= 2) {
    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                               &ARM::QPRRegClass);
    MBB.addLiveIn(SupReg);
    AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1q64))
                   .addReg(ARM::R4).addImm(16).addReg(SupReg));
    NextReg += 2;
    NumAlignedDPRCS2Regs -= 2;
  }

  // Finally, use a vanilla vstr.64 for the odd last register.
  if (NumAlignedDPRCS2Regs) {
    MBB.addLiveIn(NextReg);
    // vstr.64 uses addrmode5 which has an offset scale of 4.
    AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VSTRD))
                   .addReg(NextReg)
                   .addReg(ARM::R4).addImm((NextReg-R4BaseReg)*2));
  }

  // The last spill instruction inserted should kill the scratch register r4.
  std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
}
1233 | ||||
/// Skip past the code inserted by emitAlignedDPRCS2Spills, and return an
/// iterator to the following instruction.
///
/// \param MI iterator positioned at the first instruction of the sequence
///        emitted by emitAlignedDPRCS2Spills (the "sub r4, sp, ..." below).
/// \param NumAlignedDPRCS2Regs the number of aligned d-registers that were
///        spilled; it determines how many spill instructions were emitted
///        and therefore how far to advance.
static MachineBasicBlock::iterator
skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
                        unsigned NumAlignedDPRCS2Regs) {
  // The sequence always begins with three stack-setup instructions:
  //   sub r4, sp, #numregs * 8
  //   bic r4, r4, #align - 1
  //   mov sp, r4
  ++MI; ++MI; ++MI;
  assert(MI->mayStore() && "Expecting spill instruction");

  // These switches all fall through.
  // Each ++MI below skips one of the vst1/vstr spill instructions emitted by
  // emitAlignedDPRCS2Spills; how many were emitted depends on the register
  // count (e.g. 7 regs -> one 4-reg vst1.64, one 2-reg vst1.64, one vstr.64).
  switch(NumAlignedDPRCS2Regs) {
  case 7:
    ++MI;
    assert(MI->mayStore() && "Expecting spill instruction");
  default:
    ++MI;
    assert(MI->mayStore() && "Expecting spill instruction");
  case 1:
  case 2:
  case 4:
    // The final spill instruction also carries the kill of the scratch r4.
    assert(MI->killsRegister(ARM::R4) && "Missed kill flag");
    ++MI;
  }
  return MI;
}
1261 | ||||
/// Emit aligned reload instructions for NumAlignedDPRCS2Regs D-registers
/// starting from d8. These instructions are assumed to execute while the
/// stack is still aligned, unlike the code inserted by emitPopInst.
///
/// The reload sequence mirrors the one produced by emitAlignedDPRCS2Spills:
/// r4 is used as the scratch base pointer into the d8 spill area.
static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator MI,
                                      unsigned NumAlignedDPRCS2Regs,
                                      const std::vector<CalleeSavedInfo> &CSI,
                                      const TargetRegisterInfo *TRI) {
  MachineFunction &MF = *MBB.getParent();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  DebugLoc DL = MI->getDebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();

  // Find the frame index assigned to d8.
  int D8SpillFI = 0;
  for (unsigned i = 0, e = CSI.size(); i != e; ++i)
    if (CSI[i].getReg() == ARM::D8) {
      D8SpillFI = CSI[i].getFrameIdx();
      break;
    }

  // Materialize the address of the d8 spill slot into the scratch register r4.
  // This can be fairly complicated if the stack frame is large, so just use
  // the normal frame index elimination mechanism to do it. This code runs as
  // the initial part of the epilog where the stack and base pointers haven't
  // been changed yet.
  bool isThumb = AFI->isThumbFunction();
  assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");

  unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
  AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
                              .addFrameIndex(D8SpillFI).addImm(0)));

  // Now restore NumAlignedDPRCS2Regs registers starting from d8.
  unsigned NextReg = ARM::D8;

  // 16-byte aligned vld1.64 with 4 d-regs and writeback.
  if (NumAlignedDPRCS2Regs >= 6) {
    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                               &ARM::QQPRRegClass);
    AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Qwb_fixed), NextReg)
                   .addReg(ARM::R4, RegState::Define)
                   .addReg(ARM::R4, RegState::Kill).addImm(16)
                   .addReg(SupReg, RegState::ImplicitDefine));
    NextReg += 4;
    NumAlignedDPRCS2Regs -= 4;
  }

  // We won't modify r4 beyond this point. It currently points to the next
  // register to be reloaded.
  unsigned R4BaseReg = NextReg;

  // 16-byte aligned vld1.64 with 4 d-regs, no writeback.
  if (NumAlignedDPRCS2Regs >= 4) {
    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                               &ARM::QQPRRegClass);
    AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Q), NextReg)
                   .addReg(ARM::R4).addImm(16)
                   .addReg(SupReg, RegState::ImplicitDefine));
    NextReg += 4;
    NumAlignedDPRCS2Regs -= 4;
  }

  // 16-byte aligned vld1.64 with 2 d-regs.
  if (NumAlignedDPRCS2Regs >= 2) {
    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                               &ARM::QPRRegClass);
    AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1q64), SupReg)
                   .addReg(ARM::R4).addImm(16));
    NextReg += 2;
    NumAlignedDPRCS2Regs -= 2;
  }

  // Finally, use a vanilla vldr.64 for the remaining odd register.
  // vldr.64 uses addrmode5 which has an offset scale of 4, so the d-register
  // distance (8 bytes apiece) translates to a *2 scaled immediate.
  if (NumAlignedDPRCS2Regs)
    AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLDRD), NextReg)
                   .addReg(ARM::R4).addImm(2*(NextReg-R4BaseReg)));

  // The last reload instruction inserted above should kill the scratch
  // register r4.
  std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
}
1343 | ||||
1344 | bool ARMFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, | |||
1345 | MachineBasicBlock::iterator MI, | |||
1346 | const std::vector<CalleeSavedInfo> &CSI, | |||
1347 | const TargetRegisterInfo *TRI) const { | |||
1348 | if (CSI.empty()) | |||
1349 | return false; | |||
1350 | ||||
1351 | MachineFunction &MF = *MBB.getParent(); | |||
1352 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); | |||
1353 | ||||
1354 | unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD; | |||
1355 | unsigned PushOneOpc = AFI->isThumbFunction() ? | |||
1356 | ARM::t2STR_PRE : ARM::STR_PRE_IMM; | |||
1357 | unsigned FltOpc = ARM::VSTMDDB_UPD; | |||
1358 | unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs(); | |||
1359 | emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register, 0, | |||
1360 | MachineInstr::FrameSetup); | |||
1361 | emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register, 0, | |||
1362 | MachineInstr::FrameSetup); | |||
1363 | emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register, | |||
1364 | NumAlignedDPRCS2Regs, MachineInstr::FrameSetup); | |||
1365 | ||||
1366 | // The code above does not insert spill code for the aligned DPRCS2 registers. | |||
1367 | // The stack realignment code will be inserted between the push instructions | |||
1368 | // and these spills. | |||
1369 | if (NumAlignedDPRCS2Regs) | |||
1370 | emitAlignedDPRCS2Spills(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI); | |||
1371 | ||||
1372 | return true; | |||
1373 | } | |||
1374 | ||||
1375 | bool ARMFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, | |||
1376 | MachineBasicBlock::iterator MI, | |||
1377 | const std::vector<CalleeSavedInfo> &CSI, | |||
1378 | const TargetRegisterInfo *TRI) const { | |||
1379 | if (CSI.empty()) | |||
1380 | return false; | |||
1381 | ||||
1382 | MachineFunction &MF = *MBB.getParent(); | |||
1383 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); | |||
1384 | bool isVarArg = AFI->getArgRegsSaveSize() > 0; | |||
1385 | unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs(); | |||
1386 | ||||
1387 | // The emitPopInst calls below do not insert reloads for the aligned DPRCS2 | |||
1388 | // registers. Do that here instead. | |||
1389 | if (NumAlignedDPRCS2Regs) | |||
1390 | emitAlignedDPRCS2Restores(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI); | |||
1391 | ||||
1392 | unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD; | |||
1393 | unsigned LdrOpc = AFI->isThumbFunction() ? ARM::t2LDR_POST :ARM::LDR_POST_IMM; | |||
1394 | unsigned FltOpc = ARM::VLDMDIA_UPD; | |||
1395 | emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register, | |||
1396 | NumAlignedDPRCS2Regs); | |||
1397 | emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, | |||
1398 | &isARMArea2Register, 0); | |||
1399 | emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, | |||
1400 | &isARMArea1Register, 0); | |||
1401 | ||||
1402 | return true; | |||
1403 | } | |||
1404 | ||||
1405 | // FIXME: Make generic? | |||
1406 | static unsigned GetFunctionSizeInBytes(const MachineFunction &MF, | |||
1407 | const ARMBaseInstrInfo &TII) { | |||
1408 | unsigned FnSize = 0; | |||
1409 | for (auto &MBB : MF) { | |||
1410 | for (auto &MI : MBB) | |||
1411 | FnSize += TII.GetInstSizeInBytes(&MI); | |||
1412 | } | |||
1413 | return FnSize; | |||
1414 | } | |||
1415 | ||||
1416 | /// estimateRSStackSizeLimit - Look at each instruction that references stack | |||
1417 | /// frames and return the stack size limit beyond which some of these | |||
1418 | /// instructions will require a scratch register during their expansion later. | |||
1419 | // FIXME: Move to TII? | |||
1420 | static unsigned estimateRSStackSizeLimit(MachineFunction &MF, | |||
1421 | const TargetFrameLowering *TFI) { | |||
1422 | const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); | |||
1423 | unsigned Limit = (1 << 12) - 1; | |||
1424 | for (auto &MBB : MF) { | |||
1425 | for (auto &MI : MBB) { | |||
1426 | for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { | |||
1427 | if (!MI.getOperand(i).isFI()) | |||
1428 | continue; | |||
1429 | ||||
1430 | // When using ADDri to get the address of a stack object, 255 is the | |||
1431 | // largest offset guaranteed to fit in the immediate offset. | |||
1432 | if (MI.getOpcode() == ARM::ADDri) { | |||
1433 | Limit = std::min(Limit, (1U << 8) - 1); | |||
1434 | break; | |||
1435 | } | |||
1436 | ||||
1437 | // Otherwise check the addressing mode. | |||
1438 | switch (MI.getDesc().TSFlags & ARMII::AddrModeMask) { | |||
1439 | case ARMII::AddrMode3: | |||
1440 | case ARMII::AddrModeT2_i8: | |||
1441 | Limit = std::min(Limit, (1U << 8) - 1); | |||
1442 | break; | |||
1443 | case ARMII::AddrMode5: | |||
1444 | case ARMII::AddrModeT2_i8s4: | |||
1445 | Limit = std::min(Limit, ((1U << 8) - 1) * 4); | |||
1446 | break; | |||
1447 | case ARMII::AddrModeT2_i12: | |||
1448 | // i12 supports only positive offset so these will be converted to | |||
1449 | // i8 opcodes. See llvm::rewriteT2FrameIndex. | |||
1450 | if (TFI->hasFP(MF) && AFI->hasStackFrame()) | |||
1451 | Limit = std::min(Limit, (1U << 8) - 1); | |||
1452 | break; | |||
1453 | case ARMII::AddrMode4: | |||
1454 | case ARMII::AddrMode6: | |||
1455 | // Addressing modes 4 & 6 (load/store) instructions can't encode an | |||
1456 | // immediate offset for stack references. | |||
1457 | return 0; | |||
1458 | default: | |||
1459 | break; | |||
1460 | } | |||
1461 | break; // At most one FI per instruction | |||
1462 | } | |||
1463 | } | |||
1464 | } | |||
1465 | ||||
1466 | return Limit; | |||
1467 | } | |||
1468 | ||||
1469 | // In functions that realign the stack, it can be an advantage to spill the | |||
1470 | // callee-saved vector registers after realigning the stack. The vst1 and vld1 | |||
1471 | // instructions take alignment hints that can improve performance. | |||
1472 | // | |||
1473 | static void checkNumAlignedDPRCS2Regs(MachineFunction &MF) { | |||
1474 | MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(0); | |||
1475 | if (!SpillAlignedNEONRegs) | |||
1476 | return; | |||
1477 | ||||
1478 | // Naked functions don't spill callee-saved registers. | |||
1479 | if (MF.getFunction()->hasFnAttribute(Attribute::Naked)) | |||
1480 | return; | |||
1481 | ||||
1482 | // We are planning to use NEON instructions vst1 / vld1. | |||
1483 | if (!static_cast<const ARMSubtarget &>(MF.getSubtarget()).hasNEON()) | |||
1484 | return; | |||
1485 | ||||
1486 | // Don't bother if the default stack alignment is sufficiently high. | |||
1487 | if (MF.getSubtarget().getFrameLowering()->getStackAlignment() >= 8) | |||
1488 | return; | |||
1489 | ||||
1490 | // Aligned spills require stack realignment. | |||
1491 | if (!static_cast<const ARMBaseRegisterInfo *>( | |||
1492 | MF.getSubtarget().getRegisterInfo())->canRealignStack(MF)) | |||
1493 | return; | |||
1494 | ||||
1495 | // We always spill contiguous d-registers starting from d8. Count how many | |||
1496 | // needs spilling. The register allocator will almost always use the | |||
1497 | // callee-saved registers in order, but it can happen that there are holes in | |||
1498 | // the range. Registers above the hole will be spilled to the standard DPRCS | |||
1499 | // area. | |||
1500 | MachineRegisterInfo &MRI = MF.getRegInfo(); | |||
1501 | unsigned NumSpills = 0; | |||
1502 | for (; NumSpills < 8; ++NumSpills) | |||
1503 | if (!MRI.isPhysRegUsed(ARM::D8 + NumSpills)) | |||
1504 | break; | |||
1505 | ||||
1506 | // Don't do this for just one d-register. It's not worth it. | |||
1507 | if (NumSpills < 2) | |||
1508 | return; | |||
1509 | ||||
1510 | // Spill the first NumSpills D-registers after realigning the stack. | |||
1511 | MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(NumSpills); | |||
1512 | ||||
1513 | // A scratch register is required for the vst1 / vld1 instructions. | |||
1514 | MF.getRegInfo().setPhysRegUsed(ARM::R4); | |||
1515 | } | |||
1516 | ||||
1517 | void | |||
1518 | ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, | |||
1519 | RegScavenger *RS) const { | |||
1520 | // This tells PEI to spill the FP as if it is any other callee-save register | |||
1521 | // to take advantage the eliminateFrameIndex machinery. This also ensures it | |||
1522 | // is spilled in the order specified by getCalleeSavedRegs() to make it easier | |||
1523 | // to combine multiple loads / stores. | |||
1524 | bool CanEliminateFrame = true; | |||
1525 | bool CS1Spilled = false; | |||
1526 | bool LRSpilled = false; | |||
1527 | unsigned NumGPRSpills = 0; | |||
1528 | SmallVector<unsigned, 4> UnspilledCS1GPRs; | |||
1529 | SmallVector<unsigned, 4> UnspilledCS2GPRs; | |||
1530 | const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>( | |||
1531 | MF.getSubtarget().getRegisterInfo()); | |||
1532 | const ARMBaseInstrInfo &TII = | |||
1533 | *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo()); | |||
1534 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); | |||
1535 | MachineFrameInfo *MFI = MF.getFrameInfo(); | |||
1536 | MachineRegisterInfo &MRI = MF.getRegInfo(); | |||
1537 | unsigned FramePtr = RegInfo->getFrameRegister(MF); | |||
1538 | ||||
1539 | // Spill R4 if Thumb2 function requires stack realignment - it will be used as | |||
1540 | // scratch register. Also spill R4 if Thumb2 function has varsized objects, | |||
1541 | // since it's not always possible to restore sp from fp in a single | |||
1542 | // instruction. | |||
1543 | // FIXME: It will be better just to find spare register here. | |||
1544 | if (AFI->isThumb2Function() && | |||
1545 | (MFI->hasVarSizedObjects() || RegInfo->needsStackRealignment(MF))) | |||
1546 | MRI.setPhysRegUsed(ARM::R4); | |||
1547 | ||||
1548 | if (AFI->isThumb1OnlyFunction()) { | |||
| ||||
1549 | // Spill LR if Thumb1 function uses variable length argument lists. | |||
1550 | if (AFI->getArgRegsSaveSize() > 0) | |||
1551 | MRI.setPhysRegUsed(ARM::LR); | |||
1552 | ||||
1553 | // Spill R4 if Thumb1 epilogue has to restore SP from FP. We don't know | |||
1554 | // for sure what the stack size will be, but for this, an estimate is good | |||
1555 | // enough. If there anything changes it, it'll be a spill, which implies | |||
1556 | // we've used all the registers and so R4 is already used, so not marking | |||
1557 | // it here will be OK. | |||
1558 | // FIXME: It will be better just to find spare register here. | |||
1559 | unsigned StackSize = MFI->estimateStackSize(MF); | |||
1560 | if (MFI->hasVarSizedObjects() || StackSize > 508) | |||
1561 | MRI.setPhysRegUsed(ARM::R4); | |||
1562 | } | |||
1563 | ||||
1564 | // See if we can spill vector registers to aligned stack. | |||
1565 | checkNumAlignedDPRCS2Regs(MF); | |||
1566 | ||||
1567 | // Spill the BasePtr if it's used. | |||
1568 | if (RegInfo->hasBasePointer(MF)) | |||
1569 | MRI.setPhysRegUsed(RegInfo->getBaseRegister()); | |||
1570 | ||||
1571 | // Don't spill FP if the frame can be eliminated. This is determined | |||
1572 | // by scanning the callee-save registers to see if any is used. | |||
1573 | const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); | |||
1574 | for (unsigned i = 0; CSRegs[i]; ++i) { | |||
1575 | unsigned Reg = CSRegs[i]; | |||
1576 | bool Spilled = false; | |||
1577 | if (MRI.isPhysRegUsed(Reg)) { | |||
1578 | Spilled = true; | |||
1579 | CanEliminateFrame = false; | |||
1580 | } | |||
1581 | ||||
1582 | if (!ARM::GPRRegClass.contains(Reg)) | |||
1583 | continue; | |||
1584 | ||||
1585 | if (Spilled) { | |||
1586 | NumGPRSpills++; | |||
1587 | ||||
1588 | if (!STI.isTargetDarwin()) { | |||
1589 | if (Reg == ARM::LR) | |||
1590 | LRSpilled = true; | |||
1591 | CS1Spilled = true; | |||
1592 | continue; | |||
1593 | } | |||
1594 | ||||
1595 | // Keep track if LR and any of R4, R5, R6, and R7 is spilled. | |||
1596 | switch (Reg) { | |||
1597 | case ARM::LR: | |||
1598 | LRSpilled = true; | |||
1599 | // Fallthrough | |||
1600 | case ARM::R0: case ARM::R1: | |||
1601 | case ARM::R2: case ARM::R3: | |||
1602 | case ARM::R4: case ARM::R5: | |||
1603 | case ARM::R6: case ARM::R7: | |||
1604 | CS1Spilled = true; | |||
1605 | break; | |||
1606 | default: | |||
1607 | break; | |||
1608 | } | |||
1609 | } else { | |||
1610 | if (!STI.isTargetDarwin()) { | |||
1611 | UnspilledCS1GPRs.push_back(Reg); | |||
1612 | continue; | |||
1613 | } | |||
1614 | ||||
1615 | switch (Reg) { | |||
1616 | case ARM::R0: case ARM::R1: | |||
1617 | case ARM::R2: case ARM::R3: | |||
1618 | case ARM::R4: case ARM::R5: | |||
1619 | case ARM::R6: case ARM::R7: | |||
1620 | case ARM::LR: | |||
1621 | UnspilledCS1GPRs.push_back(Reg); | |||
1622 | break; | |||
1623 | default: | |||
1624 | UnspilledCS2GPRs.push_back(Reg); | |||
1625 | break; | |||
1626 | } | |||
1627 | } | |||
1628 | } | |||
1629 | ||||
1630 | bool ForceLRSpill = false; | |||
1631 | if (!LRSpilled && AFI->isThumb1OnlyFunction()) { | |||
1632 | unsigned FnSize = GetFunctionSizeInBytes(MF, TII); | |||
1633 | // Force LR to be spilled if the Thumb function size is > 2048. This enables | |||
1634 | // use of BL to implement far jump. If it turns out that it's not needed | |||
1635 | // then the branch fix up path will undo it. | |||
1636 | if (FnSize >= (1 << 11)) { | |||
1637 | CanEliminateFrame = false; | |||
1638 | ForceLRSpill = true; | |||
1639 | } | |||
1640 | } | |||
1641 | ||||
1642 | // If any of the stack slot references may be out of range of an immediate | |||
1643 | // offset, make sure a register (or a spill slot) is available for the | |||
1644 | // register scavenger. Note that if we're indexing off the frame pointer, the | |||
1645 | // effective stack size is 4 bytes larger since the FP points to the stack | |||
1646 | // slot of the previous FP. Also, if we have variable sized objects in the | |||
1647 | // function, stack slot references will often be negative, and some of | |||
1648 | // our instructions are positive-offset only, so conservatively consider | |||
1649 | // that case to want a spill slot (or register) as well. Similarly, if | |||
1650 | // the function adjusts the stack pointer during execution and the | |||
1651 | // adjustments aren't already part of our stack size estimate, our offset | |||
1652 | // calculations may be off, so be conservative. | |||
1653 | // FIXME: We could add logic to be more precise about negative offsets | |||
1654 | // and which instructions will need a scratch register for them. Is it | |||
1655 | // worth the effort and added fragility? | |||
1656 | bool BigStack = | |||
1657 | (RS && | |||
1658 | (MFI->estimateStackSize(MF) + | |||
1659 | ((hasFP(MF) && AFI->hasStackFrame()) ? 4:0) >= | |||
1660 | estimateRSStackSizeLimit(MF, this))) | |||
1661 | || MFI->hasVarSizedObjects() | |||
1662 | || (MFI->adjustsStack() && !canSimplifyCallFramePseudos(MF)); | |||
1663 | ||||
1664 | bool ExtraCSSpill = false; | |||
1665 | if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) { | |||
1666 | AFI->setHasStackFrame(true); | |||
1667 | ||||
1668 | // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled. | |||
1669 | // Spill LR as well so we can fold BX_RET to the registers restore (LDM). | |||
1670 | if (!LRSpilled && CS1Spilled) { | |||
1671 | MRI.setPhysRegUsed(ARM::LR); | |||
1672 | NumGPRSpills++; | |||
1673 | SmallVectorImpl<unsigned>::iterator LRPos; | |||
1674 | LRPos = std::find(UnspilledCS1GPRs.begin(), UnspilledCS1GPRs.end(), | |||
1675 | (unsigned)ARM::LR); | |||
1676 | if (LRPos != UnspilledCS1GPRs.end()) | |||
1677 | UnspilledCS1GPRs.erase(LRPos); | |||
1678 | ||||
1679 | ForceLRSpill = false; | |||
1680 | ExtraCSSpill = true; | |||
1681 | } | |||
1682 | ||||
1683 | if (hasFP(MF)) { | |||
1684 | MRI.setPhysRegUsed(FramePtr); | |||
1685 | auto FPPos = std::find(UnspilledCS1GPRs.begin(), UnspilledCS1GPRs.end(), | |||
1686 | FramePtr); | |||
1687 | if (FPPos != UnspilledCS1GPRs.end()) | |||
1688 | UnspilledCS1GPRs.erase(FPPos); | |||
1689 | NumGPRSpills++; | |||
1690 | } | |||
1691 | ||||
1692 | // If stack and double are 8-byte aligned and we are spilling an odd number | |||
1693 | // of GPRs, spill one extra callee save GPR so we won't have to pad between | |||
1694 | // the integer and double callee save areas. | |||
1695 | unsigned TargetAlign = getStackAlignment(); | |||
1696 | if (TargetAlign >= 8 && (NumGPRSpills & 1)) { | |||
1697 | if (CS1Spilled && !UnspilledCS1GPRs.empty()) { | |||
1698 | for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) { | |||
1699 | unsigned Reg = UnspilledCS1GPRs[i]; | |||
1700 | // Don't spill high register if the function is thumb | |||
1701 | if (!AFI->isThumbFunction() || | |||
1702 | isARMLowRegister(Reg) || Reg == ARM::LR) { | |||
1703 | MRI.setPhysRegUsed(Reg); | |||
1704 | if (!MRI.isReserved(Reg)) | |||
1705 | ExtraCSSpill = true; | |||
1706 | break; | |||
1707 | } | |||
1708 | } | |||
1709 | } else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) { | |||
1710 | unsigned Reg = UnspilledCS2GPRs.front(); | |||
1711 | MRI.setPhysRegUsed(Reg); | |||
1712 | if (!MRI.isReserved(Reg)) | |||
1713 | ExtraCSSpill = true; | |||
1714 | } | |||
1715 | } | |||
1716 | ||||
1717 | // Estimate if we might need to scavenge a register at some point in order | |||
1718 | // to materialize a stack offset. If so, either spill one additional | |||
1719 | // callee-saved register or reserve a special spill slot to facilitate | |||
1720 | // register scavenging. Thumb1 needs a spill slot for stack pointer | |||
1721 | // adjustments also, even when the frame itself is small. | |||
1722 | if (BigStack && !ExtraCSSpill) { | |||
1723 | // If any non-reserved CS register isn't spilled, just spill one or two | |||
1724 | // extra. That should take care of it! | |||
1725 | unsigned NumExtras = TargetAlign / 4; | |||
1726 | SmallVector<unsigned, 2> Extras; | |||
1727 | while (NumExtras && !UnspilledCS1GPRs.empty()) { | |||
1728 | unsigned Reg = UnspilledCS1GPRs.back(); | |||
1729 | UnspilledCS1GPRs.pop_back(); | |||
1730 | if (!MRI.isReserved(Reg) && | |||
1731 | (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg) || | |||
1732 | Reg == ARM::LR)) { | |||
1733 | Extras.push_back(Reg); | |||
1734 | NumExtras--; | |||
1735 | } | |||
1736 | } | |||
1737 | // For non-Thumb1 functions, also check for hi-reg CS registers | |||
1738 | if (!AFI->isThumb1OnlyFunction()) { | |||
1739 | while (NumExtras && !UnspilledCS2GPRs.empty()) { | |||
1740 | unsigned Reg = UnspilledCS2GPRs.back(); | |||
1741 | UnspilledCS2GPRs.pop_back(); | |||
1742 | if (!MRI.isReserved(Reg)) { | |||
1743 | Extras.push_back(Reg); | |||
1744 | NumExtras--; | |||
1745 | } | |||
1746 | } | |||
1747 | } | |||
1748 | if (Extras.size() && NumExtras == 0) { | |||
1749 | for (unsigned i = 0, e = Extras.size(); i != e; ++i) { | |||
1750 | MRI.setPhysRegUsed(Extras[i]); | |||
1751 | } | |||
1752 | } else if (!AFI->isThumb1OnlyFunction()) { | |||
1753 | // note: Thumb1 functions spill to R12, not the stack. Reserve a slot | |||
1754 | // closest to SP or frame pointer. | |||
1755 | const TargetRegisterClass *RC = &ARM::GPRRegClass; | |||
1756 | RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), | |||
| ||||
1757 | RC->getAlignment(), | |||
1758 | false)); | |||
1759 | } | |||
1760 | } | |||
1761 | } | |||
1762 | ||||
1763 | if (ForceLRSpill) { | |||
1764 | MRI.setPhysRegUsed(ARM::LR); | |||
1765 | AFI->setLRIsSpilledForFarJump(true); | |||
1766 | } | |||
1767 | } | |||
1768 | ||||
1769 | ||||
/// Replace ADJCALLSTACKDOWN/ADJCALLSTACKUP pseudo instructions with real
/// sp-adjusting instructions when the call frame is not reserved, then erase
/// the pseudo. When the call frame is reserved, the pseudo is simply removed.
void ARMFrameLowering::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator I) const {
  const ARMBaseInstrInfo &TII =
      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
  if (!hasReservedCallFrame(MF)) {
    // If we have alloca, convert as follows:
    // ADJCALLSTACKDOWN -> sub, sp, sp, amount
    // ADJCALLSTACKUP   -> add, sp, sp, amount
    MachineInstr *Old = I;
    DebugLoc dl = Old->getDebugLoc();
    unsigned Amount = Old->getOperand(0).getImm();
    if (Amount != 0) {
      // We need to keep the stack aligned properly. To do this, we round the
      // amount of space needed for the outgoing arguments up to the next
      // alignment boundary.
      unsigned Align = getStackAlignment();
      Amount = (Amount+Align-1)/Align*Align;

      ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
      assert(!AFI->isThumb1OnlyFunction() &&
             "This eliminateCallFramePseudoInstr does not support Thumb1!");
      bool isARM = !AFI->isThumbFunction();

      // Replace the pseudo instruction with a new instruction...
      // Carry the pseudo's predicate over to the real sp-adjusting
      // instruction; default to AL when no predicate operand is present.
      unsigned Opc = Old->getOpcode();
      int PIdx = Old->findFirstPredOperandIdx();
      ARMCC::CondCodes Pred = (PIdx == -1)
        ? ARMCC::AL : (ARMCC::CondCodes)Old->getOperand(PIdx).getImm();
      if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
        // Note: PredReg is operand 2 for ADJCALLSTACKDOWN.
        unsigned PredReg = Old->getOperand(2).getReg();
        emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, MachineInstr::NoFlags,
                     Pred, PredReg);
      } else {
        // Note: PredReg is operand 3 for ADJCALLSTACKUP.
        unsigned PredReg = Old->getOperand(3).getReg();
        assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
        emitSPUpdate(isARM, MBB, I, dl, TII, Amount, MachineInstr::NoFlags,
                     Pred, PredReg);
      }
    }
  }
  // The pseudo itself is always deleted, whether or not sp-updates were
  // emitted in its place.
  MBB.erase(I);
}
1815 | ||||
/// Get the minimum constant for ARM that is greater than or equal to the
/// argument. In ARM, constants can have any value that can be produced by
/// rotating an 8-bit value to the right by an even number of bits within a
/// 32-bit word.
static uint32_t alignToARMConstant(uint32_t Value) {
  if (Value == 0)
    return 0;

  // Normalize: shift left two bits at a time until the top quarter-word is
  // occupied, remembering how far we moved.
  unsigned ShiftAmt = 0;
  while ((Value & 0xC0000000) == 0) {
    Value <<= 2;
    ShiftAmt += 2;
  }

  // Keep the leading byte, rounding it up if any of the discarded low bits
  // were set.
  uint32_t RoundUp = (Value & 0x00FFFFFF) ? 1 : 0;
  Value = (Value >> 24) + RoundUp;

  // If rounding carried into bit 8, drop the low bits that can no longer be
  // represented in the 8-bit window.
  if (Value & 0x100)
    Value &= 0x1FC;

  // Undo the normalization shift to restore the original magnitude.
  return (ShiftAmt > 24) ? (Value >> (ShiftAmt - 24))
                         : (Value << (24 - ShiftAmt));
}
1844 | ||||
// The stack limit in the TCB is set to this many bytes above the actual
// stack limit. Requests that fit within this slack presumably avoid the
// __morestack call; consumed by the segmented-stack prologue code below.
static const uint64_t kSplitStackAvailable = 256;
1848 | ||||
1849 | // Adjust the function prologue to enable split stacks. This currently only | |||
1850 | // supports android and linux. | |||
1851 | // | |||
1852 | // The ABI of the segmented stack prologue is a little arbitrarily chosen, but | |||
1853 | // must be well defined in order to allow for consistent implementations of the | |||
1854 | // __morestack helper function. The ABI is also not a normal ABI in that it | |||
1855 | // doesn't follow the normal calling conventions because this allows the | |||
1856 | // prologue of each function to be optimized further. | |||
1857 | // | |||
1858 | // Currently, the ABI looks like (when calling __morestack) | |||
1859 | // | |||
1860 | // * r4 holds the minimum stack size requested for this function call | |||
1861 | // * r5 holds the stack size of the arguments to the function | |||
1862 | // * the beginning of the function is 3 instructions after the call to | |||
1863 | // __morestack | |||
1864 | // | |||
1865 | // Implementations of __morestack should use r4 to allocate a new stack, r5 to | |||
1866 | // place the arguments on to the new stack, and the 3-instruction knowledge to | |||
1867 | // jump directly to the body of the function when working on the new stack. | |||
1868 | // | |||
1869 | // An old (and possibly no longer compatible) implementation of __morestack for | |||
1870 | // ARM can be found at [1]. | |||
1871 | // | |||
1872 | // [1] - https://github.com/mozilla/rust/blob/86efd9/src/rt/arch/arm/morestack.S | |||
void ARMFrameLowering::adjustForSegmentedStacks(
    MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
  unsigned Opcode;
  unsigned CFIIndex;
  const ARMSubtarget *ST = &MF.getSubtarget<ARMSubtarget>();
  bool Thumb = ST->isThumb();

  // Sadly, this currently doesn't support varargs, platforms other than
  // android/linux. Note that thumb1/thumb2 are supported for android/linux.
  if (MF.getFunction()->isVarArg())
    report_fatal_error("Segmented stacks do not support vararg functions.");
  if (!ST->isTargetAndroid() && !ST->isTargetLinux())
    report_fatal_error("Segmented stacks not supported on this platform.");

  assert(&PrologueMBB == &MF.front() && "Shrink-wrapping not yet implemented");
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineModuleInfo &MMI = MF.getMMI();
  MCContext &Context = MMI.getContext();
  // NOTE(review): Context.getRegisterInfo() can return null if no register
  // info was registered with the MCContext; MRI is dereferenced
  // unconditionally below when emitting the DWARF CFI directives.  Static
  // analysis flags a possible null object pointer on this path -- confirm
  // MRI is always non-null during frame lowering.
  const MCRegisterInfo *MRI = Context.getRegisterInfo();
  const ARMBaseInstrInfo &TII =
      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
  ARMFunctionInfo *ARMFI = MF.getInfo<ARMFunctionInfo>();
  DebugLoc DL;

  uint64_t StackSize = MFI->getStackSize();

  // Do not generate a prologue for functions with a stack of size zero
  if (StackSize == 0)
    return;

  // Use R4 and R5 as scratch registers.
  // We save R4 and R5 before use and restore them before leaving the function.
  unsigned ScratchReg0 = ARM::R4;
  unsigned ScratchReg1 = ARM::R5;
  uint64_t AlignedStackSize;

  // New blocks inserted ahead of the original prologue.  After the
  // push_front calls below, the layout order is:
  //   PrevStackMBB, McrMBB, GetMBB, AllocMBB, PostStackMBB, PrologueMBB
  MachineBasicBlock *PrevStackMBB = MF.CreateMachineBasicBlock();
  MachineBasicBlock *PostStackMBB = MF.CreateMachineBasicBlock();
  MachineBasicBlock *AllocMBB = MF.CreateMachineBasicBlock();
  MachineBasicBlock *GetMBB = MF.CreateMachineBasicBlock();
  MachineBasicBlock *McrMBB = MF.CreateMachineBasicBlock();

  // Replicate the original prologue's live-ins into every new block so
  // register liveness stays consistent across the inserted control flow.
  for (MachineBasicBlock::livein_iterator i = PrologueMBB.livein_begin(),
                                          e = PrologueMBB.livein_end();
       i != e; ++i) {
    AllocMBB->addLiveIn(*i);
    GetMBB->addLiveIn(*i);
    McrMBB->addLiveIn(*i);
    PrevStackMBB->addLiveIn(*i);
    PostStackMBB->addLiveIn(*i);
  }

  MF.push_front(PostStackMBB);
  MF.push_front(AllocMBB);
  MF.push_front(GetMBB);
  MF.push_front(McrMBB);
  MF.push_front(PrevStackMBB);

  // The required stack size that is aligned to ARM constant criterion.
  AlignedStackSize = alignToARMConstant(StackSize);

  // When the frame size is less than 256 we just compare the stack
  // boundary directly to the value of the stack pointer, per gcc.
  bool CompareStackPointer = AlignedStackSize < kSplitStackAvailable;

  // We will use two of the callee save registers as scratch registers so we
  // need to save those registers onto the stack.
  // We will use SR0 to hold stack limit and SR1 to hold the stack size
  // requested and arguments for __morestack().
  // SR0: Scratch Register #0
  // SR1: Scratch Register #1
  // push {SR0, SR1}
  if (Thumb) {
    AddDefaultPred(BuildMI(PrevStackMBB, DL, TII.get(ARM::tPUSH)))
        .addReg(ScratchReg0).addReg(ScratchReg1);
  } else {
    AddDefaultPred(BuildMI(PrevStackMBB, DL, TII.get(ARM::STMDB_UPD))
                   .addReg(ARM::SP, RegState::Define).addReg(ARM::SP))
        .addReg(ScratchReg0).addReg(ScratchReg1);
  }

  // Emit the relevant DWARF information about the change in stack pointer as
  // well as where to find both r4 and r5 (the callee-save registers)
  CFIIndex =
      MMI.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, -8));
  BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
      .addCFIIndex(CFIIndex);
  CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
      nullptr, MRI->getDwarfRegNum(ScratchReg1, true), -4));
  BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
      .addCFIIndex(CFIIndex);
  CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
      nullptr, MRI->getDwarfRegNum(ScratchReg0, true), -8));
  BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
      .addCFIIndex(CFIIndex);

  // mov SR1, sp
  // (When the frame is large enough that SP itself cannot be compared, the
  // non-Thumb copy is skipped here and SR1 is computed by the sub below.)
  if (Thumb) {
    AddDefaultPred(BuildMI(McrMBB, DL, TII.get(ARM::tMOVr), ScratchReg1)
                      .addReg(ARM::SP));
  } else if (CompareStackPointer) {
    AddDefaultPred(BuildMI(McrMBB, DL, TII.get(ARM::MOVr), ScratchReg1)
                      .addReg(ARM::SP)).addReg(0);
  }

  // sub SR1, sp, #StackSize
  if (!CompareStackPointer && Thumb) {
    AddDefaultPred(
        AddDefaultCC(BuildMI(McrMBB, DL, TII.get(ARM::tSUBi8), ScratchReg1))
            .addReg(ScratchReg1).addImm(AlignedStackSize));
  } else if (!CompareStackPointer) {
    AddDefaultPred(BuildMI(McrMBB, DL, TII.get(ARM::SUBri), ScratchReg1)
                      .addReg(ARM::SP).addImm(AlignedStackSize)).addReg(0);
  }

  // Thumb1-only targets load the stack limit indirectly through a
  // constant-pool reference to the __STACK_LIMIT symbol, instead of reading
  // the TLS slot with an MRC coprocessor access (the else branch below).
  if (Thumb && ST->isThumb1Only()) {
    unsigned PCLabelId = ARMFI->createPICLabelUId();
    ARMConstantPoolValue *NewCPV = ARMConstantPoolSymbol::Create(
        MF.getFunction()->getContext(), "__STACK_LIMIT", PCLabelId, 0);
    MachineConstantPool *MCP = MF.getConstantPool();
    unsigned CPI = MCP->getConstantPoolIndex(NewCPV, MF.getAlignment());

    // ldr SR0, [pc, offset(STACK_LIMIT)]
    AddDefaultPred(BuildMI(GetMBB, DL, TII.get(ARM::tLDRpci), ScratchReg0)
                      .addConstantPoolIndex(CPI));

    // ldr SR0, [SR0]
    AddDefaultPred(BuildMI(GetMBB, DL, TII.get(ARM::tLDRi), ScratchReg0)
                      .addReg(ScratchReg0).addImm(0));
  } else {
    // Get TLS base address from the coprocessor
    // mrc p15, #0, SR0, c13, c0, #3
    AddDefaultPred(BuildMI(McrMBB, DL, TII.get(ARM::MRC), ScratchReg0)
                     .addImm(15)
                     .addImm(0)
                     .addImm(13)
                     .addImm(0)
                     .addImm(3));

    // Use the last tls slot on android and a private field of the TCB on linux.
    assert(ST->isTargetAndroid() || ST->isTargetLinux());
    unsigned TlsOffset = ST->isTargetAndroid() ? 63 : 1;

    // Get the stack limit from the right offset
    // ldr SR0, [sr0, #4 * TlsOffset]
    AddDefaultPred(BuildMI(GetMBB, DL, TII.get(ARM::LDRi12), ScratchReg0)
                      .addReg(ScratchReg0).addImm(4 * TlsOffset));
  }

  // Compare stack limit with stack size requested.
  // cmp SR0, SR1
  Opcode = Thumb ? ARM::tCMPr : ARM::CMPrr;
  AddDefaultPred(BuildMI(GetMBB, DL, TII.get(Opcode))
                    .addReg(ScratchReg0)
                    .addReg(ScratchReg1));

  // This jump is taken if StackLimit < SP - stack required.
  // In that case the current stack suffices and AllocMBB (the __morestack
  // call) is bypassed entirely.
  Opcode = Thumb ? ARM::tBcc : ARM::Bcc;
  BuildMI(GetMBB, DL, TII.get(Opcode)).addMBB(PostStackMBB)
       .addImm(ARMCC::LO)
       .addReg(ARM::CPSR);


  // Calling __morestack(StackSize, Size of stack arguments).
  // __morestack knows that the stack size requested is in SR0(r4)
  // and amount size of stack arguments is in SR1(r5).

  // Pass first argument for the __morestack by Scratch Register #0.
  // The amount size of stack required
  if (Thumb) {
    AddDefaultPred(AddDefaultCC(BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8),
                                        ScratchReg0)).addImm(AlignedStackSize));
  } else {
    AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg0)
                      .addImm(AlignedStackSize)).addReg(0);
  }
  // Pass second argument for the __morestack by Scratch Register #1.
  // The amount size of stack consumed to save function arguments.
  if (Thumb) {
    AddDefaultPred(
        AddDefaultCC(BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg1))
            .addImm(alignToARMConstant(ARMFI->getArgumentStackSize())));
  } else {
    AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg1)
                   .addImm(alignToARMConstant(ARMFI->getArgumentStackSize())))
                   .addReg(0);
  }

  // push {lr} - Save return address of this function.
  if (Thumb) {
    AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tPUSH)))
        .addReg(ARM::LR);
  } else {
    AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::STMDB_UPD))
                   .addReg(ARM::SP, RegState::Define)
                   .addReg(ARM::SP))
        .addReg(ARM::LR);
  }

  // Emit the DWARF info about the change in stack as well as where to find the
  // previous link register
  CFIIndex =
      MMI.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, -12));
  BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
      .addCFIIndex(CFIIndex);
  CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
      nullptr, MRI->getDwarfRegNum(ARM::LR, true), -12));
  BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
      .addCFIIndex(CFIIndex);

  // Call __morestack().
  if (Thumb) {
    AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tBL)))
        .addExternalSymbol("__morestack");
  } else {
    BuildMI(AllocMBB, DL, TII.get(ARM::BL))
        .addExternalSymbol("__morestack");
  }

  // pop {lr} - Restore return address of this original function.
  if (Thumb) {
    if (ST->isThumb1Only()) {
      // Thumb1 pops into SR0 first and then moves into lr, rather than
      // loading lr directly.
      AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tPOP)))
          .addReg(ScratchReg0);
      AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tMOVr), ARM::LR)
                        .addReg(ScratchReg0));
    } else {
      AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::t2LDR_POST))
                        .addReg(ARM::LR, RegState::Define)
                        .addReg(ARM::SP, RegState::Define)
                        .addReg(ARM::SP)
                        .addImm(4));
    }
  } else {
    AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
                   .addReg(ARM::SP, RegState::Define)
                   .addReg(ARM::SP))
        .addReg(ARM::LR);
  }

  // Restore SR0 and SR1 in case of __morestack() was called.
  // __morestack() will skip PostStackMBB block so we need to restore
  // scratch registers from here.
  // pop {SR0, SR1}
  if (Thumb) {
    AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tPOP)))
        .addReg(ScratchReg0)
        .addReg(ScratchReg1);
  } else {
    AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
                   .addReg(ARM::SP, RegState::Define)
                   .addReg(ARM::SP))
        .addReg(ScratchReg0)
        .addReg(ScratchReg1);
  }

  // Update the CFA offset now that we've popped
  CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 0));
  BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
      .addCFIIndex(CFIIndex);

  // bx lr - Return from this function.
  Opcode = Thumb ? ARM::tBX_RET : ARM::BX_RET;
  AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(Opcode)));

  // Restore SR0 and SR1 in case of __morestack() was not called.
  // pop {SR0, SR1}
  if (Thumb) {
    AddDefaultPred(BuildMI(PostStackMBB, DL, TII.get(ARM::tPOP)))
        .addReg(ScratchReg0)
        .addReg(ScratchReg1);
  } else {
    AddDefaultPred(BuildMI(PostStackMBB, DL, TII.get(ARM::LDMIA_UPD))
                   .addReg(ARM::SP, RegState::Define)
                   .addReg(ARM::SP))
        .addReg(ScratchReg0)
        .addReg(ScratchReg1);
  }

  // Update the CFA offset now that we've popped
  CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 0));
  BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
      .addCFIIndex(CFIIndex);

  // Tell debuggers that r4 and r5 are now the same as they were in the
  // previous function, that they're the "Same Value".
  CFIIndex = MMI.addFrameInst(MCCFIInstruction::createSameValue(
      nullptr, MRI->getDwarfRegNum(ScratchReg0, true)));
  BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
      .addCFIIndex(CFIIndex);
  CFIIndex = MMI.addFrameInst(MCCFIInstruction::createSameValue(
      nullptr, MRI->getDwarfRegNum(ScratchReg1, true)));
  BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
      .addCFIIndex(CFIIndex);

  // Organizing MBB lists
  PostStackMBB->addSuccessor(&PrologueMBB);

  AllocMBB->addSuccessor(PostStackMBB);

  GetMBB->addSuccessor(PostStackMBB);
  GetMBB->addSuccessor(AllocMBB);

  McrMBB->addSuccessor(GetMBB);

  PrevStackMBB->addSuccessor(McrMBB);

#ifdef XDEBUG
  MF.verify();
#endif
}