File: | lib/Target/ARM/ARMFrameLowering.cpp |
Location: | line 1710, column 9 |
Description: | Called C++ object pointer is null |
1 | //===-- ARMFrameLowering.cpp - ARM Frame Information ----------------------===// | |||
2 | // | |||
3 | // The LLVM Compiler Infrastructure | |||
4 | // | |||
5 | // This file is distributed under the University of Illinois Open Source | |||
6 | // License. See LICENSE.TXT for details. | |||
7 | // | |||
8 | //===----------------------------------------------------------------------===// | |||
9 | // | |||
10 | // This file contains the ARM implementation of TargetFrameLowering class. | |||
11 | // | |||
12 | //===----------------------------------------------------------------------===// | |||
13 | ||||
14 | #include "ARMFrameLowering.h" | |||
15 | #include "ARMBaseInstrInfo.h" | |||
16 | #include "ARMBaseRegisterInfo.h" | |||
17 | #include "ARMConstantPoolValue.h" | |||
18 | #include "ARMMachineFunctionInfo.h" | |||
19 | #include "MCTargetDesc/ARMAddressingModes.h" | |||
20 | #include "llvm/CodeGen/MachineFrameInfo.h" | |||
21 | #include "llvm/CodeGen/MachineFunction.h" | |||
22 | #include "llvm/CodeGen/MachineInstrBuilder.h" | |||
23 | #include "llvm/CodeGen/MachineModuleInfo.h" | |||
24 | #include "llvm/CodeGen/MachineRegisterInfo.h" | |||
25 | #include "llvm/CodeGen/RegisterScavenging.h" | |||
26 | #include "llvm/MC/MCAsmInfo.h" | |||
27 | #include "llvm/IR/CallingConv.h" | |||
28 | #include "llvm/IR/Function.h" | |||
29 | #include "llvm/MC/MCContext.h" | |||
30 | #include "llvm/Support/CommandLine.h" | |||
31 | #include "llvm/Target/TargetOptions.h" | |||
32 | ||||
33 | using namespace llvm; | |||
34 | ||||
// Command-line flag (default: on). When enabled, NEON D-register spills in
// the prolog/epilog are kept 8-byte aligned, which allows the faster aligned
// vst1/vld1 forms to be used for them.
static cl::opt<bool>
SpillAlignedNEONRegs("align-neon-spills", cl::Hidden, cl::init(true),
                     cl::desc("Align ARM NEON spills in prolog and epilog"));
38 | ||||
// Forward declaration: advances an iterator past the instruction sequence
// that spills the aligned DPRCS2 registers (defined later in this file; used
// by emitPrologue before its definition).
static MachineBasicBlock::iterator
skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
                        unsigned NumAlignedDPRCS2Regs);
42 | ||||
// The ARM stack grows down and its alignment comes from the subtarget. The
// remaining base-class ctor arguments are 0 and 4 — presumably the local
// area offset and transient stack alignment; confirm against the
// TargetFrameLowering constructor signature.
ARMFrameLowering::ARMFrameLowering(const ARMSubtarget &sti)
    : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, 4),
      STI(sti) {}
46 | ||||
47 | bool ARMFrameLowering::noFramePointerElim(const MachineFunction &MF) const { | |||
48 | // iOS always has a FP for backtracking, force other targets to keep their FP | |||
49 | // when doing FastISel. The emitted code is currently superior, and in cases | |||
50 | // like test-suite's lencod FastISel isn't quite correct when FP is eliminated. | |||
51 | return TargetFrameLowering::noFramePointerElim(MF) || | |||
52 | MF.getSubtarget<ARMSubtarget>().useFastISel(); | |||
53 | } | |||
54 | ||||
55 | /// hasFP - Return true if the specified function should have a dedicated frame | |||
56 | /// pointer register. This is true if the function has variable sized allocas | |||
57 | /// or if frame pointer elimination is disabled. | |||
58 | bool ARMFrameLowering::hasFP(const MachineFunction &MF) const { | |||
59 | const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); | |||
60 | ||||
61 | // iOS requires FP not to be clobbered for backtracing purpose. | |||
62 | if (STI.isTargetIOS() || STI.isTargetWatchOS()) | |||
63 | return true; | |||
64 | ||||
65 | const MachineFrameInfo *MFI = MF.getFrameInfo(); | |||
66 | // Always eliminate non-leaf frame pointers. | |||
67 | return ((MF.getTarget().Options.DisableFramePointerElim(MF) && | |||
68 | MFI->hasCalls()) || | |||
69 | RegInfo->needsStackRealignment(MF) || | |||
70 | MFI->hasVarSizedObjects() || | |||
71 | MFI->isFrameAddressTaken()); | |||
72 | } | |||
73 | ||||
74 | /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is | |||
75 | /// not required, we reserve argument space for call sites in the function | |||
76 | /// immediately on entry to the current function. This eliminates the need for | |||
77 | /// add/sub sp brackets around call sites. Returns true if the call frame is | |||
78 | /// included as part of the stack frame. | |||
79 | bool ARMFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { | |||
80 | const MachineFrameInfo *FFI = MF.getFrameInfo(); | |||
81 | unsigned CFSize = FFI->getMaxCallFrameSize(); | |||
82 | // It's not always a good idea to include the call frame as part of the | |||
83 | // stack frame. ARM (especially Thumb) has small immediate offset to | |||
84 | // address the stack frame. So a large call frame can cause poor codegen | |||
85 | // and may even makes it impossible to scavenge a register. | |||
86 | if (CFSize >= ((1 << 12) - 1) / 2) // Half of imm12 | |||
87 | return false; | |||
88 | ||||
89 | return !MF.getFrameInfo()->hasVarSizedObjects(); | |||
90 | } | |||
91 | ||||
92 | /// canSimplifyCallFramePseudos - If there is a reserved call frame, the | |||
93 | /// call frame pseudos can be simplified. Unlike most targets, having a FP | |||
94 | /// is not sufficient here since we still may reference some objects via SP | |||
95 | /// even when FP is available in Thumb2 mode. | |||
96 | bool | |||
97 | ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const { | |||
98 | return hasReservedCallFrame(MF) || MF.getFrameInfo()->hasVarSizedObjects(); | |||
99 | } | |||
100 | ||||
101 | static bool isCSRestore(MachineInstr *MI, | |||
102 | const ARMBaseInstrInfo &TII, | |||
103 | const MCPhysReg *CSRegs) { | |||
104 | // Integer spill area is handled with "pop". | |||
105 | if (isPopOpcode(MI->getOpcode())) { | |||
106 | // The first two operands are predicates. The last two are | |||
107 | // imp-def and imp-use of SP. Check everything in between. | |||
108 | for (int i = 5, e = MI->getNumOperands(); i != e; ++i) | |||
109 | if (!isCalleeSavedRegister(MI->getOperand(i).getReg(), CSRegs)) | |||
110 | return false; | |||
111 | return true; | |||
112 | } | |||
113 | if ((MI->getOpcode() == ARM::LDR_POST_IMM || | |||
114 | MI->getOpcode() == ARM::LDR_POST_REG || | |||
115 | MI->getOpcode() == ARM::t2LDR_POST) && | |||
116 | isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs) && | |||
117 | MI->getOperand(1).getReg() == ARM::SP) | |||
118 | return true; | |||
119 | ||||
120 | return false; | |||
121 | } | |||
122 | ||||
123 | static void emitRegPlusImmediate(bool isARM, MachineBasicBlock &MBB, | |||
124 | MachineBasicBlock::iterator &MBBI, DebugLoc dl, | |||
125 | const ARMBaseInstrInfo &TII, unsigned DestReg, | |||
126 | unsigned SrcReg, int NumBytes, | |||
127 | unsigned MIFlags = MachineInstr::NoFlags, | |||
128 | ARMCC::CondCodes Pred = ARMCC::AL, | |||
129 | unsigned PredReg = 0) { | |||
130 | if (isARM) | |||
131 | emitARMRegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes, | |||
132 | Pred, PredReg, TII, MIFlags); | |||
133 | else | |||
134 | emitT2RegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes, | |||
135 | Pred, PredReg, TII, MIFlags); | |||
136 | } | |||
137 | ||||
/// Adjust SP by NumBytes (SP = SP + NumBytes); thin wrapper around
/// emitRegPlusImmediate with SP as both source and destination.
static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator &MBBI, DebugLoc dl,
                         const ARMBaseInstrInfo &TII, int NumBytes,
                         unsigned MIFlags = MachineInstr::NoFlags,
                         ARMCC::CondCodes Pred = ARMCC::AL,
                         unsigned PredReg = 0) {
  emitRegPlusImmediate(isARM, MBB, MBBI, dl, TII, ARM::SP, ARM::SP, NumBytes,
                       MIFlags, Pred, PredReg);
}
147 | ||||
148 | static int sizeOfSPAdjustment(const MachineInstr *MI) { | |||
149 | int RegSize; | |||
150 | switch (MI->getOpcode()) { | |||
151 | case ARM::VSTMDDB_UPD: | |||
152 | RegSize = 8; | |||
153 | break; | |||
154 | case ARM::STMDB_UPD: | |||
155 | case ARM::t2STMDB_UPD: | |||
156 | RegSize = 4; | |||
157 | break; | |||
158 | case ARM::t2STR_PRE: | |||
159 | case ARM::STR_PRE_IMM: | |||
160 | return 4; | |||
161 | default: | |||
162 | llvm_unreachable("Unknown push or pop like instruction")::llvm::llvm_unreachable_internal("Unknown push or pop like instruction" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn267387/lib/Target/ARM/ARMFrameLowering.cpp" , 162); | |||
163 | } | |||
164 | ||||
165 | int count = 0; | |||
166 | // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+ | |||
167 | // pred) so the list starts at 4. | |||
168 | for (int i = MI->getNumOperands() - 1; i >= 4; --i) | |||
169 | count += RegSize; | |||
170 | return count; | |||
171 | } | |||
172 | ||||
173 | static bool WindowsRequiresStackProbe(const MachineFunction &MF, | |||
174 | size_t StackSizeInBytes) { | |||
175 | const MachineFrameInfo *MFI = MF.getFrameInfo(); | |||
176 | const Function *F = MF.getFunction(); | |||
177 | unsigned StackProbeSize = (MFI->getStackProtectorIndex() > 0) ? 4080 : 4096; | |||
178 | if (F->hasFnAttribute("stack-probe-size")) | |||
179 | F->getFnAttribute("stack-probe-size") | |||
180 | .getValueAsString() | |||
181 | .getAsInteger(0, StackProbeSize); | |||
182 | return StackSizeInBytes >= StackProbeSize; | |||
183 | } | |||
184 | ||||
185 | namespace { | |||
186 | struct StackAdjustingInsts { | |||
187 | struct InstInfo { | |||
188 | MachineBasicBlock::iterator I; | |||
189 | unsigned SPAdjust; | |||
190 | bool BeforeFPSet; | |||
191 | }; | |||
192 | ||||
193 | SmallVector<InstInfo, 4> Insts; | |||
194 | ||||
195 | void addInst(MachineBasicBlock::iterator I, unsigned SPAdjust, | |||
196 | bool BeforeFPSet = false) { | |||
197 | InstInfo Info = {I, SPAdjust, BeforeFPSet}; | |||
198 | Insts.push_back(Info); | |||
199 | } | |||
200 | ||||
201 | void addExtraBytes(const MachineBasicBlock::iterator I, unsigned ExtraBytes) { | |||
202 | auto Info = std::find_if(Insts.begin(), Insts.end(), | |||
203 | [&](InstInfo &Info) { return Info.I == I; }); | |||
204 | assert(Info != Insts.end() && "invalid sp adjusting instruction")((Info != Insts.end() && "invalid sp adjusting instruction" ) ? static_cast<void> (0) : __assert_fail ("Info != Insts.end() && \"invalid sp adjusting instruction\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn267387/lib/Target/ARM/ARMFrameLowering.cpp" , 204, __PRETTY_FUNCTION__)); | |||
205 | Info->SPAdjust += ExtraBytes; | |||
206 | } | |||
207 | ||||
208 | void emitDefCFAOffsets(MachineModuleInfo &MMI, MachineBasicBlock &MBB, | |||
209 | DebugLoc dl, const ARMBaseInstrInfo &TII, bool HasFP) { | |||
210 | unsigned CFAOffset = 0; | |||
211 | for (auto &Info : Insts) { | |||
212 | if (HasFP && !Info.BeforeFPSet) | |||
213 | return; | |||
214 | ||||
215 | CFAOffset -= Info.SPAdjust; | |||
216 | unsigned CFIIndex = MMI.addFrameInst( | |||
217 | MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); | |||
218 | BuildMI(MBB, std::next(Info.I), dl, | |||
219 | TII.get(TargetOpcode::CFI_INSTRUCTION)) | |||
220 | .addCFIIndex(CFIIndex) | |||
221 | .setMIFlags(MachineInstr::FrameSetup); | |||
222 | } | |||
223 | } | |||
224 | }; | |||
225 | } | |||
226 | ||||
/// Emit an instruction sequence that will align the address in
/// register Reg by zero-ing out the lower bits. For versions of the
/// architecture that support Neon, this must be done in a single
/// instruction, since skipAlignedDPRCS2Spills assumes it is done in a
/// single instruction. That function only gets called when optimizing
/// spilling of D registers on a core with the Neon instruction set
/// present.
static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI,
                                     const TargetInstrInfo &TII,
                                     MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI,
                                     DebugLoc DL, const unsigned Reg,
                                     const unsigned Alignment,
                                     const bool MustBeSingleInstruction) {
  const ARMSubtarget &AST =
      static_cast<const ARMSubtarget &>(MF.getSubtarget());
  const bool CanUseBFC = AST.hasV6T2Ops() || AST.hasV7Ops();
  // Alignment is assumed to be a power of two (countTrailingZeros below only
  // makes sense then): the mask of bits to clear is Alignment-1 and the
  // number of low bits to zero is log2(Alignment).
  const unsigned AlignMask = Alignment - 1;
  const unsigned NrBitsToZero = countTrailingZeros(Alignment);
  assert(!AFI->isThumb1OnlyFunction() && "Thumb1 not supported");
  if (!AFI->isThumbFunction()) {
    // if the BFC instruction is available, use that to zero the lower
    // bits:
    //   bfc Reg, #0, log2(Alignment)
    // otherwise use BIC, if the mask to zero the required number of bits
    // can be encoded in the bic immediate field
    //   bic Reg, Reg, Alignment-1
    // otherwise, emit
    //   lsr Reg, Reg, log2(Alignment)
    //   lsl Reg, Reg, log2(Alignment)
    if (CanUseBFC) {
      AddDefaultPred(BuildMI(MBB, MBBI, DL, TII.get(ARM::BFC), Reg)
                         .addReg(Reg, RegState::Kill)
                         .addImm(~AlignMask));
    } else if (AlignMask <= 255) {
      AddDefaultCC(
          AddDefaultPred(BuildMI(MBB, MBBI, DL, TII.get(ARM::BICri), Reg)
                             .addReg(Reg, RegState::Kill)
                             .addImm(AlignMask)));
    } else {
      assert(!MustBeSingleInstruction &&
             "Shouldn't call emitAligningInstructions demanding a single "
             "instruction to be emitted for large stack alignment for a target "
             "without BFC.");
      // Shifting right then left by the same amount clears the low
      // NrBitsToZero bits in two instructions.
      AddDefaultCC(AddDefaultPred(
          BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
              .addReg(Reg, RegState::Kill)
              .addImm(ARM_AM::getSORegOpc(ARM_AM::lsr, NrBitsToZero))));
      AddDefaultCC(AddDefaultPred(
          BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
              .addReg(Reg, RegState::Kill)
              .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, NrBitsToZero))));
    }
  } else {
    // Since this is only reached for Thumb-2 targets, the BFC instruction
    // should always be available.
    assert(CanUseBFC);
    AddDefaultPred(BuildMI(MBB, MBBI, DL, TII.get(ARM::t2BFC), Reg)
                       .addReg(Reg, RegState::Kill)
                       .addImm(~AlignMask));
  }
}
289 | ||||
290 | void ARMFrameLowering::emitPrologue(MachineFunction &MF, | |||
291 | MachineBasicBlock &MBB) const { | |||
292 | MachineBasicBlock::iterator MBBI = MBB.begin(); | |||
293 | MachineFrameInfo *MFI = MF.getFrameInfo(); | |||
294 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); | |||
295 | MachineModuleInfo &MMI = MF.getMMI(); | |||
296 | MCContext &Context = MMI.getContext(); | |||
297 | const TargetMachine &TM = MF.getTarget(); | |||
298 | const MCRegisterInfo *MRI = Context.getRegisterInfo(); | |||
299 | const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo(); | |||
300 | const ARMBaseInstrInfo &TII = *STI.getInstrInfo(); | |||
301 | assert(!AFI->isThumb1OnlyFunction() &&((!AFI->isThumb1OnlyFunction() && "This emitPrologue does not support Thumb1!" ) ? static_cast<void> (0) : __assert_fail ("!AFI->isThumb1OnlyFunction() && \"This emitPrologue does not support Thumb1!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn267387/lib/Target/ARM/ARMFrameLowering.cpp" , 302, __PRETTY_FUNCTION__)) | |||
302 | "This emitPrologue does not support Thumb1!")((!AFI->isThumb1OnlyFunction() && "This emitPrologue does not support Thumb1!" ) ? static_cast<void> (0) : __assert_fail ("!AFI->isThumb1OnlyFunction() && \"This emitPrologue does not support Thumb1!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn267387/lib/Target/ARM/ARMFrameLowering.cpp" , 302, __PRETTY_FUNCTION__)); | |||
303 | bool isARM = !AFI->isThumbFunction(); | |||
304 | unsigned Align = STI.getFrameLowering()->getStackAlignment(); | |||
305 | unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); | |||
306 | unsigned NumBytes = MFI->getStackSize(); | |||
307 | const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); | |||
308 | ||||
309 | // Debug location must be unknown since the first debug location is used | |||
310 | // to determine the end of the prologue. | |||
311 | DebugLoc dl; | |||
312 | ||||
313 | unsigned FramePtr = RegInfo->getFrameRegister(MF); | |||
314 | ||||
315 | // Determine the sizes of each callee-save spill areas and record which frame | |||
316 | // belongs to which callee-save spill areas. | |||
317 | unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0; | |||
318 | int FramePtrSpillFI = 0; | |||
319 | int D8SpillFI = 0; | |||
320 | ||||
321 | // All calls are tail calls in GHC calling conv, and functions have no | |||
322 | // prologue/epilogue. | |||
323 | if (MF.getFunction()->getCallingConv() == CallingConv::GHC) | |||
324 | return; | |||
325 | ||||
326 | StackAdjustingInsts DefCFAOffsetCandidates; | |||
327 | bool HasFP = hasFP(MF); | |||
328 | ||||
329 | // Allocate the vararg register save area. | |||
330 | if (ArgRegsSaveSize) { | |||
331 | emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize, | |||
332 | MachineInstr::FrameSetup); | |||
333 | DefCFAOffsetCandidates.addInst(std::prev(MBBI), ArgRegsSaveSize, true); | |||
334 | } | |||
335 | ||||
336 | if (!AFI->hasStackFrame() && | |||
337 | (!STI.isTargetWindows() || !WindowsRequiresStackProbe(MF, NumBytes))) { | |||
338 | if (NumBytes - ArgRegsSaveSize != 0) { | |||
339 | emitSPUpdate(isARM, MBB, MBBI, dl, TII, -(NumBytes - ArgRegsSaveSize), | |||
340 | MachineInstr::FrameSetup); | |||
341 | DefCFAOffsetCandidates.addInst(std::prev(MBBI), | |||
342 | NumBytes - ArgRegsSaveSize, true); | |||
343 | } | |||
344 | DefCFAOffsetCandidates.emitDefCFAOffsets(MMI, MBB, dl, TII, HasFP); | |||
345 | return; | |||
346 | } | |||
347 | ||||
348 | // Determine spill area sizes. | |||
349 | for (unsigned i = 0, e = CSI.size(); i != e; ++i) { | |||
350 | unsigned Reg = CSI[i].getReg(); | |||
351 | int FI = CSI[i].getFrameIdx(); | |||
352 | switch (Reg) { | |||
353 | case ARM::R8: | |||
354 | case ARM::R9: | |||
355 | case ARM::R10: | |||
356 | case ARM::R11: | |||
357 | case ARM::R12: | |||
358 | if (STI.isTargetMachO()) { | |||
359 | GPRCS2Size += 4; | |||
360 | break; | |||
361 | } | |||
362 | // fallthrough | |||
363 | case ARM::R0: | |||
364 | case ARM::R1: | |||
365 | case ARM::R2: | |||
366 | case ARM::R3: | |||
367 | case ARM::R4: | |||
368 | case ARM::R5: | |||
369 | case ARM::R6: | |||
370 | case ARM::R7: | |||
371 | case ARM::LR: | |||
372 | if (Reg == FramePtr) | |||
373 | FramePtrSpillFI = FI; | |||
374 | GPRCS1Size += 4; | |||
375 | break; | |||
376 | default: | |||
377 | // This is a DPR. Exclude the aligned DPRCS2 spills. | |||
378 | if (Reg == ARM::D8) | |||
379 | D8SpillFI = FI; | |||
380 | if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs()) | |||
381 | DPRCSSize += 8; | |||
382 | } | |||
383 | } | |||
384 | ||||
385 | // Move past area 1. | |||
386 | MachineBasicBlock::iterator LastPush = MBB.end(), GPRCS1Push, GPRCS2Push; | |||
387 | if (GPRCS1Size > 0) { | |||
388 | GPRCS1Push = LastPush = MBBI++; | |||
389 | DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, true); | |||
390 | } | |||
391 | ||||
392 | // Determine starting offsets of spill areas. | |||
393 | unsigned GPRCS1Offset = NumBytes - ArgRegsSaveSize - GPRCS1Size; | |||
394 | unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size; | |||
395 | unsigned DPRAlign = DPRCSSize ? std::min(8U, Align) : 4U; | |||
396 | unsigned DPRGapSize = (GPRCS1Size + GPRCS2Size + ArgRegsSaveSize) % DPRAlign; | |||
397 | unsigned DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize; | |||
398 | int FramePtrOffsetInPush = 0; | |||
399 | if (HasFP) { | |||
400 | FramePtrOffsetInPush = | |||
401 | MFI->getObjectOffset(FramePtrSpillFI) + ArgRegsSaveSize; | |||
402 | AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + | |||
403 | NumBytes); | |||
404 | } | |||
405 | AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset); | |||
406 | AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset); | |||
407 | AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset); | |||
408 | ||||
409 | // Move past area 2. | |||
410 | if (GPRCS2Size > 0) { | |||
411 | GPRCS2Push = LastPush = MBBI++; | |||
412 | DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size); | |||
413 | } | |||
414 | ||||
415 | // Prolog/epilog inserter assumes we correctly align DPRs on the stack, so our | |||
416 | // .cfi_offset operations will reflect that. | |||
417 | if (DPRGapSize) { | |||
418 | assert(DPRGapSize == 4 && "unexpected alignment requirements for DPRs")((DPRGapSize == 4 && "unexpected alignment requirements for DPRs" ) ? static_cast<void> (0) : __assert_fail ("DPRGapSize == 4 && \"unexpected alignment requirements for DPRs\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn267387/lib/Target/ARM/ARMFrameLowering.cpp" , 418, __PRETTY_FUNCTION__)); | |||
419 | if (tryFoldSPUpdateIntoPushPop(STI, MF, LastPush, DPRGapSize)) | |||
420 | DefCFAOffsetCandidates.addExtraBytes(LastPush, DPRGapSize); | |||
421 | else { | |||
422 | emitSPUpdate(isARM, MBB, MBBI, dl, TII, -DPRGapSize, | |||
423 | MachineInstr::FrameSetup); | |||
424 | DefCFAOffsetCandidates.addInst(std::prev(MBBI), DPRGapSize); | |||
425 | } | |||
426 | } | |||
427 | ||||
428 | // Move past area 3. | |||
429 | if (DPRCSSize > 0) { | |||
430 | // Since vpush register list cannot have gaps, there may be multiple vpush | |||
431 | // instructions in the prologue. | |||
432 | while (MBBI->getOpcode() == ARM::VSTMDDB_UPD) { | |||
433 | DefCFAOffsetCandidates.addInst(MBBI, sizeOfSPAdjustment(MBBI)); | |||
434 | LastPush = MBBI++; | |||
435 | } | |||
436 | } | |||
437 | ||||
438 | // Move past the aligned DPRCS2 area. | |||
439 | if (AFI->getNumAlignedDPRCS2Regs() > 0) { | |||
440 | MBBI = skipAlignedDPRCS2Spills(MBBI, AFI->getNumAlignedDPRCS2Regs()); | |||
441 | // The code inserted by emitAlignedDPRCS2Spills realigns the stack, and | |||
442 | // leaves the stack pointer pointing to the DPRCS2 area. | |||
443 | // | |||
444 | // Adjust NumBytes to represent the stack slots below the DPRCS2 area. | |||
445 | NumBytes += MFI->getObjectOffset(D8SpillFI); | |||
446 | } else | |||
447 | NumBytes = DPRCSOffset; | |||
448 | ||||
449 | if (STI.isTargetWindows() && WindowsRequiresStackProbe(MF, NumBytes)) { | |||
450 | uint32_t NumWords = NumBytes >> 2; | |||
451 | ||||
452 | if (NumWords < 65536) | |||
453 | AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4) | |||
454 | .addImm(NumWords) | |||
455 | .setMIFlags(MachineInstr::FrameSetup)); | |||
456 | else | |||
457 | BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R4) | |||
458 | .addImm(NumWords) | |||
459 | .setMIFlags(MachineInstr::FrameSetup); | |||
460 | ||||
461 | switch (TM.getCodeModel()) { | |||
462 | case CodeModel::Small: | |||
463 | case CodeModel::Medium: | |||
464 | case CodeModel::Default: | |||
465 | case CodeModel::Kernel: | |||
466 | BuildMI(MBB, MBBI, dl, TII.get(ARM::tBL)) | |||
467 | .addImm((unsigned)ARMCC::AL).addReg(0) | |||
468 | .addExternalSymbol("__chkstk") | |||
469 | .addReg(ARM::R4, RegState::Implicit) | |||
470 | .setMIFlags(MachineInstr::FrameSetup); | |||
471 | break; | |||
472 | case CodeModel::Large: | |||
473 | case CodeModel::JITDefault: | |||
474 | BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R12) | |||
475 | .addExternalSymbol("__chkstk") | |||
476 | .setMIFlags(MachineInstr::FrameSetup); | |||
477 | ||||
478 | BuildMI(MBB, MBBI, dl, TII.get(ARM::tBLXr)) | |||
479 | .addImm((unsigned)ARMCC::AL).addReg(0) | |||
480 | .addReg(ARM::R12, RegState::Kill) | |||
481 | .addReg(ARM::R4, RegState::Implicit) | |||
482 | .setMIFlags(MachineInstr::FrameSetup); | |||
483 | break; | |||
484 | } | |||
485 | ||||
486 | AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), | |||
487 | ARM::SP) | |||
488 | .addReg(ARM::SP, RegState::Kill) | |||
489 | .addReg(ARM::R4, RegState::Kill) | |||
490 | .setMIFlags(MachineInstr::FrameSetup))); | |||
491 | NumBytes = 0; | |||
492 | } | |||
493 | ||||
494 | if (NumBytes) { | |||
495 | // Adjust SP after all the callee-save spills. | |||
496 | if (AFI->getNumAlignedDPRCS2Regs() == 0 && | |||
497 | tryFoldSPUpdateIntoPushPop(STI, MF, LastPush, NumBytes)) | |||
498 | DefCFAOffsetCandidates.addExtraBytes(LastPush, NumBytes); | |||
499 | else { | |||
500 | emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes, | |||
501 | MachineInstr::FrameSetup); | |||
502 | DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes); | |||
503 | } | |||
504 | ||||
505 | if (HasFP && isARM) | |||
506 | // Restore from fp only in ARM mode: e.g. sub sp, r7, #24 | |||
507 | // Note it's not safe to do this in Thumb2 mode because it would have | |||
508 | // taken two instructions: | |||
509 | // mov sp, r7 | |||
510 | // sub sp, #24 | |||
511 | // If an interrupt is taken between the two instructions, then sp is in | |||
512 | // an inconsistent state (pointing to the middle of callee-saved area). | |||
513 | // The interrupt handler can end up clobbering the registers. | |||
514 | AFI->setShouldRestoreSPFromFP(true); | |||
515 | } | |||
516 | ||||
517 | // Set FP to point to the stack slot that contains the previous FP. | |||
518 | // For iOS, FP is R7, which has now been stored in spill area 1. | |||
519 | // Otherwise, if this is not iOS, all the callee-saved registers go | |||
520 | // into spill area 1, including the FP in R11. In either case, it | |||
521 | // is in area one and the adjustment needs to take place just after | |||
522 | // that push. | |||
523 | if (HasFP) { | |||
524 | MachineBasicBlock::iterator AfterPush = std::next(GPRCS1Push); | |||
525 | unsigned PushSize = sizeOfSPAdjustment(GPRCS1Push); | |||
526 | emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, | |||
527 | dl, TII, FramePtr, ARM::SP, | |||
528 | PushSize + FramePtrOffsetInPush, | |||
529 | MachineInstr::FrameSetup); | |||
530 | if (FramePtrOffsetInPush + PushSize != 0) { | |||
531 | unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfa( | |||
532 | nullptr, MRI->getDwarfRegNum(FramePtr, true), | |||
533 | -(ArgRegsSaveSize - FramePtrOffsetInPush))); | |||
534 | BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) | |||
535 | .addCFIIndex(CFIIndex) | |||
536 | .setMIFlags(MachineInstr::FrameSetup); | |||
537 | } else { | |||
538 | unsigned CFIIndex = | |||
539 | MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister( | |||
540 | nullptr, MRI->getDwarfRegNum(FramePtr, true))); | |||
541 | BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) | |||
542 | .addCFIIndex(CFIIndex) | |||
543 | .setMIFlags(MachineInstr::FrameSetup); | |||
544 | } | |||
545 | } | |||
546 | ||||
547 | // Now that the prologue's actual instructions are finalised, we can insert | |||
548 | // the necessary DWARF cf instructions to describe the situation. Start by | |||
549 | // recording where each register ended up: | |||
550 | if (GPRCS1Size > 0) { | |||
551 | MachineBasicBlock::iterator Pos = std::next(GPRCS1Push); | |||
552 | int CFIIndex; | |||
553 | for (const auto &Entry : CSI) { | |||
554 | unsigned Reg = Entry.getReg(); | |||
555 | int FI = Entry.getFrameIdx(); | |||
556 | switch (Reg) { | |||
557 | case ARM::R8: | |||
558 | case ARM::R9: | |||
559 | case ARM::R10: | |||
560 | case ARM::R11: | |||
561 | case ARM::R12: | |||
562 | if (STI.isTargetMachO()) | |||
563 | break; | |||
564 | // fallthrough | |||
565 | case ARM::R0: | |||
566 | case ARM::R1: | |||
567 | case ARM::R2: | |||
568 | case ARM::R3: | |||
569 | case ARM::R4: | |||
570 | case ARM::R5: | |||
571 | case ARM::R6: | |||
572 | case ARM::R7: | |||
573 | case ARM::LR: | |||
574 | CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( | |||
575 | nullptr, MRI->getDwarfRegNum(Reg, true), MFI->getObjectOffset(FI))); | |||
576 | BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) | |||
577 | .addCFIIndex(CFIIndex) | |||
578 | .setMIFlags(MachineInstr::FrameSetup); | |||
579 | break; | |||
580 | } | |||
581 | } | |||
582 | } | |||
583 | ||||
584 | if (GPRCS2Size > 0) { | |||
585 | MachineBasicBlock::iterator Pos = std::next(GPRCS2Push); | |||
586 | for (const auto &Entry : CSI) { | |||
587 | unsigned Reg = Entry.getReg(); | |||
588 | int FI = Entry.getFrameIdx(); | |||
589 | switch (Reg) { | |||
590 | case ARM::R8: | |||
591 | case ARM::R9: | |||
592 | case ARM::R10: | |||
593 | case ARM::R11: | |||
594 | case ARM::R12: | |||
595 | if (STI.isTargetMachO()) { | |||
596 | unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true); | |||
597 | unsigned Offset = MFI->getObjectOffset(FI); | |||
598 | unsigned CFIIndex = MMI.addFrameInst( | |||
599 | MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset)); | |||
600 | BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) | |||
601 | .addCFIIndex(CFIIndex) | |||
602 | .setMIFlags(MachineInstr::FrameSetup); | |||
603 | } | |||
604 | break; | |||
605 | } | |||
606 | } | |||
607 | } | |||
608 | ||||
609 | if (DPRCSSize > 0) { | |||
610 | // Since vpush register list cannot have gaps, there may be multiple vpush | |||
611 | // instructions in the prologue. | |||
612 | MachineBasicBlock::iterator Pos = std::next(LastPush); | |||
613 | for (const auto &Entry : CSI) { | |||
614 | unsigned Reg = Entry.getReg(); | |||
615 | int FI = Entry.getFrameIdx(); | |||
616 | if ((Reg >= ARM::D0 && Reg <= ARM::D31) && | |||
617 | (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())) { | |||
618 | unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true); | |||
619 | unsigned Offset = MFI->getObjectOffset(FI); | |||
620 | unsigned CFIIndex = MMI.addFrameInst( | |||
621 | MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset)); | |||
622 | BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) | |||
623 | .addCFIIndex(CFIIndex) | |||
624 | .setMIFlags(MachineInstr::FrameSetup); | |||
625 | } | |||
626 | } | |||
627 | } | |||
628 | ||||
629 | // Now we can emit descriptions of where the canonical frame address was | |||
630 | // throughout the process. If we have a frame pointer, it takes over the job | |||
631 | // half-way through, so only the first few .cfi_def_cfa_offset instructions | |||
632 | // actually get emitted. | |||
633 | DefCFAOffsetCandidates.emitDefCFAOffsets(MMI, MBB, dl, TII, HasFP); | |||
634 | ||||
635 | if (STI.isTargetELF() && hasFP(MF)) | |||
636 | MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() - | |||
637 | AFI->getFramePtrSpillOffset()); | |||
638 | ||||
639 | AFI->setGPRCalleeSavedArea1Size(GPRCS1Size); | |||
640 | AFI->setGPRCalleeSavedArea2Size(GPRCS2Size); | |||
641 | AFI->setDPRCalleeSavedGapSize(DPRGapSize); | |||
642 | AFI->setDPRCalleeSavedAreaSize(DPRCSSize); | |||
643 | ||||
644 | // If we need dynamic stack realignment, do it here. Be paranoid and make | |||
645 | // sure if we also have VLAs, we have a base pointer for frame access. | |||
646 | // If aligned NEON registers were spilled, the stack has already been | |||
647 | // realigned. | |||
648 | if (!AFI->getNumAlignedDPRCS2Regs() && RegInfo->needsStackRealignment(MF)) { | |||
649 | unsigned MaxAlign = MFI->getMaxAlignment(); | |||
650 | assert(!AFI->isThumb1OnlyFunction())((!AFI->isThumb1OnlyFunction()) ? static_cast<void> ( 0) : __assert_fail ("!AFI->isThumb1OnlyFunction()", "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn267387/lib/Target/ARM/ARMFrameLowering.cpp" , 650, __PRETTY_FUNCTION__)); | |||
651 | if (!AFI->isThumbFunction()) { | |||
652 | emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::SP, MaxAlign, | |||
653 | false); | |||
654 | } else { | |||
655 | // We cannot use sp as source/dest register here, thus we're using r4 to | |||
656 | // perform the calculations. We're emitting the following sequence: | |||
657 | // mov r4, sp | |||
658 | // -- use emitAligningInstructions to produce best sequence to zero | |||
659 | // -- out lower bits in r4 | |||
660 | // mov sp, r4 | |||
661 | // FIXME: It will be better just to find spare register here. | |||
662 | AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4) | |||
663 | .addReg(ARM::SP, RegState::Kill)); | |||
664 | emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::R4, MaxAlign, | |||
665 | false); | |||
666 | AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP) | |||
667 | .addReg(ARM::R4, RegState::Kill)); | |||
668 | } | |||
669 | ||||
670 | AFI->setShouldRestoreSPFromFP(true); | |||
671 | } | |||
672 | ||||
673 | // If we need a base pointer, set it up here. It's whatever the value | |||
674 | // of the stack pointer is at this point. Any variable size objects | |||
675 | // will be allocated after this, so we can still use the base pointer | |||
676 | // to reference locals. | |||
677 | // FIXME: Clarify FrameSetup flags here. | |||
678 | if (RegInfo->hasBasePointer(MF)) { | |||
679 | if (isARM) | |||
680 | BuildMI(MBB, MBBI, dl, | |||
681 | TII.get(ARM::MOVr), RegInfo->getBaseRegister()) | |||
682 | .addReg(ARM::SP) | |||
683 | .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); | |||
684 | else | |||
685 | AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), | |||
686 | RegInfo->getBaseRegister()) | |||
687 | .addReg(ARM::SP)); | |||
688 | } | |||
689 | ||||
690 | // If the frame has variable sized objects then the epilogue must restore | |||
691 | // the sp from fp. We can assume there's an FP here since hasFP already | |||
692 | // checks for hasVarSizedObjects. | |||
693 | if (MFI->hasVarSizedObjects()) | |||
694 | AFI->setShouldRestoreSPFromFP(true); | |||
695 | } | |||
696 | ||||
697 | void ARMFrameLowering::emitEpilogue(MachineFunction &MF, | |||
698 | MachineBasicBlock &MBB) const { | |||
699 | MachineFrameInfo *MFI = MF.getFrameInfo(); | |||
700 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); | |||
701 | const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); | |||
702 | const ARMBaseInstrInfo &TII = | |||
703 | *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo()); | |||
704 | assert(!AFI->isThumb1OnlyFunction() &&((!AFI->isThumb1OnlyFunction() && "This emitEpilogue does not support Thumb1!" ) ? static_cast<void> (0) : __assert_fail ("!AFI->isThumb1OnlyFunction() && \"This emitEpilogue does not support Thumb1!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn267387/lib/Target/ARM/ARMFrameLowering.cpp" , 705, __PRETTY_FUNCTION__)) | |||
705 | "This emitEpilogue does not support Thumb1!")((!AFI->isThumb1OnlyFunction() && "This emitEpilogue does not support Thumb1!" ) ? static_cast<void> (0) : __assert_fail ("!AFI->isThumb1OnlyFunction() && \"This emitEpilogue does not support Thumb1!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn267387/lib/Target/ARM/ARMFrameLowering.cpp" , 705, __PRETTY_FUNCTION__)); | |||
706 | bool isARM = !AFI->isThumbFunction(); | |||
707 | ||||
708 | unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); | |||
709 | int NumBytes = (int)MFI->getStackSize(); | |||
710 | unsigned FramePtr = RegInfo->getFrameRegister(MF); | |||
711 | ||||
712 | // All calls are tail calls in GHC calling conv, and functions have no | |||
713 | // prologue/epilogue. | |||
714 | if (MF.getFunction()->getCallingConv() == CallingConv::GHC) | |||
715 | return; | |||
716 | ||||
717 | // First put ourselves on the first (from top) terminator instructions. | |||
718 | MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); | |||
719 | DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); | |||
720 | ||||
721 | if (!AFI->hasStackFrame()) { | |||
722 | if (NumBytes - ArgRegsSaveSize != 0) | |||
723 | emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes - ArgRegsSaveSize); | |||
724 | } else { | |||
725 | // Unwind MBBI to point to first LDR / VLDRD. | |||
726 | const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); | |||
727 | if (MBBI != MBB.begin()) { | |||
728 | do { | |||
729 | --MBBI; | |||
730 | } while (MBBI != MBB.begin() && isCSRestore(MBBI, TII, CSRegs)); | |||
731 | if (!isCSRestore(MBBI, TII, CSRegs)) | |||
732 | ++MBBI; | |||
733 | } | |||
734 | ||||
735 | // Move SP to start of FP callee save spill area. | |||
736 | NumBytes -= (ArgRegsSaveSize + | |||
737 | AFI->getGPRCalleeSavedArea1Size() + | |||
738 | AFI->getGPRCalleeSavedArea2Size() + | |||
739 | AFI->getDPRCalleeSavedGapSize() + | |||
740 | AFI->getDPRCalleeSavedAreaSize()); | |||
741 | ||||
742 | // Reset SP based on frame pointer only if the stack frame extends beyond | |||
743 | // frame pointer stack slot or target is ELF and the function has FP. | |||
744 | if (AFI->shouldRestoreSPFromFP()) { | |||
745 | NumBytes = AFI->getFramePtrSpillOffset() - NumBytes; | |||
746 | if (NumBytes) { | |||
747 | if (isARM) | |||
748 | emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes, | |||
749 | ARMCC::AL, 0, TII); | |||
750 | else { | |||
751 | // It's not possible to restore SP from FP in a single instruction. | |||
752 | // For iOS, this looks like: | |||
753 | // mov sp, r7 | |||
754 | // sub sp, #24 | |||
755 | // This is bad, if an interrupt is taken after the mov, sp is in an | |||
756 | // inconsistent state. | |||
757 | // Use the first callee-saved register as a scratch register. | |||
758 | assert(!MFI->getPristineRegs(MF).test(ARM::R4) &&((!MFI->getPristineRegs(MF).test(ARM::R4) && "No scratch register to restore SP from FP!" ) ? static_cast<void> (0) : __assert_fail ("!MFI->getPristineRegs(MF).test(ARM::R4) && \"No scratch register to restore SP from FP!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn267387/lib/Target/ARM/ARMFrameLowering.cpp" , 759, __PRETTY_FUNCTION__)) | |||
759 | "No scratch register to restore SP from FP!")((!MFI->getPristineRegs(MF).test(ARM::R4) && "No scratch register to restore SP from FP!" ) ? static_cast<void> (0) : __assert_fail ("!MFI->getPristineRegs(MF).test(ARM::R4) && \"No scratch register to restore SP from FP!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn267387/lib/Target/ARM/ARMFrameLowering.cpp" , 759, __PRETTY_FUNCTION__)); | |||
760 | emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes, | |||
761 | ARMCC::AL, 0, TII); | |||
762 | AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), | |||
763 | ARM::SP) | |||
764 | .addReg(ARM::R4)); | |||
765 | } | |||
766 | } else { | |||
767 | // Thumb2 or ARM. | |||
768 | if (isARM) | |||
769 | BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP) | |||
770 | .addReg(FramePtr).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); | |||
771 | else | |||
772 | AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), | |||
773 | ARM::SP) | |||
774 | .addReg(FramePtr)); | |||
775 | } | |||
776 | } else if (NumBytes && | |||
777 | !tryFoldSPUpdateIntoPushPop(STI, MF, MBBI, NumBytes)) | |||
778 | emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes); | |||
779 | ||||
780 | // Increment past our save areas. | |||
781 | if (AFI->getDPRCalleeSavedAreaSize()) { | |||
782 | MBBI++; | |||
783 | // Since vpop register list cannot have gaps, there may be multiple vpop | |||
784 | // instructions in the epilogue. | |||
785 | while (MBBI->getOpcode() == ARM::VLDMDIA_UPD) | |||
786 | MBBI++; | |||
787 | } | |||
788 | if (AFI->getDPRCalleeSavedGapSize()) { | |||
789 | assert(AFI->getDPRCalleeSavedGapSize() == 4 &&((AFI->getDPRCalleeSavedGapSize() == 4 && "unexpected DPR alignment gap" ) ? static_cast<void> (0) : __assert_fail ("AFI->getDPRCalleeSavedGapSize() == 4 && \"unexpected DPR alignment gap\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn267387/lib/Target/ARM/ARMFrameLowering.cpp" , 790, __PRETTY_FUNCTION__)) | |||
790 | "unexpected DPR alignment gap")((AFI->getDPRCalleeSavedGapSize() == 4 && "unexpected DPR alignment gap" ) ? static_cast<void> (0) : __assert_fail ("AFI->getDPRCalleeSavedGapSize() == 4 && \"unexpected DPR alignment gap\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn267387/lib/Target/ARM/ARMFrameLowering.cpp" , 790, __PRETTY_FUNCTION__)); | |||
791 | emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getDPRCalleeSavedGapSize()); | |||
792 | } | |||
793 | ||||
794 | if (AFI->getGPRCalleeSavedArea2Size()) MBBI++; | |||
795 | if (AFI->getGPRCalleeSavedArea1Size()) MBBI++; | |||
796 | } | |||
797 | ||||
798 | if (ArgRegsSaveSize) | |||
799 | emitSPUpdate(isARM, MBB, MBBI, dl, TII, ArgRegsSaveSize); | |||
800 | } | |||
801 | ||||
802 | /// getFrameIndexReference - Provide a base+offset reference to an FI slot for | |||
803 | /// debug info. It's the same as what we use for resolving the code-gen | |||
804 | /// references for now. FIXME: This can go wrong when references are | |||
805 | /// SP-relative and simple call frames aren't used. | |||
806 | int | |||
807 | ARMFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, | |||
808 | unsigned &FrameReg) const { | |||
809 | return ResolveFrameIndexReference(MF, FI, FrameReg, 0); | |||
810 | } | |||
811 | ||||
// Resolve frame index FI to a FrameReg + Offset pair, choosing between SP,
// the frame pointer, and the base pointer depending on stack realignment,
// VLAs / non-reserved call frames, and encodable offset ranges.
// Note: several branches below intentionally fall through (no return) so the
// base-pointer tail at the bottom can take over.
int
ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
                                             int FI, unsigned &FrameReg,
                                             int SPAdj) const {
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  // Rebase the MFI object offset (relative to incoming SP) onto the final SP.
  int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize();
  // Same slot expressed relative to where the frame pointer was spilled.
  int FPOffset = Offset - AFI->getFramePtrSpillOffset();
  bool isFixed = MFI->isFixedObjectIndex(FI);

  FrameReg = ARM::SP;
  Offset += SPAdj;

  // SP can move around if there are allocas. We may also lose track of SP
  // when emergency spilling inside a non-reserved call frame setup.
  bool hasMovingSP = !hasReservedCallFrame(MF);

  // When dynamically realigning the stack, use the frame pointer for
  // parameters, and the stack/base pointer for locals.
  if (RegInfo->needsStackRealignment(MF)) {
    assert (hasFP(MF) && "dynamic stack realignment without a FP!")((hasFP(MF) && "dynamic stack realignment without a FP!" ) ? static_cast<void> (0) : __assert_fail ("hasFP(MF) && \"dynamic stack realignment without a FP!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn267387/lib/Target/ARM/ARMFrameLowering.cpp" , 834, __PRETTY_FUNCTION__));
    if (isFixed) {
      // Fixed objects (incoming arguments) sit above the realignment point,
      // so only the FP reaches them reliably.
      FrameReg = RegInfo->getFrameRegister(MF);
      Offset = FPOffset;
    } else if (hasMovingSP) {
      assert(RegInfo->hasBasePointer(MF) &&((RegInfo->hasBasePointer(MF) && "VLAs and dynamic stack alignment, but missing base pointer!" ) ? static_cast<void> (0) : __assert_fail ("RegInfo->hasBasePointer(MF) && \"VLAs and dynamic stack alignment, but missing base pointer!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn267387/lib/Target/ARM/ARMFrameLowering.cpp" , 840, __PRETTY_FUNCTION__))
             "VLAs and dynamic stack alignment, but missing base pointer!")((RegInfo->hasBasePointer(MF) && "VLAs and dynamic stack alignment, but missing base pointer!" ) ? static_cast<void> (0) : __assert_fail ("RegInfo->hasBasePointer(MF) && \"VLAs and dynamic stack alignment, but missing base pointer!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn267387/lib/Target/ARM/ARMFrameLowering.cpp" , 840, __PRETTY_FUNCTION__));
      FrameReg = RegInfo->getBaseRegister();
    }
    return Offset;
  }

  // If there is a frame pointer, use it when we can.
  if (hasFP(MF) && AFI->hasStackFrame()) {
    // Use frame pointer to reference fixed objects. Use it for locals if
    // there are VLAs (and thus the SP isn't reliable as a base).
    if (isFixed || (hasMovingSP && !RegInfo->hasBasePointer(MF))) {
      FrameReg = RegInfo->getFrameRegister(MF);
      return FPOffset;
    } else if (hasMovingSP) {
      assert(RegInfo->hasBasePointer(MF) && "missing base pointer!")((RegInfo->hasBasePointer(MF) && "missing base pointer!" ) ? static_cast<void> (0) : __assert_fail ("RegInfo->hasBasePointer(MF) && \"missing base pointer!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn267387/lib/Target/ARM/ARMFrameLowering.cpp" , 854, __PRETTY_FUNCTION__));
      if (AFI->isThumb2Function()) {
        // Try to use the frame pointer if we can, else use the base pointer
        // since it's available. This is handy for the emergency spill slot, in
        // particular.
        if (FPOffset >= -255 && FPOffset < 0) {
          FrameReg = RegInfo->getFrameRegister(MF);
          return FPOffset;
        }
      }
      // Otherwise fall through to the base-pointer tail below.
    } else if (AFI->isThumb2Function()) {
      // Use  add <rd>, sp, #<imm8>
      //      ldr <rd>, [sp, #<imm8>]
      // if at all possible to save space.
      if (Offset >= 0 && (Offset & 3) == 0 && Offset <= 1020)
        return Offset;
      // In Thumb2 mode, the negative offset is very limited. Try to avoid
      // out of range references. ldr <rt>,[<rn>, #-<imm8>]
      if (FPOffset >= -255 && FPOffset < 0) {
        FrameReg = RegInfo->getFrameRegister(MF);
        return FPOffset;
      }
    } else if (Offset > (FPOffset < 0 ? -FPOffset : FPOffset)) {
      // Otherwise, use SP or FP, whichever is closer to the stack slot.
      FrameReg = RegInfo->getFrameRegister(MF);
      return FPOffset;
    }
  }
  // Use the base pointer if we have one.
  if (RegInfo->hasBasePointer(MF))
    FrameReg = RegInfo->getBaseRegister();
  return Offset;
}
887 | ||||
888 | void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB, | |||
889 | MachineBasicBlock::iterator MI, | |||
890 | const std::vector<CalleeSavedInfo> &CSI, | |||
891 | unsigned StmOpc, unsigned StrOpc, | |||
892 | bool NoGap, | |||
893 | bool(*Func)(unsigned, bool), | |||
894 | unsigned NumAlignedDPRCS2Regs, | |||
895 | unsigned MIFlags) const { | |||
896 | MachineFunction &MF = *MBB.getParent(); | |||
897 | const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); | |||
898 | ||||
899 | DebugLoc DL; | |||
900 | ||||
901 | SmallVector<std::pair<unsigned,bool>, 4> Regs; | |||
902 | unsigned i = CSI.size(); | |||
903 | while (i != 0) { | |||
904 | unsigned LastReg = 0; | |||
905 | for (; i != 0; --i) { | |||
906 | unsigned Reg = CSI[i-1].getReg(); | |||
907 | if (!(Func)(Reg, STI.isTargetMachO())) continue; | |||
908 | ||||
909 | // D-registers in the aligned area DPRCS2 are NOT spilled here. | |||
910 | if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs) | |||
911 | continue; | |||
912 | ||||
913 | bool isLiveIn = MF.getRegInfo().isLiveIn(Reg); | |||
914 | if (!isLiveIn) | |||
915 | MBB.addLiveIn(Reg); | |||
916 | // If NoGap is true, push consecutive registers and then leave the rest | |||
917 | // for other instructions. e.g. | |||
918 | // vpush {d8, d10, d11} -> vpush {d8}, vpush {d10, d11} | |||
919 | if (NoGap && LastReg && LastReg != Reg-1) | |||
920 | break; | |||
921 | LastReg = Reg; | |||
922 | // Do not set a kill flag on values that are also marked as live-in. This | |||
923 | // happens with the @llvm-returnaddress intrinsic and with arguments | |||
924 | // passed in callee saved registers. | |||
925 | // Omitting the kill flags is conservatively correct even if the live-in | |||
926 | // is not used after all. | |||
927 | Regs.push_back(std::make_pair(Reg, /*isKill=*/!isLiveIn)); | |||
928 | } | |||
929 | ||||
930 | if (Regs.empty()) | |||
931 | continue; | |||
932 | if (Regs.size() > 1 || StrOpc== 0) { | |||
933 | MachineInstrBuilder MIB = | |||
934 | AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP) | |||
935 | .addReg(ARM::SP).setMIFlags(MIFlags)); | |||
936 | for (unsigned i = 0, e = Regs.size(); i < e; ++i) | |||
937 | MIB.addReg(Regs[i].first, getKillRegState(Regs[i].second)); | |||
938 | } else if (Regs.size() == 1) { | |||
939 | MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc), | |||
940 | ARM::SP) | |||
941 | .addReg(Regs[0].first, getKillRegState(Regs[0].second)) | |||
942 | .addReg(ARM::SP).setMIFlags(MIFlags) | |||
943 | .addImm(-4); | |||
944 | AddDefaultPred(MIB); | |||
945 | } | |||
946 | Regs.clear(); | |||
947 | ||||
948 | // Put any subsequent vpush instructions before this one: they will refer to | |||
949 | // higher register numbers so need to be pushed first in order to preserve | |||
950 | // monotonicity. | |||
951 | if (MI != MBB.begin()) | |||
952 | --MI; | |||
953 | } | |||
954 | } | |||
955 | ||||
956 | void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, | |||
957 | MachineBasicBlock::iterator MI, | |||
958 | const std::vector<CalleeSavedInfo> &CSI, | |||
959 | unsigned LdmOpc, unsigned LdrOpc, | |||
960 | bool isVarArg, bool NoGap, | |||
961 | bool(*Func)(unsigned, bool), | |||
962 | unsigned NumAlignedDPRCS2Regs) const { | |||
963 | MachineFunction &MF = *MBB.getParent(); | |||
964 | const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); | |||
965 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); | |||
966 | DebugLoc DL; | |||
967 | bool isTailCall = false; | |||
968 | bool isInterrupt = false; | |||
969 | bool isTrap = false; | |||
970 | if (MBB.end() != MI) { | |||
971 | DL = MI->getDebugLoc(); | |||
972 | unsigned RetOpcode = MI->getOpcode(); | |||
973 | isTailCall = (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri); | |||
974 | isInterrupt = | |||
975 | RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR; | |||
976 | isTrap = | |||
977 | RetOpcode == ARM::TRAP || RetOpcode == ARM::TRAPNaCl || | |||
978 | RetOpcode == ARM::tTRAP; | |||
979 | } | |||
980 | ||||
981 | SmallVector<unsigned, 4> Regs; | |||
982 | unsigned i = CSI.size(); | |||
983 | while (i != 0) { | |||
984 | unsigned LastReg = 0; | |||
985 | bool DeleteRet = false; | |||
986 | for (; i != 0; --i) { | |||
987 | unsigned Reg = CSI[i-1].getReg(); | |||
988 | if (!(Func)(Reg, STI.isTargetMachO())) continue; | |||
989 | ||||
990 | // The aligned reloads from area DPRCS2 are not inserted here. | |||
991 | if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs) | |||
992 | continue; | |||
993 | ||||
994 | if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt && | |||
995 | !isTrap && STI.hasV5TOps()) { | |||
996 | if (MBB.succ_empty()) { | |||
997 | Reg = ARM::PC; | |||
998 | DeleteRet = true; | |||
999 | LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET; | |||
1000 | } else | |||
1001 | LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD; | |||
1002 | // Fold the return instruction into the LDM. | |||
1003 | } | |||
1004 | ||||
1005 | // If NoGap is true, pop consecutive registers and then leave the rest | |||
1006 | // for other instructions. e.g. | |||
1007 | // vpop {d8, d10, d11} -> vpop {d8}, vpop {d10, d11} | |||
1008 | if (NoGap && LastReg && LastReg != Reg-1) | |||
1009 | break; | |||
1010 | ||||
1011 | LastReg = Reg; | |||
1012 | Regs.push_back(Reg); | |||
1013 | } | |||
1014 | ||||
1015 | if (Regs.empty()) | |||
1016 | continue; | |||
1017 | if (Regs.size() > 1 || LdrOpc == 0) { | |||
1018 | MachineInstrBuilder MIB = | |||
1019 | AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP) | |||
1020 | .addReg(ARM::SP)); | |||
1021 | for (unsigned i = 0, e = Regs.size(); i < e; ++i) | |||
1022 | MIB.addReg(Regs[i], getDefRegState(true)); | |||
1023 | if (DeleteRet && MI != MBB.end()) { | |||
1024 | MIB.copyImplicitOps(*MI); | |||
1025 | MI->eraseFromParent(); | |||
1026 | } | |||
1027 | MI = MIB; | |||
1028 | } else if (Regs.size() == 1) { | |||
1029 | // If we adjusted the reg to PC from LR above, switch it back here. We | |||
1030 | // only do that for LDM. | |||
1031 | if (Regs[0] == ARM::PC) | |||
1032 | Regs[0] = ARM::LR; | |||
1033 | MachineInstrBuilder MIB = | |||
1034 | BuildMI(MBB, MI, DL, TII.get(LdrOpc), Regs[0]) | |||
1035 | .addReg(ARM::SP, RegState::Define) | |||
1036 | .addReg(ARM::SP); | |||
1037 | // ARM mode needs an extra reg0 here due to addrmode2. Will go away once | |||
1038 | // that refactoring is complete (eventually). | |||
1039 | if (LdrOpc == ARM::LDR_POST_REG || LdrOpc == ARM::LDR_POST_IMM) { | |||
1040 | MIB.addReg(0); | |||
1041 | MIB.addImm(ARM_AM::getAM2Opc(ARM_AM::add, 4, ARM_AM::no_shift)); | |||
1042 | } else | |||
1043 | MIB.addImm(4); | |||
1044 | AddDefaultPred(MIB); | |||
1045 | } | |||
1046 | Regs.clear(); | |||
1047 | ||||
1048 | // Put any subsequent vpop instructions after this one: they will refer to | |||
1049 | // higher register numbers so need to be popped afterwards. | |||
1050 | if (MI != MBB.end()) | |||
1051 | ++MI; | |||
1052 | } | |||
1053 | } | |||
1054 | ||||
/// Emit aligned spill instructions for NumAlignedDPRCS2Regs D-registers
/// starting from d8. Also insert stack realignment code and leave the stack
/// pointer pointing to the d8 spill slot.
///
/// NOTE: skipAlignedDPRCS2Spills depends on the exact number of instructions
/// emitted here (3 for the realignment sequence plus one store per group);
/// keep the two functions in sync.
static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MI,
                                    unsigned NumAlignedDPRCS2Regs,
                                    const std::vector<CalleeSavedInfo> &CSI,
                                    const TargetRegisterInfo *TRI) {
  MachineFunction &MF = *MBB.getParent();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  MachineFrameInfo &MFI = *MF.getFrameInfo();

  // Mark the D-register spill slots as properly aligned. Since MFI computes
  // stack slot layout backwards, this can actually mean that the d-reg stack
  // slot offsets can be wrong. The offset for d8 will always be correct.
  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    // Non-D8+ CSI entries make this subtraction wrap to a huge unsigned
    // value, so the range check below also filters them out.
    unsigned DNum = CSI[i].getReg() - ARM::D8;
    if (DNum > NumAlignedDPRCS2Regs - 1)
      continue;
    int FI = CSI[i].getFrameIdx();
    // The even-numbered registers will be 16-byte aligned, the odd-numbered
    // registers will be 8-byte aligned.
    MFI.setObjectAlignment(FI, DNum % 2 ? 8 : 16);

    // The stack slot for D8 needs to be maximally aligned because this is
    // actually the point where we align the stack pointer.  MachineFrameInfo
    // computes all offsets relative to the incoming stack pointer which is a
    // bit weird when realigning the stack.  Any extra padding for this
    // over-alignment is not realized because the code inserted below adjusts
    // the stack pointer by numregs * 8 before aligning the stack pointer.
    if (DNum == 0)
      MFI.setObjectAlignment(FI, MFI.getMaxAlignment());
  }

  // Move the stack pointer to the d8 spill slot, and align it at the same
  // time. Leave the stack slot address in the scratch register r4.
  //
  //   sub r4, sp, #numregs * 8
  //   bic r4, r4, #align - 1
  //   mov sp, r4
  //
  bool isThumb = AFI->isThumbFunction();
  assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1")((!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1" ) ? static_cast<void> (0) : __assert_fail ("!AFI->isThumb1OnlyFunction() && \"Can't realign stack for thumb1\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn267387/lib/Target/ARM/ARMFrameLowering.cpp" , 1099, __PRETTY_FUNCTION__));
  AFI->setShouldRestoreSPFromFP(true);

  // sub r4, sp, #numregs * 8
  // The immediate is <= 64, so it doesn't need any special encoding.
  unsigned Opc = isThumb ? ARM::t2SUBri : ARM::SUBri;
  AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
                                  .addReg(ARM::SP)
                                  .addImm(8 * NumAlignedDPRCS2Regs)));

  unsigned MaxAlign = MF.getFrameInfo()->getMaxAlignment();
  // We must set parameter MustBeSingleInstruction to true, since
  // skipAlignedDPRCS2Spills expects exactly 3 instructions to perform
  // stack alignment. Luckily, this can always be done since all ARM
  // architecture versions that support Neon also support the BFC
  // instruction.
  emitAligningInstructions(MF, AFI, TII, MBB, MI, DL, ARM::R4, MaxAlign, true);

  // mov sp, r4
  // The stack pointer must be adjusted before spilling anything, otherwise
  // the stack slots could be clobbered by an interrupt handler.
  // Leave r4 live, it is used below.
  Opc = isThumb ? ARM::tMOVr : ARM::MOVr;
  MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(Opc), ARM::SP)
                                .addReg(ARM::R4);
  MIB = AddDefaultPred(MIB);
  if (!isThumb)
    AddDefaultCC(MIB);

  // Now spill NumAlignedDPRCS2Regs registers starting from d8.
  // r4 holds the stack slot address.
  unsigned NextReg = ARM::D8;

  // 16-byte aligned vst1.64 with 4 d-regs and address writeback.
  // The writeback is only needed when emitting two vst1.64 instructions.
  if (NumAlignedDPRCS2Regs >= 6) {
    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                               &ARM::QQPRRegClass);
    MBB.addLiveIn(SupReg);
    AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Qwb_fixed),
                           ARM::R4)
                       .addReg(ARM::R4, RegState::Kill).addImm(16)
                       .addReg(NextReg)
                       .addReg(SupReg, RegState::ImplicitKill));
    NextReg += 4;
    NumAlignedDPRCS2Regs -= 4;
  }

  // We won't modify r4 beyond this point. It currently points to the next
  // register to be spilled.
  unsigned R4BaseReg = NextReg;

  // 16-byte aligned vst1.64 with 4 d-regs, no writeback.
  if (NumAlignedDPRCS2Regs >= 4) {
    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                               &ARM::QQPRRegClass);
    MBB.addLiveIn(SupReg);
    AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Q))
                       .addReg(ARM::R4).addImm(16).addReg(NextReg)
                       .addReg(SupReg, RegState::ImplicitKill));
    NextReg += 4;
    NumAlignedDPRCS2Regs -= 4;
  }

  // 16-byte aligned vst1.64 with 2 d-regs.
  if (NumAlignedDPRCS2Regs >= 2) {
    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                               &ARM::QPRRegClass);
    MBB.addLiveIn(SupReg);
    AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1q64))
                       .addReg(ARM::R4).addImm(16).addReg(SupReg));
    NextReg += 2;
    NumAlignedDPRCS2Regs -= 2;
  }

  // Finally, use a vanilla vstr.64 for the odd last register.
  if (NumAlignedDPRCS2Regs) {
    MBB.addLiveIn(NextReg);
    // vstr.64 uses addrmode5 which has an offset scale of 4.
    AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VSTRD))
                       .addReg(NextReg)
                       .addReg(ARM::R4).addImm((NextReg-R4BaseReg)*2));
  }

  // The last spill instruction inserted should kill the scratch register r4.
  std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
}
1186 | ||||
/// Skip past the code inserted by emitAlignedDPRCS2Spills, and return an
/// iterator to the following instruction.
///
/// The number of instructions skipped is tied to exactly what
/// emitAlignedDPRCS2Spills emits: 3 alignment instructions, then one store
/// per register group. Keep the two functions in sync.
static MachineBasicBlock::iterator
skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
                        unsigned NumAlignedDPRCS2Regs) {
  //   sub r4, sp, #numregs * 8
  //   bic r4, r4, #align - 1
  //   mov sp, r4
  ++MI; ++MI; ++MI;
  assert(MI->mayStore() && "Expecting spill instruction")((MI->mayStore() && "Expecting spill instruction") ? static_cast<void> (0) : __assert_fail ("MI->mayStore() && \"Expecting spill instruction\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn267387/lib/Target/ARM/ARMFrameLowering.cpp" , 1196, __PRETTY_FUNCTION__));

  // These switches all fall through.
  // 7 regs: vst1.64 x2 + vstr; 5/6 regs: vst1.64 (+ second store); fewer
  // regs take only the later cases.
  switch(NumAlignedDPRCS2Regs) {
  case 7:
    // Skip the writeback vst1.64 covering d8-d11.
    ++MI;
    assert(MI->mayStore() && "Expecting spill instruction")((MI->mayStore() && "Expecting spill instruction") ? static_cast<void> (0) : __assert_fail ("MI->mayStore() && \"Expecting spill instruction\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn267387/lib/Target/ARM/ARMFrameLowering.cpp" , 1202, __PRETTY_FUNCTION__));
  default:
    // Skip one more store (the second vst1.64 or the odd vstr.64).
    ++MI;
    assert(MI->mayStore() && "Expecting spill instruction")((MI->mayStore() && "Expecting spill instruction") ? static_cast<void> (0) : __assert_fail ("MI->mayStore() && \"Expecting spill instruction\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn267387/lib/Target/ARM/ARMFrameLowering.cpp" , 1205, __PRETTY_FUNCTION__));
  case 1:
  case 2:
  case 4:
    // 1/2/4 regs need a single store, which must kill the r4 scratch.
    assert(MI->killsRegister(ARM::R4) && "Missed kill flag")((MI->killsRegister(ARM::R4) && "Missed kill flag" ) ? static_cast<void> (0) : __assert_fail ("MI->killsRegister(ARM::R4) && \"Missed kill flag\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn267387/lib/Target/ARM/ARMFrameLowering.cpp" , 1209, __PRETTY_FUNCTION__));
    ++MI;
  }
  return MI;
}
1214 | ||||
/// Emit aligned reload instructions for NumAlignedDPRCS2Regs D-registers
/// starting from d8. These instructions are assumed to execute while the
/// stack is still aligned, unlike the code inserted by emitPopInst.
///
/// \param MI insertion point (start of the epilogue sequence).
/// \param NumAlignedDPRCS2Regs number of contiguous d-registers (d8..) that
///        were spilled to the aligned area and must be reloaded here.
/// \param CSI callee-saved info; scanned to locate the frame index of d8.
static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator MI,
                                      unsigned NumAlignedDPRCS2Regs,
                                      const std::vector<CalleeSavedInfo> &CSI,
                                      const TargetRegisterInfo *TRI) {
  MachineFunction &MF = *MBB.getParent();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();

  // Find the frame index assigned to d8.
  int D8SpillFI = 0;
  for (unsigned i = 0, e = CSI.size(); i != e; ++i)
    if (CSI[i].getReg() == ARM::D8) {
      D8SpillFI = CSI[i].getFrameIdx();
      break;
    }

  // Materialize the address of the d8 spill slot into the scratch register r4.
  // This can be fairly complicated if the stack frame is large, so just use
  // the normal frame index elimination mechanism to do it. This code runs as
  // the initial part of the epilog where the stack and base pointers haven't
  // been changed yet.
  bool isThumb = AFI->isThumbFunction();
  assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");

  unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
  AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
                              .addFrameIndex(D8SpillFI).addImm(0)));

  // Now restore NumAlignedDPRCS2Regs registers starting from d8.
  unsigned NextReg = ARM::D8;

  // 16-byte aligned vld1.64 with 4 d-regs and writeback.
  // Writeback advances r4 past the first quad so the remaining loads use a
  // fixed base.
  if (NumAlignedDPRCS2Regs >= 6) {
    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                               &ARM::QQPRRegClass);
    AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Qwb_fixed), NextReg)
                   .addReg(ARM::R4, RegState::Define)
                   .addReg(ARM::R4, RegState::Kill).addImm(16)
                   .addReg(SupReg, RegState::ImplicitDefine));
    NextReg += 4;
    NumAlignedDPRCS2Regs -= 4;
  }

  // We won't modify r4 beyond this point. It currently points to the next
  // register to be spilled.
  unsigned R4BaseReg = NextReg;

  // 16-byte aligned vld1.64 with 4 d-regs, no writeback.
  if (NumAlignedDPRCS2Regs >= 4) {
    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                               &ARM::QQPRRegClass);
    AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Q), NextReg)
                   .addReg(ARM::R4).addImm(16)
                   .addReg(SupReg, RegState::ImplicitDefine));
    NextReg += 4;
    NumAlignedDPRCS2Regs -= 4;
  }

  // 16-byte aligned vld1.64 with 2 d-regs.
  if (NumAlignedDPRCS2Regs >= 2) {
    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                               &ARM::QPRRegClass);
    AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1q64), SupReg)
                   .addReg(ARM::R4).addImm(16));
    NextReg += 2;
    NumAlignedDPRCS2Regs -= 2;
  }

  // Finally, use a vanilla vldr.64 for the remaining odd register.
  // NOTE(review): the offset 2*(NextReg-R4BaseReg) presumably encodes the
  // byte distance in the load's scaled-immediate units — confirm against the
  // VLDRD addressing mode before touching this.
  if (NumAlignedDPRCS2Regs)
    AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLDRD), NextReg)
                   .addReg(ARM::R4).addImm(2*(NextReg-R4BaseReg)));

  // Last store kills r4.
  std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
}
1296 | ||||
/// Insert spill code for the callee-saved registers in CSI.
/// Areas 1 and 2 (GPRs) are pushed with (t2)STMDB_UPD, area 3 (d-registers)
/// with VSTMDDB_UPD; registers assigned to the aligned DPRCS2 area are
/// excluded from the VSTM and spilled separately after stack realignment.
/// Returns true to tell PEI the target handled the spills itself.
bool ARMFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MI,
                                        const std::vector<CalleeSavedInfo> &CSI,
                                        const TargetRegisterInfo *TRI) const {
  if (CSI.empty())
    return false;

  MachineFunction &MF = *MBB.getParent();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

  unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD;
  unsigned PushOneOpc = AFI->isThumbFunction() ?
    ARM::t2STR_PRE : ARM::STR_PRE_IMM;
  unsigned FltOpc = ARM::VSTMDDB_UPD;
  unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
  // Push order: area 1, area 2, then the float area. The last argument of the
  // third call tells emitPushInst to skip the aligned DPRCS2 registers.
  emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register, 0,
               MachineInstr::FrameSetup);
  emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register, 0,
               MachineInstr::FrameSetup);
  emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
               NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);

  // The code above does not insert spill code for the aligned DPRCS2 registers.
  // The stack realignment code will be inserted between the push instructions
  // and these spills.
  if (NumAlignedDPRCS2Regs)
    emitAlignedDPRCS2Spills(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);

  return true;
}
1327 | ||||
/// Insert reload code for the callee-saved registers in CSI, mirroring
/// spillCalleeSavedRegisters in reverse: aligned DPRCS2 reloads first (while
/// the stack is still aligned), then area 3, area 2, area 1 pops.
/// Returns true to tell PEI the target handled the restores itself.
bool ARMFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MI,
                                        const std::vector<CalleeSavedInfo> &CSI,
                                        const TargetRegisterInfo *TRI) const {
  if (CSI.empty())
    return false;

  MachineFunction &MF = *MBB.getParent();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  bool isVarArg = AFI->getArgRegsSaveSize() > 0;
  unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();

  // The emitPopInst calls below do not insert reloads for the aligned DPRCS2
  // registers. Do that here instead.
  if (NumAlignedDPRCS2Regs)
    emitAlignedDPRCS2Restores(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);

  unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
  unsigned LdrOpc = AFI->isThumbFunction() ? ARM::t2LDR_POST :ARM::LDR_POST_IMM;
  unsigned FltOpc = ARM::VLDMDIA_UPD;
  // Pop in the reverse order of the pushes: float area first, then the two
  // GPR areas. The aligned DPRCS2 registers are again skipped by the first
  // call since they were restored above.
  emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
              NumAlignedDPRCS2Regs);
  emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
              &isARMArea2Register, 0);
  emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
              &isARMArea1Register, 0);

  return true;
}
1357 | ||||
1358 | // FIXME: Make generic? | |||
1359 | static unsigned GetFunctionSizeInBytes(const MachineFunction &MF, | |||
1360 | const ARMBaseInstrInfo &TII) { | |||
1361 | unsigned FnSize = 0; | |||
1362 | for (auto &MBB : MF) { | |||
1363 | for (auto &MI : MBB) | |||
1364 | FnSize += TII.GetInstSizeInBytes(&MI); | |||
1365 | } | |||
1366 | return FnSize; | |||
1367 | } | |||
1368 | ||||
/// estimateRSStackSizeLimit - Look at each instruction that references stack
/// frames and return the stack size limit beyond which some of these
/// instructions will require a scratch register during their expansion later.
// FIXME: Move to TII?
static unsigned estimateRSStackSizeLimit(MachineFunction &MF,
                                         const TargetFrameLowering *TFI) {
  const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  // Start optimistic with a 12-bit immediate range and shrink it as more
  // restrictive addressing modes are seen referencing frame indices.
  unsigned Limit = (1 << 12) - 1;
  for (auto &MBB : MF) {
    for (auto &MI : MBB) {
      for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
        if (!MI.getOperand(i).isFI())
          continue;

        // When using ADDri to get the address of a stack object, 255 is the
        // largest offset guaranteed to fit in the immediate offset.
        if (MI.getOpcode() == ARM::ADDri) {
          Limit = std::min(Limit, (1U << 8) - 1);
          break;
        }

        // Otherwise check the addressing mode.
        switch (MI.getDesc().TSFlags & ARMII::AddrModeMask) {
        case ARMII::AddrMode3:
        case ARMII::AddrModeT2_i8:
          // 8-bit immediate.
          Limit = std::min(Limit, (1U << 8) - 1);
          break;
        case ARMII::AddrMode5:
        case ARMII::AddrModeT2_i8s4:
          // 8-bit immediate scaled by 4.
          Limit = std::min(Limit, ((1U << 8) - 1) * 4);
          break;
        case ARMII::AddrModeT2_i12:
          // i12 supports only positive offset so these will be converted to
          // i8 opcodes. See llvm::rewriteT2FrameIndex.
          if (TFI->hasFP(MF) && AFI->hasStackFrame())
            Limit = std::min(Limit, (1U << 8) - 1);
          break;
        case ARMII::AddrMode4:
        case ARMII::AddrMode6:
          // Addressing modes 4 & 6 (load/store) instructions can't encode an
          // immediate offset for stack references.
          return 0;
        default:
          break;
        }
        break; // At most one FI per instruction
      }
    }
  }

  return Limit;
}
1421 | ||||
// In functions that realign the stack, it can be an advantage to spill the
// callee-saved vector registers after realigning the stack. The vst1 and vld1
// instructions take alignment hints that can improve performance.
//
// Decides how many d-registers (starting at d8) should be spilled to the
// aligned DPRCS2 area, records the count in ARMFunctionInfo, and reserves r4
// as the scratch register the aligned spill/reload code needs. Sets the count
// to 0 whenever any precondition fails.
static void
checkNumAlignedDPRCS2Regs(MachineFunction &MF, BitVector &SavedRegs) {
  // Default to no aligned spills; every early return below leaves it at 0.
  MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(0);
  if (!SpillAlignedNEONRegs)
    return;

  // Naked functions don't spill callee-saved registers.
  if (MF.getFunction()->hasFnAttribute(Attribute::Naked))
    return;

  // We are planning to use NEON instructions vst1 / vld1.
  if (!static_cast<const ARMSubtarget &>(MF.getSubtarget()).hasNEON())
    return;

  // Don't bother if the default stack alignment is sufficiently high.
  if (MF.getSubtarget().getFrameLowering()->getStackAlignment() >= 8)
    return;

  // Aligned spills require stack realignment.
  if (!static_cast<const ARMBaseRegisterInfo *>(
          MF.getSubtarget().getRegisterInfo())->canRealignStack(MF))
    return;

  // We always spill contiguous d-registers starting from d8. Count how many
  // needs spilling. The register allocator will almost always use the
  // callee-saved registers in order, but it can happen that there are holes in
  // the range. Registers above the hole will be spilled to the standard DPRCS
  // area.
  unsigned NumSpills = 0;
  for (; NumSpills < 8; ++NumSpills)
    if (!SavedRegs.test(ARM::D8 + NumSpills))
      break;

  // Don't do this for just one d-register. It's not worth it.
  if (NumSpills < 2)
    return;

  // Spill the first NumSpills D-registers after realigning the stack.
  MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(NumSpills);

  // A scratch register is required for the vst1 / vld1 instructions.
  SavedRegs.set(ARM::R4);
}
1469 | ||||
1470 | void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, | |||
1471 | BitVector &SavedRegs, | |||
1472 | RegScavenger *RS) const { | |||
1473 | TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); | |||
1474 | // This tells PEI to spill the FP as if it is any other callee-save register | |||
1475 | // to take advantage the eliminateFrameIndex machinery. This also ensures it | |||
1476 | // is spilled in the order specified by getCalleeSavedRegs() to make it easier | |||
1477 | // to combine multiple loads / stores. | |||
1478 | bool CanEliminateFrame = true; | |||
1479 | bool CS1Spilled = false; | |||
1480 | bool LRSpilled = false; | |||
1481 | unsigned NumGPRSpills = 0; | |||
1482 | SmallVector<unsigned, 4> UnspilledCS1GPRs; | |||
1483 | SmallVector<unsigned, 4> UnspilledCS2GPRs; | |||
1484 | const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>( | |||
1485 | MF.getSubtarget().getRegisterInfo()); | |||
1486 | const ARMBaseInstrInfo &TII = | |||
1487 | *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo()); | |||
1488 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); | |||
1489 | MachineFrameInfo *MFI = MF.getFrameInfo(); | |||
1490 | MachineRegisterInfo &MRI = MF.getRegInfo(); | |||
1491 | unsigned FramePtr = RegInfo->getFrameRegister(MF); | |||
1492 | ||||
1493 | // Spill R4 if Thumb2 function requires stack realignment - it will be used as | |||
1494 | // scratch register. Also spill R4 if Thumb2 function has varsized objects, | |||
1495 | // since it's not always possible to restore sp from fp in a single | |||
1496 | // instruction. | |||
1497 | // FIXME: It will be better just to find spare register here. | |||
1498 | if (AFI->isThumb2Function() && | |||
1499 | (MFI->hasVarSizedObjects() || RegInfo->needsStackRealignment(MF))) | |||
1500 | SavedRegs.set(ARM::R4); | |||
1501 | ||||
1502 | if (AFI->isThumb1OnlyFunction()) { | |||
| ||||
1503 | // Spill LR if Thumb1 function uses variable length argument lists. | |||
1504 | if (AFI->getArgRegsSaveSize() > 0) | |||
1505 | SavedRegs.set(ARM::LR); | |||
1506 | ||||
1507 | // Spill R4 if Thumb1 epilogue has to restore SP from FP. We don't know | |||
1508 | // for sure what the stack size will be, but for this, an estimate is good | |||
1509 | // enough. If there anything changes it, it'll be a spill, which implies | |||
1510 | // we've used all the registers and so R4 is already used, so not marking | |||
1511 | // it here will be OK. | |||
1512 | // FIXME: It will be better just to find spare register here. | |||
1513 | unsigned StackSize = MFI->estimateStackSize(MF); | |||
1514 | if (MFI->hasVarSizedObjects() || StackSize > 508) | |||
1515 | SavedRegs.set(ARM::R4); | |||
1516 | } | |||
1517 | ||||
1518 | // See if we can spill vector registers to aligned stack. | |||
1519 | checkNumAlignedDPRCS2Regs(MF, SavedRegs); | |||
1520 | ||||
1521 | // Spill the BasePtr if it's used. | |||
1522 | if (RegInfo->hasBasePointer(MF)) | |||
1523 | SavedRegs.set(RegInfo->getBaseRegister()); | |||
1524 | ||||
1525 | // Don't spill FP if the frame can be eliminated. This is determined | |||
1526 | // by scanning the callee-save registers to see if any is modified. | |||
1527 | const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); | |||
1528 | for (unsigned i = 0; CSRegs[i]; ++i) { | |||
1529 | unsigned Reg = CSRegs[i]; | |||
1530 | bool Spilled = false; | |||
1531 | if (SavedRegs.test(Reg)) { | |||
1532 | Spilled = true; | |||
1533 | CanEliminateFrame = false; | |||
1534 | } | |||
1535 | ||||
1536 | if (!ARM::GPRRegClass.contains(Reg)) | |||
1537 | continue; | |||
1538 | ||||
1539 | if (Spilled) { | |||
1540 | NumGPRSpills++; | |||
1541 | ||||
1542 | if (!STI.isTargetMachO()) { | |||
1543 | if (Reg == ARM::LR) | |||
1544 | LRSpilled = true; | |||
1545 | CS1Spilled = true; | |||
1546 | continue; | |||
1547 | } | |||
1548 | ||||
1549 | // Keep track if LR and any of R4, R5, R6, and R7 is spilled. | |||
1550 | switch (Reg) { | |||
1551 | case ARM::LR: | |||
1552 | LRSpilled = true; | |||
1553 | // Fallthrough | |||
1554 | case ARM::R0: case ARM::R1: | |||
1555 | case ARM::R2: case ARM::R3: | |||
1556 | case ARM::R4: case ARM::R5: | |||
1557 | case ARM::R6: case ARM::R7: | |||
1558 | CS1Spilled = true; | |||
1559 | break; | |||
1560 | default: | |||
1561 | break; | |||
1562 | } | |||
1563 | } else { | |||
1564 | if (!STI.isTargetMachO()) { | |||
1565 | UnspilledCS1GPRs.push_back(Reg); | |||
1566 | continue; | |||
1567 | } | |||
1568 | ||||
1569 | switch (Reg) { | |||
1570 | case ARM::R0: case ARM::R1: | |||
1571 | case ARM::R2: case ARM::R3: | |||
1572 | case ARM::R4: case ARM::R5: | |||
1573 | case ARM::R6: case ARM::R7: | |||
1574 | case ARM::LR: | |||
1575 | UnspilledCS1GPRs.push_back(Reg); | |||
1576 | break; | |||
1577 | default: | |||
1578 | UnspilledCS2GPRs.push_back(Reg); | |||
1579 | break; | |||
1580 | } | |||
1581 | } | |||
1582 | } | |||
1583 | ||||
1584 | bool ForceLRSpill = false; | |||
1585 | if (!LRSpilled && AFI->isThumb1OnlyFunction()) { | |||
1586 | unsigned FnSize = GetFunctionSizeInBytes(MF, TII); | |||
1587 | // Force LR to be spilled if the Thumb function size is > 2048. This enables | |||
1588 | // use of BL to implement far jump. If it turns out that it's not needed | |||
1589 | // then the branch fix up path will undo it. | |||
1590 | if (FnSize >= (1 << 11)) { | |||
1591 | CanEliminateFrame = false; | |||
1592 | ForceLRSpill = true; | |||
1593 | } | |||
1594 | } | |||
1595 | ||||
1596 | // If any of the stack slot references may be out of range of an immediate | |||
1597 | // offset, make sure a register (or a spill slot) is available for the | |||
1598 | // register scavenger. Note that if we're indexing off the frame pointer, the | |||
1599 | // effective stack size is 4 bytes larger since the FP points to the stack | |||
1600 | // slot of the previous FP. Also, if we have variable sized objects in the | |||
1601 | // function, stack slot references will often be negative, and some of | |||
1602 | // our instructions are positive-offset only, so conservatively consider | |||
1603 | // that case to want a spill slot (or register) as well. Similarly, if | |||
1604 | // the function adjusts the stack pointer during execution and the | |||
1605 | // adjustments aren't already part of our stack size estimate, our offset | |||
1606 | // calculations may be off, so be conservative. | |||
1607 | // FIXME: We could add logic to be more precise about negative offsets | |||
1608 | // and which instructions will need a scratch register for them. Is it | |||
1609 | // worth the effort and added fragility? | |||
1610 | bool BigStack = (RS && (MFI->estimateStackSize(MF) + | |||
1611 | ((hasFP(MF) && AFI->hasStackFrame()) ? 4 : 0) >= | |||
1612 | estimateRSStackSizeLimit(MF, this))) || | |||
1613 | MFI->hasVarSizedObjects() || | |||
1614 | (MFI->adjustsStack() && !canSimplifyCallFramePseudos(MF)); | |||
1615 | ||||
1616 | bool ExtraCSSpill = false; | |||
1617 | if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) { | |||
1618 | AFI->setHasStackFrame(true); | |||
1619 | ||||
1620 | // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled. | |||
1621 | // Spill LR as well so we can fold BX_RET to the registers restore (LDM). | |||
1622 | if (!LRSpilled && CS1Spilled) { | |||
1623 | SavedRegs.set(ARM::LR); | |||
1624 | NumGPRSpills++; | |||
1625 | SmallVectorImpl<unsigned>::iterator LRPos; | |||
1626 | LRPos = std::find(UnspilledCS1GPRs.begin(), UnspilledCS1GPRs.end(), | |||
1627 | (unsigned)ARM::LR); | |||
1628 | if (LRPos != UnspilledCS1GPRs.end()) | |||
1629 | UnspilledCS1GPRs.erase(LRPos); | |||
1630 | ||||
1631 | ForceLRSpill = false; | |||
1632 | ExtraCSSpill = true; | |||
1633 | } | |||
1634 | ||||
1635 | if (hasFP(MF)) { | |||
1636 | SavedRegs.set(FramePtr); | |||
1637 | auto FPPos = std::find(UnspilledCS1GPRs.begin(), UnspilledCS1GPRs.end(), | |||
1638 | FramePtr); | |||
1639 | if (FPPos != UnspilledCS1GPRs.end()) | |||
1640 | UnspilledCS1GPRs.erase(FPPos); | |||
1641 | NumGPRSpills++; | |||
1642 | } | |||
1643 | ||||
1644 | // If stack and double are 8-byte aligned and we are spilling an odd number | |||
1645 | // of GPRs, spill one extra callee save GPR so we won't have to pad between | |||
1646 | // the integer and double callee save areas. | |||
1647 | unsigned TargetAlign = getStackAlignment(); | |||
1648 | if (TargetAlign >= 8 && (NumGPRSpills & 1)) { | |||
1649 | if (CS1Spilled && !UnspilledCS1GPRs.empty()) { | |||
1650 | for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) { | |||
1651 | unsigned Reg = UnspilledCS1GPRs[i]; | |||
1652 | // Don't spill high register if the function is thumb. In the case of | |||
1653 | // Windows on ARM, accept R11 (frame pointer) | |||
1654 | if (!AFI->isThumbFunction() || | |||
1655 | (STI.isTargetWindows() && Reg == ARM::R11) || | |||
1656 | isARMLowRegister(Reg) || Reg == ARM::LR) { | |||
1657 | SavedRegs.set(Reg); | |||
1658 | if (!MRI.isReserved(Reg)) | |||
1659 | ExtraCSSpill = true; | |||
1660 | break; | |||
1661 | } | |||
1662 | } | |||
1663 | } else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) { | |||
1664 | unsigned Reg = UnspilledCS2GPRs.front(); | |||
1665 | SavedRegs.set(Reg); | |||
1666 | if (!MRI.isReserved(Reg)) | |||
1667 | ExtraCSSpill = true; | |||
1668 | } | |||
1669 | } | |||
1670 | ||||
1671 | // Estimate if we might need to scavenge a register at some point in order | |||
1672 | // to materialize a stack offset. If so, either spill one additional | |||
1673 | // callee-saved register or reserve a special spill slot to facilitate | |||
1674 | // register scavenging. Thumb1 needs a spill slot for stack pointer | |||
1675 | // adjustments also, even when the frame itself is small. | |||
1676 | if (BigStack && !ExtraCSSpill) { | |||
1677 | // If any non-reserved CS register isn't spilled, just spill one or two | |||
1678 | // extra. That should take care of it! | |||
1679 | unsigned NumExtras = TargetAlign / 4; | |||
1680 | SmallVector<unsigned, 2> Extras; | |||
1681 | while (NumExtras && !UnspilledCS1GPRs.empty()) { | |||
1682 | unsigned Reg = UnspilledCS1GPRs.back(); | |||
1683 | UnspilledCS1GPRs.pop_back(); | |||
1684 | if (!MRI.isReserved(Reg) && | |||
1685 | (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg) || | |||
1686 | Reg == ARM::LR)) { | |||
1687 | Extras.push_back(Reg); | |||
1688 | NumExtras--; | |||
1689 | } | |||
1690 | } | |||
1691 | // For non-Thumb1 functions, also check for hi-reg CS registers | |||
1692 | if (!AFI->isThumb1OnlyFunction()) { | |||
1693 | while (NumExtras && !UnspilledCS2GPRs.empty()) { | |||
1694 | unsigned Reg = UnspilledCS2GPRs.back(); | |||
1695 | UnspilledCS2GPRs.pop_back(); | |||
1696 | if (!MRI.isReserved(Reg)) { | |||
1697 | Extras.push_back(Reg); | |||
1698 | NumExtras--; | |||
1699 | } | |||
1700 | } | |||
1701 | } | |||
1702 | if (Extras.size() && NumExtras == 0) { | |||
1703 | for (unsigned i = 0, e = Extras.size(); i != e; ++i) { | |||
1704 | SavedRegs.set(Extras[i]); | |||
1705 | } | |||
1706 | } else if (!AFI->isThumb1OnlyFunction()) { | |||
1707 | // note: Thumb1 functions spill to R12, not the stack. Reserve a slot | |||
1708 | // closest to SP or frame pointer. | |||
1709 | const TargetRegisterClass *RC = &ARM::GPRRegClass; | |||
1710 | RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), | |||
| ||||
1711 | RC->getAlignment(), | |||
1712 | false)); | |||
1713 | } | |||
1714 | } | |||
1715 | } | |||
1716 | ||||
1717 | if (ForceLRSpill) { | |||
1718 | SavedRegs.set(ARM::LR); | |||
1719 | AFI->setLRIsSpilledForFarJump(true); | |||
1720 | } | |||
1721 | } | |||
1722 | ||||
/// Replace an ADJCALLSTACKDOWN/ADJCALLSTACKUP pseudo with real SP-adjusting
/// instructions when the call frame is not reserved, then erase the pseudo.
/// Returns an iterator to the instruction following the erased pseudo.
MachineBasicBlock::iterator ARMFrameLowering::eliminateCallFramePseudoInstr(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator I) const {
  const ARMBaseInstrInfo &TII =
      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
  if (!hasReservedCallFrame(MF)) {
    // If we have alloca, convert as follows:
    // ADJCALLSTACKDOWN -> sub, sp, sp, amount
    // ADJCALLSTACKUP   -> add, sp, sp, amount
    MachineInstr *Old = I;
    DebugLoc dl = Old->getDebugLoc();
    // Operand 0 of both pseudos holds the adjustment amount.
    unsigned Amount = Old->getOperand(0).getImm();
    if (Amount != 0) {
      // We need to keep the stack aligned properly. To do this, we round the
      // amount of space needed for the outgoing arguments up to the next
      // alignment boundary.
      Amount = alignSPAdjust(Amount);

      ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
      assert(!AFI->isThumb1OnlyFunction() &&
             "This eliminateCallFramePseudoInstr does not support Thumb1!");
      bool isARM = !AFI->isThumbFunction();

      // Replace the pseudo instruction with a new instruction...
      // Carry over any predicate the pseudo had.
      unsigned Opc = Old->getOpcode();
      int PIdx = Old->findFirstPredOperandIdx();
      ARMCC::CondCodes Pred = (PIdx == -1)
        ? ARMCC::AL : (ARMCC::CondCodes)Old->getOperand(PIdx).getImm();
      if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
        // Note: PredReg is operand 2 for ADJCALLSTACKDOWN.
        unsigned PredReg = Old->getOperand(2).getReg();
        emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, MachineInstr::NoFlags,
                     Pred, PredReg);
      } else {
        // Note: PredReg is operand 3 for ADJCALLSTACKUP.
        unsigned PredReg = Old->getOperand(3).getReg();
        assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
        emitSPUpdate(isARM, MBB, I, dl, TII, Amount, MachineInstr::NoFlags,
                     Pred, PredReg);
      }
    }
  }
  // The pseudo is erased whether or not an SP update was emitted.
  return MBB.erase(I);
}
1767 | ||||
/// Get the minimum constant for ARM that is greater than or equal to the
/// argument. In ARM, constants can have any value that can be produced by
/// rotating an 8-bit value to the right by an even number of bits within a
/// 32-bit word.
static uint32_t alignToARMConstant(uint32_t Value) {
  if (Value == 0)
    return 0;

  // Shift left two bits at a time until the top two bits are occupied; this
  // records how far the value was normalized.
  unsigned ShiftAmt = 0;
  for (; (Value & 0xC0000000) == 0; ShiftAmt += 2)
    Value = Value << 2;

  // Keep the top 8 bits as the payload, rounding up whenever any of the
  // lower 24 bits would be dropped.
  const bool RoundUp = (Value & 0x00FFFFFF) != 0;
  uint32_t Payload = (Value >> 24) + (RoundUp ? 1 : 0);

  // If rounding overflowed the 8-bit payload, clear the low bits so it stays
  // representable after the final shift.
  if (Payload & 0x0000100)
    Payload = Payload & 0x000001FC;

  // Undo the normalization, repositioning the payload where the original
  // magnitude lived.
  if (ShiftAmt > 24)
    return Payload >> (ShiftAmt - 24);
  return Payload << (24 - ShiftAmt);
}
1796 | ||||
// The stack limit in the TCB is set to this many bytes above the actual
// stack limit.
// NOTE(review): presumably consumed by the segmented-stack prologue logic
// (adjustForSegmentedStacks) — its use site is beyond this chunk; confirm.
static const uint64_t kSplitStackAvailable = 256;
1800 | ||||
1801 | // Adjust the function prologue to enable split stacks. This currently only | |||
1802 | // supports android and linux. | |||
1803 | // | |||
1804 | // The ABI of the segmented stack prologue is a little arbitrarily chosen, but | |||
1805 | // must be well defined in order to allow for consistent implementations of the | |||
1806 | // __morestack helper function. The ABI is also not a normal ABI in that it | |||
1807 | // doesn't follow the normal calling conventions because this allows the | |||
1808 | // prologue of each function to be optimized further. | |||
1809 | // | |||
1810 | // Currently, the ABI looks like (when calling __morestack) | |||
1811 | // | |||
1812 | // * r4 holds the minimum stack size requested for this function call | |||
1813 | // * r5 holds the stack size of the arguments to the function | |||
1814 | // * the beginning of the function is 3 instructions after the call to | |||
1815 | // __morestack | |||
1816 | // | |||
1817 | // Implementations of __morestack should use r4 to allocate a new stack, r5 to | |||
1818 | // place the arguments on to the new stack, and the 3-instruction knowledge to | |||
1819 | // jump directly to the body of the function when working on the new stack. | |||
1820 | // | |||
1821 | // An old (and possibly no longer compatible) implementation of __morestack for | |||
1822 | // ARM can be found at [1]. | |||
1823 | // | |||
1824 | // [1] - https://github.com/mozilla/rust/blob/86efd9/src/rt/arch/arm/morestack.S | |||
1825 | void ARMFrameLowering::adjustForSegmentedStacks( | |||
1826 | MachineFunction &MF, MachineBasicBlock &PrologueMBB) const { | |||
1827 | unsigned Opcode; | |||
1828 | unsigned CFIIndex; | |||
1829 | const ARMSubtarget *ST = &MF.getSubtarget<ARMSubtarget>(); | |||
1830 | bool Thumb = ST->isThumb(); | |||
1831 | ||||
1832 | // Sadly, this currently doesn't support varargs, platforms other than | |||
1833 | // android/linux. Note that thumb1/thumb2 are support for android/linux. | |||
1834 | if (MF.getFunction()->isVarArg()) | |||
1835 | report_fatal_error("Segmented stacks do not support vararg functions."); | |||
1836 | if (!ST->isTargetAndroid() && !ST->isTargetLinux()) | |||
1837 | report_fatal_error("Segmented stacks not supported on this platform."); | |||
1838 | ||||
1839 | MachineFrameInfo *MFI = MF.getFrameInfo(); | |||
1840 | MachineModuleInfo &MMI = MF.getMMI(); | |||
1841 | MCContext &Context = MMI.getContext(); | |||
1842 | const MCRegisterInfo *MRI = Context.getRegisterInfo(); | |||
1843 | const ARMBaseInstrInfo &TII = | |||
1844 | *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo()); | |||
1845 | ARMFunctionInfo *ARMFI = MF.getInfo<ARMFunctionInfo>(); | |||
1846 | DebugLoc DL; | |||
1847 | ||||
1848 | uint64_t StackSize = MFI->getStackSize(); | |||
1849 | ||||
1850 | // Do not generate a prologue for functions with a stack of size zero | |||
1851 | if (StackSize == 0) | |||
1852 | return; | |||
1853 | ||||
1854 | // Use R4 and R5 as scratch registers. | |||
1855 | // We save R4 and R5 before use and restore them before leaving the function. | |||
1856 | unsigned ScratchReg0 = ARM::R4; | |||
1857 | unsigned ScratchReg1 = ARM::R5; | |||
1858 | uint64_t AlignedStackSize; | |||
1859 | ||||
1860 | MachineBasicBlock *PrevStackMBB = MF.CreateMachineBasicBlock(); | |||
1861 | MachineBasicBlock *PostStackMBB = MF.CreateMachineBasicBlock(); | |||
1862 | MachineBasicBlock *AllocMBB = MF.CreateMachineBasicBlock(); | |||
1863 | MachineBasicBlock *GetMBB = MF.CreateMachineBasicBlock(); | |||
1864 | MachineBasicBlock *McrMBB = MF.CreateMachineBasicBlock(); | |||
1865 | ||||
1866 | // Grab everything that reaches PrologueMBB to update there liveness as well. | |||
1867 | SmallPtrSet<MachineBasicBlock *, 8> BeforePrologueRegion; | |||
1868 | SmallVector<MachineBasicBlock *, 2> WalkList; | |||
1869 | WalkList.push_back(&PrologueMBB); | |||
1870 | ||||
1871 | do { | |||
1872 | MachineBasicBlock *CurMBB = WalkList.pop_back_val(); | |||
1873 | for (MachineBasicBlock *PredBB : CurMBB->predecessors()) { | |||
1874 | if (BeforePrologueRegion.insert(PredBB).second) | |||
1875 | WalkList.push_back(PredBB); | |||
1876 | } | |||
1877 | } while (!WalkList.empty()); | |||
1878 | ||||
1879 | // The order in that list is important. | |||
1880 | // The blocks will all be inserted before PrologueMBB using that order. | |||
1881 | // Therefore the block that should appear first in the CFG should appear | |||
1882 | // first in the list. | |||
1883 | MachineBasicBlock *AddedBlocks[] = {PrevStackMBB, McrMBB, GetMBB, AllocMBB, | |||
1884 | PostStackMBB}; | |||
1885 | ||||
1886 | for (MachineBasicBlock *B : AddedBlocks) | |||
1887 | BeforePrologueRegion.insert(B); | |||
1888 | ||||
1889 | for (const auto &LI : PrologueMBB.liveins()) { | |||
1890 | for (MachineBasicBlock *PredBB : BeforePrologueRegion) | |||
1891 | PredBB->addLiveIn(LI); | |||
1892 | } | |||
1893 | ||||
1894 | // Remove the newly added blocks from the list, since we know | |||
1895 | // we do not have to do the following updates for them. | |||
1896 | for (MachineBasicBlock *B : AddedBlocks) { | |||
1897 | BeforePrologueRegion.erase(B); | |||
1898 | MF.insert(PrologueMBB.getIterator(), B); | |||
1899 | } | |||
1900 | ||||
1901 | for (MachineBasicBlock *MBB : BeforePrologueRegion) { | |||
1902 | // Make sure the LiveIns are still sorted and unique. | |||
1903 | MBB->sortUniqueLiveIns(); | |||
1904 | // Replace the edges to PrologueMBB by edges to the sequences | |||
1905 | // we are about to add. | |||
1906 | MBB->ReplaceUsesOfBlockWith(&PrologueMBB, AddedBlocks[0]); | |||
1907 | } | |||
1908 | ||||
1909 | // The required stack size that is aligned to ARM constant criterion. | |||
1910 | AlignedStackSize = alignToARMConstant(StackSize); | |||
1911 | ||||
1912 | // When the frame size is less than 256 we just compare the stack | |||
1913 | // boundary directly to the value of the stack pointer, per gcc. | |||
1914 | bool CompareStackPointer = AlignedStackSize < kSplitStackAvailable; | |||
1915 | ||||
1916 | // We will use two of the callee save registers as scratch registers so we | |||
1917 | // need to save those registers onto the stack. | |||
1918 | // We will use SR0 to hold stack limit and SR1 to hold the stack size | |||
1919 | // requested and arguments for __morestack(). | |||
1920 | // SR0: Scratch Register #0 | |||
1921 | // SR1: Scratch Register #1 | |||
1922 | // push {SR0, SR1} | |||
1923 | if (Thumb) { | |||
1924 | AddDefaultPred(BuildMI(PrevStackMBB, DL, TII.get(ARM::tPUSH))) | |||
1925 | .addReg(ScratchReg0).addReg(ScratchReg1); | |||
1926 | } else { | |||
1927 | AddDefaultPred(BuildMI(PrevStackMBB, DL, TII.get(ARM::STMDB_UPD)) | |||
1928 | .addReg(ARM::SP, RegState::Define).addReg(ARM::SP)) | |||
1929 | .addReg(ScratchReg0).addReg(ScratchReg1); | |||
1930 | } | |||
1931 | ||||
1932 | // Emit the relevant DWARF information about the change in stack pointer as | |||
1933 | // well as where to find both r4 and r5 (the callee-save registers) | |||
1934 | CFIIndex = | |||
1935 | MMI.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, -8)); | |||
1936 | BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) | |||
1937 | .addCFIIndex(CFIIndex); | |||
1938 | CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( | |||
1939 | nullptr, MRI->getDwarfRegNum(ScratchReg1, true), -4)); | |||
1940 | BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) | |||
1941 | .addCFIIndex(CFIIndex); | |||
1942 | CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( | |||
1943 | nullptr, MRI->getDwarfRegNum(ScratchReg0, true), -8)); | |||
1944 | BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) | |||
1945 | .addCFIIndex(CFIIndex); | |||
1946 | ||||
1947 | // mov SR1, sp | |||
1948 | if (Thumb) { | |||
1949 | AddDefaultPred(BuildMI(McrMBB, DL, TII.get(ARM::tMOVr), ScratchReg1) | |||
1950 | .addReg(ARM::SP)); | |||
1951 | } else if (CompareStackPointer) { | |||
1952 | AddDefaultPred(BuildMI(McrMBB, DL, TII.get(ARM::MOVr), ScratchReg1) | |||
1953 | .addReg(ARM::SP)).addReg(0); | |||
1954 | } | |||
1955 | ||||
1956 | // sub SR1, sp, #StackSize | |||
1957 | if (!CompareStackPointer && Thumb) { | |||
1958 | AddDefaultPred( | |||
1959 | AddDefaultCC(BuildMI(McrMBB, DL, TII.get(ARM::tSUBi8), ScratchReg1)) | |||
1960 | .addReg(ScratchReg1).addImm(AlignedStackSize)); | |||
1961 | } else if (!CompareStackPointer) { | |||
1962 | AddDefaultPred(BuildMI(McrMBB, DL, TII.get(ARM::SUBri), ScratchReg1) | |||
1963 | .addReg(ARM::SP).addImm(AlignedStackSize)).addReg(0); | |||
1964 | } | |||
1965 | ||||
1966 | if (Thumb && ST->isThumb1Only()) { | |||
1967 | unsigned PCLabelId = ARMFI->createPICLabelUId(); | |||
1968 | ARMConstantPoolValue *NewCPV = ARMConstantPoolSymbol::Create( | |||
1969 | MF.getFunction()->getContext(), "__STACK_LIMIT", PCLabelId, 0); | |||
1970 | MachineConstantPool *MCP = MF.getConstantPool(); | |||
1971 | unsigned CPI = MCP->getConstantPoolIndex(NewCPV, 4); | |||
1972 | ||||
1973 | // ldr SR0, [pc, offset(STACK_LIMIT)] | |||
1974 | AddDefaultPred(BuildMI(GetMBB, DL, TII.get(ARM::tLDRpci), ScratchReg0) | |||
1975 | .addConstantPoolIndex(CPI)); | |||
1976 | ||||
1977 | // ldr SR0, [SR0] | |||
1978 | AddDefaultPred(BuildMI(GetMBB, DL, TII.get(ARM::tLDRi), ScratchReg0) | |||
1979 | .addReg(ScratchReg0).addImm(0)); | |||
1980 | } else { | |||
1981 | // Get TLS base address from the coprocessor | |||
1982 | // mrc p15, #0, SR0, c13, c0, #3 | |||
1983 | AddDefaultPred(BuildMI(McrMBB, DL, TII.get(ARM::MRC), ScratchReg0) | |||
1984 | .addImm(15) | |||
1985 | .addImm(0) | |||
1986 | .addImm(13) | |||
1987 | .addImm(0) | |||
1988 | .addImm(3)); | |||
1989 | ||||
1990 | // Use the last tls slot on android and a private field of the TCP on linux. | |||
1991 | assert(ST->isTargetAndroid() || ST->isTargetLinux())((ST->isTargetAndroid() || ST->isTargetLinux()) ? static_cast <void> (0) : __assert_fail ("ST->isTargetAndroid() || ST->isTargetLinux()" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn267387/lib/Target/ARM/ARMFrameLowering.cpp" , 1991, __PRETTY_FUNCTION__)); | |||
1992 | unsigned TlsOffset = ST->isTargetAndroid() ? 63 : 1; | |||
1993 | ||||
1994 | // Get the stack limit from the right offset | |||
1995 | // ldr SR0, [sr0, #4 * TlsOffset] | |||
1996 | AddDefaultPred(BuildMI(GetMBB, DL, TII.get(ARM::LDRi12), ScratchReg0) | |||
1997 | .addReg(ScratchReg0).addImm(4 * TlsOffset)); | |||
1998 | } | |||
1999 | ||||
2000 | // Compare stack limit with stack size requested. | |||
2001 | // cmp SR0, SR1 | |||
2002 | Opcode = Thumb ? ARM::tCMPr : ARM::CMPrr; | |||
2003 | AddDefaultPred(BuildMI(GetMBB, DL, TII.get(Opcode)) | |||
2004 | .addReg(ScratchReg0) | |||
2005 | .addReg(ScratchReg1)); | |||
2006 | ||||
2007 | // This jump is taken if StackLimit < SP - stack required. | |||
2008 | Opcode = Thumb ? ARM::tBcc : ARM::Bcc; | |||
2009 | BuildMI(GetMBB, DL, TII.get(Opcode)).addMBB(PostStackMBB) | |||
2010 | .addImm(ARMCC::LO) | |||
2011 | .addReg(ARM::CPSR); | |||
2012 | ||||
2013 | ||||
2014 | // Calling __morestack(StackSize, Size of stack arguments). | |||
2015 | // __morestack knows that the stack size requested is in SR0(r4) | |||
2016 | // and amount size of stack arguments is in SR1(r5). | |||
2017 | ||||
2018 | // Pass first argument for the __morestack by Scratch Register #0. | |||
2019 | // The amount size of stack required | |||
2020 | if (Thumb) { | |||
2021 | AddDefaultPred(AddDefaultCC(BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), | |||
2022 | ScratchReg0)).addImm(AlignedStackSize)); | |||
2023 | } else { | |||
2024 | AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg0) | |||
2025 | .addImm(AlignedStackSize)).addReg(0); | |||
2026 | } | |||
2027 | // Pass second argument for the __morestack by Scratch Register #1. | |||
2028 | // The amount size of stack consumed to save function arguments. | |||
2029 | if (Thumb) { | |||
2030 | AddDefaultPred( | |||
2031 | AddDefaultCC(BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg1)) | |||
2032 | .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()))); | |||
2033 | } else { | |||
2034 | AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg1) | |||
2035 | .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()))) | |||
2036 | .addReg(0); | |||
2037 | } | |||
2038 | ||||
2039 | // push {lr} - Save return address of this function. | |||
2040 | if (Thumb) { | |||
2041 | AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tPUSH))) | |||
2042 | .addReg(ARM::LR); | |||
2043 | } else { | |||
2044 | AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::STMDB_UPD)) | |||
2045 | .addReg(ARM::SP, RegState::Define) | |||
2046 | .addReg(ARM::SP)) | |||
2047 | .addReg(ARM::LR); | |||
2048 | } | |||
2049 | ||||
2050 | // Emit the DWARF info about the change in stack as well as where to find the | |||
2051 | // previous link register | |||
2052 | CFIIndex = | |||
2053 | MMI.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, -12)); | |||
2054 | BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) | |||
2055 | .addCFIIndex(CFIIndex); | |||
2056 | CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( | |||
2057 | nullptr, MRI->getDwarfRegNum(ARM::LR, true), -12)); | |||
2058 | BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) | |||
2059 | .addCFIIndex(CFIIndex); | |||
2060 | ||||
2061 | // Call __morestack(). | |||
2062 | if (Thumb) { | |||
2063 | AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tBL))) | |||
2064 | .addExternalSymbol("__morestack"); | |||
2065 | } else { | |||
2066 | BuildMI(AllocMBB, DL, TII.get(ARM::BL)) | |||
2067 | .addExternalSymbol("__morestack"); | |||
2068 | } | |||
2069 | ||||
2070 | // pop {lr} - Restore return address of this original function. | |||
2071 | if (Thumb) { | |||
2072 | if (ST->isThumb1Only()) { | |||
2073 | AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))) | |||
2074 | .addReg(ScratchReg0); | |||
2075 | AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tMOVr), ARM::LR) | |||
2076 | .addReg(ScratchReg0)); | |||
2077 | } else { | |||
2078 | AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::t2LDR_POST)) | |||
2079 | .addReg(ARM::LR, RegState::Define) | |||
2080 | .addReg(ARM::SP, RegState::Define) | |||
2081 | .addReg(ARM::SP) | |||
2082 | .addImm(4)); | |||
2083 | } | |||
2084 | } else { | |||
2085 | AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD)) | |||
2086 | .addReg(ARM::SP, RegState::Define) | |||
2087 | .addReg(ARM::SP)) | |||
2088 | .addReg(ARM::LR); | |||
2089 | } | |||
2090 | ||||
2091 | // Restore SR0 and SR1 in case of __morestack() was called. | |||
2092 | // __morestack() will skip PostStackMBB block so we need to restore | |||
2093 | // scratch registers from here. | |||
2094 | // pop {SR0, SR1} | |||
2095 | if (Thumb) { | |||
2096 | AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))) | |||
2097 | .addReg(ScratchReg0) | |||
2098 | .addReg(ScratchReg1); | |||
2099 | } else { | |||
2100 | AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD)) | |||
2101 | .addReg(ARM::SP, RegState::Define) | |||
2102 | .addReg(ARM::SP)) | |||
2103 | .addReg(ScratchReg0) | |||
2104 | .addReg(ScratchReg1); | |||
2105 | } | |||
2106 | ||||
2107 | // Update the CFA offset now that we've popped | |||
2108 | CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 0)); | |||
2109 | BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) | |||
2110 | .addCFIIndex(CFIIndex); | |||
2111 | ||||
2112 | // bx lr - Return from this function. | |||
2113 | Opcode = Thumb ? ARM::tBX_RET : ARM::BX_RET; | |||
2114 | AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(Opcode))); | |||
2115 | ||||
2116 | // Restore SR0 and SR1 in case of __morestack() was not called. | |||
2117 | // pop {SR0, SR1} | |||
2118 | if (Thumb) { | |||
2119 | AddDefaultPred(BuildMI(PostStackMBB, DL, TII.get(ARM::tPOP))) | |||
2120 | .addReg(ScratchReg0) | |||
2121 | .addReg(ScratchReg1); | |||
2122 | } else { | |||
2123 | AddDefaultPred(BuildMI(PostStackMBB, DL, TII.get(ARM::LDMIA_UPD)) | |||
2124 | .addReg(ARM::SP, RegState::Define) | |||
2125 | .addReg(ARM::SP)) | |||
2126 | .addReg(ScratchReg0) | |||
2127 | .addReg(ScratchReg1); | |||
2128 | } | |||
2129 | ||||
2130 | // Update the CFA offset now that we've popped | |||
2131 | CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 0)); | |||
2132 | BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) | |||
2133 | .addCFIIndex(CFIIndex); | |||
2134 | ||||
2135 | // Tell debuggers that r4 and r5 are now the same as they were in the | |||
2136 | // previous function, that they're the "Same Value". | |||
2137 | CFIIndex = MMI.addFrameInst(MCCFIInstruction::createSameValue( | |||
2138 | nullptr, MRI->getDwarfRegNum(ScratchReg0, true))); | |||
2139 | BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) | |||
2140 | .addCFIIndex(CFIIndex); | |||
2141 | CFIIndex = MMI.addFrameInst(MCCFIInstruction::createSameValue( | |||
2142 | nullptr, MRI->getDwarfRegNum(ScratchReg1, true))); | |||
2143 | BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) | |||
2144 | .addCFIIndex(CFIIndex); | |||
2145 | ||||
2146 | // Organizing MBB lists | |||
2147 | PostStackMBB->addSuccessor(&PrologueMBB); | |||
2148 | ||||
2149 | AllocMBB->addSuccessor(PostStackMBB); | |||
2150 | ||||
2151 | GetMBB->addSuccessor(PostStackMBB); | |||
2152 | GetMBB->addSuccessor(AllocMBB); | |||
2153 | ||||
2154 | McrMBB->addSuccessor(GetMBB); | |||
2155 | ||||
2156 | PrevStackMBB->addSuccessor(McrMBB); | |||
2157 | ||||
2158 | #ifdef XDEBUG | |||
2159 | MF.verify(); | |||
2160 | #endif | |||
2161 | } |