File: lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
Location: line 1251, column 28
Description: Called C++ object pointer is null
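
Note: the flagged call is in InstCombiner::SimplifyStoreAtEndOfBlock, near the
end of the listing. OtherBB is initialized to nullptr (line 1228) and is only
assigned when one of the two predecessors of DestBB differs from StoreBB
(lines 1230-1241). On the path where both predecessor edges of DestBB come
from StoreBB itself, OtherBB is never assigned, the distinctness check at
line 1247 compares nullptr against DestBB without bailing out, and the call
OtherBB->getTerminator() at line 1251 dereferences a null pointer. A minimal
sketch of this path follows the listing.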
1 | //===- InstCombineLoadStoreAlloca.cpp -------------------------------------===//
2 | //
3 | // The LLVM Compiler Infrastructure
4 | //
5 | // This file is distributed under the University of Illinois Open Source
6 | // License. See LICENSE.TXT for details.
7 | //
8 | //===----------------------------------------------------------------------===//
9 | //
10 | // This file implements the visit functions for load, store and alloca.
11 | //
12 | //===----------------------------------------------------------------------===//
13 |
14 | #include "InstCombineInternal.h"
15 | #include "llvm/ADT/SmallString.h"
16 | #include "llvm/ADT/Statistic.h"
17 | #include "llvm/Analysis/Loads.h"
18 | #include "llvm/IR/DataLayout.h"
19 | #include "llvm/IR/LLVMContext.h"
20 | #include "llvm/IR/IntrinsicInst.h"
21 | #include "llvm/IR/MDBuilder.h"
22 | #include "llvm/Transforms/Utils/BasicBlockUtils.h"
23 | #include "llvm/Transforms/Utils/Local.h"
24 | using namespace llvm;
25 |
26 | #define DEBUG_TYPE "instcombine"
27 |
28 | STATISTIC(NumDeadStore, "Number of dead stores eliminated");
29 | STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global");
30 |
31 | /// pointsToConstantGlobal - Return true if V (possibly indirectly) points to
32 | /// some part of a constant global variable. This intentionally only accepts
33 | /// constant expressions because we can't rewrite arbitrary instructions.
34 | static bool pointsToConstantGlobal(Value *V) {
35 |   if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
36 |     return GV->isConstant();
37 |
38 |   if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
39 |     if (CE->getOpcode() == Instruction::BitCast ||
40 |         CE->getOpcode() == Instruction::AddrSpaceCast ||
41 |         CE->getOpcode() == Instruction::GetElementPtr)
42 |       return pointsToConstantGlobal(CE->getOperand(0));
43 |   }
44 |   return false;
45 | }
46 |
47 | /// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived)
48 | /// pointer to an alloca. Ignore any reads of the pointer, return false if we
49 | /// see any stores or other unknown uses. If we see pointer arithmetic, keep
50 | /// track of whether it moves the pointer (with IsOffset) but otherwise traverse
51 | /// the uses. If we see a memcpy/memmove that targets an unoffseted pointer to
52 | /// the alloca, and if the source pointer is a pointer to a constant global, we
53 | /// can optimize this.
54 | static bool
55 | isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
56 |                                SmallVectorImpl<Instruction *> &ToDelete) {
57 |   // We track lifetime intrinsics as we encounter them. If we decide to go
58 |   // ahead and replace the value with the global, this lets the caller quickly
59 |   // eliminate the markers.
60 |
61 |   SmallVector<std::pair<Value *, bool>, 35> ValuesToInspect;
62 |   ValuesToInspect.push_back(std::make_pair(V, false));
63 |   while (!ValuesToInspect.empty()) {
64 |     auto ValuePair = ValuesToInspect.pop_back_val();
65 |     const bool IsOffset = ValuePair.second;
66 |     for (auto &U : ValuePair.first->uses()) {
67 |       Instruction *I = cast<Instruction>(U.getUser());
68 |
69 |       if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
70 |         // Ignore non-volatile loads, they are always ok.
71 |         if (!LI->isSimple()) return false;
72 |         continue;
73 |       }
74 |
75 |       if (isa<BitCastInst>(I) || isa<AddrSpaceCastInst>(I)) {
76 |         // If uses of the bitcast are ok, we are ok.
77 |         ValuesToInspect.push_back(std::make_pair(I, IsOffset));
78 |         continue;
79 |       }
80 |       if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
81 |         // If the GEP has all zero indices it doesn't offset the pointer;
82 |         // otherwise it does.
83 |         ValuesToInspect.push_back(
84 |             std::make_pair(I, IsOffset || !GEP->hasAllZeroIndices()));
85 |         continue;
86 |       }
87 |
88 |       if (auto CS = CallSite(I)) {
89 |         // If this is the function being called then we treat it like a load and
90 |         // ignore it.
91 |         if (CS.isCallee(&U))
92 |           continue;
93 |
94 |         unsigned DataOpNo = CS.getDataOperandNo(&U);
95 |         bool IsArgOperand = CS.isArgOperand(&U);
96 |
97 |         // Inalloca arguments are clobbered by the call.
98 |         if (IsArgOperand && CS.isInAllocaArgument(DataOpNo))
99 |           return false;
100 |
101 |         // If this is a readonly/readnone call site, then we know it is just a
102 |         // load (but one that potentially returns the value itself), so we can
103 |         // ignore it if we know that the value isn't captured.
104 |         if (CS.onlyReadsMemory() &&
105 |             (CS.getInstruction()->use_empty() || CS.doesNotCapture(DataOpNo)))
106 |           continue;
107 |
108 |         // If this is being passed as a byval argument, the caller is making a
109 |         // copy, so it is only a read of the alloca.
110 |         if (IsArgOperand && CS.isByValArgument(DataOpNo))
111 |           continue;
112 |       }
113 |
114 |       // Lifetime intrinsics can be handled by the caller.
115 |       if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
116 |         if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
117 |             II->getIntrinsicID() == Intrinsic::lifetime_end) {
118 |           assert(II->use_empty() && "Lifetime markers have no result to use!");
119 |           ToDelete.push_back(II);
120 |           continue;
121 |         }
122 |       }
123 |
124 |       // If this isn't our memcpy/memmove, reject it as something we can't
125 |       // handle.
126 |       MemTransferInst *MI = dyn_cast<MemTransferInst>(I);
127 |       if (!MI)
128 |         return false;
129 |
130 |       // If the transfer is using the alloca as a source of the transfer, then
131 |       // ignore it since it is a load (unless the transfer is volatile).
132 |       if (U.getOperandNo() == 1) {
133 |         if (MI->isVolatile()) return false;
134 |         continue;
135 |       }
136 |
137 |       // If we already have seen a copy, reject the second one.
138 |       if (TheCopy) return false;
139 |
140 |       // If the pointer has been offset from the start of the alloca, we can't
141 |       // safely handle this.
142 |       if (IsOffset) return false;
143 |
144 |       // If the memintrinsic isn't using the alloca as the dest, reject it.
145 |       if (U.getOperandNo() != 0) return false;
146 |
147 |       // If the source of the memcpy/move is not a constant global, reject it.
148 |       if (!pointsToConstantGlobal(MI->getSource()))
149 |         return false;
150 |
151 |       // Otherwise, the transform is safe. Remember the copy instruction.
152 |       TheCopy = MI;
153 |     }
154 |   }
155 |   return true;
156 | }
157 |
158 | /// isOnlyCopiedFromConstantGlobal - Return the copy if the specified alloca
159 | /// is only modified by a copy from a constant global. If we can prove this,
160 | /// we can replace any uses of the alloca with uses of the global directly.
161 | static MemTransferInst *
162 | isOnlyCopiedFromConstantGlobal(AllocaInst *AI,
163 |                                SmallVectorImpl<Instruction *> &ToDelete) {
164 |   MemTransferInst *TheCopy = nullptr;
165 |   if (isOnlyCopiedFromConstantGlobal(AI, TheCopy, ToDelete))
166 |     return TheCopy;
167 |   return nullptr;
168 | }
169 |
170 | static Instruction *simplifyAllocaArraySize(InstCombiner &IC, AllocaInst &AI) {
171 |   // Check for array size of 1 (scalar allocation).
172 |   if (!AI.isArrayAllocation()) {
173 |     // i32 1 is the canonical array size for scalar allocations.
174 |     if (AI.getArraySize()->getType()->isIntegerTy(32))
175 |       return nullptr;
176 |
177 |     // Canonicalize it.
178 |     Value *V = IC.Builder->getInt32(1);
179 |     AI.setOperand(0, V);
180 |     return &AI;
181 |   }
182 |
183 |   // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
184 |   if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
185 |     Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
186 |     AllocaInst *New = IC.Builder->CreateAlloca(NewTy, nullptr, AI.getName());
187 |     New->setAlignment(AI.getAlignment());
188 |
189 |     // Scan to the end of the allocation instructions, to skip over a block of
190 |     // allocas if possible...also skip interleaved debug info
191 |     //
192 |     BasicBlock::iterator It(New);
193 |     while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It))
194 |       ++It;
195 |
196 |     // Now that It is pointing to the first non-allocation-inst in the block,
197 |     // insert our getelementptr instruction...
198 |     //
199 |     Type *IdxTy = IC.getDataLayout().getIntPtrType(AI.getType());
200 |     Value *NullIdx = Constant::getNullValue(IdxTy);
201 |     Value *Idx[2] = {NullIdx, NullIdx};
202 |     Instruction *GEP =
203 |         GetElementPtrInst::CreateInBounds(New, Idx, New->getName() + ".sub");
204 |     IC.InsertNewInstBefore(GEP, *It);
205 |
206 |     // Now make everything use the getelementptr instead of the original
207 |     // allocation.
208 |     return IC.replaceInstUsesWith(AI, GEP);
209 |   }
210 |
211 |   if (isa<UndefValue>(AI.getArraySize()))
212 |     return IC.replaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
213 |
214 |   // Ensure that the alloca array size argument has type intptr_t, so that
215 |   // any casting is exposed early.
216 |   Type *IntPtrTy = IC.getDataLayout().getIntPtrType(AI.getType());
217 |   if (AI.getArraySize()->getType() != IntPtrTy) {
218 |     Value *V = IC.Builder->CreateIntCast(AI.getArraySize(), IntPtrTy, false);
219 |     AI.setOperand(0, V);
220 |     return &AI;
221 |   }
222 |
223 |   return nullptr;
224 | }
225 |
226 | Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
227 |   if (auto *I = simplifyAllocaArraySize(*this, AI))
228 |     return I;
229 |
230 |   if (AI.getAllocatedType()->isSized()) {
231 |     // If the alignment is 0 (unspecified), assign it the preferred alignment.
232 |     if (AI.getAlignment() == 0)
233 |       AI.setAlignment(DL.getPrefTypeAlignment(AI.getAllocatedType()));
234 |
235 |     // Move all allocas of zero byte objects to the entry block and merge them
236 |     // together. Note that we only do this for allocas, because malloc should
237 |     // allocate and return a unique pointer, even for a zero byte allocation.
238 |     if (DL.getTypeAllocSize(AI.getAllocatedType()) == 0) {
239 |       // For a zero sized alloca there is no point in doing an array allocation.
240 |       // This is helpful if the array size is a complicated expression not used
241 |       // elsewhere.
242 |       if (AI.isArrayAllocation()) {
243 |         AI.setOperand(0, ConstantInt::get(AI.getArraySize()->getType(), 1));
244 |         return &AI;
245 |       }
246 |
247 |       // Get the first instruction in the entry block.
248 |       BasicBlock &EntryBlock = AI.getParent()->getParent()->getEntryBlock();
249 |       Instruction *FirstInst = EntryBlock.getFirstNonPHIOrDbg();
250 |       if (FirstInst != &AI) {
251 |         // If the entry block doesn't start with a zero-size alloca then move
252 |         // this one to the start of the entry block. There is no problem with
253 |         // dominance as the array size was forced to a constant earlier already.
254 |         AllocaInst *EntryAI = dyn_cast<AllocaInst>(FirstInst);
255 |         if (!EntryAI || !EntryAI->getAllocatedType()->isSized() ||
256 |             DL.getTypeAllocSize(EntryAI->getAllocatedType()) != 0) {
257 |           AI.moveBefore(FirstInst);
258 |           return &AI;
259 |         }
260 |
261 |         // If the alignment of the entry block alloca is 0 (unspecified),
262 |         // assign it the preferred alignment.
263 |         if (EntryAI->getAlignment() == 0)
264 |           EntryAI->setAlignment(
265 |               DL.getPrefTypeAlignment(EntryAI->getAllocatedType()));
266 |         // Replace this zero-sized alloca with the one at the start of the entry
267 |         // block after ensuring that the address will be aligned enough for both
268 |         // types.
269 |         unsigned MaxAlign = std::max(EntryAI->getAlignment(),
270 |                                      AI.getAlignment());
271 |         EntryAI->setAlignment(MaxAlign);
272 |         if (AI.getType() != EntryAI->getType())
273 |           return new BitCastInst(EntryAI, AI.getType());
274 |         return replaceInstUsesWith(AI, EntryAI);
275 |       }
276 |     }
277 |   }
278 |
279 |   if (AI.getAlignment()) {
280 |     // Check to see if this allocation is only modified by a memcpy/memmove from
281 |     // a constant global whose alignment is equal to or exceeds that of the
282 |     // allocation. If this is the case, we can change all users to use
283 |     // the constant global instead. This is commonly produced by the CFE by
284 |     // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
285 |     // is only subsequently read.
286 |     SmallVector<Instruction *, 4> ToDelete;
287 |     if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) {
288 |       unsigned SourceAlign = getOrEnforceKnownAlignment(
289 |           Copy->getSource(), AI.getAlignment(), DL, &AI, AC, DT);
290 |       if (AI.getAlignment() <= SourceAlign) {
291 |         DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
292 |         DEBUG(dbgs() << " memcpy = " << *Copy << '\n');
293 |         for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
294 |           eraseInstFromFunction(*ToDelete[i]);
295 |         Constant *TheSrc = cast<Constant>(Copy->getSource());
296 |         Constant *Cast
297 |           = ConstantExpr::getPointerBitCastOrAddrSpaceCast(TheSrc, AI.getType());
298 |         Instruction *NewI = replaceInstUsesWith(AI, Cast);
299 |         eraseInstFromFunction(*Copy);
300 |         ++NumGlobalCopies;
301 |         return NewI;
302 |       }
303 |     }
304 |   }
305 |
306 |   // At last, use the generic allocation site handler to aggressively remove
307 |   // unused allocas.
308 |   return visitAllocSite(AI);
309 | }
310 |
311 | /// \brief Helper to combine a load to a new type.
312 | ///
313 | /// This just does the work of combining a load to a new type. It handles
314 | /// metadata, etc., and returns the new instruction. The \c NewTy should be the
315 | /// loaded *value* type. This will convert it to a pointer, cast the operand to
316 | /// that pointer type, load it, etc.
317 | ///
318 | /// Note that this will create all of the instructions with whatever insert
319 | /// point the \c InstCombiner currently is using.
320 | static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewTy,
321 |                                       const Twine &Suffix = "") {
322 |   Value *Ptr = LI.getPointerOperand();
323 |   unsigned AS = LI.getPointerAddressSpace();
324 |   SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
325 |   LI.getAllMetadata(MD);
326 |
327 |   LoadInst *NewLoad = IC.Builder->CreateAlignedLoad(
328 |       IC.Builder->CreateBitCast(Ptr, NewTy->getPointerTo(AS)),
329 |       LI.getAlignment(), LI.getName() + Suffix);
330 |   MDBuilder MDB(NewLoad->getContext());
331 |   for (const auto &MDPair : MD) {
332 |     unsigned ID = MDPair.first;
333 |     MDNode *N = MDPair.second;
334 |     // Note, essentially every kind of metadata should be preserved here! This
335 |     // routine is supposed to clone a load instruction changing *only its type*.
336 |     // The only metadata it makes sense to drop is metadata which is invalidated
337 |     // when the pointer type changes. This should essentially never be the case
338 |     // in LLVM, but we explicitly switch over only known metadata to be
339 |     // conservatively correct. If you are adding metadata to LLVM which pertains
340 |     // to loads, you almost certainly want to add it here.
341 |     switch (ID) {
342 |     case LLVMContext::MD_dbg:
343 |     case LLVMContext::MD_tbaa:
344 |     case LLVMContext::MD_prof:
345 |     case LLVMContext::MD_fpmath:
346 |     case LLVMContext::MD_tbaa_struct:
347 |     case LLVMContext::MD_invariant_load:
348 |     case LLVMContext::MD_alias_scope:
349 |     case LLVMContext::MD_noalias:
350 |     case LLVMContext::MD_nontemporal:
351 |     case LLVMContext::MD_mem_parallel_loop_access:
352 |       // All of these directly apply.
353 |       NewLoad->setMetadata(ID, N);
354 |       break;
355 |
356 |     case LLVMContext::MD_nonnull:
357 |       // This only directly applies if the new type is also a pointer.
358 |       if (NewTy->isPointerTy()) {
359 |         NewLoad->setMetadata(ID, N);
360 |         break;
361 |       }
362 |       // If it's integral now, translate it to !range metadata.
363 |       if (NewTy->isIntegerTy()) {
364 |         auto *ITy = cast<IntegerType>(NewTy);
365 |         auto *NullInt = ConstantExpr::getPtrToInt(
366 |             ConstantPointerNull::get(cast<PointerType>(Ptr->getType())), ITy);
367 |         auto *NonNullInt =
368 |             ConstantExpr::getAdd(NullInt, ConstantInt::get(ITy, 1));
369 |         NewLoad->setMetadata(LLVMContext::MD_range,
370 |                              MDB.createRange(NonNullInt, NullInt));
371 |       }
372 |       break;
373 |     case LLVMContext::MD_align:
374 |     case LLVMContext::MD_dereferenceable:
375 |     case LLVMContext::MD_dereferenceable_or_null:
376 |       // These only directly apply if the new type is also a pointer.
377 |       if (NewTy->isPointerTy())
378 |         NewLoad->setMetadata(ID, N);
379 |       break;
380 |     case LLVMContext::MD_range:
381 |       // FIXME: It would be nice to propagate this in some way, but the type
382 |       // conversions make it hard. If the new type is a pointer, we could
383 |       // translate it to !nonnull metadata.
384 |       break;
385 |     }
386 |   }
387 |   return NewLoad;
388 | }
389 |
390 | /// \brief Combine a store to a new type.
391 | ///
392 | /// Returns the newly created store instruction.
393 | static StoreInst *combineStoreToNewValue(InstCombiner &IC, StoreInst &SI, Value *V) {
394 |   Value *Ptr = SI.getPointerOperand();
395 |   unsigned AS = SI.getPointerAddressSpace();
396 |   SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
397 |   SI.getAllMetadata(MD);
398 |
399 |   StoreInst *NewStore = IC.Builder->CreateAlignedStore(
400 |       V, IC.Builder->CreateBitCast(Ptr, V->getType()->getPointerTo(AS)),
401 |       SI.getAlignment());
402 |   for (const auto &MDPair : MD) {
403 |     unsigned ID = MDPair.first;
404 |     MDNode *N = MDPair.second;
405 |     // Note, essentially every kind of metadata should be preserved here! This
406 |     // routine is supposed to clone a store instruction changing *only its
407 |     // type*. The only metadata it makes sense to drop is metadata which is
408 |     // invalidated when the pointer type changes. This should essentially
409 |     // never be the case in LLVM, but we explicitly switch over only known
410 |     // metadata to be conservatively correct. If you are adding metadata to
411 |     // LLVM which pertains to stores, you almost certainly want to add it
412 |     // here.
413 |     switch (ID) {
414 |     case LLVMContext::MD_dbg:
415 |     case LLVMContext::MD_tbaa:
416 |     case LLVMContext::MD_prof:
417 |     case LLVMContext::MD_fpmath:
418 |     case LLVMContext::MD_tbaa_struct:
419 |     case LLVMContext::MD_alias_scope:
420 |     case LLVMContext::MD_noalias:
421 |     case LLVMContext::MD_nontemporal:
422 |     case LLVMContext::MD_mem_parallel_loop_access:
423 |       // All of these directly apply.
424 |       NewStore->setMetadata(ID, N);
425 |       break;
426 |
427 |     case LLVMContext::MD_invariant_load:
428 |     case LLVMContext::MD_nonnull:
429 |     case LLVMContext::MD_range:
430 |     case LLVMContext::MD_align:
431 |     case LLVMContext::MD_dereferenceable:
432 |     case LLVMContext::MD_dereferenceable_or_null:
433 |       // These don't apply for stores.
434 |       break;
435 |     }
436 |   }
437 |
438 |   return NewStore;
439 | }
440 |
441 | /// \brief Combine loads to match the type of their uses' value after looking
442 | /// through intervening bitcasts.
443 | ///
444 | /// The core idea here is that if the result of a load is used in an operation,
445 | /// we should load the type most conducive to that operation. For example, when
446 | /// loading an integer and converting that immediately to a pointer, we should
447 | /// instead directly load a pointer.
448 | ///
449 | /// However, this routine must never change the width of a load or the number of
450 | /// loads as that would introduce a semantic change. This combine is expected to
451 | /// be a semantic no-op which just allows loads to more closely model the types
452 | /// of their consuming operations.
453 | ///
454 | /// Currently, we also refuse to change the precise type used for an atomic load
455 | /// or a volatile load. This is debatable, and might be reasonable to change
456 | /// later. However, it is risky in case some backend or other part of LLVM is
457 | /// relying on the exact type loaded to select appropriate atomic operations.
458 | static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) {
459 |   // FIXME: We could probably with some care handle both volatile and atomic
460 |   // loads here but it isn't clear that this is important.
461 |   if (!LI.isSimple())
462 |     return nullptr;
463 |
464 |   if (LI.use_empty())
465 |     return nullptr;
466 |
467 |   Type *Ty = LI.getType();
468 |   const DataLayout &DL = IC.getDataLayout();
469 |
470 |   // Try to canonicalize loads which are only ever stored to operate over
471 |   // integers instead of any other type. We only do this when the loaded type
472 |   // is sized and has a size exactly the same as its store size and the store
473 |   // size is a legal integer type.
474 |   if (!Ty->isIntegerTy() && Ty->isSized() &&
475 |       DL.isLegalInteger(DL.getTypeStoreSizeInBits(Ty)) &&
476 |       DL.getTypeStoreSizeInBits(Ty) == DL.getTypeSizeInBits(Ty)) {
477 |     if (std::all_of(LI.user_begin(), LI.user_end(), [&LI](User *U) {
478 |           auto *SI = dyn_cast<StoreInst>(U);
479 |           return SI && SI->getPointerOperand() != &LI;
480 |         })) {
481 |       LoadInst *NewLoad = combineLoadToNewType(
482 |           IC, LI,
483 |           Type::getIntNTy(LI.getContext(), DL.getTypeStoreSizeInBits(Ty)));
484 |       // Replace all the stores with stores of the newly loaded value.
485 |       for (auto UI = LI.user_begin(), UE = LI.user_end(); UI != UE;) {
486 |         auto *SI = cast<StoreInst>(*UI++);
487 |         IC.Builder->SetInsertPoint(SI);
488 |         combineStoreToNewValue(IC, *SI, NewLoad);
489 |         IC.eraseInstFromFunction(*SI);
490 |       }
491 |       assert(LI.use_empty() && "Failed to remove all users of the load!");
492 |       // Return the old load so the combiner can delete it safely.
493 |       return &LI;
494 |     }
495 |   }
496 |
497 |   // Fold away bit casts of the loaded value by loading the desired type.
498 |   // We can do this for BitCastInsts as well as casts from and to pointer types,
499 |   // as long as those are noops (i.e., the source or dest type have the same
500 |   // bitwidth as the target's pointers).
501 |   if (LI.hasOneUse())
502 |     if (auto *CI = dyn_cast<CastInst>(LI.user_back())) {
503 |       if (CI->isNoopCast(DL)) {
504 |         LoadInst *NewLoad = combineLoadToNewType(IC, LI, CI->getDestTy());
505 |         CI->replaceAllUsesWith(NewLoad);
506 |         IC.eraseInstFromFunction(*CI);
507 |         return &LI;
508 |       }
509 |     }
510 |
511 |   // FIXME: We should also canonicalize loads of vectors when their elements are
512 |   // cast to other types.
513 |   return nullptr;
514 | }
515 |
516 | static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) {
517 |   // FIXME: We could probably with some care handle both volatile and atomic
518 |   // loads here but it isn't clear that this is important.
519 |   if (!LI.isSimple())
520 |     return nullptr;
521 |
522 |   Type *T = LI.getType();
523 |   if (!T->isAggregateType())
524 |     return nullptr;
525 |
526 |   StringRef Name = LI.getName();
527 |   assert(LI.getAlignment() && "Alignment must be set at this point");
528 |
529 |   if (auto *ST = dyn_cast<StructType>(T)) {
530 |     // If the struct only has one element, we unpack.
531 |     auto NumElements = ST->getNumElements();
532 |     if (NumElements == 1) {
533 |       LoadInst *NewLoad = combineLoadToNewType(IC, LI, ST->getTypeAtIndex(0U),
534 |                                                ".unpack");
535 |       return IC.replaceInstUsesWith(LI, IC.Builder->CreateInsertValue(
536 |           UndefValue::get(T), NewLoad, 0, Name));
537 |     }
538 |
539 |     // We don't want to break loads with padding here as we'd lose
540 |     // the knowledge that padding exists for the rest of the pipeline.
541 |     const DataLayout &DL = IC.getDataLayout();
542 |     auto *SL = DL.getStructLayout(ST);
543 |     if (SL->hasPadding())
544 |       return nullptr;
545 |
546 |     auto Align = LI.getAlignment();
547 |     if (!Align)
548 |       Align = DL.getABITypeAlignment(ST);
549 |
550 |     auto *Addr = LI.getPointerOperand();
551 |     auto *IdxType = Type::getInt32Ty(T->getContext());
552 |     auto *Zero = ConstantInt::get(IdxType, 0);
553 |
554 |     Value *V = UndefValue::get(T);
555 |     for (unsigned i = 0; i < NumElements; i++) {
556 |       Value *Indices[2] = {
557 |         Zero,
558 |         ConstantInt::get(IdxType, i),
559 |       };
560 |       auto *Ptr = IC.Builder->CreateInBoundsGEP(ST, Addr, makeArrayRef(Indices),
561 |                                                 Name + ".elt");
562 |       auto EltAlign = MinAlign(Align, SL->getElementOffset(i));
563 |       auto *L = IC.Builder->CreateAlignedLoad(Ptr, EltAlign, Name + ".unpack");
564 |       V = IC.Builder->CreateInsertValue(V, L, i);
565 |     }
566 |
567 |     V->setName(Name);
568 |     return IC.replaceInstUsesWith(LI, V);
569 |   }
570 |
571 |   if (auto *AT = dyn_cast<ArrayType>(T)) {
572 |     auto *ET = AT->getElementType();
573 |     auto NumElements = AT->getNumElements();
574 |     if (NumElements == 1) {
575 |       LoadInst *NewLoad = combineLoadToNewType(IC, LI, ET, ".unpack");
576 |       return IC.replaceInstUsesWith(LI, IC.Builder->CreateInsertValue(
577 |           UndefValue::get(T), NewLoad, 0, Name));
578 |     }
579 |
580 |     const DataLayout &DL = IC.getDataLayout();
581 |     auto EltSize = DL.getTypeAllocSize(ET);
582 |     auto Align = LI.getAlignment();
583 |     if (!Align)
584 |       Align = DL.getABITypeAlignment(T);
585 |
586 |     auto *Addr = LI.getPointerOperand();
587 |     auto *IdxType = Type::getInt64Ty(T->getContext());
588 |     auto *Zero = ConstantInt::get(IdxType, 0);
589 |
590 |     Value *V = UndefValue::get(T);
591 |     uint64_t Offset = 0;
592 |     for (uint64_t i = 0; i < NumElements; i++) {
593 |       Value *Indices[2] = {
594 |         Zero,
595 |         ConstantInt::get(IdxType, i),
596 |       };
597 |       auto *Ptr = IC.Builder->CreateInBoundsGEP(AT, Addr, makeArrayRef(Indices),
598 |                                                 Name + ".elt");
599 |       auto *L = IC.Builder->CreateAlignedLoad(Ptr, MinAlign(Align, Offset),
600 |                                               Name + ".unpack");
601 |       V = IC.Builder->CreateInsertValue(V, L, i);
602 |       Offset += EltSize;
603 |     }
604 |
605 |     V->setName(Name);
606 |     return IC.replaceInstUsesWith(LI, V);
607 |   }
608 |
609 |   return nullptr;
610 | }
611 |
612 | // If we can determine that all possible objects pointed to by the provided
613 | // pointer value are, not only dereferenceable, but also definitively less than
614 | // or equal to the provided maximum size, then return true. Otherwise, return
615 | // false (constant global values and allocas fall into this category).
616 | //
617 | // FIXME: This should probably live in ValueTracking (or similar).
618 | static bool isObjectSizeLessThanOrEq(Value *V, uint64_t MaxSize,
619 |                                      const DataLayout &DL) {
620 |   SmallPtrSet<Value *, 4> Visited;
621 |   SmallVector<Value *, 4> Worklist(1, V);
622 |
623 |   do {
624 |     Value *P = Worklist.pop_back_val();
625 |     P = P->stripPointerCasts();
626 |
627 |     if (!Visited.insert(P).second)
628 |       continue;
629 |
630 |     if (SelectInst *SI = dyn_cast<SelectInst>(P)) {
631 |       Worklist.push_back(SI->getTrueValue());
632 |       Worklist.push_back(SI->getFalseValue());
633 |       continue;
634 |     }
635 |
636 |     if (PHINode *PN = dyn_cast<PHINode>(P)) {
637 |       for (Value *IncValue : PN->incoming_values())
638 |         Worklist.push_back(IncValue);
639 |       continue;
640 |     }
641 |
642 |     if (GlobalAlias *GA = dyn_cast<GlobalAlias>(P)) {
643 |       if (GA->isInterposable())
644 |         return false;
645 |       Worklist.push_back(GA->getAliasee());
646 |       continue;
647 |     }
648 |
649 |     // If we know how big this object is, and it is less than MaxSize, continue
650 |     // searching. Otherwise, return false.
651 |     if (AllocaInst *AI = dyn_cast<AllocaInst>(P)) {
652 |       if (!AI->getAllocatedType()->isSized())
653 |         return false;
654 |
655 |       ConstantInt *CS = dyn_cast<ConstantInt>(AI->getArraySize());
656 |       if (!CS)
657 |         return false;
658 |
659 |       uint64_t TypeSize = DL.getTypeAllocSize(AI->getAllocatedType());
660 |       // Make sure that, even if the multiplication below would wrap as an
661 |       // uint64_t, we still do the right thing.
662 |       if ((CS->getValue().zextOrSelf(128)*APInt(128, TypeSize)).ugt(MaxSize))
663 |         return false;
664 |       continue;
665 |     }
666 |
667 |     if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
668 |       if (!GV->hasDefinitiveInitializer() || !GV->isConstant())
669 |         return false;
670 |
671 |       uint64_t InitSize = DL.getTypeAllocSize(GV->getValueType());
672 |       if (InitSize > MaxSize)
673 |         return false;
674 |       continue;
675 |     }
676 |
677 |     return false;
678 |   } while (!Worklist.empty());
679 |
680 |   return true;
681 | }
682 |
683 | // If we're indexing into an object of a known size, and the outer index is
684 | // not a constant, but having any value but zero would lead to undefined
685 | // behavior, replace it with zero.
686 | //
687 | // For example, if we have:
688 | //   @f.a = private unnamed_addr constant [1 x i32] [i32 12], align 4
689 | //   ...
690 | //   %arrayidx = getelementptr inbounds [1 x i32]* @f.a, i64 0, i64 %x
691 | //   ... = load i32* %arrayidx, align 4
692 | // Then we know that we can replace %x in the GEP with i64 0.
693 | //
694 | // FIXME: We could fold any GEP index to zero that would cause UB if it were
695 | // not zero. Currently, we only handle the first such index. We could also
696 | // search through non-zero constant indices if we kept track of the
697 | // offsets those indices implied.
698 | static bool canReplaceGEPIdxWithZero(InstCombiner &IC, GetElementPtrInst *GEPI,
699 |                                      Instruction *MemI, unsigned &Idx) {
700 |   if (GEPI->getNumOperands() < 2)
701 |     return false;
702 |
703 |   // Find the first non-zero index of a GEP. If all indices are zero, return
704 |   // one past the last index.
705 |   auto FirstNZIdx = [](const GetElementPtrInst *GEPI) {
706 |     unsigned I = 1;
707 |     for (unsigned IE = GEPI->getNumOperands(); I != IE; ++I) {
708 |       Value *V = GEPI->getOperand(I);
709 |       if (const ConstantInt *CI = dyn_cast<ConstantInt>(V))
710 |         if (CI->isZero())
711 |           continue;
712 |
713 |       break;
714 |     }
715 |
716 |     return I;
717 |   };
718 |
719 |   // Skip through initial 'zero' indices, and find the corresponding pointer
720 |   // type. See if the next index is not a constant.
721 |   Idx = FirstNZIdx(GEPI);
722 |   if (Idx == GEPI->getNumOperands())
723 |     return false;
724 |   if (isa<Constant>(GEPI->getOperand(Idx)))
725 |     return false;
726 |
727 |   SmallVector<Value *, 4> Ops(GEPI->idx_begin(), GEPI->idx_begin() + Idx);
728 |   Type *AllocTy =
729 |       GetElementPtrInst::getIndexedType(GEPI->getSourceElementType(), Ops);
730 |   if (!AllocTy || !AllocTy->isSized())
731 |     return false;
732 |   const DataLayout &DL = IC.getDataLayout();
733 |   uint64_t TyAllocSize = DL.getTypeAllocSize(AllocTy);
734 |
735 |   // If there are more indices after the one we might replace with a zero, make
736 |   // sure they're all non-negative. If any of them are negative, the overall
737 |   // address being computed might be before the base address determined by the
738 |   // first non-zero index.
739 |   auto IsAllNonNegative = [&]() {
740 |     for (unsigned i = Idx+1, e = GEPI->getNumOperands(); i != e; ++i) {
741 |       bool KnownNonNegative, KnownNegative;
742 |       IC.ComputeSignBit(GEPI->getOperand(i), KnownNonNegative,
743 |                         KnownNegative, 0, MemI);
744 |       if (KnownNonNegative)
745 |         continue;
746 |       return false;
747 |     }
748 |
749 |     return true;
750 |   };
751 |
752 |   // FIXME: If the GEP is not inbounds, and there are extra indices after the
753 |   // one we'll replace, those could cause the address computation to wrap
754 |   // (rendering the IsAllNonNegative() check below insufficient). We can do
755 |   // better, ignoring zero indices (and other indices we can prove small
756 |   // enough not to wrap).
757 |   if (Idx+1 != GEPI->getNumOperands() && !GEPI->isInBounds())
758 |     return false;
759 |
760 |   // Note that isObjectSizeLessThanOrEq will return true only if the pointer is
761 |   // also known to be dereferenceable.
762 |   return isObjectSizeLessThanOrEq(GEPI->getOperand(0), TyAllocSize, DL) &&
763 |          IsAllNonNegative();
764 | }
765 |
766 | // If we're indexing into an object with a variable index for the memory
767 | // access, but the object has only one element, we can assume that the index
768 | // will always be zero. If we replace the GEP, return it.
769 | template <typename T>
770 | static Instruction *replaceGEPIdxWithZero(InstCombiner &IC, Value *Ptr,
771 |                                           T &MemI) {
772 |   if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr)) {
773 |     unsigned Idx;
774 |     if (canReplaceGEPIdxWithZero(IC, GEPI, &MemI, Idx)) {
775 |       Instruction *NewGEPI = GEPI->clone();
776 |       NewGEPI->setOperand(Idx,
777 |           ConstantInt::get(GEPI->getOperand(Idx)->getType(), 0));
778 |       NewGEPI->insertBefore(GEPI);
779 |       MemI.setOperand(MemI.getPointerOperandIndex(), NewGEPI);
780 |       return NewGEPI;
781 |     }
782 |   }
783 |
784 |   return nullptr;
785 | }
786 |
787 | Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
788 |   Value *Op = LI.getOperand(0);
789 |
790 |   // Try to canonicalize the loaded type.
791 |   if (Instruction *Res = combineLoadToOperationType(*this, LI))
792 |     return Res;
793 |
794 |   // Attempt to improve the alignment.
795 |   unsigned KnownAlign = getOrEnforceKnownAlignment(
796 |       Op, DL.getPrefTypeAlignment(LI.getType()), DL, &LI, AC, DT);
797 |   unsigned LoadAlign = LI.getAlignment();
798 |   unsigned EffectiveLoadAlign =
799 |       LoadAlign != 0 ? LoadAlign : DL.getABITypeAlignment(LI.getType());
800 |
801 |   if (KnownAlign > EffectiveLoadAlign)
802 |     LI.setAlignment(KnownAlign);
803 |   else if (LoadAlign == 0)
804 |     LI.setAlignment(EffectiveLoadAlign);
805 |
806 |   // Replace GEP indices if possible.
807 |   if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Op, LI)) {
808 |     Worklist.Add(NewGEPI);
809 |     return &LI;
810 |   }
811 |
812 |   // None of the following transforms are legal for volatile/atomic loads.
813 |   // FIXME: Some of it is okay for atomic loads; needs refactoring.
814 |   if (!LI.isSimple()) return nullptr;
815 |
816 |   if (Instruction *Res = unpackLoadToAggregate(*this, LI))
817 |     return Res;
818 |
819 |   // Do really simple store-to-load forwarding and load CSE, to catch cases
820 |   // where there are several consecutive memory accesses to the same location,
821 |   // separated by a few arithmetic operations.
822 |   BasicBlock::iterator BBI(LI);
823 |   AAMDNodes AATags;
824 |   if (Value *AvailableVal =
825 |           FindAvailableLoadedValue(&LI, LI.getParent(), BBI,
826 |                                    DefMaxInstsToScan, AA, &AATags)) {
827 |     if (LoadInst *NLI = dyn_cast<LoadInst>(AvailableVal)) {
828 |       unsigned KnownIDs[] = {
829 |           LLVMContext::MD_tbaa,            LLVMContext::MD_alias_scope,
830 |           LLVMContext::MD_noalias,         LLVMContext::MD_range,
831 |           LLVMContext::MD_invariant_load,  LLVMContext::MD_nonnull,
832 |           LLVMContext::MD_invariant_group, LLVMContext::MD_align,
833 |           LLVMContext::MD_dereferenceable,
834 |           LLVMContext::MD_dereferenceable_or_null};
835 |       combineMetadata(NLI, &LI, KnownIDs);
836 |     }
837 |
838 |     return replaceInstUsesWith(
839 |         LI, Builder->CreateBitOrPointerCast(AvailableVal, LI.getType(),
840 |                                             LI.getName() + ".cast"));
841 |   }
842 |
843 |   // load(gep null, ...) -> unreachable
844 |   if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
845 |     const Value *GEPI0 = GEPI->getOperand(0);
846 |     // TODO: Consider a target hook for valid address spaces for this xform.
847 |     if (isa<ConstantPointerNull>(GEPI0) && GEPI->getPointerAddressSpace() == 0) {
848 |       // Insert a new store to null instruction before the load to indicate
849 |       // that this code is not reachable. We do this instead of inserting
850 |       // an unreachable instruction directly because we cannot modify the
851 |       // CFG.
852 |       new StoreInst(UndefValue::get(LI.getType()),
853 |                     Constant::getNullValue(Op->getType()), &LI);
854 |       return replaceInstUsesWith(LI, UndefValue::get(LI.getType()));
855 |     }
856 |   }
857 |
858 |   // load null/undef -> unreachable
859 |   // TODO: Consider a target hook for valid address spaces for this xform.
860 |   if (isa<UndefValue>(Op) ||
861 |       (isa<ConstantPointerNull>(Op) && LI.getPointerAddressSpace() == 0)) {
862 |     // Insert a new store to null instruction before the load to indicate that
863 |     // this code is not reachable. We do this instead of inserting an
864 |     // unreachable instruction directly because we cannot modify the CFG.
865 |     new StoreInst(UndefValue::get(LI.getType()),
866 |                   Constant::getNullValue(Op->getType()), &LI);
867 |     return replaceInstUsesWith(LI, UndefValue::get(LI.getType()));
868 |   }
869 |
870 |   if (Op->hasOneUse()) {
871 |     // Change select and PHI nodes to select values instead of addresses: this
872 |     // helps alias analysis out a lot, allows many other simplifications, and
873 |     // exposes redundancy in the code.
874 |     //
875 |     // Note that we cannot do the transformation unless we know that the
876 |     // introduced loads cannot trap! Something like this is valid as long as
877 |     // the condition is always false: load (select bool %C, int* null, int* %G),
878 |     // but it would not be valid if we transformed it to load from null
879 |     // unconditionally.
880 |     //
881 |     if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
882 |       // load (select (Cond, &V1, &V2)) --> select(Cond, load &V1, load &V2).
883 |       unsigned Align = LI.getAlignment();
884 |       if (isSafeToLoadUnconditionally(SI->getOperand(1), Align, SI) &&
885 |           isSafeToLoadUnconditionally(SI->getOperand(2), Align, SI)) {
886 |         LoadInst *V1 = Builder->CreateLoad(SI->getOperand(1),
887 |                                            SI->getOperand(1)->getName()+".val");
888 |         LoadInst *V2 = Builder->CreateLoad(SI->getOperand(2),
889 |                                            SI->getOperand(2)->getName()+".val");
890 |         V1->setAlignment(Align);
891 |         V2->setAlignment(Align);
892 |         return SelectInst::Create(SI->getCondition(), V1, V2);
893 |       }
894 |
895 |       // load (select (cond, null, P)) -> load P
896 |       if (isa<ConstantPointerNull>(SI->getOperand(1)) &&
897 |           LI.getPointerAddressSpace() == 0) {
898 |         LI.setOperand(0, SI->getOperand(2));
899 |         return &LI;
900 |       }
901 |
902 |       // load (select (cond, P, null)) -> load P
903 |       if (isa<ConstantPointerNull>(SI->getOperand(2)) &&
904 |           LI.getPointerAddressSpace() == 0) {
905 |         LI.setOperand(0, SI->getOperand(1));
906 |         return &LI;
907 |       }
908 |     }
909 |   }
910 |   return nullptr;
911 | }
912 |
913 | /// \brief Combine stores to match the type of value being stored.
914 | ///
915 | /// The core idea here is that the memory does not have any intrinsic type and
916 | /// where we can we should match the type of a store to the type of value being
917 | /// stored.
918 | ///
919 | /// However, this routine must never change the width of a store or the number of
920 | /// stores as that would introduce a semantic change. This combine is expected to
921 | /// be a semantic no-op which just allows stores to more closely model the types
922 | /// of their incoming values.
923 | ///
924 | /// Currently, we also refuse to change the precise type used for an atomic or
925 | /// volatile store. This is debatable, and might be reasonable to change later.
926 | /// However, it is risky in case some backend or other part of LLVM is relying
927 | /// on the exact type stored to select appropriate atomic operations.
928 | ///
929 | /// \returns true if the store was successfully combined away. This indicates
930 | /// the caller must erase the store instruction. We have to let the caller erase
931 | /// the store instruction as otherwise there is no way to signal whether it was
932 | /// combined or not: IC.EraseInstFromFunction returns a null pointer.
933 | static bool combineStoreToValueType(InstCombiner &IC, StoreInst &SI) {
934 |   // FIXME: We could probably with some care handle both volatile and atomic
935 |   // stores here but it isn't clear that this is important.
936 |   if (!SI.isSimple())
937 |     return false;
938 |
939 |   Value *V = SI.getValueOperand();
940 |
941 |   // Fold away bit casts of the stored value by storing the original type.
942 |   if (auto *BC = dyn_cast<BitCastInst>(V)) {
943 |     V = BC->getOperand(0);
944 |     combineStoreToNewValue(IC, SI, V);
945 |     return true;
946 |   }
947 |
948 |   // FIXME: We should also canonicalize stores of vectors when their elements are
949 |   // cast to other types.
950 |   return false;
951 | }
952 |
953 | static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) {
954 |   // FIXME: We could probably with some care handle both volatile and atomic
955 |   // stores here but it isn't clear that this is important.
956 |   if (!SI.isSimple())
957 |     return false;
958 |
959 |   Value *V = SI.getValueOperand();
960 |   Type *T = V->getType();
961 |
962 |   if (!T->isAggregateType())
963 |     return false;
964 |
965 |   if (auto *ST = dyn_cast<StructType>(T)) {
966 |     // If the struct only has one element, we unpack.
967 |     unsigned Count = ST->getNumElements();
968 |     if (Count == 1) {
969 |       V = IC.Builder->CreateExtractValue(V, 0);
970 |       combineStoreToNewValue(IC, SI, V);
971 |       return true;
972 |     }
973 |
974 |     // We don't want to break stores with padding here as we'd lose
975 |     // the knowledge that padding exists for the rest of the pipeline.
976 |     const DataLayout &DL = IC.getDataLayout();
977 |     auto *SL = DL.getStructLayout(ST);
978 |     if (SL->hasPadding())
979 |       return false;
980 |
981 |     auto Align = SI.getAlignment();
982 |     if (!Align)
983 |       Align = DL.getABITypeAlignment(ST);
984 |
985 |     SmallString<16> EltName = V->getName();
986 |     EltName += ".elt";
987 |     auto *Addr = SI.getPointerOperand();
988 |     SmallString<16> AddrName = Addr->getName();
989 |     AddrName += ".repack";
990 |
991 |     auto *IdxType = Type::getInt32Ty(ST->getContext());
992 |     auto *Zero = ConstantInt::get(IdxType, 0);
993 |     for (unsigned i = 0; i < Count; i++) {
994 |       Value *Indices[2] = {
995 |         Zero,
996 |         ConstantInt::get(IdxType, i),
997 |       };
998 |       auto *Ptr = IC.Builder->CreateInBoundsGEP(ST, Addr, makeArrayRef(Indices),
999 |                                                 AddrName);
1000 |       auto *Val = IC.Builder->CreateExtractValue(V, i, EltName);
1001 |       auto EltAlign = MinAlign(Align, SL->getElementOffset(i));
1002 |       IC.Builder->CreateAlignedStore(Val, Ptr, EltAlign);
1003 |     }
1004 |
1005 |     return true;
1006 |   }
1007 |
1008 |   if (auto *AT = dyn_cast<ArrayType>(T)) {
1009 |     // If the array only has one element, we unpack.
1010 |     auto NumElements = AT->getNumElements();
1011 |     if (NumElements == 1) {
1012 |       V = IC.Builder->CreateExtractValue(V, 0);
1013 |       combineStoreToNewValue(IC, SI, V);
1014 |       return true;
1015 |     }
1016 |
1017 |     const DataLayout &DL = IC.getDataLayout();
1018 |     auto EltSize = DL.getTypeAllocSize(AT->getElementType());
1019 |     auto Align = SI.getAlignment();
1020 |     if (!Align)
1021 |       Align = DL.getABITypeAlignment(T);
1022 |
1023 |     SmallString<16> EltName = V->getName();
1024 |     EltName += ".elt";
1025 |     auto *Addr = SI.getPointerOperand();
1026 |     SmallString<16> AddrName = Addr->getName();
1027 |     AddrName += ".repack";
1028 |
1029 |     auto *IdxType = Type::getInt64Ty(T->getContext());
1030 |     auto *Zero = ConstantInt::get(IdxType, 0);
1031 |
1032 |     uint64_t Offset = 0;
1033 |     for (uint64_t i = 0; i < NumElements; i++) {
1034 |       Value *Indices[2] = {
1035 |         Zero,
1036 |         ConstantInt::get(IdxType, i),
1037 |       };
1038 |       auto *Ptr = IC.Builder->CreateInBoundsGEP(AT, Addr, makeArrayRef(Indices),
1039 |                                                 AddrName);
1040 |       auto *Val = IC.Builder->CreateExtractValue(V, i, EltName);
1041 |       auto EltAlign = MinAlign(Align, Offset);
1042 |       IC.Builder->CreateAlignedStore(Val, Ptr, EltAlign);
1043 |       Offset += EltSize;
1044 |     }
1045 |
1046 |     return true;
1047 |   }
1048 |
1049 |   return false;
1050 | }
1051 |
1052 | /// equivalentAddressValues - Test if A and B will obviously have the same
1053 | /// value. This includes recognizing that %t0 and %t1 will have the same
1054 | /// value in code like this:
1055 | ///   %t0 = getelementptr \@a, 0, 3
1056 | ///   store i32 0, i32* %t0
1057 | ///   %t1 = getelementptr \@a, 0, 3
1058 | ///   %t2 = load i32* %t1
1059 | ///
1060 | static bool equivalentAddressValues(Value *A, Value *B) {
1061 |   // Test if the values are trivially equivalent.
1062 |   if (A == B) return true;
1063 |
1064 |   // Test if the values come from identical arithmetic instructions.
1065 |   // This uses isIdenticalToWhenDefined instead of isIdenticalTo because
1066 |   // it's only used to compare two uses within the same basic block, which
1067 |   // means that they'll always either have the same value or one of them
1068 |   // will have an undefined value.
1069 |   if (isa<BinaryOperator>(A) ||
1070 |       isa<CastInst>(A) ||
1071 |       isa<PHINode>(A) ||
1072 |       isa<GetElementPtrInst>(A))
1073 |     if (Instruction *BI = dyn_cast<Instruction>(B))
1074 |       if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
1075 |         return true;
1076 |
1077 |   // Otherwise they may not be equivalent.
1078 |   return false;
1079 | }
1080 |
1081 | Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
1082 |   Value *Val = SI.getOperand(0);
1083 |   Value *Ptr = SI.getOperand(1);
1084 |
1085 |   // Try to canonicalize the stored type.
1086 |   if (combineStoreToValueType(*this, SI))
1087 |     return eraseInstFromFunction(SI);
1088 |
1089 |   // Attempt to improve the alignment.
1090 |   unsigned KnownAlign = getOrEnforceKnownAlignment(
1091 |       Ptr, DL.getPrefTypeAlignment(Val->getType()), DL, &SI, AC, DT);
1092 |   unsigned StoreAlign = SI.getAlignment();
1093 |   unsigned EffectiveStoreAlign =
1094 |       StoreAlign != 0 ? StoreAlign : DL.getABITypeAlignment(Val->getType());
1095 |
1096 |   if (KnownAlign > EffectiveStoreAlign)
1097 |     SI.setAlignment(KnownAlign);
1098 |   else if (StoreAlign == 0)
1099 |     SI.setAlignment(EffectiveStoreAlign);
1100 |
1101 |   // Try to canonicalize the stored type.
1102 |   if (unpackStoreToAggregate(*this, SI))
1103 |     return eraseInstFromFunction(SI);
1104 |
1105 |   // Replace GEP indices if possible.
1106 |   if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Ptr, SI)) {
1107 |     Worklist.Add(NewGEPI);
1108 |     return &SI;
1109 |   }
1110 |
1111 |   // Don't hack volatile/ordered stores.
1112 |   // FIXME: Some bits are legal for ordered atomic stores; needs refactoring.
1113 |   if (!SI.isUnordered()) return nullptr;
1114 |
1115 |   // If the RHS is an alloca with a single use, zapify the store, making the
1116 |   // alloca dead.
1117 |   if (Ptr->hasOneUse()) {
1118 |     if (isa<AllocaInst>(Ptr))
1119 |       return eraseInstFromFunction(SI);
1120 |     if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
1121 |       if (isa<AllocaInst>(GEP->getOperand(0))) {
1122 |         if (GEP->getOperand(0)->hasOneUse())
1123 |           return eraseInstFromFunction(SI);
1124 |       }
1125 |     }
1126 |   }
1127 |
1128 |   // Do really simple DSE, to catch cases where there are several consecutive
1129 |   // stores to the same location, separated by a few arithmetic operations. This
1130 |   // situation often occurs with bitfield accesses.
1131 |   BasicBlock::iterator BBI(SI);
1132 |   for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts;
1133 |        --ScanInsts) {
1134 |     --BBI;
1135 |     // Don't count debug info directives, lest they affect codegen,
1136 |     // and we skip pointer-to-pointer bitcasts, which are NOPs.
1137 |     if (isa<DbgInfoIntrinsic>(BBI) ||
1138 |         (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
1139 |       ScanInsts++;
1140 |       continue;
1141 |     }
1142 |
1143 |     if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
1144 |       // Prev store isn't volatile, and stores to the same location?
1145 |       if (PrevSI->isUnordered() && equivalentAddressValues(PrevSI->getOperand(1),
1146 |                                                            SI.getOperand(1))) {
1147 |         ++NumDeadStore;
1148 |         ++BBI;
1149 |         eraseInstFromFunction(*PrevSI);
1150 |         continue;
1151 |       }
1152 |       break;
1153 |     }
1154 |
1155 |     // If this is a load, we have to stop. However, if the loaded value is from
1156 |     // the pointer we're loading and is producing the pointer we're storing,
1157 |     // then *this* store is dead (X = load P; store X -> P).
1158 |     if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
1159 |       if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr)) {
1160 |         assert(SI.isUnordered() && "can't eliminate ordering operation");
1161 |         return eraseInstFromFunction(SI);
1162 |       }
1163 |
1164 |       // Otherwise, this is a load from some other location. Stores before it
1165 |       // may not be dead.
1166 |       break;
1167 |     }
1168 |
1169 |     // Don't skip over loads or things that can modify memory.
1170 |     if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory())
1171 |       break;
1172 |   }
1173 |
1174 |   // store X, null -> turns into 'unreachable' in SimplifyCFG
1175 |   if (isa<ConstantPointerNull>(Ptr) && SI.getPointerAddressSpace() == 0) {
1176 |     if (!isa<UndefValue>(Val)) {
1177 |       SI.setOperand(0, UndefValue::get(Val->getType()));
1178 |       if (Instruction *U = dyn_cast<Instruction>(Val))
1179 |         Worklist.Add(U);  // Dropped a use.
1180 |     }
1181 |     return nullptr;  // Do not modify these!
1182 |   }
1183 |
1184 |   // store undef, Ptr -> noop
1185 |   if (isa<UndefValue>(Val))
1186 |     return eraseInstFromFunction(SI);
1187 |
1188 |   // The code below needs to be audited and adjusted for unordered atomics
1189 |   if (!SI.isSimple())
1190 |     return nullptr;
1191 |
1192 |   // If this store is the last instruction in the basic block (possibly
1193 |   // excepting debug info instructions), and if the block ends with an
1194 |   // unconditional branch, try to move it to the successor block.
1195 |   BBI = SI.getIterator();
1196 |   do {
1197 |     ++BBI;
1198 |   } while (isa<DbgInfoIntrinsic>(BBI) ||
1199 |            (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy()));
1200 |   if (BranchInst *BI = dyn_cast<BranchInst>(BBI))
1201 |     if (BI->isUnconditional())
1202 |       if (SimplifyStoreAtEndOfBlock(SI))
1203 |         return nullptr;  // xform done!
1204 |
1205 |   return nullptr;
1206 | }
1207 |
1208 | /// SimplifyStoreAtEndOfBlock - Turn things like:
1209 | ///   if () { *P = v1; } else { *P = v2 }
1210 | /// into a phi node with a store in the successor.
1211 | ///
1212 | /// Simplify things like:
1213 | ///   *P = v1; if () { *P = v2; }
1214 | /// into a phi node with a store in the successor.
1215 | ///
1216 | bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
1217 |   BasicBlock *StoreBB = SI.getParent();
1218 |
1219 |   // Check to see if the successor block has exactly two incoming edges. If
1220 |   // so, see if the other predecessor contains a store to the same location.
1221 |   // If so, insert a PHI node (if needed) and move the stores down.
1222 |   BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);
1223 |
1224 |   // Determine whether Dest has exactly two predecessors and, if so, compute
1225 |   // the other predecessor.
1226 |   pred_iterator PI = pred_begin(DestBB);
1227 |   BasicBlock *P = *PI;
1228 |   BasicBlock *OtherBB = nullptr;
| ||||
1229 |
1230 |   if (P != StoreBB)
1231 |     OtherBB = P;
1232 |
1233 |   if (++PI == pred_end(DestBB))
1234 |     return false;
1235 |
1236 |   P = *PI;
1237 |   if (P != StoreBB) {
1238 |     if (OtherBB)
1239 |       return false;
1240 |     OtherBB = P;
1241 |   }
1242 |   if (++PI != pred_end(DestBB))
1243 |     return false;
1244 |
1245 |   // Bail out if all the relevant blocks aren't distinct (this can happen,
1246 |   // for example, if SI is in an infinite loop)
1247 |   if (StoreBB == DestBB || OtherBB == DestBB)
1248 |     return false;
1249 |
1250 |   // Verify that the other block ends in a branch and is not otherwise empty.
1251 |   BasicBlock::iterator BBI(OtherBB->getTerminator());
| ||||
1252 |   BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
1253 |   if (!OtherBr || BBI == OtherBB->begin())
1254 |     return false;
1255 |
1256 |   // If the other block ends in an unconditional branch, check for the 'if then
1257 |   // else' case: there is an instruction before the branch.
1258 |   StoreInst *OtherStore = nullptr;
1259 |   if (OtherBr->isUnconditional()) {
1260 |     --BBI;
1261 |     // Skip over debugging info.
1262 |     while (isa<DbgInfoIntrinsic>(BBI) ||
1263 |            (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
1264 |       if (BBI == OtherBB->begin())
1265 |         return false;
1266 |       --BBI;
1267 |     }
1268 |     // If this isn't a store, isn't a store to the same location, or is not the
1269 |     // right kind of store, bail out.
1270 |     OtherStore = dyn_cast<StoreInst>(BBI);
1271 |     if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) ||
1272 |         !SI.isSameOperationAs(OtherStore))
1273 |       return false;
1274 |   } else {
1275 |     // Otherwise, the other block ended with a conditional branch. If one of the
1276 |     // destinations is StoreBB, then we have the if/then case.
1277 |     if (OtherBr->getSuccessor(0) != StoreBB &&
1278 |         OtherBr->getSuccessor(1) != StoreBB)
1279 |       return false;
1280 |
1281 |     // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
1282 |     // if/then triangle. See if there is a store to the same ptr as SI that
1283 |     // lives in OtherBB.
1284 |     for (;; --BBI) {
1285 |       // Check to see if we find the matching store.
1286 |       if ((OtherStore = dyn_cast<StoreInst>(BBI))) {
1287 |         if (OtherStore->getOperand(1) != SI.getOperand(1) ||
1288 |             !SI.isSameOperationAs(OtherStore))
1289 |           return false;
1290 |         break;
1291 |       }
1292 |       // If we find something that may be using or overwriting the stored
1293 |       // value, or if we run out of instructions, we can't do the xform.
1294 |       if (BBI->mayReadFromMemory() || BBI->mayWriteToMemory() ||
1295 |           BBI == OtherBB->begin())
1296 |         return false;
1297 |     }
1298 |
1299 |     // In order to eliminate the store in OtherBr, we have to
1300 |     // make sure nothing reads or overwrites the stored value in
1301 |     // StoreBB.
1302 |     for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
1303 |       // FIXME: This should really be AA driven.
1304 |       if (I->mayReadFromMemory() || I->mayWriteToMemory())
1305 |         return false;
1306 |     }
1307 |   }
1308 |
1309 |   // Insert a PHI node now if we need it.
1310 |   Value *MergedVal = OtherStore->getOperand(0);
1311 |   if (MergedVal != SI.getOperand(0)) {
1312 |     PHINode *PN = PHINode::Create(MergedVal->getType(), 2, "storemerge");
1313 |     PN->addIncoming(SI.getOperand(0), SI.getParent());
1314 |     PN->addIncoming(OtherStore->getOperand(0), OtherBB);
1315 |     MergedVal = InsertNewInstBefore(PN, DestBB->front());
1316 |   }
1317 |
1318 |   // Advance to a place where it is safe to insert the new store and
1319 |   // insert it.
1320 |   BBI = DestBB->getFirstInsertionPt();
1321 |   StoreInst *NewSI = new StoreInst(MergedVal, SI.getOperand(1),
1322 |                                    SI.isVolatile(),
1323 |                                    SI.getAlignment(),
1324 |                                    SI.getOrdering(),
1325 |                                    SI.getSynchScope());
1326 |   InsertNewInstBefore(NewSI, *BBI);
1327 |   NewSI->setDebugLoc(OtherStore->getDebugLoc());
1328 |
1329 |   // If the two stores had AA tags, merge them.
1330 |   AAMDNodes AATags;
1331 |   SI.getAAMetadata(AATags);
1332 |   if (AATags) {
1333 |     OtherStore->getAAMetadata(AATags, /* Merge = */ true);
1334 |     NewSI->setAAMetadata(AATags);
1335 |   }
1336 |
1337 |   // Nuke the old stores.
1338 |   eraseInstFromFunction(SI);
1339 |   eraseInstFromFunction(*OtherStore);
1340 |   return true;
1341 | }
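
A minimal, self-contained C++ sketch of the path the analyzer reports,
assuming the reading given before the listing. Block, wouldDereferenceNull,
and main are hypothetical stand-ins that model only the pointer logic, not
LLVM's types or APIs:

// sketch.cpp -- hypothetical model of the predecessor scan at lines 1226-1251.
#include <cstdio>

struct Block {
  Block *preds[2];  // exactly two predecessor edges in this sketch
};

static bool wouldDereferenceNull(Block *StoreBB, Block *DestBB) {
  Block **PI = DestBB->preds;  // pred_begin(DestBB)
  Block *P = *PI;
  Block *OtherBB = nullptr;    // line 1228

  if (P != StoreBB)
    OtherBB = P;               // line 1231

  P = *++PI;                   // second predecessor (line 1236)
  if (P != StoreBB) {
    if (OtherBB)
      return false;            // a third distinct predecessor: bail (lines 1238-1239)
    OtherBB = P;               // line 1240
  }

  // Line 1247: "OtherBB == DestBB" compares nullptr with DestBB -- no bail-out.
  if (StoreBB == DestBB || OtherBB == DestBB)
    return false;

  // Line 1251 would now call OtherBB->getTerminator(); OtherBB is null when
  // both predecessor edges came from StoreBB.
  return OtherBB == nullptr;
}

int main() {
  Block store{}, dest{};
  dest.preds[0] = &store;  // both edges into DestBB originate in StoreBB,
  dest.preds[1] = &store;  // as with "br i1 %c, label %dest, label %dest"
  std::printf("null dereference on this path: %s\n",
              wouldDereferenceNull(&store, &dest) ? "yes" : "no");
  return 0;
}

On this reading, a guard such as "if (!OtherBB) return false;" before line
1251 would make the null path explicit. Whether the path is reachable in
practice is a separate question: visitStoreInst only calls
SimplifyStoreAtEndOfBlock after seeing an unconditional branch at the end of
StoreBB (lines 1200-1202), which contributes a single edge to DestBB, so the
two-edges-from-StoreBB scenario would need a conditional branch and the report
may be a false positive; the analyzer reasons about this function's paths in
isolation.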