File: lib/CodeGen/CodeGenPrepare.cpp
Warning: line 2236, column 18: Called C++ object pointer is null
//===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass munges the code in the input function to better prepare it for
// SelectionDAG-based code generation. This works around limitations in its
// basic-block-at-a-time approach. It should eventually be removed.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/IR/ValueMap.h"
#include "llvm/Pass.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/BypassSlowDivision.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
#include "llvm/Transforms/Utils/ValueMapper.h"

using namespace llvm;
using namespace llvm::PatternMatch;

#define DEBUG_TYPE "codegenprepare"

STATISTIC(NumBlocksElim, "Number of blocks eliminated");
STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
STATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
                      "sunken Cmps");
STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
                       "of sunken Casts");
STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
                          "computations were sunk");
STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
STATISTIC(NumAndsAdded,
          "Number of and mask instructions added to form ext loads");
STATISTIC(NumAndUses, "Number of uses of and mask instructions optimized");
STATISTIC(NumRetsDup, "Number of return instructions duplicated");
STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed");

STATISTIC(NumMemCmpCalls, "Number of memcmp calls");
STATISTIC(NumMemCmpNotConstant, "Number of memcmp calls without constant size");
STATISTIC(NumMemCmpGreaterThanMax,
          "Number of memcmp calls with size greater than max size");
STATISTIC(NumMemCmpInlined, "Number of inlined memcmp calls");

static cl::opt<bool> DisableBranchOpts(
    "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
    cl::desc("Disable branch optimizations in CodeGenPrepare"));

static cl::opt<bool>
    DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false),
                  cl::desc("Disable GC optimizations in CodeGenPrepare"));

static cl::opt<bool> DisableSelectToBranch(
    "disable-cgp-select2branch", cl::Hidden, cl::init(false),
    cl::desc("Disable select to branch conversion."));

static cl::opt<bool> AddrSinkUsingGEPs(
    "addr-sink-using-gep", cl::Hidden, cl::init(true),
    cl::desc("Address sinking in CGP using GEPs."));

static cl::opt<bool> EnableAndCmpSinking(
    "enable-andcmp-sinking", cl::Hidden, cl::init(true),
    cl::desc("Enable sinking and/cmp into branches."));

static cl::opt<bool> DisableStoreExtract(
    "disable-cgp-store-extract", cl::Hidden, cl::init(false),
    cl::desc("Disable store(extract) optimizations in CodeGenPrepare"));

static cl::opt<bool> StressStoreExtract(
    "stress-cgp-store-extract", cl::Hidden, cl::init(false),
    cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"));

static cl::opt<bool> DisableExtLdPromotion(
    "disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
    cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in "
             "CodeGenPrepare"));

static cl::opt<bool> StressExtLdPromotion(
    "stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
    cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) "
             "optimization in CodeGenPrepare"));

static cl::opt<bool> DisablePreheaderProtect(
    "disable-preheader-prot", cl::Hidden, cl::init(false),
    cl::desc("Disable protection against removing loop preheaders"));

static cl::opt<bool> ProfileGuidedSectionPrefix(
    "profile-guided-section-prefix", cl::Hidden, cl::init(true), cl::ZeroOrMore,
    cl::desc("Use profile info to add section prefix for hot/cold functions"));

static cl::opt<unsigned> FreqRatioToSkipMerge(
    "cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2),
    cl::desc("Skip merging empty blocks if (frequency of empty block) / "
             "(frequency of destination block) is greater than this ratio"));

static cl::opt<bool> ForceSplitStore(
    "force-split-store", cl::Hidden, cl::init(false),
    cl::desc("Force store splitting no matter what the target query says."));

static cl::opt<bool>
EnableTypePromotionMerge("cgp-type-promotion-merge", cl::Hidden,
    cl::desc("Enable merging of redundant sexts when one is dominating"
             " the other."), cl::init(true));

static cl::opt<unsigned> MemCmpNumLoadsPerBlock(
    "memcmp-num-loads-per-block", cl::Hidden, cl::init(1),
    cl::desc("The number of loads per basic block for inline expansion of "
             "memcmp that is only being compared against zero."));

namespace {
typedef SmallPtrSet<Instruction *, 16> SetOfInstrs;
typedef PointerIntPair<Type *, 1, bool> TypeIsSExt;
typedef DenseMap<Instruction *, TypeIsSExt> InstrToOrigTy;
typedef SmallVector<Instruction *, 16> SExts;
typedef DenseMap<Value *, SExts> ValueToSExts;
class TypePromotionTransaction;

class CodeGenPrepare : public FunctionPass {
  const TargetMachine *TM;
  const TargetSubtargetInfo *SubtargetInfo;
  const TargetLowering *TLI;
  const TargetRegisterInfo *TRI;
  const TargetTransformInfo *TTI;
  const TargetLibraryInfo *TLInfo;
  const LoopInfo *LI;
  std::unique_ptr<BlockFrequencyInfo> BFI;
  std::unique_ptr<BranchProbabilityInfo> BPI;

  /// As we scan instructions optimizing them, this is the next instruction
  /// to optimize. Transforms that can invalidate this should update it.
  BasicBlock::iterator CurInstIterator;

  /// Keeps track of non-local addresses that have been sunk into a block.
  /// This allows us to avoid inserting duplicate code for blocks with
  /// multiple load/stores of the same address.
  ValueMap<Value*, Value*> SunkAddrs;

  /// Keeps track of all instructions inserted for the current function.
  SetOfInstrs InsertedInsts;
  /// Keeps track of the types of the related instructions before their
  /// promotion for the current function.
  InstrToOrigTy PromotedInsts;

  /// Keep track of instructions removed during promotion.
  SetOfInstrs RemovedInsts;

  /// Keep track of sext chains based on their initial value.
  DenseMap<Value *, Instruction *> SeenChainsForSExt;

  /// Keep track of SExt promoted.
  ValueToSExts ValToSExtendedUses;

  /// True if CFG is modified in any way.
  bool ModifiedDT;

  /// True if optimizing for size.
  bool OptSize;

  /// DataLayout for the Function being processed.
  const DataLayout *DL;

public:
  static char ID; // Pass identification, replacement for typeid
  CodeGenPrepare()
      : FunctionPass(ID), TM(nullptr), TLI(nullptr), TTI(nullptr),
        DL(nullptr) {
    initializeCodeGenPreparePass(*PassRegistry::getPassRegistry());
  }
  bool runOnFunction(Function &F) override;

  StringRef getPassName() const override { return "CodeGen Prepare"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // FIXME: When we can selectively preserve passes, preserve the domtree.
    AU.addRequired<ProfileSummaryInfoWrapperPass>();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
    AU.addRequired<LoopInfoWrapperPass>();
  }

private:
  bool eliminateFallThrough(Function &F);
  bool eliminateMostlyEmptyBlocks(Function &F);
  BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB);
  bool canMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
  void eliminateMostlyEmptyBlock(BasicBlock *BB);
  bool isMergingEmptyBlockProfitable(BasicBlock *BB, BasicBlock *DestBB,
                                     bool isPreheader);
  bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT);
  bool optimizeInst(Instruction *I, bool &ModifiedDT);
  bool optimizeMemoryInst(Instruction *I, Value *Addr,
                          Type *AccessTy, unsigned AS);
  bool optimizeInlineAsmInst(CallInst *CS);
  bool optimizeCallInst(CallInst *CI, bool &ModifiedDT);
  bool optimizeExt(Instruction *&I);
  bool optimizeExtUses(Instruction *I);
  bool optimizeLoadExt(LoadInst *I);
  bool optimizeSelectInst(SelectInst *SI);
  bool optimizeShuffleVectorInst(ShuffleVectorInst *SI);
  bool optimizeSwitchInst(SwitchInst *CI);
  bool optimizeExtractElementInst(Instruction *Inst);
  bool dupRetToEnableTailCallOpts(BasicBlock *BB);
  bool placeDbgValues(Function &F);
  bool canFormExtLd(const SmallVectorImpl<Instruction *> &MovedExts,
                    LoadInst *&LI, Instruction *&Inst, bool HasPromoted);
  bool tryToPromoteExts(TypePromotionTransaction &TPT,
                        const SmallVectorImpl<Instruction *> &Exts,
                        SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
                        unsigned CreatedInstsCost = 0);
  bool mergeSExts(Function &F);
  bool performAddressTypePromotion(
      Instruction *&Inst,
      bool AllowPromotionWithoutCommonHeader,
      bool HasPromoted, TypePromotionTransaction &TPT,
      SmallVectorImpl<Instruction *> &SpeculativelyMovedExts);
  bool splitBranchCondition(Function &F);
  bool simplifyOffsetableRelocate(Instruction &I);
  bool splitIndirectCriticalEdges(Function &F);
};
} // end anonymous namespace

char CodeGenPrepare::ID = 0;
INITIALIZE_PASS_BEGIN(CodeGenPrepare, DEBUG_TYPE,
                      "Optimize for code generation", false, false)
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_END(CodeGenPrepare, DEBUG_TYPE,
                    "Optimize for code generation", false, false)

FunctionPass *llvm::createCodeGenPreparePass() { return new CodeGenPrepare(); }

bool CodeGenPrepare::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  DL = &F.getParent()->getDataLayout();

  bool EverMadeChange = false;
  // Clear per function information.
  InsertedInsts.clear();
  PromotedInsts.clear();
  BFI.reset();
  BPI.reset();

  ModifiedDT = false;
  if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>()) {
    TM = &TPC->getTM<TargetMachine>();
    SubtargetInfo = TM->getSubtargetImpl(F);
    TLI = SubtargetInfo->getTargetLowering();
    TRI = SubtargetInfo->getRegisterInfo();
  }
  TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
  TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  OptSize = F.optForSize();

  if (ProfileGuidedSectionPrefix) {
    ProfileSummaryInfo *PSI =
        getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
    if (PSI->isFunctionHotInCallGraph(&F))
      F.setSectionPrefix(".hot");
    else if (PSI->isFunctionColdInCallGraph(&F))
      F.setSectionPrefix(".unlikely");
  }

  /// This optimization identifies DIV instructions that can be
  /// profitably bypassed and carried out with a shorter, faster divide.
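  //
  // Illustrative sketch (hypothetical target, not taken from this file): on a
  // target where 64-bit divides are slow but 32-bit divides are fast,
  // bypassSlowDivision rewrites
  //   %q = udiv i64 %a, %b
  // into a run-time guarded fast path, roughly:
  //   %both = or i64 %a, %b
  //   %fits = icmp ult i64 %both, 4294967296  ; do both operands fit in 32 bits?
  //   br i1 %fits, label %fast, label %slow
  // where %fast truncates, performs a udiv i32, and zero-extends the quotient,
  // %slow keeps the original 64-bit divide, and a phi merges the two results.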
  if (!OptSize && TLI && TLI->isSlowDivBypassed()) {
    const DenseMap<unsigned int, unsigned int> &BypassWidths =
       TLI->getBypassSlowDivWidths();
    BasicBlock* BB = &*F.begin();
    while (BB != nullptr) {
      // bypassSlowDivision may create new BBs, but we don't want to reapply
      // the optimization to those blocks.
      BasicBlock* Next = BB->getNextNode();
      EverMadeChange |= bypassSlowDivision(BB, BypassWidths);
      BB = Next;
    }
  }

  // Eliminate blocks that contain only PHI nodes and an
  // unconditional branch.
  EverMadeChange |= eliminateMostlyEmptyBlocks(F);

  // If the llvm.dbg.value is far away from the value, then iSel may not be
  // able to handle it properly. iSel will drop the llvm.dbg.value if it
  // cannot find a node corresponding to the value.
  EverMadeChange |= placeDbgValues(F);

  if (!DisableBranchOpts)
    EverMadeChange |= splitBranchCondition(F);

  // Split some critical edges where one of the sources is an indirect branch,
  // to help generate sane code for PHIs involving such edges.
  EverMadeChange |= splitIndirectCriticalEdges(F);

  bool MadeChange = true;
  while (MadeChange) {
    MadeChange = false;
    SeenChainsForSExt.clear();
    ValToSExtendedUses.clear();
    RemovedInsts.clear();
    for (Function::iterator I = F.begin(); I != F.end(); ) {
      BasicBlock *BB = &*I++;
      bool ModifiedDTOnIteration = false;
      MadeChange |= optimizeBlock(*BB, ModifiedDTOnIteration);

      // Restart BB iteration if the dominator tree of the Function was changed.
      if (ModifiedDTOnIteration)
        break;
    }
    if (EnableTypePromotionMerge && !ValToSExtendedUses.empty())
      MadeChange |= mergeSExts(F);

    // Really free removed instructions during promotion.
    for (Instruction *I : RemovedInsts)
      I->deleteValue();

    EverMadeChange |= MadeChange;
  }

  SunkAddrs.clear();

  if (!DisableBranchOpts) {
    MadeChange = false;
    SmallPtrSet<BasicBlock*, 8> WorkList;
    for (BasicBlock &BB : F) {
      SmallVector<BasicBlock *, 2> Successors(succ_begin(&BB), succ_end(&BB));
      MadeChange |= ConstantFoldTerminator(&BB, true);
      if (!MadeChange) continue;

      for (SmallVectorImpl<BasicBlock*>::iterator
             II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
        if (pred_begin(*II) == pred_end(*II))
          WorkList.insert(*II);
    }

    // Delete the dead blocks and any of their dead successors.
    MadeChange |= !WorkList.empty();
    while (!WorkList.empty()) {
      BasicBlock *BB = *WorkList.begin();
      WorkList.erase(BB);
      SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB));

      DeleteDeadBlock(BB);

      for (SmallVectorImpl<BasicBlock*>::iterator
             II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
        if (pred_begin(*II) == pred_end(*II))
          WorkList.insert(*II);
    }

    // Merge pairs of basic blocks with unconditional branches, connected by
    // a single edge.
    if (EverMadeChange || MadeChange)
      MadeChange |= eliminateFallThrough(F);

    EverMadeChange |= MadeChange;
  }

  if (!DisableGCOpts) {
    SmallVector<Instruction *, 2> Statepoints;
    for (BasicBlock &BB : F)
      for (Instruction &I : BB)
        if (isStatepoint(I))
          Statepoints.push_back(&I);
    for (auto &I : Statepoints)
      EverMadeChange |= simplifyOffsetableRelocate(*I);
  }

  return EverMadeChange;
}

/// Merge basic blocks which are connected by a single edge, where one of the
/// basic blocks has a single successor pointing to the other basic block,
/// which has a single predecessor.
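///
/// For illustration (hypothetical IR, not taken from this file):
///   bb1:
///     ...
///     br label %bb2     ; bb1's only successor
///   bb2:                ; bb2's only predecessor is bb1
///     ...
/// collapses into a single block containing both bodies, removing the
/// unconditional branch.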
bool CodeGenPrepare::eliminateFallThrough(Function &F) {
  bool Changed = false;
  // Scan all of the blocks in the function, except for the entry block.
  for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) {
    BasicBlock *BB = &*I++;
    // If the destination block has a single pred, then this is a trivial
    // edge, just collapse it.
    BasicBlock *SinglePred = BB->getSinglePredecessor();

    // Don't merge if BB's address is taken.
    if (!SinglePred || SinglePred == BB || BB->hasAddressTaken()) continue;

    BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator());
    if (Term && !Term->isConditional()) {
      Changed = true;
      DEBUG(dbgs() << "To merge:\n" << *SinglePred << "\n\n\n");
      // Remember if SinglePred was the entry block of the function.
      // If so, we will need to move BB back to the entry position.
      bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
      MergeBasicBlockIntoOnlyPred(BB, nullptr);

      if (isEntry && BB != &BB->getParent()->getEntryBlock())
        BB->moveBefore(&BB->getParent()->getEntryBlock());

      // We have erased a block. Update the iterator.
      I = BB->getIterator();
    }
  }
  return Changed;
}

/// Find a destination block from BB if BB is a mergeable empty block.
BasicBlock *CodeGenPrepare::findDestBlockOfMergeableEmptyBlock(BasicBlock *BB) {
  // If this block doesn't end with an uncond branch, ignore it.
  BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
  if (!BI || !BI->isUnconditional())
    return nullptr;

  // If the instruction before the branch (skipping debug info) isn't a phi
  // node, then other stuff is happening here.
  BasicBlock::iterator BBI = BI->getIterator();
  if (BBI != BB->begin()) {
    --BBI;
    while (isa<DbgInfoIntrinsic>(BBI)) {
      if (BBI == BB->begin())
        break;
      --BBI;
    }
    if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI))
      return nullptr;
  }

  // Do not break infinite loops.
  BasicBlock *DestBB = BI->getSuccessor(0);
  if (DestBB == BB)
    return nullptr;

  if (!canMergeBlocks(BB, DestBB))
    DestBB = nullptr;

  return DestBB;
}

// Return the unique indirectbr predecessor of a block. This may return null
// even if such a predecessor exists, if it's not useful for splitting.
// If a predecessor is found, OtherPreds will contain all other (non-indirectbr)
// predecessors of BB.
static BasicBlock *
findIBRPredecessor(BasicBlock *BB, SmallVectorImpl<BasicBlock *> &OtherPreds) {
  // If the block doesn't have any PHIs, we don't care about it, since there's
  // no point in splitting it.
  PHINode *PN = dyn_cast<PHINode>(BB->begin());
  if (!PN)
    return nullptr;

  // Verify we have exactly one IBR predecessor.
  // Conservatively bail out if one of the other predecessors is not a "regular"
  // terminator (that is, not a switch or a br).
  BasicBlock *IBB = nullptr;
  for (unsigned Pred = 0, E = PN->getNumIncomingValues(); Pred != E; ++Pred) {
    BasicBlock *PredBB = PN->getIncomingBlock(Pred);
    TerminatorInst *PredTerm = PredBB->getTerminator();
    switch (PredTerm->getOpcode()) {
    case Instruction::IndirectBr:
      if (IBB)
        return nullptr;
      IBB = PredBB;
      break;
    case Instruction::Br:
    case Instruction::Switch:
      OtherPreds.push_back(PredBB);
      continue;
    default:
      return nullptr;
    }
  }

  return IBB;
}

// Split critical edges where the source of the edge is an indirectbr
// instruction. This isn't always possible, but we can handle some easy cases.
// This is useful because MI is unable to split such critical edges,
// which means it will not be able to sink instructions along those edges.
// This is especially painful for indirect branches with many successors, where
// we end up having to prepare all outgoing values in the origin block.
//
// Our normal algorithm for splitting critical edges requires us to update
// the outgoing edges of the edge origin block, but for an indirectbr this
// is hard, since it would require finding and updating the block addresses
// the indirect branch uses. But if a block only has a single indirectbr
// predecessor, with the others being regular branches, we can do it in a
// different way.
// Say we have A -> D, B -> D, I -> D where only I -> D is an indirectbr.
// We can split D into D0 and D1, where D0 contains only the PHIs from D,
// and D1 is the D block body. We can then duplicate D0 as D0A and D0B, and
// create the following structure:
// A -> D0A, B -> D0A, I -> D0B, D0A -> D1, D0B -> D1
bool CodeGenPrepare::splitIndirectCriticalEdges(Function &F) {
  // Check whether the function has any indirectbrs, and collect which blocks
  // they may jump to. Since most functions don't have indirect branches,
  // this lowers the common case's overhead to O(Blocks) instead of O(Edges).
  SmallSetVector<BasicBlock *, 16> Targets;
  for (auto &BB : F) {
    auto *IBI = dyn_cast<IndirectBrInst>(BB.getTerminator());
    if (!IBI)
      continue;

    for (unsigned Succ = 0, E = IBI->getNumSuccessors(); Succ != E; ++Succ)
      Targets.insert(IBI->getSuccessor(Succ));
  }

  if (Targets.empty())
    return false;

  bool Changed = false;
  for (BasicBlock *Target : Targets) {
    SmallVector<BasicBlock *, 16> OtherPreds;
    BasicBlock *IBRPred = findIBRPredecessor(Target, OtherPreds);
    // If we did not find an indirectbr, or the indirectbr is the only
    // incoming edge, this isn't the kind of edge we're looking for.
    if (!IBRPred || OtherPreds.empty())
      continue;

    // Don't even think about ehpads/landingpads.
    Instruction *FirstNonPHI = Target->getFirstNonPHI();
    if (FirstNonPHI->isEHPad() || Target->isLandingPad())
      continue;

    BasicBlock *BodyBlock = Target->splitBasicBlock(FirstNonPHI, ".split");
    // It's possible Target was its own successor through an indirectbr.
    // In this case, the indirectbr now comes from BodyBlock.
    if (IBRPred == Target)
      IBRPred = BodyBlock;

    // At this point Target only has PHIs, and BodyBlock has the rest of the
    // block's body. Create a copy of Target that will be used by the "direct"
    // preds.
    ValueToValueMapTy VMap;
    BasicBlock *DirectSucc = CloneBasicBlock(Target, VMap, ".clone", &F);

    for (BasicBlock *Pred : OtherPreds) {
      // If the target is a loop to itself, then the terminator of the split
      // block needs to be updated.
      if (Pred == Target)
        BodyBlock->getTerminator()->replaceUsesOfWith(Target, DirectSucc);
      else
        Pred->getTerminator()->replaceUsesOfWith(Target, DirectSucc);
    }

    // Ok, now fix up the PHIs. We know the two blocks only have PHIs, and that
    // they are clones, so the number of PHIs are the same.
    // (a) Remove the edge coming from IBRPred from the "Direct" PHI
    // (b) Leave that as the only edge in the "Indirect" PHI.
    // (c) Merge the two in the body block.
    BasicBlock::iterator Indirect = Target->begin(),
                         End = Target->getFirstNonPHI()->getIterator();
    BasicBlock::iterator Direct = DirectSucc->begin();
    BasicBlock::iterator MergeInsert = BodyBlock->getFirstInsertionPt();

    assert(&*End == Target->getTerminator() &&
           "Block was expected to only contain PHIs");

    while (Indirect != End) {
      PHINode *DirPHI = cast<PHINode>(Direct);
      PHINode *IndPHI = cast<PHINode>(Indirect);

      // Now, clean up - the direct block shouldn't get the indirect value,
      // and vice versa.
      DirPHI->removeIncomingValue(IBRPred);
      Direct++;

      // Advance the pointer here, to avoid invalidation issues when the old
      // PHI is erased.
      Indirect++;

      PHINode *NewIndPHI = PHINode::Create(IndPHI->getType(), 1, "ind", IndPHI);
      NewIndPHI->addIncoming(IndPHI->getIncomingValueForBlock(IBRPred),
                             IBRPred);

      // Create a PHI in the body block, to merge the direct and indirect
      // predecessors.
      PHINode *MergePHI =
          PHINode::Create(IndPHI->getType(), 2, "merge", &*MergeInsert);
      MergePHI->addIncoming(NewIndPHI, Target);
      MergePHI->addIncoming(DirPHI, DirectSucc);

      IndPHI->replaceAllUsesWith(MergePHI);
      IndPHI->eraseFromParent();
    }

    Changed = true;
  }

  return Changed;
}

/// Eliminate blocks that contain only PHI nodes, debug info directives, and an
/// unconditional branch. Passes before isel (e.g. LSR/loopsimplify) often split
/// edges in ways that are non-optimal for isel. Start by eliminating these
/// blocks so we can split them the way we want them.
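///
/// A "mostly empty" block looks like (illustrative IR, not taken from this
/// file):
///   bb:
///     %p = phi i32 [ %x, %a ], [ %y, %b ]
///     br label %dest
/// Folding bb into %dest lets later passes split the %a/%b edges however they
/// like, rather than being stuck with the pre-split form.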
bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) {
  SmallPtrSet<BasicBlock *, 16> Preheaders;
  SmallVector<Loop *, 16> LoopList(LI->begin(), LI->end());
  while (!LoopList.empty()) {
    Loop *L = LoopList.pop_back_val();
    LoopList.insert(LoopList.end(), L->begin(), L->end());
    if (BasicBlock *Preheader = L->getLoopPreheader())
      Preheaders.insert(Preheader);
  }

  bool MadeChange = false;
  // Note that this intentionally skips the entry block.
  for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) {
    BasicBlock *BB = &*I++;
    BasicBlock *DestBB = findDestBlockOfMergeableEmptyBlock(BB);
    if (!DestBB ||
        !isMergingEmptyBlockProfitable(BB, DestBB, Preheaders.count(BB)))
      continue;

    eliminateMostlyEmptyBlock(BB);
    MadeChange = true;
  }
  return MadeChange;
}

bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB,
                                                   BasicBlock *DestBB,
                                                   bool isPreheader) {
  // Do not delete loop preheaders if doing so would create a critical edge.
  // Loop preheaders can be good locations to spill registers. If the
  // preheader is deleted and we create a critical edge, registers may be
  // spilled in the loop body instead.
  if (!DisablePreheaderProtect && isPreheader &&
      !(BB->getSinglePredecessor() &&
        BB->getSinglePredecessor()->getSingleSuccessor()))
    return false;

  // Try to skip merging if the unique predecessor of BB is terminated by a
  // switch or indirect branch instruction, and BB is used as an incoming block
  // of PHIs in DestBB. In such case, merging BB and DestBB would cause ISel to
  // add COPY instructions in the predecessor of BB instead of BB (if it is not
  // merged). Note that the critical edge created by merging such blocks won't
  // be split in MachineSink because the jump table is not analyzable. By
  // keeping such an empty block (BB), ISel will place COPY instructions in BB,
  // not in the predecessor of BB.
  BasicBlock *Pred = BB->getUniquePredecessor();
  if (!Pred ||
      !(isa<SwitchInst>(Pred->getTerminator()) ||
        isa<IndirectBrInst>(Pred->getTerminator())))
    return true;

  if (BB->getTerminator() != BB->getFirstNonPHI())
    return true;

  // We use a simple cost heuristic: skipping the merge is profitable if the
  // cost of skipping is less than the cost of merging, i.e.
  // Cost(skipping merging) < Cost(merging BB), where
  // Cost(skipping merging) is Freq(BB) * (Cost(Copy) + Cost(Branch)), and
  // Cost(merging BB) is Freq(Pred) * Cost(Copy).
  // Assuming Cost(Copy) == Cost(Branch), this simplifies to:
  // Freq(Pred) / Freq(BB) > 2.
  // Note that if there are multiple empty blocks sharing the same incoming
  // value for the PHIs in the DestBB, we consider them together. In such a
  // case, Cost(merging BB) will be the sum of their frequencies.
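  //
  // Worked example with made-up frequencies: with the default ratio of 2,
  // if Freq(Pred) = 1000 and Freq(BB) = 300, then 1000 > 2 * 300, so this
  // function returns false and the merge is skipped; if Freq(BB) were 600,
  // 1000 <= 2 * 600 and the merge would be considered profitable.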

  if (!isa<PHINode>(DestBB->begin()))
    return true;

  SmallPtrSet<BasicBlock *, 16> SameIncomingValueBBs;

  // Find all other incoming blocks from which incoming values of all PHIs in
  // DestBB are the same as the ones from BB.
  for (pred_iterator PI = pred_begin(DestBB), E = pred_end(DestBB); PI != E;
       ++PI) {
    BasicBlock *DestBBPred = *PI;
    if (DestBBPred == BB)
      continue;

    bool HasAllSameValue = true;
    BasicBlock::const_iterator DestBBI = DestBB->begin();
    while (const PHINode *DestPN = dyn_cast<PHINode>(DestBBI++)) {
      if (DestPN->getIncomingValueForBlock(BB) !=
          DestPN->getIncomingValueForBlock(DestBBPred)) {
        HasAllSameValue = false;
        break;
      }
    }
    if (HasAllSameValue)
      SameIncomingValueBBs.insert(DestBBPred);
  }

  // See if all of BB's incoming values are the same as the value from Pred.
  // In this case, there is no reason to skip merging because COPYs are
  // expected to be placed in Pred already.
  if (SameIncomingValueBBs.count(Pred))
    return true;

  if (!BFI) {
    Function &F = *BB->getParent();
    LoopInfo LI{DominatorTree(F)};
    BPI.reset(new BranchProbabilityInfo(F, LI));
    BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
  }

  BlockFrequency PredFreq = BFI->getBlockFreq(Pred);
  BlockFrequency BBFreq = BFI->getBlockFreq(BB);

  for (auto SameValueBB : SameIncomingValueBBs)
    if (SameValueBB->getUniquePredecessor() == Pred &&
        DestBB == findDestBlockOfMergeableEmptyBlock(SameValueBB))
      BBFreq += BFI->getBlockFreq(SameValueBB);

  return PredFreq.getFrequency() <=
         BBFreq.getFrequency() * FreqRatioToSkipMerge;
}

/// Return true if we can merge BB into DestBB if there is a single
/// unconditional branch between them, and BB contains no other non-phi
/// instructions.
bool CodeGenPrepare::canMergeBlocks(const BasicBlock *BB,
                                    const BasicBlock *DestBB) const {
  // We only want to eliminate blocks whose phi nodes are used by phi nodes in
  // the successor. If there are more complex conditions (e.g. preheaders),
  // don't mess around with them.
  BasicBlock::const_iterator BBI = BB->begin();
  while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
    for (const User *U : PN->users()) {
      const Instruction *UI = cast<Instruction>(U);
      if (UI->getParent() != DestBB || !isa<PHINode>(UI))
        return false;
      // If User is inside DestBB block and it is a PHINode then check
      // incoming value. If incoming value is not from BB then this is
      // a complex condition (e.g. preheaders) we want to avoid here.
      if (UI->getParent() == DestBB) {
        if (const PHINode *UPN = dyn_cast<PHINode>(UI))
          for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
            Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
            if (Insn && Insn->getParent() == BB &&
                Insn->getParent() != UPN->getIncomingBlock(I))
              return false;
          }
      }
    }
  }

  // If BB and DestBB contain any common predecessors, then the phi nodes in BB
  // and DestBB may have conflicting incoming values for the block. If so, we
  // can't merge the block.
  const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
  if (!DestBBPN) return true;  // no conflict.

  // Collect the preds of BB.
  SmallPtrSet<const BasicBlock*, 16> BBPreds;
  if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
    // It is faster to get preds from a PHI than with pred_iterator.
    for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
      BBPreds.insert(BBPN->getIncomingBlock(i));
  } else {
    BBPreds.insert(pred_begin(BB), pred_end(BB));
  }

  // Walk the preds of DestBB.
  for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
    BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
    if (BBPreds.count(Pred)) {   // Common predecessor?
      BBI = DestBB->begin();
      while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
        const Value *V1 = PN->getIncomingValueForBlock(Pred);
        const Value *V2 = PN->getIncomingValueForBlock(BB);

        // If V2 is a phi node in BB, look up what the mapped value will be.
        if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
          if (V2PN->getParent() == BB)
            V2 = V2PN->getIncomingValueForBlock(Pred);

        // If there is a conflict, bail out.
        if (V1 != V2) return false;
      }
    }
  }

  return true;
}

/// Eliminate a basic block that has only phi's and an unconditional branch in
/// it.
void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) {
  BranchInst *BI = cast<BranchInst>(BB->getTerminator());
  BasicBlock *DestBB = BI->getSuccessor(0);

  DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB);

  // If the destination block has a single pred, then this is a trivial edge,
  // just collapse it.
  if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
    if (SinglePred != DestBB) {
      // Remember if SinglePred was the entry block of the function. If so, we
      // will need to move BB back to the entry position.
      bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
      MergeBasicBlockIntoOnlyPred(DestBB, nullptr);

      if (isEntry && BB != &BB->getParent()->getEntryBlock())
        BB->moveBefore(&BB->getParent()->getEntryBlock());

      DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
      return;
    }
  }

  // Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB
  // to handle the new incoming edges it is about to have.
  PHINode *PN;
  for (BasicBlock::iterator BBI = DestBB->begin();
       (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
    // Remove the incoming value for BB, and remember it.
    Value *InVal = PN->removeIncomingValue(BB, false);

    // Two options: either the InVal is a phi node defined in BB or it is some
    // value that dominates BB.
    PHINode *InValPhi = dyn_cast<PHINode>(InVal);
    if (InValPhi && InValPhi->getParent() == BB) {
      // Add all of the input values of the input PHI as inputs of this phi.
      for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
        PN->addIncoming(InValPhi->getIncomingValue(i),
                        InValPhi->getIncomingBlock(i));
    } else {
      // Otherwise, add one instance of the dominating value for each edge that
      // we will be adding.
      if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
        for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
          PN->addIncoming(InVal, BBPN->getIncomingBlock(i));
      } else {
        for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
          PN->addIncoming(InVal, *PI);
      }
    }
  }

  // The PHIs are now updated, change everything that refers to BB to use
  // DestBB and remove BB.
  BB->replaceAllUsesWith(DestBB);
  BB->eraseFromParent();
  ++NumBlocksElim;

  DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
}

// Computes a map of base pointer relocation instructions to corresponding
// derived pointer relocation instructions given a vector of all relocate calls.
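//
// For example (illustrative, not taken from this file): given relocates whose
// (base index, derived index) pairs are (4,4), (4,5) and (4,6), the (4,4)
// entry is the base relocation, and the resulting map is
//   relocate(4,4) -> [ relocate(4,5), relocate(4,6) ].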
static void computeBaseDerivedRelocateMap(
    const SmallVectorImpl<GCRelocateInst *> &AllRelocateCalls,
    DenseMap<GCRelocateInst *, SmallVector<GCRelocateInst *, 2>>
        &RelocateInstMap) {
  // Collect information in two maps: one primarily for locating the base
  // object while filling the second map; the second map is the final structure
  // holding a mapping between Base and corresponding Derived relocate calls.
  DenseMap<std::pair<unsigned, unsigned>, GCRelocateInst *> RelocateIdxMap;
  for (auto *ThisRelocate : AllRelocateCalls) {
    auto K = std::make_pair(ThisRelocate->getBasePtrIndex(),
                            ThisRelocate->getDerivedPtrIndex());
    RelocateIdxMap.insert(std::make_pair(K, ThisRelocate));
  }
  for (auto &Item : RelocateIdxMap) {
    std::pair<unsigned, unsigned> Key = Item.first;
    if (Key.first == Key.second)
      // Base relocation: nothing to insert
      continue;

    GCRelocateInst *I = Item.second;
    auto BaseKey = std::make_pair(Key.first, Key.first);

    // We're iterating over RelocateIdxMap so we cannot modify it.
    auto MaybeBase = RelocateIdxMap.find(BaseKey);
    if (MaybeBase == RelocateIdxMap.end())
      // TODO: We might want to insert a new base object relocate and gep off
      // that, if there are enough derived object relocates.
      continue;

    RelocateInstMap[MaybeBase->second].push_back(I);
  }
}

// Accepts a GEP and extracts the operands into a vector provided they're all
// small integer constants.
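//
// For example (illustrative): for 'getelementptr %ty, %ty* %base, i32 0,
// i32 15', OffsetV becomes [i32 0, i32 15]; a non-constant index, or any
// constant larger than 20, makes this helper return false and the GEP is
// left alone.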
static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP,
                                          SmallVectorImpl<Value *> &OffsetV) {
  for (unsigned i = 1; i < GEP->getNumOperands(); i++) {
    // Only accept small constant integer operands
    auto Op = dyn_cast<ConstantInt>(GEP->getOperand(i));
    if (!Op || Op->getZExtValue() > 20)
      return false;
  }

  for (unsigned i = 1; i < GEP->getNumOperands(); i++)
    OffsetV.push_back(GEP->getOperand(i));
  return true;
}

// Takes a RelocatedBase (base pointer relocation instruction) and Targets to
// replace, computes a replacement, and applies it.
static bool
simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase,
                          const SmallVectorImpl<GCRelocateInst *> &Targets) {
  bool MadeChange = false;
  for (GCRelocateInst *ToReplace : Targets) {
    assert(ToReplace->getBasePtrIndex() == RelocatedBase->getBasePtrIndex() &&
           "Not relocating a derived object of the original base object");
    if (ToReplace->getBasePtrIndex() == ToReplace->getDerivedPtrIndex()) {
      // A duplicate relocate call. TODO: coalesce duplicates.
      continue;
    }

    if (RelocatedBase->getParent() != ToReplace->getParent()) {
      // Base and derived relocates are in different basic blocks.
      // In this case transform is only valid when base dominates derived
      // relocate. However it would be too expensive to check dominance
      // for each such relocate, so we skip the whole transformation.
      continue;
    }

    Value *Base = ToReplace->getBasePtr();
    auto Derived = dyn_cast<GetElementPtrInst>(ToReplace->getDerivedPtr());
    if (!Derived || Derived->getPointerOperand() != Base)
      continue;

    SmallVector<Value *, 2> OffsetV;
    if (!getGEPSmallConstantIntOffsetV(Derived, OffsetV))
      continue;

    // Create a Builder and replace the target callsite with a gep
    assert(RelocatedBase->getNextNode() &&
           "Should always have one since it's not a terminator");

    // Insert after RelocatedBase
    IRBuilder<> Builder(RelocatedBase->getNextNode());
    Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc());

    // If gc_relocate does not match the actual type, cast it to the right
    // type. In theory, there must be a bitcast after gc_relocate if the type
    // does not match, and we should reuse it to get the derived pointer. But
    // there could be cases like this:
    // bb1:
    //  ...
    //  %g1 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(...)
    //  br label %merge
    //
    // bb2:
    //  ...
    //  %g2 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(...)
    //  br label %merge
    //
    // merge:
    //  %p1 = phi i8 addrspace(1)* [ %g1, %bb1 ], [ %g2, %bb2 ]
    //  %cast = bitcast i8 addrspace(1)* %p1 to i32 addrspace(1)*
    //
    // In this case, we cannot find the bitcast any more. So we insert a new
    // bitcast no matter whether there is already one or not. In this way, we
    // can handle all cases, and the extra bitcast should be optimized away in
    // later passes.
    Value *ActualRelocatedBase = RelocatedBase;
    if (RelocatedBase->getType() != Base->getType()) {
      ActualRelocatedBase =
          Builder.CreateBitCast(RelocatedBase, Base->getType());
    }
    Value *Replacement =
        Builder.CreateGEP(Derived->getSourceElementType(), ActualRelocatedBase,
                          makeArrayRef(OffsetV));
    Replacement->takeName(ToReplace);
    // If the newly generated derived pointer's type does not match the
    // original derived pointer's type, cast the new derived pointer to match
    // it. Same reasoning as above.
    Value *ActualReplacement = Replacement;
    if (Replacement->getType() != ToReplace->getType()) {
      ActualReplacement =
          Builder.CreateBitCast(Replacement, ToReplace->getType());
    }
    ToReplace->replaceAllUsesWith(ActualReplacement);
    ToReplace->eraseFromParent();

    MadeChange = true;
  }
  return MadeChange;
}

// Turns this:
//
// %base = ...
// %ptr = gep %base + 15
// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
// %base' = relocate(%tok, i32 4, i32 4)
// %ptr' = relocate(%tok, i32 4, i32 5)
// %val = load %ptr'
//
// into this:
//
// %base = ...
// %ptr = gep %base + 15
// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
// %base' = gc.relocate(%tok, i32 4, i32 4)
// %ptr' = gep %base' + 15
// %val = load %ptr'
1045 | bool CodeGenPrepare::simplifyOffsetableRelocate(Instruction &I) { | |||
1046 | bool MadeChange = false; | |||
1047 | SmallVector<GCRelocateInst *, 2> AllRelocateCalls; | |||
1048 | ||||
1049 | for (auto *U : I.users()) | |||
1050 | if (GCRelocateInst *Relocate = dyn_cast<GCRelocateInst>(U)) | |||
1051 | // Collect all the relocate calls associated with a statepoint | |||
1052 | AllRelocateCalls.push_back(Relocate); | |||
1053 | ||||
1054 | // We need at least one base pointer relocation + one derived pointer | |||
1055 | // relocation to mangle | |||
1056 | if (AllRelocateCalls.size() < 2) | |||
1057 | return false; | |||
1058 | ||||
1059 | // RelocateInstMap is a mapping from the base relocate instruction to the | |||
1060 | // corresponding derived relocate instructions | |||
1061 | DenseMap<GCRelocateInst *, SmallVector<GCRelocateInst *, 2>> RelocateInstMap; | |||
1062 | computeBaseDerivedRelocateMap(AllRelocateCalls, RelocateInstMap); | |||
1063 | if (RelocateInstMap.empty()) | |||
1064 | return false; | |||
1065 | ||||
1066 | for (auto &Item : RelocateInstMap) | |||
1067 | // Item.first is the RelocatedBase to offset against | |||
1068 | // Item.second is the vector of Targets to replace | |||
1069 | MadeChange |= simplifyRelocatesOffABase(Item.first, Item.second); | |||
1070 | return MadeChange; | |||
1071 | } | |||
1072 | ||||
1073 | /// SinkCast - Sink the specified cast instruction into its user blocks | |||
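/// For example (a sketch; the value names are illustrative): if
///   %c = bitcast i8* %p to i32*
/// is defined in one block but used in two others, a copy of the cast is
/// materialized at the first insertion point of each user block, the uses are
/// rewritten to the local copies, and the original cast is erased once dead.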
1074 | static bool SinkCast(CastInst *CI) { | |||
1075 | BasicBlock *DefBB = CI->getParent(); | |||
1076 | ||||
1077 | /// InsertedCasts - Only insert a cast in each block once. | |||
1078 | DenseMap<BasicBlock*, CastInst*> InsertedCasts; | |||
1079 | ||||
1080 | bool MadeChange = false; | |||
1081 | for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end(); | |||
1082 | UI != E; ) { | |||
1083 | Use &TheUse = UI.getUse(); | |||
1084 | Instruction *User = cast<Instruction>(*UI); | |||
1085 | ||||
1086 | // Figure out which BB this cast is used in. For PHI's this is the | |||
1087 | // appropriate predecessor block. | |||
1088 | BasicBlock *UserBB = User->getParent(); | |||
1089 | if (PHINode *PN = dyn_cast<PHINode>(User)) { | |||
1090 | UserBB = PN->getIncomingBlock(TheUse); | |||
1091 | } | |||
1092 | ||||
1093 | // Preincrement use iterator so we don't invalidate it. | |||
1094 | ++UI; | |||
1095 | ||||
1096 | // The first insertion point of a block containing an EH pad is after the | |||
1097 | // pad. If the pad is the user, we cannot sink the cast past the pad. | |||
1098 | if (User->isEHPad()) | |||
1099 | continue; | |||
1100 | ||||
1101 | // If the block selected to receive the cast is an EH pad that does not | |||
1102 | // allow non-PHI instructions before the terminator, we can't sink the | |||
1103 | // cast. | |||
1104 | if (UserBB->getTerminator()->isEHPad()) | |||
1105 | continue; | |||
1106 | ||||
1107 | // If this user is in the same block as the cast, don't change the cast. | |||
1108 | if (UserBB == DefBB) continue; | |||
1109 | ||||
1110 | // If we have already inserted a cast into this block, use it. | |||
1111 | CastInst *&InsertedCast = InsertedCasts[UserBB]; | |||
1112 | ||||
1113 | if (!InsertedCast) { | |||
1114 | BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); | |||
1115 | assert(InsertPt != UserBB->end()); | |||
1116 | InsertedCast = CastInst::Create(CI->getOpcode(), CI->getOperand(0), | |||
1117 | CI->getType(), "", &*InsertPt); | |||
1118 | } | |||
1119 | ||||
1120 | // Replace a use of the cast with a use of the new cast. | |||
1121 | TheUse = InsertedCast; | |||
1122 | MadeChange = true; | |||
1123 | ++NumCastUses; | |||
1124 | } | |||
1125 | ||||
1126 | // If we removed all uses, nuke the cast. | |||
1127 | if (CI->use_empty()) { | |||
1128 | CI->eraseFromParent(); | |||
1129 | MadeChange = true; | |||
1130 | } | |||
1131 | ||||
1132 | return MadeChange; | |||
1133 | } | |||
1134 | ||||
1135 | /// If the specified cast instruction is a noop copy (e.g. it's casting from | |||
1136 | /// one pointer type to another, i32->i8 on PPC), sink it into user blocks to | |||
1137 | /// reduce the number of virtual registers that must be created and coalesced. | |||
1138 | /// | |||
1139 | /// Return true if any changes are made. | |||
1140 | /// | |||
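/// For example (a sketch, assuming a target that promotes i16 to i32): a
/// trunc i32 %x to i16 has the same promoted type on both sides, so it is
/// treated as a noop copy and handed to SinkCast. %x is an illustrative name.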
1141 | static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI, | |||
1142 | const DataLayout &DL) { | |||
1143 | // Sink only "cheap" (or nop) address-space casts. This is a weaker condition | |||
1144 | // than sinking only nop casts, but is helpful on some platforms. | |||
1145 | if (auto *ASC = dyn_cast<AddrSpaceCastInst>(CI)) { | |||
1146 | if (!TLI.isCheapAddrSpaceCast(ASC->getSrcAddressSpace(), | |||
1147 | ASC->getDestAddressSpace())) | |||
1148 | return false; | |||
1149 | } | |||
1150 | ||||
1151 | // If this is a noop copy, | |||
1152 | EVT SrcVT = TLI.getValueType(DL, CI->getOperand(0)->getType()); | |||
1153 | EVT DstVT = TLI.getValueType(DL, CI->getType()); | |||
1154 | ||||
1155 | // If this is an fp<->int conversion, it is not a noop copy. | |||
1156 | if (SrcVT.isInteger() != DstVT.isInteger()) | |||
1157 | return false; | |||
1158 | ||||
1159 | // If this is an extension, it will be a zero or sign extension, which | |||
1160 | // isn't a noop. | |||
1161 | if (SrcVT.bitsLT(DstVT)) return false; | |||
1162 | ||||
1163 | // If these values will be promoted, find out what they will be promoted | |||
1164 | // to. This helps us consider truncates on PPC as noop copies when they | |||
1165 | // are. | |||
1166 | if (TLI.getTypeAction(CI->getContext(), SrcVT) == | |||
1167 | TargetLowering::TypePromoteInteger) | |||
1168 | SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT); | |||
1169 | if (TLI.getTypeAction(CI->getContext(), DstVT) == | |||
1170 | TargetLowering::TypePromoteInteger) | |||
1171 | DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT); | |||
1172 | ||||
1173 | // If, after promotion, these are the same types, this is a noop copy. | |||
1174 | if (SrcVT != DstVT) | |||
1175 | return false; | |||
1176 | ||||
1177 | return SinkCast(CI); | |||
1178 | } | |||
1179 | ||||
1180 | /// Try to combine CI into a call to the llvm.uadd.with.overflow intrinsic if | |||
1181 | /// possible. | |||
1182 | /// | |||
1183 | /// Return true if any changes were made. | |||
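/// One shape of the matched pattern (a sketch; value names are illustrative):
///   %add = add i32 %a, %b
///   %cmp = icmp ult i32 %add, %a   ; unsigned-overflow check
/// is rewritten to:
///   %uadd.overflow = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
///   %uadd = extractvalue { i32, i1 } %uadd.overflow, 0
///   %overflow = extractvalue { i32, i1 } %uadd.overflow, 1
/// with %add replaced by %uadd and %cmp by %overflow.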
1184 | static bool CombineUAddWithOverflow(CmpInst *CI) { | |||
1185 | Value *A, *B; | |||
1186 | Instruction *AddI; | |||
1187 | if (!match(CI, | |||
1188 | m_UAddWithOverflow(m_Value(A), m_Value(B), m_Instruction(AddI)))) | |||
1189 | return false; | |||
1190 | ||||
1191 | Type *Ty = AddI->getType(); | |||
1192 | if (!isa<IntegerType>(Ty)) | |||
1193 | return false; | |||
1194 | ||||
1195 | // We don't want to move around uses of condition values this late, so we | |||
1196 | // check if it is legal to create the call to the intrinsic in the basic | |||
1197 | // block containing the icmp: | |||
1198 | ||||
1199 | if (AddI->getParent() != CI->getParent() && !AddI->hasOneUse()) | |||
1200 | return false; | |||
1201 | ||||
1202 | #ifndef NDEBUG | |||
1203 | // Someday m_UAddWithOverflow may get smarter, but this is a safe assumption | |||
1204 | // for now: | |||
1205 | if (AddI->hasOneUse()) | |||
1206 | assert(*AddI->user_begin() == CI && "expected!"); | |||
1207 | #endif | |||
1208 | ||||
1209 | Module *M = CI->getModule(); | |||
1210 | Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty); | |||
1211 | ||||
1212 | auto *InsertPt = AddI->hasOneUse() ? CI : AddI; | |||
1213 | ||||
1214 | auto *UAddWithOverflow = | |||
1215 | CallInst::Create(F, {A, B}, "uadd.overflow", InsertPt); | |||
1216 | auto *UAdd = ExtractValueInst::Create(UAddWithOverflow, 0, "uadd", InsertPt); | |||
1217 | auto *Overflow = | |||
1218 | ExtractValueInst::Create(UAddWithOverflow, 1, "overflow", InsertPt); | |||
1219 | ||||
1220 | CI->replaceAllUsesWith(Overflow); | |||
1221 | AddI->replaceAllUsesWith(UAdd); | |||
1222 | CI->eraseFromParent(); | |||
1223 | AddI->eraseFromParent(); | |||
1224 | return true; | |||
1225 | } | |||
1226 | ||||
1227 | /// Sink the given CmpInst into user blocks to reduce the number of virtual | |||
1228 | /// registers that must be created and coalesced. This is a clear win except on | |||
1229 | /// targets with multiple condition code registers (PowerPC), where it might | |||
1230 | /// lose; some adjustment may be wanted there. | |||
1231 | /// | |||
1232 | /// Return true if any changes are made. | |||
1233 | static bool SinkCmpExpression(CmpInst *CI, const TargetLowering *TLI) { | |||
1234 | BasicBlock *DefBB = CI->getParent(); | |||
1235 | ||||
1236 | // Avoid sinking soft-FP comparisons, since this can move them into a loop. | |||
1237 | if (TLI && TLI->useSoftFloat() && isa<FCmpInst>(CI)) | |||
1238 | return false; | |||
1239 | ||||
1240 | // Only insert a cmp in each block once. | |||
1241 | DenseMap<BasicBlock*, CmpInst*> InsertedCmps; | |||
1242 | ||||
1243 | bool MadeChange = false; | |||
1244 | for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end(); | |||
1245 | UI != E; ) { | |||
1246 | Use &TheUse = UI.getUse(); | |||
1247 | Instruction *User = cast<Instruction>(*UI); | |||
1248 | ||||
1249 | // Preincrement use iterator so we don't invalidate it. | |||
1250 | ++UI; | |||
1251 | ||||
1252 | // Don't bother for PHI nodes. | |||
1253 | if (isa<PHINode>(User)) | |||
1254 | continue; | |||
1255 | ||||
1256 | // Figure out which BB this cmp is used in. | |||
1257 | BasicBlock *UserBB = User->getParent(); | |||
1258 | ||||
1259 | // If this user is in the same block as the cmp, don't change the cmp. | |||
1260 | if (UserBB == DefBB) continue; | |||
1261 | ||||
1262 | // If we have already inserted a cmp into this block, use it. | |||
1263 | CmpInst *&InsertedCmp = InsertedCmps[UserBB]; | |||
1264 | ||||
1265 | if (!InsertedCmp) { | |||
1266 | BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); | |||
1267 | assert(InsertPt != UserBB->end()); | |||
1268 | InsertedCmp = | |||
1269 | CmpInst::Create(CI->getOpcode(), CI->getPredicate(), | |||
1270 | CI->getOperand(0), CI->getOperand(1), "", &*InsertPt); | |||
1271 | // Propagate the debug info. | |||
1272 | InsertedCmp->setDebugLoc(CI->getDebugLoc()); | |||
1273 | } | |||
1274 | ||||
1275 | // Replace a use of the cmp with a use of the new cmp. | |||
1276 | TheUse = InsertedCmp; | |||
1277 | MadeChange = true; | |||
1278 | ++NumCmpUses; | |||
1279 | } | |||
1280 | ||||
1281 | // If we removed all uses, nuke the cmp. | |||
1282 | if (CI->use_empty()) { | |||
1283 | CI->eraseFromParent(); | |||
1284 | MadeChange = true; | |||
1285 | } | |||
1286 | ||||
1287 | return MadeChange; | |||
1288 | } | |||
1289 | ||||
1290 | static bool OptimizeCmpExpression(CmpInst *CI, const TargetLowering *TLI) { | |||
1291 | if (SinkCmpExpression(CI, TLI)) | |||
1292 | return true; | |||
1293 | ||||
1294 | if (CombineUAddWithOverflow(CI)) | |||
1295 | return true; | |||
1296 | ||||
1297 | return false; | |||
1298 | } | |||
1299 | ||||
1300 | /// Duplicate and sink the given 'and' instruction into user blocks where it is | |||
1301 | /// used in a compare to allow isel to generate better code for targets where | |||
1302 | /// this operation can be combined. | |||
1303 | /// | |||
1304 | /// Return true if any changes are made. | |||
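/// For example (a sketch; value names are illustrative): an
///   %and = and i32 %x, 8
/// whose only users are (icmp eq %and, 0) instructions in other blocks is
/// duplicated next to each icmp, so targets where
/// TLI.isMaskAndCmp0FoldingBeneficial reports a benefit can match the
/// and/icmp pair during instruction selection.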
1305 | static bool sinkAndCmp0Expression(Instruction *AndI, | |||
1306 | const TargetLowering &TLI, | |||
1307 | SetOfInstrs &InsertedInsts) { | |||
1308 | // Double-check that we're not trying to optimize an instruction that was | |||
1309 | // already optimized by some other part of this pass. | |||
1310 | assert(!InsertedInsts.count(AndI) && | |||
1311 | "Attempting to optimize already optimized and instruction"); | |||
1312 | (void) InsertedInsts; | |||
1313 | ||||
1314 | // Nothing to do for single use in same basic block. | |||
1315 | if (AndI->hasOneUse() && | |||
1316 | AndI->getParent() == cast<Instruction>(*AndI->user_begin())->getParent()) | |||
1317 | return false; | |||
1318 | ||||
1319 | // Try to avoid cases where sinking/duplicating is likely to increase register | |||
1320 | // pressure. | |||
1321 | if (!isa<ConstantInt>(AndI->getOperand(0)) && | |||
1322 | !isa<ConstantInt>(AndI->getOperand(1)) && | |||
1323 | AndI->getOperand(0)->hasOneUse() && AndI->getOperand(1)->hasOneUse()) | |||
1324 | return false; | |||
1325 | ||||
1326 | for (auto *U : AndI->users()) { | |||
1327 | Instruction *User = cast<Instruction>(U); | |||
1328 | ||||
1329 | // Only sink for and mask feeding icmp with 0. | |||
1330 | if (!isa<ICmpInst>(User)) | |||
1331 | return false; | |||
1332 | ||||
1333 | auto *CmpC = dyn_cast<ConstantInt>(User->getOperand(1)); | |||
1334 | if (!CmpC || !CmpC->isZero()) | |||
1335 | return false; | |||
1336 | } | |||
1337 | ||||
1338 | if (!TLI.isMaskAndCmp0FoldingBeneficial(*AndI)) | |||
1339 | return false; | |||
1340 | ||||
1341 | DEBUG(dbgs() << "found 'and' feeding only icmp 0;\n"); | |||
1342 | DEBUG(AndI->getParent()->dump()); | |||
1343 | ||||
1344 | // Push the 'and' into the same block as the icmp 0. There should only be | |||
1345 | // one (icmp (and, 0)) in each block, since CSE/GVN should have removed any | |||
1346 | // others, so we don't need to keep track of which BBs we insert into. | |||
1347 | for (Value::user_iterator UI = AndI->user_begin(), E = AndI->user_end(); | |||
1348 | UI != E; ) { | |||
1349 | Use &TheUse = UI.getUse(); | |||
1350 | Instruction *User = cast<Instruction>(*UI); | |||
1351 | ||||
1352 | // Preincrement use iterator so we don't invalidate it. | |||
1353 | ++UI; | |||
1354 | ||||
1355 | DEBUG(dbgs() << "sinking 'and' use: " << *User << "\n"); | |||
1356 | ||||
1357 | // Keep the 'and' in the same place if the use is already in the same block. | |||
1358 | Instruction *InsertPt = | |||
1359 | User->getParent() == AndI->getParent() ? AndI : User; | |||
1360 | Instruction *InsertedAnd = | |||
1361 | BinaryOperator::Create(Instruction::And, AndI->getOperand(0), | |||
1362 | AndI->getOperand(1), "", InsertPt); | |||
1363 | // Propagate the debug info. | |||
1364 | InsertedAnd->setDebugLoc(AndI->getDebugLoc()); | |||
1365 | ||||
1366 | // Replace a use of the 'and' with a use of the new 'and'. | |||
1367 | TheUse = InsertedAnd; | |||
1368 | ++NumAndUses; | |||
1369 | DEBUG(User->getParent()->dump()); | |||
1370 | } | |||
1371 | ||||
1372 | // We removed all uses, nuke the and. | |||
1373 | AndI->eraseFromParent(); | |||
1374 | return true; | |||
1375 | } | |||
1376 | ||||
1377 | /// Check if the candidates could be combined with a shift instruction, which | |||
1378 | /// includes: | |||
1379 | /// 1. Truncate instruction | |||
1380 | /// 2. And instruction and the imm is a mask of the low bits: | |||
1381 | /// imm & (imm+1) == 0 | |||
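/// For example, imm = 0x00FF is such a mask: 0x00FF & 0x0100 == 0; but
/// imm = 0x00F0 is not: 0x00F0 & 0x00F1 == 0x00F0 != 0.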
1382 | static bool isExtractBitsCandidateUse(Instruction *User) { | |||
1383 | if (!isa<TruncInst>(User)) { | |||
1384 | if (User->getOpcode() != Instruction::And || | |||
1385 | !isa<ConstantInt>(User->getOperand(1))) | |||
1386 | return false; | |||
1387 | ||||
1388 | const APInt &Cimm = cast<ConstantInt>(User->getOperand(1))->getValue(); | |||
1389 | ||||
1390 | if ((Cimm & (Cimm + 1)).getBoolValue()) | |||
1391 | return false; | |||
1392 | } | |||
1393 | return true; | |||
1394 | } | |||
1395 | ||||
1396 | /// Sink both shift and truncate instruction to the use of truncate's BB. | |||
1397 | static bool | |||
1398 | SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI, | |||
1399 | DenseMap<BasicBlock *, BinaryOperator *> &InsertedShifts, | |||
1400 | const TargetLowering &TLI, const DataLayout &DL) { | |||
1401 | BasicBlock *UserBB = User->getParent(); | |||
1402 | DenseMap<BasicBlock *, CastInst *> InsertedTruncs; | |||
1403 | TruncInst *TruncI = dyn_cast<TruncInst>(User); | |||
1404 | bool MadeChange = false; | |||
1405 | ||||
1406 | for (Value::user_iterator TruncUI = TruncI->user_begin(), | |||
1407 | TruncE = TruncI->user_end(); | |||
1408 | TruncUI != TruncE;) { | |||
1409 | ||||
1410 | Use &TruncTheUse = TruncUI.getUse(); | |||
1411 | Instruction *TruncUser = cast<Instruction>(*TruncUI); | |||
1412 | // Preincrement use iterator so we don't invalidate it. | |||
1413 | ||||
1414 | ++TruncUI; | |||
1415 | ||||
1416 | int ISDOpcode = TLI.InstructionOpcodeToISD(TruncUser->getOpcode()); | |||
1417 | if (!ISDOpcode) | |||
1418 | continue; | |||
1419 | ||||
1420 | // If the use is actually a legal node, there will not be an | |||
1421 | // implicit truncate. | |||
1422 | // FIXME: always querying the result type is just an | |||
1423 | // approximation; some nodes' legality is determined by the | |||
1424 | // operand or other means. There's no good way to find out though. | |||
1425 | if (TLI.isOperationLegalOrCustom( | |||
1426 | ISDOpcode, TLI.getValueType(DL, TruncUser->getType(), true))) | |||
1427 | continue; | |||
1428 | ||||
1429 | // Don't bother for PHI nodes. | |||
1430 | if (isa<PHINode>(TruncUser)) | |||
1431 | continue; | |||
1432 | ||||
1433 | BasicBlock *TruncUserBB = TruncUser->getParent(); | |||
1434 | ||||
1435 | if (UserBB == TruncUserBB) | |||
1436 | continue; | |||
1437 | ||||
1438 | BinaryOperator *&InsertedShift = InsertedShifts[TruncUserBB]; | |||
1439 | CastInst *&InsertedTrunc = InsertedTruncs[TruncUserBB]; | |||
1440 | ||||
1441 | if (!InsertedShift && !InsertedTrunc) { | |||
1442 | BasicBlock::iterator InsertPt = TruncUserBB->getFirstInsertionPt(); | |||
1443 | assert(InsertPt != TruncUserBB->end()); | |||
1444 | // Sink the shift | |||
1445 | if (ShiftI->getOpcode() == Instruction::AShr) | |||
1446 | InsertedShift = BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, | |||
1447 | "", &*InsertPt); | |||
1448 | else | |||
1449 | InsertedShift = BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, | |||
1450 | "", &*InsertPt); | |||
1451 | ||||
1452 | // Sink the trunc | |||
1453 | BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt(); | |||
1454 | TruncInsertPt++; | |||
1455 | assert(TruncInsertPt != TruncUserBB->end()); | |||
1456 | ||||
1457 | InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift, | |||
1458 | TruncI->getType(), "", &*TruncInsertPt); | |||
1459 | ||||
1460 | MadeChange = true; | |||
1461 | ||||
1462 | TruncTheUse = InsertedTrunc; | |||
1463 | } | |||
1464 | } | |||
1465 | return MadeChange; | |||
1466 | } | |||
1467 | ||||
1468 | /// Sink the shift *right* instruction into user blocks if the uses could | |||
1469 | /// potentially be combined with this shift instruction to generate a | |||
1470 | /// BitExtract instruction. It will only be applied if the architecture | |||
1471 | /// supports BitExtract instructions. Here is an example: | |||
1472 | /// BB1: | |||
1473 | /// %x.extract.shift = lshr i64 %arg1, 32 | |||
1474 | /// BB2: | |||
1475 | /// %x.extract.trunc = trunc i64 %x.extract.shift to i16 | |||
1476 | /// ==> | |||
1477 | /// | |||
1478 | /// BB2: | |||
1479 | /// %x.extract.shift.1 = lshr i64 %arg1, 32 | |||
1480 | /// %x.extract.trunc = trunc i64 %x.extract.shift.1 to i16 | |||
1481 | /// | |||
1482 | /// CodeGen will recognize the pattern in BB2 and generate a BitExtract | |||
1483 | /// instruction. | |||
1484 | /// Return true if any changes are made. | |||
1485 | static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI, | |||
1486 | const TargetLowering &TLI, | |||
1487 | const DataLayout &DL) { | |||
1488 | BasicBlock *DefBB = ShiftI->getParent(); | |||
1489 | ||||
1490 | /// Only insert instructions in each block once. | |||
1491 | DenseMap<BasicBlock *, BinaryOperator *> InsertedShifts; | |||
1492 | ||||
1493 | bool shiftIsLegal = TLI.isTypeLegal(TLI.getValueType(DL, ShiftI->getType())); | |||
1494 | ||||
1495 | bool MadeChange = false; | |||
1496 | for (Value::user_iterator UI = ShiftI->user_begin(), E = ShiftI->user_end(); | |||
1497 | UI != E;) { | |||
1498 | Use &TheUse = UI.getUse(); | |||
1499 | Instruction *User = cast<Instruction>(*UI); | |||
1500 | // Preincrement use iterator so we don't invalidate it. | |||
1501 | ++UI; | |||
1502 | ||||
1503 | // Don't bother for PHI nodes. | |||
1504 | if (isa<PHINode>(User)) | |||
1505 | continue; | |||
1506 | ||||
1507 | if (!isExtractBitsCandidateUse(User)) | |||
1508 | continue; | |||
1509 | ||||
1510 | BasicBlock *UserBB = User->getParent(); | |||
1511 | ||||
1512 | if (UserBB == DefBB) { | |||
1513 | // If the shift and truncate instructions are in the same BB, the use of | |||
1514 | // the truncate (TruncUse) may still introduce another truncate if it is not | |||
1515 | // legal. In this case, we would like to sink both the shift and the | |||
1516 | // truncate instruction to the BB of TruncUse. | |||
1517 | // for example: | |||
1518 | // BB1: | |||
1519 | // i64 shift.result = lshr i64 opnd, imm | |||
1520 | // trunc.result = trunc shift.result to i16 | |||
1521 | // | |||
1522 | // BB2: | |||
1523 | // ----> We will have an implicit truncate here if the architecture does | |||
1524 | // not have i16 compare. | |||
1525 | // cmp i16 trunc.result, opnd2 | |||
1526 | // | |||
1527 | if (isa<TruncInst>(User) && shiftIsLegal | |||
1528 | // If the type of the truncate is legal, no truncate will be | |||
1529 | // introduced in other basic blocks. | |||
1530 | && | |||
1531 | (!TLI.isTypeLegal(TLI.getValueType(DL, User->getType())))) | |||
1532 | MadeChange = | |||
1533 | SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI, DL); | |||
1534 | ||||
1535 | continue; | |||
1536 | } | |||
1537 | // If we have already inserted a shift into this block, use it. | |||
1538 | BinaryOperator *&InsertedShift = InsertedShifts[UserBB]; | |||
1539 | ||||
1540 | if (!InsertedShift) { | |||
1541 | BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); | |||
1542 | assert(InsertPt != UserBB->end()); | |||
1543 | ||||
1544 | if (ShiftI->getOpcode() == Instruction::AShr) | |||
1545 | InsertedShift = BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, | |||
1546 | "", &*InsertPt); | |||
1547 | else | |||
1548 | InsertedShift = BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, | |||
1549 | "", &*InsertPt); | |||
1550 | ||||
1551 | MadeChange = true; | |||
1552 | } | |||
1553 | ||||
1554 | // Replace a use of the shift with a use of the new shift. | |||
1555 | TheUse = InsertedShift; | |||
1556 | } | |||
1557 | ||||
1558 | // If we removed all uses, nuke the shift. | |||
1559 | if (ShiftI->use_empty()) | |||
1560 | ShiftI->eraseFromParent(); | |||
1561 | ||||
1562 | return MadeChange; | |||
1563 | } | |||
1564 | ||||
1565 | /// If counting leading or trailing zeros is an expensive operation and a zero | |||
1566 | /// input is defined, add a check for zero to avoid calling the intrinsic. | |||
1567 | /// | |||
1568 | /// We want to transform: | |||
1569 | /// %z = call i64 @llvm.cttz.i64(i64 %A, i1 false) | |||
1570 | /// | |||
1571 | /// into: | |||
1572 | /// entry: | |||
1573 | /// %cmpz = icmp eq i64 %A, 0 | |||
1574 | /// br i1 %cmpz, label %cond.end, label %cond.false | |||
1575 | /// cond.false: | |||
1576 | /// %z = call i64 @llvm.cttz.i64(i64 %A, i1 true) | |||
1577 | /// br label %cond.end | |||
1578 | /// cond.end: | |||
1579 | /// %ctz = phi i64 [ 64, %entry ], [ %z, %cond.false ] | |||
1580 | /// | |||
1581 | /// If the transform is performed, return true and set ModifiedDT to true. | |||
1582 | static bool despeculateCountZeros(IntrinsicInst *CountZeros, | |||
1583 | const TargetLowering *TLI, | |||
1584 | const DataLayout *DL, | |||
1585 | bool &ModifiedDT) { | |||
1586 | if (!TLI || !DL) | |||
1587 | return false; | |||
1588 | ||||
1589 | // If a zero input is undefined, it doesn't make sense to despeculate that. | |||
1590 | if (match(CountZeros->getOperand(1), m_One())) | |||
1591 | return false; | |||
1592 | ||||
1593 | // If it's cheap to speculate, there's nothing to do. | |||
1594 | auto IntrinsicID = CountZeros->getIntrinsicID(); | |||
1595 | if ((IntrinsicID == Intrinsic::cttz && TLI->isCheapToSpeculateCttz()) || | |||
1596 | (IntrinsicID == Intrinsic::ctlz && TLI->isCheapToSpeculateCtlz())) | |||
1597 | return false; | |||
1598 | ||||
1599 | // Only handle legal scalar cases. Anything else requires too much work. | |||
1600 | Type *Ty = CountZeros->getType(); | |||
1601 | unsigned SizeInBits = Ty->getPrimitiveSizeInBits(); | |||
1602 | if (Ty->isVectorTy() || SizeInBits > DL->getLargestLegalIntTypeSizeInBits()) | |||
1603 | return false; | |||
1604 | ||||
1605 | // The intrinsic will be sunk behind a compare against zero and branch. | |||
1606 | BasicBlock *StartBlock = CountZeros->getParent(); | |||
1607 | BasicBlock *CallBlock = StartBlock->splitBasicBlock(CountZeros, "cond.false"); | |||
1608 | ||||
1609 | // Create another block after the count zero intrinsic. A PHI will be added | |||
1610 | // in this block to select the result of the intrinsic or the bit-width | |||
1611 | // constant if the input to the intrinsic is zero. | |||
1612 | BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(CountZeros)); | |||
1613 | BasicBlock *EndBlock = CallBlock->splitBasicBlock(SplitPt, "cond.end"); | |||
1614 | ||||
1615 | // Set up a builder to create a compare, conditional branch, and PHI. | |||
1616 | IRBuilder<> Builder(CountZeros->getContext()); | |||
1617 | Builder.SetInsertPoint(StartBlock->getTerminator()); | |||
1618 | Builder.SetCurrentDebugLocation(CountZeros->getDebugLoc()); | |||
1619 | ||||
1620 | // Replace the unconditional branch that was created by the first split with | |||
1621 | // a compare against zero and a conditional branch. | |||
1622 | Value *Zero = Constant::getNullValue(Ty); | |||
1623 | Value *Cmp = Builder.CreateICmpEQ(CountZeros->getOperand(0), Zero, "cmpz"); | |||
1624 | Builder.CreateCondBr(Cmp, EndBlock, CallBlock); | |||
1625 | StartBlock->getTerminator()->eraseFromParent(); | |||
1626 | ||||
1627 | // Create a PHI in the end block to select either the output of the intrinsic | |||
1628 | // or the bit width of the operand. | |||
1629 | Builder.SetInsertPoint(&EndBlock->front()); | |||
1630 | PHINode *PN = Builder.CreatePHI(Ty, 2, "ctz"); | |||
1631 | CountZeros->replaceAllUsesWith(PN); | |||
1632 | Value *BitWidth = Builder.getInt(APInt(SizeInBits, SizeInBits)); | |||
1633 | PN->addIncoming(BitWidth, StartBlock); | |||
1634 | PN->addIncoming(CountZeros, CallBlock); | |||
1635 | ||||
1636 | // We are explicitly handling the zero case, so we can set the intrinsic's | |||
1637 | // undefined zero argument to 'true'. This will also prevent reprocessing the | |||
1638 | // intrinsic; we only despeculate when a zero input is defined. | |||
1639 | CountZeros->setArgOperand(1, Builder.getTrue()); | |||
1640 | ModifiedDT = true; | |||
1641 | return true; | |||
1642 | } | |||
1643 | ||||
1644 | // This class provides helper functions to expand a memcmp library call into an | |||
1645 | // inline expansion. | |||
1646 | class MemCmpExpansion { | |||
1647 | struct ResultBlock { | |||
1648 | BasicBlock *BB; | |||
1649 | PHINode *PhiSrc1; | |||
1650 | PHINode *PhiSrc2; | |||
1651 | ResultBlock(); | |||
1652 | }; | |||
1653 | ||||
1654 | CallInst *CI; | |||
1655 | ResultBlock ResBlock; | |||
1656 | unsigned MaxLoadSize; | |||
1657 | unsigned NumBlocks; | |||
1658 | unsigned NumBlocksNonOneByte; | |||
1659 | unsigned NumLoadsPerBlock; | |||
1660 | std::vector<BasicBlock *> LoadCmpBlocks; | |||
1661 | BasicBlock *EndBlock; | |||
1662 | PHINode *PhiRes; | |||
1663 | bool IsUsedForZeroCmp; | |||
1664 | const DataLayout &DL; | |||
1665 | IRBuilder<> Builder; | |||
1666 | ||||
1667 | unsigned calculateNumBlocks(unsigned Size); | |||
1668 | void createLoadCmpBlocks(); | |||
1669 | void createResultBlock(); | |||
1670 | void setupResultBlockPHINodes(); | |||
1671 | void setupEndBlockPHINodes(); | |||
1672 | void emitLoadCompareBlock(unsigned Index, unsigned LoadSize, | |||
1673 | unsigned GEPIndex); | |||
1674 | Value *getCompareLoadPairs(unsigned Index, unsigned Size, | |||
1675 | unsigned &NumBytesProcessed); | |||
1676 | void emitLoadCompareBlockMultipleLoads(unsigned Index, unsigned Size, | |||
1677 | unsigned &NumBytesProcessed); | |||
1678 | void emitLoadCompareByteBlock(unsigned Index, unsigned GEPIndex); | |||
1679 | void emitMemCmpResultBlock(); | |||
1680 | Value *getMemCmpExpansionZeroCase(unsigned Size); | |||
1681 | Value *getMemCmpEqZeroOneBlock(unsigned Size); | |||
1682 | unsigned getLoadSize(unsigned Size); | |||
1683 | unsigned getNumLoads(unsigned Size); | |||
1684 | ||||
1685 | public: | |||
1686 | MemCmpExpansion(CallInst *CI, uint64_t Size, unsigned MaxLoadSize, | |||
1687 | unsigned NumLoadsPerBlock, const DataLayout &DL); | |||
1688 | Value *getMemCmpExpansion(uint64_t Size); | |||
1689 | }; | |||
1690 | ||||
1691 | MemCmpExpansion::ResultBlock::ResultBlock() | |||
1692 | : BB(nullptr), PhiSrc1(nullptr), PhiSrc2(nullptr) {} | |||
1693 | ||||
1694 | // Initialize the basic block structure required for expansion of memcmp call | |||
1695 | // with given maximum load size and memcmp size parameter. | |||
1696 | // This structure includes: | |||
1697 | // 1. A list of load compare blocks - LoadCmpBlocks. | |||
1698 | // 2. An EndBlock, split from the original instruction point, which is the block to | |||
1699 | // return from. | |||
1700 | // 3. ResultBlock, block to branch to for early exit when a | |||
1701 | // LoadCmpBlock finds a difference. | |||
1702 | MemCmpExpansion::MemCmpExpansion(CallInst *CI, uint64_t Size, | |||
1703 | unsigned MaxLoadSize, unsigned LoadsPerBlock, | |||
1704 | const DataLayout &TheDataLayout) | |||
1705 | : CI(CI), MaxLoadSize(MaxLoadSize), NumLoadsPerBlock(LoadsPerBlock), | |||
1706 | DL(TheDataLayout), Builder(CI) { | |||
1707 | ||||
1708 | // A memcmp used only in a zero-equality comparison, with a single block of | |||
1709 | // load and compare, does not need to set up any extra blocks. This case could | |||
1710 | // be handled in the DAG, but since we can flexibly expand any memcmp here, | |||
1711 | // we choose to handle this case too to avoid fragmented lowering. | |||
1712 | IsUsedForZeroCmp = isOnlyUsedInZeroEqualityComparison(CI); | |||
1713 | NumBlocks = calculateNumBlocks(Size); | |||
1714 | if (!IsUsedForZeroCmp || NumBlocks != 1) { | |||
1715 | BasicBlock *StartBlock = CI->getParent(); | |||
1716 | EndBlock = StartBlock->splitBasicBlock(CI, "endblock"); | |||
1717 | setupEndBlockPHINodes(); | |||
1718 | createResultBlock(); | |||
1719 | ||||
1720 | // If return value of memcmp is not used in a zero equality, we need to | |||
1721 | // calculate which source was larger. The calculation requires the | |||
1722 | // two loaded source values of each load compare block. | |||
1723 | // These will be saved in the phi nodes created by setupResultBlockPHINodes. | |||
1724 | if (!IsUsedForZeroCmp) | |||
1725 | setupResultBlockPHINodes(); | |||
1726 | ||||
1727 | // Create the number of required load compare basic blocks. | |||
1728 | createLoadCmpBlocks(); | |||
1729 | ||||
1730 | // Update the terminator added by splitBasicBlock to branch to the first | |||
1731 | // LoadCmpBlock. | |||
1732 | StartBlock->getTerminator()->setSuccessor(0, LoadCmpBlocks[0]); | |||
1733 | } | |||
1734 | ||||
1735 | Builder.SetCurrentDebugLocation(CI->getDebugLoc()); | |||
1736 | } | |||
1737 | ||||
1738 | void MemCmpExpansion::createLoadCmpBlocks() { | |||
1739 | for (unsigned i = 0; i < NumBlocks; i++) { | |||
1740 | BasicBlock *BB = BasicBlock::Create(CI->getContext(), "loadbb", | |||
1741 | EndBlock->getParent(), EndBlock); | |||
1742 | LoadCmpBlocks.push_back(BB); | |||
1743 | } | |||
1744 | } | |||
1745 | ||||
1746 | void MemCmpExpansion::createResultBlock() { | |||
1747 | ResBlock.BB = BasicBlock::Create(CI->getContext(), "res_block", | |||
1748 | EndBlock->getParent(), EndBlock); | |||
1749 | } | |||
1750 | ||||
1751 | // This function creates the IR instructions for loading and comparing 1 byte. | |||
1752 | // It loads 1 byte from each source of the memcmp parameters with the given | |||
1753 | // GEPIndex. It then subtracts the two loaded values and adds this result to the | |||
1754 | // final phi node for selecting the memcmp result. | |||
1755 | void MemCmpExpansion::emitLoadCompareByteBlock(unsigned Index, | |||
1756 | unsigned GEPIndex) { | |||
1757 | Value *Source1 = CI->getArgOperand(0); | |||
1758 | Value *Source2 = CI->getArgOperand(1); | |||
1759 | ||||
1760 | Builder.SetInsertPoint(LoadCmpBlocks[Index]); | |||
1761 | Type *LoadSizeType = Type::getInt8Ty(CI->getContext()); | |||
1762 | // Cast source to LoadSizeType*. | |||
1763 | if (Source1->getType() != LoadSizeType) | |||
1764 | Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo()); | |||
1765 | if (Source2->getType() != LoadSizeType) | |||
1766 | Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo()); | |||
1767 | ||||
1768 | // Get the base address using the GEPIndex. | |||
1769 | if (GEPIndex != 0) { | |||
1770 | Source1 = Builder.CreateGEP(LoadSizeType, Source1, | |||
1771 | ConstantInt::get(LoadSizeType, GEPIndex)); | |||
1772 | Source2 = Builder.CreateGEP(LoadSizeType, Source2, | |||
1773 | ConstantInt::get(LoadSizeType, GEPIndex)); | |||
1774 | } | |||
1775 | ||||
1776 | Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1); | |||
1777 | Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2); | |||
1778 | ||||
1779 | LoadSrc1 = Builder.CreateZExt(LoadSrc1, Type::getInt32Ty(CI->getContext())); | |||
1780 | LoadSrc2 = Builder.CreateZExt(LoadSrc2, Type::getInt32Ty(CI->getContext())); | |||
1781 | Value *Diff = Builder.CreateSub(LoadSrc1, LoadSrc2); | |||
1782 | ||||
1783 | PhiRes->addIncoming(Diff, LoadCmpBlocks[Index]); | |||
1784 | ||||
1785 | if (Index < (LoadCmpBlocks.size() - 1)) { | |||
1786 | // Early exit branch to EndBlock if a difference is found. Otherwise, | |||
1787 | // continue to the next LoadCmpBlock. | |||
1788 | Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_NE, Diff, | |||
1789 | ConstantInt::get(Diff->getType(), 0)); | |||
1790 | BranchInst *CmpBr = | |||
1791 | BranchInst::Create(EndBlock, LoadCmpBlocks[Index + 1], Cmp); | |||
1792 | Builder.Insert(CmpBr); | |||
1793 | } else { | |||
1794 | // The last block has an unconditional branch to EndBlock. | |||
1795 | BranchInst *CmpBr = BranchInst::Create(EndBlock); | |||
1796 | Builder.Insert(CmpBr); | |||
1797 | } | |||
1798 | } | |||
1799 | ||||
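// A worked example for the formula below: with MaxLoadSize = 8, Size = 15
// needs 15 / 8 = 1 max-size load plus countPopulation(15 % 8 = 7) = 3
// smaller loads (4, 2, and 1 bytes), i.e. 4 loads in total.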
1800 | unsigned MemCmpExpansion::getNumLoads(unsigned Size) { | |||
1801 | return (Size / MaxLoadSize) + countPopulation(Size % MaxLoadSize); | |||
1802 | } | |||
1803 | ||||
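// A worked example for getLoadSize below (assuming MaxLoadSize = 8 for
// illustration): getLoadSize(15) = MinAlign(PowerOf2Floor(15) = 8, 8) = 8,
// and getLoadSize(7) = MinAlign(4, 8) = 4, i.e. the largest power-of-2 load
// that still fits in the remaining bytes.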
1804 | unsigned MemCmpExpansion::getLoadSize(unsigned Size) { | |||
1805 | return MinAlign(PowerOf2Floor(Size), MaxLoadSize); | |||
1806 | } | |||
1807 | ||||
1808 | /// Generate an equality comparison for one or more pairs of loaded values. | |||
1809 | /// This is used in the case where the memcmp() call is compared equal or not | |||
1810 | /// equal to zero. | |||
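/// For a block with two loads per source (a sketch; value names are
/// illustrative), the emitted sequence is roughly:
///   %x1 = xor i64 %load1.src1, %load1.src2
///   %x2 = xor i64 %load2.src1, %load2.src2
///   %or = or i64 %x1, %x2
///   %cmp = icmp ne i64 %or, 0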
1811 | Value *MemCmpExpansion::getCompareLoadPairs(unsigned Index, unsigned Size, | |||
1812 | unsigned &NumBytesProcessed) { | |||
1813 | std::vector<Value *> XorList, OrList; | |||
1814 | Value *Diff; | |||
1815 | ||||
1816 | unsigned RemainingBytes = Size - NumBytesProcessed; | |||
1817 | unsigned NumLoadsRemaining = getNumLoads(RemainingBytes); | |||
1818 | unsigned NumLoads = std::min(NumLoadsRemaining, NumLoadsPerBlock); | |||
1819 | ||||
1820 | // For a single-block expansion, start inserting before the memcmp call. | |||
1821 | if (LoadCmpBlocks.empty()) | |||
1822 | Builder.SetInsertPoint(CI); | |||
1823 | else | |||
1824 | Builder.SetInsertPoint(LoadCmpBlocks[Index]); | |||
1825 | ||||
1826 | Value *Cmp = nullptr; | |||
1827 | for (unsigned i = 0; i < NumLoads; ++i) { | |||
1828 | unsigned LoadSize = getLoadSize(RemainingBytes); | |||
1829 | unsigned GEPIndex = NumBytesProcessed / LoadSize; | |||
1830 | NumBytesProcessed += LoadSize; | |||
1831 | RemainingBytes -= LoadSize; | |||
1832 | ||||
1833 | Type *LoadSizeType = IntegerType::get(CI->getContext(), LoadSize * 8); | |||
1834 | Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8); | |||
1835 | assert(LoadSize <= MaxLoadSize && "Unexpected load type"); | |||
1836 | ||||
1837 | Value *Source1 = CI->getArgOperand(0); | |||
1838 | Value *Source2 = CI->getArgOperand(1); | |||
1839 | ||||
1840 | // Cast source to LoadSizeType*. | |||
1841 | if (Source1->getType() != LoadSizeType) | |||
1842 | Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo()); | |||
1843 | if (Source2->getType() != LoadSizeType) | |||
1844 | Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo()); | |||
1845 | ||||
1846 | // Get the base address using the GEPIndex. | |||
1847 | if (GEPIndex != 0) { | |||
1848 | Source1 = Builder.CreateGEP(LoadSizeType, Source1, | |||
1849 | ConstantInt::get(LoadSizeType, GEPIndex)); | |||
1850 | Source2 = Builder.CreateGEP(LoadSizeType, Source2, | |||
1851 | ConstantInt::get(LoadSizeType, GEPIndex)); | |||
1852 | } | |||
1853 | ||||
1854 | // Get a constant or load a value for each source address. | |||
1855 | Value *LoadSrc1 = nullptr; | |||
1856 | if (auto *Source1C = dyn_cast<Constant>(Source1)) | |||
1857 | LoadSrc1 = ConstantFoldLoadFromConstPtr(Source1C, LoadSizeType, DL); | |||
1858 | if (!LoadSrc1) | |||
1859 | LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1); | |||
1860 | ||||
1861 | Value *LoadSrc2 = nullptr; | |||
1862 | if (auto *Source2C = dyn_cast<Constant>(Source2)) | |||
1863 | LoadSrc2 = ConstantFoldLoadFromConstPtr(Source2C, LoadSizeType, DL); | |||
1864 | if (!LoadSrc2) | |||
1865 | LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2); | |||
1866 | ||||
1867 | if (NumLoads != 1) { | |||
1868 | if (LoadSizeType != MaxLoadType) { | |||
1869 | LoadSrc1 = Builder.CreateZExt(LoadSrc1, MaxLoadType); | |||
1870 | LoadSrc2 = Builder.CreateZExt(LoadSrc2, MaxLoadType); | |||
1871 | } | |||
1872 | // If we have multiple loads per block, we need to generate a composite | |||
1873 | // comparison using xor+or. | |||
1874 | Diff = Builder.CreateXor(LoadSrc1, LoadSrc2); | |||
1875 | Diff = Builder.CreateZExt(Diff, MaxLoadType); | |||
1876 | XorList.push_back(Diff); | |||
1877 | } else { | |||
1878 | // If there's only one load per block, we just compare the loaded values. | |||
1879 | Cmp = Builder.CreateICmpNE(LoadSrc1, LoadSrc2); | |||
1880 | } | |||
1881 | } | |||
1882 | ||||
1883 | auto pairWiseOr = [&](std::vector<Value *> &InList) -> std::vector<Value *> { | |||
1884 | std::vector<Value *> OutList; | |||
1885 | for (unsigned i = 0; i < InList.size() - 1; i = i + 2) { | |||
1886 | Value *Or = Builder.CreateOr(InList[i], InList[i + 1]); | |||
1887 | OutList.push_back(Or); | |||
1888 | } | |||
1889 | if (InList.size() % 2 != 0) | |||
1890 | OutList.push_back(InList.back()); | |||
1891 | return OutList; | |||
1892 | }; | |||
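// For example, pairWiseOr reduces [x1, x2, x3] to [x1|x2, x3]; repeating it
// below leaves a single value that ORs together every XOR result.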
1893 | ||||
1894 | if (!Cmp) { | |||
1895 | // Pairwise OR the XOR results. | |||
1896 | OrList = pairWiseOr(XorList); | |||
1897 | ||||
1898 | // Pairwise OR the OR results until one result left. | |||
1899 | while (OrList.size() != 1) { | |||
1900 | OrList = pairWiseOr(OrList); | |||
1901 | } | |||
1902 | Cmp = Builder.CreateICmpNE(OrList[0], ConstantInt::get(Diff->getType(), 0)); | |||
1903 | } | |||
1904 | ||||
1905 | return Cmp; | |||
1906 | } | |||
1907 | ||||
1908 | void MemCmpExpansion::emitLoadCompareBlockMultipleLoads( | |||
1909 | unsigned Index, unsigned Size, unsigned &NumBytesProcessed) { | |||
1910 | Value *Cmp = getCompareLoadPairs(Index, Size, NumBytesProcessed); | |||
1911 | ||||
1912 | BasicBlock *NextBB = (Index == (LoadCmpBlocks.size() - 1)) | |||
1913 | ? EndBlock | |||
1914 | : LoadCmpBlocks[Index + 1]; | |||
1915 | // Early exit branch to ResultBlock if a difference is found. Otherwise, | |||
1916 | // continue to the next LoadCmpBlock or to EndBlock. | |||
1917 | BranchInst *CmpBr = BranchInst::Create(ResBlock.BB, NextBB, Cmp); | |||
1918 | Builder.Insert(CmpBr); | |||
1919 | ||||
1920 | // Add a phi edge for the last LoadCmpBlock to EndBlock with a value of 0 | |||
1921 | // since early exit to ResultBlock was not taken (no difference was found in | |||
1922 | // any of the bytes). | |||
1923 | if (Index == LoadCmpBlocks.size() - 1) { | |||
1924 | Value *Zero = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 0); | |||
1925 | PhiRes->addIncoming(Zero, LoadCmpBlocks[Index]); | |||
1926 | } | |||
1927 | } | |||
1928 | ||||
1929 | // This function creates the IR instructions for loading and comparing using the | |||
1930 | // given LoadSize. It loads the number of bytes specified by LoadSize from each | |||
1931 | // source of the memcmp parameters. It then does a subtract to see if there was | |||
1932 | // a difference in the loaded values. If a difference is found, it branches | |||
1933 | // with an early exit to the ResultBlock for calculating which source was | |||
1934 | // larger. Otherwise, it falls through to either the next LoadCmpBlock or | |||
1935 | // the EndBlock if this is the last LoadCmpBlock. Loading 1 byte is handled | |||
1936 | // as a special case through emitLoadCompareByteBlock, which can simply | |||
1937 | // subtract the loaded values and add the result to the final phi node. | |||
1938 | void MemCmpExpansion::emitLoadCompareBlock(unsigned Index, unsigned LoadSize, | |||
1939 | unsigned GEPIndex) { | |||
1940 | if (LoadSize == 1) { | |||
1941 | MemCmpExpansion::emitLoadCompareByteBlock(Index, GEPIndex); | |||
1942 | return; | |||
1943 | } | |||
1944 | ||||
1945 | Type *LoadSizeType = IntegerType::get(CI->getContext(), LoadSize * 8); | |||
1946 | Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8); | |||
1947 | assert(LoadSize <= MaxLoadSize && "Unexpected load type"); | |||
1948 | ||||
1949 | Value *Source1 = CI->getArgOperand(0); | |||
1950 | Value *Source2 = CI->getArgOperand(1); | |||
1951 | ||||
1952 | Builder.SetInsertPoint(LoadCmpBlocks[Index]); | |||
1953 | // Cast source to LoadSizeType*. | |||
1954 | if (Source1->getType() != LoadSizeType) | |||
1955 | Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo()); | |||
1956 | if (Source2->getType() != LoadSizeType) | |||
1957 | Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo()); | |||
1958 | ||||
1959 | // Get the base address using the GEPIndex. | |||
1960 | if (GEPIndex != 0) { | |||
1961 | Source1 = Builder.CreateGEP(LoadSizeType, Source1, | |||
1962 | ConstantInt::get(LoadSizeType, GEPIndex)); | |||
1963 | Source2 = Builder.CreateGEP(LoadSizeType, Source2, | |||
1964 | ConstantInt::get(LoadSizeType, GEPIndex)); | |||
1965 | } | |||
1966 | ||||
1967 | // Load LoadSizeType from the base address. | |||
1968 | Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1); | |||
1969 | Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2); | |||
1970 | ||||
1971 | if (DL.isLittleEndian()) { | |||
1972 | Function *Bswap = Intrinsic::getDeclaration(CI->getModule(), | |||
1973 | Intrinsic::bswap, LoadSizeType); | |||
1974 | LoadSrc1 = Builder.CreateCall(Bswap, LoadSrc1); | |||
1975 | LoadSrc2 = Builder.CreateCall(Bswap, LoadSrc2); | |||
1976 | } | |||
1977 | ||||
1978 | if (LoadSizeType != MaxLoadType) { | |||
1979 | LoadSrc1 = Builder.CreateZExt(LoadSrc1, MaxLoadType); | |||
1980 | LoadSrc2 = Builder.CreateZExt(LoadSrc2, MaxLoadType); | |||
1981 | } | |||
1982 | ||||
1983 | // Add the loaded values to the phi nodes for calculating memcmp result only | |||
1984 | // if result is not used in a zero equality. | |||
1985 | if (!IsUsedForZeroCmp) { | |||
1986 | ResBlock.PhiSrc1->addIncoming(LoadSrc1, LoadCmpBlocks[Index]); | |||
1987 | ResBlock.PhiSrc2->addIncoming(LoadSrc2, LoadCmpBlocks[Index]); | |||
1988 | } | |||
1989 | ||||
1990 | Value *Diff = Builder.CreateSub(LoadSrc1, LoadSrc2); | |||
1991 | ||||
1992 | Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_NE, Diff, | |||
1993 | ConstantInt::get(Diff->getType(), 0)); | |||
1994 | BasicBlock *NextBB = (Index == (LoadCmpBlocks.size() - 1)) | |||
1995 | ? EndBlock | |||
1996 | : LoadCmpBlocks[Index + 1]; | |||
1997 | // Early exit branch to ResultBlock if a difference is found. Otherwise, | |||
1998 | // continue to the next LoadCmpBlock or to EndBlock. | |||
1999 | BranchInst *CmpBr = BranchInst::Create(ResBlock.BB, NextBB, Cmp); | |||
2000 | Builder.Insert(CmpBr); | |||
2001 | ||||
2002 | // Add a phi edge for the last LoadCmpBlock to EndBlock with a value of 0 | |||
2003 | // since early exit to ResultBlock was not taken (no difference was found in | |||
2004 | // any of the bytes). | |||
2005 | if (Index == LoadCmpBlocks.size() - 1) { | |||
2006 | Value *Zero = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 0); | |||
2007 | PhiRes->addIncoming(Zero, LoadCmpBlocks[Index]); | |||
2008 | } | |||
2009 | } | |||
2010 | ||||
2011 | // This function populates the ResultBlock with a sequence to calculate the | |||
2012 | // memcmp result. It compares the two loaded source values and returns -1 if | |||
2013 | // src1 < src2 and 1 if src1 > src2. | |||
2014 | void MemCmpExpansion::emitMemCmpResultBlock() { | |||
2015 | // Special case: if the memcmp result is used only in a zero equality, the | |||
2016 | // result does not need to be calculated and we can simply return 1. | |||
2017 | if (IsUsedForZeroCmp) { | |||
2018 | BasicBlock::iterator InsertPt = ResBlock.BB->getFirstInsertionPt(); | |||
2019 | Builder.SetInsertPoint(ResBlock.BB, InsertPt); | |||
2020 | Value *Res = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 1); | |||
2021 | PhiRes->addIncoming(Res, ResBlock.BB); | |||
2022 | BranchInst *NewBr = BranchInst::Create(EndBlock); | |||
2023 | Builder.Insert(NewBr); | |||
2024 | return; | |||
2025 | } | |||
2026 | BasicBlock::iterator InsertPt = ResBlock.BB->getFirstInsertionPt(); | |||
2027 | Builder.SetInsertPoint(ResBlock.BB, InsertPt); | |||
2028 | ||||
2029 | Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_ULT, ResBlock.PhiSrc1, | |||
2030 | ResBlock.PhiSrc2); | |||
2031 | ||||
2032 | Value *Res = | |||
2033 | Builder.CreateSelect(Cmp, ConstantInt::get(Builder.getInt32Ty(), -1), | |||
2034 | ConstantInt::get(Builder.getInt32Ty(), 1)); | |||
2035 | ||||
2036 | BranchInst *NewBr = BranchInst::Create(EndBlock); | |||
2037 | Builder.Insert(NewBr); | |||
2038 | PhiRes->addIncoming(Res, ResBlock.BB); | |||
2039 | } | |||
2040 | ||||
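// A worked example for calculateNumBlocks below: Size = 15 with MaxLoadSize
// = 8 yields one 8-byte, one 4-byte, one 2-byte, and one 1-byte block, so
// NumBlocks = 4 and NumBlocksNonOneByte = 3. For a zero-equality memcmp with
// NumLoadsPerBlock = 2 (an illustrative value), the 4 loads are then packed
// into 4 / 2 = 2 blocks.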
2041 | unsigned MemCmpExpansion::calculateNumBlocks(unsigned Size) { | |||
2042 | unsigned NumBlocks = 0; | |||
2043 | bool HaveOneByteLoad = false; | |||
2044 | unsigned RemainingSize = Size; | |||
2045 | unsigned LoadSize = MaxLoadSize; | |||
2046 | while (RemainingSize) { | |||
2047 | if (LoadSize == 1) | |||
2048 | HaveOneByteLoad = true; | |||
2049 | NumBlocks += RemainingSize / LoadSize; | |||
2050 | RemainingSize = RemainingSize % LoadSize; | |||
2051 | LoadSize = LoadSize / 2; | |||
2052 | } | |||
2053 | NumBlocksNonOneByte = HaveOneByteLoad ? (NumBlocks - 1) : NumBlocks; | |||
2054 | ||||
2055 | if (IsUsedForZeroCmp) | |||
2056 | NumBlocks = NumBlocks / NumLoadsPerBlock + | |||
2057 | (NumBlocks % NumLoadsPerBlock != 0 ? 1 : 0); | |||
2058 | ||||
2059 | return NumBlocks; | |||
2060 | } | |||
2061 | ||||
2062 | void MemCmpExpansion::setupResultBlockPHINodes() { | |||
2063 | Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8); | |||
2064 | Builder.SetInsertPoint(ResBlock.BB); | |||
2065 | ResBlock.PhiSrc1 = | |||
2066 | Builder.CreatePHI(MaxLoadType, NumBlocksNonOneByte, "phi.src1"); | |||
2067 | ResBlock.PhiSrc2 = | |||
2068 | Builder.CreatePHI(MaxLoadType, NumBlocksNonOneByte, "phi.src2"); | |||
2069 | } | |||
2070 | ||||
2071 | void MemCmpExpansion::setupEndBlockPHINodes() { | |||
2072 | Builder.SetInsertPoint(&EndBlock->front()); | |||
2073 | PhiRes = Builder.CreatePHI(Type::getInt32Ty(CI->getContext()), 2, "phi.res"); | |||
2074 | } | |||
2075 | ||||
2076 | Value *MemCmpExpansion::getMemCmpExpansionZeroCase(unsigned Size) { | |||
2077 | unsigned NumBytesProcessed = 0; | |||
2078 | // This loop populates each of the LoadCmpBlocks with the IR sequence to | |||
2079 | // handle multiple loads per block. | |||
2080 | for (unsigned i = 0; i < NumBlocks; ++i) | |||
2081 | emitLoadCompareBlockMultipleLoads(i, Size, NumBytesProcessed); | |||
2082 | ||||
2083 | emitMemCmpResultBlock(); | |||
2084 | return PhiRes; | |||
2085 | } | |||
2086 | ||||
2087 | /// A memcmp expansion that compares equality with 0 and only has one block of | |||
2088 | /// load and compare can bypass the compare, branch, and phi IR that is required | |||
2089 | /// in the general case. | |||
2090 | Value *MemCmpExpansion::getMemCmpEqZeroOneBlock(unsigned Size) { | |||
2091 | unsigned NumBytesProcessed = 0; | |||
2092 | Value *Cmp = getCompareLoadPairs(0, Size, NumBytesProcessed); | |||
2093 | return Builder.CreateZExt(Cmp, Type::getInt32Ty(CI->getContext())); | |||
2094 | } | |||
2095 | ||||
2096 | // This function expands the memcmp call into an inline expansion and returns | |||
2097 | // the memcmp result. | |||
2098 | Value *MemCmpExpansion::getMemCmpExpansion(uint64_t Size) { | |||
2099 | if (IsUsedForZeroCmp) | |||
2100 | return NumBlocks == 1 ? getMemCmpEqZeroOneBlock(Size) : | |||
2101 | getMemCmpExpansionZeroCase(Size); | |||
2102 | ||||
2103 | // This loop calls emitLoadCompareBlock for comparing Size bytes of the two | |||
2104 | // memcmp sources. It starts with loading using the maximum load size set by | |||
2105 | // the target. It processes any remaining bytes using a load size which is the | |||
2106 | // next smallest power of 2. | |||
2107 | unsigned LoadSize = MaxLoadSize; | |||
2108 | unsigned NumBytesToBeProcessed = Size; | |||
2109 | unsigned Index = 0; | |||
2110 | while (NumBytesToBeProcessed) { | |||
2111 | // Calculate how many blocks we can create with the current load size. | |||
2112 | unsigned NumBlocks = NumBytesToBeProcessed / LoadSize; | |||
2113 | unsigned GEPIndex = (Size - NumBytesToBeProcessed) / LoadSize; | |||
2114 | NumBytesToBeProcessed = NumBytesToBeProcessed % LoadSize; | |||
2115 | ||||
2116 | // For each NumBlocks, populate the instruction sequence for loading and | |||
2117 | // comparing LoadSize bytes. | |||
2118 | while (NumBlocks--) { | |||
2119 | emitLoadCompareBlock(Index, LoadSize, GEPIndex); | |||
2120 | Index++; | |||
2121 | GEPIndex++; | |||
2122 | } | |||
2123 | // Get the next LoadSize to use. | |||
2124 | LoadSize = LoadSize / 2; | |||
2125 | } | |||
2126 | ||||
2127 | emitMemCmpResultBlock(); | |||
2128 | return PhiRes; | |||
2129 | } | |||
2130 | ||||
2131 | /// This function checks to see if an expansion of memcmp can be generated. | |||
2132 | /// It checks for constant compare size that is less than the max inline size. | |||
2133 | /// If an expansion cannot occur, returns false to leave as a library call. | |||
2134 | /// Otherwise, the library call is replaced with a new IR instruction sequence. | |||
2135 | /// We want to transform: | |||
2136 | /// %call = call signext i32 @memcmp(i8* %0, i8* %1, i64 15) | |||
2137 | /// To: | |||
2138 | /// loadbb: | |||
2139 | /// %0 = bitcast i32* %buffer2 to i8* | |||
2140 | /// %1 = bitcast i32* %buffer1 to i8* | |||
2141 | /// %2 = bitcast i8* %1 to i64* | |||
2142 | /// %3 = bitcast i8* %0 to i64* | |||
2143 | /// %4 = load i64, i64* %2 | |||
2144 | /// %5 = load i64, i64* %3 | |||
2145 | /// %6 = call i64 @llvm.bswap.i64(i64 %4) | |||
2146 | /// %7 = call i64 @llvm.bswap.i64(i64 %5) | |||
2147 | /// %8 = sub i64 %6, %7 | |||
2148 | /// %9 = icmp ne i64 %8, 0 | |||
2149 | /// br i1 %9, label %res_block, label %loadbb1 | |||
2150 | /// res_block: ; preds = %loadbb2, | |||
2151 | /// %loadbb1, %loadbb | |||
2152 | /// %phi.src1 = phi i64 [ %6, %loadbb ], [ %22, %loadbb1 ], [ %36, %loadbb2 ] | |||
2153 | /// %phi.src2 = phi i64 [ %7, %loadbb ], [ %23, %loadbb1 ], [ %37, %loadbb2 ] | |||
2154 | /// %10 = icmp ult i64 %phi.src1, %phi.src2 | |||
2155 | /// %11 = select i1 %10, i32 -1, i32 1 | |||
2156 | /// br label %endblock | |||
2157 | /// loadbb1: ; preds = %loadbb | |||
2158 | /// %12 = bitcast i32* %buffer2 to i8* | |||
2159 | /// %13 = bitcast i32* %buffer1 to i8* | |||
2160 | /// %14 = bitcast i8* %13 to i32* | |||
2161 | /// %15 = bitcast i8* %12 to i32* | |||
2162 | /// %16 = getelementptr i32, i32* %14, i32 2 | |||
2163 | /// %17 = getelementptr i32, i32* %15, i32 2 | |||
2164 | /// %18 = load i32, i32* %16 | |||
2165 | /// %19 = load i32, i32* %17 | |||
2166 | /// %20 = call i32 @llvm.bswap.i32(i32 %18) | |||
2167 | /// %21 = call i32 @llvm.bswap.i32(i32 %19) | |||
2168 | /// %22 = zext i32 %20 to i64 | |||
2169 | /// %23 = zext i32 %21 to i64 | |||
2170 | /// %24 = sub i64 %22, %23 | |||
2171 | /// %25 = icmp ne i64 %24, 0 | |||
2172 | /// br i1 %25, label %res_block, label %loadbb2 | |||
2173 | /// loadbb2: ; preds = %loadbb1 | |||
2174 | /// %26 = bitcast i32* %buffer2 to i8* | |||
2175 | /// %27 = bitcast i32* %buffer1 to i8* | |||
2176 | /// %28 = bitcast i8* %27 to i16* | |||
2177 | /// %29 = bitcast i8* %26 to i16* | |||
2178 | /// %30 = getelementptr i16, i16* %28, i16 6 | |||
2179 | /// %31 = getelementptr i16, i16* %29, i16 6 | |||
2180 | /// %32 = load i16, i16* %30 | |||
2181 | /// %33 = load i16, i16* %31 | |||
2182 | /// %34 = call i16 @llvm.bswap.i16(i16 %32) | |||
2183 | /// %35 = call i16 @llvm.bswap.i16(i16 %33) | |||
2184 | /// %36 = zext i16 %34 to i64 | |||
2185 | /// %37 = zext i16 %35 to i64 | |||
2186 | /// %38 = sub i64 %36, %37 | |||
2187 | /// %39 = icmp ne i64 %38, 0 | |||
2188 | /// br i1 %39, label %res_block, label %loadbb3 | |||
2189 | /// loadbb3: ; preds = %loadbb2 | |||
2190 | /// %40 = bitcast i32* %buffer2 to i8* | |||
2191 | /// %41 = bitcast i32* %buffer1 to i8* | |||
2192 | /// %42 = getelementptr i8, i8* %41, i8 14 | |||
2193 | /// %43 = getelementptr i8, i8* %40, i8 14 | |||
2194 | /// %44 = load i8, i8* %42 | |||
2195 | /// %45 = load i8, i8* %43 | |||
2196 | /// %46 = zext i8 %44 to i32 | |||
2197 | /// %47 = zext i8 %45 to i32 | |||
2198 | /// %48 = sub i32 %46, %47 | |||
2199 | /// br label %endblock | |||
2200 | /// endblock: ; preds = %res_block, | |||
2201 | /// %loadbb3 | |||
2202 | /// %phi.res = phi i32 [ %48, %loadbb3 ], [ %11, %res_block ] | |||
2203 | /// ret i32 %phi.res | |||
2204 | static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI, | |||
2205 | const TargetLowering *TLI, const DataLayout *DL) { | |||
2206 | NumMemCmpCalls++; | |||
2207 | ||||
2208 |   // Ask TTI whether the target would like memcmp expanded; if so, it also | |||
2209 |   // provides the MaxLoadSize. | |||
2210 | unsigned MaxLoadSize; | |||
2211 | if (!TTI->expandMemCmp(CI, MaxLoadSize)) | |||
2212 | return false; | |||
2213 | ||||
2214 | // Early exit from expansion if -Oz. | |||
2215 | if (CI->getFunction()->optForMinSize()) | |||
2216 | return false; | |||
2217 | ||||
2218 | // Early exit from expansion if size is not a constant. | |||
2219 | ConstantInt *SizeCast = dyn_cast<ConstantInt>(CI->getArgOperand(2)); | |||
2220 | if (!SizeCast) { | |||
2221 | NumMemCmpNotConstant++; | |||
2222 | return false; | |||
2223 | } | |||
2224 | ||||
2225 |   // Early exit from expansion if the size requires more loads than the target allows. | |||
2226 | uint64_t SizeVal = SizeCast->getZExtValue(); | |||
2227 | unsigned NumLoads = 0; | |||
2228 | unsigned RemainingSize = SizeVal; | |||
2229 | unsigned LoadSize = MaxLoadSize; | |||
2230 | while (RemainingSize) { | |||
2231 | NumLoads += RemainingSize / LoadSize; | |||
2232 | RemainingSize = RemainingSize % LoadSize; | |||
2233 | LoadSize = LoadSize / 2; | |||
2234 | } | |||
2235 | ||||
2236 | if (NumLoads > TLI->getMaxExpandSizeMemcmp(CI->getFunction()->optForSize())) { | |||
2237 | NumMemCmpGreaterThanMax++; | |||
2238 | return false; | |||
2239 | } | |||
2240 | ||||
2241 | NumMemCmpInlined++; | |||
2242 | ||||
2243 |   // The MemCmpHelper object creates and sets up the basic blocks required | |||
2244 |   // for expanding memcmp with size SizeVal. | |||
2245 | unsigned NumLoadsPerBlock = MemCmpNumLoadsPerBlock; | |||
2246 | MemCmpExpansion MemCmpHelper(CI, SizeVal, MaxLoadSize, NumLoadsPerBlock, *DL); | |||
2247 | ||||
2248 | Value *Res = MemCmpHelper.getMemCmpExpansion(SizeVal); | |||
2249 | ||||
2250 | // Replace call with result of expansion and erase call. | |||
2251 | CI->replaceAllUsesWith(Res); | |||
2252 | CI->eraseFromParent(); | |||
2253 | ||||
2254 | return true; | |||
2255 | } | |||
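// A minimal standalone sketch of the load-count computation above (the name
// countMemCmpLoads is illustrative, not from this file): the compare size is
// decomposed greedily into power-of-two loads, and expansion is rejected
// when the resulting count exceeds the target's limit.
static unsigned countMemCmpLoads(uint64_t Size, unsigned MaxLoadSize) {
  unsigned NumLoads = 0;
  for (unsigned LoadSize = MaxLoadSize; Size && LoadSize; LoadSize /= 2) {
    NumLoads += Size / LoadSize; // as many full loads of this width as fit
    Size %= LoadSize;            // leftover bytes go to narrower widths
  }
  return NumLoads; // e.g. Size = 15, MaxLoadSize = 8 -> 4 loads (8+4+2+1)
}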
2256 | ||||
2257 | bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) { | |||
2258 | BasicBlock *BB = CI->getParent(); | |||
2259 | ||||
2260 | // Lower inline assembly if we can. | |||
2261 |   // If we found an inline asm expression, and if the target knows how to | |||
2262 | // lower it to normal LLVM code, do so now. | |||
2263 | if (TLI && isa<InlineAsm>(CI->getCalledValue())) { | |||
2264 | if (TLI->ExpandInlineAsm(CI)) { | |||
2265 | // Avoid invalidating the iterator. | |||
2266 | CurInstIterator = BB->begin(); | |||
2267 | // Avoid processing instructions out of order, which could cause | |||
2268 | // reuse before a value is defined. | |||
2269 | SunkAddrs.clear(); | |||
2270 | return true; | |||
2271 | } | |||
2272 | // Sink address computing for memory operands into the block. | |||
2273 | if (optimizeInlineAsmInst(CI)) | |||
2274 | return true; | |||
2275 | } | |||
2276 | ||||
2277 | // Align the pointer arguments to this call if the target thinks it's a good | |||
2278 | // idea | |||
2279 | unsigned MinSize, PrefAlign; | |||
2280 | if (TLI && TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) { | |||
2281 | for (auto &Arg : CI->arg_operands()) { | |||
2282 | // We want to align both objects whose address is used directly and | |||
2283 | // objects whose address is used in casts and GEPs, though it only makes | |||
2284 | // sense for GEPs if the offset is a multiple of the desired alignment and | |||
2285 | // if size - offset meets the size threshold. | |||
2286 | if (!Arg->getType()->isPointerTy()) | |||
2287 | continue; | |||
2288 | APInt Offset(DL->getPointerSizeInBits( | |||
2289 | cast<PointerType>(Arg->getType())->getAddressSpace()), | |||
2290 | 0); | |||
2291 | Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*DL, Offset); | |||
2292 | uint64_t Offset2 = Offset.getLimitedValue(); | |||
2293 | if ((Offset2 & (PrefAlign-1)) != 0) | |||
2294 | continue; | |||
2295 | AllocaInst *AI; | |||
2296 | if ((AI = dyn_cast<AllocaInst>(Val)) && AI->getAlignment() < PrefAlign && | |||
2297 | DL->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2) | |||
2298 | AI->setAlignment(PrefAlign); | |||
2299 | // Global variables can only be aligned if they are defined in this | |||
2300 | // object (i.e. they are uniquely initialized in this object), and | |||
2301 | // over-aligning global variables that have an explicit section is | |||
2302 | // forbidden. | |||
2303 | GlobalVariable *GV; | |||
2304 | if ((GV = dyn_cast<GlobalVariable>(Val)) && GV->canIncreaseAlignment() && | |||
2305 | GV->getPointerAlignment(*DL) < PrefAlign && | |||
2306 | DL->getTypeAllocSize(GV->getValueType()) >= | |||
2307 | MinSize + Offset2) | |||
2308 | GV->setAlignment(PrefAlign); | |||
2309 | } | |||
2310 | // If this is a memcpy (or similar) then we may be able to improve the | |||
2311 | // alignment | |||
2312 | if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) { | |||
2313 | unsigned Align = getKnownAlignment(MI->getDest(), *DL); | |||
2314 | if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) | |||
2315 | Align = std::min(Align, getKnownAlignment(MTI->getSource(), *DL)); | |||
2316 | if (Align > MI->getAlignment()) | |||
2317 | MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), Align)); | |||
2318 | } | |||
2319 | } | |||
2320 | ||||
2321 | // If we have a cold call site, try to sink addressing computation into the | |||
2322 | // cold block. This interacts with our handling for loads and stores to | |||
2323 | // ensure that we can fold all uses of a potential addressing computation | |||
2324 | // into their uses. TODO: generalize this to work over profiling data | |||
2325 | if (!OptSize && CI->hasFnAttr(Attribute::Cold)) | |||
2326 | for (auto &Arg : CI->arg_operands()) { | |||
2327 | if (!Arg->getType()->isPointerTy()) | |||
2328 | continue; | |||
2329 | unsigned AS = Arg->getType()->getPointerAddressSpace(); | |||
2330 | return optimizeMemoryInst(CI, Arg, Arg->getType(), AS); | |||
2331 | } | |||
2332 | ||||
2333 | IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI); | |||
2334 | if (II) { | |||
2335 | switch (II->getIntrinsicID()) { | |||
2336 | default: break; | |||
2337 | case Intrinsic::objectsize: { | |||
2338 | // Lower all uses of llvm.objectsize.* | |||
2339 | ConstantInt *RetVal = | |||
2340 | lowerObjectSizeCall(II, *DL, TLInfo, /*MustSucceed=*/true); | |||
2341 |       // Substituting this can cause recursive | |||
2342 |       // simplifications, which can invalidate our iterator. | |||
2343 |       // Use a WeakTrackingVH to hold onto it | |||
2344 |       // in case this happens. | |||
2345 | Value *CurValue = &*CurInstIterator; | |||
2346 | WeakTrackingVH IterHandle(CurValue); | |||
2347 | ||||
2348 | replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr); | |||
2349 | ||||
2350 | // If the iterator instruction was recursively deleted, start over at the | |||
2351 | // start of the block. | |||
2352 | if (IterHandle != CurValue) { | |||
2353 | CurInstIterator = BB->begin(); | |||
2354 | SunkAddrs.clear(); | |||
2355 | } | |||
2356 | return true; | |||
2357 | } | |||
2358 | case Intrinsic::aarch64_stlxr: | |||
2359 | case Intrinsic::aarch64_stxr: { | |||
2360 | ZExtInst *ExtVal = dyn_cast<ZExtInst>(CI->getArgOperand(0)); | |||
2361 | if (!ExtVal || !ExtVal->hasOneUse() || | |||
2362 | ExtVal->getParent() == CI->getParent()) | |||
2363 | return false; | |||
2364 | // Sink a zext feeding stlxr/stxr before it, so it can be folded into it. | |||
2365 | ExtVal->moveBefore(CI); | |||
2366 | // Mark this instruction as "inserted by CGP", so that other | |||
2367 | // optimizations don't touch it. | |||
2368 | InsertedInsts.insert(ExtVal); | |||
2369 | return true; | |||
2370 | } | |||
2371 | case Intrinsic::invariant_group_barrier: | |||
2372 | II->replaceAllUsesWith(II->getArgOperand(0)); | |||
2373 | II->eraseFromParent(); | |||
2374 | return true; | |||
2375 | ||||
2376 | case Intrinsic::cttz: | |||
2377 | case Intrinsic::ctlz: | |||
2378 | // If counting zeros is expensive, try to avoid it. | |||
2379 | return despeculateCountZeros(II, TLI, DL, ModifiedDT); | |||
2380 | } | |||
2381 | ||||
2382 | if (TLI) { | |||
2383 | SmallVector<Value*, 2> PtrOps; | |||
2384 | Type *AccessTy; | |||
2385 | if (TLI->getAddrModeArguments(II, PtrOps, AccessTy)) | |||
2386 | while (!PtrOps.empty()) { | |||
2387 | Value *PtrVal = PtrOps.pop_back_val(); | |||
2388 | unsigned AS = PtrVal->getType()->getPointerAddressSpace(); | |||
2389 | if (optimizeMemoryInst(II, PtrVal, AccessTy, AS)) | |||
2390 | return true; | |||
2391 | } | |||
2392 | } | |||
2393 | } | |||
2394 | ||||
2395 | // From here on out we're working with named functions. | |||
2396 | if (!CI->getCalledFunction()) return false; | |||
2397 | ||||
2398 | // Lower all default uses of _chk calls. This is very similar | |||
2399 | // to what InstCombineCalls does, but here we are only lowering calls | |||
2400 | // to fortified library functions (e.g. __memcpy_chk) that have the default | |||
2401 | // "don't know" as the objectsize. Anything else should be left alone. | |||
2402 | FortifiedLibCallSimplifier Simplifier(TLInfo, true); | |||
2403 | if (Value *V = Simplifier.optimizeCall(CI)) { | |||
2404 | CI->replaceAllUsesWith(V); | |||
2405 | CI->eraseFromParent(); | |||
2406 | return true; | |||
2407 | } | |||
2408 | ||||
2409 | LibFunc Func; | |||
2410 | if (TLInfo->getLibFunc(ImmutableCallSite(CI), Func) && | |||
2411 |       Func == LibFunc_memcmp && TLI && expandMemCmp(CI, TTI, TLI, DL)) { | |||
2412 | ModifiedDT = true; | |||
2413 | return true; | |||
2414 | } | |||
2415 | return false; | |||
2416 | } | |||
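// For illustration, the FortifiedLibCallSimplifier step above has this
// effect (a sketch of the transformation, not code from this file):
//   __memcpy_chk(dst, src, n, (size_t)-1)  -->  memcpy(dst, src, n)
// i.e. a _chk call whose object-size argument is the "don't know" sentinel
// is lowered to the plain libcall; calls carrying a real object size are
// left alone.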
2417 | ||||
2418 | /// Look for opportunities to duplicate return instructions to the predecessor | |||
2419 | /// to enable tail call optimizations. The case it is currently looking for is: | |||
2420 | /// @code | |||
2421 | /// bb0: | |||
2422 | /// %tmp0 = tail call i32 @f0() | |||
2423 | /// br label %return | |||
2424 | /// bb1: | |||
2425 | /// %tmp1 = tail call i32 @f1() | |||
2426 | /// br label %return | |||
2427 | /// bb2: | |||
2428 | /// %tmp2 = tail call i32 @f2() | |||
2429 | /// br label %return | |||
2430 | /// return: | |||
2431 | /// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ] | |||
2432 | /// ret i32 %retval | |||
2433 | /// @endcode | |||
2434 | /// | |||
2435 | /// => | |||
2436 | /// | |||
2437 | /// @code | |||
2438 | /// bb0: | |||
2439 | /// %tmp0 = tail call i32 @f0() | |||
2440 | /// ret i32 %tmp0 | |||
2441 | /// bb1: | |||
2442 | /// %tmp1 = tail call i32 @f1() | |||
2443 | /// ret i32 %tmp1 | |||
2444 | /// bb2: | |||
2445 | /// %tmp2 = tail call i32 @f2() | |||
2446 | /// ret i32 %tmp2 | |||
2447 | /// @endcode | |||
2448 | bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB) { | |||
2449 | if (!TLI) | |||
2450 | return false; | |||
2451 | ||||
2452 | ReturnInst *RetI = dyn_cast<ReturnInst>(BB->getTerminator()); | |||
2453 | if (!RetI) | |||
2454 | return false; | |||
2455 | ||||
2456 | PHINode *PN = nullptr; | |||
2457 | BitCastInst *BCI = nullptr; | |||
2458 | Value *V = RetI->getReturnValue(); | |||
2459 | if (V) { | |||
2460 | BCI = dyn_cast<BitCastInst>(V); | |||
2461 | if (BCI) | |||
2462 | V = BCI->getOperand(0); | |||
2463 | ||||
2464 | PN = dyn_cast<PHINode>(V); | |||
2465 | if (!PN) | |||
2466 | return false; | |||
2467 | } | |||
2468 | ||||
2469 | if (PN && PN->getParent() != BB) | |||
2470 | return false; | |||
2471 | ||||
2472 | // Make sure there are no instructions between the PHI and return, or that the | |||
2473 | // return is the first instruction in the block. | |||
2474 | if (PN) { | |||
2475 | BasicBlock::iterator BI = BB->begin(); | |||
2476 | do { ++BI; } while (isa<DbgInfoIntrinsic>(BI)); | |||
2477 | if (&*BI == BCI) | |||
2478 | // Also skip over the bitcast. | |||
2479 | ++BI; | |||
2480 | if (&*BI != RetI) | |||
2481 | return false; | |||
2482 | } else { | |||
2483 | BasicBlock::iterator BI = BB->begin(); | |||
2484 | while (isa<DbgInfoIntrinsic>(BI)) ++BI; | |||
2485 | if (&*BI != RetI) | |||
2486 | return false; | |||
2487 | } | |||
2488 | ||||
2489 |   // Only dup the ReturnInst if the CallInst is likely to be emitted as a | |||
2490 |   // tail call. | |||
2491 | const Function *F = BB->getParent(); | |||
2492 | SmallVector<CallInst*, 4> TailCalls; | |||
2493 | if (PN) { | |||
2494 | for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) { | |||
2495 | CallInst *CI = dyn_cast<CallInst>(PN->getIncomingValue(I)); | |||
2496 | // Make sure the phi value is indeed produced by the tail call. | |||
2497 | if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) && | |||
2498 | TLI->mayBeEmittedAsTailCall(CI) && | |||
2499 | attributesPermitTailCall(F, CI, RetI, *TLI)) | |||
2500 | TailCalls.push_back(CI); | |||
2501 | } | |||
2502 | } else { | |||
2503 | SmallPtrSet<BasicBlock*, 4> VisitedBBs; | |||
2504 | for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) { | |||
2505 | if (!VisitedBBs.insert(*PI).second) | |||
2506 | continue; | |||
2507 | ||||
2508 | BasicBlock::InstListType &InstList = (*PI)->getInstList(); | |||
2509 | BasicBlock::InstListType::reverse_iterator RI = InstList.rbegin(); | |||
2510 | BasicBlock::InstListType::reverse_iterator RE = InstList.rend(); | |||
2511 | do { ++RI; } while (RI != RE && isa<DbgInfoIntrinsic>(&*RI)); | |||
2512 | if (RI == RE) | |||
2513 | continue; | |||
2514 | ||||
2515 | CallInst *CI = dyn_cast<CallInst>(&*RI); | |||
2516 | if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI) && | |||
2517 | attributesPermitTailCall(F, CI, RetI, *TLI)) | |||
2518 | TailCalls.push_back(CI); | |||
2519 | } | |||
2520 | } | |||
2521 | ||||
2522 | bool Changed = false; | |||
2523 | for (unsigned i = 0, e = TailCalls.size(); i != e; ++i) { | |||
2524 | CallInst *CI = TailCalls[i]; | |||
2525 | CallSite CS(CI); | |||
2526 | ||||
2527 | // Conservatively require the attributes of the call to match those of the | |||
2528 | // return. Ignore noalias because it doesn't affect the call sequence. | |||
2529 |     AttributeList CallerAttrs = F->getAttributes(); | |||
2530 |     AttributeList CalleeAttrs = CS.getAttributes(); | |||
2531 |     if (AttrBuilder(CalleeAttrs, AttributeList::ReturnIndex) | |||
2532 |             .removeAttribute(Attribute::NoAlias) != | |||
2533 |         AttrBuilder(CallerAttrs, AttributeList::ReturnIndex).removeAttribute(Attribute::NoAlias)) | |||
2534 | continue; | |||
2535 | ||||
2536 | // Make sure the call instruction is followed by an unconditional branch to | |||
2537 | // the return block. | |||
2538 | BasicBlock *CallBB = CI->getParent(); | |||
2539 | BranchInst *BI = dyn_cast<BranchInst>(CallBB->getTerminator()); | |||
2540 | if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB) | |||
2541 | continue; | |||
2542 | ||||
2543 | // Duplicate the return into CallBB. | |||
2544 | (void)FoldReturnIntoUncondBranch(RetI, BB, CallBB); | |||
2545 | ModifiedDT = Changed = true; | |||
2546 | ++NumRetsDup; | |||
2547 | } | |||
2548 | ||||
2549 | // If we eliminated all predecessors of the block, delete the block now. | |||
2550 | if (Changed && !BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB)) | |||
2551 | BB->eraseFromParent(); | |||
2552 | ||||
2553 | return Changed; | |||
2554 | } | |||
2555 | ||||
2556 | //===----------------------------------------------------------------------===// | |||
2557 | // Memory Optimization | |||
2558 | //===----------------------------------------------------------------------===// | |||
2559 | ||||
2560 | namespace { | |||
2561 | ||||
2562 | /// This is an extended version of TargetLowering::AddrMode | |||
2563 | /// which holds actual Value*'s for register values. | |||
2564 | struct ExtAddrMode : public TargetLowering::AddrMode { | |||
2565 | Value *BaseReg; | |||
2566 | Value *ScaledReg; | |||
2567 | ExtAddrMode() : BaseReg(nullptr), ScaledReg(nullptr) {} | |||
2568 | void print(raw_ostream &OS) const; | |||
2569 | void dump() const; | |||
2570 | ||||
2571 | bool operator==(const ExtAddrMode& O) const { | |||
2572 | return (BaseReg == O.BaseReg) && (ScaledReg == O.ScaledReg) && | |||
2573 | (BaseGV == O.BaseGV) && (BaseOffs == O.BaseOffs) && | |||
2574 | (HasBaseReg == O.HasBaseReg) && (Scale == O.Scale); | |||
2575 | } | |||
2576 | }; | |||
2577 | ||||
2578 | #ifndef NDEBUG | |||
2579 | static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) { | |||
2580 | AM.print(OS); | |||
2581 | return OS; | |||
2582 | } | |||
2583 | #endif | |||
2584 | ||||
2585 | void ExtAddrMode::print(raw_ostream &OS) const { | |||
2586 | bool NeedPlus = false; | |||
2587 | OS << "["; | |||
2588 | if (BaseGV) { | |||
2589 | OS << (NeedPlus ? " + " : "") | |||
2590 | << "GV:"; | |||
2591 | BaseGV->printAsOperand(OS, /*PrintType=*/false); | |||
2592 | NeedPlus = true; | |||
2593 | } | |||
2594 | ||||
2595 | if (BaseOffs) { | |||
2596 | OS << (NeedPlus ? " + " : "") | |||
2597 | << BaseOffs; | |||
2598 | NeedPlus = true; | |||
2599 | } | |||
2600 | ||||
2601 | if (BaseReg) { | |||
2602 | OS << (NeedPlus ? " + " : "") | |||
2603 | << "Base:"; | |||
2604 | BaseReg->printAsOperand(OS, /*PrintType=*/false); | |||
2605 | NeedPlus = true; | |||
2606 | } | |||
2607 | if (Scale) { | |||
2608 | OS << (NeedPlus ? " + " : "") | |||
2609 | << Scale << "*"; | |||
2610 | ScaledReg->printAsOperand(OS, /*PrintType=*/false); | |||
2611 | } | |||
2612 | ||||
2613 | OS << ']'; | |||
2614 | } | |||
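// For reference, an address of the form GV + 16 + %base + 4 * %idx prints
// from the routine above as (illustrative operand names):
//   [GV:@g + 16 + Base:%base + 4*%idx]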
2615 | ||||
2616 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) | |||
2617 | LLVM_DUMP_METHOD void ExtAddrMode::dump() const { | |||
2618 | print(dbgs()); | |||
2619 | dbgs() << '\n'; | |||
2620 | } | |||
2621 | #endif | |||
2622 | ||||
2623 | /// \brief This class provides transaction based operation on the IR. | |||
2624 | /// Every change made through this class is recorded in the internal state and | |||
2625 | /// can be undone (rollback) until commit is called. | |||
2626 | class TypePromotionTransaction { | |||
2627 | ||||
2628 | /// \brief This represents the common interface of the individual transaction. | |||
2629 | /// Each class implements the logic for doing one specific modification on | |||
2630 | /// the IR via the TypePromotionTransaction. | |||
2631 | class TypePromotionAction { | |||
2632 | protected: | |||
2633 | /// The Instruction modified. | |||
2634 | Instruction *Inst; | |||
2635 | ||||
2636 | public: | |||
2637 | /// \brief Constructor of the action. | |||
2638 | /// The constructor performs the related action on the IR. | |||
2639 | TypePromotionAction(Instruction *Inst) : Inst(Inst) {} | |||
2640 | ||||
2641 | virtual ~TypePromotionAction() {} | |||
2642 | ||||
2643 | /// \brief Undo the modification done by this action. | |||
2644 | /// When this method is called, the IR must be in the same state as it was | |||
2645 | /// before this action was applied. | |||
2646 | /// \pre Undoing the action works if and only if the IR is in the exact same | |||
2647 | /// state as it was directly after this action was applied. | |||
2648 | virtual void undo() = 0; | |||
2649 | ||||
2650 |     /// \brief Commit every change made by this action. | |||
2651 |     /// When the action's effects on the IR are to be kept, it is important | |||
2652 |     /// to call this function; otherwise internal state may be retained forever. | |||
2653 | virtual void commit() { | |||
2654 | // Nothing to be done, this action is not doing anything. | |||
2655 | } | |||
2656 | }; | |||
2657 | ||||
2658 | /// \brief Utility to remember the position of an instruction. | |||
2659 | class InsertionHandler { | |||
2660 |     /// Position of an instruction. | |||
2661 |     /// Either the instruction: | |||
2662 |     /// - is the first in a basic block: BB is used; | |||
2663 |     /// - or has a previous instruction: PrevInst is used. | |||
2664 | union { | |||
2665 | Instruction *PrevInst; | |||
2666 | BasicBlock *BB; | |||
2667 | } Point; | |||
2668 | /// Remember whether or not the instruction had a previous instruction. | |||
2669 | bool HasPrevInstruction; | |||
2670 | ||||
2671 | public: | |||
2672 | /// \brief Record the position of \p Inst. | |||
2673 | InsertionHandler(Instruction *Inst) { | |||
2674 | BasicBlock::iterator It = Inst->getIterator(); | |||
2675 | HasPrevInstruction = (It != (Inst->getParent()->begin())); | |||
2676 | if (HasPrevInstruction) | |||
2677 | Point.PrevInst = &*--It; | |||
2678 | else | |||
2679 | Point.BB = Inst->getParent(); | |||
2680 | } | |||
2681 | ||||
2682 | /// \brief Insert \p Inst at the recorded position. | |||
2683 | void insert(Instruction *Inst) { | |||
2684 | if (HasPrevInstruction) { | |||
2685 | if (Inst->getParent()) | |||
2686 | Inst->removeFromParent(); | |||
2687 | Inst->insertAfter(Point.PrevInst); | |||
2688 | } else { | |||
2689 | Instruction *Position = &*Point.BB->getFirstInsertionPt(); | |||
2690 | if (Inst->getParent()) | |||
2691 | Inst->moveBefore(Position); | |||
2692 | else | |||
2693 | Inst->insertBefore(Position); | |||
2694 | } | |||
2695 | } | |||
2696 | }; | |||
2697 | ||||
2698 | /// \brief Move an instruction before another. | |||
2699 | class InstructionMoveBefore : public TypePromotionAction { | |||
2700 | /// Original position of the instruction. | |||
2701 | InsertionHandler Position; | |||
2702 | ||||
2703 | public: | |||
2704 | /// \brief Move \p Inst before \p Before. | |||
2705 | InstructionMoveBefore(Instruction *Inst, Instruction *Before) | |||
2706 | : TypePromotionAction(Inst), Position(Inst) { | |||
2707 | DEBUG(dbgs() << "Do: move: " << *Inst << "\nbefore: " << *Before << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Do: move: " << * Inst << "\nbefore: " << *Before << "\n"; } } while (false); | |||
2708 | Inst->moveBefore(Before); | |||
2709 | } | |||
2710 | ||||
2711 | /// \brief Move the instruction back to its original position. | |||
2712 | void undo() override { | |||
2713 | DEBUG(dbgs() << "Undo: moveBefore: " << *Inst << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Undo: moveBefore: " << *Inst << "\n"; } } while (false); | |||
2714 | Position.insert(Inst); | |||
2715 | } | |||
2716 | }; | |||
2717 | ||||
2718 | /// \brief Set the operand of an instruction with a new value. | |||
2719 | class OperandSetter : public TypePromotionAction { | |||
2720 | /// Original operand of the instruction. | |||
2721 | Value *Origin; | |||
2722 |     /// Index of the modified operand. | |||
2723 | unsigned Idx; | |||
2724 | ||||
2725 | public: | |||
2726 | /// \brief Set \p Idx operand of \p Inst with \p NewVal. | |||
2727 | OperandSetter(Instruction *Inst, unsigned Idx, Value *NewVal) | |||
2728 | : TypePromotionAction(Inst), Idx(Idx) { | |||
2729 | DEBUG(dbgs() << "Do: setOperand: " << Idx << "\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Do: setOperand: " << Idx << "\n" << "for:" << *Inst << "\n" << "with:" << *NewVal << "\n"; } } while ( false) | |||
2730 | << "for:" << *Inst << "\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Do: setOperand: " << Idx << "\n" << "for:" << *Inst << "\n" << "with:" << *NewVal << "\n"; } } while ( false) | |||
2731 | << "with:" << *NewVal << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Do: setOperand: " << Idx << "\n" << "for:" << *Inst << "\n" << "with:" << *NewVal << "\n"; } } while ( false); | |||
2732 | Origin = Inst->getOperand(Idx); | |||
2733 | Inst->setOperand(Idx, NewVal); | |||
2734 | } | |||
2735 | ||||
2736 | /// \brief Restore the original value of the instruction. | |||
2737 | void undo() override { | |||
2738 | DEBUG(dbgs() << "Undo: setOperand:" << Idx << "\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Undo: setOperand:" << Idx << "\n" << "for: " << *Inst << "\n" << "with: " << *Origin << "\n"; } } while ( false) | |||
2739 | << "for: " << *Inst << "\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Undo: setOperand:" << Idx << "\n" << "for: " << *Inst << "\n" << "with: " << *Origin << "\n"; } } while ( false) | |||
2740 | << "with: " << *Origin << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Undo: setOperand:" << Idx << "\n" << "for: " << *Inst << "\n" << "with: " << *Origin << "\n"; } } while ( false); | |||
2741 | Inst->setOperand(Idx, Origin); | |||
2742 | } | |||
2743 | }; | |||
2744 | ||||
2745 | /// \brief Hide the operands of an instruction. | |||
2746 |   /// Act as if this instruction were not using any of its operands. | |||
2747 | class OperandsHider : public TypePromotionAction { | |||
2748 | /// The list of original operands. | |||
2749 | SmallVector<Value *, 4> OriginalValues; | |||
2750 | ||||
2751 | public: | |||
2752 | /// \brief Remove \p Inst from the uses of the operands of \p Inst. | |||
2753 | OperandsHider(Instruction *Inst) : TypePromotionAction(Inst) { | |||
2754 | DEBUG(dbgs() << "Do: OperandsHider: " << *Inst << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Do: OperandsHider: " << *Inst << "\n"; } } while (false); | |||
2755 | unsigned NumOpnds = Inst->getNumOperands(); | |||
2756 | OriginalValues.reserve(NumOpnds); | |||
2757 | for (unsigned It = 0; It < NumOpnds; ++It) { | |||
2758 | // Save the current operand. | |||
2759 | Value *Val = Inst->getOperand(It); | |||
2760 | OriginalValues.push_back(Val); | |||
2761 | // Set a dummy one. | |||
2762 | // We could use OperandSetter here, but that would imply an overhead | |||
2763 | // that we are not willing to pay. | |||
2764 | Inst->setOperand(It, UndefValue::get(Val->getType())); | |||
2765 | } | |||
2766 | } | |||
2767 | ||||
2768 | /// \brief Restore the original list of uses. | |||
2769 | void undo() override { | |||
2770 | DEBUG(dbgs() << "Undo: OperandsHider: " << *Inst << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Undo: OperandsHider: " << *Inst << "\n"; } } while (false); | |||
2771 | for (unsigned It = 0, EndIt = OriginalValues.size(); It != EndIt; ++It) | |||
2772 | Inst->setOperand(It, OriginalValues[It]); | |||
2773 | } | |||
2774 | }; | |||
2775 | ||||
2776 | /// \brief Build a truncate instruction. | |||
2777 | class TruncBuilder : public TypePromotionAction { | |||
2778 | Value *Val; | |||
2779 | public: | |||
2780 | /// \brief Build a truncate instruction of \p Opnd producing a \p Ty | |||
2781 | /// result. | |||
2782 | /// trunc Opnd to Ty. | |||
2783 | TruncBuilder(Instruction *Opnd, Type *Ty) : TypePromotionAction(Opnd) { | |||
2784 | IRBuilder<> Builder(Opnd); | |||
2785 | Val = Builder.CreateTrunc(Opnd, Ty, "promoted"); | |||
2786 | DEBUG(dbgs() << "Do: TruncBuilder: " << *Val << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Do: TruncBuilder: " << *Val << "\n"; } } while (false); | |||
2787 | } | |||
2788 | ||||
2789 | /// \brief Get the built value. | |||
2790 | Value *getBuiltValue() { return Val; } | |||
2791 | ||||
2792 | /// \brief Remove the built instruction. | |||
2793 | void undo() override { | |||
2794 | DEBUG(dbgs() << "Undo: TruncBuilder: " << *Val << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Undo: TruncBuilder: " << *Val << "\n"; } } while (false); | |||
2795 | if (Instruction *IVal = dyn_cast<Instruction>(Val)) | |||
2796 | IVal->eraseFromParent(); | |||
2797 | } | |||
2798 | }; | |||
2799 | ||||
2800 | /// \brief Build a sign extension instruction. | |||
2801 | class SExtBuilder : public TypePromotionAction { | |||
2802 | Value *Val; | |||
2803 | public: | |||
2804 | /// \brief Build a sign extension instruction of \p Opnd producing a \p Ty | |||
2805 | /// result. | |||
2806 | /// sext Opnd to Ty. | |||
2807 | SExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty) | |||
2808 | : TypePromotionAction(InsertPt) { | |||
2809 | IRBuilder<> Builder(InsertPt); | |||
2810 | Val = Builder.CreateSExt(Opnd, Ty, "promoted"); | |||
2811 | DEBUG(dbgs() << "Do: SExtBuilder: " << *Val << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Do: SExtBuilder: " << *Val << "\n"; } } while (false); | |||
2812 | } | |||
2813 | ||||
2814 | /// \brief Get the built value. | |||
2815 | Value *getBuiltValue() { return Val; } | |||
2816 | ||||
2817 | /// \brief Remove the built instruction. | |||
2818 | void undo() override { | |||
2819 | DEBUG(dbgs() << "Undo: SExtBuilder: " << *Val << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Undo: SExtBuilder: " << *Val << "\n"; } } while (false); | |||
2820 | if (Instruction *IVal = dyn_cast<Instruction>(Val)) | |||
2821 | IVal->eraseFromParent(); | |||
2822 | } | |||
2823 | }; | |||
2824 | ||||
2825 | /// \brief Build a zero extension instruction. | |||
2826 | class ZExtBuilder : public TypePromotionAction { | |||
2827 | Value *Val; | |||
2828 | public: | |||
2829 | /// \brief Build a zero extension instruction of \p Opnd producing a \p Ty | |||
2830 | /// result. | |||
2831 | /// zext Opnd to Ty. | |||
2832 | ZExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty) | |||
2833 | : TypePromotionAction(InsertPt) { | |||
2834 | IRBuilder<> Builder(InsertPt); | |||
2835 | Val = Builder.CreateZExt(Opnd, Ty, "promoted"); | |||
2836 | DEBUG(dbgs() << "Do: ZExtBuilder: " << *Val << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Do: ZExtBuilder: " << *Val << "\n"; } } while (false); | |||
2837 | } | |||
2838 | ||||
2839 | /// \brief Get the built value. | |||
2840 | Value *getBuiltValue() { return Val; } | |||
2841 | ||||
2842 | /// \brief Remove the built instruction. | |||
2843 | void undo() override { | |||
2844 | DEBUG(dbgs() << "Undo: ZExtBuilder: " << *Val << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Undo: ZExtBuilder: " << *Val << "\n"; } } while (false); | |||
2845 | if (Instruction *IVal = dyn_cast<Instruction>(Val)) | |||
2846 | IVal->eraseFromParent(); | |||
2847 | } | |||
2848 | }; | |||
2849 | ||||
2850 | /// \brief Mutate an instruction to another type. | |||
2851 | class TypeMutator : public TypePromotionAction { | |||
2852 | /// Record the original type. | |||
2853 | Type *OrigTy; | |||
2854 | ||||
2855 | public: | |||
2856 | /// \brief Mutate the type of \p Inst into \p NewTy. | |||
2857 | TypeMutator(Instruction *Inst, Type *NewTy) | |||
2858 | : TypePromotionAction(Inst), OrigTy(Inst->getType()) { | |||
2859 | DEBUG(dbgs() << "Do: MutateType: " << *Inst << " with " << *NewTydo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Do: MutateType: " << *Inst << " with " << *NewTy << "\n"; } } while (false) | |||
2860 | << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Do: MutateType: " << *Inst << " with " << *NewTy << "\n"; } } while (false); | |||
2861 | Inst->mutateType(NewTy); | |||
2862 | } | |||
2863 | ||||
2864 | /// \brief Mutate the instruction back to its original type. | |||
2865 | void undo() override { | |||
2866 | DEBUG(dbgs() << "Undo: MutateType: " << *Inst << " with " << *OrigTydo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Undo: MutateType: " << *Inst << " with " << *OrigTy << "\n"; } } while (false) | |||
2867 | << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Undo: MutateType: " << *Inst << " with " << *OrigTy << "\n"; } } while (false); | |||
2868 | Inst->mutateType(OrigTy); | |||
2869 | } | |||
2870 | }; | |||
2871 | ||||
2872 | /// \brief Replace the uses of an instruction by another instruction. | |||
2873 | class UsesReplacer : public TypePromotionAction { | |||
2874 | /// Helper structure to keep track of the replaced uses. | |||
2875 | struct InstructionAndIdx { | |||
2876 |       /// The instruction using the replaced value. | |||
2877 | Instruction *Inst; | |||
2878 |       /// The operand index at which the replaced value is used by Inst. | |||
2879 | unsigned Idx; | |||
2880 | InstructionAndIdx(Instruction *Inst, unsigned Idx) | |||
2881 | : Inst(Inst), Idx(Idx) {} | |||
2882 | }; | |||
2883 | ||||
2884 | /// Keep track of the original uses (pair Instruction, Index). | |||
2885 | SmallVector<InstructionAndIdx, 4> OriginalUses; | |||
2886 | typedef SmallVectorImpl<InstructionAndIdx>::iterator use_iterator; | |||
2887 | ||||
2888 | public: | |||
2889 | /// \brief Replace all the use of \p Inst by \p New. | |||
2890 | UsesReplacer(Instruction *Inst, Value *New) : TypePromotionAction(Inst) { | |||
2891 | DEBUG(dbgs() << "Do: UsersReplacer: " << *Inst << " with " << *Newdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Do: UsersReplacer: " << *Inst << " with " << *New << "\n"; } } while (false) | |||
2892 | << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Do: UsersReplacer: " << *Inst << " with " << *New << "\n"; } } while (false); | |||
2893 | // Record the original uses. | |||
2894 | for (Use &U : Inst->uses()) { | |||
2895 | Instruction *UserI = cast<Instruction>(U.getUser()); | |||
2896 | OriginalUses.push_back(InstructionAndIdx(UserI, U.getOperandNo())); | |||
2897 | } | |||
2898 | // Now, we can replace the uses. | |||
2899 | Inst->replaceAllUsesWith(New); | |||
2900 | } | |||
2901 | ||||
2902 | /// \brief Reassign the original uses of Inst to Inst. | |||
2903 | void undo() override { | |||
2904 | DEBUG(dbgs() << "Undo: UsersReplacer: " << *Inst << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Undo: UsersReplacer: " << *Inst << "\n"; } } while (false); | |||
2905 | for (use_iterator UseIt = OriginalUses.begin(), | |||
2906 | EndIt = OriginalUses.end(); | |||
2907 | UseIt != EndIt; ++UseIt) { | |||
2908 | UseIt->Inst->setOperand(UseIt->Idx, Inst); | |||
2909 | } | |||
2910 | } | |||
2911 | }; | |||
2912 | ||||
2913 | /// \brief Remove an instruction from the IR. | |||
2914 | class InstructionRemover : public TypePromotionAction { | |||
2915 | /// Original position of the instruction. | |||
2916 | InsertionHandler Inserter; | |||
2917 |     /// Helper structure to hide all the links to the instruction. In other | |||
2918 |     /// words, this helps to act as if the instruction were removed. | |||
2919 | OperandsHider Hider; | |||
2920 | /// Keep track of the uses replaced, if any. | |||
2921 | UsesReplacer *Replacer; | |||
2922 | /// Keep track of instructions removed. | |||
2923 | SetOfInstrs &RemovedInsts; | |||
2924 | ||||
2925 | public: | |||
2926 |     /// \brief Remove all references to \p Inst and optionally replace all | |||
2927 |     /// its uses with \p New. | |||
2928 | /// \p RemovedInsts Keep track of the instructions removed by this Action. | |||
2929 | /// \pre If !Inst->use_empty(), then New != nullptr | |||
2930 | InstructionRemover(Instruction *Inst, SetOfInstrs &RemovedInsts, | |||
2931 | Value *New = nullptr) | |||
2932 | : TypePromotionAction(Inst), Inserter(Inst), Hider(Inst), | |||
2933 | Replacer(nullptr), RemovedInsts(RemovedInsts) { | |||
2934 | if (New) | |||
2935 | Replacer = new UsesReplacer(Inst, New); | |||
2936 | DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Do: InstructionRemover: " << *Inst << "\n"; } } while (false); | |||
2937 | RemovedInsts.insert(Inst); | |||
2938 |       // The instructions removed here will be freed after completing | |||
2939 |       // optimizeBlock() for all blocks as we need to keep track of the | |||
2940 |       // removed instructions during promotion. | |||
2941 | Inst->removeFromParent(); | |||
2942 | } | |||
2943 | ||||
2944 | ~InstructionRemover() override { delete Replacer; } | |||
2945 | ||||
2946 |     /// \brief Resurrect the instruction and reassign it to the proper uses if a | |||
2947 |     /// new value was provided when this action was built. | |||
2948 | void undo() override { | |||
2949 | DEBUG(dbgs() << "Undo: InstructionRemover: " << *Inst << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Undo: InstructionRemover: " << *Inst << "\n"; } } while (false); | |||
2950 | Inserter.insert(Inst); | |||
2951 | if (Replacer) | |||
2952 | Replacer->undo(); | |||
2953 | Hider.undo(); | |||
2954 | RemovedInsts.erase(Inst); | |||
2955 | } | |||
2956 | }; | |||
2957 | ||||
2958 | public: | |||
2959 | /// Restoration point. | |||
2960 | /// The restoration point is a pointer to an action instead of an iterator | |||
2961 | /// because the iterator may be invalidated but not the pointer. | |||
2962 | typedef const TypePromotionAction *ConstRestorationPt; | |||
2963 | ||||
2964 | TypePromotionTransaction(SetOfInstrs &RemovedInsts) | |||
2965 | : RemovedInsts(RemovedInsts) {} | |||
2966 | ||||
2967 |   /// Commit every change made in this transaction. | |||
2968 | void commit(); | |||
2969 | /// Undo all the changes made after the given point. | |||
2970 | void rollback(ConstRestorationPt Point); | |||
2971 | /// Get the current restoration point. | |||
2972 | ConstRestorationPt getRestorationPoint() const; | |||
2973 | ||||
2974 | /// \name API for IR modification with state keeping to support rollback. | |||
2975 | /// @{ | |||
2976 | /// Same as Instruction::setOperand. | |||
2977 | void setOperand(Instruction *Inst, unsigned Idx, Value *NewVal); | |||
2978 | /// Same as Instruction::eraseFromParent. | |||
2979 | void eraseInstruction(Instruction *Inst, Value *NewVal = nullptr); | |||
2980 | /// Same as Value::replaceAllUsesWith. | |||
2981 | void replaceAllUsesWith(Instruction *Inst, Value *New); | |||
2982 | /// Same as Value::mutateType. | |||
2983 | void mutateType(Instruction *Inst, Type *NewTy); | |||
2984 | /// Same as IRBuilder::createTrunc. | |||
2985 | Value *createTrunc(Instruction *Opnd, Type *Ty); | |||
2986 | /// Same as IRBuilder::createSExt. | |||
2987 | Value *createSExt(Instruction *Inst, Value *Opnd, Type *Ty); | |||
2988 | /// Same as IRBuilder::createZExt. | |||
2989 | Value *createZExt(Instruction *Inst, Value *Opnd, Type *Ty); | |||
2990 | /// Same as Instruction::moveBefore. | |||
2991 | void moveBefore(Instruction *Inst, Instruction *Before); | |||
2992 | /// @} | |||
2993 | ||||
2994 | private: | |||
2995 | /// The ordered list of actions made so far. | |||
2996 | SmallVector<std::unique_ptr<TypePromotionAction>, 16> Actions; | |||
2997 | typedef SmallVectorImpl<std::unique_ptr<TypePromotionAction>>::iterator CommitPt; | |||
2998 | SetOfInstrs &RemovedInsts; | |||
2999 | }; | |||
3000 | ||||
3001 | void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx, | |||
3002 | Value *NewVal) { | |||
3003 | Actions.push_back( | |||
3004 | make_unique<TypePromotionTransaction::OperandSetter>(Inst, Idx, NewVal)); | |||
3005 | } | |||
3006 | ||||
3007 | void TypePromotionTransaction::eraseInstruction(Instruction *Inst, | |||
3008 | Value *NewVal) { | |||
3009 | Actions.push_back( | |||
3010 | make_unique<TypePromotionTransaction::InstructionRemover>(Inst, | |||
3011 | RemovedInsts, NewVal)); | |||
3012 | } | |||
3013 | ||||
3014 | void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst, | |||
3015 | Value *New) { | |||
3016 | Actions.push_back(make_unique<TypePromotionTransaction::UsesReplacer>(Inst, New)); | |||
3017 | } | |||
3018 | ||||
3019 | void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) { | |||
3020 | Actions.push_back(make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy)); | |||
3021 | } | |||
3022 | ||||
3023 | Value *TypePromotionTransaction::createTrunc(Instruction *Opnd, | |||
3024 | Type *Ty) { | |||
3025 | std::unique_ptr<TruncBuilder> Ptr(new TruncBuilder(Opnd, Ty)); | |||
3026 | Value *Val = Ptr->getBuiltValue(); | |||
3027 | Actions.push_back(std::move(Ptr)); | |||
3028 | return Val; | |||
3029 | } | |||
3030 | ||||
3031 | Value *TypePromotionTransaction::createSExt(Instruction *Inst, | |||
3032 | Value *Opnd, Type *Ty) { | |||
3033 | std::unique_ptr<SExtBuilder> Ptr(new SExtBuilder(Inst, Opnd, Ty)); | |||
3034 | Value *Val = Ptr->getBuiltValue(); | |||
3035 | Actions.push_back(std::move(Ptr)); | |||
3036 | return Val; | |||
3037 | } | |||
3038 | ||||
3039 | Value *TypePromotionTransaction::createZExt(Instruction *Inst, | |||
3040 | Value *Opnd, Type *Ty) { | |||
3041 | std::unique_ptr<ZExtBuilder> Ptr(new ZExtBuilder(Inst, Opnd, Ty)); | |||
3042 | Value *Val = Ptr->getBuiltValue(); | |||
3043 | Actions.push_back(std::move(Ptr)); | |||
3044 | return Val; | |||
3045 | } | |||
3046 | ||||
3047 | void TypePromotionTransaction::moveBefore(Instruction *Inst, | |||
3048 | Instruction *Before) { | |||
3049 | Actions.push_back( | |||
3050 | make_unique<TypePromotionTransaction::InstructionMoveBefore>(Inst, Before)); | |||
3051 | } | |||
3052 | ||||
3053 | TypePromotionTransaction::ConstRestorationPt | |||
3054 | TypePromotionTransaction::getRestorationPoint() const { | |||
3055 | return !Actions.empty() ? Actions.back().get() : nullptr; | |||
3056 | } | |||
3057 | ||||
3058 | void TypePromotionTransaction::commit() { | |||
3059 | for (CommitPt It = Actions.begin(), EndIt = Actions.end(); It != EndIt; | |||
3060 | ++It) | |||
3061 | (*It)->commit(); | |||
3062 | Actions.clear(); | |||
3063 | } | |||
3064 | ||||
3065 | void TypePromotionTransaction::rollback( | |||
3066 | TypePromotionTransaction::ConstRestorationPt Point) { | |||
3067 | while (!Actions.empty() && Point != Actions.back().get()) { | |||
3068 | std::unique_ptr<TypePromotionAction> Curr = Actions.pop_back_val(); | |||
3069 | Curr->undo(); | |||
3070 | } | |||
3071 | } | |||
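// A minimal usage sketch of the transaction API defined above; the helper
// and its arguments (ExtInst, Opnd, WideTy, Profitable) are illustrative
// assumptions, not code from this file.
static void exampleTransactionUse(SetOfInstrs &RemovedInsts,
                                  Instruction *ExtInst, Value *Opnd,
                                  Type *WideTy, bool Profitable) {
  TypePromotionTransaction TPT(RemovedInsts);
  TypePromotionTransaction::ConstRestorationPt Start =
      TPT.getRestorationPoint();
  Value *Wide = TPT.createSExt(ExtInst, Opnd, WideTy); // recorded action
  TPT.replaceAllUsesWith(ExtInst, Wide);               // recorded action
  if (!Profitable)
    TPT.rollback(Start); // undo every action made after Start
  else
    TPT.commit();        // keep all recorded actions
}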
3072 | ||||
3073 | /// \brief A helper class for matching addressing modes. | |||
3074 | /// | |||
3075 | /// This encapsulates the logic for matching the target-legal addressing modes. | |||
3076 | class AddressingModeMatcher { | |||
3077 | SmallVectorImpl<Instruction*> &AddrModeInsts; | |||
3078 | const TargetLowering &TLI; | |||
3079 | const TargetRegisterInfo &TRI; | |||
3080 | const DataLayout &DL; | |||
3081 | ||||
3082 | /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and | |||
3083 | /// the memory instruction that we're computing this address for. | |||
3084 | Type *AccessTy; | |||
3085 | unsigned AddrSpace; | |||
3086 | Instruction *MemoryInst; | |||
3087 | ||||
3088 | /// This is the addressing mode that we're building up. This is | |||
3089 | /// part of the return value of this addressing mode matching stuff. | |||
3090 | ExtAddrMode &AddrMode; | |||
3091 | ||||
3092 | /// The instructions inserted by other CodeGenPrepare optimizations. | |||
3093 | const SetOfInstrs &InsertedInsts; | |||
3094 | /// A map from the instructions to their type before promotion. | |||
3095 | InstrToOrigTy &PromotedInsts; | |||
3096 | /// The ongoing transaction where every action should be registered. | |||
3097 | TypePromotionTransaction &TPT; | |||
3098 | ||||
3099 | /// This is set to true when we should not do profitability checks. | |||
3100 | /// When true, IsProfitableToFoldIntoAddressingMode always returns true. | |||
3101 | bool IgnoreProfitability; | |||
3102 | ||||
3103 | AddressingModeMatcher(SmallVectorImpl<Instruction *> &AMI, | |||
3104 | const TargetLowering &TLI, | |||
3105 | const TargetRegisterInfo &TRI, | |||
3106 | Type *AT, unsigned AS, | |||
3107 | Instruction *MI, ExtAddrMode &AM, | |||
3108 | const SetOfInstrs &InsertedInsts, | |||
3109 | InstrToOrigTy &PromotedInsts, | |||
3110 | TypePromotionTransaction &TPT) | |||
3111 | : AddrModeInsts(AMI), TLI(TLI), TRI(TRI), | |||
3112 | DL(MI->getModule()->getDataLayout()), AccessTy(AT), AddrSpace(AS), | |||
3113 | MemoryInst(MI), AddrMode(AM), InsertedInsts(InsertedInsts), | |||
3114 | PromotedInsts(PromotedInsts), TPT(TPT) { | |||
3115 | IgnoreProfitability = false; | |||
3116 | } | |||
3117 | public: | |||
3118 | ||||
3119 | /// Find the maximal addressing mode that a load/store of V can fold, | |||
3120 |   /// given an access type of AccessTy. This returns a list of involved | |||
3121 | /// instructions in AddrModeInsts. | |||
3122 | /// \p InsertedInsts The instructions inserted by other CodeGenPrepare | |||
3123 | /// optimizations. | |||
3124 | /// \p PromotedInsts maps the instructions to their type before promotion. | |||
3125 |   /// \p TPT The ongoing transaction where every action should be registered. | |||
3126 | static ExtAddrMode Match(Value *V, Type *AccessTy, unsigned AS, | |||
3127 | Instruction *MemoryInst, | |||
3128 | SmallVectorImpl<Instruction*> &AddrModeInsts, | |||
3129 | const TargetLowering &TLI, | |||
3130 | const TargetRegisterInfo &TRI, | |||
3131 | const SetOfInstrs &InsertedInsts, | |||
3132 | InstrToOrigTy &PromotedInsts, | |||
3133 | TypePromotionTransaction &TPT) { | |||
3134 | ExtAddrMode Result; | |||
3135 | ||||
3136 | bool Success = AddressingModeMatcher(AddrModeInsts, TLI, TRI, | |||
3137 | AccessTy, AS, | |||
3138 | MemoryInst, Result, InsertedInsts, | |||
3139 | PromotedInsts, TPT).matchAddr(V, 0); | |||
3140 |     (void)Success; assert(Success && "Couldn't select *anything*?"); | |||
3141 | return Result; | |||
3142 | } | |||
3143 | private: | |||
3144 | bool matchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth); | |||
3145 | bool matchAddr(Value *V, unsigned Depth); | |||
3146 | bool matchOperationAddr(User *Operation, unsigned Opcode, unsigned Depth, | |||
3147 | bool *MovedAway = nullptr); | |||
3148 | bool isProfitableToFoldIntoAddressingMode(Instruction *I, | |||
3149 | ExtAddrMode &AMBefore, | |||
3150 | ExtAddrMode &AMAfter); | |||
3151 | bool valueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2); | |||
3152 | bool isPromotionProfitable(unsigned NewCost, unsigned OldCost, | |||
3153 | Value *PromotedOperand) const; | |||
3154 | }; | |||
3155 | ||||
3156 | /// Try adding ScaleReg*Scale to the current addressing mode. | |||
3157 | /// Return true and update AddrMode if this addr mode is legal for the target, | |||
3158 | /// false if not. | |||
3159 | bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale, | |||
3160 | unsigned Depth) { | |||
3161 | // If Scale is 1, then this is the same as adding ScaleReg to the addressing | |||
3162 | // mode. Just process that directly. | |||
3163 | if (Scale == 1) | |||
3164 | return matchAddr(ScaleReg, Depth); | |||
3165 | ||||
3166 | // If the scale is 0, it takes nothing to add this. | |||
3167 | if (Scale == 0) | |||
3168 | return true; | |||
3169 | ||||
3170 | // If we already have a scale of this value, we can add to it, otherwise, we | |||
3171 | // need an available scale field. | |||
3172 | if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg) | |||
3173 | return false; | |||
3174 | ||||
3175 | ExtAddrMode TestAddrMode = AddrMode; | |||
3176 | ||||
3177 | // Add scale to turn X*4+X*3 -> X*7. This could also do things like | |||
3178 | // [A+B + A*7] -> [B+A*8]. | |||
3179 | TestAddrMode.Scale += Scale; | |||
3180 | TestAddrMode.ScaledReg = ScaleReg; | |||
3181 | ||||
3182 | // If the new address isn't legal, bail out. | |||
3183 | if (!TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace)) | |||
3184 | return false; | |||
3185 | ||||
3186 | // It was legal, so commit it. | |||
3187 | AddrMode = TestAddrMode; | |||
3188 | ||||
3189 | // Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now | |||
3190 | // to see if ScaleReg is actually X+C. If so, we can turn this into adding | |||
3191 | // X*Scale + C*Scale to addr mode. | |||
3192 | ConstantInt *CI = nullptr; Value *AddLHS = nullptr; | |||
3193 | if (isa<Instruction>(ScaleReg) && // not a constant expr. | |||
3194 | match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) { | |||
3195 | TestAddrMode.ScaledReg = AddLHS; | |||
3196 | TestAddrMode.BaseOffs += CI->getSExtValue()*TestAddrMode.Scale; | |||
3197 | ||||
3198 | // If this addressing mode is legal, commit it and remember that we folded | |||
3199 | // this instruction. | |||
3200 | if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace)) { | |||
3201 | AddrModeInsts.push_back(cast<Instruction>(ScaleReg)); | |||
3202 | AddrMode = TestAddrMode; | |||
3203 | return true; | |||
3204 | } | |||
3205 | } | |||
3206 | ||||
3207 | // Otherwise, not (x+c)*scale, just return what we have. | |||
3208 | return true; | |||
3209 | } | |||
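// A worked example of the (x + c) * scale fold above (illustrative values):
// matching ScaleReg = (%x + 4) with Scale = 2 rewrites
//   Base + (%x + 4) * 2   as   (Base + 8) + %x * 2
// so C * Scale is absorbed into BaseOffs and %x becomes the scaled register,
// provided the resulting addressing mode is still legal for the target.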
3210 | ||||
3211 | /// This is a little filter, which returns true if an addressing computation | |||
3212 | /// involving I might be folded into a load/store accessing it. | |||
3213 | /// This doesn't need to be perfect, but needs to accept at least | |||
3214 | /// the set of instructions that MatchOperationAddr can. | |||
3215 | static bool MightBeFoldableInst(Instruction *I) { | |||
3216 | switch (I->getOpcode()) { | |||
3217 | case Instruction::BitCast: | |||
3218 | case Instruction::AddrSpaceCast: | |||
3219 | // Don't touch identity bitcasts. | |||
3220 | if (I->getType() == I->getOperand(0)->getType()) | |||
3221 | return false; | |||
3222 | return I->getType()->isPointerTy() || I->getType()->isIntegerTy(); | |||
3223 | case Instruction::PtrToInt: | |||
3224 | // PtrToInt is always a noop, as we know that the int type is pointer sized. | |||
3225 | return true; | |||
3226 | case Instruction::IntToPtr: | |||
3227 | // We know the input is intptr_t, so this is foldable. | |||
3228 | return true; | |||
3229 | case Instruction::Add: | |||
3230 | return true; | |||
3231 | case Instruction::Mul: | |||
3232 | case Instruction::Shl: | |||
3233 | // Can only handle X*C and X << C. | |||
3234 | return isa<ConstantInt>(I->getOperand(1)); | |||
3235 | case Instruction::GetElementPtr: | |||
3236 | return true; | |||
3237 | default: | |||
3238 | return false; | |||
3239 | } | |||
3240 | } | |||
3241 | ||||
3242 | /// \brief Check whether or not \p Val is a legal instruction for \p TLI. | |||
3243 | /// \note \p Val is assumed to be the product of some type promotion. | |||
3244 | /// Therefore if \p Val has an undefined state in \p TLI, this is assumed | |||
3245 | /// to be legal, as the non-promoted value would have had the same state. | |||
3246 | static bool isPromotedInstructionLegal(const TargetLowering &TLI, | |||
3247 | const DataLayout &DL, Value *Val) { | |||
3248 | Instruction *PromotedInst = dyn_cast<Instruction>(Val); | |||
3249 | if (!PromotedInst) | |||
3250 | return false; | |||
3251 | int ISDOpcode = TLI.InstructionOpcodeToISD(PromotedInst->getOpcode()); | |||
3252 | // If the ISDOpcode is undefined, it was undefined before the promotion. | |||
3253 | if (!ISDOpcode) | |||
3254 | return true; | |||
3255 | // Otherwise, check if the promoted instruction is legal or not. | |||
3256 | return TLI.isOperationLegalOrCustom( | |||
3257 | ISDOpcode, TLI.getValueType(DL, PromotedInst->getType())); | |||
3258 | } | |||
3259 | ||||
3260 | /// \brief Helper class to perform type promotion. | |||
3261 | class TypePromotionHelper { | |||
3262 | /// \brief Utility function to check whether or not a sign or zero extension | |||
3263 | /// of \p Inst with \p ConsideredExtType can be moved through \p Inst by | |||
3264 | /// either using the operands of \p Inst or promoting \p Inst. | |||
3265 | /// The type of the extension is defined by \p IsSExt. | |||
3266 | /// In other words, check if: | |||
3267 | /// ext (Ty Inst opnd1 opnd2 ... opndN) to ConsideredExtType. | |||
3268 | /// #1 Promotion applies: | |||
3269 | /// ConsideredExtType Inst (ext opnd1 to ConsideredExtType, ...). | |||
3270 | /// #2 Operand reuses: | |||
3271 | /// ext opnd1 to ConsideredExtType. | |||
3272 | /// \p PromotedInsts maps the instructions to their type before promotion. | |||
3273 | static bool canGetThrough(const Instruction *Inst, Type *ConsideredExtType, | |||
3274 | const InstrToOrigTy &PromotedInsts, bool IsSExt); | |||
3275 | ||||
3276 | /// \brief Utility function to determine if \p OpIdx should be promoted when | |||
3277 | /// promoting \p Inst. | |||
3278 | static bool shouldExtOperand(const Instruction *Inst, int OpIdx) { | |||
3279 | return !(isa<SelectInst>(Inst) && OpIdx == 0); | |||
3280 | } | |||
3281 | ||||
3282 | /// \brief Utility function to promote the operand of \p Ext when this | |||
3283 | /// operand is a promotable trunc or sext or zext. | |||
3284 | /// \p PromotedInsts maps the instructions to their type before promotion. | |||
3285 | /// \p CreatedInstsCost[out] contains the cost of all instructions | |||
3286 | /// created to promote the operand of Ext. | |||
3287 | /// Newly added extensions are inserted in \p Exts. | |||
3288 | /// Newly added truncates are inserted in \p Truncs. | |||
3289 | /// Should never be called directly. | |||
3290 | /// \return The promoted value which is used instead of Ext. | |||
3291 | static Value *promoteOperandForTruncAndAnyExt( | |||
3292 | Instruction *Ext, TypePromotionTransaction &TPT, | |||
3293 | InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost, | |||
3294 | SmallVectorImpl<Instruction *> *Exts, | |||
3295 | SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI); | |||
3296 | ||||
3297 | /// \brief Utility function to promote the operand of \p Ext when this | |||
3298 | /// operand is promotable and is not a supported trunc or sext. | |||
3299 | /// \p PromotedInsts maps the instructions to their type before promotion. | |||
3300 | /// \p CreatedInstsCost[out] contains the cost of all the instructions | |||
3301 | /// created to promote the operand of Ext. | |||
3302 | /// Newly added extensions are inserted in \p Exts. | |||
3303 | /// Newly added truncates are inserted in \p Truncs. | |||
3304 | /// Should never be called directly. | |||
3305 | /// \return The promoted value which is used instead of Ext. | |||
3306 | static Value *promoteOperandForOther(Instruction *Ext, | |||
3307 | TypePromotionTransaction &TPT, | |||
3308 | InstrToOrigTy &PromotedInsts, | |||
3309 | unsigned &CreatedInstsCost, | |||
3310 | SmallVectorImpl<Instruction *> *Exts, | |||
3311 | SmallVectorImpl<Instruction *> *Truncs, | |||
3312 | const TargetLowering &TLI, bool IsSExt); | |||
3313 | ||||
3314 | /// \see promoteOperandForOther. | |||
3315 | static Value *signExtendOperandForOther( | |||
3316 | Instruction *Ext, TypePromotionTransaction &TPT, | |||
3317 | InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost, | |||
3318 | SmallVectorImpl<Instruction *> *Exts, | |||
3319 | SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) { | |||
3320 | return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost, | |||
3321 | Exts, Truncs, TLI, true); | |||
3322 | } | |||
3323 | ||||
3324 | /// \see promoteOperandForOther. | |||
3325 | static Value *zeroExtendOperandForOther( | |||
3326 | Instruction *Ext, TypePromotionTransaction &TPT, | |||
3327 | InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost, | |||
3328 | SmallVectorImpl<Instruction *> *Exts, | |||
3329 | SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) { | |||
3330 | return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost, | |||
3331 | Exts, Truncs, TLI, false); | |||
3332 | } | |||
3333 | ||||
3334 | public: | |||
3335 | /// Type for the utility function that promotes the operand of Ext. | |||
3336 | typedef Value *(*Action)(Instruction *Ext, TypePromotionTransaction &TPT, | |||
3337 | InstrToOrigTy &PromotedInsts, | |||
3338 | unsigned &CreatedInstsCost, | |||
3339 | SmallVectorImpl<Instruction *> *Exts, | |||
3340 | SmallVectorImpl<Instruction *> *Truncs, | |||
3341 | const TargetLowering &TLI); | |||
3342 | /// \brief Given a sign/zero extend instruction \p Ext, return the appropriate | |||
3343 | /// action to promote the operand of \p Ext instead of using Ext. | |||
3344 | /// \return NULL if no promotable action is possible with the current | |||
3345 | /// sign extension. | |||
3346 | /// \p InsertedInsts keeps track of all the instructions inserted by the | |||
3347 | /// other CodeGenPrepare optimizations. This information is important | |||
3348 | /// because we do not want to promote these instructions as CodeGenPrepare | |||
3349 | /// will reinsert them later. Thus creating an infinite loop: create/remove. | |||
3350 | /// \p PromotedInsts maps the instructions to their type before promotion. | |||
3351 | static Action getAction(Instruction *Ext, const SetOfInstrs &InsertedInsts, | |||
3352 | const TargetLowering &TLI, | |||
3353 | const InstrToOrigTy &PromotedInsts); | |||
3354 | }; | |||
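
A minimal usage sketch of this interface, assuming the same variables that matchOperationAddr (further down in this file) has in scope when it drives the promotion:

    TypePromotionHelper::Action TPH =
        TypePromotionHelper::getAction(Ext, InsertedInsts, TLI, PromotedInsts);
    if (TPH) {
      unsigned CreatedInstsCost = 0;
      // Run the selected promotion through the transaction so it can be
      // rolled back if it turns out not to pay off.
      Value *PromotedOperand =
          TPH(Ext, TPT, PromotedInsts, CreatedInstsCost, nullptr, nullptr, TLI);
    }
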
3355 | ||||
3356 | bool TypePromotionHelper::canGetThrough(const Instruction *Inst, | |||
3357 | Type *ConsideredExtType, | |||
3358 | const InstrToOrigTy &PromotedInsts, | |||
3359 | bool IsSExt) { | |||
3360 | // The promotion helper does not know how to deal with vector types yet. | |||
3361 | // To be able to fix that, we would need to fix the places where we | |||
3362 | // statically extend, e.g., constants and such. | |||
3363 | if (Inst->getType()->isVectorTy()) | |||
3364 | return false; | |||
3365 | ||||
3366 | // We can always get through zext. | |||
3367 | if (isa<ZExtInst>(Inst)) | |||
3368 | return true; | |||
3369 | ||||
3370 | // sext(sext) is ok too. | |||
3371 | if (IsSExt && isa<SExtInst>(Inst)) | |||
3372 | return true; | |||
3373 | ||||
3374 | // We can get through a binary operator if it is legal. In other words, the | |||
3375 | // binary operator must have a nuw or nsw flag. | |||
3376 | const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst); | |||
3377 | if (BinOp && isa<OverflowingBinaryOperator>(BinOp) && | |||
3378 | ((!IsSExt && BinOp->hasNoUnsignedWrap()) || | |||
3379 | (IsSExt && BinOp->hasNoSignedWrap()))) | |||
3380 | return true; | |||
3381 | ||||
3382 | // Check if we can do the following simplification. | |||
3383 | // ext(trunc(opnd)) --> ext(opnd) | |||
3384 | if (!isa<TruncInst>(Inst)) | |||
3385 | return false; | |||
3386 | ||||
3387 | Value *OpndVal = Inst->getOperand(0); | |||
3388 | // Check if we can use this operand in the extension. | |||
3389 | // If the type is larger than the result type of the extension, we cannot. | |||
3390 | if (!OpndVal->getType()->isIntegerTy() || | |||
3391 | OpndVal->getType()->getIntegerBitWidth() > | |||
3392 | ConsideredExtType->getIntegerBitWidth()) | |||
3393 | return false; | |||
3394 | ||||
3395 | // If the operand of the truncate is not an instruction, we will not have | |||
3396 | // any information on the dropped bits. | |||
3397 | // (Actually we could for constants, but it is not worth the extra logic.) | |||
3398 | Instruction *Opnd = dyn_cast<Instruction>(OpndVal); | |||
3399 | if (!Opnd) | |||
3400 | return false; | |||
3401 | ||||
3402 | // Check that the original type of the operand, before it was extended, | |||
3403 | // is narrow enough, i.e., that the trunc just drops extended bits of the | |||
3404 | // same kind as the extension. | |||
3405 | // #1 get the type of the operand and check the kind of the extended bits. | |||
3406 | const Type *OpndType; | |||
3407 | InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd); | |||
3408 | if (It != PromotedInsts.end() && It->second.getInt() == IsSExt) | |||
3409 | OpndType = It->second.getPointer(); | |||
3410 | else if ((IsSExt && isa<SExtInst>(Opnd)) || (!IsSExt && isa<ZExtInst>(Opnd))) | |||
3411 | OpndType = Opnd->getOperand(0)->getType(); | |||
3412 | else | |||
3413 | return false; | |||
3414 | ||||
3415 | // #2 check that the truncate just drops extended bits. | |||
3416 | return Inst->getType()->getIntegerBitWidth() >= | |||
3417 | OpndType->getIntegerBitWidth(); | |||
3418 | } | |||
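
A hedged example of a chain this predicate accepts, with the widths spelled out (illustrative IR in comments; %x is invented):

    // %w = sext i8 %x to i32    ; Opnd: the extended bits are sign bits of i8
    // %t = trunc i32 %w to i16  ; Inst: i16 >= i8, only sext'ed bits dropped
    // %e = sext i16 %t to i64   ; Ext whose operand is being examined
    // canGetThrough returns true: the chain behaves like "sext i8 %x to i64".
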
3419 | ||||
3420 | TypePromotionHelper::Action TypePromotionHelper::getAction( | |||
3421 | Instruction *Ext, const SetOfInstrs &InsertedInsts, | |||
3422 | const TargetLowering &TLI, const InstrToOrigTy &PromotedInsts) { | |||
3423 | assert((isa<SExtInst>(Ext) || isa<ZExtInst>(Ext)) && | |||
3424 | "Unexpected instruction type"); | |||
3425 | Instruction *ExtOpnd = dyn_cast<Instruction>(Ext->getOperand(0)); | |||
3426 | Type *ExtTy = Ext->getType(); | |||
3427 | bool IsSExt = isa<SExtInst>(Ext); | |||
3428 | // If the operand of the extension is not an instruction, we cannot | |||
3429 | // get through. | |||
3430 | // If it is, check that we can get through it. | |||
3431 | if (!ExtOpnd || !canGetThrough(ExtOpnd, ExtTy, PromotedInsts, IsSExt)) | |||
3432 | return nullptr; | |||
3433 | ||||
3434 | // Do not promote if the operand has been added by codegenprepare. | |||
3435 | // Otherwise, it means we are undoing an optimization that is likely to be | |||
3436 | // redone, thus causing a potential infinite loop. | |||
3437 | if (isa<TruncInst>(ExtOpnd) && InsertedInsts.count(ExtOpnd)) | |||
3438 | return nullptr; | |||
3439 | ||||
3440 | // SExt, ZExt or Trunc instructions. | |||
3441 | // Return the related handler. | |||
3442 | if (isa<SExtInst>(ExtOpnd) || isa<TruncInst>(ExtOpnd) || | |||
3443 | isa<ZExtInst>(ExtOpnd)) | |||
3444 | return promoteOperandForTruncAndAnyExt; | |||
3445 | ||||
3446 | // Regular instruction. | |||
3447 | // Abort early if we will have to insert non-free instructions. | |||
3448 | if (!ExtOpnd->hasOneUse() && !TLI.isTruncateFree(ExtTy, ExtOpnd->getType())) | |||
3449 | return nullptr; | |||
3450 | return IsSExt ? signExtendOperandForOther : zeroExtendOperandForOther; | |||
3451 | } | |||
3452 | ||||
3453 | Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt( | |||
3454 | llvm::Instruction *SExt, TypePromotionTransaction &TPT, | |||
3455 | InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost, | |||
3456 | SmallVectorImpl<Instruction *> *Exts, | |||
3457 | SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) { | |||
3458 | // By construction, the operand of SExt is an instruction. Otherwise we cannot | |||
3459 | // get through it and this method should not be called. | |||
3460 | Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0)); | |||
3461 | Value *ExtVal = SExt; | |||
3462 | bool HasMergedNonFreeExt = false; | |||
3463 | if (isa<ZExtInst>(SExtOpnd)) { | |||
3464 | // Replace s|zext(zext(opnd)) | |||
3465 | // => zext(opnd). | |||
3466 | HasMergedNonFreeExt = !TLI.isExtFree(SExtOpnd); | |||
3467 | Value *ZExt = | |||
3468 | TPT.createZExt(SExt, SExtOpnd->getOperand(0), SExt->getType()); | |||
3469 | TPT.replaceAllUsesWith(SExt, ZExt); | |||
3470 | TPT.eraseInstruction(SExt); | |||
3471 | ExtVal = ZExt; | |||
3472 | } else { | |||
3473 | // Replace z|sext(trunc(opnd)) or sext(sext(opnd)) | |||
3474 | // => z|sext(opnd). | |||
3475 | TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0)); | |||
3476 | } | |||
3477 | CreatedInstsCost = 0; | |||
3478 | ||||
3479 | // Remove dead code. | |||
3480 | if (SExtOpnd->use_empty()) | |||
3481 | TPT.eraseInstruction(SExtOpnd); | |||
3482 | ||||
3483 | // Check if the extension is still needed. | |||
3484 | Instruction *ExtInst = dyn_cast<Instruction>(ExtVal); | |||
3485 | if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType()) { | |||
3486 | if (ExtInst) { | |||
3487 | if (Exts) | |||
3488 | Exts->push_back(ExtInst); | |||
3489 | CreatedInstsCost = !TLI.isExtFree(ExtInst) && !HasMergedNonFreeExt; | |||
3490 | } | |||
3491 | return ExtVal; | |||
3492 | } | |||
3493 | ||||
3494 | // At this point we have: ext ty opnd to ty. | |||
3495 | // Reassign the uses of ExtInst to the opnd and remove ExtInst. | |||
3496 | Value *NextVal = ExtInst->getOperand(0); | |||
3497 | TPT.eraseInstruction(ExtInst, NextVal); | |||
3498 | return NextVal; | |||
3499 | } | |||
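
A hedged before/after sketch of the zext-merging branch above (types invented):

    // before: %z = zext i16 %a to i32
    //         %s = sext i32 %z to i64   ; SExt whose operand is a zext
    // after:  %z2 = zext i16 %a to i64  ; single merged zext; %s is erased,
    //                                   ; and %z too once its uses are gone.
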
3500 | ||||
3501 | Value *TypePromotionHelper::promoteOperandForOther( | |||
3502 | Instruction *Ext, TypePromotionTransaction &TPT, | |||
3503 | InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost, | |||
3504 | SmallVectorImpl<Instruction *> *Exts, | |||
3505 | SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI, | |||
3506 | bool IsSExt) { | |||
3507 | // By construction, the operand of Ext is an instruction. Otherwise we cannot | |||
3508 | // get through it and this method should not be called. | |||
3509 | Instruction *ExtOpnd = cast<Instruction>(Ext->getOperand(0)); | |||
3510 | CreatedInstsCost = 0; | |||
3511 | if (!ExtOpnd->hasOneUse()) { | |||
3512 | // ExtOpnd will be promoted. | |||
3513 | // All its uses, except Ext, will need to use a truncated value of the | |||
3514 | // promoted version. | |||
3515 | // Create the truncate now. | |||
3516 | Value *Trunc = TPT.createTrunc(Ext, ExtOpnd->getType()); | |||
3517 | if (Instruction *ITrunc = dyn_cast<Instruction>(Trunc)) { | |||
3518 | ITrunc->removeFromParent(); | |||
3519 | // Insert it just after the definition. | |||
3520 | ITrunc->insertAfter(ExtOpnd); | |||
3521 | if (Truncs) | |||
3522 | Truncs->push_back(ITrunc); | |||
3523 | } | |||
3524 | ||||
3525 | TPT.replaceAllUsesWith(ExtOpnd, Trunc); | |||
3526 | // Restore the operand of Ext (which has been replaced by the previous call | |||
3527 | // to replaceAllUsesWith) to avoid creating a cycle trunc <-> sext. | |||
3528 | TPT.setOperand(Ext, 0, ExtOpnd); | |||
3529 | } | |||
3530 | ||||
3531 | // Get through the Instruction: | |||
3532 | // 1. Update its type. | |||
3533 | // 2. Replace the uses of Ext by Inst. | |||
3534 | // 3. Extend each operand that needs to be extended. | |||
3535 | ||||
3536 | // Remember the original type of the instruction before promotion. | |||
3537 | // This is useful for knowing whether the high bits are sign- or zero-extended. | |||
3538 | PromotedInsts.insert(std::pair<Instruction *, TypeIsSExt>( | |||
3539 | ExtOpnd, TypeIsSExt(ExtOpnd->getType(), IsSExt))); | |||
3540 | // Step #1. | |||
3541 | TPT.mutateType(ExtOpnd, Ext->getType()); | |||
3542 | // Step #2. | |||
3543 | TPT.replaceAllUsesWith(Ext, ExtOpnd); | |||
3544 | // Step #3. | |||
3545 | Instruction *ExtForOpnd = Ext; | |||
3546 | ||||
3547 | DEBUG(dbgs() << "Propagate Ext to operands\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Propagate Ext to operands\n" ; } } while (false); | |||
3548 | for (int OpIdx = 0, EndOpIdx = ExtOpnd->getNumOperands(); OpIdx != EndOpIdx; | |||
3549 | ++OpIdx) { | |||
3550 | DEBUG(dbgs() << "Operand:\n" << *(ExtOpnd->getOperand(OpIdx)) << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Operand:\n" << * (ExtOpnd->getOperand(OpIdx)) << '\n'; } } while (false ); | |||
3551 | if (ExtOpnd->getOperand(OpIdx)->getType() == Ext->getType() || | |||
3552 | !shouldExtOperand(ExtOpnd, OpIdx)) { | |||
3553 | DEBUG(dbgs() << "No need to propagate\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "No need to propagate\n" ; } } while (false); | |||
3554 | continue; | |||
3555 | } | |||
3556 | // Check if we can statically extend the operand. | |||
3557 | Value *Opnd = ExtOpnd->getOperand(OpIdx); | |||
3558 | if (const ConstantInt *Cst = dyn_cast<ConstantInt>(Opnd)) { | |||
3559 | DEBUG(dbgs() << "Statically extend\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Statically extend\n"; } } while (false); | |||
3560 | unsigned BitWidth = Ext->getType()->getIntegerBitWidth(); | |||
3561 | APInt CstVal = IsSExt ? Cst->getValue().sext(BitWidth) | |||
3562 | : Cst->getValue().zext(BitWidth); | |||
3563 | TPT.setOperand(ExtOpnd, OpIdx, ConstantInt::get(Ext->getType(), CstVal)); | |||
3564 | continue; | |||
3565 | } | |||
3566 | // UndefValues are typed, so we have to statically extend them. | |||
3567 | if (isa<UndefValue>(Opnd)) { | |||
3568 | DEBUG(dbgs() << "Statically extend\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Statically extend\n"; } } while (false); | |||
3569 | TPT.setOperand(ExtOpnd, OpIdx, UndefValue::get(Ext->getType())); | |||
3570 | continue; | |||
3571 | } | |||
3572 | ||||
3573 | // Otherwise we have to explicitly sign/zero extend the operand. | |||
3574 | // Check if Ext was already reused to extend another operand. | |||
3575 | if (!ExtForOpnd) { | |||
3576 | // If so, create a new extension. | |||
3577 | DEBUG(dbgs() << "More operands to ext\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "More operands to ext\n" ; } } while (false); | |||
3578 | Value *ValForExtOpnd = IsSExt ? TPT.createSExt(Ext, Opnd, Ext->getType()) | |||
3579 | : TPT.createZExt(Ext, Opnd, Ext->getType()); | |||
3580 | if (!isa<Instruction>(ValForExtOpnd)) { | |||
3581 | TPT.setOperand(ExtOpnd, OpIdx, ValForExtOpnd); | |||
3582 | continue; | |||
3583 | } | |||
3584 | ExtForOpnd = cast<Instruction>(ValForExtOpnd); | |||
3585 | } | |||
3586 | if (Exts) | |||
3587 | Exts->push_back(ExtForOpnd); | |||
3588 | TPT.setOperand(ExtForOpnd, 0, Opnd); | |||
3589 | ||||
3590 | // Move the extension before the insertion point. | |||
3591 | TPT.moveBefore(ExtForOpnd, ExtOpnd); | |||
3592 | TPT.setOperand(ExtOpnd, OpIdx, ExtForOpnd); | |||
3593 | CreatedInstsCost += !TLI.isExtFree(ExtForOpnd); | |||
3594 | // If more exts are required, new instructions will have to be created. | |||
3595 | ExtForOpnd = nullptr; | |||
3596 | } | |||
3597 | if (ExtForOpnd == Ext) { | |||
3598 | DEBUG(dbgs() << "Extension is useless now\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Extension is useless now\n" ; } } while (false); | |||
3599 | TPT.eraseInstruction(Ext); | |||
3600 | } | |||
3601 | return ExtOpnd; | |||
3602 | } | |||
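
A hedged before/after sketch of this promotion with concrete widths (invented); it has the same shape as the gep example in matchOperationAddr below:

    // before: %op  = add nuw i32 %opnd, 1
    //         %ext = zext i32 %op to i64
    // after:  %p   = zext i32 %opnd to i64 ; step #3 (reuses %ext, moved up)
    //         %op  = add nuw i64 %p, 1     ; step #1 mutated the type; the
    //                                      ; constant 1 was extended statically
    //         ; step #2: every former use of %ext now uses %op directly.
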
3603 | ||||
3604 | /// Check whether or not promoting an instruction to a wider type is profitable. | |||
3605 | /// \p NewCost gives the cost of extension instructions created by the | |||
3606 | /// promotion. | |||
3607 | /// \p OldCost gives the cost of extension instructions before the promotion | |||
3608 | /// plus the number of instructions that have been | |||
3609 | /// matched in the addressing mode thanks to the promotion. | |||
3610 | /// \p PromotedOperand is the value that has been promoted. | |||
3611 | /// \return True if the promotion is profitable, false otherwise. | |||
3612 | bool AddressingModeMatcher::isPromotionProfitable( | |||
3613 | unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const { | |||
3614 | DEBUG(dbgs() << "OldCost: " << OldCost << "\tNewCost: " << NewCost << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "OldCost: " << OldCost << "\tNewCost: " << NewCost << '\n'; } } while (false); | |||
3615 | // The cost of the new extensions is greater than the cost of the | |||
3616 | // old extension plus what we folded. | |||
3617 | // This is not profitable. | |||
3618 | if (NewCost > OldCost) | |||
3619 | return false; | |||
3620 | if (NewCost < OldCost) | |||
3621 | return true; | |||
3622 | // The promotion is neutral but it may help folding the sign extension in | |||
3623 | // loads for instance. | |||
3624 | // Check that we did not create an illegal instruction. | |||
3625 | return isPromotedInstructionLegal(TLI, DL, PromotedOperand); | |||
3626 | } | |||
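
A hedged worked example (numbers invented): suppose promoting past one add lets the matcher fold that add and removes one non-free ext, while materializing a single non-free ext of its own:

    // OldCost = ExtCost (1) + instructions folded into the addr mode (1) = 2
    // NewCost = extensions created by the promotion                      = 1
    // NewCost < OldCost  =>  profitable; on a tie, legality decides.
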
3627 | ||||
3628 | /// Given an instruction or constant expr, see if we can fold the operation | |||
3629 | /// into the addressing mode. If so, update the addressing mode and return | |||
3630 | /// true, otherwise return false without modifying AddrMode. | |||
3631 | /// If \p MovedAway is not NULL, it indicates whether or not AddrInst has to | |||
3632 | /// be folded into the addressing mode on success. | |||
3633 | /// If \p MovedAway == true, \p AddrInst will not be part of the addressing mode | |||
3634 | /// because it has been moved away. | |||
3635 | /// Thus AddrInst must not be added in the matched instructions. | |||
3636 | /// This state can happen when AddrInst is a sext, since it may be moved away. | |||
3637 | /// Therefore, AddrInst may not be valid when MovedAway is true and it must | |||
3638 | /// not be referenced anymore. | |||
3639 | bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode, | |||
3640 | unsigned Depth, | |||
3641 | bool *MovedAway) { | |||
3642 | // Avoid exponential behavior on extremely deep expression trees. | |||
3643 | if (Depth >= 5) return false; | |||
3644 | ||||
3645 | // By default, all matched instructions stay in place. | |||
3646 | if (MovedAway) | |||
3647 | *MovedAway = false; | |||
3648 | ||||
3649 | switch (Opcode) { | |||
3650 | case Instruction::PtrToInt: | |||
3651 | // PtrToInt is always a noop, as we know that the int type is pointer sized. | |||
3652 | return matchAddr(AddrInst->getOperand(0), Depth); | |||
3653 | case Instruction::IntToPtr: { | |||
3654 | auto AS = AddrInst->getType()->getPointerAddressSpace(); | |||
3655 | auto PtrTy = MVT::getIntegerVT(DL.getPointerSizeInBits(AS)); | |||
3656 | // This inttoptr is a no-op if the integer type is pointer sized. | |||
3657 | if (TLI.getValueType(DL, AddrInst->getOperand(0)->getType()) == PtrTy) | |||
3658 | return matchAddr(AddrInst->getOperand(0), Depth); | |||
3659 | return false; | |||
3660 | } | |||
3661 | case Instruction::BitCast: | |||
3662 | // BitCast is always a noop, and we can handle it as long as it is | |||
3663 | // int->int or pointer->pointer (we don't want int<->fp or something). | |||
3664 | if ((AddrInst->getOperand(0)->getType()->isPointerTy() || | |||
3665 | AddrInst->getOperand(0)->getType()->isIntegerTy()) && | |||
3666 | // Don't touch identity bitcasts. These were probably put here by LSR, | |||
3667 | // and we don't want to mess around with them. Assume it knows what it | |||
3668 | // is doing. | |||
3669 | AddrInst->getOperand(0)->getType() != AddrInst->getType()) | |||
3670 | return matchAddr(AddrInst->getOperand(0), Depth); | |||
3671 | return false; | |||
3672 | case Instruction::AddrSpaceCast: { | |||
3673 | unsigned SrcAS | |||
3674 | = AddrInst->getOperand(0)->getType()->getPointerAddressSpace(); | |||
3675 | unsigned DestAS = AddrInst->getType()->getPointerAddressSpace(); | |||
3676 | if (TLI.isNoopAddrSpaceCast(SrcAS, DestAS)) | |||
3677 | return matchAddr(AddrInst->getOperand(0), Depth); | |||
3678 | return false; | |||
3679 | } | |||
3680 | case Instruction::Add: { | |||
3681 | // Check to see if we can merge in the RHS then the LHS. If so, we win. | |||
3682 | ExtAddrMode BackupAddrMode = AddrMode; | |||
3683 | unsigned OldSize = AddrModeInsts.size(); | |||
3684 | // Start a transaction at this point. | |||
3685 | // The LHS may match but not the RHS. | |||
3686 | // Therefore, we need a higher level restoration point to undo partially | |||
3687 | // matched operation. | |||
3688 | TypePromotionTransaction::ConstRestorationPt LastKnownGood = | |||
3689 | TPT.getRestorationPoint(); | |||
3690 | ||||
3691 | if (matchAddr(AddrInst->getOperand(1), Depth+1) && | |||
3692 | matchAddr(AddrInst->getOperand(0), Depth+1)) | |||
3693 | return true; | |||
3694 | ||||
3695 | // Restore the old addr mode info. | |||
3696 | AddrMode = BackupAddrMode; | |||
3697 | AddrModeInsts.resize(OldSize); | |||
3698 | TPT.rollback(LastKnownGood); | |||
3699 | ||||
3700 | // Otherwise this was over-aggressive. Try merging in the LHS then the RHS. | |||
3701 | if (matchAddr(AddrInst->getOperand(0), Depth+1) && | |||
3702 | matchAddr(AddrInst->getOperand(1), Depth+1)) | |||
3703 | return true; | |||
3704 | ||||
3705 | // Otherwise we definitely can't merge the ADD in. | |||
3706 | AddrMode = BackupAddrMode; | |||
3707 | AddrModeInsts.resize(OldSize); | |||
3708 | TPT.rollback(LastKnownGood); | |||
3709 | break; | |||
3710 | } | |||
3711 | //case Instruction::Or: | |||
3712 | // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD. | |||
3713 | //break; | |||
3714 | case Instruction::Mul: | |||
3715 | case Instruction::Shl: { | |||
3716 | // Can only handle X*C and X << C. | |||
3717 | ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1)); | |||
3718 | if (!RHS) | |||
3719 | return false; | |||
3720 | int64_t Scale = RHS->getSExtValue(); | |||
3721 | if (Opcode == Instruction::Shl) | |||
3722 | Scale = 1LL << Scale; | |||
3723 | ||||
3724 | return matchScaledValue(AddrInst->getOperand(0), Scale, Depth); | |||
3725 | } | |||
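    // Illustrative values (assumed): "shl i64 %x, 3" is folded as
    // Scale = 1LL << 3 = 8, i.e. exactly like "mul i64 %x, 8"; the matcher
    // then tries to treat %x as a register scaled by 8.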
3726 | case Instruction::GetElementPtr: { | |||
3727 | // Scan the GEP. We check whether it contains constant offsets and at most | |||
3728 | // one variable offset. | |||
3729 | int VariableOperand = -1; | |||
3730 | unsigned VariableScale = 0; | |||
3731 | ||||
3732 | int64_t ConstantOffset = 0; | |||
3733 | gep_type_iterator GTI = gep_type_begin(AddrInst); | |||
3734 | for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) { | |||
3735 | if (StructType *STy = GTI.getStructTypeOrNull()) { | |||
3736 | const StructLayout *SL = DL.getStructLayout(STy); | |||
3737 | unsigned Idx = | |||
3738 | cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue(); | |||
3739 | ConstantOffset += SL->getElementOffset(Idx); | |||
3740 | } else { | |||
3741 | uint64_t TypeSize = DL.getTypeAllocSize(GTI.getIndexedType()); | |||
3742 | if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) { | |||
3743 | ConstantOffset += CI->getSExtValue()*TypeSize; | |||
3744 | } else if (TypeSize) { // Scales of zero don't do anything. | |||
3745 | // We only allow one variable index at the moment. | |||
3746 | if (VariableOperand != -1) | |||
3747 | return false; | |||
3748 | ||||
3749 | // Remember the variable index. | |||
3750 | VariableOperand = i; | |||
3751 | VariableScale = TypeSize; | |||
3752 | } | |||
3753 | } | |||
3754 | } | |||
3755 | ||||
3756 | // A common case is for the GEP to only do a constant offset. In this case, | |||
3757 | // just add it to the disp field and check validity. | |||
3758 | if (VariableOperand == -1) { | |||
3759 | AddrMode.BaseOffs += ConstantOffset; | |||
3760 | if (ConstantOffset == 0 || | |||
3761 | TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) { | |||
3762 | // Check to see if we can fold the base pointer in too. | |||
3763 | if (matchAddr(AddrInst->getOperand(0), Depth+1)) | |||
3764 | return true; | |||
3765 | } | |||
3766 | AddrMode.BaseOffs -= ConstantOffset; | |||
3767 | return false; | |||
3768 | } | |||
3769 | ||||
3770 | // Save the valid addressing mode in case we can't match. | |||
3771 | ExtAddrMode BackupAddrMode = AddrMode; | |||
3772 | unsigned OldSize = AddrModeInsts.size(); | |||
3773 | ||||
3774 | // See if the scale and offset amounts are valid for this target. | |||
3775 | AddrMode.BaseOffs += ConstantOffset; | |||
3776 | ||||
3777 | // Match the base operand of the GEP. | |||
3778 | if (!matchAddr(AddrInst->getOperand(0), Depth+1)) { | |||
3779 | // If it couldn't be matched, just stuff the value in a register. | |||
3780 | if (AddrMode.HasBaseReg) { | |||
3781 | AddrMode = BackupAddrMode; | |||
3782 | AddrModeInsts.resize(OldSize); | |||
3783 | return false; | |||
3784 | } | |||
3785 | AddrMode.HasBaseReg = true; | |||
3786 | AddrMode.BaseReg = AddrInst->getOperand(0); | |||
3787 | } | |||
3788 | ||||
3789 | // Match the remaining variable portion of the GEP. | |||
3790 | if (!matchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale, | |||
3791 | Depth)) { | |||
3792 | // If it couldn't be matched, try stuffing the base into a register | |||
3793 | // instead of matching it, and retrying the match of the scale. | |||
3794 | AddrMode = BackupAddrMode; | |||
3795 | AddrModeInsts.resize(OldSize); | |||
3796 | if (AddrMode.HasBaseReg) | |||
3797 | return false; | |||
3798 | AddrMode.HasBaseReg = true; | |||
3799 | AddrMode.BaseReg = AddrInst->getOperand(0); | |||
3800 | AddrMode.BaseOffs += ConstantOffset; | |||
3801 | if (!matchScaledValue(AddrInst->getOperand(VariableOperand), | |||
3802 | VariableScale, Depth)) { | |||
3803 | // If even that didn't work, bail. | |||
3804 | AddrMode = BackupAddrMode; | |||
3805 | AddrModeInsts.resize(OldSize); | |||
3806 | return false; | |||
3807 | } | |||
3808 | } | |||
3809 | ||||
3810 | return true; | |||
3811 | } | |||
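    // A hedged walk-through with an invented type %S = type { i32, [16 x i32] }
    // for "getelementptr %S, %S* %p, i64 0, i32 1, i64 %i":
    //   - the leading i64 0 contributes nothing;
    //   - field index 1 adds the struct offset: ConstantOffset = 4;
    //   - %i indexes [16 x i32], becoming the single variable operand with
    //     VariableScale = 4 (the i32 alloc size);
    // so AddrMode.BaseOffs grows by 4 and %i is matched with scale 4.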
3812 | case Instruction::SExt: | |||
3813 | case Instruction::ZExt: { | |||
3814 | Instruction *Ext = dyn_cast<Instruction>(AddrInst); | |||
3815 | if (!Ext) | |||
3816 | return false; | |||
3817 | ||||
3818 | // Try to move this ext out of the way of the addressing mode. | |||
3819 | // Ask for a method for doing so. | |||
3820 | TypePromotionHelper::Action TPH = | |||
3821 | TypePromotionHelper::getAction(Ext, InsertedInsts, TLI, PromotedInsts); | |||
3822 | if (!TPH) | |||
3823 | return false; | |||
3824 | ||||
3825 | TypePromotionTransaction::ConstRestorationPt LastKnownGood = | |||
3826 | TPT.getRestorationPoint(); | |||
3827 | unsigned CreatedInstsCost = 0; | |||
3828 | unsigned ExtCost = !TLI.isExtFree(Ext); | |||
3829 | Value *PromotedOperand = | |||
3830 | TPH(Ext, TPT, PromotedInsts, CreatedInstsCost, nullptr, nullptr, TLI); | |||
3831 | // SExt has been moved away. | |||
3832 | // Thus either it will be rematched later in the recursive calls or it is | |||
3833 | // gone. Anyway, we must not fold it into the addressing mode at this point. | |||
3834 | // E.g., | |||
3835 | // op = add opnd, 1 | |||
3836 | // idx = ext op | |||
3837 | // addr = gep base, idx | |||
3838 | // is now: | |||
3839 | // promotedOpnd = ext opnd <- no match here | |||
3840 | // op = promoted_add promotedOpnd, 1 <- match (later in recursive calls) | |||
3841 | // addr = gep base, op <- match | |||
3842 | if (MovedAway) | |||
3843 | *MovedAway = true; | |||
3844 | ||||
3845 | assert(PromotedOperand && | |||
3846 | "TypePromotionHelper should have filtered out those cases"); | |||
3847 | ||||
3848 | ExtAddrMode BackupAddrMode = AddrMode; | |||
3849 | unsigned OldSize = AddrModeInsts.size(); | |||
3850 | ||||
3851 | if (!matchAddr(PromotedOperand, Depth) || | |||
3852 | // The total of the new cost is equal to the cost of the created | |||
3853 | // instructions. | |||
3854 | // The total of the old cost is equal to the cost of the extension plus | |||
3855 | // what we have saved in the addressing mode. | |||
3856 | !isPromotionProfitable(CreatedInstsCost, | |||
3857 | ExtCost + (AddrModeInsts.size() - OldSize), | |||
3858 | PromotedOperand)) { | |||
3859 | AddrMode = BackupAddrMode; | |||
3860 | AddrModeInsts.resize(OldSize); | |||
3861 | DEBUG(dbgs() << "Sign extension does not pay off: rollback\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Sign extension does not pay off: rollback\n" ; } } while (false); | |||
3862 | TPT.rollback(LastKnownGood); | |||
3863 | return false; | |||
3864 | } | |||
3865 | return true; | |||
3866 | } | |||
3867 | } | |||
3868 | return false; | |||
3869 | } | |||
3870 | ||||
3871 | /// If we can, try to add the value of 'Addr' into the current addressing mode. | |||
3872 | /// If Addr can't be added to AddrMode this returns false and leaves AddrMode | |||
3873 | /// unmodified. This assumes that Addr is either a pointer type or intptr_t | |||
3874 | /// for the target. | |||
3875 | /// | |||
3876 | bool AddressingModeMatcher::matchAddr(Value *Addr, unsigned Depth) { | |||
3877 | // Start a transaction at this point that we will rollback if the matching | |||
3878 | // fails. | |||
3879 | TypePromotionTransaction::ConstRestorationPt LastKnownGood = | |||
3880 | TPT.getRestorationPoint(); | |||
3881 | if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) { | |||
3882 | // Fold in immediates if legal for the target. | |||
3883 | AddrMode.BaseOffs += CI->getSExtValue(); | |||
3884 | if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) | |||
3885 | return true; | |||
3886 | AddrMode.BaseOffs -= CI->getSExtValue(); | |||
3887 | } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) { | |||
3888 | // If this is a global variable, try to fold it into the addressing mode. | |||
3889 | if (!AddrMode.BaseGV) { | |||
3890 | AddrMode.BaseGV = GV; | |||
3891 | if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) | |||
3892 | return true; | |||
3893 | AddrMode.BaseGV = nullptr; | |||
3894 | } | |||
3895 | } else if (Instruction *I = dyn_cast<Instruction>(Addr)) { | |||
3896 | ExtAddrMode BackupAddrMode = AddrMode; | |||
3897 | unsigned OldSize = AddrModeInsts.size(); | |||
3898 | ||||
3899 | // Check to see if it is possible to fold this operation. | |||
3900 | bool MovedAway = false; | |||
3901 | if (matchOperationAddr(I, I->getOpcode(), Depth, &MovedAway)) { | |||
3902 | // This instruction may have been moved away. If so, there is nothing | |||
3903 | // to check here. | |||
3904 | if (MovedAway) | |||
3905 | return true; | |||
3906 | // Okay, it's possible to fold this. Check to see if it is actually | |||
3907 | // *profitable* to do so. We use a simple cost model to avoid increasing | |||
3908 | // register pressure too much. | |||
3909 | if (I->hasOneUse() || | |||
3910 | isProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) { | |||
3911 | AddrModeInsts.push_back(I); | |||
3912 | return true; | |||
3913 | } | |||
3914 | ||||
3915 | // It isn't profitable to do this, roll back. | |||
3916 | //cerr << "NOT FOLDING: " << *I; | |||
3917 | AddrMode = BackupAddrMode; | |||
3918 | AddrModeInsts.resize(OldSize); | |||
3919 | TPT.rollback(LastKnownGood); | |||
3920 | } | |||
3921 | } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) { | |||
3922 | if (matchOperationAddr(CE, CE->getOpcode(), Depth)) | |||
3923 | return true; | |||
3924 | TPT.rollback(LastKnownGood); | |||
3925 | } else if (isa<ConstantPointerNull>(Addr)) { | |||
3926 | // Null pointer gets folded without affecting the addressing mode. | |||
3927 | return true; | |||
3928 | } | |||
3929 | ||||
3930 | // Worst case, the target should support [reg] addressing modes. :) | |||
3931 | if (!AddrMode.HasBaseReg) { | |||
3932 | AddrMode.HasBaseReg = true; | |||
3933 | AddrMode.BaseReg = Addr; | |||
3934 | // Still check for legality in case the target supports [imm] but not [i+r]. | |||
3935 | if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) | |||
3936 | return true; | |||
3937 | AddrMode.HasBaseReg = false; | |||
3938 | AddrMode.BaseReg = nullptr; | |||
3939 | } | |||
3940 | ||||
3941 | // If the base register is already taken, see if we can do [r+r]. | |||
3942 | if (AddrMode.Scale == 0) { | |||
3943 | AddrMode.Scale = 1; | |||
3944 | AddrMode.ScaledReg = Addr; | |||
3945 | if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) | |||
3946 | return true; | |||
3947 | AddrMode.Scale = 0; | |||
3948 | AddrMode.ScaledReg = nullptr; | |||
3949 | } | |||
3950 | // Couldn't match. | |||
3951 | TPT.rollback(LastKnownGood); | |||
3952 | return false; | |||
3953 | } | |||
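
A hedged trace of the fallback ladder at the end (situation invented): if Addr is an instruction the matcher could not fold and a base register is already taken, only the scaled slot remains:

    // AddrMode.HasBaseReg == true      -> the [reg] fallback is skipped
    // AddrMode.Scale == 0              -> try Scale = 1, ScaledReg = Addr
    // TLI.isLegalAddressingMode(...)   -> accept as [r + r] if the target allows
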
3954 | ||||
3955 | /// Check to see if all uses of OpVal by the specified inline asm call are due | |||
3956 | /// to memory operands. If so, return true, otherwise return false. | |||
3957 | static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, | |||
3958 | const TargetLowering &TLI, | |||
3959 | const TargetRegisterInfo &TRI) { | |||
3960 | const Function *F = CI->getFunction(); | |||
3961 | TargetLowering::AsmOperandInfoVector TargetConstraints = | |||
3962 | TLI.ParseConstraints(F->getParent()->getDataLayout(), &TRI, | |||
3963 | ImmutableCallSite(CI)); | |||
3964 | ||||
3965 | for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { | |||
3966 | TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i]; | |||
3967 | ||||
3968 | // Compute the constraint code and ConstraintType to use. | |||
3969 | TLI.ComputeConstraintToUse(OpInfo, SDValue()); | |||
3970 | ||||
3971 | // If this asm operand is our Value*, and if it isn't an indirect memory | |||
3972 | // operand, we can't fold it! | |||
3973 | if (OpInfo.CallOperandVal == OpVal && | |||
3974 | (OpInfo.ConstraintType != TargetLowering::C_Memory || | |||
3975 | !OpInfo.isIndirect)) | |||
3976 | return false; | |||
3977 | } | |||
3978 | ||||
3979 | return true; | |||
3980 | } | |||
3981 | ||||
3982 | /// Recursively walk all the uses of I until we find a memory use. | |||
3983 | /// If we find an obviously non-foldable instruction, return true. | |||
3984 | /// Add the ultimately found memory instructions to MemoryUses. | |||
3985 | static bool FindAllMemoryUses( | |||
3986 | Instruction *I, | |||
3987 | SmallVectorImpl<std::pair<Instruction *, unsigned>> &MemoryUses, | |||
3988 | SmallPtrSetImpl<Instruction *> &ConsideredInsts, | |||
3989 | const TargetLowering &TLI, const TargetRegisterInfo &TRI) { | |||
3990 | // If we already considered this instruction, we're done. | |||
3991 | if (!ConsideredInsts.insert(I).second) | |||
3992 | return false; | |||
3993 | ||||
3994 | // If this is an obviously unfoldable instruction, bail out. | |||
3995 | if (!MightBeFoldableInst(I)) | |||
3996 | return true; | |||
3997 | ||||
3998 | const bool OptSize = I->getFunction()->optForSize(); | |||
3999 | ||||
4000 | // Loop over all the uses, recursively processing them. | |||
4001 | for (Use &U : I->uses()) { | |||
4002 | Instruction *UserI = cast<Instruction>(U.getUser()); | |||
4003 | ||||
4004 | if (LoadInst *LI = dyn_cast<LoadInst>(UserI)) { | |||
4005 | MemoryUses.push_back(std::make_pair(LI, U.getOperandNo())); | |||
4006 | continue; | |||
4007 | } | |||
4008 | ||||
4009 | if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) { | |||
4010 | unsigned opNo = U.getOperandNo(); | |||
4011 | if (opNo != StoreInst::getPointerOperandIndex()) | |||
4012 | return true; // Storing addr, not into addr. | |||
4013 | MemoryUses.push_back(std::make_pair(SI, opNo)); | |||
4014 | continue; | |||
4015 | } | |||
4016 | ||||
4017 | if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(UserI)) { | |||
4018 | unsigned opNo = U.getOperandNo(); | |||
4019 | if (opNo != AtomicRMWInst::getPointerOperandIndex()) | |||
4020 | return true; // Storing addr, not into addr. | |||
4021 | MemoryUses.push_back(std::make_pair(RMW, opNo)); | |||
4022 | continue; | |||
4023 | } | |||
4024 | ||||
4025 | if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(UserI)) { | |||
4026 | unsigned opNo = U.getOperandNo(); | |||
4027 | if (opNo != AtomicCmpXchgInst::getPointerOperandIndex()) | |||
4028 | return true; // Storing addr, not into addr. | |||
4029 | MemoryUses.push_back(std::make_pair(CmpX, opNo)); | |||
4030 | continue; | |||
4031 | } | |||
4032 | ||||
4033 | if (CallInst *CI = dyn_cast<CallInst>(UserI)) { | |||
4034 | // If this is a cold call, we can sink the addressing calculation into | |||
4035 | // the cold path. See optimizeCallInst. | |||
4036 | if (!OptSize && CI->hasFnAttr(Attribute::Cold)) | |||
4037 | continue; | |||
4038 | ||||
4039 | InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue()); | |||
4040 | if (!IA) return true; | |||
4041 | ||||
4042 | // If this is a memory operand, we're cool, otherwise bail out. | |||
4043 | if (!IsOperandAMemoryOperand(CI, IA, I, TLI, TRI)) | |||
4044 | return true; | |||
4045 | continue; | |||
4046 | } | |||
4047 | ||||
4048 | if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI, TRI)) | |||
4049 | return true; | |||
4050 | } | |||
4051 | ||||
4052 | return false; | |||
4053 | } | |||
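
A hedged example of the store case above (names invented): a store that writes the address itself, rather than through it, makes the address non-foldable:

    // store i32* %addr, i32** %slot
    // Here U.getOperandNo() == 0 (the value operand), while the pointer
    // operand index of a store is 1, so the walk returns true:
    // "storing addr, not into addr".
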
4054 | ||||
4055 | /// Return true if Val is already known to be live at the use site that we're | |||
4056 | /// folding it into. If so, there is no cost to include it in the addressing | |||
4057 | /// mode. KnownLive1 and KnownLive2 are two values that we know are live at the | |||
4058 | /// instruction already. | |||
4059 | bool AddressingModeMatcher::valueAlreadyLiveAtInst(Value *Val,Value *KnownLive1, | |||
4060 | Value *KnownLive2) { | |||
4061 | // If Val is either of the known-live values, we know it is live! | |||
4062 | if (Val == nullptr || Val == KnownLive1 || Val == KnownLive2) | |||
4063 | return true; | |||
4064 | ||||
4065 | // All values other than instructions and arguments (e.g. constants) are live. | |||
4066 | if (!isa<Instruction>(Val) && !isa<Argument>(Val)) return true; | |||
4067 | ||||
4068 | // If Val is a constant-sized alloca in the entry block, it is live. This is | |||
4069 | // true because it is just a reference to the stack/frame pointer, which is | |||
4070 | // live for the whole function. | |||
4071 | if (AllocaInst *AI = dyn_cast<AllocaInst>(Val)) | |||
4072 | if (AI->isStaticAlloca()) | |||
4073 | return true; | |||
4074 | ||||
4075 | // Check to see if this value is already used in the memory instruction's | |||
4076 | // block. If so, it's already live into the block at the very least, so we | |||
4077 | // can reasonably fold it. | |||
4078 | return Val->isUsedInBasicBlock(MemoryInst->getParent()); | |||
4079 | } | |||
4080 | ||||
4081 | /// It is possible for the addressing mode of the machine to fold the specified | |||
4082 | /// instruction into a load or store that ultimately uses it. | |||
4083 | /// However, the specified instruction has multiple uses. | |||
4084 | /// Given this, it may actually increase register pressure to fold it | |||
4085 | /// into the load. For example, consider this code: | |||
4086 | /// | |||
4087 | /// X = ... | |||
4088 | /// Y = X+1 | |||
4089 | /// use(Y) -> nonload/store | |||
4090 | /// Z = Y+1 | |||
4091 | /// load Z | |||
4092 | /// | |||
4093 | /// In this case, Y has multiple uses, and can be folded into the load of Z | |||
4094 | /// (yielding load [X+2]). However, doing this will cause both "X" and "X+1" to | |||
4095 | /// be live at the use(Y) line. If we don't fold Y into load Z, we use one | |||
4096 | /// fewer register. Since Y can't be folded into "use(Y)" we don't increase the | |||
4097 | /// number of computations either. | |||
4098 | /// | |||
4099 | /// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If | |||
4100 | /// X was live across 'load Z' for other reasons, we actually *would* want to | |||
4101 | /// fold the addressing mode in the Z case. This would make Y die earlier. | |||
4102 | bool AddressingModeMatcher:: | |||
4103 | isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, | |||
4104 | ExtAddrMode &AMAfter) { | |||
4105 | if (IgnoreProfitability) return true; | |||
4106 | ||||
4107 | // AMBefore is the addressing mode before this instruction was folded into it, | |||
4108 | // and AMAfter is the addressing mode after the instruction was folded. Get | |||
4109 | // the set of registers referenced by AMAfter and subtract out those | |||
4110 | // referenced by AMBefore: this is the set of values which folding in this | |||
4111 | // address extends the lifetime of. | |||
4112 | // | |||
4113 | // Note that there are only two potential values being referenced here, | |||
4114 | // BaseReg and ScaleReg (global addresses are always available, as are any | |||
4115 | // folded immediates). | |||
4116 | Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg; | |||
4117 | ||||
4118 | // If the BaseReg or ScaledReg was referenced by the previous addrmode, their | |||
4119 | // lifetime wasn't extended by adding this instruction. | |||
4120 | if (valueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg)) | |||
4121 | BaseReg = nullptr; | |||
4122 | if (valueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg)) | |||
4123 | ScaledReg = nullptr; | |||
4124 | ||||
4125 | // If folding this instruction (and its subexprs) didn't extend any live | |||
4126 | // ranges, we're ok with it. | |||
4127 | if (!BaseReg && !ScaledReg) | |||
4128 | return true; | |||
4129 | ||||
4130 | // If all uses of this instruction can have the address mode sunk into them, | |||
4131 | // we can remove the addressing mode and effectively trade one live register | |||
4132 | // for another (at worst.) In this context, folding an addressing mode into | |||
4133 | // the use is just a particularly nice way of sinking it. | |||
4134 | SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses; | |||
4135 | SmallPtrSet<Instruction*, 16> ConsideredInsts; | |||
4136 | if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI)) | |||
4137 | return false; // Has a non-memory, non-foldable use! | |||
4138 | ||||
4139 | // Now that we know that all uses of this instruction are part of a chain of | |||
4140 | // computation involving only operations that could theoretically be folded | |||
4141 | // into a memory use, loop over each of these memory operation uses and see | |||
4142 | // if they could *actually* fold the instruction. The assumption is that | |||
4143 | // addressing modes are cheap and that duplicating the computation involved | |||
4144 | // many times is worthwhile, even on a fastpath. For sinking candidates | |||
4145 | // (i.e. cold call sites), this serves as a way to prevent excessive code | |||
4146 | // growth since most architectures have some reasonably small and fast way to | |||
4147 | // compute an effective address (e.g., LEA on x86). | |||
4148 | SmallVector<Instruction*, 32> MatchedAddrModeInsts; | |||
4149 | for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) { | |||
4150 | Instruction *User = MemoryUses[i].first; | |||
4151 | unsigned OpNo = MemoryUses[i].second; | |||
4152 | ||||
4153 | // Get the access type of this use. If the use isn't a pointer, we don't | |||
4154 | // know what it accesses. | |||
4155 | Value *Address = User->getOperand(OpNo); | |||
4156 | PointerType *AddrTy = dyn_cast<PointerType>(Address->getType()); | |||
4157 | if (!AddrTy) | |||
4158 | return false; | |||
4159 | Type *AddressAccessTy = AddrTy->getElementType(); | |||
4160 | unsigned AS = AddrTy->getAddressSpace(); | |||
4161 | ||||
4162 | // Do a match against the root of this address, ignoring profitability. This | |||
4163 | // will tell us if the addressing mode for the memory operation will | |||
4164 | // *actually* cover the shared instruction. | |||
4165 | ExtAddrMode Result; | |||
4166 | TypePromotionTransaction::ConstRestorationPt LastKnownGood = | |||
4167 | TPT.getRestorationPoint(); | |||
4168 | AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, TRI, | |||
4169 | AddressAccessTy, AS, | |||
4170 | MemoryInst, Result, InsertedInsts, | |||
4171 | PromotedInsts, TPT); | |||
4172 | Matcher.IgnoreProfitability = true; | |||
4173 | bool Success = Matcher.matchAddr(Address, 0); | |||
4174 | (void)Success; assert(Success && "Couldn't select *anything*?"); | |||
4175 | ||||
4176 | // The match was only to check profitability; the changes made are not | |||
4177 | // part of the original matcher. Therefore, they should be dropped, | |||
4178 | // otherwise the original matcher will not present the right state. | |||
4179 | TPT.rollback(LastKnownGood); | |||
4180 | ||||
4181 | // If the match didn't cover I, then it won't be shared by it. | |||
4182 | if (!is_contained(MatchedAddrModeInsts, I)) | |||
4183 | return false; | |||
4184 | ||||
4185 | MatchedAddrModeInsts.clear(); | |||
4186 | } | |||
4187 | ||||
4188 | return true; | |||
4189 | } | |||
4190 | ||||
4191 | } // end anonymous namespace | |||
4192 | ||||
4193 | /// Return true if the specified values are defined in a | |||
4194 | /// different basic block than BB. | |||
4195 | static bool IsNonLocalValue(Value *V, BasicBlock *BB) { | |||
4196 | if (Instruction *I = dyn_cast<Instruction>(V)) | |||
4197 | return I->getParent() != BB; | |||
4198 | return false; | |||
4199 | } | |||
4200 | ||||
4201 | /// Sink addressing mode computation immediately before MemoryInst if doing so | |||
4202 | /// can be done without increasing register pressure. The need for the | |||
4203 | /// register pressure constraint means this can end up being an all or nothing | |||
4204 | /// decision for all uses of the same addressing computation. | |||
4205 | /// | |||
4206 | /// Load and Store Instructions often have addressing modes that can do | |||
4207 | /// significant amounts of computation. As such, instruction selection will try | |||
4208 | /// to get the load or store to do as much computation as possible for the | |||
4209 | /// program. The problem is that isel can only see within a single block. As | |||
4210 | /// such, we sink as much legal addressing mode work into the block as possible. | |||
4211 | /// | |||
4212 | /// This method is used to optimize both load/store and inline asms with memory | |||
4213 | /// operands. It's also used to sink addressing computations feeding into cold | |||
4214 | /// call sites into their (cold) basic block. | |||
4215 | /// | |||
4216 | /// The motivation for handling sinking into cold blocks is that doing so can | |||
4217 | /// both enable other address mode sinking (by satisfying the register pressure | |||
4218 | /// constraint above), and reduce register pressure globally (by removing the | |||
4219 | /// addressing mode computation from the fast path entirely.). | |||
4220 | bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, | |||
4221 | Type *AccessTy, unsigned AddrSpace) { | |||
4222 | Value *Repl = Addr; | |||
4223 | ||||
4224 | // Try to collapse single-value PHI nodes. This is necessary to undo | |||
4225 | // unprofitable PRE transformations. | |||
4226 | SmallVector<Value*, 8> worklist; | |||
4227 | SmallPtrSet<Value*, 16> Visited; | |||
4228 | worklist.push_back(Addr); | |||
4229 | ||||
4230 | // Use a worklist to iteratively look through PHI nodes, and ensure that | |||
4231 | // the addressing modes obtained from the non-PHI roots of the graph | |||
4232 | // are equivalent. | |||
4233 | Value *Consensus = nullptr; | |||
4234 | unsigned NumUsesConsensus = 0; | |||
4235 | bool IsNumUsesConsensusValid = false; | |||
4236 | SmallVector<Instruction*, 16> AddrModeInsts; | |||
4237 | ExtAddrMode AddrMode; | |||
4238 | TypePromotionTransaction TPT(RemovedInsts); | |||
4239 | TypePromotionTransaction::ConstRestorationPt LastKnownGood = | |||
4240 | TPT.getRestorationPoint(); | |||
4241 | while (!worklist.empty()) { | |||
4242 | Value *V = worklist.back(); | |||
4243 | worklist.pop_back(); | |||
4244 | ||||
4245 | // Break use-def graph loops. | |||
4246 | if (!Visited.insert(V).second) { | |||
4247 | Consensus = nullptr; | |||
4248 | break; | |||
4249 | } | |||
4250 | ||||
4251 | // For a PHI node, push all of its incoming values. | |||
4252 | if (PHINode *P = dyn_cast<PHINode>(V)) { | |||
4253 | for (Value *IncValue : P->incoming_values()) | |||
4254 | worklist.push_back(IncValue); | |||
4255 | continue; | |||
4256 | } | |||
4257 | ||||
4258 | // For non-PHIs, determine the addressing mode being computed. Note that | |||
4259 | // the result may differ depending on what other uses our candidate | |||
4260 | // addressing instructions might have. | |||
4261 | SmallVector<Instruction*, 16> NewAddrModeInsts; | |||
4262 | ExtAddrMode NewAddrMode = AddressingModeMatcher::Match( | |||
4263 | V, AccessTy, AddrSpace, MemoryInst, NewAddrModeInsts, *TLI, *TRI, | |||
4264 | InsertedInsts, PromotedInsts, TPT); | |||
4265 | ||||
4266 | // This check is broken into two cases with very similar code to avoid using | |||
4267 | // getNumUses() as much as possible. Some values have a lot of uses, so | |||
4268 | // calling getNumUses() unconditionally caused a significant compile-time | |||
4269 | // regression. | |||
4270 | if (!Consensus) { | |||
4271 | Consensus = V; | |||
4272 | AddrMode = NewAddrMode; | |||
4273 | AddrModeInsts = NewAddrModeInsts; | |||
4274 | continue; | |||
4275 | } else if (NewAddrMode == AddrMode) { | |||
4276 | if (!IsNumUsesConsensusValid) { | |||
4277 | NumUsesConsensus = Consensus->getNumUses(); | |||
4278 | IsNumUsesConsensusValid = true; | |||
4279 | } | |||
4280 | ||||
4281 | // Ensure that the obtained addressing mode is equivalent to that obtained | |||
4282 | // for all other roots of the PHI traversal. Also, when choosing one | |||
4283 | // such root as representative, select the one with the most uses in order | |||
4284 | // to keep the cost modeling heuristics in AddressingModeMatcher | |||
4285 | // applicable. | |||
4286 | unsigned NumUses = V->getNumUses(); | |||
4287 | if (NumUses > NumUsesConsensus) { | |||
4288 | Consensus = V; | |||
4289 | NumUsesConsensus = NumUses; | |||
4290 | AddrModeInsts = NewAddrModeInsts; | |||
4291 | } | |||
4292 | continue; | |||
4293 | } | |||
4294 | ||||
4295 | Consensus = nullptr; | |||
4296 | break; | |||
4297 | } | |||
4298 | ||||
4299 | // If the addressing mode couldn't be determined, or if multiple different | |||
4300 | // ones were determined, bail out now. | |||
4301 | if (!Consensus) { | |||
4302 | TPT.rollback(LastKnownGood); | |||
4303 | return false; | |||
4304 | } | |||
4305 | TPT.commit(); | |||
4306 | ||||
4307 | // If all the instructions matched are already in this BB, don't do anything. | |||
4308 | if (none_of(AddrModeInsts, [&](Value *V) { | |||
4309 | return IsNonLocalValue(V, MemoryInst->getParent()); | |||
4310 | })) { | |||
4311 | DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "CGP: Found local addrmode: " << AddrMode << "\n"; } } while (false); | |||
4312 | return false; | |||
4313 | } | |||
4314 | ||||
4315 | // Insert this computation right after this user. Since our caller is | |||
4316 | // scanning from the top of the BB to the bottom, reuses of the expr are | |||
4317 | // guaranteed to happen later. | |||
4318 | IRBuilder<> Builder(MemoryInst); | |||
4319 | ||||
4320 | // Now that we've determined the addressing expression we want to use and know | |||
4321 | // that we have to sink it into this block, check to see if we have already | |||
4322 | // done this for some other load/store instr in this block. If so, reuse the | |||
4323 | // computation. | |||
4324 | Value *&SunkAddr = SunkAddrs[Addr]; | |||
4325 | if (SunkAddr) { | |||
4326 | DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for " << *MemoryInst << "\n"; } } while (false) | |||
4327 | << *MemoryInst << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for " << *MemoryInst << "\n"; } } while (false); | |||
4328 | if (SunkAddr->getType() != Addr->getType()) | |||
4329 | SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType()); | |||
4330 | } else if (AddrSinkUsingGEPs || | |||
4331 | (!AddrSinkUsingGEPs.getNumOccurrences() && TM && | |||
4332 | SubtargetInfo->useAA())) { | |||
4333 | // By default, we use the GEP-based method when AA is used later. This | |||
4334 | // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities. | |||
4335 | DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " << *MemoryInst << "\n"; } } while (false) | |||
4336 | << *MemoryInst << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " << *MemoryInst << "\n"; } } while (false); | |||
4337 | Type *IntPtrTy = DL->getIntPtrType(Addr->getType()); | |||
4338 | Value *ResultPtr = nullptr, *ResultIndex = nullptr; | |||
4339 | ||||
4340 | // First, find the pointer. | |||
4341 | if (AddrMode.BaseReg && AddrMode.BaseReg->getType()->isPointerTy()) { | |||
4342 | ResultPtr = AddrMode.BaseReg; | |||
4343 | AddrMode.BaseReg = nullptr; | |||
4344 | } | |||
4345 | ||||
4346 | if (AddrMode.Scale && AddrMode.ScaledReg->getType()->isPointerTy()) { | |||
4347 | // We can't add more than one pointer together, nor can we scale a | |||
4348 | // pointer (both of which seem meaningless). | |||
4349 | if (ResultPtr || AddrMode.Scale != 1) | |||
4350 | return false; | |||
4351 | ||||
4352 | ResultPtr = AddrMode.ScaledReg; | |||
4353 | AddrMode.Scale = 0; | |||
4354 | } | |||
4355 | ||||
4356 | if (AddrMode.BaseGV) { | |||
4357 | if (ResultPtr) | |||
4358 | return false; | |||
4359 | ||||
4360 | ResultPtr = AddrMode.BaseGV; | |||
4361 | } | |||
4362 | ||||
4363 | // If the real base value actually came from an inttoptr, then the matcher | |||
4364 | // will look through it and provide only the integer value. In that case, | |||
4365 | // use it here. | |||
4366 | if (!ResultPtr && AddrMode.BaseReg) { | |||
4367 | ResultPtr = | |||
4368 | Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(), "sunkaddr"); | |||
4369 | AddrMode.BaseReg = nullptr; | |||
4370 | } else if (!ResultPtr && AddrMode.Scale == 1) { | |||
4371 | ResultPtr = | |||
4372 | Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(), "sunkaddr"); | |||
4373 | AddrMode.Scale = 0; | |||
4374 | } | |||
4375 | ||||
4376 | if (!ResultPtr && | |||
4377 | !AddrMode.BaseReg && !AddrMode.Scale && !AddrMode.BaseOffs) { | |||
4378 | SunkAddr = Constant::getNullValue(Addr->getType()); | |||
4379 | } else if (!ResultPtr) { | |||
4380 | return false; | |||
4381 | } else { | |||
4382 | Type *I8PtrTy = | |||
4383 | Builder.getInt8PtrTy(Addr->getType()->getPointerAddressSpace()); | |||
4384 | Type *I8Ty = Builder.getInt8Ty(); | |||
4385 | ||||
4386 | // Start with the base register. Do this first so that subsequent address | |||
4387 | // matching finds it last, which will prevent it from trying to match it | |||
4388 | // as the scaled value in case it happens to be a mul. That would be | |||
4389 | // problematic if we've sunk a different mul for the scale, because then | |||
4390 | // we'd end up sinking both muls. | |||
4391 | if (AddrMode.BaseReg) { | |||
4392 | Value *V = AddrMode.BaseReg; | |||
4393 | if (V->getType() != IntPtrTy) | |||
4394 | V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr"); | |||
4395 | ||||
4396 | ResultIndex = V; | |||
4397 | } | |||
4398 | ||||
4399 | // Add the scale value. | |||
4400 | if (AddrMode.Scale) { | |||
4401 | Value *V = AddrMode.ScaledReg; | |||
4402 | if (V->getType() == IntPtrTy) { | |||
4403 | // done. | |||
4404 | } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() < | |||
4405 | cast<IntegerType>(V->getType())->getBitWidth()) { | |||
4406 | V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr"); | |||
4407 | } else { | |||
4408 | // It is only safe to sign extend the BaseReg if we know that the math | |||
4409 | // required to create it did not overflow before we extend it. Since | |||
4410 | // the original IR value was tossed in favor of a constant back when | |||
4411 | // the AddrMode was created we need to bail out gracefully if widths | |||
4412 | // do not match instead of extending it. | |||
4413 | Instruction *I = dyn_cast_or_null<Instruction>(ResultIndex); | |||
4414 | if (I && (ResultIndex != AddrMode.BaseReg)) | |||
4415 | I->eraseFromParent(); | |||
4416 | return false; | |||
4417 | } | |||
4418 | ||||
4419 | if (AddrMode.Scale != 1) | |||
4420 | V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale), | |||
4421 | "sunkaddr"); | |||
4422 | if (ResultIndex) | |||
4423 | ResultIndex = Builder.CreateAdd(ResultIndex, V, "sunkaddr"); | |||
4424 | else | |||
4425 | ResultIndex = V; | |||
4426 | } | |||
4427 | ||||
4428 | // Add in the Base Offset if present. | |||
4429 | if (AddrMode.BaseOffs) { | |||
4430 | Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs); | |||
4431 | if (ResultIndex) { | |||
4432 | // We need to add this separately from the scale above to help with | |||
4433 | // SDAG consecutive load/store merging. | |||
4434 | if (ResultPtr->getType() != I8PtrTy) | |||
4435 | ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy); | |||
4436 | ResultPtr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr"); | |||
4437 | } | |||
4438 | ||||
4439 | ResultIndex = V; | |||
4440 | } | |||
4441 | ||||
4442 | if (!ResultIndex) { | |||
4443 | SunkAddr = ResultPtr; | |||
4444 | } else { | |||
4445 | if (ResultPtr->getType() != I8PtrTy) | |||
4446 | ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy); | |||
4447 | SunkAddr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr"); | |||
4448 | } | |||
4449 | ||||
4450 | if (SunkAddr->getType() != Addr->getType()) | |||
4451 | SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType()); | |||
4452 | } | |||
4453 | } else { | |||
4454 | DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " | |||
4455 | << *MemoryInst << "\n"); | |||
4456 | Type *IntPtrTy = DL->getIntPtrType(Addr->getType()); | |||
4457 | Value *Result = nullptr; | |||
4458 | ||||
4459 | // Start with the base register. Do this first so that subsequent address | |||
4460 | // matching finds it last, which will prevent it from trying to match it | |||
4461 | // as the scaled value in case it happens to be a mul. That would be | |||
4462 | // problematic if we've sunk a different mul for the scale, because then | |||
4463 | // we'd end up sinking both muls. | |||
4464 | if (AddrMode.BaseReg) { | |||
4465 | Value *V = AddrMode.BaseReg; | |||
4466 | if (V->getType()->isPointerTy()) | |||
4467 | V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr"); | |||
4468 | if (V->getType() != IntPtrTy) | |||
4469 | V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr"); | |||
4470 | Result = V; | |||
4471 | } | |||
4472 | ||||
4473 | // Add the scale value. | |||
4474 | if (AddrMode.Scale) { | |||
4475 | Value *V = AddrMode.ScaledReg; | |||
4476 | if (V->getType() == IntPtrTy) { | |||
4477 | // done. | |||
4478 | } else if (V->getType()->isPointerTy()) { | |||
4479 | V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr"); | |||
4480 | } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() < | |||
4481 | cast<IntegerType>(V->getType())->getBitWidth()) { | |||
4482 | V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr"); | |||
4483 | } else { | |||
4484 | // It is only safe to sign extend the BaseReg if we know that the math | |||
4485 | // required to create it did not overflow before we extend it. Since | |||
4486 | // the original IR value was tossed in favor of a constant back when | |||
4487 | // the AddrMode was created, we need to bail out gracefully if widths | |||
4488 | // do not match instead of extending it. | |||
4489 | Instruction *I = dyn_cast_or_null<Instruction>(Result); | |||
4490 | if (I && (Result != AddrMode.BaseReg)) | |||
4491 | I->eraseFromParent(); | |||
4492 | return false; | |||
4493 | } | |||
4494 | if (AddrMode.Scale != 1) | |||
4495 | V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale), | |||
4496 | "sunkaddr"); | |||
4497 | if (Result) | |||
4498 | Result = Builder.CreateAdd(Result, V, "sunkaddr"); | |||
4499 | else | |||
4500 | Result = V; | |||
4501 | } | |||
4502 | ||||
4503 | // Add in the BaseGV if present. | |||
4504 | if (AddrMode.BaseGV) { | |||
4505 | Value *V = Builder.CreatePtrToInt(AddrMode.BaseGV, IntPtrTy, "sunkaddr"); | |||
4506 | if (Result) | |||
4507 | Result = Builder.CreateAdd(Result, V, "sunkaddr"); | |||
4508 | else | |||
4509 | Result = V; | |||
4510 | } | |||
4511 | ||||
4512 | // Add in the Base Offset if present. | |||
4513 | if (AddrMode.BaseOffs) { | |||
4514 | Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs); | |||
4515 | if (Result) | |||
4516 | Result = Builder.CreateAdd(Result, V, "sunkaddr"); | |||
4517 | else | |||
4518 | Result = V; | |||
4519 | } | |||
4520 | ||||
4521 | if (!Result) | |||
4522 | SunkAddr = Constant::getNullValue(Addr->getType()); | |||
4523 | else | |||
4524 | SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr"); | |||
4525 | } | |||
4526 | ||||
4527 | MemoryInst->replaceUsesOfWith(Repl, SunkAddr); | |||
4528 | ||||
4529 | // If we have no uses, recursively delete the value and all dead instructions | |||
4530 | // using it. | |||
4531 | if (Repl->use_empty()) { | |||
4532 | // This can cause recursive deletion, which can invalidate our iterator. | |||
4533 | // Use a WeakTrackingVH to hold onto it in case this happens. | |||
4534 | Value *CurValue = &*CurInstIterator; | |||
4535 | WeakTrackingVH IterHandle(CurValue); | |||
4536 | BasicBlock *BB = CurInstIterator->getParent(); | |||
4537 | ||||
4538 | RecursivelyDeleteTriviallyDeadInstructions(Repl, TLInfo); | |||
4539 | ||||
4540 | if (IterHandle != CurValue) { | |||
4541 | // If the iterator instruction was recursively deleted, start over at the | |||
4542 | // start of the block. | |||
4543 | CurInstIterator = BB->begin(); | |||
4544 | SunkAddrs.clear(); | |||
4545 | } | |||
4546 | } | |||
4547 | ++NumMemoryInsts; | |||
4548 | return true; | |||
4549 | } | |||
4550 | ||||
4551 | /// If there are any memory operands, use OptimizeMemoryInst to sink their | |||
4552 | /// address computation into the block when possible / profitable. | |||
4553 | bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) { | |||
4554 | bool MadeChange = false; | |||
4555 | ||||
4556 | const TargetRegisterInfo *TRI = | |||
4557 | TM->getSubtargetImpl(*CS->getFunction())->getRegisterInfo(); | |||
4558 | TargetLowering::AsmOperandInfoVector TargetConstraints = | |||
4559 | TLI->ParseConstraints(*DL, TRI, CS); | |||
4560 | unsigned ArgNo = 0; | |||
4561 | for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { | |||
4562 | TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i]; | |||
4563 | ||||
4564 | // Compute the constraint code and ConstraintType to use. | |||
4565 | TLI->ComputeConstraintToUse(OpInfo, SDValue()); | |||
4566 | ||||
4567 | if (OpInfo.ConstraintType == TargetLowering::C_Memory && | |||
4568 | OpInfo.isIndirect) { | |||
4569 | Value *OpVal = CS->getArgOperand(ArgNo++); | |||
4570 | MadeChange |= optimizeMemoryInst(CS, OpVal, OpVal->getType(), ~0u); | |||
4571 | } else if (OpInfo.Type == InlineAsm::isInput) | |||
4572 | ArgNo++; | |||
4573 | } | |||
4574 | ||||
4575 | return MadeChange; | |||
4576 | } | |||
4577 | ||||
4578 | /// \brief Check if all the uses of \p Val are equivalent (or free) zero or | |||
4579 | /// sign extensions. | |||
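| /// For illustration, a minimal IR sketch this predicate accepts (values are | |||
| /// hypothetical), assuming the target reports zext i16 -> i32 as free: | |||
| /// \code | |||
| /// %b = zext i8 %val to i32 | |||
| /// %c = zext i8 %val to i16 ; derivable from %b for free on such targets | |||
| /// \endcode | |||
| /// With sexts instead, differing destination types make it return false. | |||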
4580 | static bool hasSameExtUse(Value *Val, const TargetLowering &TLI) { | |||
4581 | assert(!Val->use_empty() && "Input must have at least one use"); | |||
4582 | const Instruction *FirstUser = cast<Instruction>(*Val->user_begin()); | |||
4583 | bool IsSExt = isa<SExtInst>(FirstUser); | |||
4584 | Type *ExtTy = FirstUser->getType(); | |||
4585 | for (const User *U : Val->users()) { | |||
4586 | const Instruction *UI = cast<Instruction>(U); | |||
4587 | if ((IsSExt && !isa<SExtInst>(UI)) || (!IsSExt && !isa<ZExtInst>(UI))) | |||
4588 | return false; | |||
4589 | Type *CurTy = UI->getType(); | |||
4590 | // Same input and output types: Same instruction after CSE. | |||
4591 | if (CurTy == ExtTy) | |||
4592 | continue; | |||
4593 | ||||
4594 | // If IsSExt is true, we are in this situation: | |||
4595 | // a = Val | |||
4596 | // b = sext ty1 a to ty2 | |||
4597 | // c = sext ty1 a to ty3 | |||
4598 | // Assuming ty2 is shorter than ty3, this could be turned into: | |||
4599 | // a = Val | |||
4600 | // b = sext ty1 a to ty2 | |||
4601 | // c = sext ty2 b to ty3 | |||
4602 | // However, the last sext is not free. | |||
4603 | if (IsSExt) | |||
4604 | return false; | |||
4605 | ||||
4606 | // This is a ZExt; maybe it is free to extend from one type to another. | |||
4607 | // In that case, we would not account for a different use. | |||
4608 | Type *NarrowTy; | |||
4609 | Type *LargeTy; | |||
4610 | if (ExtTy->getScalarType()->getIntegerBitWidth() > | |||
4611 | CurTy->getScalarType()->getIntegerBitWidth()) { | |||
4612 | NarrowTy = CurTy; | |||
4613 | LargeTy = ExtTy; | |||
4614 | } else { | |||
4615 | NarrowTy = ExtTy; | |||
4616 | LargeTy = CurTy; | |||
4617 | } | |||
4618 | ||||
4619 | if (!TLI.isZExtFree(NarrowTy, LargeTy)) | |||
4620 | return false; | |||
4621 | } | |||
4622 | // All uses are the same or can be derived from one another for free. | |||
4623 | return true; | |||
4624 | } | |||
4625 | ||||
4626 | /// \brief Try to speculatively promote extensions in \p Exts and continue | |||
4627 | /// promoting through newly promoted operands recursively as far as doing so is | |||
4628 | /// profitable. Save extensions profitably moved up, in \p ProfitablyMovedExts. | |||
4629 | /// When some promotion happened, \p TPT contains the proper state to revert | |||
4630 | /// them. | |||
4631 | /// | |||
4632 | /// \return true if some promotion happened, false otherwise. | |||
4633 | bool CodeGenPrepare::tryToPromoteExts( | |||
4634 | TypePromotionTransaction &TPT, const SmallVectorImpl<Instruction *> &Exts, | |||
4635 | SmallVectorImpl<Instruction *> &ProfitablyMovedExts, | |||
4636 | unsigned CreatedInstsCost) { | |||
4637 | bool Promoted = false; | |||
4638 | ||||
4639 | // Iterate over all the extensions to try to promote them. | |||
4640 | for (auto I : Exts) { | |||
4641 | // Early check if we directly have ext(load). | |||
4642 | if (isa<LoadInst>(I->getOperand(0))) { | |||
4643 | ProfitablyMovedExts.push_back(I); | |||
4644 | continue; | |||
4645 | } | |||
4646 | ||||
4647 | // Check whether or not we want to do any promotion. The reason we have | |||
4648 | // this check inside the for loop is to catch the case where an extension | |||
4649 | // is directly fed by a load because in such a case the extension can be moved | |||
4650 | // up without any promotion on its operands. | |||
4651 | if (!TLI || !TLI->enableExtLdPromotion() || DisableExtLdPromotion) | |||
4652 | return false; | |||
4653 | ||||
4654 | // Get the action to perform the promotion. | |||
4655 | TypePromotionHelper::Action TPH = | |||
4656 | TypePromotionHelper::getAction(I, InsertedInsts, *TLI, PromotedInsts); | |||
4657 | // Check if we can promote. | |||
4658 | if (!TPH) { | |||
4659 | // Save the current extension as we cannot move up through its operand. | |||
4660 | ProfitablyMovedExts.push_back(I); | |||
4661 | continue; | |||
4662 | } | |||
4663 | ||||
4664 | // Save the current state. | |||
4665 | TypePromotionTransaction::ConstRestorationPt LastKnownGood = | |||
4666 | TPT.getRestorationPoint(); | |||
4667 | SmallVector<Instruction *, 4> NewExts; | |||
4668 | unsigned NewCreatedInstsCost = 0; | |||
4669 | unsigned ExtCost = !TLI->isExtFree(I); | |||
4670 | // Promote. | |||
4671 | Value *PromotedVal = TPH(I, TPT, PromotedInsts, NewCreatedInstsCost, | |||
4672 | &NewExts, nullptr, *TLI); | |||
4673 | assert(PromotedVal && | |||
4674 | "TypePromotionHelper should have filtered out those cases"); | |||
4675 | ||||
4676 | // Only one extension can be merged into a load. Therefore, if we have | |||
4677 | // more than 1 new extension we heuristically cut this search path, | |||
4678 | // because it means we degrade the code quality. | |||
4679 | // With exactly 2, the transformation is neutral, because we will merge | |||
4680 | // one extension but leave one. However, we optimistically keep going, | |||
4681 | // because the new extension may be removed too. | |||
4682 | long long TotalCreatedInstsCost = CreatedInstsCost + NewCreatedInstsCost; | |||
4683 | // FIXME: It would be possible to propagate a negative value instead of | |||
4684 | // conservatively ceiling it to 0. | |||
4685 | TotalCreatedInstsCost = | |||
4686 | std::max((long long)0, (TotalCreatedInstsCost - ExtCost)); | |||
4687 | if (!StressExtLdPromotion && | |||
4688 | (TotalCreatedInstsCost > 1 || | |||
4689 | !isPromotedInstructionLegal(*TLI, *DL, PromotedVal))) { | |||
4690 | // This promotion is not profitable, rollback to the previous state, and | |||
4691 | // save the current extension in ProfitablyMovedExts as the latest | |||
4692 | // speculative promotion turned out to be unprofitable. | |||
4693 | TPT.rollback(LastKnownGood); | |||
4694 | ProfitablyMovedExts.push_back(I); | |||
4695 | continue; | |||
4696 | } | |||
4697 | // Continue promoting NewExts as far as doing so is profitable. | |||
4698 | SmallVector<Instruction *, 2> NewlyMovedExts; | |||
4699 | (void)tryToPromoteExts(TPT, NewExts, NewlyMovedExts, TotalCreatedInstsCost); | |||
4700 | bool NewPromoted = false; | |||
4701 | for (auto ExtInst : NewlyMovedExts) { | |||
4702 | Instruction *MovedExt = cast<Instruction>(ExtInst); | |||
4703 | Value *ExtOperand = MovedExt->getOperand(0); | |||
4704 | // If we have reached a load, we need this extra profitability check | |||
4705 | // as it could potentially be merged into an ext(load). | |||
4706 | if (isa<LoadInst>(ExtOperand) && | |||
4707 | !(StressExtLdPromotion || NewCreatedInstsCost <= ExtCost || | |||
4708 | (ExtOperand->hasOneUse() || hasSameExtUse(ExtOperand, *TLI)))) | |||
4709 | continue; | |||
4710 | ||||
4711 | ProfitablyMovedExts.push_back(MovedExt); | |||
4712 | NewPromoted = true; | |||
4713 | } | |||
4714 | ||||
4715 | // If none of speculative promotions for NewExts is profitable, rollback | |||
4716 | // and save the current extension (I) as the last profitable extension. | |||
4717 | if (!NewPromoted) { | |||
4718 | TPT.rollback(LastKnownGood); | |||
4719 | ProfitablyMovedExts.push_back(I); | |||
4720 | continue; | |||
4721 | } | |||
4722 | // The promotion is profitable. | |||
4723 | Promoted = true; | |||
4724 | } | |||
4725 | return Promoted; | |||
4726 | } | |||
4727 | ||||
4728 | /// Merge redundant sexts when one dominates the other. | |||
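| /// For illustration (hypothetical values): when %s1 dominates %s2 below, | |||
| /// uses of %s2 are rewritten to use %s1 and %s2 is removed: | |||
| /// \code | |||
| /// %s1 = sext i32 %x to i64 | |||
| /// ... | |||
| /// %s2 = sext i32 %x to i64 | |||
| /// \endcode | |||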
4729 | bool CodeGenPrepare::mergeSExts(Function &F) { | |||
4730 | DominatorTree DT(F); | |||
4731 | bool Changed = false; | |||
4732 | for (auto &Entry : ValToSExtendedUses) { | |||
4733 | SExts &Insts = Entry.second; | |||
4734 | SExts CurPts; | |||
4735 | for (Instruction *Inst : Insts) { | |||
4736 | if (RemovedInsts.count(Inst) || !isa<SExtInst>(Inst) || | |||
4737 | Inst->getOperand(0) != Entry.first) | |||
4738 | continue; | |||
4739 | bool inserted = false; | |||
4740 | for (auto &Pt : CurPts) { | |||
4741 | if (DT.dominates(Inst, Pt)) { | |||
4742 | Pt->replaceAllUsesWith(Inst); | |||
4743 | RemovedInsts.insert(Pt); | |||
4744 | Pt->removeFromParent(); | |||
4745 | Pt = Inst; | |||
4746 | inserted = true; | |||
4747 | Changed = true; | |||
4748 | break; | |||
4749 | } | |||
4750 | if (!DT.dominates(Pt, Inst)) | |||
4751 | // Give up if we need to merge in a common dominator as the | |||
4752 | // experiments show it is not profitable. | |||
4753 | continue; | |||
4754 | Inst->replaceAllUsesWith(Pt); | |||
4755 | RemovedInsts.insert(Inst); | |||
4756 | Inst->removeFromParent(); | |||
4757 | inserted = true; | |||
4758 | Changed = true; | |||
4759 | break; | |||
4760 | } | |||
4761 | if (!inserted) | |||
4762 | CurPts.push_back(Inst); | |||
4763 | } | |||
4764 | } | |||
4765 | return Changed; | |||
4766 | } | |||
4767 | ||||
4768 | /// Return true if an ext(load) can be formed from an extension in | |||
4769 | /// \p MovedExts. | |||
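| /// For illustration (hypothetical values): on a target where an i32 -> i64 | |||
| /// ZEXTLOAD is legal, this returns true for: | |||
| /// \code | |||
| /// %ld = load i32* %addr | |||
| /// %z = zext i32 %ld to i64 ; foldable into the load by SelectionDAG | |||
| /// \endcode | |||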
4770 | bool CodeGenPrepare::canFormExtLd( | |||
4771 | const SmallVectorImpl<Instruction *> &MovedExts, LoadInst *&LI, | |||
4772 | Instruction *&Inst, bool HasPromoted) { | |||
4773 | for (auto *MovedExtInst : MovedExts) { | |||
4774 | if (isa<LoadInst>(MovedExtInst->getOperand(0))) { | |||
4775 | LI = cast<LoadInst>(MovedExtInst->getOperand(0)); | |||
4776 | Inst = MovedExtInst; | |||
4777 | break; | |||
4778 | } | |||
4779 | } | |||
4780 | if (!LI) | |||
4781 | return false; | |||
4782 | ||||
4783 | // If they're already in the same block, there's nothing to do. | |||
4784 | // Make the cheap checks first if we did not promote. | |||
4785 | // If we promoted, we need to check if it is indeed profitable. | |||
4786 | if (!HasPromoted && LI->getParent() == Inst->getParent()) | |||
4787 | return false; | |||
4788 | ||||
4789 | EVT VT = TLI->getValueType(*DL, Inst->getType()); | |||
4790 | EVT LoadVT = TLI->getValueType(*DL, LI->getType()); | |||
4791 | ||||
4792 | // If the load has other users and the truncate is not free, this probably | |||
4793 | // isn't worthwhile. | |||
4794 | if (!LI->hasOneUse() && (TLI->isTypeLegal(LoadVT) || !TLI->isTypeLegal(VT)) && | |||
4795 | !TLI->isTruncateFree(Inst->getType(), LI->getType())) | |||
4796 | return false; | |||
4797 | ||||
4798 | // Check whether the target supports casts folded into loads. | |||
4799 | unsigned LType; | |||
4800 | if (isa<ZExtInst>(Inst)) | |||
4801 | LType = ISD::ZEXTLOAD; | |||
4802 | else { | |||
4803 | assert(isa<SExtInst>(Inst) && "Unexpected ext type!"); | |||
4804 | LType = ISD::SEXTLOAD; | |||
4805 | } | |||
4806 | ||||
4807 | return TLI->isLoadExtLegal(LType, VT, LoadVT); | |||
4808 | } | |||
4809 | ||||
4810 | /// Move a zext or sext fed by a load into the same basic block as the load, | |||
4811 | /// unless conditions are unfavorable. This allows SelectionDAG to fold the | |||
4812 | /// extend into the load. | |||
4813 | /// | |||
4814 | /// E.g., | |||
4815 | /// \code | |||
4816 | /// %ld = load i32* %addr | |||
4817 | /// %add = add nuw i32 %ld, 4 | |||
4818 | /// %zext = zext i32 %add to i64 | |||
4819 | /// \endcode | |||
4820 | /// => | |||
4821 | /// \code | |||
4822 | /// %ld = load i32* %addr | |||
4823 | /// %zext = zext i32 %ld to i64 | |||
4824 | /// %add = add nuw i64 %zext, 4 | |||
4825 | /// \endcode | |||
4826 | /// Note that the promotion in %add to i64 is done in tryToPromoteExts(), which | |||
4827 | /// allows us to match zext(load i32*) to i64. | |||
4828 | /// | |||
4829 | /// Also, try to promote the computations used to obtain a sign extended | |||
4830 | /// value used in memory accesses. | |||
4831 | /// E.g., | |||
4832 | /// \code | |||
4833 | /// a = add nsw i32 b, 3 | |||
4834 | /// d = sext i32 a to i64 | |||
4835 | /// e = getelementptr ..., i64 d | |||
4836 | /// \endcode | |||
4837 | /// => | |||
4838 | /// \code | |||
4839 | /// f = sext i32 b to i64 | |||
4840 | /// a = add nsw i64 f, 3 | |||
4841 | /// e = getelementptr ..., i64 a | |||
4842 | /// \endcode | |||
4843 | /// | |||
4844 | /// \p Inst[in/out] the extension may be modified during the process if some | |||
4845 | /// promotions apply. | |||
4846 | bool CodeGenPrepare::optimizeExt(Instruction *&Inst) { | |||
4847 | // ExtLoad formation and address type promotion infrastructure requires TLI to | |||
4848 | // be effective. | |||
4849 | if (!TLI) | |||
4850 | return false; | |||
4851 | ||||
4852 | bool AllowPromotionWithoutCommonHeader = false; | |||
4853 | /// See if it is an interesting sext operation for the address type | |||
4854 | /// promotion before trying to promote it, e.g., the ones with the right | |||
4855 | /// type and used in memory accesses. | |||
4856 | bool ATPConsiderable = TTI->shouldConsiderAddressTypePromotion( | |||
4857 | *Inst, AllowPromotionWithoutCommonHeader); | |||
4858 | TypePromotionTransaction TPT(RemovedInsts); | |||
4859 | TypePromotionTransaction::ConstRestorationPt LastKnownGood = | |||
4860 | TPT.getRestorationPoint(); | |||
4861 | SmallVector<Instruction *, 1> Exts; | |||
4862 | SmallVector<Instruction *, 2> SpeculativelyMovedExts; | |||
4863 | Exts.push_back(Inst); | |||
4864 | ||||
4865 | bool HasPromoted = tryToPromoteExts(TPT, Exts, SpeculativelyMovedExts); | |||
4866 | ||||
4867 | // Look for a load being extended. | |||
4868 | LoadInst *LI = nullptr; | |||
4869 | Instruction *ExtFedByLoad; | |||
4870 | ||||
4871 | // Try to promote a chain of computation if it allows forming an extended | |||
4872 | // load. | |||
4873 | if (canFormExtLd(SpeculativelyMovedExts, LI, ExtFedByLoad, HasPromoted)) { | |||
4874 | assert(LI && ExtFedByLoad && "Expect a valid load and extension"); | |||
4875 | TPT.commit(); | |||
4876 | // Move the extend into the same block as the load | |||
4877 | ExtFedByLoad->removeFromParent(); | |||
4878 | ExtFedByLoad->insertAfter(LI); | |||
4879 | // CGP does not check if the zext would be speculatively executed when moved | |||
4880 | // to the same basic block as the load. Preserving its original location | |||
4881 | // would pessimize the debugging experience, as well as negatively impact | |||
4882 | // the quality of sample PGO. We don't want to use "line 0" as that has a | |||
4883 | // size cost in the line-table section and logically the zext can be seen as | |||
4884 | // part of the load. Therefore we conservatively reuse the same debug | |||
4885 | // location for the load and the zext. | |||
4886 | ExtFedByLoad->setDebugLoc(LI->getDebugLoc()); | |||
4887 | ++NumExtsMoved; | |||
4888 | Inst = ExtFedByLoad; | |||
4889 | return true; | |||
4890 | } | |||
4891 | ||||
4892 | // Continue promoting SExts if known as considerable depending on targets. | |||
4893 | if (ATPConsiderable && | |||
4894 | performAddressTypePromotion(Inst, AllowPromotionWithoutCommonHeader, | |||
4895 | HasPromoted, TPT, SpeculativelyMovedExts)) | |||
4896 | return true; | |||
4897 | ||||
4898 | TPT.rollback(LastKnownGood); | |||
4899 | return false; | |||
4900 | } | |||
4901 | ||||
4902 | // Perform address type promotion if doing so is profitable. | |||
4903 | // If AllowPromotionWithoutCommonHeader == false, we should find other sext | |||
4904 | // instructions that sign extended the same initial value. However, if | |||
4905 | // AllowPromotionWithoutCommonHeader == true, we expect promoting the | |||
4906 | // extension to be profitable on its own. | |||
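| // For illustration (hypothetical values): two sext chains share the header | |||
| // %x. Promotion of the first chain is deferred until the second is seen, | |||
| // after which mergeSExts can CSE the resulting sexts of %x: | |||
| //   %a1 = add nsw i32 %x, 1        %a2 = add nsw i32 %x, 2 | |||
| //   %s1 = sext i32 %a1 to i64      %s2 = sext i32 %a2 to i64 | |||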
4907 | bool CodeGenPrepare::performAddressTypePromotion( | |||
4908 | Instruction *&Inst, bool AllowPromotionWithoutCommonHeader, | |||
4909 | bool HasPromoted, TypePromotionTransaction &TPT, | |||
4910 | SmallVectorImpl<Instruction *> &SpeculativelyMovedExts) { | |||
4911 | bool Promoted = false; | |||
4912 | SmallPtrSet<Instruction *, 1> UnhandledExts; | |||
4913 | bool AllSeenFirst = true; | |||
4914 | for (auto I : SpeculativelyMovedExts) { | |||
4915 | Value *HeadOfChain = I->getOperand(0); | |||
4916 | DenseMap<Value *, Instruction *>::iterator AlreadySeen = | |||
4917 | SeenChainsForSExt.find(HeadOfChain); | |||
4918 | // If there is an unhandled SExt which has the same header, try to promote | |||
4919 | // it as well. | |||
4920 | if (AlreadySeen != SeenChainsForSExt.end()) { | |||
4921 | if (AlreadySeen->second != nullptr) | |||
4922 | UnhandledExts.insert(AlreadySeen->second); | |||
4923 | AllSeenFirst = false; | |||
4924 | } | |||
4925 | } | |||
4926 | ||||
4927 | if (!AllSeenFirst || (AllowPromotionWithoutCommonHeader && | |||
4928 | SpeculativelyMovedExts.size() == 1)) { | |||
4929 | TPT.commit(); | |||
4930 | if (HasPromoted) | |||
4931 | Promoted = true; | |||
4932 | for (auto I : SpeculativelyMovedExts) { | |||
4933 | Value *HeadOfChain = I->getOperand(0); | |||
4934 | SeenChainsForSExt[HeadOfChain] = nullptr; | |||
4935 | ValToSExtendedUses[HeadOfChain].push_back(I); | |||
4936 | } | |||
4937 | // Update Inst as promotion happened. | |||
4938 | Inst = SpeculativelyMovedExts.pop_back_val(); | |||
4939 | } else { | |||
4940 | // This is the first chain visited from the header, keep the current chain | |||
4941 | // as unhandled. Defer promoting it until we encounter another SExt | |||
4942 | // chain derived from the same header. | |||
4943 | for (auto I : SpeculativelyMovedExts) { | |||
4944 | Value *HeadOfChain = I->getOperand(0); | |||
4945 | SeenChainsForSExt[HeadOfChain] = Inst; | |||
4946 | } | |||
4947 | return false; | |||
4948 | } | |||
4949 | ||||
4950 | if (!AllSeenFirst && !UnhandledExts.empty()) | |||
4951 | for (auto VisitedSExt : UnhandledExts) { | |||
4952 | if (RemovedInsts.count(VisitedSExt)) | |||
4953 | continue; | |||
4954 | TypePromotionTransaction TPT(RemovedInsts); | |||
4955 | SmallVector<Instruction *, 1> Exts; | |||
4956 | SmallVector<Instruction *, 2> Chains; | |||
4957 | Exts.push_back(VisitedSExt); | |||
4958 | bool HasPromoted = tryToPromoteExts(TPT, Exts, Chains); | |||
4959 | TPT.commit(); | |||
4960 | if (HasPromoted) | |||
4961 | Promoted = true; | |||
4962 | for (auto I : Chains) { | |||
4963 | Value *HeadOfChain = I->getOperand(0); | |||
4964 | // Mark this as handled. | |||
4965 | SeenChainsForSExt[HeadOfChain] = nullptr; | |||
4966 | ValToSExtendedUses[HeadOfChain].push_back(I); | |||
4967 | } | |||
4968 | } | |||
4969 | return Promoted; | |||
4970 | } | |||
4971 | ||||
4972 | bool CodeGenPrepare::optimizeExtUses(Instruction *I) { | |||
4973 | BasicBlock *DefBB = I->getParent(); | |||
4974 | ||||
4975 | // If the result of a {s|z}ext and its source are both live out, rewrite all | |||
4976 | // other uses of the source with the result of the extension. | |||
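| // For illustration (hypothetical values): if %src and %ext are both live | |||
| // out of this block, uses of %src in other blocks become "trunc %ext", | |||
| // which costs nothing when truncation is free: | |||
| //   %src = add i32 %a, %b | |||
| //   %ext = zext i32 %src to i64 | |||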
4977 | Value *Src = I->getOperand(0); | |||
4978 | if (Src->hasOneUse()) | |||
4979 | return false; | |||
4980 | ||||
4981 | // Only do this xform if truncating is free. | |||
4982 | if (TLI && !TLI->isTruncateFree(I->getType(), Src->getType())) | |||
4983 | return false; | |||
4984 | ||||
4985 | // Only safe to perform the optimization if the source is also defined in | |||
4986 | // this block. | |||
4987 | if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent()) | |||
4988 | return false; | |||
4989 | ||||
4990 | bool DefIsLiveOut = false; | |||
4991 | for (User *U : I->users()) { | |||
4992 | Instruction *UI = cast<Instruction>(U); | |||
4993 | ||||
4994 | // Figure out which BB this ext is used in. | |||
4995 | BasicBlock *UserBB = UI->getParent(); | |||
4996 | if (UserBB == DefBB) continue; | |||
4997 | DefIsLiveOut = true; | |||
4998 | break; | |||
4999 | } | |||
5000 | if (!DefIsLiveOut) | |||
5001 | return false; | |||
5002 | ||||
5003 | // Make sure none of the uses are PHI nodes. | |||
5004 | for (User *U : Src->users()) { | |||
5005 | Instruction *UI = cast<Instruction>(U); | |||
5006 | BasicBlock *UserBB = UI->getParent(); | |||
5007 | if (UserBB == DefBB) continue; | |||
5008 | // Be conservative. We don't want this xform to end up introducing | |||
5009 | // reloads just before load / store instructions. | |||
5010 | if (isa<PHINode>(UI) || isa<LoadInst>(UI) || isa<StoreInst>(UI)) | |||
5011 | return false; | |||
5012 | } | |||
5013 | ||||
5014 | // InsertedTruncs - Only insert one trunc in each block. | |||
5015 | DenseMap<BasicBlock*, Instruction*> InsertedTruncs; | |||
5016 | ||||
5017 | bool MadeChange = false; | |||
5018 | for (Use &U : Src->uses()) { | |||
5019 | Instruction *User = cast<Instruction>(U.getUser()); | |||
5020 | ||||
5021 | // Figure out which BB this ext is used in. | |||
5022 | BasicBlock *UserBB = User->getParent(); | |||
5023 | if (UserBB == DefBB) continue; | |||
5024 | ||||
5025 | // Both src and def are live in this block. Rewrite the use. | |||
5026 | Instruction *&InsertedTrunc = InsertedTruncs[UserBB]; | |||
5027 | ||||
5028 | if (!InsertedTrunc) { | |||
5029 | BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); | |||
5030 | assert(InsertPt != UserBB->end()); | |||
5031 | InsertedTrunc = new TruncInst(I, Src->getType(), "", &*InsertPt); | |||
5032 | InsertedInsts.insert(InsertedTrunc); | |||
5033 | } | |||
5034 | ||||
5035 | // Replace a use of the {s|z}ext source with a use of the result. | |||
5036 | U = InsertedTrunc; | |||
5037 | ++NumExtUses; | |||
5038 | MadeChange = true; | |||
5039 | } | |||
5040 | ||||
5041 | return MadeChange; | |||
5042 | } | |||
5043 | ||||
5044 | // Find loads whose uses only use some of the loaded value's bits. Add an "and" | |||
5045 | // just after the load if the target can fold this into one extload instruction, | |||
5046 | // with the hope of eliminating some of the other later "and" instructions using | |||
5047 | // the loaded value. "and"s that are made trivially redundant by the insertion | |||
5048 | // of the new "and" are removed by this function, while others (e.g. those whose | |||
5049 | // path from the load goes through a phi) are left for isel to potentially | |||
5050 | // remove. | |||
5051 | // | |||
5052 | // For example: | |||
5053 | // | |||
5054 | // b0: | |||
5055 | // x = load i32 | |||
5056 | // ... | |||
5057 | // b1: | |||
5058 | // y = and x, 0xff | |||
5059 | // z = use y | |||
5060 | // | |||
5061 | // becomes: | |||
5062 | // | |||
5063 | // b0: | |||
5064 | // x = load i32 | |||
5065 | // x' = and x, 0xff | |||
5066 | // ... | |||
5067 | // b1: | |||
5068 | // z = use x' | |||
5069 | // | |||
5070 | // whereas: | |||
5071 | // | |||
5072 | // b0: | |||
5073 | // x1 = load i32 | |||
5074 | // ... | |||
5075 | // b1: | |||
5076 | // x2 = load i32 | |||
5077 | // ... | |||
5078 | // b2: | |||
5079 | // x = phi x1, x2 | |||
5080 | // y = and x, 0xff | |||
5081 | // | |||
5082 | // becomes (after a call to optimizeLoadExt for each load): | |||
5083 | // | |||
5084 | // b0: | |||
5085 | // x1 = load i32 | |||
5086 | // x1' = and x1, 0xff | |||
5087 | // ... | |||
5088 | // b1: | |||
5089 | // x2 = load i32 | |||
5090 | // x2' = and x2, 0xff | |||
5091 | // ... | |||
5092 | // b2: | |||
5093 | // x = phi x1', x2' | |||
5094 | // y = and x, 0xff | |||
5095 | // | |||
5096 | ||||
5097 | bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) { | |||
5098 | ||||
5099 | if (!Load->isSimple() || | |||
5100 | !(Load->getType()->isIntegerTy() || Load->getType()->isPointerTy())) | |||
5101 | return false; | |||
5102 | ||||
5103 | // Skip loads we've already transformed. | |||
5104 | if (Load->hasOneUse() && | |||
5105 | InsertedInsts.count(cast<Instruction>(*Load->user_begin()))) | |||
5106 | return false; | |||
5107 | ||||
5108 | // Look at all uses of Load, looking through phis, to determine how many bits | |||
5109 | // of the loaded value are needed. | |||
5110 | SmallVector<Instruction *, 8> WorkList; | |||
5111 | SmallPtrSet<Instruction *, 16> Visited; | |||
5112 | SmallVector<Instruction *, 8> AndsToMaybeRemove; | |||
5113 | for (auto *U : Load->users()) | |||
5114 | WorkList.push_back(cast<Instruction>(U)); | |||
5115 | ||||
5116 | EVT LoadResultVT = TLI->getValueType(*DL, Load->getType()); | |||
5117 | unsigned BitWidth = LoadResultVT.getSizeInBits(); | |||
5118 | APInt DemandBits(BitWidth, 0); | |||
5119 | APInt WidestAndBits(BitWidth, 0); | |||
5120 | ||||
5121 | while (!WorkList.empty()) { | |||
5122 | Instruction *I = WorkList.back(); | |||
5123 | WorkList.pop_back(); | |||
5124 | ||||
5125 | // Break use-def graph loops. | |||
5126 | if (!Visited.insert(I).second) | |||
5127 | continue; | |||
5128 | ||||
5129 | // For a PHI node, push all of its users. | |||
5130 | if (auto *Phi = dyn_cast<PHINode>(I)) { | |||
5131 | for (auto *U : Phi->users()) | |||
5132 | WorkList.push_back(cast<Instruction>(U)); | |||
5133 | continue; | |||
5134 | } | |||
5135 | ||||
5136 | switch (I->getOpcode()) { | |||
5137 | case llvm::Instruction::And: { | |||
5138 | auto *AndC = dyn_cast<ConstantInt>(I->getOperand(1)); | |||
5139 | if (!AndC) | |||
5140 | return false; | |||
5141 | APInt AndBits = AndC->getValue(); | |||
5142 | DemandBits |= AndBits; | |||
5143 | // Keep track of the widest and mask we see. | |||
5144 | if (AndBits.ugt(WidestAndBits)) | |||
5145 | WidestAndBits = AndBits; | |||
5146 | if (AndBits == WidestAndBits && I->getOperand(0) == Load) | |||
5147 | AndsToMaybeRemove.push_back(I); | |||
5148 | break; | |||
5149 | } | |||
5150 | ||||
5151 | case llvm::Instruction::Shl: { | |||
5152 | auto *ShlC = dyn_cast<ConstantInt>(I->getOperand(1)); | |||
5153 | if (!ShlC) | |||
5154 | return false; | |||
5155 | uint64_t ShiftAmt = ShlC->getLimitedValue(BitWidth - 1); | |||
5156 | DemandBits.setLowBits(BitWidth - ShiftAmt); | |||
5157 | break; | |||
5158 | } | |||
5159 | ||||
5160 | case llvm::Instruction::Trunc: { | |||
5161 | EVT TruncVT = TLI->getValueType(*DL, I->getType()); | |||
5162 | unsigned TruncBitWidth = TruncVT.getSizeInBits(); | |||
5163 | DemandBits.setLowBits(TruncBitWidth); | |||
5164 | break; | |||
5165 | } | |||
5166 | ||||
5167 | default: | |||
5168 | return false; | |||
5169 | } | |||
5170 | } | |||
5171 | ||||
5172 | uint32_t ActiveBits = DemandBits.getActiveBits(); | |||
5173 | // Avoid hoisting (and (load x) 1) since it is unlikely to be folded by the | |||
5174 | // target even if isLoadExtLegal says an i1 EXTLOAD is valid. For example, | |||
5175 | // for the AArch64 target isLoadExtLegal(ZEXTLOAD, i32, i1) returns true, but | |||
5176 | // (and (load x) 1) is not matched as a single instruction, rather as a LDR | |||
5177 | // followed by an AND. | |||
5178 | // TODO: Look into removing this restriction by fixing backends to either | |||
5179 | // return false for isLoadExtLegal for i1 or have them select this pattern to | |||
5180 | // a single instruction. | |||
5181 | // | |||
5182 | // Also avoid hoisting if we didn't see any ands with the exact DemandBits | |||
5183 | // mask, since these are the only ands that will be removed by isel. | |||
5184 | if (ActiveBits <= 1 || !DemandBits.isMask(ActiveBits) || | |||
5185 | WidestAndBits != DemandBits) | |||
5186 | return false; | |||
5187 | ||||
5188 | LLVMContext &Ctx = Load->getType()->getContext(); | |||
5189 | Type *TruncTy = Type::getIntNTy(Ctx, ActiveBits); | |||
5190 | EVT TruncVT = TLI->getValueType(*DL, TruncTy); | |||
5191 | ||||
5192 | // Reject cases that won't be matched as extloads. | |||
5193 | if (!LoadResultVT.bitsGT(TruncVT) || !TruncVT.isRound() || | |||
5194 | !TLI->isLoadExtLegal(ISD::ZEXTLOAD, LoadResultVT, TruncVT)) | |||
5195 | return false; | |||
5196 | ||||
5197 | IRBuilder<> Builder(Load->getNextNode()); | |||
5198 | auto *NewAnd = dyn_cast<Instruction>( | |||
5199 | Builder.CreateAnd(Load, ConstantInt::get(Ctx, DemandBits))); | |||
5200 | // Mark this instruction as "inserted by CGP", so that other | |||
5201 | // optimizations don't touch it. | |||
5202 | InsertedInsts.insert(NewAnd); | |||
5203 | ||||
5204 | // Replace all uses of load with new and (except for the use of load in the | |||
5205 | // new and itself). | |||
5206 | Load->replaceAllUsesWith(NewAnd); | |||
5207 | NewAnd->setOperand(0, Load); | |||
5208 | ||||
5209 | // Remove any and instructions that are now redundant. | |||
5210 | for (auto *And : AndsToMaybeRemove) | |||
5211 | // Check that the and mask is the same as the one we decided to put on the | |||
5212 | // new and. | |||
5213 | if (cast<ConstantInt>(And->getOperand(1))->getValue() == DemandBits) { | |||
5214 | And->replaceAllUsesWith(NewAnd); | |||
5215 | if (&*CurInstIterator == And) | |||
5216 | CurInstIterator = std::next(And->getIterator()); | |||
5217 | And->eraseFromParent(); | |||
5218 | ++NumAndUses; | |||
5219 | } | |||
5220 | ||||
5221 | ++NumAndsAdded; | |||
5222 | return true; | |||
5223 | } | |||
5224 | ||||
5225 | /// Check if V (an operand of a select instruction) is an expensive instruction | |||
5226 | /// that is only used once. | |||
5227 | static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V) { | |||
5228 | auto *I = dyn_cast<Instruction>(V); | |||
5229 | // If it's safe to speculatively execute, then it should not have side | |||
5230 | // effects; therefore, it's safe to sink and possibly *not* execute. | |||
5231 | return I && I->hasOneUse() && isSafeToSpeculativelyExecute(I) && | |||
5232 | TTI->getUserCost(I) >= TargetTransformInfo::TCC_Expensive; | |||
5233 | } | |||
5234 | ||||
5235 | /// Returns true if a SelectInst should be turned into an explicit branch. | |||
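| /// For illustration (hypothetical values): profile metadata such as | |||
| /// \code | |||
| /// %sel = select i1 %cmp, i32 %c, i32 %d, !prof !0 | |||
| /// !0 = !{!"branch_weights", i32 1000, i32 1} | |||
| /// \endcode | |||
| /// marks the condition as highly predictable, which favors a branch here. | |||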
5236 | static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI, | |||
5237 | const TargetLowering *TLI, | |||
5238 | SelectInst *SI) { | |||
5239 | // If even a predictable select is cheap, then a branch can't be cheaper. | |||
5240 | if (!TLI->isPredictableSelectExpensive()) | |||
5241 | return false; | |||
5242 | ||||
5243 | // FIXME: This should use the same heuristics as IfConversion to determine | |||
5244 | // whether a select is better represented as a branch. | |||
5245 | ||||
5246 | // If metadata tells us that the select condition is obviously predictable, | |||
5247 | // then we want to replace the select with a branch. | |||
5248 | uint64_t TrueWeight, FalseWeight; | |||
5249 | if (SI->extractProfMetadata(TrueWeight, FalseWeight)) { | |||
5250 | uint64_t Max = std::max(TrueWeight, FalseWeight); | |||
5251 | uint64_t Sum = TrueWeight + FalseWeight; | |||
5252 | if (Sum != 0) { | |||
5253 | auto Probability = BranchProbability::getBranchProbability(Max, Sum); | |||
5254 | if (Probability > TLI->getPredictableBranchThreshold()) | |||
5255 | return true; | |||
5256 | } | |||
5257 | } | |||
5258 | ||||
5259 | CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition()); | |||
5260 | ||||
5261 | // If a branch is predictable, an out-of-order CPU can avoid blocking on its | |||
5262 | // comparison condition. If the compare has more than one use, there's | |||
5263 | // probably another cmov or setcc around, so it's not worth emitting a branch. | |||
5264 | if (!Cmp || !Cmp->hasOneUse()) | |||
5265 | return false; | |||
5266 | ||||
5267 | // If either operand of the select is expensive and only needed on one side | |||
5268 | // of the select, we should form a branch. | |||
5269 | if (sinkSelectOperand(TTI, SI->getTrueValue()) || | |||
5270 | sinkSelectOperand(TTI, SI->getFalseValue())) | |||
5271 | return true; | |||
5272 | ||||
5273 | return false; | |||
5274 | } | |||
5275 | ||||
5276 | /// If \p isTrue is true, return the true value of \p SI, otherwise return | |||
5277 | /// false value of \p SI. If the true/false value of \p SI is defined by any | |||
5278 | /// select instructions in \p Selects, look through the defining select | |||
5279 | /// instruction until the true/false value is not defined in \p Selects. | |||
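| /// For illustration (hypothetical values): with %sel1 in \p Selects, the | |||
| /// true value of %sel2 resolves through %sel1 to %a: | |||
| /// \code | |||
| /// %sel1 = select i1 %cmp, i32 %a, i32 %b | |||
| /// %sel2 = select i1 %cmp, i32 %sel1, i32 %c | |||
| /// \endcode | |||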
5280 | static Value *getTrueOrFalseValue( | |||
5281 | SelectInst *SI, bool isTrue, | |||
5282 | const SmallPtrSet<const Instruction *, 2> &Selects) { | |||
5283 | Value *V; | |||
5284 | ||||
5285 | for (SelectInst *DefSI = SI; DefSI != nullptr && Selects.count(DefSI); | |||
5286 | DefSI = dyn_cast<SelectInst>(V)) { | |||
5287 | assert(DefSI->getCondition() == SI->getCondition() && | |||
5288 | "The condition of DefSI does not match with SI"); | |||
5289 | V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue()); | |||
5290 | } | |||
5291 | return V; | |||
5292 | } | |||
5293 | ||||
5294 | /// If we have a SelectInst that will likely profit from branch prediction, | |||
5295 | /// turn it into a branch. | |||
5296 | bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) { | |||
5297 | // Find all consecutive select instructions that share the same condition. | |||
5298 | SmallVector<SelectInst *, 2> ASI; | |||
5299 | ASI.push_back(SI); | |||
5300 | for (BasicBlock::iterator It = ++BasicBlock::iterator(SI); | |||
5301 | It != SI->getParent()->end(); ++It) { | |||
5302 | SelectInst *I = dyn_cast<SelectInst>(&*It); | |||
5303 | if (I && SI->getCondition() == I->getCondition()) { | |||
5304 | ASI.push_back(I); | |||
5305 | } else { | |||
5306 | break; | |||
5307 | } | |||
5308 | } | |||
5309 | ||||
5310 | SelectInst *LastSI = ASI.back(); | |||
5311 | // Increment the current iterator to skip the rest of the select instructions, | |||
5312 | // because they will either all be lowered to branches or none will be. | |||
5313 | CurInstIterator = std::next(LastSI->getIterator()); | |||
5314 | ||||
5315 | bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1); | |||
5316 | ||||
5317 | // Can we convert the 'select' to CF? | |||
5318 | if (DisableSelectToBranch || OptSize || !TLI || VectorCond || | |||
5319 | SI->getMetadata(LLVMContext::MD_unpredictable)) | |||
5320 | return false; | |||
5321 | ||||
5322 | TargetLowering::SelectSupportKind SelectKind; | |||
5323 | if (VectorCond) | |||
5324 | SelectKind = TargetLowering::VectorMaskSelect; | |||
5325 | else if (SI->getType()->isVectorTy()) | |||
5326 | SelectKind = TargetLowering::ScalarCondVectorVal; | |||
5327 | else | |||
5328 | SelectKind = TargetLowering::ScalarValSelect; | |||
5329 | ||||
5330 | if (TLI->isSelectSupported(SelectKind) && | |||
5331 | !isFormingBranchFromSelectProfitable(TTI, TLI, SI)) | |||
5332 | return false; | |||
5333 | ||||
5334 | ModifiedDT = true; | |||
5335 | ||||
5336 | // Transform a sequence like this: | |||
5337 | // start: | |||
5338 | // %cmp = cmp uge i32 %a, %b | |||
5339 | // %sel = select i1 %cmp, i32 %c, i32 %d | |||
5340 | // | |||
5341 | // Into: | |||
5342 | // start: | |||
5343 | // %cmp = cmp uge i32 %a, %b | |||
5344 | // br i1 %cmp, label %select.true, label %select.false | |||
5345 | // select.true: | |||
5346 | // br label %select.end | |||
5347 | // select.false: | |||
5348 | // br label %select.end | |||
5349 | // select.end: | |||
5350 | // %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ] | |||
5351 | // | |||
5352 | // In addition, we may sink instructions that produce %c or %d from | |||
5353 | // the entry block into the destination(s) of the new branch. | |||
5354 | // If the true or false blocks do not contain a sunken instruction, that | |||
5355 | // block and its branch may be optimized away. In that case, one side of the | |||
5356 | // first branch will point directly to select.end, and the corresponding PHI | |||
5357 | // predecessor block will be the start block. | |||
5358 | ||||
5359 | // First, we split the block containing the select into 2 blocks. | |||
5360 | BasicBlock *StartBlock = SI->getParent(); | |||
5361 | BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(LastSI)); | |||
5362 | BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end"); | |||
5363 | ||||
5364 | // Delete the unconditional branch that was just created by the split. | |||
5365 | StartBlock->getTerminator()->eraseFromParent(); | |||
5366 | ||||
5367 | // These are the new basic blocks for the conditional branch. | |||
5368 | // At least one will become an actual new basic block. | |||
5369 | BasicBlock *TrueBlock = nullptr; | |||
5370 | BasicBlock *FalseBlock = nullptr; | |||
5371 | BranchInst *TrueBranch = nullptr; | |||
5372 | BranchInst *FalseBranch = nullptr; | |||
5373 | ||||
5374 | // Sink expensive instructions into the conditional blocks to avoid executing | |||
5375 | // them speculatively. | |||
5376 | for (SelectInst *SI : ASI) { | |||
5377 | if (sinkSelectOperand(TTI, SI->getTrueValue())) { | |||
5378 | if (TrueBlock == nullptr) { | |||
5379 | TrueBlock = BasicBlock::Create(SI->getContext(), "select.true.sink", | |||
5380 | EndBlock->getParent(), EndBlock); | |||
5381 | TrueBranch = BranchInst::Create(EndBlock, TrueBlock); | |||
5382 | } | |||
5383 | auto *TrueInst = cast<Instruction>(SI->getTrueValue()); | |||
5384 | TrueInst->moveBefore(TrueBranch); | |||
5385 | } | |||
5386 | if (sinkSelectOperand(TTI, SI->getFalseValue())) { | |||
5387 | if (FalseBlock == nullptr) { | |||
5388 | FalseBlock = BasicBlock::Create(SI->getContext(), "select.false.sink", | |||
5389 | EndBlock->getParent(), EndBlock); | |||
5390 | FalseBranch = BranchInst::Create(EndBlock, FalseBlock); | |||
5391 | } | |||
5392 | auto *FalseInst = cast<Instruction>(SI->getFalseValue()); | |||
5393 | FalseInst->moveBefore(FalseBranch); | |||
5394 | } | |||
5395 | } | |||
5396 | ||||
5397 | // If there was nothing to sink, then arbitrarily choose the 'false' side | |||
5398 | // for a new input value to the PHI. | |||
5399 | if (TrueBlock == FalseBlock) { | |||
5400 | assert(TrueBlock == nullptr && | |||
5401 | "Unexpected basic block transform while optimizing select"); | |||
5402 | ||||
5403 | FalseBlock = BasicBlock::Create(SI->getContext(), "select.false", | |||
5404 | EndBlock->getParent(), EndBlock); | |||
5405 | BranchInst::Create(EndBlock, FalseBlock); | |||
5406 | } | |||
5407 | ||||
5408 | // Insert the real conditional branch based on the original condition. | |||
5409 | // If we did not create a new block for one of the 'true' or 'false' paths | |||
5410 | // of the condition, it means that side of the branch goes to the end block | |||
5411 | // directly and the path originates from the start block from the point of | |||
5412 | // view of the new PHI. | |||
5413 | BasicBlock *TT, *FT; | |||
5414 | if (TrueBlock == nullptr) { | |||
5415 | TT = EndBlock; | |||
5416 | FT = FalseBlock; | |||
5417 | TrueBlock = StartBlock; | |||
5418 | } else if (FalseBlock == nullptr) { | |||
5419 | TT = TrueBlock; | |||
5420 | FT = EndBlock; | |||
5421 | FalseBlock = StartBlock; | |||
5422 | } else { | |||
5423 | TT = TrueBlock; | |||
5424 | FT = FalseBlock; | |||
5425 | } | |||
5426 | IRBuilder<>(SI).CreateCondBr(SI->getCondition(), TT, FT, SI); | |||
5427 | ||||
5428 | SmallPtrSet<const Instruction *, 2> INS; | |||
5429 | INS.insert(ASI.begin(), ASI.end()); | |||
5430 | // Use a reverse iterator because a later select may use the value of an | |||
5431 | // earlier select, and we need to propagate the value through the earlier | |||
5432 | // select to get the PHI operand. | |||
5433 | for (auto It = ASI.rbegin(); It != ASI.rend(); ++It) { | |||
5434 | SelectInst *SI = *It; | |||
5435 | // The select itself is replaced with a PHI Node. | |||
5436 | PHINode *PN = PHINode::Create(SI->getType(), 2, "", &EndBlock->front()); | |||
5437 | PN->takeName(SI); | |||
5438 | PN->addIncoming(getTrueOrFalseValue(SI, true, INS), TrueBlock); | |||
5439 | PN->addIncoming(getTrueOrFalseValue(SI, false, INS), FalseBlock); | |||
5440 | ||||
5441 | SI->replaceAllUsesWith(PN); | |||
5442 | SI->eraseFromParent(); | |||
5443 | INS.erase(SI); | |||
5444 | ++NumSelectsExpanded; | |||
5445 | } | |||
5446 | ||||
5447 | // Instruct OptimizeBlock to skip to the next block. | |||
5448 | CurInstIterator = StartBlock->end(); | |||
5449 | return true; | |||
5450 | } | |||
5451 | ||||
5452 | static bool isBroadcastShuffle(ShuffleVectorInst *SVI) { | |||
5453 | SmallVector<int, 16> Mask(SVI->getShuffleMask()); | |||
5454 | int SplatElem = -1; | |||
5455 | for (unsigned i = 0; i < Mask.size(); ++i) { | |||
5456 | if (SplatElem != -1 && Mask[i] != -1 && Mask[i] != SplatElem) | |||
5457 | return false; | |||
5458 | SplatElem = Mask[i]; | |||
5459 | } | |||
5460 | ||||
5461 | return true; | |||
5462 | } | |||
5463 | ||||
5464 | /// Some targets have expensive vector shifts if the lanes aren't all the same | |||
5465 | /// (e.g. x86 only introduced "vpsllvd" and friends with AVX2). In these cases | |||
5466 | /// it's often worth sinking a shufflevector splat down to its use so that | |||
5467 | /// codegen can spot all lanes are identical. | |||
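| /// For illustration (hypothetical values): the splat below is re-created | |||
| /// next to its shift user in another block so isel sees identical lanes: | |||
| /// \code | |||
| /// %amt = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer | |||
| /// ... | |||
| /// %shl = shl <4 x i32> %x, %amt ; in a different basic block | |||
| /// \endcode | |||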
5468 | bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) { | |||
5469 | BasicBlock *DefBB = SVI->getParent(); | |||
5470 | ||||
5471 | // Only do this xform if variable vector shifts are particularly expensive. | |||
5472 | if (!TLI || !TLI->isVectorShiftByScalarCheap(SVI->getType())) | |||
5473 | return false; | |||
5474 | ||||
5475 | // We only expect better codegen by sinking a shuffle if we can recognise a | |||
5476 | // constant splat. | |||
5477 | if (!isBroadcastShuffle(SVI)) | |||
5478 | return false; | |||
5479 | ||||
5480 | // InsertedShuffles - Only insert a shuffle in each block once. | |||
5481 | DenseMap<BasicBlock*, Instruction*> InsertedShuffles; | |||
5482 | ||||
5483 | bool MadeChange = false; | |||
5484 | for (User *U : SVI->users()) { | |||
5485 | Instruction *UI = cast<Instruction>(U); | |||
5486 | ||||
5487 | // Figure out which BB this ext is used in. | |||
5488 | BasicBlock *UserBB = UI->getParent(); | |||
5489 | if (UserBB == DefBB) continue; | |||
5490 | ||||
5491 | // For now only apply this when the splat is used by a shift instruction. | |||
5492 | if (!UI->isShift()) continue; | |||
5493 | ||||
5494 | // Everything checks out, sink the shuffle if the user's block doesn't | |||
5495 | // already have a copy. | |||
5496 | Instruction *&InsertedShuffle = InsertedShuffles[UserBB]; | |||
5497 | ||||
5498 | if (!InsertedShuffle) { | |||
5499 | BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); | |||
5500 | assert(InsertPt != UserBB->end()); | |||
5501 | InsertedShuffle = | |||
5502 | new ShuffleVectorInst(SVI->getOperand(0), SVI->getOperand(1), | |||
5503 | SVI->getOperand(2), "", &*InsertPt); | |||
5504 | } | |||
5505 | ||||
5506 | UI->replaceUsesOfWith(SVI, InsertedShuffle); | |||
5507 | MadeChange = true; | |||
5508 | } | |||
5509 | ||||
5510 | // If we removed all uses, nuke the shuffle. | |||
5511 | if (SVI->use_empty()) { | |||
5512 | SVI->eraseFromParent(); | |||
5513 | MadeChange = true; | |||
5514 | } | |||
5515 | ||||
5516 | return MadeChange; | |||
5517 | } | |||
5518 | ||||
5519 | bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) { | |||
5520 | if (!TLI || !DL) | |||
5521 | return false; | |||
5522 | ||||
5523 | Value *Cond = SI->getCondition(); | |||
5524 | Type *OldType = Cond->getType(); | |||
5525 | LLVMContext &Context = Cond->getContext(); | |||
5526 | MVT RegType = TLI->getRegisterType(Context, TLI->getValueType(*DL, OldType)); | |||
5527 | unsigned RegWidth = RegType.getSizeInBits(); | |||
5528 | ||||
5529 | if (RegWidth <= cast<IntegerType>(OldType)->getBitWidth()) | |||
5530 | return false; | |||
5531 | ||||
5532 | // If the register width is greater than the type width, expand the condition | |||
5533 | // of the switch instruction and each case constant to the width of the | |||
5534 | // register. By widening the type of the switch condition, subsequent | |||
5535 | // comparisons (for case comparisons) will not need to be extended to the | |||
5536 | // preferred register width, so we will potentially eliminate N-1 extends, | |||
5537 | // where N is the number of cases in the switch. | |||
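| // For illustration (hypothetical values), with 32-bit registers: | |||
| //   switch i8 %c, label %def [ i8 1, label %bb1 ] | |||
| // becomes | |||
| //   %w = zext i8 %c to i32 | |||
| //   switch i32 %w, label %def [ i32 1, label %bb1 ] | |||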
5538 | auto *NewType = Type::getIntNTy(Context, RegWidth); | |||
5539 | ||||
5540 | // Zero-extend the switch condition and case constants unless the switch | |||
5541 | // condition is a function argument that is already being sign-extended. | |||
5542 | // In that case, we can avoid an unnecessary mask/extension by sign-extending | |||
5543 | // everything instead. | |||
5544 | Instruction::CastOps ExtType = Instruction::ZExt; | |||
5545 | if (auto *Arg = dyn_cast<Argument>(Cond)) | |||
5546 | if (Arg->hasSExtAttr()) | |||
5547 | ExtType = Instruction::SExt; | |||
5548 | ||||
5549 | auto *ExtInst = CastInst::Create(ExtType, Cond, NewType); | |||
5550 | ExtInst->insertBefore(SI); | |||
5551 | SI->setCondition(ExtInst); | |||
5552 | for (auto Case : SI->cases()) { | |||
5553 | APInt NarrowConst = Case.getCaseValue()->getValue(); | |||
5554 | APInt WideConst = (ExtType == Instruction::ZExt) ? | |||
5555 | NarrowConst.zext(RegWidth) : NarrowConst.sext(RegWidth); | |||
5556 | Case.setValue(ConstantInt::get(Context, WideConst)); | |||
5557 | } | |||
5558 | ||||
5559 | return true; | |||
5560 | } | |||
5561 | ||||
5562 | ||||
5563 | namespace { | |||
5564 | /// \brief Helper class to promote a scalar operation to a vector one. | |||
5565 | /// This class is used to move an extractelement transition downward. | |||
5566 | /// E.g., | |||
5567 | /// a = vector_op <2 x i32> | |||
5568 | /// b = extractelement <2 x i32> a, i32 0 | |||
5569 | /// c = scalar_op b | |||
5570 | /// store c | |||
5571 | /// | |||
5572 | /// => | |||
5573 | /// a = vector_op <2 x i32> | |||
5574 | /// c = vector_op a (equivalent to scalar_op on the related lane) | |||
5575 | /// * d = extractelement <2 x i32> c, i32 0 | |||
5576 | /// * store d | |||
5577 | /// Assuming both extractelement and store can be combined, we get rid of the | |||
5578 | /// transition. | |||
5579 | class VectorPromoteHelper { | |||
5580 | /// DataLayout associated with the current module. | |||
5581 | const DataLayout &DL; | |||
5582 | ||||
5583 | /// Used to perform some checks on the legality of vector operations. | |||
5584 | const TargetLowering &TLI; | |||
5585 | ||||
5586 | /// Used to estimate the cost of the promoted chain. | |||
5587 | const TargetTransformInfo &TTI; | |||
5588 | ||||
5589 | /// The transition being moved downwards. | |||
5590 | Instruction *Transition; | |||
5591 | /// The sequence of instructions to be promoted. | |||
5592 | SmallVector<Instruction *, 4> InstsToBePromoted; | |||
5593 | /// Cost of combining a store and an extract. | |||
5594 | unsigned StoreExtractCombineCost; | |||
5595 | /// Instruction that will be combined with the transition. | |||
5596 | Instruction *CombineInst; | |||
5597 | ||||
5598 | /// \brief The instruction that represents the current end of the transition. | |||
5599 | /// Since we are faking the promotion until we reach the end of the chain | |||
5600 | /// of computation, we need a way to get the current end of the transition. | |||
5601 | Instruction *getEndOfTransition() const { | |||
5602 | if (InstsToBePromoted.empty()) | |||
5603 | return Transition; | |||
5604 | return InstsToBePromoted.back(); | |||
5605 | } | |||
5606 | ||||
5607 | /// \brief Return the index of the original value in the transition. | |||
5608 | /// E.g., for "extractelement <2 x i32> c, i32 1" the original value, | |||
5609 | /// c, is at index 0. | |||
5610 | unsigned getTransitionOriginalValueIdx() const { | |||
5611 |     assert(isa<ExtractElementInst>(Transition) && | |||
5612 |            "Other kinds of transitions are not supported yet"); | |||
5613 | return 0; | |||
5614 | } | |||
5615 | ||||
5616 | /// \brief Return the index of the index in the transition. | |||
5617 | /// E.g., for "extractelement <2 x i32> c, i32 0" the index | |||
5618 | /// is at index 1. | |||
5619 | unsigned getTransitionIdx() const { | |||
5620 |     assert(isa<ExtractElementInst>(Transition) && | |||
5621 |            "Other kinds of transitions are not supported yet"); | |||
5622 | return 1; | |||
5623 | } | |||
5624 | ||||
5625 | /// \brief Get the type of the transition. | |||
5626 | /// This is the type of the original value. | |||
5627 | /// E.g., for "extractelement <2 x i32> c, i32 1" the type of the | |||
5628 | /// transition is <2 x i32>. | |||
5629 | Type *getTransitionType() const { | |||
5630 | return Transition->getOperand(getTransitionOriginalValueIdx())->getType(); | |||
5631 | } | |||
5632 | ||||
5633 |   /// \brief Promote \p ToBePromoted by moving \p Def downward through it. | |||
5634 | /// I.e., we have the following sequence: | |||
5635 | /// Def = Transition <ty1> a to <ty2> | |||
5636 | /// b = ToBePromoted <ty2> Def, ... | |||
5637 | /// => | |||
5638 | /// b = ToBePromoted <ty1> a, ... | |||
5639 | /// Def = Transition <ty1> ToBePromoted to <ty2> | |||
5640 | void promoteImpl(Instruction *ToBePromoted); | |||
5641 | ||||
5642 | /// \brief Check whether or not it is profitable to promote all the | |||
5643 | /// instructions enqueued to be promoted. | |||
5644 | bool isProfitableToPromote() { | |||
5645 | Value *ValIdx = Transition->getOperand(getTransitionOriginalValueIdx()); | |||
5646 | unsigned Index = isa<ConstantInt>(ValIdx) | |||
5647 | ? cast<ConstantInt>(ValIdx)->getZExtValue() | |||
5648 | : -1; | |||
5649 | Type *PromotedType = getTransitionType(); | |||
5650 | ||||
5651 | StoreInst *ST = cast<StoreInst>(CombineInst); | |||
5652 | unsigned AS = ST->getPointerAddressSpace(); | |||
5653 | unsigned Align = ST->getAlignment(); | |||
5654 | // Check if this store is supported. | |||
5655 | if (!TLI.allowsMisalignedMemoryAccesses( | |||
5656 | TLI.getValueType(DL, ST->getValueOperand()->getType()), AS, | |||
5657 | Align)) { | |||
5658 | // If this is not supported, there is no way we can combine | |||
5659 | // the extract with the store. | |||
5660 | return false; | |||
5661 | } | |||
5662 | ||||
5663 |     // The scalar chain of computation has to pay for the scalar-to-vector | |||
5664 |     // transition. | |||
5665 | // The vector chain has to account for the combining cost. | |||
5666 | uint64_t ScalarCost = | |||
5667 | TTI.getVectorInstrCost(Transition->getOpcode(), PromotedType, Index); | |||
5668 | uint64_t VectorCost = StoreExtractCombineCost; | |||
5669 | for (const auto &Inst : InstsToBePromoted) { | |||
5670 | // Compute the cost. | |||
5671 | // By construction, all instructions being promoted are arithmetic ones. | |||
5672 | // Moreover, one argument is a constant that can be viewed as a splat | |||
5673 | // constant. | |||
5674 | Value *Arg0 = Inst->getOperand(0); | |||
5675 | bool IsArg0Constant = isa<UndefValue>(Arg0) || isa<ConstantInt>(Arg0) || | |||
5676 | isa<ConstantFP>(Arg0); | |||
5677 | TargetTransformInfo::OperandValueKind Arg0OVK = | |||
5678 | IsArg0Constant ? TargetTransformInfo::OK_UniformConstantValue | |||
5679 | : TargetTransformInfo::OK_AnyValue; | |||
5680 | TargetTransformInfo::OperandValueKind Arg1OVK = | |||
5681 | !IsArg0Constant ? TargetTransformInfo::OK_UniformConstantValue | |||
5682 | : TargetTransformInfo::OK_AnyValue; | |||
5683 | ScalarCost += TTI.getArithmeticInstrCost( | |||
5684 | Inst->getOpcode(), Inst->getType(), Arg0OVK, Arg1OVK); | |||
5685 | VectorCost += TTI.getArithmeticInstrCost(Inst->getOpcode(), PromotedType, | |||
5686 | Arg0OVK, Arg1OVK); | |||
5687 | } | |||
5688 |     DEBUG(dbgs() << "Estimated cost of computation to be promoted:\nScalar: " | |||
5689 |           << ScalarCost << "\nVector: " << VectorCost << '\n'); | |||
5690 | return ScalarCost > VectorCost; | |||
5691 | } | |||
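     |   // A worked example with made-up costs: if the extract costs 1, a scalar | |||
     |   // add costs 1, a vector add costs 1, and StoreExtractCombineCost is 1, | |||
     |   // then for two promotable adds ScalarCost = 1 + 2*1 = 3 and | |||
     |   // VectorCost = 1 + 2*1 = 3, so promotion is rejected; it proceeds only | |||
     |   // when the scalar chain is strictly more expensive. | |||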
5692 | ||||
5693 | /// \brief Generate a constant vector with \p Val with the same | |||
5694 | /// number of elements as the transition. | |||
5695 | /// \p UseSplat defines whether or not \p Val should be replicated | |||
5696 | /// across the whole vector. | |||
5697 | /// In other words, if UseSplat == true, we generate <Val, Val, ..., Val>, | |||
5698 | /// otherwise we generate a vector with as many undef as possible: | |||
5699 | /// <undef, ..., undef, Val, undef, ..., undef> where \p Val is only | |||
5700 | /// used at the index of the extract. | |||
5701 | Value *getConstantVector(Constant *Val, bool UseSplat) const { | |||
5702 |     unsigned ExtractIdx = UINT_MAX; | |||
5703 | if (!UseSplat) { | |||
5704 | // If we cannot determine where the constant must be, we have to | |||
5705 | // use a splat constant. | |||
5706 | Value *ValExtractIdx = Transition->getOperand(getTransitionIdx()); | |||
5707 | if (ConstantInt *CstVal = dyn_cast<ConstantInt>(ValExtractIdx)) | |||
5708 | ExtractIdx = CstVal->getSExtValue(); | |||
5709 | else | |||
5710 | UseSplat = true; | |||
5711 | } | |||
5712 | ||||
5713 | unsigned End = getTransitionType()->getVectorNumElements(); | |||
5714 | if (UseSplat) | |||
5715 | return ConstantVector::getSplat(End, Val); | |||
5716 | ||||
5717 | SmallVector<Constant *, 4> ConstVec; | |||
5718 | UndefValue *UndefVal = UndefValue::get(Val->getType()); | |||
5719 | for (unsigned Idx = 0; Idx != End; ++Idx) { | |||
5720 | if (Idx == ExtractIdx) | |||
5721 | ConstVec.push_back(Val); | |||
5722 | else | |||
5723 | ConstVec.push_back(UndefVal); | |||
5724 | } | |||
5725 | return ConstantVector::get(ConstVec); | |||
5726 | } | |||
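     |   // E.g., for Val = i32 7, a <4 x i32> transition, and ExtractIdx == 2: | |||
     |   //   UseSplat == true  -> <i32 7, i32 7, i32 7, i32 7> | |||
     |   //   UseSplat == false -> <i32 undef, i32 undef, i32 7, i32 undef> | |||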
5727 | ||||
5728 |   /// \brief Check if promoting the operand at \p OperandIdx in \p Use | |||
5729 |   /// to a vector type can trigger undefined behavior. | |||
5730 | static bool canCauseUndefinedBehavior(const Instruction *Use, | |||
5731 | unsigned OperandIdx) { | |||
5732 |     // It is not safe to introduce undef when the operand is on | |||
5733 |     // the right-hand side of a division-like instruction. | |||
5734 | if (OperandIdx != 1) | |||
5735 | return false; | |||
5736 | switch (Use->getOpcode()) { | |||
5737 | default: | |||
5738 | return false; | |||
5739 | case Instruction::SDiv: | |||
5740 | case Instruction::UDiv: | |||
5741 | case Instruction::SRem: | |||
5742 | case Instruction::URem: | |||
5743 | return true; | |||
5744 | case Instruction::FDiv: | |||
5745 | case Instruction::FRem: | |||
5746 | return !Use->hasNoNaNs(); | |||
5747 | } | |||
5748 |     llvm_unreachable(nullptr); | |||
5749 | } | |||
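     |   // E.g., promoting a hypothetical %r = udiv i32 %x, %d with the transition | |||
     |   // feeding %d would produce a vector udiv whose unextracted lanes divide | |||
     |   // by undef, which is undefined behavior; hence operand index 1 of | |||
     |   // division-like instructions is rejected above. | |||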
5750 | ||||
5751 | public: | |||
5752 | VectorPromoteHelper(const DataLayout &DL, const TargetLowering &TLI, | |||
5753 | const TargetTransformInfo &TTI, Instruction *Transition, | |||
5754 | unsigned CombineCost) | |||
5755 | : DL(DL), TLI(TLI), TTI(TTI), Transition(Transition), | |||
5756 | StoreExtractCombineCost(CombineCost), CombineInst(nullptr) { | |||
5757 |     assert(Transition && "Do not know how to promote null"); | |||
5758 | } | |||
5759 | ||||
5760 | /// \brief Check if we can promote \p ToBePromoted to \p Type. | |||
5761 | bool canPromote(const Instruction *ToBePromoted) const { | |||
5762 | // We could support CastInst too. | |||
5763 | return isa<BinaryOperator>(ToBePromoted); | |||
5764 | } | |||
5765 | ||||
5766 | /// \brief Check if it is profitable to promote \p ToBePromoted | |||
5767 |   /// by moving the transition downward through it. | |||
5768 | bool shouldPromote(const Instruction *ToBePromoted) const { | |||
5769 | // Promote only if all the operands can be statically expanded. | |||
5770 | // Indeed, we do not want to introduce any new kind of transitions. | |||
5771 | for (const Use &U : ToBePromoted->operands()) { | |||
5772 | const Value *Val = U.get(); | |||
5773 | if (Val == getEndOfTransition()) { | |||
5774 | // If the use is a division and the transition is on the rhs, | |||
5775 | // we cannot promote the operation, otherwise we may create a | |||
5776 | // division by zero. | |||
5777 | if (canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo())) | |||
5778 | return false; | |||
5779 | continue; | |||
5780 | } | |||
5781 | if (!isa<ConstantInt>(Val) && !isa<UndefValue>(Val) && | |||
5782 | !isa<ConstantFP>(Val)) | |||
5783 | return false; | |||
5784 | } | |||
5785 | // Check that the resulting operation is legal. | |||
5786 | int ISDOpcode = TLI.InstructionOpcodeToISD(ToBePromoted->getOpcode()); | |||
5787 | if (!ISDOpcode) | |||
5788 | return false; | |||
5789 | return StressStoreExtract || | |||
5790 | TLI.isOperationLegalOrCustom( | |||
5791 | ISDOpcode, TLI.getValueType(DL, getTransitionType(), true)); | |||
5792 | } | |||
5793 | ||||
5794 | /// \brief Check whether or not \p Use can be combined | |||
5795 | /// with the transition. | |||
5796 | /// I.e., is it possible to do Use(Transition) => AnotherUse? | |||
5797 | bool canCombine(const Instruction *Use) { return isa<StoreInst>(Use); } | |||
5798 | ||||
5799 | /// \brief Record \p ToBePromoted as part of the chain to be promoted. | |||
5800 | void enqueueForPromotion(Instruction *ToBePromoted) { | |||
5801 | InstsToBePromoted.push_back(ToBePromoted); | |||
5802 | } | |||
5803 | ||||
5804 | /// \brief Set the instruction that will be combined with the transition. | |||
5805 | void recordCombineInstruction(Instruction *ToBeCombined) { | |||
5806 |     assert(canCombine(ToBeCombined) && "Unsupported instruction to combine"); | |||
5807 | CombineInst = ToBeCombined; | |||
5808 | } | |||
5809 | ||||
5810 |   /// \brief Promote all the instructions enqueued for promotion if it is | |||
5811 |   /// profitable. | |||
5812 | /// \return True if the promotion happened, false otherwise. | |||
5813 | bool promote() { | |||
5814 | // Check if there is something to promote. | |||
5815 | // Right now, if we do not have anything to combine with, | |||
5816 | // we assume the promotion is not profitable. | |||
5817 | if (InstsToBePromoted.empty() || !CombineInst) | |||
5818 | return false; | |||
5819 | ||||
5820 | // Check cost. | |||
5821 | if (!StressStoreExtract && !isProfitableToPromote()) | |||
5822 | return false; | |||
5823 | ||||
5824 | // Promote. | |||
5825 | for (auto &ToBePromoted : InstsToBePromoted) | |||
5826 | promoteImpl(ToBePromoted); | |||
5827 | InstsToBePromoted.clear(); | |||
5828 | return true; | |||
5829 | } | |||
5830 | }; | |||
5831 | } // End of anonymous namespace. | |||
5832 | ||||
5833 | void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) { | |||
5834 | // At this point, we know that all the operands of ToBePromoted but Def | |||
5835 | // can be statically promoted. | |||
5836 | // For Def, we need to use its parameter in ToBePromoted: | |||
5837 | // b = ToBePromoted ty1 a | |||
5838 | // Def = Transition ty1 b to ty2 | |||
5839 | // Move the transition down. | |||
5840 | // 1. Replace all uses of the promoted operation by the transition. | |||
5841 | // = ... b => = ... Def. | |||
5842 |   assert(ToBePromoted->getType() == Transition->getType() && | |||
5843 |          "The type of the result of the transition does not match " | |||
5844 |          "the final type"); | |||
5845 | ToBePromoted->replaceAllUsesWith(Transition); | |||
5846 | // 2. Update the type of the uses. | |||
5847 | // b = ToBePromoted ty2 Def => b = ToBePromoted ty1 Def. | |||
5848 | Type *TransitionTy = getTransitionType(); | |||
5849 | ToBePromoted->mutateType(TransitionTy); | |||
5850 | // 3. Update all the operands of the promoted operation with promoted | |||
5851 | // operands. | |||
5852 | // b = ToBePromoted ty1 Def => b = ToBePromoted ty1 a. | |||
5853 | for (Use &U : ToBePromoted->operands()) { | |||
5854 | Value *Val = U.get(); | |||
5855 | Value *NewVal = nullptr; | |||
5856 | if (Val == Transition) | |||
5857 | NewVal = Transition->getOperand(getTransitionOriginalValueIdx()); | |||
5858 | else if (isa<UndefValue>(Val) || isa<ConstantInt>(Val) || | |||
5859 | isa<ConstantFP>(Val)) { | |||
5860 | // Use a splat constant if it is not safe to use undef. | |||
5861 | NewVal = getConstantVector( | |||
5862 | cast<Constant>(Val), | |||
5863 | isa<UndefValue>(Val) || | |||
5864 | canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo())); | |||
5865 | } else | |||
5866 |       llvm_unreachable("Did you modify shouldPromote and forget to update " | |||
5867 |                        "this?"); | |||
5868 | ToBePromoted->setOperand(U.getOperandNo(), NewVal); | |||
5869 | } | |||
5870 | Transition->removeFromParent(); | |||
5871 | Transition->insertAfter(ToBePromoted); | |||
5872 | Transition->setOperand(getTransitionOriginalValueIdx(), ToBePromoted); | |||
5873 | } | |||
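     | // For illustration, one promoteImpl step on a hypothetical chain: | |||
     | //   %ee = extractelement <2 x i32> %a, i32 0 | |||
     | //   %b  = add i32 %ee, 7 | |||
     | // becomes | |||
     | //   %b  = add <2 x i32> %a, <i32 7, i32 undef> | |||
     | //   %ee = extractelement <2 x i32> %b, i32 0 | |||
     | // where the unextracted lane gets undef because an add cannot trigger | |||
     | // undefined behavior. | |||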
5874 | ||||
5875 | /// Some targets can do store(extractelement) with one instruction. | |||
5876 | /// Try to push the extractelement towards the stores when the target | |||
5877 | /// has this feature and this is profitable. | |||
5878 | bool CodeGenPrepare::optimizeExtractElementInst(Instruction *Inst) { | |||
5879 |   unsigned CombineCost = UINT_MAX; | |||
5880 | if (DisableStoreExtract || !TLI || | |||
5881 | (!StressStoreExtract && | |||
5882 | !TLI->canCombineStoreAndExtract(Inst->getOperand(0)->getType(), | |||
5883 | Inst->getOperand(1), CombineCost))) | |||
5884 | return false; | |||
5885 | ||||
5886 | // At this point we know that Inst is a vector to scalar transition. | |||
5887 | // Try to move it down the def-use chain, until: | |||
5888 | // - We can combine the transition with its single use | |||
5889 | // => we got rid of the transition. | |||
5890 | // - We escape the current basic block | |||
5891 |   //      => we would need to check that we are moving it to a cheaper place and | |||
5892 | // we do not do that for now. | |||
5893 | BasicBlock *Parent = Inst->getParent(); | |||
5894 |   DEBUG(dbgs() << "Found an interesting transition: " << *Inst << '\n'); | |||
5895 | VectorPromoteHelper VPH(*DL, *TLI, *TTI, Inst, CombineCost); | |||
5896 | // If the transition has more than one use, assume this is not going to be | |||
5897 | // beneficial. | |||
5898 | while (Inst->hasOneUse()) { | |||
5899 | Instruction *ToBePromoted = cast<Instruction>(*Inst->user_begin()); | |||
5900 |     DEBUG(dbgs() << "Use: " << *ToBePromoted << '\n'); | |||
5901 | ||||
5902 | if (ToBePromoted->getParent() != Parent) { | |||
5903 |       DEBUG(dbgs() << "Instruction to promote is in a different block (" | |||
5904 |             << ToBePromoted->getParent()->getName() | |||
5905 |             << ") than the transition (" << Parent->getName() << ").\n"); | |||
5906 | return false; | |||
5907 | } | |||
5908 | ||||
5909 | if (VPH.canCombine(ToBePromoted)) { | |||
5910 |       DEBUG(dbgs() << "Assume " << *Inst << '\n' | |||
5911 |             << "will be combined with: " << *ToBePromoted << '\n'); | |||
5912 | VPH.recordCombineInstruction(ToBePromoted); | |||
5913 | bool Changed = VPH.promote(); | |||
5914 | NumStoreExtractExposed += Changed; | |||
5915 | return Changed; | |||
5916 | } | |||
5917 | ||||
5918 |     DEBUG(dbgs() << "Try promoting.\n"); | |||
5919 | if (!VPH.canPromote(ToBePromoted) || !VPH.shouldPromote(ToBePromoted)) | |||
5920 | return false; | |||
5921 | ||||
5922 |     DEBUG(dbgs() << "Promoting is possible... Enqueue for promotion!\n"); | |||
5923 | ||||
5924 | VPH.enqueueForPromotion(ToBePromoted); | |||
5925 | Inst = ToBePromoted; | |||
5926 | } | |||
5927 | return false; | |||
5928 | } | |||
5929 | ||||
5930 | /// For the store instruction sequence below, the F and I values | |||
5931 | /// are bundled together as an i64 value before being stored into memory. | |||
5932 | /// Sometimes it is more efficient to generate separate stores for F and I, | |||
5933 | /// which can remove the bitwise instructions or sink them to colder places. | |||
5934 | /// | |||
5935 | /// (store (or (zext (bitcast F to i32) to i64), | |||
5936 | /// (shl (zext I to i64), 32)), addr) --> | |||
5937 | /// (store F, addr) and (store I, addr+4) | |||
5938 | /// | |||
5939 | /// Similarly, splitting other merged stores can also be beneficial, like: | |||
5940 | /// For pair of {i32, i32}, i64 store --> two i32 stores. | |||
5941 | /// For pair of {i32, i16}, i64 store --> two i32 stores. | |||
5942 | /// For pair of {i16, i16}, i32 store --> two i16 stores. | |||
5943 | /// For pair of {i16, i8}, i32 store --> two i16 stores. | |||
5944 | /// For pair of {i8, i8}, i16 store --> two i8 stores. | |||
5945 | /// | |||
5946 | /// We allow each target to determine specifically which kind of splitting is | |||
5947 | /// supported. | |||
5948 | /// | |||
5949 | /// The store patterns commonly arise from the simple code snippet below if | |||
5950 | /// only std::make_pair(...) is SROA-transformed before being inlined into hoo. | |||
5951 | /// void goo(const std::pair<int, float> &); | |||
5952 | /// hoo() { | |||
5953 | /// ... | |||
5954 | /// goo(std::make_pair(tmp, ftmp)); | |||
5955 | /// ... | |||
5956 | /// } | |||
5957 | /// | |||
5958 | /// Although we already have similar splitting in DAG Combine, we duplicate | |||
5959 | /// it in CodeGenPrepare to catch the case in which the pattern spans | |||
5960 | /// multiple BBs. The logic in DAG Combine is kept to catch cases generated | |||
5961 | /// during code expansion. | |||
5962 | static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL, | |||
5963 | const TargetLowering &TLI) { | |||
5964 | // Handle simple but common cases only. | |||
5965 | Type *StoreType = SI.getValueOperand()->getType(); | |||
5966 | if (DL.getTypeStoreSizeInBits(StoreType) != DL.getTypeSizeInBits(StoreType) || | |||
5967 | DL.getTypeSizeInBits(StoreType) == 0) | |||
5968 | return false; | |||
5969 | ||||
5970 | unsigned HalfValBitSize = DL.getTypeSizeInBits(StoreType) / 2; | |||
5971 | Type *SplitStoreType = Type::getIntNTy(SI.getContext(), HalfValBitSize); | |||
5972 | if (DL.getTypeStoreSizeInBits(SplitStoreType) != | |||
5973 | DL.getTypeSizeInBits(SplitStoreType)) | |||
5974 | return false; | |||
5975 | ||||
5976 | // Match the following patterns: | |||
5977 | // (store (or (zext LValue to i64), | |||
5978 | // (shl (zext HValue to i64), 32)), HalfValBitSize) | |||
5979 |   //  or | |||
5980 |   // (store (or (shl (zext HValue to i64), 32), | |||
5981 |   //            (zext LValue to i64)), HalfValBitSize) | |||
5982 |   // Expect both operands of the OR and the first operand of the SHL to | |||
5983 |   // have only one use. | |||
5984 | Value *LValue, *HValue; | |||
5985 | if (!match(SI.getValueOperand(), | |||
5986 | m_c_Or(m_OneUse(m_ZExt(m_Value(LValue))), | |||
5987 | m_OneUse(m_Shl(m_OneUse(m_ZExt(m_Value(HValue))), | |||
5988 | m_SpecificInt(HalfValBitSize)))))) | |||
5989 | return false; | |||
5990 | ||||
5991 |   // Check that LValue and HValue are integers no wider than HalfValBitSize. | |||
5992 | if (!LValue->getType()->isIntegerTy() || | |||
5993 | DL.getTypeSizeInBits(LValue->getType()) > HalfValBitSize || | |||
5994 | !HValue->getType()->isIntegerTy() || | |||
5995 | DL.getTypeSizeInBits(HValue->getType()) > HalfValBitSize) | |||
5996 | return false; | |||
5997 | ||||
5998 | // If LValue/HValue is a bitcast instruction, use the EVT before bitcast | |||
5999 | // as the input of target query. | |||
6000 | auto *LBC = dyn_cast<BitCastInst>(LValue); | |||
6001 | auto *HBC = dyn_cast<BitCastInst>(HValue); | |||
6002 | EVT LowTy = LBC ? EVT::getEVT(LBC->getOperand(0)->getType()) | |||
6003 | : EVT::getEVT(LValue->getType()); | |||
6004 | EVT HighTy = HBC ? EVT::getEVT(HBC->getOperand(0)->getType()) | |||
6005 | : EVT::getEVT(HValue->getType()); | |||
6006 | if (!ForceSplitStore && !TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy)) | |||
6007 | return false; | |||
6008 | ||||
6009 | // Start to split store. | |||
6010 | IRBuilder<> Builder(SI.getContext()); | |||
6011 | Builder.SetInsertPoint(&SI); | |||
6012 | ||||
6013 |   // If LValue/HValue is a bitcast in another BB, create a new one in the | |||
6014 |   // current BB so it may be merged with the split stores by the DAG combiner. | |||
6015 | if (LBC && LBC->getParent() != SI.getParent()) | |||
6016 | LValue = Builder.CreateBitCast(LBC->getOperand(0), LBC->getType()); | |||
6017 | if (HBC && HBC->getParent() != SI.getParent()) | |||
6018 | HValue = Builder.CreateBitCast(HBC->getOperand(0), HBC->getType()); | |||
6019 | ||||
6020 | auto CreateSplitStore = [&](Value *V, bool Upper) { | |||
6021 | V = Builder.CreateZExtOrBitCast(V, SplitStoreType); | |||
6022 | Value *Addr = Builder.CreateBitCast( | |||
6023 | SI.getOperand(1), | |||
6024 | SplitStoreType->getPointerTo(SI.getPointerAddressSpace())); | |||
6025 | if (Upper) | |||
6026 | Addr = Builder.CreateGEP( | |||
6027 | SplitStoreType, Addr, | |||
6028 | ConstantInt::get(Type::getInt32Ty(SI.getContext()), 1)); | |||
6029 | Builder.CreateAlignedStore( | |||
6030 | V, Addr, Upper ? SI.getAlignment() / 2 : SI.getAlignment()); | |||
6031 | }; | |||
6032 | ||||
6033 | CreateSplitStore(LValue, false); | |||
6034 | CreateSplitStore(HValue, true); | |||
6035 | ||||
6036 | // Delete the old store. | |||
6037 | SI.eraseFromParent(); | |||
6038 | return true; | |||
6039 | } | |||
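     | // A concrete sketch of the rewrite, assuming the target reports the split | |||
     | // as cheaper (value names are hypothetical): | |||
     | //   %z0 = zext i32 %lo to i64 | |||
     | //   %z1 = zext i32 %hi to i64 | |||
     | //   %sh = shl i64 %z1, 32 | |||
     | //   %or = or i64 %z0, %sh | |||
     | //   store i64 %or, i64* %p | |||
     | // becomes | |||
     | //   %p0 = bitcast i64* %p to i32* | |||
     | //   store i32 %lo, i32* %p0 | |||
     | //   %p1 = getelementptr i32, i32* %p0, i32 1 | |||
     | //   store i32 %hi, i32* %p1 | |||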
6040 | ||||
6041 | bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) { | |||
6042 | // Bail out if we inserted the instruction to prevent optimizations from | |||
6043 | // stepping on each other's toes. | |||
6044 | if (InsertedInsts.count(I)) | |||
6045 | return false; | |||
6046 | ||||
6047 | if (PHINode *P = dyn_cast<PHINode>(I)) { | |||
6048 | // It is possible for very late stage optimizations (such as SimplifyCFG) | |||
6049 | // to introduce PHI nodes too late to be cleaned up. If we detect such a | |||
6050 | // trivial PHI, go ahead and zap it here. | |||
6051 | if (Value *V = SimplifyInstruction(P, {*DL, TLInfo})) { | |||
6052 | P->replaceAllUsesWith(V); | |||
6053 | P->eraseFromParent(); | |||
6054 | ++NumPHIsElim; | |||
6055 | return true; | |||
6056 | } | |||
6057 | return false; | |||
6058 | } | |||
6059 | ||||
6060 | if (CastInst *CI = dyn_cast<CastInst>(I)) { | |||
6061 | // If the source of the cast is a constant, then this should have | |||
6062 | // already been constant folded. The only reason NOT to constant fold | |||
6063 | // it is if something (e.g. LSR) was careful to place the constant | |||
6064 |     // evaluation in a block other than the one that uses it (e.g. to hoist | |||
6065 | // the address of globals out of a loop). If this is the case, we don't | |||
6066 | // want to forward-subst the cast. | |||
6067 | if (isa<Constant>(CI->getOperand(0))) | |||
6068 | return false; | |||
6069 | ||||
6070 | if (TLI && OptimizeNoopCopyExpression(CI, *TLI, *DL)) | |||
6071 | return true; | |||
6072 | ||||
6073 | if (isa<ZExtInst>(I) || isa<SExtInst>(I)) { | |||
6074 |       // Sink a zext or sext into its user blocks if the target type doesn't | |||
6075 |       // fit in one register. | |||
6076 | if (TLI && | |||
6077 | TLI->getTypeAction(CI->getContext(), | |||
6078 | TLI->getValueType(*DL, CI->getType())) == | |||
6079 | TargetLowering::TypeExpandInteger) { | |||
6080 | return SinkCast(CI); | |||
6081 | } else { | |||
6082 | bool MadeChange = optimizeExt(I); | |||
6083 | return MadeChange | optimizeExtUses(I); | |||
6084 | } | |||
6085 | } | |||
6086 | return false; | |||
6087 | } | |||
6088 | ||||
6089 | if (CmpInst *CI = dyn_cast<CmpInst>(I)) | |||
6090 | if (!TLI || !TLI->hasMultipleConditionRegisters()) | |||
6091 | return OptimizeCmpExpression(CI, TLI); | |||
6092 | ||||
6093 | if (LoadInst *LI = dyn_cast<LoadInst>(I)) { | |||
6094 | LI->setMetadata(LLVMContext::MD_invariant_group, nullptr); | |||
6095 | if (TLI) { | |||
6096 | bool Modified = optimizeLoadExt(LI); | |||
6097 | unsigned AS = LI->getPointerAddressSpace(); | |||
6098 | Modified |= optimizeMemoryInst(I, I->getOperand(0), LI->getType(), AS); | |||
6099 | return Modified; | |||
6100 | } | |||
6101 | return false; | |||
6102 | } | |||
6103 | ||||
6104 | if (StoreInst *SI = dyn_cast<StoreInst>(I)) { | |||
6105 | if (TLI && splitMergedValStore(*SI, *DL, *TLI)) | |||
6106 | return true; | |||
6107 | SI->setMetadata(LLVMContext::MD_invariant_group, nullptr); | |||
6108 | if (TLI) { | |||
6109 | unsigned AS = SI->getPointerAddressSpace(); | |||
6110 | return optimizeMemoryInst(I, SI->getOperand(1), | |||
6111 | SI->getOperand(0)->getType(), AS); | |||
6112 | } | |||
6113 | return false; | |||
6114 | } | |||
6115 | ||||
6116 | if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) { | |||
6117 | unsigned AS = RMW->getPointerAddressSpace(); | |||
6118 | return optimizeMemoryInst(I, RMW->getPointerOperand(), | |||
6119 | RMW->getType(), AS); | |||
6120 | } | |||
6121 | ||||
6122 | if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(I)) { | |||
6123 | unsigned AS = CmpX->getPointerAddressSpace(); | |||
6124 | return optimizeMemoryInst(I, CmpX->getPointerOperand(), | |||
6125 | CmpX->getCompareOperand()->getType(), AS); | |||
6126 | } | |||
6127 | ||||
6128 | BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I); | |||
6129 | ||||
6130 | if (BinOp && (BinOp->getOpcode() == Instruction::And) && | |||
6131 | EnableAndCmpSinking && TLI) | |||
6132 | return sinkAndCmp0Expression(BinOp, *TLI, InsertedInsts); | |||
6133 | ||||
6134 | if (BinOp && (BinOp->getOpcode() == Instruction::AShr || | |||
6135 | BinOp->getOpcode() == Instruction::LShr)) { | |||
6136 | ConstantInt *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1)); | |||
6137 | if (TLI && CI && TLI->hasExtractBitsInsn()) | |||
6138 | return OptimizeExtractBits(BinOp, CI, *TLI, *DL); | |||
6139 | ||||
6140 | return false; | |||
6141 | } | |||
6142 | ||||
6143 | if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) { | |||
6144 | if (GEPI->hasAllZeroIndices()) { | |||
6145 |       // The GEP operand must be a pointer, so must its result -> BitCast. | |||
6146 | Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(), | |||
6147 | GEPI->getName(), GEPI); | |||
6148 | GEPI->replaceAllUsesWith(NC); | |||
6149 | GEPI->eraseFromParent(); | |||
6150 | ++NumGEPsElim; | |||
6151 | optimizeInst(NC, ModifiedDT); | |||
6152 | return true; | |||
6153 | } | |||
6154 | return false; | |||
6155 | } | |||
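     |   // E.g., %g = getelementptr { i32, i32 }, { i32, i32 }* %p, i32 0, i32 0 | |||
     |   // has all-zero indices and is rewritten above into | |||
     |   //   %g = bitcast { i32, i32 }* %p to i32* | |||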
6156 | ||||
6157 | if (CallInst *CI = dyn_cast<CallInst>(I)) | |||
6158 | return optimizeCallInst(CI, ModifiedDT); | |||
6159 | ||||
6160 | if (SelectInst *SI = dyn_cast<SelectInst>(I)) | |||
6161 | return optimizeSelectInst(SI); | |||
6162 | ||||
6163 | if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I)) | |||
6164 | return optimizeShuffleVectorInst(SVI); | |||
6165 | ||||
6166 | if (auto *Switch = dyn_cast<SwitchInst>(I)) | |||
6167 | return optimizeSwitchInst(Switch); | |||
6168 | ||||
6169 | if (isa<ExtractElementInst>(I)) | |||
6170 | return optimizeExtractElementInst(I); | |||
6171 | ||||
6172 | return false; | |||
6173 | } | |||
6174 | ||||
6175 | /// Given an OR instruction, check to see if this is a bitreverse | |||
6176 | /// idiom. If so, insert the new intrinsic and return true. | |||
6177 | static bool makeBitReverse(Instruction &I, const DataLayout &DL, | |||
6178 | const TargetLowering &TLI) { | |||
6179 | if (!I.getType()->isIntegerTy() || | |||
6180 | !TLI.isOperationLegalOrCustom(ISD::BITREVERSE, | |||
6181 | TLI.getValueType(DL, I.getType(), true))) | |||
6182 | return false; | |||
6183 | ||||
6184 | SmallVector<Instruction*, 4> Insts; | |||
6185 | if (!recognizeBSwapOrBitReverseIdiom(&I, false, true, Insts)) | |||
6186 | return false; | |||
6187 | Instruction *LastInst = Insts.back(); | |||
6188 | I.replaceAllUsesWith(LastInst); | |||
6189 | RecursivelyDeleteTriviallyDeadInstructions(&I); | |||
6190 | return true; | |||
6191 | } | |||
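     | // For example (a sketch, not the full idiom matcher): a straight-line | |||
     | // shift-and-mask sequence that reverses the bits of an i8 and ends in an | |||
     | // 'or' can be collapsed into a single | |||
     | //   %r = call i8 @llvm.bitreverse.i8(i8 %x) | |||
     | // when the target marks ISD::BITREVERSE as legal or custom for i8. | |||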
6192 | ||||
6193 | // In this pass we look for GEP and cast instructions that are used | |||
6194 | // across basic blocks and rewrite them to improve basic-block-at-a-time | |||
6195 | // selection. | |||
6196 | bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, bool &ModifiedDT) { | |||
6197 | SunkAddrs.clear(); | |||
6198 | bool MadeChange = false; | |||
6199 | ||||
6200 | CurInstIterator = BB.begin(); | |||
6201 | while (CurInstIterator != BB.end()) { | |||
6202 | MadeChange |= optimizeInst(&*CurInstIterator++, ModifiedDT); | |||
6203 | if (ModifiedDT) | |||
6204 | return true; | |||
6205 | } | |||
6206 | ||||
6207 | bool MadeBitReverse = true; | |||
6208 | while (TLI && MadeBitReverse) { | |||
6209 | MadeBitReverse = false; | |||
6210 | for (auto &I : reverse(BB)) { | |||
6211 | if (makeBitReverse(I, *DL, *TLI)) { | |||
6212 | MadeBitReverse = MadeChange = true; | |||
6213 | ModifiedDT = true; | |||
6214 | break; | |||
6215 | } | |||
6216 | } | |||
6217 | } | |||
6218 | MadeChange |= dupRetToEnableTailCallOpts(&BB); | |||
6219 | ||||
6220 | return MadeChange; | |||
6221 | } | |||
6222 | ||||
6223 | // If llvm.dbg.value is far away from the value then ISel may not be able to | |||
6224 | // handle it properly. ISel will drop the llvm.dbg.value if it cannot find a | |||
6225 | // node corresponding to the value. | |||
6226 | bool CodeGenPrepare::placeDbgValues(Function &F) { | |||
6227 | bool MadeChange = false; | |||
6228 | for (BasicBlock &BB : F) { | |||
6229 | Instruction *PrevNonDbgInst = nullptr; | |||
6230 | for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) { | |||
6231 | Instruction *Insn = &*BI++; | |||
6232 | DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn); | |||
6233 | // Leave dbg.values that refer to an alloca alone. These | |||
6234 |       // intrinsics describe the address of a variable (= the alloca) | |||
6235 | // being taken. They should not be moved next to the alloca | |||
6236 | // (and to the beginning of the scope), but rather stay close to | |||
6237 | // where said address is used. | |||
6238 | if (!DVI || (DVI->getValue() && isa<AllocaInst>(DVI->getValue()))) { | |||
6239 | PrevNonDbgInst = Insn; | |||
6240 | continue; | |||
6241 | } | |||
6242 | ||||
6243 | Instruction *VI = dyn_cast_or_null<Instruction>(DVI->getValue()); | |||
6244 | if (VI && VI != PrevNonDbgInst && !VI->isTerminator()) { | |||
6245 | // If VI is a phi in a block with an EHPad terminator, we can't insert | |||
6246 | // after it. | |||
6247 | if (isa<PHINode>(VI) && VI->getParent()->getTerminator()->isEHPad()) | |||
6248 | continue; | |||
6249 |         DEBUG(dbgs() << "Moving Debug Value before :\n" << *DVI << ' ' << *VI); | |||
6250 | DVI->removeFromParent(); | |||
6251 | if (isa<PHINode>(VI)) | |||
6252 | DVI->insertBefore(&*VI->getParent()->getFirstInsertionPt()); | |||
6253 | else | |||
6254 | DVI->insertAfter(VI); | |||
6255 | MadeChange = true; | |||
6256 | ++NumDbgValueMoved; | |||
6257 | } | |||
6258 | } | |||
6259 | } | |||
6260 | return MadeChange; | |||
6261 | } | |||
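     | // For illustration (hypothetical IR): | |||
     | //   %v = add i32 %a, %b | |||
     | //   ...                                      ; many unrelated instructions | |||
     | //   call void @llvm.dbg.value(metadata i32 %v, ...) | |||
     | // The dbg.value is re-inserted directly after the definition of %v so that | |||
     | // ISel can still find a node for it. | |||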
6262 | ||||
6263 | /// \brief Scale down both weights to fit into uint32_t. | |||
6264 | static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) { | |||
6265 | uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse; | |||
6266 |   uint32_t Scale = (NewMax / UINT32_MAX) + 1; | |||
6267 | NewTrue = NewTrue / Scale; | |||
6268 | NewFalse = NewFalse / Scale; | |||
6269 | } | |||
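     | // E.g., NewTrue = 2^33 and NewFalse = 2^31 give | |||
     | // Scale = (2^33 / (2^32 - 1)) + 1 = 3, so the weights become 2863311530 | |||
     | // and 715827882, both of which fit in uint32_t while preserving the | |||
     | // roughly 4:1 ratio. | |||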
6270 | ||||
6271 | /// \brief Some targets prefer to split a conditional branch like: | |||
6272 | /// \code | |||
6273 | /// %0 = icmp ne i32 %a, 0 | |||
6274 | /// %1 = icmp ne i32 %b, 0 | |||
6275 | /// %or.cond = or i1 %0, %1 | |||
6276 | /// br i1 %or.cond, label %TrueBB, label %FalseBB | |||
6277 | /// \endcode | |||
6278 | /// into multiple branch instructions like: | |||
6279 | /// \code | |||
6280 | /// bb1: | |||
6281 | /// %0 = icmp ne i32 %a, 0 | |||
6282 | /// br i1 %0, label %TrueBB, label %bb2 | |||
6283 | /// bb2: | |||
6284 | /// %1 = icmp ne i32 %b, 0 | |||
6285 | /// br i1 %1, label %TrueBB, label %FalseBB | |||
6286 | /// \endcode | |||
6287 | /// This usually allows instruction selection to do even further optimizations | |||
6288 | /// and combine the compare with the branch instruction. Currently this is | |||
6289 | /// applied for targets which have "cheap" jump instructions. | |||
6290 | /// | |||
6291 | /// FIXME: Remove the (equivalent?) implementation in SelectionDAG. | |||
6292 | /// | |||
6293 | bool CodeGenPrepare::splitBranchCondition(Function &F) { | |||
6294 | if (!TM || !TM->Options.EnableFastISel || !TLI || TLI->isJumpExpensive()) | |||
6295 | return false; | |||
6296 | ||||
6297 | bool MadeChange = false; | |||
6298 | for (auto &BB : F) { | |||
6299 | // Does this BB end with the following? | |||
6300 | // %cond1 = icmp|fcmp|binary instruction ... | |||
6301 | // %cond2 = icmp|fcmp|binary instruction ... | |||
6302 |     // %cond.or = or|and i1 %cond1, %cond2 | |||
6303 |     // br i1 %cond.or, label %dest1, label %dest2 | |||
6304 | BinaryOperator *LogicOp; | |||
6305 | BasicBlock *TBB, *FBB; | |||
6306 | if (!match(BB.getTerminator(), m_Br(m_OneUse(m_BinOp(LogicOp)), TBB, FBB))) | |||
6307 | continue; | |||
6308 | ||||
6309 | auto *Br1 = cast<BranchInst>(BB.getTerminator()); | |||
6310 | if (Br1->getMetadata(LLVMContext::MD_unpredictable)) | |||
6311 | continue; | |||
6312 | ||||
6313 | unsigned Opc; | |||
6314 | Value *Cond1, *Cond2; | |||
6315 | if (match(LogicOp, m_And(m_OneUse(m_Value(Cond1)), | |||
6316 | m_OneUse(m_Value(Cond2))))) | |||
6317 | Opc = Instruction::And; | |||
6318 | else if (match(LogicOp, m_Or(m_OneUse(m_Value(Cond1)), | |||
6319 | m_OneUse(m_Value(Cond2))))) | |||
6320 | Opc = Instruction::Or; | |||
6321 | else | |||
6322 | continue; | |||
6323 | ||||
6324 | if (!match(Cond1, m_CombineOr(m_Cmp(), m_BinOp())) || | |||
6325 | !match(Cond2, m_CombineOr(m_Cmp(), m_BinOp())) ) | |||
6326 | continue; | |||
6327 | ||||
6328 |     DEBUG(dbgs() << "Before branch condition splitting\n"; BB.dump()); | |||
6329 | ||||
6330 | // Create a new BB. | |||
6331 | auto TmpBB = | |||
6332 | BasicBlock::Create(BB.getContext(), BB.getName() + ".cond.split", | |||
6333 | BB.getParent(), BB.getNextNode()); | |||
6334 | ||||
6335 |     // Update the original basic block to use the first condition directly in | |||
6336 |     // the branch instruction and remove the no-longer-needed and/or instruction. | |||
6337 | Br1->setCondition(Cond1); | |||
6338 | LogicOp->eraseFromParent(); | |||
6339 | ||||
6340 |     // Depending on the condition we have to replace either the true or the | |||
6341 |     // false successor of the original branch instruction. | |||
6342 | if (Opc == Instruction::And) | |||
6343 | Br1->setSuccessor(0, TmpBB); | |||
6344 | else | |||
6345 | Br1->setSuccessor(1, TmpBB); | |||
6346 | ||||
6347 | // Fill in the new basic block. | |||
6348 | auto *Br2 = IRBuilder<>(TmpBB).CreateCondBr(Cond2, TBB, FBB); | |||
6349 | if (auto *I = dyn_cast<Instruction>(Cond2)) { | |||
6350 | I->removeFromParent(); | |||
6351 | I->insertBefore(Br2); | |||
6352 | } | |||
6353 | ||||
6354 | // Update PHI nodes in both successors. The original BB needs to be | |||
6355 | // replaced in one successor's PHI nodes, because the branch comes now from | |||
6356 |     // the newly generated BB (TmpBB). In the other successor we need to add one | |||
6357 | // incoming edge to the PHI nodes, because both branch instructions target | |||
6358 | // now the same successor. Depending on the original branch condition | |||
6359 | // (and/or) we have to swap the successors (TrueDest, FalseDest), so that | |||
6360 | // we perform the correct update for the PHI nodes. | |||
6361 | // This doesn't change the successor order of the just created branch | |||
6362 | // instruction (or any other instruction). | |||
6363 | if (Opc == Instruction::Or) | |||
6364 | std::swap(TBB, FBB); | |||
6365 | ||||
6366 | // Replace the old BB with the new BB. | |||
6367 | for (auto &I : *TBB) { | |||
6368 | PHINode *PN = dyn_cast<PHINode>(&I); | |||
6369 | if (!PN) | |||
6370 | break; | |||
6371 | int i; | |||
6372 | while ((i = PN->getBasicBlockIndex(&BB)) >= 0) | |||
6373 | PN->setIncomingBlock(i, TmpBB); | |||
6374 | } | |||
6375 | ||||
6376 |     // Add another incoming edge from the new BB. | |||
6377 | for (auto &I : *FBB) { | |||
6378 | PHINode *PN = dyn_cast<PHINode>(&I); | |||
6379 | if (!PN) | |||
6380 | break; | |||
6381 | auto *Val = PN->getIncomingValueForBlock(&BB); | |||
6382 | PN->addIncoming(Val, TmpBB); | |||
6383 | } | |||
6384 | ||||
6385 | // Update the branch weights (from SelectionDAGBuilder:: | |||
6386 | // FindMergedConditions). | |||
6387 | if (Opc == Instruction::Or) { | |||
6388 | // Codegen X | Y as: | |||
6389 | // BB1: | |||
6390 | // jmp_if_X TBB | |||
6391 | // jmp TmpBB | |||
6392 | // TmpBB: | |||
6393 | // jmp_if_Y TBB | |||
6394 | // jmp FBB | |||
6395 | // | |||
6396 | ||||
6397 |       // We have flexibility in setting Prob for BB1 and Prob for TmpBB. | |||
6398 | // The requirement is that | |||
6399 | // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB) | |||
6400 |       //       = TrueProb for original BB. | |||
6401 |       // Assuming the original weights are A and B, one choice is to set BB1's | |||
6402 | // weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice | |||
6403 | // assumes that | |||
6404 | // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB. | |||
6405 | // Another choice is to assume TrueProb for BB1 equals to TrueProb for | |||
6406 | // TmpBB, but the math is more complicated. | |||
6407 | uint64_t TrueWeight, FalseWeight; | |||
6408 | if (Br1->extractProfMetadata(TrueWeight, FalseWeight)) { | |||
6409 | uint64_t NewTrueWeight = TrueWeight; | |||
6410 | uint64_t NewFalseWeight = TrueWeight + 2 * FalseWeight; | |||
6411 | scaleWeights(NewTrueWeight, NewFalseWeight); | |||
6412 |         Br1->setMetadata(LLVMContext::MD_prof, MDBuilder(Br1->getContext()) | |||
6413 |                          .createBranchWeights(NewTrueWeight, NewFalseWeight)); | |||
6414 | ||||
6415 | NewTrueWeight = TrueWeight; | |||
6416 | NewFalseWeight = 2 * FalseWeight; | |||
6417 | scaleWeights(NewTrueWeight, NewFalseWeight); | |||
6418 |         Br2->setMetadata(LLVMContext::MD_prof, MDBuilder(Br2->getContext()) | |||
6419 |                          .createBranchWeights(NewTrueWeight, NewFalseWeight)); | |||
6420 | } | |||
6421 | } else { | |||
6422 | // Codegen X & Y as: | |||
6423 | // BB1: | |||
6424 | // jmp_if_X TmpBB | |||
6425 | // jmp FBB | |||
6426 | // TmpBB: | |||
6427 | // jmp_if_Y TBB | |||
6428 | // jmp FBB | |||
6429 | // | |||
6430 | // This requires creation of TmpBB after CurBB. | |||
6431 | ||||
6432 | // We have flexibility in setting Prob for BB1 and Prob for TmpBB. | |||
6433 | // The requirement is that | |||
6434 | // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB) | |||
6435 |       //       = FalseProb for original BB. | |||
6436 |       // Assuming the original weights are A and B, one choice is to set BB1's | |||
6437 | // weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice | |||
6438 | // assumes that | |||
6439 | // FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB. | |||
6440 | uint64_t TrueWeight, FalseWeight; | |||
6441 | if (Br1->extractProfMetadata(TrueWeight, FalseWeight)) { | |||
6442 | uint64_t NewTrueWeight = 2 * TrueWeight + FalseWeight; | |||
6443 | uint64_t NewFalseWeight = FalseWeight; | |||
6444 | scaleWeights(NewTrueWeight, NewFalseWeight); | |||
6445 |         Br1->setMetadata(LLVMContext::MD_prof, MDBuilder(Br1->getContext()) | |||
6446 |                          .createBranchWeights(NewTrueWeight, NewFalseWeight)); | |||
6447 | ||||
6448 | NewTrueWeight = 2 * TrueWeight; | |||
6449 | NewFalseWeight = FalseWeight; | |||
6450 | scaleWeights(NewTrueWeight, NewFalseWeight); | |||
6451 |         Br2->setMetadata(LLVMContext::MD_prof, MDBuilder(Br2->getContext()) | |||
6452 |                          .createBranchWeights(NewTrueWeight, NewFalseWeight)); | |||
6453 | } | |||
6454 | } | |||
6455 | ||||
6456 | // Note: No point in getting fancy here, since the DT info is never | |||
6457 | // available to CodeGenPrepare. | |||
6458 | ModifiedDT = true; | |||
6459 | ||||
6460 | MadeChange = true; | |||
6461 | ||||
6462 |     DEBUG(dbgs() << "After branch condition splitting\n"; BB.dump(); | |||
6463 |           TmpBB->dump()); | |||
6464 | } | |||
6465 | return MadeChange; | |||
6466 | } |
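     | // A worked example for the 'or' case with original weights A = 48 (true) | |||
     | // and B = 16 (false): Br1 gets (48, 48 + 2*16) = (48, 80) and Br2 gets | |||
     | // (48, 2*16) = (48, 32). Then | |||
     | //   TrueProb(BB1) + FalseProb(BB1) * TrueProb(TmpBB) | |||
     | //     = 48/128 + (80/128) * (48/80) = 0.375 + 0.375 = 0.75 = 48/64, | |||
     | // matching the original TrueProb as required. | |||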