Bug Summary

File: lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
Warning: line 932, column 17
1st function call argument is an uninitialized value
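The path below enters Vectorizer::vectorizeLoadChain with a Chain whose begin and end iterators compare equal (event 43), so the loop at line 920 never runs, LoadTy (declared at line 919 with no initializer) is never assigned, and DL.getTypeSizeInBits(LoadTy) at line 932 reads an indeterminate value. A reduced sketch of the flagged pattern (hypothetical standalone code, not taken from the file):

  #include <vector>

  int firstOrBust(const std::vector<int> &Chain) {
    int Ty;                 // declared without an initial value
    for (int V : Chain) {   // analyzer assumes __begin == __end
      Ty = V;
      break;
    }
    return Ty + 1;          // uninitialized read when Chain is empty
  }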

Annotated Source Code

1//===- LoadStoreVectorizer.cpp - GPU Load & Store Vectorizer --------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#include "llvm/ADT/APInt.h"
11#include "llvm/ADT/ArrayRef.h"
12#include "llvm/ADT/MapVector.h"
13#include "llvm/ADT/PostOrderIterator.h"
14#include "llvm/ADT/STLExtras.h"
15#include "llvm/ADT/SmallPtrSet.h"
16#include "llvm/ADT/SmallVector.h"
17#include "llvm/ADT/Statistic.h"
18#include "llvm/ADT/iterator_range.h"
19#include "llvm/Analysis/AliasAnalysis.h"
20#include "llvm/Analysis/MemoryLocation.h"
21#include "llvm/Analysis/OrderedBasicBlock.h"
22#include "llvm/Analysis/ScalarEvolution.h"
23#include "llvm/Analysis/TargetTransformInfo.h"
24#include "llvm/Analysis/ValueTracking.h"
25#include "llvm/Analysis/VectorUtils.h"
26#include "llvm/IR/Attributes.h"
27#include "llvm/IR/BasicBlock.h"
28#include "llvm/IR/Constants.h"
29#include "llvm/IR/DataLayout.h"
30#include "llvm/IR/DerivedTypes.h"
31#include "llvm/IR/Dominators.h"
32#include "llvm/IR/Function.h"
33#include "llvm/IR/IRBuilder.h"
34#include "llvm/IR/InstrTypes.h"
35#include "llvm/IR/Instruction.h"
36#include "llvm/IR/Instructions.h"
37#include "llvm/IR/Module.h"
38#include "llvm/IR/Type.h"
39#include "llvm/IR/User.h"
40#include "llvm/IR/Value.h"
41#include "llvm/Pass.h"
42#include "llvm/Support/Casting.h"
43#include "llvm/Support/Debug.h"
44#include "llvm/Support/KnownBits.h"
45#include "llvm/Support/MathExtras.h"
46#include "llvm/Support/raw_ostream.h"
47#include "llvm/Transforms/Utils/Local.h"
48#include "llvm/Transforms/Vectorize.h"
49#include <algorithm>
50#include <cassert>
51#include <cstdlib>
52#include <tuple>
53#include <utility>
54
55using namespace llvm;
56
57#define DEBUG_TYPE "load-store-vectorizer"
58
59STATISTIC(NumVectorInstructions, "Number of vector accesses generated");
60STATISTIC(NumScalarsVectorized, "Number of scalar accesses vectorized");
61
62// FIXME: Assuming stack alignment of 4 is always good enough
63static const unsigned StackAdjustedAlignment = 4;
64
65namespace {
66
67using InstrList = SmallVector<Instruction *, 8>;
68using InstrListMap = MapVector<Value *, InstrList>;
69
70class Vectorizer {
71 Function &F;
72 AliasAnalysis &AA;
73 DominatorTree &DT;
74 ScalarEvolution &SE;
75 TargetTransformInfo &TTI;
76 const DataLayout &DL;
77 IRBuilder<> Builder;
78
79public:
80 Vectorizer(Function &F, AliasAnalysis &AA, DominatorTree &DT,
81 ScalarEvolution &SE, TargetTransformInfo &TTI)
82 : F(F), AA(AA), DT(DT), SE(SE), TTI(TTI),
83 DL(F.getParent()->getDataLayout()), Builder(SE.getContext()) {}
84
85 bool run();
86
87private:
88 Value *getPointerOperand(Value *I) const;
89
90 GetElementPtrInst *getSourceGEP(Value *Src) const;
91
92 unsigned getPointerAddressSpace(Value *I);
93
94 unsigned getAlignment(LoadInst *LI) const {
95 unsigned Align = LI->getAlignment();
96 if (Align != 0)
97 return Align;
98
99 return DL.getABITypeAlignment(LI->getType());
100 }
101
102 unsigned getAlignment(StoreInst *SI) const {
103 unsigned Align = SI->getAlignment();
104 if (Align != 0)
105 return Align;
106
107 return DL.getABITypeAlignment(SI->getValueOperand()->getType());
108 }
109
110 bool isConsecutiveAccess(Value *A, Value *B);
111
112 /// After vectorization, reorder the instructions that I depends on
113 /// (the instructions defining its operands), to ensure they dominate I.
114 void reorder(Instruction *I);
115
116 /// Returns the first and the last instructions in Chain.
117 std::pair<BasicBlock::iterator, BasicBlock::iterator>
118 getBoundaryInstrs(ArrayRef<Instruction *> Chain);
119
120 /// Erases the original instructions after vectorizing.
121 void eraseInstructions(ArrayRef<Instruction *> Chain);
122
123 /// "Legalize" the vector type that would be produced by combining \p
124 /// ElementSizeBits elements in \p Chain. Break into two pieces such that the
125 /// total size of each piece is 1, 2 or a multiple of 4 bytes. \p Chain is
126 /// expected to have more than 4 elements.
127 std::pair<ArrayRef<Instruction *>, ArrayRef<Instruction *>>
128 splitOddVectorElts(ArrayRef<Instruction *> Chain, unsigned ElementSizeBits);
129
130 /// Finds the largest prefix of Chain that's vectorizable, checking for
131 /// intervening instructions which may affect the memory accessed by the
132 /// instructions within Chain.
133 ///
134 /// The elements of \p Chain must be all loads or all stores and must be in
135 /// address order.
136 ArrayRef<Instruction *> getVectorizablePrefix(ArrayRef<Instruction *> Chain);
137
138 /// Collects load and store instructions to vectorize.
139 std::pair<InstrListMap, InstrListMap> collectInstructions(BasicBlock *BB);
140
141 /// Processes the collected instructions, the \p Map. The values of \p Map
142 /// should be all loads or all stores.
143 bool vectorizeChains(InstrListMap &Map);
144
145 /// Finds the load/stores to consecutive memory addresses and vectorizes them.
146 bool vectorizeInstructions(ArrayRef<Instruction *> Instrs);
147
148 /// Vectorizes the load instructions in Chain.
149 bool
150 vectorizeLoadChain(ArrayRef<Instruction *> Chain,
151 SmallPtrSet<Instruction *, 16> *InstructionsProcessed);
152
153 /// Vectorizes the store instructions in Chain.
154 bool
155 vectorizeStoreChain(ArrayRef<Instruction *> Chain,
156 SmallPtrSet<Instruction *, 16> *InstructionsProcessed);
157
158 /// Check if this load/store access is a misaligned access.
159 bool accessIsMisaligned(unsigned SzInBytes, unsigned AddressSpace,
160 unsigned Alignment);
161};
162
163class LoadStoreVectorizer : public FunctionPass {
164public:
165 static char ID;
166
167 LoadStoreVectorizer() : FunctionPass(ID) {
168 initializeLoadStoreVectorizerPass(*PassRegistry::getPassRegistry());
169 }
170
171 bool runOnFunction(Function &F) override;
172
173 StringRef getPassName() const override {
174 return "GPU Load and Store Vectorizer";
175 }
176
177 void getAnalysisUsage(AnalysisUsage &AU) const override {
178 AU.addRequired<AAResultsWrapperPass>();
179 AU.addRequired<ScalarEvolutionWrapperPass>();
180 AU.addRequired<DominatorTreeWrapperPass>();
181 AU.addRequired<TargetTransformInfoWrapperPass>();
182 AU.setPreservesCFG();
183 }
184};
185
186} // end anonymous namespace
187
188char LoadStoreVectorizer::ID = 0;
189
190INITIALIZE_PASS_BEGIN(LoadStoreVectorizer, DEBUG_TYPE,
191 "Vectorize load and Store instructions", false, false)
192INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)
193INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
194INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
195INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
196INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
197INITIALIZE_PASS_END(LoadStoreVectorizer, DEBUG_TYPE,
198 "Vectorize load and store instructions", false, false)
199
200Pass *llvm::createLoadStoreVectorizerPass() {
201 return new LoadStoreVectorizer();
202}
203
204// The real propagateMetadata expects a SmallVector<Value*>, but we deal in
205// vectors of Instructions.
206static void propagateMetadata(Instruction *I, ArrayRef<Instruction *> IL) {
207 SmallVector<Value *, 8> VL(IL.begin(), IL.end());
208 propagateMetadata(I, VL);
209}
210
211bool LoadStoreVectorizer::runOnFunction(Function &F) {
212 // Don't vectorize when the attribute NoImplicitFloat is used.
213 if (skipFunction(F) || F.hasFnAttribute(Attribute::NoImplicitFloat))
1. Assuming the condition is false
2. Assuming the condition is false
3. Taking false branch
214 return false;
215
216 AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
217 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
218 ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
219 TargetTransformInfo &TTI =
220 getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
221
222 Vectorizer V(F, AA, DT, SE, TTI);
223 return V.run();
4. Calling 'Vectorizer::run'
224}
225
226// Vectorizer Implementation
227bool Vectorizer::run() {
228 bool Changed = false;
229
230 // Scan the blocks in the function in post order.
231 for (BasicBlock *BB : post_order(&F)) {
232 InstrListMap LoadRefs, StoreRefs;
233 std::tie(LoadRefs, StoreRefs) = collectInstructions(BB);
234 Changed |= vectorizeChains(LoadRefs);
235 Changed |= vectorizeChains(StoreRefs);
5. Calling 'Vectorizer::vectorizeChains'
236 }
237
238 return Changed;
239}
240
241Value *Vectorizer::getPointerOperand(Value *I) const {
242 if (LoadInst *LI = dyn_cast<LoadInst>(I))
243 return LI->getPointerOperand();
244 if (StoreInst *SI = dyn_cast<StoreInst>(I))
245 return SI->getPointerOperand();
246 return nullptr;
247}
248
249unsigned Vectorizer::getPointerAddressSpace(Value *I) {
250 if (LoadInst *L = dyn_cast<LoadInst>(I))
251 return L->getPointerAddressSpace();
252 if (StoreInst *S = dyn_cast<StoreInst>(I))
253 return S->getPointerAddressSpace();
254 return -1;
255}
256
257GetElementPtrInst *Vectorizer::getSourceGEP(Value *Src) const {
258 // First strip pointer bitcasts. Make sure pointee size is the same with
259 // and without casts.
260 // TODO: a stride set by the add instruction below can match the difference
261 // in pointee type size here. Currently it will not be vectorized.
262 Value *SrcPtr = getPointerOperand(Src);
263 Value *SrcBase = SrcPtr->stripPointerCasts();
264 if (DL.getTypeStoreSize(SrcPtr->getType()->getPointerElementType()) ==
265 DL.getTypeStoreSize(SrcBase->getType()->getPointerElementType()))
266 SrcPtr = SrcBase;
267 return dyn_cast<GetElementPtrInst>(SrcPtr);
268}
269
270// FIXME: Merge with llvm::isConsecutiveAccess
271bool Vectorizer::isConsecutiveAccess(Value *A, Value *B) {
272 Value *PtrA = getPointerOperand(A);
273 Value *PtrB = getPointerOperand(B);
274 unsigned ASA = getPointerAddressSpace(A);
275 unsigned ASB = getPointerAddressSpace(B);
276
277 // Check that the address spaces match and that the pointers are valid.
278 if (!PtrA || !PtrB || (ASA != ASB))
279 return false;
280
281 // Make sure that A and B are different pointers of the same size type.
282 unsigned PtrBitWidth = DL.getPointerSizeInBits(ASA);
283 Type *PtrATy = PtrA->getType()->getPointerElementType();
284 Type *PtrBTy = PtrB->getType()->getPointerElementType();
285 if (PtrA == PtrB ||
286 DL.getTypeStoreSize(PtrATy) != DL.getTypeStoreSize(PtrBTy) ||
287 DL.getTypeStoreSize(PtrATy->getScalarType()) !=
288 DL.getTypeStoreSize(PtrBTy->getScalarType()))
289 return false;
290
291 APInt Size(PtrBitWidth, DL.getTypeStoreSize(PtrATy));
292
293 APInt OffsetA(PtrBitWidth, 0), OffsetB(PtrBitWidth, 0);
294 PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
295 PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);
296
297 APInt OffsetDelta = OffsetB - OffsetA;
298
299 // Check if they are based on the same pointer. That makes the offsets
300 // sufficient.
301 if (PtrA == PtrB)
302 return OffsetDelta == Size;
303
304 // Compute the necessary base pointer delta to have the necessary final delta
305 // equal to the size.
306 APInt BaseDelta = Size - OffsetDelta;
307
308 // Compute the distance with SCEV between the base pointers.
309 const SCEV *PtrSCEVA = SE.getSCEV(PtrA);
310 const SCEV *PtrSCEVB = SE.getSCEV(PtrB);
311 const SCEV *C = SE.getConstant(BaseDelta);
312 const SCEV *X = SE.getAddExpr(PtrSCEVA, C);
313 if (X == PtrSCEVB)
314 return true;
315
316 // Sometimes even this doesn't work, because SCEV can't always see through
317 // patterns that look like (gep (ext (add (shl X, C1), C2))). Try checking
318 // things the hard way.
319
320 // Look through GEPs after checking they're the same except for the last
321 // index.
322 GetElementPtrInst *GEPA = getSourceGEP(A);
323 GetElementPtrInst *GEPB = getSourceGEP(B);
324 if (!GEPA || !GEPB || GEPA->getNumOperands() != GEPB->getNumOperands())
325 return false;
326 unsigned FinalIndex = GEPA->getNumOperands() - 1;
327 for (unsigned i = 0; i < FinalIndex; i++)
328 if (GEPA->getOperand(i) != GEPB->getOperand(i))
329 return false;
330
331 Instruction *OpA = dyn_cast<Instruction>(GEPA->getOperand(FinalIndex));
332 Instruction *OpB = dyn_cast<Instruction>(GEPB->getOperand(FinalIndex));
333 if (!OpA || !OpB || OpA->getOpcode() != OpB->getOpcode() ||
334 OpA->getType() != OpB->getType())
335 return false;
336
337 // Only look through a ZExt/SExt.
338 if (!isa<SExtInst>(OpA) && !isa<ZExtInst>(OpA))
339 return false;
340
341 bool Signed = isa<SExtInst>(OpA);
342
343 OpA = dyn_cast<Instruction>(OpA->getOperand(0));
344 OpB = dyn_cast<Instruction>(OpB->getOperand(0));
345 if (!OpA || !OpB || OpA->getType() != OpB->getType())
346 return false;
347
348 // Now we need to prove that adding 1 to OpA won't overflow.
349 bool Safe = false;
350 // First attempt: if OpB is an add with NSW/NUW, and OpB is 1 added to OpA,
351 // we're okay.
352 if (OpB->getOpcode() == Instruction::Add &&
353 isa<ConstantInt>(OpB->getOperand(1)) &&
354 cast<ConstantInt>(OpB->getOperand(1))->getSExtValue() > 0) {
355 if (Signed)
356 Safe = cast<BinaryOperator>(OpB)->hasNoSignedWrap();
357 else
358 Safe = cast<BinaryOperator>(OpB)->hasNoUnsignedWrap();
359 }
360
361 unsigned BitWidth = OpA->getType()->getScalarSizeInBits();
362
363 // Second attempt:
364 // If any bits are known to be zero other than the sign bit in OpA, we can
365 // add 1 to it while guaranteeing no overflow of any sort.
366 if (!Safe) {
367 KnownBits Known(BitWidth);
368 computeKnownBits(OpA, Known, DL, 0, nullptr, OpA, &DT);
369 if (Known.countMaxTrailingOnes() < (BitWidth - 1))
370 Safe = true;
371 }
372
373 if (!Safe)
374 return false;
375
376 const SCEV *OffsetSCEVA = SE.getSCEV(OpA);
377 const SCEV *OffsetSCEVB = SE.getSCEV(OpB);
378 const SCEV *One = SE.getConstant(APInt(BitWidth, 1));
379 const SCEV *X2 = SE.getAddExpr(OffsetSCEVA, One);
380 return X2 == OffsetSCEVB;
381}
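The trailing-ones check at line 369 carries the overflow argument: OpA + 1 wraps, signed or unsigned, only when the low BitWidth - 1 bits of OpA are all ones (OpA is 0b0111...1 or 0b1111...1). If known-bits analysis shows that fewer than BitWidth - 1 trailing bits can be one simultaneously, neither pattern is reachable. As an illustration with assumed values: for BitWidth = 8 with bit 2 known zero, countMaxTrailingOnes() is at most 2 < 7, so the carry from adding 1 stops by bit 2 and no wrap of either kind can occur.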
382
383void Vectorizer::reorder(Instruction *I) {
384 OrderedBasicBlock OBB(I->getParent());
385 SmallPtrSet<Instruction *, 16> InstructionsToMove;
386 SmallVector<Instruction *, 16> Worklist;
387
388 Worklist.push_back(I);
389 while (!Worklist.empty()) {
390 Instruction *IW = Worklist.pop_back_val();
391 int NumOperands = IW->getNumOperands();
392 for (int i = 0; i < NumOperands; i++) {
393 Instruction *IM = dyn_cast<Instruction>(IW->getOperand(i));
394 if (!IM || IM->getOpcode() == Instruction::PHI)
395 continue;
396
397 // If IM is in another BB, no need to move it, because this pass only
398 // vectorizes instructions within one BB.
399 if (IM->getParent() != I->getParent())
400 continue;
401
402 if (!OBB.dominates(IM, I)) {
403 InstructionsToMove.insert(IM);
404 Worklist.push_back(IM);
405 }
406 }
407 }
408
409 // All instructions to move should follow I. Start from I, not from begin().
410 for (auto BBI = I->getIterator(), E = I->getParent()->end(); BBI != E;
411 ++BBI) {
412 if (!InstructionsToMove.count(&*BBI))
413 continue;
414 Instruction *IM = &*BBI;
415 --BBI;
416 IM->removeFromParent();
417 IM->insertBefore(I);
418 }
419}
420
421std::pair<BasicBlock::iterator, BasicBlock::iterator>
422Vectorizer::getBoundaryInstrs(ArrayRef<Instruction *> Chain) {
423 Instruction *C0 = Chain[0];
424 BasicBlock::iterator FirstInstr = C0->getIterator();
425 BasicBlock::iterator LastInstr = C0->getIterator();
426
427 BasicBlock *BB = C0->getParent();
428 unsigned NumFound = 0;
429 for (Instruction &I : *BB) {
430 if (!is_contained(Chain, &I))
431 continue;
432
433 ++NumFound;
434 if (NumFound == 1) {
435 FirstInstr = I.getIterator();
436 }
437 if (NumFound == Chain.size()) {
438 LastInstr = I.getIterator();
439 break;
440 }
441 }
442
443 // Range is [first, last).
444 return std::make_pair(FirstInstr, ++LastInstr);
445}
446
447void Vectorizer::eraseInstructions(ArrayRef<Instruction *> Chain) {
448 SmallVector<Instruction *, 16> Instrs;
449 for (Instruction *I : Chain) {
450 Value *PtrOperand = getPointerOperand(I);
451 assert(PtrOperand && "Instruction must have a pointer operand.");
452 Instrs.push_back(I);
453 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(PtrOperand))
454 Instrs.push_back(GEP);
455 }
456
457 // Erase instructions.
458 for (Instruction *I : Instrs)
459 if (I->use_empty())
460 I->eraseFromParent();
461}
462
463std::pair<ArrayRef<Instruction *>, ArrayRef<Instruction *>>
464Vectorizer::splitOddVectorElts(ArrayRef<Instruction *> Chain,
465 unsigned ElementSizeBits) {
466 unsigned ElementSizeBytes = ElementSizeBits / 8;
467 unsigned SizeBytes = ElementSizeBytes * Chain.size();
468 unsigned NumLeft = (SizeBytes - (SizeBytes % 4)) / ElementSizeBytes;
469 if (NumLeft == Chain.size()) {
470 if ((NumLeft & 1) == 0)
471 NumLeft /= 2; // Split even in half
472 else
473 --NumLeft; // Split off last element
474 } else if (NumLeft == 0)
475 NumLeft = 1;
476 return std::make_pair(Chain.slice(0, NumLeft), Chain.slice(NumLeft));
477}
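A worked example of the split, with assumed inputs: for ElementSizeBits = 16 and a 7-element Chain, ElementSizeBytes = 2 and SizeBytes = 14, so NumLeft = (14 - 14 % 4) / 2 = 6. Since 6 is neither Chain.size() nor 0, the result is a 6-element prefix (12 bytes, a multiple of 4) and a 1-element remainder (2 bytes), matching the 1, 2, or multiple-of-4 contract documented at lines 123-128.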
478
479ArrayRef<Instruction *>
480Vectorizer::getVectorizablePrefix(ArrayRef<Instruction *> Chain) {
481 // These are in BB order, unlike Chain, which is in address order.
482 SmallVector<Instruction *, 16> MemoryInstrs;
483 SmallVector<Instruction *, 16> ChainInstrs;
484
485 bool IsLoadChain = isa<LoadInst>(Chain[0]);
486 DEBUG({
487 for (Instruction *I : Chain) {
488 if (IsLoadChain)
489 assert(isa<LoadInst>(I) &&
490 "All elements of Chain must be loads, or all must be stores.");
491 else
492 assert(isa<StoreInst>(I) &&
493 "All elements of Chain must be loads, or all must be stores.");
494 }
495 });
496
497 for (Instruction &I : make_range(getBoundaryInstrs(Chain))) {
498 if (isa<LoadInst>(I) || isa<StoreInst>(I)) {
499 if (!is_contained(Chain, &I))
500 MemoryInstrs.push_back(&I);
501 else
502 ChainInstrs.push_back(&I);
503 } else if (IsLoadChain && (I.mayWriteToMemory() || I.mayThrow())) {
504 DEBUG(dbgs() << "LSV: Found may-write/throw operation: " << I << '\n');
505 break;
506 } else if (!IsLoadChain && (I.mayReadOrWriteMemory() || I.mayThrow())) {
507 DEBUG(dbgs() << "LSV: Found may-read/write/throw operation: " << I
508 << '\n');
509 break;
510 }
511 }
512
513 OrderedBasicBlock OBB(Chain[0]->getParent());
514
515 // Loop until we find an instruction in ChainInstrs that we can't vectorize.
516 unsigned ChainInstrIdx = 0;
517 Instruction *BarrierMemoryInstr = nullptr;
518
519 for (unsigned E = ChainInstrs.size(); ChainInstrIdx < E; ++ChainInstrIdx) {
520 Instruction *ChainInstr = ChainInstrs[ChainInstrIdx];
521
522 // If a barrier memory instruction was found, chain instructions that follow
523 // will not be added to the valid prefix.
524 if (BarrierMemoryInstr && OBB.dominates(BarrierMemoryInstr, ChainInstr))
525 break;
526
527 // Check (in BB order) if any instruction prevents ChainInstr from being
528 // vectorized. Find and store the first such "conflicting" instruction.
529 for (Instruction *MemInstr : MemoryInstrs) {
530 // If a barrier memory instruction was found, do not check past it.
531 if (BarrierMemoryInstr && OBB.dominates(BarrierMemoryInstr, MemInstr))
532 break;
533
534 if (isa<LoadInst>(MemInstr) && isa<LoadInst>(ChainInstr))
535 continue;
536
537 // We can ignore the alias as long as the load comes before the store,
538 // because that means we won't be moving the load past the store to
539 // vectorize it (the vectorized load is inserted at the location of the
540 // first load in the chain).
541 if (isa<StoreInst>(MemInstr) && isa<LoadInst>(ChainInstr) &&
542 OBB.dominates(ChainInstr, MemInstr))
543 continue;
544
545 // Same case, but in reverse.
546 if (isa<LoadInst>(MemInstr) && isa<StoreInst>(ChainInstr) &&
547 OBB.dominates(MemInstr, ChainInstr))
548 continue;
549
550 if (!AA.isNoAlias(MemoryLocation::get(MemInstr),
551 MemoryLocation::get(ChainInstr))) {
552 DEBUG({
553 dbgs() << "LSV: Found alias:\n"
554 "  Aliasing instruction and pointer:\n"
555 << "  " << *MemInstr << '\n'
556 << "  " << *getPointerOperand(MemInstr) << '\n'
557 << "  Aliased instruction and pointer:\n"
558 << "  " << *ChainInstr << '\n'
559 << "  " << *getPointerOperand(ChainInstr) << '\n';
560 });
561 // Save this aliasing memory instruction as a barrier, but allow other
562 // instructions that precede the barrier to be vectorized with this one.
563 BarrierMemoryInstr = MemInstr;
564 break;
565 }
566 }
567 // Continue the search only for store chains, since vectorizing stores that
568 // precede an aliasing load is valid. Conversely, vectorizing loads is valid
569 // up to an aliasing store, but should not pull loads from further down in
570 // the basic block.
571 if (IsLoadChain && BarrierMemoryInstr) {
572 // The BarrierMemoryInstr is a store that precedes ChainInstr.
573 assert(OBB.dominates(BarrierMemoryInstr, ChainInstr));
574 break;
575 }
576 }
577
578 // Find the largest prefix of Chain whose elements are all in
579 // ChainInstrs[0, ChainInstrIdx). This is the largest vectorizable prefix of
580 // Chain. (Recall that Chain is in address order, but ChainInstrs is in BB
581 // order.)
582 SmallPtrSet<Instruction *, 8> VectorizableChainInstrs(
583 ChainInstrs.begin(), ChainInstrs.begin() + ChainInstrIdx);
584 unsigned ChainIdx = 0;
585 for (unsigned ChainLen = Chain.size(); ChainIdx < ChainLen; ++ChainIdx) {
586 if (!VectorizableChainInstrs.count(Chain[ChainIdx]))
587 break;
588 }
589 return Chain.slice(0, ChainIdx);
590}
591
592std::pair<InstrListMap, InstrListMap>
593Vectorizer::collectInstructions(BasicBlock *BB) {
594 InstrListMap LoadRefs;
595 InstrListMap StoreRefs;
596
597 for (Instruction &I : *BB) {
598 if (!I.mayReadOrWriteMemory())
599 continue;
600
601 if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
602 if (!LI->isSimple())
603 continue;
604
605 // Skip if it's not legal.
606 if (!TTI.isLegalToVectorizeLoad(LI))
607 continue;
608
609 Type *Ty = LI->getType();
610 if (!VectorType::isValidElementType(Ty->getScalarType()))
611 continue;
612
613 // Skip weird non-byte sizes. They probably aren't worth the effort of
614 // handling correctly.
615 unsigned TySize = DL.getTypeSizeInBits(Ty);
616 if (TySize < 8)
617 continue;
618
619 Value *Ptr = LI->getPointerOperand();
620 unsigned AS = Ptr->getType()->getPointerAddressSpace();
621 unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS);
622
623 // No point in looking at these if they're too big to vectorize.
624 if (TySize > VecRegSize / 2)
625 continue;
626
627 // Make sure all the users of a vector are constant-index extracts.
628 if (isa<VectorType>(Ty) && !llvm::all_of(LI->users(), [](const User *U) {
629 const ExtractElementInst *EEI = dyn_cast<ExtractElementInst>(U);
630 return EEI && isa<ConstantInt>(EEI->getOperand(1));
631 }))
632 continue;
633
634 // Save the load locations.
635 Value *ObjPtr = GetUnderlyingObject(Ptr, DL);
636 LoadRefs[ObjPtr].push_back(LI);
637 } else if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
638 if (!SI->isSimple())
639 continue;
640
641 // Skip if it's not legal.
642 if (!TTI.isLegalToVectorizeStore(SI))
643 continue;
644
645 Type *Ty = SI->getValueOperand()->getType();
646 if (!VectorType::isValidElementType(Ty->getScalarType()))
647 continue;
648
649 // Skip weird non-byte sizes. They probably aren't worth the effort of
650 // handling correctly.
651 unsigned TySize = DL.getTypeSizeInBits(Ty);
652 if (TySize < 8)
653 continue;
654
655 Value *Ptr = SI->getPointerOperand();
656 unsigned AS = Ptr->getType()->getPointerAddressSpace();
657 unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS);
658 if (TySize > VecRegSize / 2)
659 continue;
660
661 if (isa<VectorType>(Ty) && !llvm::all_of(SI->users(), [](const User *U) {
662 const ExtractElementInst *EEI = dyn_cast<ExtractElementInst>(U);
663 return EEI && isa<ConstantInt>(EEI->getOperand(1));
664 }))
665 continue;
666
667 // Save store location.
668 Value *ObjPtr = GetUnderlyingObject(Ptr, DL);
669 StoreRefs[ObjPtr].push_back(SI);
670 }
671 }
672
673 return {LoadRefs, StoreRefs};
674}
675
676bool Vectorizer::vectorizeChains(InstrListMap &Map) {
677 bool Changed = false;
678
679 for (const std::pair<Value *, InstrList> &Chain : Map) {
680 unsigned Size = Chain.second.size();
681 if (Size < 2)
6. Assuming 'Size' is >= 2
7. Taking false branch
682 continue;
683
684 DEBUG(dbgs() << "LSV: Analyzing a chain of length " << Size << ".\n");
685
686 // Process the stores in chunks of 64.
687 for (unsigned CI = 0, CE = Size; CI < CE; CI += 64) {
8. Loop condition is true. Entering loop body
688 unsigned Len = std::min<unsigned>(CE - CI, 64);
689 ArrayRef<Instruction *> Chunk(&Chain.second[CI], Len);
690 Changed |= vectorizeInstructions(Chunk);
9. Calling 'Vectorizer::vectorizeInstructions'
691 }
692 }
693
694 return Changed;
695}
696
697bool Vectorizer::vectorizeInstructions(ArrayRef<Instruction *> Instrs) {
698 DEBUG(dbgs() << "LSV: Vectorizing " << Instrs.size() << " instructions.\n");
699 SmallVector<int, 16> Heads, Tails;
700 int ConsecutiveChain[64];
701
702 // Do a quadratic search on all of the given stores and find all of the pairs
703 // of stores that follow each other.
704 for (int i = 0, e = Instrs.size(); i < e; ++i) {
10. Assuming 'i' is >= 'e'
11. Loop condition is false. Execution continues on line 725
705 ConsecutiveChain[i] = -1;
706 for (int j = e - 1; j >= 0; --j) {
707 if (i == j)
708 continue;
709
710 if (isConsecutiveAccess(Instrs[i], Instrs[j])) {
711 if (ConsecutiveChain[i] != -1) {
712 int CurDistance = std::abs(ConsecutiveChain[i] - i);
713 int NewDistance = std::abs(ConsecutiveChain[i] - j);
714 if (j < i || NewDistance > CurDistance)
715 continue; // Should not insert.
716 }
717
718 Tails.push_back(j);
719 Heads.push_back(i);
720 ConsecutiveChain[i] = j;
721 }
722 }
723 }
724
725 bool Changed = false;
726 SmallPtrSet<Instruction *, 16> InstructionsProcessed;
727
728 for (int Head : Heads) {
12. Assuming '__begin' is not equal to '__end'
729 if (InstructionsProcessed.count(Instrs[Head]))
13. Assuming the condition is false
14. Taking false branch
20. Assuming the condition is false
21. Taking false branch
27. Assuming the condition is false
28. Taking false branch
34. Assuming the condition is false
35. Taking false branch
730 continue;
731 bool LongerChainExists = false;
732 for (unsigned TIt = 0; TIt < Tails.size(); TIt++)
15. Assuming the condition is false
16. Loop condition is false. Execution continues on line 738
22. Assuming the condition is false
23. Loop condition is false. Execution continues on line 738
29. Assuming the condition is false
30. Loop condition is false. Execution continues on line 738
36. Assuming the condition is false
37. Loop condition is false. Execution continues on line 738
733 if (Head == Tails[TIt] &&
734 !InstructionsProcessed.count(Instrs[Heads[TIt]])) {
735 LongerChainExists = true;
736 break;
737 }
738 if (LongerChainExists)
17. Taking false branch
24. Taking false branch
31. Taking false branch
38. Taking false branch
739 continue;
740
741 // We found an instr that starts a chain. Now follow the chain and try to
742 // vectorize it.
743 SmallVector<Instruction *, 16> Operands;
744 int I = Head;
745 while (I != -1 && (is_contained(Tails, I) || is_contained(Heads, I))) {
18. Assuming the condition is false
25. Assuming the condition is false
32. Assuming the condition is false
39. Assuming the condition is false
746 if (InstructionsProcessed.count(Instrs[I]))
747 break;
748
749 Operands.push_back(Instrs[I]);
750 I = ConsecutiveChain[I];
751 }
752
753 bool Vectorized = false;
754 if (isa<LoadInst>(*Operands.begin()))
19. Taking false branch
26. Taking false branch
33. Taking false branch
40. Taking true branch
755 Vectorized = vectorizeLoadChain(Operands, &InstructionsProcessed);
41. Calling 'Vectorizer::vectorizeLoadChain'
756 else
757 Vectorized = vectorizeStoreChain(Operands, &InstructionsProcessed);
758
759 Changed |= Vectorized;
760 }
761
762 return Changed;
763}
764
765bool Vectorizer::vectorizeStoreChain(
766 ArrayRef<Instruction *> Chain,
767 SmallPtrSet<Instruction *, 16> *InstructionsProcessed) {
768 StoreInst *S0 = cast<StoreInst>(Chain[0]);
769
770 // If the vector has an int element, default to int for the whole store.
771 Type *StoreTy;
772 for (Instruction *I : Chain) {
773 StoreTy = cast<StoreInst>(I)->getValueOperand()->getType();
774 if (StoreTy->isIntOrIntVectorTy())
775 break;
776
777 if (StoreTy->isPtrOrPtrVectorTy()) {
778 StoreTy = Type::getIntNTy(F.getParent()->getContext(),
779 DL.getTypeSizeInBits(StoreTy));
780 break;
781 }
782 }
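Note: this function shares the shape flagged below in vectorizeLoadChain. StoreTy (line 771) is likewise declared without an initializer and only assigned inside the loop over Chain, so an empty Chain would leave it uninitialized at its first use on line 784; the guard sketched after the warning below would apply here as well.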
783
784 unsigned Sz = DL.getTypeSizeInBits(StoreTy);
785 unsigned AS = S0->getPointerAddressSpace();
786 unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS);
787 unsigned VF = VecRegSize / Sz;
788 unsigned ChainSize = Chain.size();
789 unsigned Alignment = getAlignment(S0);
790
791 if (!isPowerOf2_32(Sz) || VF < 2 || ChainSize < 2) {
792 InstructionsProcessed->insert(Chain.begin(), Chain.end());
793 return false;
794 }
795
796 ArrayRef<Instruction *> NewChain = getVectorizablePrefix(Chain);
797 if (NewChain.empty()) {
798 // No vectorization possible.
799 InstructionsProcessed->insert(Chain.begin(), Chain.end());
800 return false;
801 }
802 if (NewChain.size() == 1) {
803 // Failed after the first instruction. Discard it and try the smaller chain.
804 InstructionsProcessed->insert(NewChain.front());
805 return false;
806 }
807
808 // Update Chain to the valid vectorizable subchain.
809 Chain = NewChain;
810 ChainSize = Chain.size();
811
812 // Check if it's legal to vectorize this chain. If not, split the chain and
813 // try again.
814 unsigned EltSzInBytes = Sz / 8;
815 unsigned SzInBytes = EltSzInBytes * ChainSize;
816 if (!TTI.isLegalToVectorizeStoreChain(SzInBytes, Alignment, AS)) {
817 auto Chains = splitOddVectorElts(Chain, Sz);
818 return vectorizeStoreChain(Chains.first, InstructionsProcessed) |
819 vectorizeStoreChain(Chains.second, InstructionsProcessed);
820 }
821
822 VectorType *VecTy;
823 VectorType *VecStoreTy = dyn_cast<VectorType>(StoreTy);
824 if (VecStoreTy)
825 VecTy = VectorType::get(StoreTy->getScalarType(),
826 Chain.size() * VecStoreTy->getNumElements());
827 else
828 VecTy = VectorType::get(StoreTy, Chain.size());
829
830 // If it's more than the max vector size or the target has a better
831 // vector factor, break it into two pieces.
832 unsigned TargetVF = TTI.getStoreVectorFactor(VF, Sz, SzInBytes, VecTy);
833 if (ChainSize > VF || (VF != TargetVF && TargetVF < ChainSize)) {
834 DEBUG(dbgs() << "LSV: Chain doesn't match with the vector factor."
835 " Creating two separate arrays.\n");
836 return vectorizeStoreChain(Chain.slice(0, TargetVF),
837 InstructionsProcessed) |
838 vectorizeStoreChain(Chain.slice(TargetVF), InstructionsProcessed);
839 }
840
841 DEBUG({
842 dbgs() << "LSV: Stores to vectorize:\n";
843 for (Instruction *I : Chain)
844 dbgs() << "  " << *I << "\n";
845 });
846
847 // We won't try again to vectorize the elements of the chain, regardless of
848 // whether we succeed below.
849 InstructionsProcessed->insert(Chain.begin(), Chain.end());
850
851 // If the store is going to be misaligned, don't vectorize it.
852 if (accessIsMisaligned(SzInBytes, AS, Alignment)) {
853 if (S0->getPointerAddressSpace() != 0)
854 return false;
855
856 unsigned NewAlign = getOrEnforceKnownAlignment(S0->getPointerOperand(),
857 StackAdjustedAlignment,
858 DL, S0, nullptr, &DT);
859 if (NewAlign < StackAdjustedAlignment)
860 return false;
861 }
862
863 BasicBlock::iterator First, Last;
864 std::tie(First, Last) = getBoundaryInstrs(Chain);
865 Builder.SetInsertPoint(&*Last);
866
867 Value *Vec = UndefValue::get(VecTy);
868
869 if (VecStoreTy) {
870 unsigned VecWidth = VecStoreTy->getNumElements();
871 for (unsigned I = 0, E = Chain.size(); I != E; ++I) {
872 StoreInst *Store = cast<StoreInst>(Chain[I]);
873 for (unsigned J = 0, NE = VecStoreTy->getNumElements(); J != NE; ++J) {
874 unsigned NewIdx = J + I * VecWidth;
875 Value *Extract = Builder.CreateExtractElement(Store->getValueOperand(),
876 Builder.getInt32(J));
877 if (Extract->getType() != StoreTy->getScalarType())
878 Extract = Builder.CreateBitCast(Extract, StoreTy->getScalarType());
879
880 Value *Insert =
881 Builder.CreateInsertElement(Vec, Extract, Builder.getInt32(NewIdx));
882 Vec = Insert;
883 }
884 }
885 } else {
886 for (unsigned I = 0, E = Chain.size(); I != E; ++I) {
887 StoreInst *Store = cast<StoreInst>(Chain[I]);
888 Value *Extract = Store->getValueOperand();
889 if (Extract->getType() != StoreTy->getScalarType())
890 Extract =
891 Builder.CreateBitOrPointerCast(Extract, StoreTy->getScalarType());
892
893 Value *Insert =
894 Builder.CreateInsertElement(Vec, Extract, Builder.getInt32(I));
895 Vec = Insert;
896 }
897 }
898
899 // This cast is safe because Builder.CreateStore() always creates a bona fide
900 // StoreInst.
901 StoreInst *SI = cast<StoreInst>(
902 Builder.CreateStore(Vec, Builder.CreateBitCast(S0->getPointerOperand(),
903 VecTy->getPointerTo(AS))));
904 propagateMetadata(SI, Chain);
905 SI->setAlignment(Alignment);
906
907 eraseInstructions(Chain);
908 ++NumVectorInstructions;
909 NumScalarsVectorized += Chain.size();
910 return true;
911}
912
913bool Vectorizer::vectorizeLoadChain(
914 ArrayRef<Instruction *> Chain,
915 SmallPtrSet<Instruction *, 16> *InstructionsProcessed) {
916 LoadInst *L0 = cast<LoadInst>(Chain[0]);
917
918 // If the vector has an int element, default to int for the whole load.
919 Type *LoadTy;
42. 'LoadTy' declared without an initial value
920 for (const auto &V : Chain) {
43. Assuming '__begin' is equal to '__end'
921 LoadTy = cast<LoadInst>(V)->getType();
922 if (LoadTy->isIntOrIntVectorTy())
923 break;
924
925 if (LoadTy->isPtrOrPtrVectorTy()) {
926 LoadTy = Type::getIntNTy(F.getParent()->getContext(),
927 DL.getTypeSizeInBits(LoadTy));
928 break;
929 }
930 }
931
932 unsigned Sz = DL.getTypeSizeInBits(LoadTy);
44. 1st function call argument is an uninitialized value
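One way to harden this path, a minimal sketch assuming an empty chain should simply be rejected (an illustration, not necessarily the upstream fix): initialize LoadTy and bail out before it is used.

  Type *LoadTy = nullptr;
  for (const auto &V : Chain) {
    LoadTy = cast<LoadInst>(V)->getType();
    if (LoadTy->isIntOrIntVectorTy())
      break;
    if (LoadTy->isPtrOrPtrVectorTy()) {
      LoadTy = Type::getIntNTy(F.getParent()->getContext(),
                               DL.getTypeSizeInBits(LoadTy));
      break;
    }
  }
  if (!LoadTy) // empty Chain: nothing to vectorize, and no type to size
    return false;
  unsigned Sz = DL.getTypeSizeInBits(LoadTy);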
933 unsigned AS = L0->getPointerAddressSpace();
934 unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS);
935 unsigned VF = VecRegSize / Sz;
936 unsigned ChainSize = Chain.size();
937 unsigned Alignment = getAlignment(L0);
938
939 if (!isPowerOf2_32(Sz) || VF < 2 || ChainSize < 2) {
940 InstructionsProcessed->insert(Chain.begin(), Chain.end());
941 return false;
942 }
943
944 ArrayRef<Instruction *> NewChain = getVectorizablePrefix(Chain);
945 if (NewChain.empty()) {
946 // No vectorization possible.
947 InstructionsProcessed->insert(Chain.begin(), Chain.end());
948 return false;
949 }
950 if (NewChain.size() == 1) {
951 // Failed after the first instruction. Discard it and try the smaller chain.
952 InstructionsProcessed->insert(NewChain.front());
953 return false;
954 }
955
956 // Update Chain to the valid vectorizable subchain.
957 Chain = NewChain;
958 ChainSize = Chain.size();
959
960 // Check if it's legal to vectorize this chain. If not, split the chain and
961 // try again.
962 unsigned EltSzInBytes = Sz / 8;
963 unsigned SzInBytes = EltSzInBytes * ChainSize;
964 if (!TTI.isLegalToVectorizeLoadChain(SzInBytes, Alignment, AS)) {
965 auto Chains = splitOddVectorElts(Chain, Sz);
966 return vectorizeLoadChain(Chains.first, InstructionsProcessed) |
967 vectorizeLoadChain(Chains.second, InstructionsProcessed);
968 }
969
970 VectorType *VecTy;
971 VectorType *VecLoadTy = dyn_cast<VectorType>(LoadTy);
972 if (VecLoadTy)
973 VecTy = VectorType::get(LoadTy->getScalarType(),
974 Chain.size() * VecLoadTy->getNumElements());
975 else
976 VecTy = VectorType::get(LoadTy, Chain.size());
977
978 // If it's more than the max vector size or the target has a better
979 // vector factor, break it into two pieces.
980 unsigned TargetVF = TTI.getLoadVectorFactor(VF, Sz, SzInBytes, VecTy);
981 if (ChainSize > VF || (VF != TargetVF && TargetVF < ChainSize)) {
982 DEBUG(dbgs() << "LSV: Chain doesn't match with the vector factor."
983 " Creating two separate arrays.\n");
984 return vectorizeLoadChain(Chain.slice(0, TargetVF), InstructionsProcessed) |
985 vectorizeLoadChain(Chain.slice(TargetVF), InstructionsProcessed);
986 }
987
988 // We won't try again to vectorize the elements of the chain, regardless of
989 // whether we succeed below.
990 InstructionsProcessed->insert(Chain.begin(), Chain.end());
991
992 // If the load is going to be misaligned, don't vectorize it.
993 if (accessIsMisaligned(SzInBytes, AS, Alignment)) {
994 if (L0->getPointerAddressSpace() != 0)
995 return false;
996
997 unsigned NewAlign = getOrEnforceKnownAlignment(L0->getPointerOperand(),
998 StackAdjustedAlignment,
999 DL, L0, nullptr, &DT);
1000 if (NewAlign < StackAdjustedAlignment)
1001 return false;
1002
1003 Alignment = NewAlign;
1004 }
1005
1006 DEBUG({
1007 dbgs() << "LSV: Loads to vectorize:\n";
1008 for (Instruction *I : Chain)
1009 I->dump();
1010 });
1011
1012 // getVectorizablePrefix already computed getBoundaryInstrs. The value of
1013 // Last may have changed since then, but the value of First won't have. If it
1014 // matters, we could compute getBoundaryInstrs only once and reuse it here.
1015 BasicBlock::iterator First, Last;
1016 std::tie(First, Last) = getBoundaryInstrs(Chain);
1017 Builder.SetInsertPoint(&*First);
1018
1019 Value *Bitcast =
1020 Builder.CreateBitCast(L0->getPointerOperand(), VecTy->getPointerTo(AS));
1021 // This cast is safe because Builder.CreateLoad always creates a bona fide
1022 // LoadInst.
1023 LoadInst *LI = cast<LoadInst>(Builder.CreateLoad(Bitcast));
1024 propagateMetadata(LI, Chain);
1025 LI->setAlignment(Alignment);
1026
1027 if (VecLoadTy) {
1028 SmallVector<Instruction *, 16> InstrsToErase;
1029
1030 unsigned VecWidth = VecLoadTy->getNumElements();
1031 for (unsigned I = 0, E = Chain.size(); I != E; ++I) {
1032 for (auto Use : Chain[I]->users()) {
1033 // All users of vector loads are ExtractElement instructions with
1034 // constant indices, otherwise we would have bailed before now.
1035 Instruction *UI = cast<Instruction>(Use);
1036 unsigned Idx = cast<ConstantInt>(UI->getOperand(1))->getZExtValue();
1037 unsigned NewIdx = Idx + I * VecWidth;
1038 Value *V = Builder.CreateExtractElement(LI, Builder.getInt32(NewIdx),
1039 UI->getName());
1040 if (V->getType() != UI->getType())
1041 V = Builder.CreateBitCast(V, UI->getType());
1042
1043 // Replace the old instruction.
1044 UI->replaceAllUsesWith(V);
1045 InstrsToErase.push_back(UI);
1046 }
1047 }
1048
1049 // Bitcast might not be an Instruction, if the value being loaded is a
1050 // constant. In that case, no need to reorder anything.
1051 if (Instruction *BitcastInst = dyn_cast<Instruction>(Bitcast))
1052 reorder(BitcastInst);
1053
1054 for (auto I : InstrsToErase)
1055 I->eraseFromParent();
1056 } else {
1057 for (unsigned I = 0, E = Chain.size(); I != E; ++I) {
1058 Value *CV = Chain[I];
1059 Value *V =
1060 Builder.CreateExtractElement(LI, Builder.getInt32(I), CV->getName());
1061 if (V->getType() != CV->getType()) {
1062 V = Builder.CreateBitOrPointerCast(V, CV->getType());
1063 }
1064
1065 // Replace the old instruction.
1066 CV->replaceAllUsesWith(V);
1067 }
1068
1069 if (Instruction *BitcastInst = dyn_cast<Instruction>(Bitcast))
1070 reorder(BitcastInst);
1071 }
1072
1073 eraseInstructions(Chain);
1074
1075 ++NumVectorInstructions;
1076 NumScalarsVectorized += Chain.size();
1077 return true;
1078}
1079
1080bool Vectorizer::accessIsMisaligned(unsigned SzInBytes, unsigned AddressSpace,
1081 unsigned Alignment) {
1082 if (Alignment % SzInBytes == 0)
1083 return false;
1084
1085 bool Fast = false;
1086 bool Allows = TTI.allowsMisalignedMemoryAccesses(F.getParent()->getContext(),
1087 SzInBytes * 8, AddressSpace,
1088 Alignment, &Fast);
1089 DEBUG(dbgs() << "LSV: Target said misaligned is allowed? " << Allows
1090 << " and fast? " << Fast << "\n";);
1091 return !Allows || !Fast;
1092}