Bug Summary

File: lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
Warning: line 899, column 17
1st function call argument is an uninitialized value
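
The warning reduces to a "maybe-empty loop" pattern: LoadTy is assigned only
inside a loop, and the analyzer explores the path on which the loop body never
executes. A minimal standalone reduction of the same pattern (hypothetical
code, not taken from the LLVM sources; sizeInBits stands in for
DataLayout::getTypeSizeInBits):

    #include <vector>

    unsigned sizeInBits(int *Ty) { return Ty ? 32 : 0; } // stand-in callee

    unsigned demo(const std::vector<int *> &Chain) {
      int *Ty;                 // step 1: declared without an initial value
      for (int *V : Chain) {   // step 2: analyzer assumes Chain is empty
        Ty = V;
        break;
      }
      return sizeInBits(Ty);   // step 3: 1st argument is uninitialized
    }

The annotated source below walks the same three steps through
Vectorizer::vectorizeLoadChain.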

Annotated Source Code

1//===----- LoadStoreVectorizer.cpp - GPU Load & Store Vectorizer ----------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10//===----------------------------------------------------------------------===//
11
12#include "llvm/ADT/MapVector.h"
13#include "llvm/ADT/PostOrderIterator.h"
14#include "llvm/ADT/SetVector.h"
15#include "llvm/ADT/Statistic.h"
16#include "llvm/ADT/Triple.h"
17#include "llvm/Analysis/AliasAnalysis.h"
18#include "llvm/Analysis/OrderedBasicBlock.h"
19#include "llvm/Analysis/ScalarEvolution.h"
20#include "llvm/Analysis/ScalarEvolutionExpressions.h"
21#include "llvm/Analysis/TargetTransformInfo.h"
22#include "llvm/Analysis/ValueTracking.h"
23#include "llvm/Analysis/VectorUtils.h"
24#include "llvm/IR/DataLayout.h"
25#include "llvm/IR/Dominators.h"
26#include "llvm/IR/IRBuilder.h"
27#include "llvm/IR/Instructions.h"
28#include "llvm/IR/Module.h"
29#include "llvm/IR/Type.h"
30#include "llvm/IR/Value.h"
31#include "llvm/Support/CommandLine.h"
32#include "llvm/Support/Debug.h"
33#include "llvm/Support/raw_ostream.h"
34#include "llvm/Transforms/Utils/Local.h"
35#include "llvm/Transforms/Vectorize.h"
36
37using namespace llvm;
38
39#define DEBUG_TYPE "load-store-vectorizer"
40STATISTIC(NumVectorInstructions, "Number of vector accesses generated");
41STATISTIC(NumScalarsVectorized, "Number of scalar accesses vectorized");
42
43namespace {
44
45// FIXME: Assuming stack alignment of 4 is always good enough
46static const unsigned StackAdjustedAlignment = 4;
47typedef SmallVector<Instruction *, 8> InstrList;
48typedef MapVector<Value *, InstrList> InstrListMap;
49
50class Vectorizer {
51 Function &F;
52 AliasAnalysis &AA;
53 DominatorTree &DT;
54 ScalarEvolution &SE;
55 TargetTransformInfo &TTI;
56 const DataLayout &DL;
57 IRBuilder<> Builder;
58
59public:
60 Vectorizer(Function &F, AliasAnalysis &AA, DominatorTree &DT,
61 ScalarEvolution &SE, TargetTransformInfo &TTI)
62 : F(F), AA(AA), DT(DT), SE(SE), TTI(TTI),
63 DL(F.getParent()->getDataLayout()), Builder(SE.getContext()) {}
64
65 bool run();
66
67private:
68 Value *getPointerOperand(Value *I);
69
70 unsigned getPointerAddressSpace(Value *I);
71
72 unsigned getAlignment(LoadInst *LI) const {
73 unsigned Align = LI->getAlignment();
74 if (Align != 0)
75 return Align;
76
77 return DL.getABITypeAlignment(LI->getType());
78 }
79
80 unsigned getAlignment(StoreInst *SI) const {
81 unsigned Align = SI->getAlignment();
82 if (Align != 0)
83 return Align;
84
85 return DL.getABITypeAlignment(SI->getValueOperand()->getType());
86 }
87
88 bool isConsecutiveAccess(Value *A, Value *B);
89
90 /// After vectorization, reorder the instructions that I depends on
91 /// (the instructions defining its operands), to ensure they dominate I.
92 void reorder(Instruction *I);
93
94 /// Returns the first and the last instructions in Chain.
95 std::pair<BasicBlock::iterator, BasicBlock::iterator>
96 getBoundaryInstrs(ArrayRef<Instruction *> Chain);
97
98 /// Erases the original instructions after vectorizing.
99 void eraseInstructions(ArrayRef<Instruction *> Chain);
100
101 /// "Legalize" the vector type that would be produced by combining \p
102 /// ElementSizeBits elements in \p Chain. Break into two pieces such that the
103 /// total size of each piece is 1, 2 or a multiple of 4 bytes. \p Chain is
104 /// expected to have more than 4 elements.
105 std::pair<ArrayRef<Instruction *>, ArrayRef<Instruction *>>
106 splitOddVectorElts(ArrayRef<Instruction *> Chain, unsigned ElementSizeBits);
107
108 /// Finds the largest prefix of Chain that's vectorizable, checking for
109 /// intervening instructions which may affect the memory accessed by the
110 /// instructions within Chain.
111 ///
112 /// The elements of \p Chain must be all loads or all stores and must be in
113 /// address order.
114 ArrayRef<Instruction *> getVectorizablePrefix(ArrayRef<Instruction *> Chain);
115
116 /// Collects load and store instructions to vectorize.
117 std::pair<InstrListMap, InstrListMap> collectInstructions(BasicBlock *BB);
118
119 /// Processes the collected instructions, the \p Map. The values of \p Map
120 /// should be all loads or all stores.
121 bool vectorizeChains(InstrListMap &Map);
122
123 /// Finds loads/stores to consecutive memory addresses and vectorizes them.
124 bool vectorizeInstructions(ArrayRef<Instruction *> Instrs);
125
126 /// Vectorizes the load instructions in Chain.
127 bool
128 vectorizeLoadChain(ArrayRef<Instruction *> Chain,
129 SmallPtrSet<Instruction *, 16> *InstructionsProcessed);
130
131 /// Vectorizes the store instructions in Chain.
132 bool
133 vectorizeStoreChain(ArrayRef<Instruction *> Chain,
134 SmallPtrSet<Instruction *, 16> *InstructionsProcessed);
135
136 /// Checks whether this load/store access is misaligned.
137 bool accessIsMisaligned(unsigned SzInBytes, unsigned AddressSpace,
138 unsigned Alignment);
139};
140
141class LoadStoreVectorizer : public FunctionPass {
142public:
143 static char ID;
144
145 LoadStoreVectorizer() : FunctionPass(ID) {
146 initializeLoadStoreVectorizerPass(*PassRegistry::getPassRegistry());
147 }
148
149 bool runOnFunction(Function &F) override;
150
151 StringRef getPassName() const override {
152 return "GPU Load and Store Vectorizer";
153 }
154
155 void getAnalysisUsage(AnalysisUsage &AU) const override {
156 AU.addRequired<AAResultsWrapperPass>();
157 AU.addRequired<ScalarEvolutionWrapperPass>();
158 AU.addRequired<DominatorTreeWrapperPass>();
159 AU.addRequired<TargetTransformInfoWrapperPass>();
160 AU.setPreservesCFG();
161 }
162};
163}
164
165INITIALIZE_PASS_BEGIN(LoadStoreVectorizer, DEBUG_TYPE,
166 "Vectorize load and Store instructions", false, false)
167INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)
168INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
169INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
170INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
171INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
172INITIALIZE_PASS_END(LoadStoreVectorizer, DEBUG_TYPE,
173 "Vectorize load and store instructions", false, false)
174
175char LoadStoreVectorizer::ID = 0;
176
177Pass *llvm::createLoadStoreVectorizerPass() {
178 return new LoadStoreVectorizer();
179}
180
181// The real propagateMetadata expects a SmallVector<Value*>, but we deal in
182// vectors of Instructions.
183static void propagateMetadata(Instruction *I, ArrayRef<Instruction *> IL) {
184 SmallVector<Value *, 8> VL(IL.begin(), IL.end());
185 propagateMetadata(I, VL);
186}
187
188bool LoadStoreVectorizer::runOnFunction(Function &F) {
189 // Don't vectorize when the attribute NoImplicitFloat is used.
190 if (skipFunction(F) || F.hasFnAttribute(Attribute::NoImplicitFloat))
191 return false;
192
193 AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
194 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
195 ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
196 TargetTransformInfo &TTI =
197 getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
198
199 Vectorizer V(F, AA, DT, SE, TTI);
200 return V.run();
201}
202
203// Vectorizer Implementation
204bool Vectorizer::run() {
205 bool Changed = false;
206
207 // Scan the blocks in the function in post order.
208 for (BasicBlock *BB : post_order(&F)) {
209 InstrListMap LoadRefs, StoreRefs;
210 std::tie(LoadRefs, StoreRefs) = collectInstructions(BB);
211 Changed |= vectorizeChains(LoadRefs);
212 Changed |= vectorizeChains(StoreRefs);
213 }
214
215 return Changed;
216}
217
218Value *Vectorizer::getPointerOperand(Value *I) {
219 if (LoadInst *LI = dyn_cast<LoadInst>(I))
220 return LI->getPointerOperand();
221 if (StoreInst *SI = dyn_cast<StoreInst>(I))
222 return SI->getPointerOperand();
223 return nullptr;
224}
225
226unsigned Vectorizer::getPointerAddressSpace(Value *I) {
227 if (LoadInst *L = dyn_cast<LoadInst>(I))
228 return L->getPointerAddressSpace();
229 if (StoreInst *S = dyn_cast<StoreInst>(I))
230 return S->getPointerAddressSpace();
231 return -1;
232}
233
234// FIXME: Merge with llvm::isConsecutiveAccess
235bool Vectorizer::isConsecutiveAccess(Value *A, Value *B) {
236 Value *PtrA = getPointerOperand(A);
237 Value *PtrB = getPointerOperand(B);
238 unsigned ASA = getPointerAddressSpace(A);
239 unsigned ASB = getPointerAddressSpace(B);
240
241 // Check that the address spaces match and that the pointers are valid.
242 if (!PtrA || !PtrB || (ASA != ASB))
243 return false;
244
245 // Make sure that A and B are different pointers of the same size type.
246 unsigned PtrBitWidth = DL.getPointerSizeInBits(ASA);
247 Type *PtrATy = PtrA->getType()->getPointerElementType();
248 Type *PtrBTy = PtrB->getType()->getPointerElementType();
249 if (PtrA == PtrB ||
250 DL.getTypeStoreSize(PtrATy) != DL.getTypeStoreSize(PtrBTy) ||
251 DL.getTypeStoreSize(PtrATy->getScalarType()) !=
252 DL.getTypeStoreSize(PtrBTy->getScalarType()))
253 return false;
254
255 APInt Size(PtrBitWidth, DL.getTypeStoreSize(PtrATy));
256
257 APInt OffsetA(PtrBitWidth, 0), OffsetB(PtrBitWidth, 0);
258 PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
259 PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);
260
261 APInt OffsetDelta = OffsetB - OffsetA;
262
263 // Check if they are based on the same pointer. That makes the offsets
264 // sufficient.
265 if (PtrA == PtrB)
266 return OffsetDelta == Size;
267
268 // Compute the necessary base pointer delta to have the necessary final delta
269 // equal to the size.
270 APInt BaseDelta = Size - OffsetDelta;
271
272 // Compute the distance with SCEV between the base pointers.
273 const SCEV *PtrSCEVA = SE.getSCEV(PtrA);
274 const SCEV *PtrSCEVB = SE.getSCEV(PtrB);
275 const SCEV *C = SE.getConstant(BaseDelta);
276 const SCEV *X = SE.getAddExpr(PtrSCEVA, C);
277 if (X == PtrSCEVB)
278 return true;
279
280 // Sometimes even this doesn't work, because SCEV can't always see through
281 // patterns that look like (gep (ext (add (shl X, C1), C2))). Try checking
282 // things the hard way.
283
284 // Look through GEPs after checking they're the same except for the last
285 // index.
286 GetElementPtrInst *GEPA = dyn_cast<GetElementPtrInst>(getPointerOperand(A));
287 GetElementPtrInst *GEPB = dyn_cast<GetElementPtrInst>(getPointerOperand(B));
288 if (!GEPA || !GEPB || GEPA->getNumOperands() != GEPB->getNumOperands())
289 return false;
290 unsigned FinalIndex = GEPA->getNumOperands() - 1;
291 for (unsigned i = 0; i < FinalIndex; i++)
292 if (GEPA->getOperand(i) != GEPB->getOperand(i))
293 return false;
294
295 Instruction *OpA = dyn_cast<Instruction>(GEPA->getOperand(FinalIndex));
296 Instruction *OpB = dyn_cast<Instruction>(GEPB->getOperand(FinalIndex));
297 if (!OpA || !OpB || OpA->getOpcode() != OpB->getOpcode() ||
298 OpA->getType() != OpB->getType())
299 return false;
300
301 // Only look through a ZExt/SExt.
302 if (!isa<SExtInst>(OpA) && !isa<ZExtInst>(OpA))
303 return false;
304
305 bool Signed = isa<SExtInst>(OpA);
306
307 OpA = dyn_cast<Instruction>(OpA->getOperand(0));
308 OpB = dyn_cast<Instruction>(OpB->getOperand(0));
309 if (!OpA || !OpB || OpA->getType() != OpB->getType())
310 return false;
311
312 // Now we need to prove that adding 1 to OpA won't overflow.
313 bool Safe = false;
314 // First attempt: if OpB is an add with NSW/NUW, and OpB is 1 added to OpA,
315 // we're okay.
316 if (OpB->getOpcode() == Instruction::Add &&
317 isa<ConstantInt>(OpB->getOperand(1)) &&
318 cast<ConstantInt>(OpB->getOperand(1))->getSExtValue() > 0) {
319 if (Signed)
320 Safe = cast<BinaryOperator>(OpB)->hasNoSignedWrap();
321 else
322 Safe = cast<BinaryOperator>(OpB)->hasNoUnsignedWrap();
323 }
324
325 unsigned BitWidth = OpA->getType()->getScalarSizeInBits();
326
327 // Second attempt:
328 // If any bits are known to be zero other than the sign bit in OpA, we can
329 // add 1 to it while guaranteeing no overflow of any sort.
330 if (!Safe) {
331 APInt KnownZero(BitWidth, 0);
332 APInt KnownOne(BitWidth, 0);
333 computeKnownBits(OpA, KnownZero, KnownOne, DL, 0, nullptr, OpA, &DT);
334 KnownZero &= ~APInt::getHighBitsSet(BitWidth, 1);
335 if (KnownZero != 0)
336 Safe = true;
337 }
338
339 if (!Safe)
340 return false;
341
342 const SCEV *OffsetSCEVA = SE.getSCEV(OpA);
343 const SCEV *OffsetSCEVB = SE.getSCEV(OpB);
344 const SCEV *One = SE.getConstant(APInt(BitWidth, 1));
345 const SCEV *X2 = SE.getAddExpr(OffsetSCEVA, One);
346 return X2 == OffsetSCEVB;
347}
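
Stripped of the SCEV machinery, the consecutiveness test above is offset
arithmetic on (base, constant-offset) decompositions. A minimal sketch under
simplifying assumptions (plain byte offsets, overflow ignored; the names are
illustrative, not from the pass):

    #include <cstdint>

    struct Decomposed {
      const void *Base; // pointer left after stripping in-bounds constant GEPs
      int64_t Offset;   // accumulated constant byte offset
    };

    // B directly follows A iff B starts exactly where A's access ends.
    bool consecutive(Decomposed A, Decomposed B, int64_t SizeOfA) {
      if (A.Base == B.Base)
        return B.Offset - A.Offset == SizeOfA;
      // With different bases, the pass instead asks SCEV to prove
      //   SCEV(A.Base) + (SizeOfA - (B.Offset - A.Offset)) == SCEV(B.Base),
      // and failing that, peels matching GEP indices and zext/sext as above.
      return false; // conservative answer without SCEV
    }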
348
349void Vectorizer::reorder(Instruction *I) {
350 OrderedBasicBlock OBB(I->getParent());
351 SmallPtrSet<Instruction *, 16> InstructionsToMove;
352 SmallVector<Instruction *, 16> Worklist;
353
354 Worklist.push_back(I);
355 while (!Worklist.empty()) {
356 Instruction *IW = Worklist.pop_back_val();
357 int NumOperands = IW->getNumOperands();
358 for (int i = 0; i < NumOperands; i++) {
359 Instruction *IM = dyn_cast<Instruction>(IW->getOperand(i));
360 if (!IM || IM->getOpcode() == Instruction::PHI)
361 continue;
362
363 // If IM is in another BB, no need to move it, because this pass only
364 // vectorizes instructions within one BB.
365 if (IM->getParent() != I->getParent())
366 continue;
367
368 if (!OBB.dominates(IM, I)) {
369 InstructionsToMove.insert(IM);
370 Worklist.push_back(IM);
371 }
372 }
373 }
374
375 // All instructions to move should follow I. Start from I, not from begin().
376 for (auto BBI = I->getIterator(), E = I->getParent()->end(); BBI != E;
377 ++BBI) {
378 if (!InstructionsToMove.count(&*BBI))
379 continue;
380 Instruction *IM = &*BBI;
381 --BBI;
382 IM->removeFromParent();
383 IM->insertBefore(I);
384 }
385}
386
387std::pair<BasicBlock::iterator, BasicBlock::iterator>
388Vectorizer::getBoundaryInstrs(ArrayRef<Instruction *> Chain) {
389 Instruction *C0 = Chain[0];
390 BasicBlock::iterator FirstInstr = C0->getIterator();
391 BasicBlock::iterator LastInstr = C0->getIterator();
392
393 BasicBlock *BB = C0->getParent();
394 unsigned NumFound = 0;
395 for (Instruction &I : *BB) {
396 if (!is_contained(Chain, &I))
397 continue;
398
399 ++NumFound;
400 if (NumFound == 1) {
401 FirstInstr = I.getIterator();
402 }
403 if (NumFound == Chain.size()) {
404 LastInstr = I.getIterator();
405 break;
406 }
407 }
408
409 // Range is [first, last).
410 return std::make_pair(FirstInstr, ++LastInstr);
411}
412
413void Vectorizer::eraseInstructions(ArrayRef<Instruction *> Chain) {
414 SmallVector<Instruction *, 16> Instrs;
415 for (Instruction *I : Chain) {
416 Value *PtrOperand = getPointerOperand(I);
417 assert(PtrOperand && "Instruction must have a pointer operand.");
418 Instrs.push_back(I);
419 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(PtrOperand))
420 Instrs.push_back(GEP);
421 }
422
423 // Erase instructions.
424 for (Instruction *I : Instrs)
425 if (I->use_empty())
426 I->eraseFromParent();
427}
428
429std::pair<ArrayRef<Instruction *>, ArrayRef<Instruction *>>
430Vectorizer::splitOddVectorElts(ArrayRef<Instruction *> Chain,
431 unsigned ElementSizeBits) {
432 unsigned ElementSizeBytes = ElementSizeBits / 8;
433 unsigned SizeBytes = ElementSizeBytes * Chain.size();
434 unsigned NumLeft = (SizeBytes - (SizeBytes % 4)) / ElementSizeBytes;
435 if (NumLeft == Chain.size()) {
436 if ((NumLeft & 1) == 0)
437 NumLeft /= 2; // Split even in half
438 else
439 --NumLeft; // Split off last element
440 } else if (NumLeft == 0)
441 NumLeft = 1;
442 return std::make_pair(Chain.slice(0, NumLeft), Chain.slice(NumLeft));
443}
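
A worked example of the split arithmetic, with illustrative values (a chain of
five i16 stores, so ElementSizeBits = 16):

    #include <cstdio>

    // Mirrors the split-point computation above (illustrative only).
    unsigned splitPoint(unsigned ElementSizeBits, unsigned ChainSize) {
      unsigned ElementSizeBytes = ElementSizeBits / 8;
      unsigned SizeBytes = ElementSizeBytes * ChainSize;
      unsigned NumLeft = (SizeBytes - (SizeBytes % 4)) / ElementSizeBytes;
      if (NumLeft == ChainSize)
        NumLeft = (NumLeft % 2 == 0) ? NumLeft / 2 : NumLeft - 1;
      else if (NumLeft == 0)
        NumLeft = 1;
      return NumLeft;
    }

    int main() {
      // 5 x i16 = 10 bytes; the largest prefix whose size is a multiple of
      // 4 bytes holds 4 elements, leaving one element for the second piece.
      printf("%u\n", splitPoint(16, 5)); // prints 4
    }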
444
445ArrayRef<Instruction *>
446Vectorizer::getVectorizablePrefix(ArrayRef<Instruction *> Chain) {
447 // These are in BB order, unlike Chain, which is in address order.
448 SmallVector<Instruction *, 16> MemoryInstrs;
449 SmallVector<Instruction *, 16> ChainInstrs;
450
451 bool IsLoadChain = isa<LoadInst>(Chain[0]);
452 DEBUG({
453 for (Instruction *I : Chain) {
454 if (IsLoadChain)
455 assert(isa<LoadInst>(I) &&
456 "All elements of Chain must be loads, or all must be stores.");
457 else
458 assert(isa<StoreInst>(I) &&
459 "All elements of Chain must be loads, or all must be stores.");
460 }
461 });
462
463 for (Instruction &I : make_range(getBoundaryInstrs(Chain))) {
464 if (isa<LoadInst>(I) || isa<StoreInst>(I)) {
465 if (!is_contained(Chain, &I))
466 MemoryInstrs.push_back(&I);
467 else
468 ChainInstrs.push_back(&I);
469 } else if (IsLoadChain && (I.mayWriteToMemory() || I.mayThrow())) {
470 DEBUG(dbgs() << "LSV: Found may-write/throw operation: " << I << '\n');
471 break;
472 } else if (!IsLoadChain && (I.mayReadOrWriteMemory() || I.mayThrow())) {
473 DEBUG(dbgs() << "LSV: Found may-read/write/throw operation: " << I
474 << '\n');
475 break;
476 }
477 }
478
479 OrderedBasicBlock OBB(Chain[0]->getParent());
480
481 // Loop until we find an instruction in ChainInstrs that we can't vectorize.
482 unsigned ChainInstrIdx = 0;
483 Instruction *BarrierMemoryInstr = nullptr;
484
485 for (unsigned E = ChainInstrs.size(); ChainInstrIdx < E; ++ChainInstrIdx) {
486 Instruction *ChainInstr = ChainInstrs[ChainInstrIdx];
487
488 // If a barrier memory instruction was found, chain instructions that follow
489 // will not be added to the valid prefix.
490 if (BarrierMemoryInstr && OBB.dominates(BarrierMemoryInstr, ChainInstr))
491 break;
492
493 // Check (in BB order) if any instruction prevents ChainInstr from being
494 // vectorized. Find and store the first such "conflicting" instruction.
495 for (Instruction *MemInstr : MemoryInstrs) {
496 // If a barrier memory instruction was found, do not check past it.
497 if (BarrierMemoryInstr && OBB.dominates(BarrierMemoryInstr, MemInstr))
498 break;
499
500 if (isa<LoadInst>(MemInstr) && isa<LoadInst>(ChainInstr))
501 continue;
502
503 // We can ignore the alias as long as the load comes before the store,
504 // because that means we won't be moving the load past the store to
505 // vectorize it (the vectorized load is inserted at the location of the
506 // first load in the chain).
507 if (isa<StoreInst>(MemInstr) && isa<LoadInst>(ChainInstr) &&
508 OBB.dominates(ChainInstr, MemInstr))
509 continue;
510
511 // Same case, but in reverse.
512 if (isa<LoadInst>(MemInstr) && isa<StoreInst>(ChainInstr) &&
513 OBB.dominates(MemInstr, ChainInstr))
514 continue;
515
516 if (!AA.isNoAlias(MemoryLocation::get(MemInstr),
517 MemoryLocation::get(ChainInstr))) {
518 DEBUG({
519 dbgs() << "LSV: Found alias:\n"
520 " Aliasing instruction and pointer:\n"
521 << " " << *MemInstr << '\n'
522 << " " << *getPointerOperand(MemInstr) << '\n'
523 << " Aliased instruction and pointer:\n"
524 << " " << *ChainInstr << '\n'
525 << " " << *getPointerOperand(ChainInstr) << '\n';
526 });
527 // Save this aliasing memory instruction as a barrier, but allow other
528 // instructions that precede the barrier to be vectorized with this one.
529 BarrierMemoryInstr = MemInstr;
530 break;
531 }
532 }
533 // Continue the search only for store chains, since vectorizing stores that
534 // precede an aliasing load is valid. Conversely, vectorizing loads is valid
535 // up to an aliasing store, but should not pull loads from further down in
536 // the basic block.
537 if (IsLoadChain && BarrierMemoryInstr) {
538 // The BarrierMemoryInstr is a store that precedes ChainInstr.
539 assert(OBB.dominates(BarrierMemoryInstr, ChainInstr));
541 }
542 }
543
544 // Find the largest prefix of Chain whose elements are all in
545 // ChainInstrs[0, ChainInstrIdx). This is the largest vectorizable prefix of
546 // Chain. (Recall that Chain is in address order, but ChainInstrs is in BB
547 // order.)
548 SmallPtrSet<Instruction *, 8> VectorizableChainInstrs(
549 ChainInstrs.begin(), ChainInstrs.begin() + ChainInstrIdx);
550 unsigned ChainIdx = 0;
551 for (unsigned ChainLen = Chain.size(); ChainIdx < ChainLen; ++ChainIdx) {
552 if (!VectorizableChainInstrs.count(Chain[ChainIdx]))
553 break;
554 }
555 return Chain.slice(0, ChainIdx);
556}
557
558std::pair<InstrListMap, InstrListMap>
559Vectorizer::collectInstructions(BasicBlock *BB) {
560 InstrListMap LoadRefs;
561 InstrListMap StoreRefs;
562
563 for (Instruction &I : *BB) {
564 if (!I.mayReadOrWriteMemory())
565 continue;
566
567 if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
568 if (!LI->isSimple())
569 continue;
570
571 // Skip if it's not legal.
572 if (!TTI.isLegalToVectorizeLoad(LI))
573 continue;
574
575 Type *Ty = LI->getType();
576 if (!VectorType::isValidElementType(Ty->getScalarType()))
577 continue;
578
579 // Skip weird non-byte sizes. They probably aren't worth the effort of
580 // handling correctly.
581 unsigned TySize = DL.getTypeSizeInBits(Ty);
582 if (TySize < 8)
583 continue;
584
585 Value *Ptr = LI->getPointerOperand();
586 unsigned AS = Ptr->getType()->getPointerAddressSpace();
587 unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS);
588
589 // No point in looking at these if they're too big to vectorize.
590 if (TySize > VecRegSize / 2)
591 continue;
592
593 // Make sure all the users of a vector are constant-index extracts.
594 if (isa<VectorType>(Ty) && !all_of(LI->users(), [](const User *U) {
595 const ExtractElementInst *EEI = dyn_cast<ExtractElementInst>(U);
596 return EEI && isa<ConstantInt>(EEI->getOperand(1));
597 }))
598 continue;
599
600 // Save the load locations.
601 Value *ObjPtr = GetUnderlyingObject(Ptr, DL);
602 LoadRefs[ObjPtr].push_back(LI);
603
604 } else if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
605 if (!SI->isSimple())
606 continue;
607
608 // Skip if it's not legal.
609 if (!TTI.isLegalToVectorizeStore(SI))
610 continue;
611
612 Type *Ty = SI->getValueOperand()->getType();
613 if (!VectorType::isValidElementType(Ty->getScalarType()))
614 continue;
615
616 // Skip weird non-byte sizes. They probably aren't worth the effort of
617 // handling correctly.
618 unsigned TySize = DL.getTypeSizeInBits(Ty);
619 if (TySize < 8)
620 continue;
621
622 Value *Ptr = SI->getPointerOperand();
623 unsigned AS = Ptr->getType()->getPointerAddressSpace();
624 unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS);
625 if (TySize > VecRegSize / 2)
626 continue;
627
628 if (isa<VectorType>(Ty) && !all_of(SI->users(), [](const User *U) {
629 const ExtractElementInst *EEI = dyn_cast<ExtractElementInst>(U);
630 return EEI && isa<ConstantInt>(EEI->getOperand(1));
631 }))
632 continue;
633
634 // Save store location.
635 Value *ObjPtr = GetUnderlyingObject(Ptr, DL);
636 StoreRefs[ObjPtr].push_back(SI);
637 }
638 }
639
640 return {LoadRefs, StoreRefs};
641}
642
643bool Vectorizer::vectorizeChains(InstrListMap &Map) {
644 bool Changed = false;
645
646 for (const std::pair<Value *, InstrList> &Chain : Map) {
647 unsigned Size = Chain.second.size();
648 if (Size < 2)
649 continue;
650
651 DEBUG(dbgs() << "LSV: Analyzing a chain of length " << Size << ".\n");
652
653 // Process the stores in chunks of 64.
654 for (unsigned CI = 0, CE = Size; CI < CE; CI += 64) {
655 unsigned Len = std::min<unsigned>(CE - CI, 64);
656 ArrayRef<Instruction *> Chunk(&Chain.second[CI], Len);
657 Changed |= vectorizeInstructions(Chunk);
658 }
659 }
660
661 return Changed;
662}
663
664bool Vectorizer::vectorizeInstructions(ArrayRef<Instruction *> Instrs) {
665 DEBUG(dbgs() << "LSV: Vectorizing " << Instrs.size() << " instructions.\n");
666 SmallVector<int, 16> Heads, Tails;
667 int ConsecutiveChain[64];
668
669 // Do a quadratic search on all of the given stores and find all of the pairs
670 // of stores that follow each other.
671 for (int i = 0, e = Instrs.size(); i < e; ++i) {
672 ConsecutiveChain[i] = -1;
673 for (int j = e - 1; j >= 0; --j) {
674 if (i == j)
675 continue;
676
677 if (isConsecutiveAccess(Instrs[i], Instrs[j])) {
678 if (ConsecutiveChain[i] != -1) {
679 int CurDistance = std::abs(ConsecutiveChain[i] - i);
680 int NewDistance = std::abs(ConsecutiveChain[i] - j);
681 if (j < i || NewDistance > CurDistance)
682 continue; // Should not insert.
683 }
684
685 Tails.push_back(j);
686 Heads.push_back(i);
687 ConsecutiveChain[i] = j;
688 }
689 }
690 }
691
692 bool Changed = false;
693 SmallPtrSet<Instruction *, 16> InstructionsProcessed;
694
695 for (int Head : Heads) {
696 if (InstructionsProcessed.count(Instrs[Head]))
697 continue;
698 bool LongerChainExists = false;
699 for (unsigned TIt = 0; TIt < Tails.size(); TIt++)
700 if (Head == Tails[TIt] &&
701 !InstructionsProcessed.count(Instrs[Heads[TIt]])) {
702 LongerChainExists = true;
703 break;
704 }
705 if (LongerChainExists)
706 continue;
707
708 // We found an instr that starts a chain. Now follow the chain and try to
709 // vectorize it.
710 SmallVector<Instruction *, 16> Operands;
711 int I = Head;
712 while (I != -1 && (is_contained(Tails, I) || is_contained(Heads, I))) {
713 if (InstructionsProcessed.count(Instrs[I]))
714 break;
715
716 Operands.push_back(Instrs[I]);
717 I = ConsecutiveChain[I];
718 }
719
720 bool Vectorized = false;
721 if (isa<LoadInst>(*Operands.begin()))
722 Vectorized = vectorizeLoadChain(Operands, &InstructionsProcessed);
723 else
724 Vectorized = vectorizeStoreChain(Operands, &InstructionsProcessed);
725
726 Changed |= Vectorized;
727 }
728
729 return Changed;
730}
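
To see the Heads/Tails bookkeeping in action, consider three accesses at
addresses A, A+4, and A+8 with 4-byte elements (a hypothetical trace, not
analyzer output):

    #include <cstdio>

    int main() {
      // isConsecutiveAccess holds for the pairs (0,1) and (1,2), so the
      // quadratic scan records Heads = {0, 1}, Tails = {1, 2} and:
      int ConsecutiveChain[3] = {1, 2, -1};
      // Head 0 appears in no Tails entry (no longer chain starts before it),
      // so the chain is followed from there until -1:
      for (int I = 0; I != -1; I = ConsecutiveChain[I])
        printf("%d ", I); // prints: 0 1 2
    }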
731
732bool Vectorizer::vectorizeStoreChain(
733 ArrayRef<Instruction *> Chain,
734 SmallPtrSet<Instruction *, 16> *InstructionsProcessed) {
735 StoreInst *S0 = cast<StoreInst>(Chain[0]);
736
737 // If the vector has an int element, default to int for the whole store.
738 Type *StoreTy;
739 for (Instruction *I : Chain) {
740 StoreTy = cast<StoreInst>(I)->getValueOperand()->getType();
741 if (StoreTy->isIntOrIntVectorTy())
742 break;
743
744 if (StoreTy->isPtrOrPtrVectorTy()) {
745 StoreTy = Type::getIntNTy(F.getParent()->getContext(),
746 DL.getTypeSizeInBits(StoreTy));
747 break;
748 }
749 }
750
751 unsigned Sz = DL.getTypeSizeInBits(StoreTy);
752 unsigned AS = S0->getPointerAddressSpace();
753 unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS);
754 unsigned VF = VecRegSize / Sz;
755 unsigned ChainSize = Chain.size();
756 unsigned Alignment = getAlignment(S0);
757
758 if (!isPowerOf2_32(Sz) || VF < 2 || ChainSize < 2) {
759 InstructionsProcessed->insert(Chain.begin(), Chain.end());
760 return false;
761 }
762
763 ArrayRef<Instruction *> NewChain = getVectorizablePrefix(Chain);
764 if (NewChain.empty()) {
765 // No vectorization possible.
766 InstructionsProcessed->insert(Chain.begin(), Chain.end());
767 return false;
768 }
769 if (NewChain.size() == 1) {
770 // Failed after the first instruction. Discard it and try the smaller chain.
771 InstructionsProcessed->insert(NewChain.front());
772 return false;
773 }
774
775 // Update Chain to the valid vectorizable subchain.
776 Chain = NewChain;
777 ChainSize = Chain.size();
778
779 // Check if it's legal to vectorize this chain. If not, split the chain and
780 // try again.
781 unsigned EltSzInBytes = Sz / 8;
782 unsigned SzInBytes = EltSzInBytes * ChainSize;
783 if (!TTI.isLegalToVectorizeStoreChain(SzInBytes, Alignment, AS)) {
784 auto Chains = splitOddVectorElts(Chain, Sz);
785 return vectorizeStoreChain(Chains.first, InstructionsProcessed) |
786 vectorizeStoreChain(Chains.second, InstructionsProcessed);
787 }
788
789 VectorType *VecTy;
790 VectorType *VecStoreTy = dyn_cast<VectorType>(StoreTy);
791 if (VecStoreTy)
792 VecTy = VectorType::get(StoreTy->getScalarType(),
793 Chain.size() * VecStoreTy->getNumElements());
794 else
795 VecTy = VectorType::get(StoreTy, Chain.size());
796
797 // If it's more than the max vector size or the target has a better
798 // vector factor, break it into two pieces.
799 unsigned TargetVF = TTI.getStoreVectorFactor(VF, Sz, SzInBytes, VecTy);
800 if (ChainSize > VF || (VF != TargetVF && TargetVF < ChainSize)) {
801 DEBUG(dbgs() << "LSV: Chain doesn't match with the vector factor."
802 " Creating two separate arrays.\n");
803 return vectorizeStoreChain(Chain.slice(0, TargetVF),
804 InstructionsProcessed) |
805 vectorizeStoreChain(Chain.slice(TargetVF), InstructionsProcessed);
806 }
807
808 DEBUG({
809 dbgs() << "LSV: Stores to vectorize:\n";
810 for (Instruction *I : Chain)
811 dbgs() << " " << *I << "\n";
812 });
813
814 // We won't try again to vectorize the elements of the chain, regardless of
815 // whether we succeed below.
816 InstructionsProcessed->insert(Chain.begin(), Chain.end());
817
818 // If the store is going to be misaligned, don't vectorize it.
819 if (accessIsMisaligned(SzInBytes, AS, Alignment)) {
820 if (S0->getPointerAddressSpace() != 0)
821 return false;
822
823 unsigned NewAlign = getOrEnforceKnownAlignment(S0->getPointerOperand(),
824 StackAdjustedAlignment,
825 DL, S0, nullptr, &DT);
826 if (NewAlign < StackAdjustedAlignment)
827 return false;
828 }
829
830 BasicBlock::iterator First, Last;
831 std::tie(First, Last) = getBoundaryInstrs(Chain);
832 Builder.SetInsertPoint(&*Last);
833
834 Value *Vec = UndefValue::get(VecTy);
835
836 if (VecStoreTy) {
837 unsigned VecWidth = VecStoreTy->getNumElements();
838 for (unsigned I = 0, E = Chain.size(); I != E; ++I) {
839 StoreInst *Store = cast<StoreInst>(Chain[I]);
840 for (unsigned J = 0, NE = VecStoreTy->getNumElements(); J != NE; ++J) {
841 unsigned NewIdx = J + I * VecWidth;
842 Value *Extract = Builder.CreateExtractElement(Store->getValueOperand(),
843 Builder.getInt32(J));
844 if (Extract->getType() != StoreTy->getScalarType())
845 Extract = Builder.CreateBitCast(Extract, StoreTy->getScalarType());
846
847 Value *Insert =
848 Builder.CreateInsertElement(Vec, Extract, Builder.getInt32(NewIdx));
849 Vec = Insert;
850 }
851 }
852 } else {
853 for (unsigned I = 0, E = Chain.size(); I != E; ++I) {
854 StoreInst *Store = cast<StoreInst>(Chain[I]);
855 Value *Extract = Store->getValueOperand();
856 if (Extract->getType() != StoreTy->getScalarType())
857 Extract =
858 Builder.CreateBitOrPointerCast(Extract, StoreTy->getScalarType());
859
860 Value *Insert =
861 Builder.CreateInsertElement(Vec, Extract, Builder.getInt32(I));
862 Vec = Insert;
863 }
864 }
865
866 // This cast is safe because Builder.CreateStore() always creates a bona fide
867 // StoreInst.
868 StoreInst *SI = cast<StoreInst>(
869 Builder.CreateStore(Vec, Builder.CreateBitCast(S0->getPointerOperand(),
870 VecTy->getPointerTo(AS))));
871 propagateMetadata(SI, Chain);
872 SI->setAlignment(Alignment);
873
874 eraseInstructions(Chain);
875 ++NumVectorInstructions;
876 NumScalarsVectorized += Chain.size();
877 return true;
878}
879
880bool Vectorizer::vectorizeLoadChain(
881 ArrayRef<Instruction *> Chain,
882 SmallPtrSet<Instruction *, 16> *InstructionsProcessed) {
883 LoadInst *L0 = cast<LoadInst>(Chain[0]);
884
885 // If the vector has an int element, default to int for the whole load.
886 Type *LoadTy;
[Step 1] 'LoadTy' declared without an initial value
887 for (const auto &V : Chain) {
[Step 2] Assuming '__begin' is equal to '__end' (the range-for over Chain iterates zero times, so the loop body never assigns LoadTy)
888 LoadTy = cast<LoadInst>(V)->getType();
889 if (LoadTy->isIntOrIntVectorTy())
890 break;
891
892 if (LoadTy->isPtrOrPtrVectorTy()) {
893 LoadTy = Type::getIntNTy(F.getParent()->getContext(),
894 DL.getTypeSizeInBits(LoadTy));
895 break;
896 }
897 }
898
899 unsigned Sz = DL.getTypeSizeInBits(LoadTy);
[Step 3] 1st function call argument is an uninitialized value
900 unsigned AS = L0->getPointerAddressSpace();
901 unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS);
902 unsigned VF = VecRegSize / Sz;
903 unsigned ChainSize = Chain.size();
904 unsigned Alignment = getAlignment(L0);
905
906 if (!isPowerOf2_32(Sz) || VF < 2 || ChainSize < 2) {
907 InstructionsProcessed->insert(Chain.begin(), Chain.end());
908 return false;
909 }
910
911 ArrayRef<Instruction *> NewChain = getVectorizablePrefix(Chain);
912 if (NewChain.empty()) {
913 // No vectorization possible.
914 InstructionsProcessed->insert(Chain.begin(), Chain.end());
915 return false;
916 }
917 if (NewChain.size() == 1) {
918 // Failed after the first instruction. Discard it and try the smaller chain.
919 InstructionsProcessed->insert(NewChain.front());
920 return false;
921 }
922
923 // Update Chain to the valid vectorizable subchain.
924 Chain = NewChain;
925 ChainSize = Chain.size();
926
927 // Check if it's legal to vectorize this chain. If not, split the chain and
928 // try again.
929 unsigned EltSzInBytes = Sz / 8;
930 unsigned SzInBytes = EltSzInBytes * ChainSize;
931 if (!TTI.isLegalToVectorizeLoadChain(SzInBytes, Alignment, AS)) {
932 auto Chains = splitOddVectorElts(Chain, Sz);
933 return vectorizeLoadChain(Chains.first, InstructionsProcessed) |
934 vectorizeLoadChain(Chains.second, InstructionsProcessed);
935 }
936
937 VectorType *VecTy;
938 VectorType *VecLoadTy = dyn_cast<VectorType>(LoadTy);
939 if (VecLoadTy)
940 VecTy = VectorType::get(LoadTy->getScalarType(),
941 Chain.size() * VecLoadTy->getNumElements());
942 else
943 VecTy = VectorType::get(LoadTy, Chain.size());
944
945 // If it's more than the max vector size or the target has a better
946 // vector factor, break it into two pieces.
947 unsigned TargetVF = TTI.getLoadVectorFactor(VF, Sz, SzInBytes, VecTy);
948 if (ChainSize > VF || (VF != TargetVF && TargetVF < ChainSize)) {
949 DEBUG(dbgs() << "LSV: Chain doesn't match with the vector factor."
950 " Creating two separate arrays.\n");
951 return vectorizeLoadChain(Chain.slice(0, TargetVF), InstructionsProcessed) |
952 vectorizeLoadChain(Chain.slice(TargetVF), InstructionsProcessed);
953 }
954
955 // We won't try again to vectorize the elements of the chain, regardless of
956 // whether we succeed below.
957 InstructionsProcessed->insert(Chain.begin(), Chain.end());
958
959 // If the load is going to be misaligned, don't vectorize it.
960 if (accessIsMisaligned(SzInBytes, AS, Alignment)) {
961 if (L0->getPointerAddressSpace() != 0)
962 return false;
963
964 unsigned NewAlign = getOrEnforceKnownAlignment(L0->getPointerOperand(),
965 StackAdjustedAlignment,
966 DL, L0, nullptr, &DT);
967 if (NewAlign < StackAdjustedAlignment)
968 return false;
969
970 Alignment = NewAlign;
971 }
972
973 DEBUG({
974 dbgs() << "LSV: Loads to vectorize:\n";
975 for (Instruction *I : Chain)
976 I->dump();
977 });
978
979 // getVectorizablePrefix already computed getBoundaryInstrs. The value of
980 // Last may have changed since then, but the value of First won't have. If it
981 // matters, we could compute getBoundaryInstrs only once and reuse it here.
982 BasicBlock::iterator First, Last;
983 std::tie(First, Last) = getBoundaryInstrs(Chain);
984 Builder.SetInsertPoint(&*First);
985
986 Value *Bitcast =
987 Builder.CreateBitCast(L0->getPointerOperand(), VecTy->getPointerTo(AS));
988 // This cast is safe because Builder.CreateLoad always creates a bona fide
989 // LoadInst.
990 LoadInst *LI = cast<LoadInst>(Builder.CreateLoad(Bitcast));
991 propagateMetadata(LI, Chain);
992 LI->setAlignment(Alignment);
993
994 if (VecLoadTy) {
995 SmallVector<Instruction *, 16> InstrsToErase;
996
997 unsigned VecWidth = VecLoadTy->getNumElements();
998 for (unsigned I = 0, E = Chain.size(); I != E; ++I) {
999 for (auto Use : Chain[I]->users()) {
1000 // All users of vector loads are ExtractElement instructions with
1001 // constant indices, otherwise we would have bailed before now.
1002 Instruction *UI = cast<Instruction>(Use);
1003 unsigned Idx = cast<ConstantInt>(UI->getOperand(1))->getZExtValue();
1004 unsigned NewIdx = Idx + I * VecWidth;
1005 Value *V = Builder.CreateExtractElement(LI, Builder.getInt32(NewIdx),
1006 UI->getName());
1007 if (V->getType() != UI->getType())
1008 V = Builder.CreateBitCast(V, UI->getType());
1009
1010 // Replace the old instruction.
1011 UI->replaceAllUsesWith(V);
1012 InstrsToErase.push_back(UI);
1013 }
1014 }
1015
1016 // Bitcast might not be an Instruction, if the value being loaded is a
1017 // constant. In that case, no need to reorder anything.
1018 if (Instruction *BitcastInst = dyn_cast<Instruction>(Bitcast))
1019 reorder(BitcastInst);
1020
1021 for (auto I : InstrsToErase)
1022 I->eraseFromParent();
1023 } else {
1024 for (unsigned I = 0, E = Chain.size(); I != E; ++I) {
1025 Value *CV = Chain[I];
1026 Value *V =
1027 Builder.CreateExtractElement(LI, Builder.getInt32(I), CV->getName());
1028 if (V->getType() != CV->getType()) {
1029 V = Builder.CreateBitOrPointerCast(V, CV->getType());
1030 }
1031
1032 // Replace the old instruction.
1033 CV->replaceAllUsesWith(V);
1034 }
1035
1036 if (Instruction *BitcastInst = dyn_cast<Instruction>(Bitcast))
1037 reorder(BitcastInst);
1038 }
1039
1040 eraseInstructions(Chain);
1041
1042 ++NumVectorInstructions;
1043 NumScalarsVectorized += Chain.size();
1044 return true;
1045}
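
The warning at line 899 follows from this function's structure: every caller
reaches vectorizeLoadChain with a non-empty Chain, but nothing in the function
itself encodes that invariant, so on the analyzer's empty-Chain path LoadTy is
never assigned before line 899. One way to document the invariant and silence
the warning (a sketch, not necessarily the fix adopted upstream):

    Type *LoadTy = nullptr;
    for (const auto &V : Chain) {
      LoadTy = cast<LoadInst>(V)->getType();
      if (LoadTy->isIntOrIntVectorTy())
        break;
      if (LoadTy->isPtrOrPtrVectorTy()) {
        LoadTy = Type::getIntNTy(F.getParent()->getContext(),
                                 DL.getTypeSizeInBits(LoadTy));
        break;
      }
    }
    assert(LoadTy && "Chain must contain at least one load");

vectorizeStoreChain (line 738) computes StoreTy with the identical pattern and
would presumably draw the same report.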
1046
1047bool Vectorizer::accessIsMisaligned(unsigned SzInBytes, unsigned AddressSpace,
1048 unsigned Alignment) {
1049 if (Alignment % SzInBytes == 0)
1050 return false;
1051
1052 bool Fast = false;
1053 bool Allows = TTI.allowsMisalignedMemoryAccesses(F.getParent()->getContext(),
1054 SzInBytes * 8, AddressSpace,
1055 Alignment, &Fast);
1056 DEBUG(dbgs() << "LSV: Target said misaligned is allowed? " << Allows
1057 << " and fast? " << Fast << "\n");
1058 return !Allows || !Fast;
1059}
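
The early exit at line 1049 encodes the rule that an access whose alignment is
a multiple of its size in bytes is never misaligned; only the remaining cases
are delegated to TTI.allowsMisalignedMemoryAccesses. For illustration (a
hypothetical helper mirroring that test, not part of the pass):

    #include <cassert>

    // True when the TTI query is still needed, i.e. when the alignment is
    // not a multiple of the access size.
    bool needsTTIQuery(unsigned SzInBytes, unsigned Alignment) {
      return Alignment % SzInBytes != 0;
    }

    int main() {
      assert(!needsTTIQuery(16, 16)); // 16-byte access, 16-byte aligned: OK
      assert(needsTTIQuery(16, 4));   // under-aligned: ask the target
    }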