LLVM 20.0.0git
ScalarizeMaskedMemIntrin.cpp
//===- ScalarizeMaskedMemIntrin.cpp - Scalarize unsupported masked mem ----===//
// intrinsics
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass replaces masked memory intrinsics - when unsupported by the target
// - with a chain of basic blocks that handles the elements one by one if the
// appropriate mask bit is set.
//
//===----------------------------------------------------------------------===//
15
17#include "llvm/ADT/Twine.h"
21#include "llvm/IR/BasicBlock.h"
22#include "llvm/IR/Constant.h"
23#include "llvm/IR/Constants.h"
25#include "llvm/IR/Dominators.h"
26#include "llvm/IR/Function.h"
27#include "llvm/IR/IRBuilder.h"
28#include "llvm/IR/Instruction.h"
31#include "llvm/IR/Type.h"
32#include "llvm/IR/Value.h"
34#include "llvm/Pass.h"
38#include <cassert>
39#include <optional>
40
41using namespace llvm;
42
43#define DEBUG_TYPE "scalarize-masked-mem-intrin"
44
45namespace {
46
47class ScalarizeMaskedMemIntrinLegacyPass : public FunctionPass {
48public:
49 static char ID; // Pass identification, replacement for typeid
50
51 explicit ScalarizeMaskedMemIntrinLegacyPass() : FunctionPass(ID) {
54 }
55
56 bool runOnFunction(Function &F) override;
57
58 StringRef getPassName() const override {
59 return "Scalarize Masked Memory Intrinsics";
60 }
61
62 void getAnalysisUsage(AnalysisUsage &AU) const override {
65 }
66};
67
68} // end anonymous namespace
69
70static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT,
71 const TargetTransformInfo &TTI, const DataLayout &DL,
72 DomTreeUpdater *DTU);
73static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT,
75 const DataLayout &DL, DomTreeUpdater *DTU);
76
77char ScalarizeMaskedMemIntrinLegacyPass::ID = 0;
78
79INITIALIZE_PASS_BEGIN(ScalarizeMaskedMemIntrinLegacyPass, DEBUG_TYPE,
80 "Scalarize unsupported masked memory intrinsics", false,
81 false)
84INITIALIZE_PASS_END(ScalarizeMaskedMemIntrinLegacyPass, DEBUG_TYPE,
85 "Scalarize unsupported masked memory intrinsics", false,
86 false)
87
89 return new ScalarizeMaskedMemIntrinLegacyPass();
90}
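
// Note: with the new pass manager this pass runs under the same name as
// DEBUG_TYPE (assuming the usual FUNCTION_PASS registration in
// PassRegistry.def), so it can be exercised in isolation with, e.g.:
//
//   opt -passes=scalarize-masked-mem-intrin -S input.ll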

static bool isConstantIntVector(Value *Mask) {
  Constant *C = dyn_cast<Constant>(Mask);
  if (!C)
    return false;

  unsigned NumElts = cast<FixedVectorType>(Mask->getType())->getNumElements();
  for (unsigned i = 0; i != NumElts; ++i) {
    Constant *CElt = C->getAggregateElement(i);
    if (!CElt || !isa<ConstantInt>(CElt))
      return false;
  }

  return true;
}

static unsigned adjustForEndian(const DataLayout &DL, unsigned VectorWidth,
                                unsigned Idx) {
  return DL.isBigEndian() ? VectorWidth - 1 - Idx : Idx;
}
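// For example, with VectorWidth = 4, adjustForEndian(DL, 4, 0) is 0 on a
// little-endian target but 3 on a big-endian one: mask element 0 maps to bit 0
// of the bitcast mask on little-endian targets and to bit 3 on big-endian
// targets.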

// Translate a masked load intrinsic like
// <16 x i32 > @llvm.masked.load( <16 x i32>* %addr, i32 align,
//                                <16 x i1> %mask, <16 x i32> %passthru)
// to a chain of basic blocks, loading the elements one by one if
// the appropriate mask bit is set
//
//  %1 = bitcast i8* %addr to i32*
//  %2 = extractelement <16 x i1> %mask, i32 0
//  br i1 %2, label %cond.load, label %else
//
// cond.load:                                       ; preds = %0
//  %3 = getelementptr i32* %1, i32 0
//  %4 = load i32* %3
//  %5 = insertelement <16 x i32> %passthru, i32 %4, i32 0
//  br label %else
//
// else:                                            ; preds = %0, %cond.load
//  %res.phi.else = phi <16 x i32> [ %5, %cond.load ], [ poison, %0 ]
//  %6 = extractelement <16 x i1> %mask, i32 1
//  br i1 %6, label %cond.load1, label %else2
//
// cond.load1:                                      ; preds = %else
//  %7 = getelementptr i32* %1, i32 1
//  %8 = load i32* %7
//  %9 = insertelement <16 x i32> %res.phi.else, i32 %8, i32 1
//  br label %else2
//
// else2:                                          ; preds = %else, %cond.load1
//  %res.phi.else3 = phi <16 x i32> [ %9, %cond.load1 ], [ %res.phi.else, %else ]
//  %10 = extractelement <16 x i1> %mask, i32 2
//  br i1 %10, label %cond.load4, label %else5
//
static void scalarizeMaskedLoad(const DataLayout &DL, CallInst *CI,
                                DomTreeUpdater *DTU, bool &ModifiedDT) {
  Value *Ptr = CI->getArgOperand(0);
  Value *Alignment = CI->getArgOperand(1);
  Value *Mask = CI->getArgOperand(2);
  Value *Src0 = CI->getArgOperand(3);

  const Align AlignVal = cast<ConstantInt>(Alignment)->getAlignValue();
  VectorType *VecType = cast<FixedVectorType>(CI->getType());

  Type *EltTy = VecType->getElementType();

  IRBuilder<> Builder(CI->getContext());
  Instruction *InsertPt = CI;
  BasicBlock *IfBlock = CI->getParent();

  Builder.SetInsertPoint(InsertPt);
  Builder.SetCurrentDebugLocation(CI->getDebugLoc());

  // Short-cut if the mask is all-true.
  if (isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue()) {
    LoadInst *NewI = Builder.CreateAlignedLoad(VecType, Ptr, AlignVal);
    NewI->copyMetadata(*CI);
    NewI->takeName(CI);
    CI->replaceAllUsesWith(NewI);
    CI->eraseFromParent();
    return;
  }

  // Adjust alignment for the scalar instruction.
  const Align AdjustedAlignVal =
      commonAlignment(AlignVal, EltTy->getPrimitiveSizeInBits() / 8);
  unsigned VectorWidth = cast<FixedVectorType>(VecType)->getNumElements();

  // The result vector
  Value *VResult = Src0;

  if (isConstantIntVector(Mask)) {
    for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
      if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
        continue;
      Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, Idx);
      LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AdjustedAlignVal);
      VResult = Builder.CreateInsertElement(VResult, Load, Idx);
    }
    CI->replaceAllUsesWith(VResult);
    CI->eraseFromParent();
    return;
  }

  // Optimize the case where the "masked load" is a predicated load - that is,
  // where the mask is the splat of a non-constant scalar boolean. In that
  // case, use that splatted value as the guard on a conditional vector load.
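  // Illustrative shape of the emitted IR for a uniform (splat) mask, with
  // <4 x i32> as an example (names illustrative):
  //
  //   %mask.first = extractelement <4 x i1> %mask, i64 0
  //   br i1 %mask.first, label %cond.load, label %post
  // cond.load:
  //   %wide = load <4 x i32>, ptr %addr
  //   br label %post
  // post:
  //   %res = phi <4 x i32> [ %wide, %cond.load ], [ %passthru, %entry ]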
  if (isSplatValue(Mask, /*Index=*/0)) {
    Value *Predicate = Builder.CreateExtractElement(Mask, uint64_t(0ull),
                                                    Mask->getName() + ".first");
    Instruction *ThenTerm =
        SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
                                  /*BranchWeights=*/nullptr, DTU);

    BasicBlock *CondBlock = ThenTerm->getParent();
    CondBlock->setName("cond.load");
    Builder.SetInsertPoint(CondBlock->getTerminator());
    LoadInst *Load = Builder.CreateAlignedLoad(VecType, Ptr, AlignVal,
                                               CI->getName() + ".cond.load");
    Load->copyMetadata(*CI);

    BasicBlock *PostLoad = ThenTerm->getSuccessor(0);
    Builder.SetInsertPoint(PostLoad, PostLoad->begin());
    PHINode *Phi = Builder.CreatePHI(VecType, /*NumReservedValues=*/2);
    Phi->addIncoming(Load, CondBlock);
    Phi->addIncoming(Src0, IfBlock);
    Phi->takeName(CI);

    CI->replaceAllUsesWith(Phi);
    CI->eraseFromParent();
    ModifiedDT = true;
    return;
  }
  // If the mask is not v1i1, use scalar bit test operations. This generates
  // better results on X86 at least.
  // Note: this produces worse code on AMDGPU, where the "i1" is implicitly
  // SIMD - what's a good way to detect this?
  Value *SclrMask;
  if (VectorWidth != 1) {
    Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
    SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
  }

  for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
    // Fill the "else" block, created in the previous iteration
    //
    //  %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
    //  %mask_1 = and i16 %scalar_mask, i32 1 << Idx
    //  %cond = icmp ne i16 %mask_1, 0
    //  br i1 %mask_1, label %cond.load, label %else
    //
    Value *Predicate;
    if (VectorWidth != 1) {
      Value *Mask = Builder.getInt(APInt::getOneBitSet(
          VectorWidth, adjustForEndian(DL, VectorWidth, Idx)));
      Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
                                       Builder.getIntN(VectorWidth, 0));
    } else {
      Predicate = Builder.CreateExtractElement(Mask, Idx);
    }

    // Create "cond" block
    //
    //  %EltAddr = getelementptr i32* %1, i32 0
    //  %Elt = load i32* %EltAddr
    //  VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
    //
    Instruction *ThenTerm =
        SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
                                  /*BranchWeights=*/nullptr, DTU);

    BasicBlock *CondBlock = ThenTerm->getParent();
    CondBlock->setName("cond.load");

    Builder.SetInsertPoint(CondBlock->getTerminator());
    Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, Idx);
    LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AdjustedAlignVal);
    Value *NewVResult = Builder.CreateInsertElement(VResult, Load, Idx);

    // Create "else" block, fill it in the next iteration
    BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
    NewIfBlock->setName("else");
    BasicBlock *PrevIfBlock = IfBlock;
    IfBlock = NewIfBlock;

    // Create the phi to join the new and previous value.
    Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());
    PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
    Phi->addIncoming(NewVResult, CondBlock);
    Phi->addIncoming(VResult, PrevIfBlock);
    VResult = Phi;
  }

  CI->replaceAllUsesWith(VResult);
  CI->eraseFromParent();

  ModifiedDT = true;
}

// Translate a masked store intrinsic, like
// void @llvm.masked.store(<16 x i32> %src, <16 x i32>* %addr, i32 align,
//                         <16 x i1> %mask)
// to a chain of basic blocks, storing the elements one by one if
// the appropriate mask bit is set
//
//  %1 = bitcast i8* %addr to i32*
//  %2 = extractelement <16 x i1> %mask, i32 0
//  br i1 %2, label %cond.store, label %else
//
// cond.store:                                      ; preds = %0
//  %3 = extractelement <16 x i32> %val, i32 0
//  %4 = getelementptr i32* %1, i32 0
//  store i32 %3, i32* %4
//  br label %else
//
// else:                                            ; preds = %0, %cond.store
//  %5 = extractelement <16 x i1> %mask, i32 1
//  br i1 %5, label %cond.store1, label %else2
//
// cond.store1:                                     ; preds = %else
//  %6 = extractelement <16 x i32> %val, i32 1
//  %7 = getelementptr i32* %1, i32 1
//  store i32 %6, i32* %7
//  br label %else2
//  . . .
static void scalarizeMaskedStore(const DataLayout &DL, CallInst *CI,
                                 DomTreeUpdater *DTU, bool &ModifiedDT) {
  Value *Src = CI->getArgOperand(0);
  Value *Ptr = CI->getArgOperand(1);
  Value *Alignment = CI->getArgOperand(2);
  Value *Mask = CI->getArgOperand(3);

  const Align AlignVal = cast<ConstantInt>(Alignment)->getAlignValue();
  auto *VecType = cast<VectorType>(Src->getType());

  Type *EltTy = VecType->getElementType();

  IRBuilder<> Builder(CI->getContext());
  Instruction *InsertPt = CI;
  Builder.SetInsertPoint(InsertPt);
  Builder.SetCurrentDebugLocation(CI->getDebugLoc());

  // Short-cut if the mask is all-true.
  if (isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue()) {
    StoreInst *Store = Builder.CreateAlignedStore(Src, Ptr, AlignVal);
    Store->takeName(CI);
    Store->copyMetadata(*CI);
    CI->eraseFromParent();
    return;
  }

  // Adjust alignment for the scalar instruction.
  const Align AdjustedAlignVal =
      commonAlignment(AlignVal, EltTy->getPrimitiveSizeInBits() / 8);
  unsigned VectorWidth = cast<FixedVectorType>(VecType)->getNumElements();

  if (isConstantIntVector(Mask)) {
    for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
      if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
        continue;
      Value *OneElt = Builder.CreateExtractElement(Src, Idx);
      Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, Idx);
      Builder.CreateAlignedStore(OneElt, Gep, AdjustedAlignVal);
    }
    CI->eraseFromParent();
    return;
  }

  // Optimize the case where the "masked store" is a predicated store - that
  // is, when the mask is the splat of a non-constant scalar boolean. In that
  // case, optimize to a conditional store.
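  // Illustrative shape of the emitted IR for a uniform (splat) mask, with
  // <4 x i32> as an example (names illustrative):
  //
  //   %mask.first = extractelement <4 x i1> %mask, i64 0
  //   br i1 %mask.first, label %cond.store, label %post
  // cond.store:
  //   store <4 x i32> %src, ptr %addr
  //   br label %post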
  if (isSplatValue(Mask, /*Index=*/0)) {
    Value *Predicate = Builder.CreateExtractElement(Mask, uint64_t(0ull),
                                                    Mask->getName() + ".first");
    Instruction *ThenTerm =
        SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
                                  /*BranchWeights=*/nullptr, DTU);
    BasicBlock *CondBlock = ThenTerm->getParent();
    CondBlock->setName("cond.store");
    Builder.SetInsertPoint(CondBlock->getTerminator());

    StoreInst *Store = Builder.CreateAlignedStore(Src, Ptr, AlignVal);
    Store->takeName(CI);
    Store->copyMetadata(*CI);

    CI->eraseFromParent();
    ModifiedDT = true;
    return;
  }

  // If the mask is not v1i1, use scalar bit test operations. This generates
  // better results on X86 at least.

  Value *SclrMask;
  if (VectorWidth != 1) {
    Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
    SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
  }

  for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
    // Fill the "else" block, created in the previous iteration
    //
    //  %mask_1 = and i16 %scalar_mask, i32 1 << Idx
    //  %cond = icmp ne i16 %mask_1, 0
    //  br i1 %mask_1, label %cond.store, label %else
    //
    Value *Predicate;
    if (VectorWidth != 1) {
      Value *Mask = Builder.getInt(APInt::getOneBitSet(
          VectorWidth, adjustForEndian(DL, VectorWidth, Idx)));
      Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
                                       Builder.getIntN(VectorWidth, 0));
    } else {
      Predicate = Builder.CreateExtractElement(Mask, Idx);
    }

    // Create "cond" block
    //
    //  %OneElt = extractelement <16 x i32> %Src, i32 Idx
    //  %EltAddr = getelementptr i32* %1, i32 0
    //  store i32 %OneElt, i32* %EltAddr
    //
    Instruction *ThenTerm =
        SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
                                  /*BranchWeights=*/nullptr, DTU);

    BasicBlock *CondBlock = ThenTerm->getParent();
    CondBlock->setName("cond.store");

    Builder.SetInsertPoint(CondBlock->getTerminator());
    Value *OneElt = Builder.CreateExtractElement(Src, Idx);
    Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, Idx);
    Builder.CreateAlignedStore(OneElt, Gep, AdjustedAlignVal);

    // Create "else" block, fill it in the next iteration
    BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
    NewIfBlock->setName("else");

    Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());
  }
  CI->eraseFromParent();

  ModifiedDT = true;
}

// Translate a masked gather intrinsic like
// <16 x i32 > @llvm.masked.gather.v16i32( <16 x i32*> %Ptrs, i32 4,
//                                         <16 x i1> %Mask, <16 x i32> %Src)
// to a chain of basic blocks, loading the elements one by one if
// the appropriate mask bit is set
//
//  %Ptrs = getelementptr i32, i32* %base, <16 x i64> %ind
//  %Mask0 = extractelement <16 x i1> %Mask, i32 0
//  br i1 %Mask0, label %cond.load, label %else
//
// cond.load:
//  %Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
//  %Load0 = load i32, i32* %Ptr0, align 4
//  %Res0 = insertelement <16 x i32> poison, i32 %Load0, i32 0
//  br label %else
//
// else:
//  %res.phi.else = phi <16 x i32> [ %Res0, %cond.load ], [ poison, %0 ]
//  %Mask1 = extractelement <16 x i1> %Mask, i32 1
//  br i1 %Mask1, label %cond.load1, label %else2
//
// cond.load1:
//  %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
//  %Load1 = load i32, i32* %Ptr1, align 4
//  %Res1 = insertelement <16 x i32> %res.phi.else, i32 %Load1, i32 1
//  br label %else2
//  . . .
//  %Result = select <16 x i1> %Mask, <16 x i32> %res.phi.select, <16 x i32> %Src
//  ret <16 x i32> %Result
static void scalarizeMaskedGather(const DataLayout &DL, CallInst *CI,
                                  DomTreeUpdater *DTU, bool &ModifiedDT) {
  Value *Ptrs = CI->getArgOperand(0);
  Value *Alignment = CI->getArgOperand(1);
  Value *Mask = CI->getArgOperand(2);
  Value *Src0 = CI->getArgOperand(3);

  auto *VecType = cast<FixedVectorType>(CI->getType());
  Type *EltTy = VecType->getElementType();

  IRBuilder<> Builder(CI->getContext());
  Instruction *InsertPt = CI;
  BasicBlock *IfBlock = CI->getParent();
  Builder.SetInsertPoint(InsertPt);
  MaybeAlign AlignVal = cast<ConstantInt>(Alignment)->getMaybeAlignValue();

  Builder.SetCurrentDebugLocation(CI->getDebugLoc());

  // The result vector
  Value *VResult = Src0;
  unsigned VectorWidth = VecType->getNumElements();

  // Short-cut if the mask is a vector of constants.
  if (isConstantIntVector(Mask)) {
    for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
      if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
        continue;
      Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
      LoadInst *Load =
          Builder.CreateAlignedLoad(EltTy, Ptr, AlignVal, "Load" + Twine(Idx));
      VResult =
          Builder.CreateInsertElement(VResult, Load, Idx, "Res" + Twine(Idx));
    }
    CI->replaceAllUsesWith(VResult);
    CI->eraseFromParent();
    return;
  }

  // If the mask is not v1i1, use scalar bit test operations. This generates
  // better results on X86 at least.
  Value *SclrMask;
  if (VectorWidth != 1) {
    Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
    SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
  }

  for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
    // Fill the "else" block, created in the previous iteration
    //
    //  %Mask1 = and i16 %scalar_mask, i32 1 << Idx
    //  %cond = icmp ne i16 %mask_1, 0
    //  br i1 %Mask1, label %cond.load, label %else
    //

    Value *Predicate;
    if (VectorWidth != 1) {
      Value *Mask = Builder.getInt(APInt::getOneBitSet(
          VectorWidth, adjustForEndian(DL, VectorWidth, Idx)));
      Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
                                       Builder.getIntN(VectorWidth, 0));
    } else {
      Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
    }

    // Create "cond" block
    //
    //  %EltAddr = getelementptr i32* %1, i32 0
    //  %Elt = load i32* %EltAddr
    //  VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
    //
    Instruction *ThenTerm =
        SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
                                  /*BranchWeights=*/nullptr, DTU);

    BasicBlock *CondBlock = ThenTerm->getParent();
    CondBlock->setName("cond.load");

    Builder.SetInsertPoint(CondBlock->getTerminator());
    Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
    LoadInst *Load =
        Builder.CreateAlignedLoad(EltTy, Ptr, AlignVal, "Load" + Twine(Idx));
    Value *NewVResult =
        Builder.CreateInsertElement(VResult, Load, Idx, "Res" + Twine(Idx));

    // Create "else" block, fill it in the next iteration
    BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
    NewIfBlock->setName("else");
    BasicBlock *PrevIfBlock = IfBlock;
    IfBlock = NewIfBlock;

    // Create the phi to join the new and previous value.
    Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());
    PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
    Phi->addIncoming(NewVResult, CondBlock);
    Phi->addIncoming(VResult, PrevIfBlock);
    VResult = Phi;
  }

  CI->replaceAllUsesWith(VResult);
  CI->eraseFromParent();

  ModifiedDT = true;
}

// Translate a masked scatter intrinsic, like
// void @llvm.masked.scatter.v16i32(<16 x i32> %Src, <16 x i32*>* %Ptrs, i32 4,
//                                  <16 x i1> %Mask)
// to a chain of basic blocks, storing the elements one by one if
// the appropriate mask bit is set.
//
//  %Ptrs = getelementptr i32, i32* %ptr, <16 x i64> %ind
//  %Mask0 = extractelement <16 x i1> %Mask, i32 0
//  br i1 %Mask0, label %cond.store, label %else
//
// cond.store:
//  %Elt0 = extractelement <16 x i32> %Src, i32 0
//  %Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
//  store i32 %Elt0, i32* %Ptr0, align 4
//  br label %else
//
// else:
//  %Mask1 = extractelement <16 x i1> %Mask, i32 1
//  br i1 %Mask1, label %cond.store1, label %else2
//
// cond.store1:
//  %Elt1 = extractelement <16 x i32> %Src, i32 1
//  %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
//  store i32 %Elt1, i32* %Ptr1, align 4
//  br label %else2
//  . . .
static void scalarizeMaskedScatter(const DataLayout &DL, CallInst *CI,
                                   DomTreeUpdater *DTU, bool &ModifiedDT) {
  Value *Src = CI->getArgOperand(0);
  Value *Ptrs = CI->getArgOperand(1);
  Value *Alignment = CI->getArgOperand(2);
  Value *Mask = CI->getArgOperand(3);

  auto *SrcFVTy = cast<FixedVectorType>(Src->getType());

  assert(
      isa<VectorType>(Ptrs->getType()) &&
      isa<PointerType>(cast<VectorType>(Ptrs->getType())->getElementType()) &&
      "Vector of pointers is expected in masked scatter intrinsic");

  IRBuilder<> Builder(CI->getContext());
  Instruction *InsertPt = CI;
  Builder.SetInsertPoint(InsertPt);
  Builder.SetCurrentDebugLocation(CI->getDebugLoc());

  MaybeAlign AlignVal = cast<ConstantInt>(Alignment)->getMaybeAlignValue();
  unsigned VectorWidth = SrcFVTy->getNumElements();

  // Short-cut if the mask is a vector of constants.
  if (isConstantIntVector(Mask)) {
    for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
      if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
        continue;
      Value *OneElt =
          Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx));
      Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
      Builder.CreateAlignedStore(OneElt, Ptr, AlignVal);
    }
    CI->eraseFromParent();
    return;
  }

  // If the mask is not v1i1, use scalar bit test operations. This generates
  // better results on X86 at least.
  Value *SclrMask;
  if (VectorWidth != 1) {
    Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
    SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
  }

  for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
    // Fill the "else" block, created in the previous iteration
    //
    //  %Mask1 = and i16 %scalar_mask, i32 1 << Idx
    //  %cond = icmp ne i16 %mask_1, 0
    //  br i1 %Mask1, label %cond.store, label %else
    //
    Value *Predicate;
    if (VectorWidth != 1) {
      Value *Mask = Builder.getInt(APInt::getOneBitSet(
          VectorWidth, adjustForEndian(DL, VectorWidth, Idx)));
      Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
                                       Builder.getIntN(VectorWidth, 0));
    } else {
      Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
    }

    // Create "cond" block
    //
    //  %Elt1 = extractelement <16 x i32> %Src, i32 1
    //  %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
    //  store i32 %Elt1, i32* %Ptr1
    //
    Instruction *ThenTerm =
        SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
                                  /*BranchWeights=*/nullptr, DTU);

    BasicBlock *CondBlock = ThenTerm->getParent();
    CondBlock->setName("cond.store");

    Builder.SetInsertPoint(CondBlock->getTerminator());
    Value *OneElt = Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx));
    Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
    Builder.CreateAlignedStore(OneElt, Ptr, AlignVal);

    // Create "else" block, fill it in the next iteration
    BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
    NewIfBlock->setName("else");

    Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());
  }
  CI->eraseFromParent();

  ModifiedDT = true;
}

static void scalarizeMaskedExpandLoad(const DataLayout &DL, CallInst *CI,
                                      DomTreeUpdater *DTU, bool &ModifiedDT) {
  Value *Ptr = CI->getArgOperand(0);
  Value *Mask = CI->getArgOperand(1);
  Value *PassThru = CI->getArgOperand(2);
  Align Alignment = CI->getParamAlign(0).valueOrOne();

  auto *VecType = cast<FixedVectorType>(CI->getType());

  Type *EltTy = VecType->getElementType();

  IRBuilder<> Builder(CI->getContext());
  Instruction *InsertPt = CI;
  BasicBlock *IfBlock = CI->getParent();

  Builder.SetInsertPoint(InsertPt);
  Builder.SetCurrentDebugLocation(CI->getDebugLoc());

  unsigned VectorWidth = VecType->getNumElements();

  // The result vector
  Value *VResult = PassThru;

  // Adjust alignment for the scalar instruction.
  const Align AdjustedAlignment =
      commonAlignment(Alignment, EltTy->getPrimitiveSizeInBits() / 8);

  // Short-cut if the mask is a vector of constants.
  // Create a build_vector pattern, with loads/poisons as necessary and then
  // shuffle blend with the pass-through value.
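  // For example (illustrative), for a constant mask <i1 1, i1 0, i1 1, i1 0>
  // over <4 x i32>, lanes 0 and 2 are loaded from the two consecutive memory
  // slots %Ptr[0] and %Ptr[1], ShuffleMask becomes <0, 5, 2, 7>, and the final
  // shuffle takes the loaded lanes from VResult and the rest from %PassThru.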
  if (isConstantIntVector(Mask)) {
    unsigned MemIndex = 0;
    VResult = PoisonValue::get(VecType);
    SmallVector<int, 16> ShuffleMask(VectorWidth, PoisonMaskElem);
    for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
      Value *InsertElt;
      if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) {
        InsertElt = PoisonValue::get(EltTy);
        ShuffleMask[Idx] = Idx + VectorWidth;
      } else {
        Value *NewPtr =
            Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, MemIndex);
        InsertElt = Builder.CreateAlignedLoad(EltTy, NewPtr, AdjustedAlignment,
                                              "Load" + Twine(Idx));
        ShuffleMask[Idx] = Idx;
        ++MemIndex;
      }
      VResult = Builder.CreateInsertElement(VResult, InsertElt, Idx,
                                            "Res" + Twine(Idx));
    }
    VResult = Builder.CreateShuffleVector(VResult, PassThru, ShuffleMask);
    CI->replaceAllUsesWith(VResult);
    CI->eraseFromParent();
    return;
  }

  // If the mask is not v1i1, use scalar bit test operations. This generates
  // better results on X86 at least.
  Value *SclrMask;
  if (VectorWidth != 1) {
    Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
    SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
  }

  for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
    // Fill the "else" block, created in the previous iteration
    //
    //  %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
    //  %mask_1 = extractelement <16 x i1> %mask, i32 Idx
    //  br i1 %mask_1, label %cond.load, label %else
    //

    Value *Predicate;
    if (VectorWidth != 1) {
      Value *Mask = Builder.getInt(APInt::getOneBitSet(
          VectorWidth, adjustForEndian(DL, VectorWidth, Idx)));
      Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
                                       Builder.getIntN(VectorWidth, 0));
    } else {
      Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
    }

    // Create "cond" block
    //
    //  %EltAddr = getelementptr i32* %1, i32 0
    //  %Elt = load i32* %EltAddr
    //  VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
    //
    Instruction *ThenTerm =
        SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
                                  /*BranchWeights=*/nullptr, DTU);

    BasicBlock *CondBlock = ThenTerm->getParent();
    CondBlock->setName("cond.load");

    Builder.SetInsertPoint(CondBlock->getTerminator());
    LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Ptr, AdjustedAlignment);
    Value *NewVResult = Builder.CreateInsertElement(VResult, Load, Idx);

    // Move the pointer if there are more blocks to come.
    Value *NewPtr;
    if ((Idx + 1) != VectorWidth)
      NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, 1);

    // Create "else" block, fill it in the next iteration
    BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
    NewIfBlock->setName("else");
    BasicBlock *PrevIfBlock = IfBlock;
    IfBlock = NewIfBlock;

    // Create the phi to join the new and previous value.
    Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());
    PHINode *ResultPhi = Builder.CreatePHI(VecType, 2, "res.phi.else");
    ResultPhi->addIncoming(NewVResult, CondBlock);
    ResultPhi->addIncoming(VResult, PrevIfBlock);
    VResult = ResultPhi;

    // Add a PHI for the pointer if this isn't the last iteration.
    if ((Idx + 1) != VectorWidth) {
      PHINode *PtrPhi = Builder.CreatePHI(Ptr->getType(), 2, "ptr.phi.else");
      PtrPhi->addIncoming(NewPtr, CondBlock);
      PtrPhi->addIncoming(Ptr, PrevIfBlock);
      Ptr = PtrPhi;
    }
  }

  CI->replaceAllUsesWith(VResult);
  CI->eraseFromParent();

  ModifiedDT = true;
}

static void scalarizeMaskedCompressStore(const DataLayout &DL, CallInst *CI,
                                         DomTreeUpdater *DTU,
                                         bool &ModifiedDT) {
  Value *Src = CI->getArgOperand(0);
  Value *Ptr = CI->getArgOperand(1);
  Value *Mask = CI->getArgOperand(2);
  Align Alignment = CI->getParamAlign(1).valueOrOne();

  auto *VecType = cast<FixedVectorType>(Src->getType());

  IRBuilder<> Builder(CI->getContext());
  Instruction *InsertPt = CI;
  BasicBlock *IfBlock = CI->getParent();

  Builder.SetInsertPoint(InsertPt);
  Builder.SetCurrentDebugLocation(CI->getDebugLoc());

  Type *EltTy = VecType->getElementType();

  // Adjust alignment for the scalar instruction.
  const Align AdjustedAlignment =
      commonAlignment(Alignment, EltTy->getPrimitiveSizeInBits() / 8);

  unsigned VectorWidth = VecType->getNumElements();

  // Short-cut if the mask is a vector of constants.
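  // For example (illustrative), for a constant mask <i1 1, i1 0, i1 1, i1 0>,
  // lane 0 is stored to %Ptr[0] and lane 2 to %Ptr[1]: masked-off lanes
  // consume no memory, which is what distinguishes compressstore from a
  // regular masked store.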
  if (isConstantIntVector(Mask)) {
    unsigned MemIndex = 0;
    for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
      if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
        continue;
      Value *OneElt =
          Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx));
      Value *NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, MemIndex);
      Builder.CreateAlignedStore(OneElt, NewPtr, AdjustedAlignment);
      ++MemIndex;
    }
    CI->eraseFromParent();
    return;
  }

  // If the mask is not v1i1, use scalar bit test operations. This generates
  // better results on X86 at least.
  Value *SclrMask;
  if (VectorWidth != 1) {
    Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
    SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
  }

  for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
    // Fill the "else" block, created in the previous iteration
    //
    //  %mask_1 = extractelement <16 x i1> %mask, i32 Idx
    //  br i1 %mask_1, label %cond.store, label %else
    //
    Value *Predicate;
    if (VectorWidth != 1) {
      Value *Mask = Builder.getInt(APInt::getOneBitSet(
          VectorWidth, adjustForEndian(DL, VectorWidth, Idx)));
      Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
                                       Builder.getIntN(VectorWidth, 0));
    } else {
      Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
    }

    // Create "cond" block
    //
    //  %OneElt = extractelement <16 x i32> %Src, i32 Idx
    //  %EltAddr = getelementptr i32* %1, i32 0
    //  store i32 %OneElt, i32* %EltAddr
    //
    Instruction *ThenTerm =
        SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
                                  /*BranchWeights=*/nullptr, DTU);

    BasicBlock *CondBlock = ThenTerm->getParent();
    CondBlock->setName("cond.store");

    Builder.SetInsertPoint(CondBlock->getTerminator());
    Value *OneElt = Builder.CreateExtractElement(Src, Idx);
    Builder.CreateAlignedStore(OneElt, Ptr, AdjustedAlignment);

    // Move the pointer if there are more blocks to come.
    Value *NewPtr;
    if ((Idx + 1) != VectorWidth)
      NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, 1);

    // Create "else" block, fill it in the next iteration
    BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
    NewIfBlock->setName("else");
    BasicBlock *PrevIfBlock = IfBlock;
    IfBlock = NewIfBlock;

    Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());

    // Add a PHI for the pointer if this isn't the last iteration.
    if ((Idx + 1) != VectorWidth) {
      PHINode *PtrPhi = Builder.CreatePHI(Ptr->getType(), 2, "ptr.phi.else");
      PtrPhi->addIncoming(NewPtr, CondBlock);
      PtrPhi->addIncoming(Ptr, PrevIfBlock);
      Ptr = PtrPhi;
    }
  }
  CI->eraseFromParent();

  ModifiedDT = true;
}

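// Translate a masked histogram intrinsic like
// void @llvm.experimental.vector.histogram.add(<4 x ptr> %Ptrs, i32 %Inc,
//                                              <4 x i1> %Mask)
// into a chain of basic blocks that, for each lane whose mask bit is set,
// performs a scalar read-modify-write (sketch, names illustrative):
//
//  %Ptr0 = extractelement <4 x ptr> %Ptrs, i32 0
//  %Load0 = load i32, ptr %Ptr0
//  %Add0 = add i32 %Load0, %Inc
//  store i32 %Add0, ptr %Ptr0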
static void scalarizeMaskedVectorHistogram(const DataLayout &DL, CallInst *CI,
                                           DomTreeUpdater *DTU,
                                           bool &ModifiedDT) {
  // If we extend histogram to return a result someday (like the updated
  // vector) then we'll need to support it here.
  assert(CI->getType()->isVoidTy() && "Histogram with non-void return.");
  Value *Ptrs = CI->getArgOperand(0);
  Value *Inc = CI->getArgOperand(1);
  Value *Mask = CI->getArgOperand(2);

  auto *AddrType = cast<FixedVectorType>(Ptrs->getType());
  Type *EltTy = Inc->getType();

  IRBuilder<> Builder(CI->getContext());
  Instruction *InsertPt = CI;
  Builder.SetInsertPoint(InsertPt);

  Builder.SetCurrentDebugLocation(CI->getDebugLoc());

  // FIXME: Do we need to add an alignment parameter to the intrinsic?
  unsigned VectorWidth = AddrType->getNumElements();

  // Short-cut if the mask is a vector of constants.
  if (isConstantIntVector(Mask)) {
    for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
      if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
        continue;
      Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
      LoadInst *Load = Builder.CreateLoad(EltTy, Ptr, "Load" + Twine(Idx));
      Value *Add = Builder.CreateAdd(Load, Inc);
      Builder.CreateStore(Add, Ptr);
    }
    CI->eraseFromParent();
    return;
  }

  for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
    Value *Predicate =
        Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));

    Instruction *ThenTerm =
        SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
                                  /*BranchWeights=*/nullptr, DTU);

    BasicBlock *CondBlock = ThenTerm->getParent();
    CondBlock->setName("cond.histogram.update");

    Builder.SetInsertPoint(CondBlock->getTerminator());
    Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
    LoadInst *Load = Builder.CreateLoad(EltTy, Ptr, "Load" + Twine(Idx));
    Value *Add = Builder.CreateAdd(Load, Inc);
    Builder.CreateStore(Add, Ptr);

    // Create "else" block, fill it in the next iteration
    BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
    NewIfBlock->setName("else");
    Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());
  }

  CI->eraseFromParent();
  ModifiedDT = true;
}

static bool runImpl(Function &F, const TargetTransformInfo &TTI,
                    DominatorTree *DT) {
  std::optional<DomTreeUpdater> DTU;
  if (DT)
    DTU.emplace(DT, DomTreeUpdater::UpdateStrategy::Lazy);

  bool EverMadeChange = false;
  bool MadeChange = true;
  auto &DL = F.getDataLayout();
  while (MadeChange) {
    MadeChange = false;
    for (BasicBlock &BB : llvm::make_early_inc_range(F)) {
      bool ModifiedDTOnIteration = false;
      MadeChange |= optimizeBlock(BB, ModifiedDTOnIteration, TTI, DL,
                                  DTU ? &*DTU : nullptr);

      // Restart BB iteration if the dominator tree of the Function was changed
      if (ModifiedDTOnIteration)
        break;
    }

    EverMadeChange |= MadeChange;
  }
  return EverMadeChange;
}

bool ScalarizeMaskedMemIntrinLegacyPass::runOnFunction(Function &F) {
  auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
  DominatorTree *DT = nullptr;
  if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
    DT = &DTWP->getDomTree();
  return runImpl(F, TTI, DT);
}

PreservedAnalyses
ScalarizeMaskedMemIntrinPass::run(Function &F, FunctionAnalysisManager &AM) {
  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
  auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(F);
  if (!runImpl(F, TTI, DT))
    return PreservedAnalyses::all();

  PreservedAnalyses PA;
  PA.preserve<DominatorTreeAnalysis>();
  return PA;
}

static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT,
                          const TargetTransformInfo &TTI, const DataLayout &DL,
                          DomTreeUpdater *DTU) {
  bool MadeChange = false;

  BasicBlock::iterator CurInstIterator = BB.begin();
  while (CurInstIterator != BB.end()) {
    if (CallInst *CI = dyn_cast<CallInst>(&*CurInstIterator++))
      MadeChange |= optimizeCallInst(CI, ModifiedDT, TTI, DL, DTU);
    if (ModifiedDT)
      return true;
  }

  return MadeChange;
}

static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT,
                             const TargetTransformInfo &TTI,
                             const DataLayout &DL, DomTreeUpdater *DTU) {
  IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
  if (II) {
    // The scalarization code below does not work for scalable vectors.
    if (isa<ScalableVectorType>(II->getType()) ||
        any_of(II->args(),
               [](Value *V) { return isa<ScalableVectorType>(V->getType()); }))
      return false;
    switch (II->getIntrinsicID()) {
    default:
      break;
    case Intrinsic::experimental_vector_histogram_add:
      if (!TTI.isLegalMaskedVectorHistogram(CI->getArgOperand(0)->getType(),
                                            CI->getArgOperand(1)->getType()))
        return false;
      scalarizeMaskedVectorHistogram(DL, CI, DTU, ModifiedDT);
      return true;
    case Intrinsic::masked_load:
      // Scalarize unsupported vector masked load
      if (TTI.isLegalMaskedLoad(
              CI->getType(),
              cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue()))
        return false;
      scalarizeMaskedLoad(DL, CI, DTU, ModifiedDT);
      return true;
    case Intrinsic::masked_store:
      if (TTI.isLegalMaskedStore(
              CI->getArgOperand(0)->getType(),
              cast<ConstantInt>(CI->getArgOperand(2))->getAlignValue()))
        return false;
      scalarizeMaskedStore(DL, CI, DTU, ModifiedDT);
      return true;
    case Intrinsic::masked_gather: {
      MaybeAlign MA =
          cast<ConstantInt>(CI->getArgOperand(1))->getMaybeAlignValue();
      Type *LoadTy = CI->getType();
      Align Alignment =
          DL.getValueOrABITypeAlignment(MA, LoadTy->getScalarType());
      if (TTI.isLegalMaskedGather(LoadTy, Alignment) &&
          !TTI.forceScalarizeMaskedGather(cast<VectorType>(LoadTy), Alignment))
        return false;
      scalarizeMaskedGather(DL, CI, DTU, ModifiedDT);
      return true;
    }
    case Intrinsic::masked_scatter: {
      MaybeAlign MA =
          cast<ConstantInt>(CI->getArgOperand(2))->getMaybeAlignValue();
      Type *StoreTy = CI->getArgOperand(0)->getType();
      Align Alignment =
          DL.getValueOrABITypeAlignment(MA, StoreTy->getScalarType());
      if (TTI.isLegalMaskedScatter(StoreTy, Alignment) &&
          !TTI.forceScalarizeMaskedScatter(cast<VectorType>(StoreTy),
                                           Alignment))
        return false;
      scalarizeMaskedScatter(DL, CI, DTU, ModifiedDT);
      return true;
    }
    case Intrinsic::masked_expandload:
      if (TTI.isLegalMaskedExpandLoad(
              CI->getType(),
              CI->getAttributes().getParamAttrs(0).getAlignment().valueOrOne()))
        return false;
      scalarizeMaskedExpandLoad(DL, CI, DTU, ModifiedDT);
      return true;
    case Intrinsic::masked_compressstore:
      if (TTI.isLegalMaskedCompressStore(
              CI->getArgOperand(0)->getType(),
              CI->getAttributes().getParamAttrs(1).getAlignment().valueOrOne()))
        return false;
      scalarizeMaskedCompressStore(DL, CI, DTU, ModifiedDT);
      return true;
    }
  }

  return false;
}