LLVM 19.0.0git
ScalarizeMaskedMemIntrin.cpp
Go to the documentation of this file.
1//===- ScalarizeMaskedMemIntrin.cpp - Scalarize unsupported masked mem ----===//
2// intrinsics
3//
4// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5// See https://llvm.org/LICENSE.txt for license information.
6// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7//
8//===----------------------------------------------------------------------===//
9//
10// This pass replaces masked memory intrinsics - when unsupported by the target
11// - with a chain of basic blocks, that deal with the elements one-by-one if the
12// appropriate mask bit is set.
13//
14//===----------------------------------------------------------------------===//
15
17#include "llvm/ADT/Twine.h"
20#include "llvm/IR/BasicBlock.h"
21#include "llvm/IR/Constant.h"
22#include "llvm/IR/Constants.h"
24#include "llvm/IR/Dominators.h"
25#include "llvm/IR/Function.h"
26#include "llvm/IR/IRBuilder.h"
27#include "llvm/IR/Instruction.h"
30#include "llvm/IR/Type.h"
31#include "llvm/IR/Value.h"
33#include "llvm/Pass.h"
37#include <cassert>
38#include <optional>
39
40using namespace llvm;
41
42#define DEBUG_TYPE "scalarize-masked-mem-intrin"
43
44namespace {
45
46class ScalarizeMaskedMemIntrinLegacyPass : public FunctionPass {
47public:
48 static char ID; // Pass identification, replacement for typeid
49
50 explicit ScalarizeMaskedMemIntrinLegacyPass() : FunctionPass(ID) {
53 }
54
55 bool runOnFunction(Function &F) override;
56
57 StringRef getPassName() const override {
58 return "Scalarize Masked Memory Intrinsics";
59 }
60
61 void getAnalysisUsage(AnalysisUsage &AU) const override {
64 }
65};
66
67} // end anonymous namespace
68
69static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT,
70 const TargetTransformInfo &TTI, const DataLayout &DL,
71 DomTreeUpdater *DTU);
72static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT,
74 const DataLayout &DL, DomTreeUpdater *DTU);
75
76char ScalarizeMaskedMemIntrinLegacyPass::ID = 0;
77
78INITIALIZE_PASS_BEGIN(ScalarizeMaskedMemIntrinLegacyPass, DEBUG_TYPE,
79 "Scalarize unsupported masked memory intrinsics", false,
80 false)
83INITIALIZE_PASS_END(ScalarizeMaskedMemIntrinLegacyPass, DEBUG_TYPE,
84 "Scalarize unsupported masked memory intrinsics", false,
85 false)
86
88 return new ScalarizeMaskedMemIntrinLegacyPass();
89}
90
91static bool isConstantIntVector(Value *Mask) {
92 Constant *C = dyn_cast<Constant>(Mask);
93 if (!C)
94 return false;
95
96 unsigned NumElts = cast<FixedVectorType>(Mask->getType())->getNumElements();
97 for (unsigned i = 0; i != NumElts; ++i) {
98 Constant *CElt = C->getAggregateElement(i);
99 if (!CElt || !isa<ConstantInt>(CElt))
100 return false;
101 }
102
103 return true;
104}
105
106static unsigned adjustForEndian(const DataLayout &DL, unsigned VectorWidth,
107 unsigned Idx) {
108 return DL.isBigEndian() ? VectorWidth - 1 - Idx : Idx;
109}
110
111// Translate a masked load intrinsic like
112// <16 x i32 > @llvm.masked.load( <16 x i32>* %addr, i32 align,
113// <16 x i1> %mask, <16 x i32> %passthru)
114// to a chain of basic blocks, with loading element one-by-one if
115// the appropriate mask bit is set
116//
117// %1 = bitcast i8* %addr to i32*
118// %2 = extractelement <16 x i1> %mask, i32 0
119// br i1 %2, label %cond.load, label %else
120//
121// cond.load: ; preds = %0
122// %3 = getelementptr i32* %1, i32 0
123// %4 = load i32* %3
124// %5 = insertelement <16 x i32> %passthru, i32 %4, i32 0
125// br label %else
126//
127// else: ; preds = %0, %cond.load
128// %res.phi.else = phi <16 x i32> [ %5, %cond.load ], [ %passthru, %0 ]
129// %6 = extractelement <16 x i1> %mask, i32 1
130// br i1 %6, label %cond.load1, label %else2
131//
132// cond.load1: ; preds = %else
133// %7 = getelementptr i32* %1, i32 1
134// %8 = load i32* %7
135// %9 = insertelement <16 x i32> %res.phi.else, i32 %8, i32 1
136// br label %else2
137//
138// else2: ; preds = %else, %cond.load1
139// %res.phi.else3 = phi <16 x i32> [ %9, %cond.load1 ], [ %res.phi.else, %else ]
140// %10 = extractelement <16 x i1> %mask, i32 2
141// br i1 %10, label %cond.load4, label %else5
142//
144 DomTreeUpdater *DTU, bool &ModifiedDT) {
145 Value *Ptr = CI->getArgOperand(0);
146 Value *Alignment = CI->getArgOperand(1);
147 Value *Mask = CI->getArgOperand(2);
148 Value *Src0 = CI->getArgOperand(3);
149
150 const Align AlignVal = cast<ConstantInt>(Alignment)->getAlignValue();
151 VectorType *VecType = cast<FixedVectorType>(CI->getType());
152
153 Type *EltTy = VecType->getElementType();
154
155 IRBuilder<> Builder(CI->getContext());
156 Instruction *InsertPt = CI;
157 BasicBlock *IfBlock = CI->getParent();
158
159 Builder.SetInsertPoint(InsertPt);
161
162 // Short-cut if the mask is all-true.
163 if (isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue()) {
164 Value *NewI = Builder.CreateAlignedLoad(VecType, Ptr, AlignVal);
165 CI->replaceAllUsesWith(NewI);
166 CI->eraseFromParent();
167 return;
168 }
169
170 // Adjust alignment for the scalar instruction.
171 const Align AdjustedAlignVal =
172 commonAlignment(AlignVal, EltTy->getPrimitiveSizeInBits() / 8);
173 unsigned VectorWidth = cast<FixedVectorType>(VecType)->getNumElements();
174
175 // The result vector
176 Value *VResult = Src0;
177
178 if (isConstantIntVector(Mask)) {
179 for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
180 if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
181 continue;
182 Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, Idx);
183 LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AdjustedAlignVal);
184 VResult = Builder.CreateInsertElement(VResult, Load, Idx);
185 }
186 CI->replaceAllUsesWith(VResult);
187 CI->eraseFromParent();
188 return;
189 }
190
191 // If the mask is not v1i1, use scalar bit test operations. This generates
192 // better results on X86 at least.
193 Value *SclrMask;
194 if (VectorWidth != 1) {
195 Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
196 SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
197 }
198
199 for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
200 // Fill the "else" block, created in the previous iteration
201 //
202 // %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
203 // %mask_1 = and i16 %scalar_mask, i32 1 << Idx
204 // %cond = icmp ne i16 %mask_1, 0
205 // br i1 %cond, label %cond.load, label %else
206 //
207 Value *Predicate;
208 if (VectorWidth != 1) {
209 Value *Mask = Builder.getInt(APInt::getOneBitSet(
210 VectorWidth, adjustForEndian(DL, VectorWidth, Idx)));
211 Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
212 Builder.getIntN(VectorWidth, 0));
213 } else {
214 Predicate = Builder.CreateExtractElement(Mask, Idx);
215 }
216
217 // Create "cond" block
218 //
219 // %EltAddr = getelementptr i32* %1, i32 0
220 // %Elt = load i32* %EltAddr
221 // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
222 //
223 Instruction *ThenTerm =
224 SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
225 /*BranchWeights=*/nullptr, DTU);
226
227 BasicBlock *CondBlock = ThenTerm->getParent();
228 CondBlock->setName("cond.load");
229
230 Builder.SetInsertPoint(CondBlock->getTerminator());
231 Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, Idx);
232 LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AdjustedAlignVal);
233 Value *NewVResult = Builder.CreateInsertElement(VResult, Load, Idx);
234
235 // Create "else" block, fill it in the next iteration
236 BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
237 NewIfBlock->setName("else");
238 BasicBlock *PrevIfBlock = IfBlock;
239 IfBlock = NewIfBlock;
240
241 // Create the phi to join the new and previous value.
242 Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());
243 PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
244 Phi->addIncoming(NewVResult, CondBlock);
245 Phi->addIncoming(VResult, PrevIfBlock);
246 VResult = Phi;
247 }
248
249 CI->replaceAllUsesWith(VResult);
250 CI->eraseFromParent();
251
252 ModifiedDT = true;
253}
254
255// Translate a masked store intrinsic, like
256// void @llvm.masked.store(<16 x i32> %src, <16 x i32>* %addr, i32 align,
257// <16 x i1> %mask)
258// to a chain of basic blocks, that stores element one-by-one if
259// the appropriate mask bit is set
260//
261// %1 = bitcast i8* %addr to i32*
262// %2 = extractelement <16 x i1> %mask, i32 0
263// br i1 %2, label %cond.store, label %else
264//
265// cond.store: ; preds = %0
266// %3 = extractelement <16 x i32> %val, i32 0
267// %4 = getelementptr i32* %1, i32 0
268// store i32 %3, i32* %4
269// br label %else
270//
271// else: ; preds = %0, %cond.store
272// %5 = extractelement <16 x i1> %mask, i32 1
273// br i1 %5, label %cond.store1, label %else2
274//
275// cond.store1: ; preds = %else
276// %6 = extractelement <16 x i32> %val, i32 1
277// %7 = getelementptr i32* %1, i32 1
278// store i32 %6, i32* %7
279// br label %else2
280// . . .
282 DomTreeUpdater *DTU, bool &ModifiedDT) {
283 Value *Src = CI->getArgOperand(0);
284 Value *Ptr = CI->getArgOperand(1);
285 Value *Alignment = CI->getArgOperand(2);
286 Value *Mask = CI->getArgOperand(3);
287
288 const Align AlignVal = cast<ConstantInt>(Alignment)->getAlignValue();
289 auto *VecType = cast<VectorType>(Src->getType());
290
291 Type *EltTy = VecType->getElementType();
292
293 IRBuilder<> Builder(CI->getContext());
294 Instruction *InsertPt = CI;
295 Builder.SetInsertPoint(InsertPt);
297
298 // Short-cut if the mask is all-true.
299 if (isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue()) {
300 Builder.CreateAlignedStore(Src, Ptr, AlignVal);
301 CI->eraseFromParent();
302 return;
303 }
304
305 // Adjust alignment for the scalar instruction.
306 const Align AdjustedAlignVal =
307 commonAlignment(AlignVal, EltTy->getPrimitiveSizeInBits() / 8);
308 unsigned VectorWidth = cast<FixedVectorType>(VecType)->getNumElements();
309
310 if (isConstantIntVector(Mask)) {
311 for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
312 if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
313 continue;
314 Value *OneElt = Builder.CreateExtractElement(Src, Idx);
315 Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, Idx);
316 Builder.CreateAlignedStore(OneElt, Gep, AdjustedAlignVal);
317 }
318 CI->eraseFromParent();
319 return;
320 }
321
322 // If the mask is not v1i1, use scalar bit test operations. This generates
323 // better results on X86 at least.
324 Value *SclrMask;
325 if (VectorWidth != 1) {
326 Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
327 SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
328 }
329
330 for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
331 // Fill the "else" block, created in the previous iteration
332 //
333 // %mask_1 = and i16 %scalar_mask, i32 1 << Idx
334 // %cond = icmp ne i16 %mask_1, 0
335 // br i1 %cond, label %cond.store, label %else
336 //
337 Value *Predicate;
338 if (VectorWidth != 1) {
339 Value *Mask = Builder.getInt(APInt::getOneBitSet(
340 VectorWidth, adjustForEndian(DL, VectorWidth, Idx)));
341 Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
342 Builder.getIntN(VectorWidth, 0));
343 } else {
344 Predicate = Builder.CreateExtractElement(Mask, Idx);
345 }
346
347 // Create "cond" block
348 //
349 // %OneElt = extractelement <16 x i32> %Src, i32 Idx
350 // %EltAddr = getelementptr i32* %1, i32 0
351 // %store i32 %OneElt, i32* %EltAddr
352 //
353 Instruction *ThenTerm =
354 SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
355 /*BranchWeights=*/nullptr, DTU);
356
357 BasicBlock *CondBlock = ThenTerm->getParent();
358 CondBlock->setName("cond.store");
359
360 Builder.SetInsertPoint(CondBlock->getTerminator());
361 Value *OneElt = Builder.CreateExtractElement(Src, Idx);
362 Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, Idx);
363 Builder.CreateAlignedStore(OneElt, Gep, AdjustedAlignVal);
364
365 // Create "else" block, fill it in the next iteration
366 BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
367 NewIfBlock->setName("else");
368
369 Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());
370 }
371 CI->eraseFromParent();
372
373 ModifiedDT = true;
374}
375
376// Translate a masked gather intrinsic like
377// <16 x i32 > @llvm.masked.gather.v16i32( <16 x i32*> %Ptrs, i32 4,
378// <16 x i1> %Mask, <16 x i32> %Src)
379// to a chain of basic blocks, with loading element one-by-one if
380// the appropriate mask bit is set
381//
382// %Ptrs = getelementptr i32, i32* %base, <16 x i64> %ind
383// %Mask0 = extractelement <16 x i1> %Mask, i32 0
384// br i1 %Mask0, label %cond.load, label %else
385//
386// cond.load:
387// %Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
388// %Load0 = load i32, i32* %Ptr0, align 4
389// %Res0 = insertelement <16 x i32> %Src, i32 %Load0, i32 0
390// br label %else
391//
392// else:
393// %res.phi.else = phi <16 x i32>[%Res0, %cond.load], [%Src, %0]
394// %Mask1 = extractelement <16 x i1> %Mask, i32 1
395// br i1 %Mask1, label %cond.load1, label %else2
396//
397// cond.load1:
398// %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
399// %Load1 = load i32, i32* %Ptr1, align 4
400// %Res1 = insertelement <16 x i32> %res.phi.else, i32 %Load1, i32 1
401// br label %else2
402// . . .
403// %Result = <final %res.phi.else> ; no select is emitted: masked-off lanes already hold %Src
404// ret <16 x i32> %Result
406 DomTreeUpdater *DTU, bool &ModifiedDT) {
407 Value *Ptrs = CI->getArgOperand(0);
408 Value *Alignment = CI->getArgOperand(1);
409 Value *Mask = CI->getArgOperand(2);
410 Value *Src0 = CI->getArgOperand(3);
411
412 auto *VecType = cast<FixedVectorType>(CI->getType());
413 Type *EltTy = VecType->getElementType();
414
415 IRBuilder<> Builder(CI->getContext());
416 Instruction *InsertPt = CI;
417 BasicBlock *IfBlock = CI->getParent();
418 Builder.SetInsertPoint(InsertPt);
419 MaybeAlign AlignVal = cast<ConstantInt>(Alignment)->getMaybeAlignValue();
420
422
423 // The result vector
424 Value *VResult = Src0;
425 unsigned VectorWidth = VecType->getNumElements();
426
427 // Shorten the way if the mask is a vector of constants.
428 if (isConstantIntVector(Mask)) {
429 for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
430 if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
431 continue;
432 Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
433 LoadInst *Load =
434 Builder.CreateAlignedLoad(EltTy, Ptr, AlignVal, "Load" + Twine(Idx));
435 VResult =
436 Builder.CreateInsertElement(VResult, Load, Idx, "Res" + Twine(Idx));
437 }
438 CI->replaceAllUsesWith(VResult);
439 CI->eraseFromParent();
440 return;
441 }
442
443 // If the mask is not v1i1, use scalar bit test operations. This generates
444 // better results on X86 at least.
445 Value *SclrMask;
446 if (VectorWidth != 1) {
447 Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
448 SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
449 }
450
451 for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
452 // Fill the "else" block, created in the previous iteration
453 //
454 // %Mask1 = and i16 %scalar_mask, i32 1 << Idx
455 // %cond = icmp ne i16 %Mask1, 0
456 // br i1 %cond, label %cond.load, label %else
457 //
458
459 Value *Predicate;
460 if (VectorWidth != 1) {
461 Value *Mask = Builder.getInt(APInt::getOneBitSet(
462 VectorWidth, adjustForEndian(DL, VectorWidth, Idx)));
463 Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
464 Builder.getIntN(VectorWidth, 0));
465 } else {
466 Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
467 }
468
469 // Create "cond" block
470 //
471 // %EltAddr = getelementptr i32* %1, i32 0
472 // %Elt = load i32* %EltAddr
473 // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
474 //
475 Instruction *ThenTerm =
476 SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
477 /*BranchWeights=*/nullptr, DTU);
478
479 BasicBlock *CondBlock = ThenTerm->getParent();
480 CondBlock->setName("cond.load");
481
482 Builder.SetInsertPoint(CondBlock->getTerminator());
483 Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
484 LoadInst *Load =
485 Builder.CreateAlignedLoad(EltTy, Ptr, AlignVal, "Load" + Twine(Idx));
486 Value *NewVResult =
487 Builder.CreateInsertElement(VResult, Load, Idx, "Res" + Twine(Idx));
488
489 // Create "else" block, fill it in the next iteration
490 BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
491 NewIfBlock->setName("else");
492 BasicBlock *PrevIfBlock = IfBlock;
493 IfBlock = NewIfBlock;
494
495 // Create the phi to join the new and previous value.
496 Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());
497 PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
498 Phi->addIncoming(NewVResult, CondBlock);
499 Phi->addIncoming(VResult, PrevIfBlock);
500 VResult = Phi;
501 }
502
503 CI->replaceAllUsesWith(VResult);
504 CI->eraseFromParent();
505
506 ModifiedDT = true;
507}
508
509// Translate a masked scatter intrinsic, like
510// void @llvm.masked.scatter.v16i32(<16 x i32> %Src, <16 x i32*> %Ptrs, i32 4,
511// <16 x i1> %Mask)
512// to a chain of basic blocks, that stores element one-by-one if
513// the appropriate mask bit is set.
514//
515// %Ptrs = getelementptr i32, i32* %ptr, <16 x i64> %ind
516// %Mask0 = extractelement <16 x i1> %Mask, i32 0
517// br i1 %Mask0, label %cond.store, label %else
518//
519// cond.store:
520// %Elt0 = extractelement <16 x i32> %Src, i32 0
521// %Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
522// store i32 %Elt0, i32* %Ptr0, align 4
523// br label %else
524//
525// else:
526// %Mask1 = extractelement <16 x i1> %Mask, i32 1
527// br i1 %Mask1, label %cond.store1, label %else2
528//
529// cond.store1:
530// %Elt1 = extractelement <16 x i32> %Src, i32 1
531// %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
532// store i32 %Elt1, i32* %Ptr1, align 4
533// br label %else2
534// . . .
536 DomTreeUpdater *DTU, bool &ModifiedDT) {
537 Value *Src = CI->getArgOperand(0);
538 Value *Ptrs = CI->getArgOperand(1);
539 Value *Alignment = CI->getArgOperand(2);
540 Value *Mask = CI->getArgOperand(3);
541
542 auto *SrcFVTy = cast<FixedVectorType>(Src->getType());
543
544 assert(
545 isa<VectorType>(Ptrs->getType()) &&
546 isa<PointerType>(cast<VectorType>(Ptrs->getType())->getElementType()) &&
547 "Vector of pointers is expected in masked scatter intrinsic");
548
549 IRBuilder<> Builder(CI->getContext());
550 Instruction *InsertPt = CI;
551 Builder.SetInsertPoint(InsertPt);
553
554 MaybeAlign AlignVal = cast<ConstantInt>(Alignment)->getMaybeAlignValue();
555 unsigned VectorWidth = SrcFVTy->getNumElements();
556
557 // Shorten the way if the mask is a vector of constants.
558 if (isConstantIntVector(Mask)) {
559 for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
560 if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
561 continue;
562 Value *OneElt =
563 Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx));
564 Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
565 Builder.CreateAlignedStore(OneElt, Ptr, AlignVal);
566 }
567 CI->eraseFromParent();
568 return;
569 }
570
571 // If the mask is not v1i1, use scalar bit test operations. This generates
572 // better results on X86 at least.
573 Value *SclrMask;
574 if (VectorWidth != 1) {
575 Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
576 SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
577 }
578
579 for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
580 // Fill the "else" block, created in the previous iteration
581 //
582 // %Mask1 = and i16 %scalar_mask, i32 1 << Idx
583 // %cond = icmp ne i16 %Mask1, 0
584 // br i1 %cond, label %cond.store, label %else
585 //
586 Value *Predicate;
587 if (VectorWidth != 1) {
588 Value *Mask = Builder.getInt(APInt::getOneBitSet(
589 VectorWidth, adjustForEndian(DL, VectorWidth, Idx)));
590 Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
591 Builder.getIntN(VectorWidth, 0));
592 } else {
593 Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
594 }
595
596 // Create "cond" block
597 //
598 // %Elt1 = extractelement <16 x i32> %Src, i32 1
599 // %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
600 // %store i32 %Elt1, i32* %Ptr1
601 //
602 Instruction *ThenTerm =
603 SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
604 /*BranchWeights=*/nullptr, DTU);
605
606 BasicBlock *CondBlock = ThenTerm->getParent();
607 CondBlock->setName("cond.store");
608
609 Builder.SetInsertPoint(CondBlock->getTerminator());
610 Value *OneElt = Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx));
611 Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
612 Builder.CreateAlignedStore(OneElt, Ptr, AlignVal);
613
614 // Create "else" block, fill it in the next iteration
615 BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
616 NewIfBlock->setName("else");
617
618 Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());
619 }
620 CI->eraseFromParent();
621
622 ModifiedDT = true;
623}
624
626 DomTreeUpdater *DTU, bool &ModifiedDT) {
627 Value *Ptr = CI->getArgOperand(0);
628 Value *Mask = CI->getArgOperand(1);
629 Value *PassThru = CI->getArgOperand(2);
630 Align Alignment = CI->getParamAlign(0).valueOrOne();
631
632 auto *VecType = cast<FixedVectorType>(CI->getType());
633
634 Type *EltTy = VecType->getElementType();
635
636 IRBuilder<> Builder(CI->getContext());
637 Instruction *InsertPt = CI;
638 BasicBlock *IfBlock = CI->getParent();
639
640 Builder.SetInsertPoint(InsertPt);
642
643 unsigned VectorWidth = VecType->getNumElements();
644
645 // The result vector
646 Value *VResult = PassThru;
647
648 // Adjust alignment for the scalar instruction.
649 const Align AdjustedAlignment =
650 commonAlignment(Alignment, EltTy->getPrimitiveSizeInBits() / 8);
651
652 // Shorten the way if the mask is a vector of constants.
653 // Create a build_vector pattern, with loads/poisons as necessary and then
654 // shuffle blend with the pass through value.
655 if (isConstantIntVector(Mask)) {
656 unsigned MemIndex = 0;
657 VResult = PoisonValue::get(VecType);
658 SmallVector<int, 16> ShuffleMask(VectorWidth, PoisonMaskElem);
659 for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
660 Value *InsertElt;
661 if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) {
662 InsertElt = PoisonValue::get(EltTy);
663 ShuffleMask[Idx] = Idx + VectorWidth;
664 } else {
665 Value *NewPtr =
666 Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, MemIndex);
667 InsertElt = Builder.CreateAlignedLoad(EltTy, NewPtr, AdjustedAlignment,
668 "Load" + Twine(Idx));
669 ShuffleMask[Idx] = Idx;
670 ++MemIndex;
671 }
672 VResult = Builder.CreateInsertElement(VResult, InsertElt, Idx,
673 "Res" + Twine(Idx));
674 }
675 VResult = Builder.CreateShuffleVector(VResult, PassThru, ShuffleMask);
676 CI->replaceAllUsesWith(VResult);
677 CI->eraseFromParent();
678 return;
679 }
680
681 // If the mask is not v1i1, use scalar bit test operations. This generates
682 // better results on X86 at least.
683 Value *SclrMask;
684 if (VectorWidth != 1) {
685 Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
686 SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
687 }
688
689 for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
690 // Fill the "else" block, created in the previous iteration
691 //
692 // %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
693 // %mask_1 = extractelement <16 x i1> %mask, i32 Idx
694 // br i1 %mask_1, label %cond.load, label %else
695 //
696
697 Value *Predicate;
698 if (VectorWidth != 1) {
699 Value *Mask = Builder.getInt(APInt::getOneBitSet(
700 VectorWidth, adjustForEndian(DL, VectorWidth, Idx)));
701 Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
702 Builder.getIntN(VectorWidth, 0));
703 } else {
704 Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
705 }
706
707 // Create "cond" block
708 //
709 // %EltAddr = getelementptr i32* %1, i32 0
710 // %Elt = load i32* %EltAddr
711 // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
712 //
713 Instruction *ThenTerm =
714 SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
715 /*BranchWeights=*/nullptr, DTU);
716
717 BasicBlock *CondBlock = ThenTerm->getParent();
718 CondBlock->setName("cond.load");
719
720 Builder.SetInsertPoint(CondBlock->getTerminator());
721 LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Ptr, AdjustedAlignment);
722 Value *NewVResult = Builder.CreateInsertElement(VResult, Load, Idx);
723
724 // Move the pointer if there are more blocks to come.
725 Value *NewPtr;
726 if ((Idx + 1) != VectorWidth)
727 NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, 1);
728
729 // Create "else" block, fill it in the next iteration
730 BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
731 NewIfBlock->setName("else");
732 BasicBlock *PrevIfBlock = IfBlock;
733 IfBlock = NewIfBlock;
734
735 // Create the phi to join the new and previous value.
736 Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());
737 PHINode *ResultPhi = Builder.CreatePHI(VecType, 2, "res.phi.else");
738 ResultPhi->addIncoming(NewVResult, CondBlock);
739 ResultPhi->addIncoming(VResult, PrevIfBlock);
740 VResult = ResultPhi;
741
742 // Add a PHI for the pointer if this isn't the last iteration.
743 if ((Idx + 1) != VectorWidth) {
744 PHINode *PtrPhi = Builder.CreatePHI(Ptr->getType(), 2, "ptr.phi.else");
745 PtrPhi->addIncoming(NewPtr, CondBlock);
746 PtrPhi->addIncoming(Ptr, PrevIfBlock);
747 Ptr = PtrPhi;
748 }
749 }
750
751 CI->replaceAllUsesWith(VResult);
752 CI->eraseFromParent();
753
754 ModifiedDT = true;
755}
756
758 DomTreeUpdater *DTU,
759 bool &ModifiedDT) {
760 Value *Src = CI->getArgOperand(0);
761 Value *Ptr = CI->getArgOperand(1);
762 Value *Mask = CI->getArgOperand(2);
763 Align Alignment = CI->getParamAlign(1).valueOrOne();
764
765 auto *VecType = cast<FixedVectorType>(Src->getType());
766
767 IRBuilder<> Builder(CI->getContext());
768 Instruction *InsertPt = CI;
769 BasicBlock *IfBlock = CI->getParent();
770
771 Builder.SetInsertPoint(InsertPt);
773
774 Type *EltTy = VecType->getElementType();
775
776 // Adjust alignment for the scalar instruction.
777 const Align AdjustedAlignment =
778 commonAlignment(Alignment, EltTy->getPrimitiveSizeInBits() / 8);
779
780 unsigned VectorWidth = VecType->getNumElements();
781
782 // Shorten the way if the mask is a vector of constants.
783 if (isConstantIntVector(Mask)) {
784 unsigned MemIndex = 0;
785 for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
786 if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
787 continue;
788 Value *OneElt =
789 Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx));
790 Value *NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, MemIndex);
791 Builder.CreateAlignedStore(OneElt, NewPtr, AdjustedAlignment);
792 ++MemIndex;
793 }
794 CI->eraseFromParent();
795 return;
796 }
797
798 // If the mask is not v1i1, use scalar bit test operations. This generates
799 // better results on X86 at least.
800 Value *SclrMask;
801 if (VectorWidth != 1) {
802 Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
803 SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
804 }
805
806 for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
807 // Fill the "else" block, created in the previous iteration
808 //
809 // %mask_1 = extractelement <16 x i1> %mask, i32 Idx
810 // br i1 %mask_1, label %cond.store, label %else
811 //
812 Value *Predicate;
813 if (VectorWidth != 1) {
814 Value *Mask = Builder.getInt(APInt::getOneBitSet(
815 VectorWidth, adjustForEndian(DL, VectorWidth, Idx)));
816 Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
817 Builder.getIntN(VectorWidth, 0));
818 } else {
819 Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
820 }
821
822 // Create "cond" block
823 //
824 // %OneElt = extractelement <16 x i32> %Src, i32 Idx
825 // %EltAddr = getelementptr i32* %1, i32 0
826 // %store i32 %OneElt, i32* %EltAddr
827 //
828 Instruction *ThenTerm =
829 SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
830 /*BranchWeights=*/nullptr, DTU);
831
832 BasicBlock *CondBlock = ThenTerm->getParent();
833 CondBlock->setName("cond.store");
834
835 Builder.SetInsertPoint(CondBlock->getTerminator());
836 Value *OneElt = Builder.CreateExtractElement(Src, Idx);
837 Builder.CreateAlignedStore(OneElt, Ptr, AdjustedAlignment);
838
839 // Move the pointer if there are more blocks to come.
840 Value *NewPtr;
841 if ((Idx + 1) != VectorWidth)
842 NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, 1);
843
844 // Create "else" block, fill it in the next iteration
845 BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
846 NewIfBlock->setName("else");
847 BasicBlock *PrevIfBlock = IfBlock;
848 IfBlock = NewIfBlock;
849
850 Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());
851
852 // Add a PHI for the pointer if this isn't the last iteration.
853 if ((Idx + 1) != VectorWidth) {
854 PHINode *PtrPhi = Builder.CreatePHI(Ptr->getType(), 2, "ptr.phi.else");
855 PtrPhi->addIncoming(NewPtr, CondBlock);
856 PtrPhi->addIncoming(Ptr, PrevIfBlock);
857 Ptr = PtrPhi;
858 }
859 }
860 CI->eraseFromParent();
861
862 ModifiedDT = true;
863}
864
866 DominatorTree *DT) {
867 std::optional<DomTreeUpdater> DTU;
868 if (DT)
869 DTU.emplace(DT, DomTreeUpdater::UpdateStrategy::Lazy);
870
871 bool EverMadeChange = false;
872 bool MadeChange = true;
873 auto &DL = F.getParent()->getDataLayout();
874 while (MadeChange) {
875 MadeChange = false;
877 bool ModifiedDTOnIteration = false;
878 MadeChange |= optimizeBlock(BB, ModifiedDTOnIteration, TTI, DL,
879 DTU ? &*DTU : nullptr);
880
881 // Restart BB iteration if the dominator tree of the Function was changed
882 if (ModifiedDTOnIteration)
883 break;
884 }
885
886 EverMadeChange |= MadeChange;
887 }
888 return EverMadeChange;
889}
890
891bool ScalarizeMaskedMemIntrinLegacyPass::runOnFunction(Function &F) {
892 auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
893 DominatorTree *DT = nullptr;
894 if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
895 DT = &DTWP->getDomTree();
896 return runImpl(F, TTI, DT);
897}
898
901 auto &TTI = AM.getResult<TargetIRAnalysis>(F);
903 if (!runImpl(F, TTI, DT))
904 return PreservedAnalyses::all();
908 return PA;
909}
910
911static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT,
912 const TargetTransformInfo &TTI, const DataLayout &DL,
913 DomTreeUpdater *DTU) {
914 bool MadeChange = false;
915
916 BasicBlock::iterator CurInstIterator = BB.begin();
917 while (CurInstIterator != BB.end()) {
918 if (CallInst *CI = dyn_cast<CallInst>(&*CurInstIterator++))
919 MadeChange |= optimizeCallInst(CI, ModifiedDT, TTI, DL, DTU);
920 if (ModifiedDT)
921 return true;
922 }
923
924 return MadeChange;
925}
926
927static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT,
929 const DataLayout &DL, DomTreeUpdater *DTU) {
930 IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
931 if (II) {
932 // The scalarization code below does not work for scalable vectors.
933 if (isa<ScalableVectorType>(II->getType()) ||
934 any_of(II->args(),
935 [](Value *V) { return isa<ScalableVectorType>(V->getType()); }))
936 return false;
937
938 switch (II->getIntrinsicID()) {
939 default:
940 break;
941 case Intrinsic::masked_load:
942 // Scalarize unsupported vector masked load
944 CI->getType(),
945 cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue()))
946 return false;
947 scalarizeMaskedLoad(DL, CI, DTU, ModifiedDT);
948 return true;
949 case Intrinsic::masked_store:
951 CI->getArgOperand(0)->getType(),
952 cast<ConstantInt>(CI->getArgOperand(2))->getAlignValue()))
953 return false;
954 scalarizeMaskedStore(DL, CI, DTU, ModifiedDT);
955 return true;
956 case Intrinsic::masked_gather: {
957 MaybeAlign MA =
958 cast<ConstantInt>(CI->getArgOperand(1))->getMaybeAlignValue();
959 Type *LoadTy = CI->getType();
960 Align Alignment = DL.getValueOrABITypeAlignment(MA,
961 LoadTy->getScalarType());
962 if (TTI.isLegalMaskedGather(LoadTy, Alignment) &&
963 !TTI.forceScalarizeMaskedGather(cast<VectorType>(LoadTy), Alignment))
964 return false;
965 scalarizeMaskedGather(DL, CI, DTU, ModifiedDT);
966 return true;
967 }
968 case Intrinsic::masked_scatter: {
969 MaybeAlign MA =
970 cast<ConstantInt>(CI->getArgOperand(2))->getMaybeAlignValue();
971 Type *StoreTy = CI->getArgOperand(0)->getType();
972 Align Alignment = DL.getValueOrABITypeAlignment(MA,
973 StoreTy->getScalarType());
974 if (TTI.isLegalMaskedScatter(StoreTy, Alignment) &&
975 !TTI.forceScalarizeMaskedScatter(cast<VectorType>(StoreTy),
976 Alignment))
977 return false;
978 scalarizeMaskedScatter(DL, CI, DTU, ModifiedDT);
979 return true;
980 }
981 case Intrinsic::masked_expandload:
983 CI->getType(),
985 return false;
986 scalarizeMaskedExpandLoad(DL, CI, DTU, ModifiedDT);
987 return true;
988 case Intrinsic::masked_compressstore:
990 CI->getArgOperand(0)->getType(),
992 return false;
993 scalarizeMaskedCompressStore(DL, CI, DTU, ModifiedDT);
994 return true;
995 }
996 }
997
998 return false;
999}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static Error unsupported(const char *Str, const Triple &T)
Definition: MachO.cpp:71
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
static bool runImpl(Function &F, const TargetLowering &TLI)
#define F(x, y, z)
Definition: MD5.cpp:55
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static unsigned adjustForEndian(const DataLayout &DL, unsigned VectorWidth, unsigned Idx)
static void scalarizeMaskedScatter(const DataLayout &DL, CallInst *CI, DomTreeUpdater *DTU, bool &ModifiedDT)
static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT, const TargetTransformInfo &TTI, const DataLayout &DL, DomTreeUpdater *DTU)
static void scalarizeMaskedExpandLoad(const DataLayout &DL, CallInst *CI, DomTreeUpdater *DTU, bool &ModifiedDT)
static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT, const TargetTransformInfo &TTI, const DataLayout &DL, DomTreeUpdater *DTU)
static void scalarizeMaskedCompressStore(const DataLayout &DL, CallInst *CI, DomTreeUpdater *DTU, bool &ModifiedDT)
static void scalarizeMaskedStore(const DataLayout &DL, CallInst *CI, DomTreeUpdater *DTU, bool &ModifiedDT)
static bool runImpl(Function &F, const TargetTransformInfo &TTI, DominatorTree *DT)
static void scalarizeMaskedLoad(const DataLayout &DL, CallInst *CI, DomTreeUpdater *DTU, bool &ModifiedDT)
static void scalarizeMaskedGather(const DataLayout &DL, CallInst *CI, DomTreeUpdater *DTU, bool &ModifiedDT)
static bool isConstantIntVector(Value *Mask)
#define DEBUG_TYPE
Scalarize unsupported masked memory intrinsics
This pass exposes codegen information to IR-level passes.
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:217
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:348
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Definition: PassManager.h:519
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:500
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
AttributeSet getParamAttrs(unsigned ArgNo) const
The attributes for the argument or parameter at the given index are returned.
MaybeAlign getAlignment() const
Definition: Attributes.cpp:857
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
iterator end()
Definition: BasicBlock.h:442
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:429
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:164
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:220
MaybeAlign getParamAlign(unsigned ArgNo) const
Extract the alignment for a call or parameter (0=unknown).
Definition: InstrTypes.h:2065
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1648
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
Definition: InstrTypes.h:1639
AttributeList getAttributes() const
Return the parameter attributes for this call.
Definition: InstrTypes.h:1780
This class represents a function call, abstracting a target machine's calling convention.
This is an important base class in LLVM.
Definition: Constant.h:41
A parsed version of the target data layout string, and methods for querying it.
Definition: DataLayout.h:110
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:279
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:317
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:311
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2455
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2443
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Definition: IRBuilder.h:533
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
Definition: IRBuilder.h:1806
Value * CreateConstInBoundsGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0, const Twine &Name="")
Definition: IRBuilder.h:1890
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition: IRBuilder.h:220
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2228
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition: IRBuilder.h:2380
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2110
ConstantInt * getIntN(unsigned N, uint64_t C)
Get a constant N-bit value, zero extended or truncated from a 64-bit value.
Definition: IRBuilder.h:491
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2477
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1469
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:180
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
Definition: IRBuilder.h:1825
ConstantInt * getInt(const APInt &AI)
Get a constant integer value.
Definition: IRBuilder.h:496
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2649
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:453
const BasicBlock * getParent() const
Definition: Instruction.h:151
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:47
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:54
An instruction for reading from memory.
Definition: Instructions.h:184
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
Definition: Pass.cpp:98
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition: Constants.cpp:1827
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:109
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:115
void preserve()
Mark an analysis as preserved.
Definition: Analysis.h:129
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
Analysis pass providing the TargetTransformInfo.
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
bool isLegalMaskedScatter(Type *DataType, Align Alignment) const
Return true if the target supports masked scatter.
bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment) const
Return true if the target supports masked expand load.
bool isLegalMaskedGather(Type *DataType, Align Alignment) const
Return true if the target supports masked gather.
bool forceScalarizeMaskedGather(VectorType *Type, Align Alignment) const
Return true if the target forces scalarizing of llvm.masked.gather intrinsics.
bool isLegalMaskedCompressStore(Type *DataType, Align Alignment) const
Return true if the target supports masked compress store.
bool isLegalMaskedStore(Type *DataType, Align Alignment) const
Return true if the target supports masked store.
bool forceScalarizeMaskedScatter(VectorType *Type, Align Alignment) const
Return true if the target forces scalarizing of llvm.masked.scatter intrinsics.
bool isLegalMaskedLoad(Type *DataType, Align Alignment) const
Return true if the target supports masked load.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:348
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:377
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1074
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:665
FunctionPass * createScalarizeMaskedMemIntrinLegacyPass()
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1738
void initializeScalarizeMaskedMemIntrinLegacyPassPass(PassRegistry &)
constexpr int PoisonMaskElem
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
Definition: Alignment.h:141
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)