LLVM 20.0.0git
DXILIntrinsicExpansion.cpp
Go to the documentation of this file.
1//===- DXILIntrinsicExpansion.cpp - Prepare LLVM Module for DXIL encoding--===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file This file contains DXIL intrinsic expansions for those that don't have
10// opcodes in DirectX Intermediate Language (DXIL).
11//===----------------------------------------------------------------------===//
12
14#include "DirectX.h"
15#include "llvm/ADT/STLExtras.h"
17#include "llvm/CodeGen/Passes.h"
18#include "llvm/IR/IRBuilder.h"
19#include "llvm/IR/InstrTypes.h"
20#include "llvm/IR/Instruction.h"
22#include "llvm/IR/Intrinsics.h"
23#include "llvm/IR/IntrinsicsDirectX.h"
24#include "llvm/IR/Module.h"
25#include "llvm/IR/PassManager.h"
26#include "llvm/IR/Type.h"
27#include "llvm/Pass.h"
30
31#define DEBUG_TYPE "dxil-intrinsic-expansion"
32
33using namespace llvm;
34
36
37public:
38 bool runOnModule(Module &M) override;
40
41 static char ID; // Pass identification.
42};
43
45 switch (F.getIntrinsicID()) {
46 case Intrinsic::abs:
47 case Intrinsic::atan2:
48 case Intrinsic::exp:
49 case Intrinsic::log:
50 case Intrinsic::log10:
51 case Intrinsic::pow:
52 case Intrinsic::dx_all:
53 case Intrinsic::dx_any:
54 case Intrinsic::dx_cross:
55 case Intrinsic::dx_uclamp:
56 case Intrinsic::dx_sclamp:
57 case Intrinsic::dx_nclamp:
58 case Intrinsic::dx_degrees:
59 case Intrinsic::dx_lerp:
60 case Intrinsic::dx_length:
61 case Intrinsic::dx_normalize:
62 case Intrinsic::dx_fdot:
63 case Intrinsic::dx_sdot:
64 case Intrinsic::dx_udot:
65 case Intrinsic::dx_sign:
66 case Intrinsic::dx_step:
67 case Intrinsic::dx_radians:
68 case Intrinsic::vector_reduce_add:
69 case Intrinsic::vector_reduce_fadd:
70 return true;
71 }
72 return false;
73}
74static Value *expandVecReduceAdd(CallInst *Orig, Intrinsic::ID IntrinsicId) {
75 assert(IntrinsicId == Intrinsic::vector_reduce_add ||
76 IntrinsicId == Intrinsic::vector_reduce_fadd);
77
78 IRBuilder<> Builder(Orig);
79 bool IsFAdd = (IntrinsicId == Intrinsic::vector_reduce_fadd);
80
81 Value *X = Orig->getOperand(IsFAdd ? 1 : 0);
82 Type *Ty = X->getType();
83 auto *XVec = dyn_cast<FixedVectorType>(Ty);
84 unsigned XVecSize = XVec->getNumElements();
85 Value *Sum = Builder.CreateExtractElement(X, static_cast<uint64_t>(0));
86
87 // Handle the initial start value for floating-point addition.
88 if (IsFAdd) {
89 Constant *StartValue = dyn_cast<Constant>(Orig->getOperand(0));
90 if (StartValue && !StartValue->isZeroValue())
91 Sum = Builder.CreateFAdd(Sum, StartValue);
92 }
93
94 // Accumulate the remaining vector elements.
95 for (unsigned I = 1; I < XVecSize; I++) {
96 Value *Elt = Builder.CreateExtractElement(X, I);
97 if (IsFAdd)
98 Sum = Builder.CreateFAdd(Sum, Elt);
99 else
100 Sum = Builder.CreateAdd(Sum, Elt);
101 }
102
103 return Sum;
104}
105
106static Value *expandAbs(CallInst *Orig) {
107 Value *X = Orig->getOperand(0);
108 IRBuilder<> Builder(Orig);
109 Type *Ty = X->getType();
110 Type *EltTy = Ty->getScalarType();
111 Constant *Zero = Ty->isVectorTy()
114 cast<FixedVectorType>(Ty)->getNumElements()),
115 ConstantInt::get(EltTy, 0))
116 : ConstantInt::get(EltTy, 0);
117 auto *V = Builder.CreateSub(Zero, X);
118 return Builder.CreateIntrinsic(Ty, Intrinsic::smax, {X, V}, nullptr,
119 "dx.max");
120}
121
123
124 VectorType *VT = cast<VectorType>(Orig->getType());
125 if (cast<FixedVectorType>(VT)->getNumElements() != 3)
126 report_fatal_error(Twine("return vector must have exactly 3 elements"),
127 /* gen_crash_diag=*/false);
128
129 Value *op0 = Orig->getOperand(0);
130 Value *op1 = Orig->getOperand(1);
131 IRBuilder<> Builder(Orig);
132
133 Value *op0_x = Builder.CreateExtractElement(op0, (uint64_t)0, "x0");
134 Value *op0_y = Builder.CreateExtractElement(op0, 1, "x1");
135 Value *op0_z = Builder.CreateExtractElement(op0, 2, "x2");
136
137 Value *op1_x = Builder.CreateExtractElement(op1, (uint64_t)0, "y0");
138 Value *op1_y = Builder.CreateExtractElement(op1, 1, "y1");
139 Value *op1_z = Builder.CreateExtractElement(op1, 2, "y2");
140
141 auto MulSub = [&](Value *x0, Value *y0, Value *x1, Value *y1) -> Value * {
142 Value *xy = Builder.CreateFMul(x0, y1);
143 Value *yx = Builder.CreateFMul(y0, x1);
144 return Builder.CreateFSub(xy, yx, Orig->getName());
145 };
146
147 Value *yz_zy = MulSub(op0_y, op0_z, op1_y, op1_z);
148 Value *zx_xz = MulSub(op0_z, op0_x, op1_z, op1_x);
149 Value *xy_yx = MulSub(op0_x, op0_y, op1_x, op1_y);
150
151 Value *cross = UndefValue::get(VT);
152 cross = Builder.CreateInsertElement(cross, yz_zy, (uint64_t)0);
153 cross = Builder.CreateInsertElement(cross, zx_xz, 1);
154 cross = Builder.CreateInsertElement(cross, xy_yx, 2);
155 return cross;
156}
157
158// Create appropriate DXIL float dot intrinsic for the given A and B operands
159// The appropriate opcode will be determined by the size of the operands
160// The dot product is placed in the position indicated by Orig
162 Type *ATy = A->getType();
163 [[maybe_unused]] Type *BTy = B->getType();
164 assert(ATy->isVectorTy() && BTy->isVectorTy());
165
166 IRBuilder<> Builder(Orig);
167
168 auto *AVec = dyn_cast<FixedVectorType>(ATy);
169
171
172 Intrinsic::ID DotIntrinsic = Intrinsic::dx_dot4;
173 switch (AVec->getNumElements()) {
174 case 2:
175 DotIntrinsic = Intrinsic::dx_dot2;
176 break;
177 case 3:
178 DotIntrinsic = Intrinsic::dx_dot3;
179 break;
180 case 4:
181 DotIntrinsic = Intrinsic::dx_dot4;
182 break;
183 default:
185 Twine("Invalid dot product input vector: length is outside 2-4"),
186 /* gen_crash_diag=*/false);
187 return nullptr;
188 }
189 return Builder.CreateIntrinsic(ATy->getScalarType(), DotIntrinsic,
190 ArrayRef<Value *>{A, B}, nullptr, "dot");
191}
192
193// Create the appropriate DXIL float dot intrinsic for the operands of Orig
194// The appropriate opcode will be determined by the size of the operands
195// The dot product is placed in the position indicated by Orig
197 return expandFloatDotIntrinsic(Orig, Orig->getOperand(0),
198 Orig->getOperand(1));
199}
200
201// Expand integer dot product to multiply and add ops
203 Intrinsic::ID DotIntrinsic) {
204 assert(DotIntrinsic == Intrinsic::dx_sdot ||
205 DotIntrinsic == Intrinsic::dx_udot);
206 Value *A = Orig->getOperand(0);
207 Value *B = Orig->getOperand(1);
208 Type *ATy = A->getType();
209 [[maybe_unused]] Type *BTy = B->getType();
210 assert(ATy->isVectorTy() && BTy->isVectorTy());
211
212 IRBuilder<> Builder(Orig);
213
214 auto *AVec = dyn_cast<FixedVectorType>(ATy);
215
217
218 Value *Result;
219 Intrinsic::ID MadIntrinsic = DotIntrinsic == Intrinsic::dx_sdot
220 ? Intrinsic::dx_imad
221 : Intrinsic::dx_umad;
222 Value *Elt0 = Builder.CreateExtractElement(A, (uint64_t)0);
223 Value *Elt1 = Builder.CreateExtractElement(B, (uint64_t)0);
224 Result = Builder.CreateMul(Elt0, Elt1);
225 for (unsigned I = 1; I < AVec->getNumElements(); I++) {
226 Elt0 = Builder.CreateExtractElement(A, I);
227 Elt1 = Builder.CreateExtractElement(B, I);
228 Result = Builder.CreateIntrinsic(Result->getType(), MadIntrinsic,
229 ArrayRef<Value *>{Elt0, Elt1, Result},
230 nullptr, "dx.mad");
231 }
232 return Result;
233}
234
236 Value *X = Orig->getOperand(0);
237 IRBuilder<> Builder(Orig);
238 Type *Ty = X->getType();
239 Type *EltTy = Ty->getScalarType();
240 Constant *Log2eConst =
243 cast<FixedVectorType>(Ty)->getNumElements()),
244 ConstantFP::get(EltTy, numbers::log2ef))
245 : ConstantFP::get(EltTy, numbers::log2ef);
246 Value *NewX = Builder.CreateFMul(Log2eConst, X);
247 auto *Exp2Call =
248 Builder.CreateIntrinsic(Ty, Intrinsic::exp2, {NewX}, nullptr, "dx.exp2");
249 Exp2Call->setTailCall(Orig->isTailCall());
250 Exp2Call->setAttributes(Orig->getAttributes());
251 return Exp2Call;
252}
253
255 Intrinsic::ID intrinsicId) {
256 Value *X = Orig->getOperand(0);
257 IRBuilder<> Builder(Orig);
258 Type *Ty = X->getType();
259 Type *EltTy = Ty->getScalarType();
260
261 auto ApplyOp = [&Builder](Intrinsic::ID IntrinsicId, Value *Result,
262 Value *Elt) {
263 if (IntrinsicId == Intrinsic::dx_any)
264 return Builder.CreateOr(Result, Elt);
265 assert(IntrinsicId == Intrinsic::dx_all);
266 return Builder.CreateAnd(Result, Elt);
267 };
268
269 Value *Result = nullptr;
270 if (!Ty->isVectorTy()) {
271 Result = EltTy->isFloatingPointTy()
272 ? Builder.CreateFCmpUNE(X, ConstantFP::get(EltTy, 0))
273 : Builder.CreateICmpNE(X, ConstantInt::get(EltTy, 0));
274 } else {
275 auto *XVec = dyn_cast<FixedVectorType>(Ty);
276 Value *Cond =
277 EltTy->isFloatingPointTy()
278 ? Builder.CreateFCmpUNE(
280 ElementCount::getFixed(XVec->getNumElements()),
281 ConstantFP::get(EltTy, 0)))
282 : Builder.CreateICmpNE(
284 ElementCount::getFixed(XVec->getNumElements()),
285 ConstantInt::get(EltTy, 0)));
286 Result = Builder.CreateExtractElement(Cond, (uint64_t)0);
287 for (unsigned I = 1; I < XVec->getNumElements(); I++) {
288 Value *Elt = Builder.CreateExtractElement(Cond, I);
289 Result = ApplyOp(intrinsicId, Result, Elt);
290 }
291 }
292 return Result;
293}
294
296 Value *X = Orig->getOperand(0);
297 IRBuilder<> Builder(Orig);
298 Type *Ty = X->getType();
299 Type *EltTy = Ty->getScalarType();
300
301 // Though dx.length does work on scalar type, we can optimize it to just emit
302 // fabs, in CGBuiltin.cpp. We shouldn't see a scalar type here because
303 // CGBuiltin.cpp should have emitted a fabs call.
304 Value *Elt = Builder.CreateExtractElement(X, (uint64_t)0);
305 auto *XVec = dyn_cast<FixedVectorType>(Ty);
306 unsigned XVecSize = XVec->getNumElements();
307 if (!(Ty->isVectorTy() && XVecSize > 1))
308 report_fatal_error(Twine("Invalid input type for length intrinsic"),
309 /* gen_crash_diag=*/false);
310
311 Value *Sum = Builder.CreateFMul(Elt, Elt);
312 for (unsigned I = 1; I < XVecSize; I++) {
313 Elt = Builder.CreateExtractElement(X, I);
314 Value *Mul = Builder.CreateFMul(Elt, Elt);
315 Sum = Builder.CreateFAdd(Sum, Mul);
316 }
317 return Builder.CreateIntrinsic(EltTy, Intrinsic::sqrt, ArrayRef<Value *>{Sum},
318 nullptr, "elt.sqrt");
319}
320
322 Value *X = Orig->getOperand(0);
323 Value *Y = Orig->getOperand(1);
324 Value *S = Orig->getOperand(2);
325 IRBuilder<> Builder(Orig);
326 auto *V = Builder.CreateFSub(Y, X);
327 V = Builder.CreateFMul(S, V);
328 return Builder.CreateFAdd(X, V, "dx.lerp");
329}
330
332 float LogConstVal = numbers::ln2f) {
333 Value *X = Orig->getOperand(0);
334 IRBuilder<> Builder(Orig);
335 Type *Ty = X->getType();
336 Type *EltTy = Ty->getScalarType();
337 Constant *Ln2Const =
340 cast<FixedVectorType>(Ty)->getNumElements()),
341 ConstantFP::get(EltTy, LogConstVal))
342 : ConstantFP::get(EltTy, LogConstVal);
343 auto *Log2Call =
344 Builder.CreateIntrinsic(Ty, Intrinsic::log2, {X}, nullptr, "elt.log2");
345 Log2Call->setTailCall(Orig->isTailCall());
346 Log2Call->setAttributes(Orig->getAttributes());
347 return Builder.CreateFMul(Ln2Const, Log2Call);
348}
351}
352
353// Use dot product of vector operand with itself to calculate the length.
354// Divide the vector by that length to normalize it.
356 Value *X = Orig->getOperand(0);
357 Type *Ty = Orig->getType();
358 Type *EltTy = Ty->getScalarType();
359 IRBuilder<> Builder(Orig);
360
361 auto *XVec = dyn_cast<FixedVectorType>(Ty);
362 if (!XVec) {
363 if (auto *constantFP = dyn_cast<ConstantFP>(X)) {
364 const APFloat &fpVal = constantFP->getValueAPF();
365 if (fpVal.isZero())
366 report_fatal_error(Twine("Invalid input scalar: length is zero"),
367 /* gen_crash_diag=*/false);
368 }
369 return Builder.CreateFDiv(X, X);
370 }
371
372 Value *DotProduct = expandFloatDotIntrinsic(Orig, X, X);
373
374 // verify that the length is non-zero
375 // (if the dot product is non-zero, then the length is non-zero)
376 if (auto *constantFP = dyn_cast<ConstantFP>(DotProduct)) {
377 const APFloat &fpVal = constantFP->getValueAPF();
378 if (fpVal.isZero())
379 report_fatal_error(Twine("Invalid input vector: length is zero"),
380 /* gen_crash_diag=*/false);
381 }
382
383 Value *Multiplicand = Builder.CreateIntrinsic(EltTy, Intrinsic::dx_rsqrt,
384 ArrayRef<Value *>{DotProduct},
385 nullptr, "dx.rsqrt");
386
387 Value *MultiplicandVec =
388 Builder.CreateVectorSplat(XVec->getNumElements(), Multiplicand);
389 return Builder.CreateFMul(X, MultiplicandVec);
390}
391
393 Value *Y = Orig->getOperand(0);
394 Value *X = Orig->getOperand(1);
395 Type *Ty = X->getType();
396 IRBuilder<> Builder(Orig);
397 Builder.setFastMathFlags(Orig->getFastMathFlags());
398
399 Value *Tan = Builder.CreateFDiv(Y, X);
400
401 CallInst *Atan =
402 Builder.CreateIntrinsic(Ty, Intrinsic::atan, {Tan}, nullptr, "Elt.Atan");
403 Atan->setTailCall(Orig->isTailCall());
404 Atan->setAttributes(Orig->getAttributes());
405
406 // Modify atan result based on https://en.wikipedia.org/wiki/Atan2.
407 Constant *Pi = ConstantFP::get(Ty, llvm::numbers::pi);
408 Constant *HalfPi = ConstantFP::get(Ty, llvm::numbers::pi / 2);
409 Constant *NegHalfPi = ConstantFP::get(Ty, -llvm::numbers::pi / 2);
410 Constant *Zero = ConstantFP::get(Ty, 0);
411 Value *AtanAddPi = Builder.CreateFAdd(Atan, Pi);
412 Value *AtanSubPi = Builder.CreateFSub(Atan, Pi);
413
414 // x > 0 -> atan.
415 Value *Result = Atan;
416 Value *XLt0 = Builder.CreateFCmpOLT(X, Zero);
417 Value *XEq0 = Builder.CreateFCmpOEQ(X, Zero);
418 Value *YGe0 = Builder.CreateFCmpOGE(Y, Zero);
419 Value *YLt0 = Builder.CreateFCmpOLT(Y, Zero);
420
421 // x < 0, y >= 0 -> atan + pi.
422 Value *XLt0AndYGe0 = Builder.CreateAnd(XLt0, YGe0);
423 Result = Builder.CreateSelect(XLt0AndYGe0, AtanAddPi, Result);
424
425 // x < 0, y < 0 -> atan - pi.
426 Value *XLt0AndYLt0 = Builder.CreateAnd(XLt0, YLt0);
427 Result = Builder.CreateSelect(XLt0AndYLt0, AtanSubPi, Result);
428
429 // x == 0, y < 0 -> -pi/2
430 Value *XEq0AndYLt0 = Builder.CreateAnd(XEq0, YLt0);
431 Result = Builder.CreateSelect(XEq0AndYLt0, NegHalfPi, Result);
432
433 // x == 0, y > 0 -> pi/2
434 Value *XEq0AndYGe0 = Builder.CreateAnd(XEq0, YGe0);
435 Result = Builder.CreateSelect(XEq0AndYGe0, HalfPi, Result);
436
437 return Result;
438}
439
441
442 Value *X = Orig->getOperand(0);
443 Value *Y = Orig->getOperand(1);
444 Type *Ty = X->getType();
445 IRBuilder<> Builder(Orig);
446
447 auto *Log2Call =
448 Builder.CreateIntrinsic(Ty, Intrinsic::log2, {X}, nullptr, "elt.log2");
449 auto *Mul = Builder.CreateFMul(Log2Call, Y);
450 auto *Exp2Call =
451 Builder.CreateIntrinsic(Ty, Intrinsic::exp2, {Mul}, nullptr, "elt.exp2");
452 Exp2Call->setTailCall(Orig->isTailCall());
453 Exp2Call->setAttributes(Orig->getAttributes());
454 return Exp2Call;
455}
456
458
459 Value *X = Orig->getOperand(0);
460 Value *Y = Orig->getOperand(1);
461 Type *Ty = X->getType();
462 IRBuilder<> Builder(Orig);
463
464 Constant *One = ConstantFP::get(Ty->getScalarType(), 1.0);
465 Constant *Zero = ConstantFP::get(Ty->getScalarType(), 0.0);
466 Value *Cond = Builder.CreateFCmpOLT(Y, X);
467
468 if (Ty != Ty->getScalarType()) {
469 auto *XVec = dyn_cast<FixedVectorType>(Ty);
471 ElementCount::getFixed(XVec->getNumElements()), One);
473 ElementCount::getFixed(XVec->getNumElements()), Zero);
474 }
475
476 return Builder.CreateSelect(Cond, Zero, One);
477}
478
480 Value *X = Orig->getOperand(0);
481 Type *Ty = X->getType();
482 IRBuilder<> Builder(Orig);
483 Value *PiOver180 = ConstantFP::get(Ty, llvm::numbers::pi / 180.0);
484 return Builder.CreateFMul(X, PiOver180);
485}
486
488 if (ClampIntrinsic == Intrinsic::dx_uclamp)
489 return Intrinsic::umax;
490 if (ClampIntrinsic == Intrinsic::dx_sclamp)
491 return Intrinsic::smax;
492 assert(ClampIntrinsic == Intrinsic::dx_nclamp);
493 return Intrinsic::maxnum;
494}
495
497 if (ClampIntrinsic == Intrinsic::dx_uclamp)
498 return Intrinsic::umin;
499 if (ClampIntrinsic == Intrinsic::dx_sclamp)
500 return Intrinsic::smin;
501 assert(ClampIntrinsic == Intrinsic::dx_nclamp);
502 return Intrinsic::minnum;
503}
504
506 Intrinsic::ID ClampIntrinsic) {
507 Value *X = Orig->getOperand(0);
508 Value *Min = Orig->getOperand(1);
509 Value *Max = Orig->getOperand(2);
510 Type *Ty = X->getType();
511 IRBuilder<> Builder(Orig);
512 auto *MaxCall = Builder.CreateIntrinsic(Ty, getMaxForClamp(ClampIntrinsic),
513 {X, Min}, nullptr, "dx.max");
514 return Builder.CreateIntrinsic(Ty, getMinForClamp(ClampIntrinsic),
515 {MaxCall, Max}, nullptr, "dx.min");
516}
517
519 Value *X = Orig->getOperand(0);
520 Type *Ty = X->getType();
521 IRBuilder<> Builder(Orig);
522 Value *DegreesRatio = ConstantFP::get(Ty, 180.0 * llvm::numbers::inv_pi);
523 return Builder.CreateFMul(X, DegreesRatio);
524}
525
527 Value *X = Orig->getOperand(0);
528 Type *Ty = X->getType();
529 Type *ScalarTy = Ty->getScalarType();
530 Type *RetTy = Orig->getType();
532
533 IRBuilder<> Builder(Orig);
534
535 Value *GT;
536 Value *LT;
537 if (ScalarTy->isFloatingPointTy()) {
538 GT = Builder.CreateFCmpOLT(Zero, X);
539 LT = Builder.CreateFCmpOLT(X, Zero);
540 } else {
541 assert(ScalarTy->isIntegerTy());
542 GT = Builder.CreateICmpSLT(Zero, X);
543 LT = Builder.CreateICmpSLT(X, Zero);
544 }
545
546 Value *ZextGT = Builder.CreateZExt(GT, RetTy);
547 Value *ZextLT = Builder.CreateZExt(LT, RetTy);
548
549 return Builder.CreateSub(ZextGT, ZextLT);
550}
551
552static bool expandIntrinsic(Function &F, CallInst *Orig) {
553 Value *Result = nullptr;
554 Intrinsic::ID IntrinsicId = F.getIntrinsicID();
555 switch (IntrinsicId) {
556 case Intrinsic::abs:
557 Result = expandAbs(Orig);
558 break;
559 case Intrinsic::atan2:
560 Result = expandAtan2Intrinsic(Orig);
561 break;
562 case Intrinsic::exp:
563 Result = expandExpIntrinsic(Orig);
564 break;
565 case Intrinsic::log:
566 Result = expandLogIntrinsic(Orig);
567 break;
568 case Intrinsic::log10:
569 Result = expandLog10Intrinsic(Orig);
570 break;
571 case Intrinsic::pow:
572 Result = expandPowIntrinsic(Orig);
573 break;
574 case Intrinsic::dx_all:
575 case Intrinsic::dx_any:
576 Result = expandAnyOrAllIntrinsic(Orig, IntrinsicId);
577 break;
578 case Intrinsic::dx_cross:
579 Result = expandCrossIntrinsic(Orig);
580 break;
581 case Intrinsic::dx_uclamp:
582 case Intrinsic::dx_sclamp:
583 case Intrinsic::dx_nclamp:
584 Result = expandClampIntrinsic(Orig, IntrinsicId);
585 break;
586 case Intrinsic::dx_degrees:
587 Result = expandDegreesIntrinsic(Orig);
588 break;
589 case Intrinsic::dx_lerp:
590 Result = expandLerpIntrinsic(Orig);
591 break;
592 case Intrinsic::dx_length:
593 Result = expandLengthIntrinsic(Orig);
594 break;
595 case Intrinsic::dx_normalize:
596 Result = expandNormalizeIntrinsic(Orig);
597 break;
598 case Intrinsic::dx_fdot:
599 Result = expandFloatDotIntrinsic(Orig);
600 break;
601 case Intrinsic::dx_sdot:
602 case Intrinsic::dx_udot:
603 Result = expandIntegerDotIntrinsic(Orig, IntrinsicId);
604 break;
605 case Intrinsic::dx_sign:
606 Result = expandSignIntrinsic(Orig);
607 break;
608 case Intrinsic::dx_step:
609 Result = expandStepIntrinsic(Orig);
610 break;
611 case Intrinsic::dx_radians:
612 Result = expandRadiansIntrinsic(Orig);
613 break;
614 case Intrinsic::vector_reduce_add:
615 case Intrinsic::vector_reduce_fadd:
616 Result = expandVecReduceAdd(Orig, IntrinsicId);
617 break;
618 }
619 if (Result) {
620 Orig->replaceAllUsesWith(Result);
621 Orig->eraseFromParent();
622 return true;
623 }
624 return false;
625}
626
628 for (auto &F : make_early_inc_range(M.functions())) {
630 continue;
631 bool IntrinsicExpanded = false;
632 for (User *U : make_early_inc_range(F.users())) {
633 auto *IntrinsicCall = dyn_cast<CallInst>(U);
634 if (!IntrinsicCall)
635 continue;
636 IntrinsicExpanded = expandIntrinsic(F, IntrinsicCall);
637 }
638 if (F.user_empty() && IntrinsicExpanded)
639 F.eraseFromParent();
640 }
641 return true;
642}
643
646 if (expansionIntrinsics(M))
648 return PreservedAnalyses::all();
649}
650
652 return expansionIntrinsics(M);
653}
654
656
658 "DXIL Intrinsic Expansion", false, false)
660 "DXIL Intrinsic Expansion", false, false)
661
663 return new DXILIntrinsicExpansionLegacy();
664}
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static Value * expandNormalizeIntrinsic(CallInst *Orig)
DXIL Intrinsic Expansion
static bool expandIntrinsic(Function &F, CallInst *Orig)
static Value * expandLengthIntrinsic(CallInst *Orig)
static Value * expandClampIntrinsic(CallInst *Orig, Intrinsic::ID ClampIntrinsic)
static bool expansionIntrinsics(Module &M)
static Value * expandLerpIntrinsic(CallInst *Orig)
static Value * expandCrossIntrinsic(CallInst *Orig)
static Value * expandVecReduceAdd(CallInst *Orig, Intrinsic::ID IntrinsicId)
static Value * expandAtan2Intrinsic(CallInst *Orig)
static Value * expandLog10Intrinsic(CallInst *Orig)
static Intrinsic::ID getMinForClamp(Intrinsic::ID ClampIntrinsic)
static Value * expandStepIntrinsic(CallInst *Orig)
static Value * expandIntegerDotIntrinsic(CallInst *Orig, Intrinsic::ID DotIntrinsic)
static Value * expandPowIntrinsic(CallInst *Orig)
static Value * expandLogIntrinsic(CallInst *Orig, float LogConstVal=numbers::ln2f)
static Value * expandDegreesIntrinsic(CallInst *Orig)
static Value * expandExpIntrinsic(CallInst *Orig)
static Value * expandSignIntrinsic(CallInst *Orig)
static Intrinsic::ID getMaxForClamp(Intrinsic::ID ClampIntrinsic)
static Value * expandAnyOrAllIntrinsic(CallInst *Orig, Intrinsic::ID intrinsicId)
static Value * expandAbs(CallInst *Orig)
static Value * expandFloatDotIntrinsic(CallInst *Orig, Value *A, Value *B)
static Value * expandRadiansIntrinsic(CallInst *Orig)
static bool isIntrinsicExpansion(Function &F)
return RetTy
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
#define DEBUG_TYPE
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static unsigned getNumElements(Type *Ty)
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
BinaryOperator * Mul
bool runOnModule(Module &M) override
runOnModule - Virtual method overriden by subclasses to process the module being operated on.
bool isZero() const
Definition: APFloat.h:1436
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
void setAttributes(AttributeList A)
Set the attributes for this call.
Definition: InstrTypes.h:1428
AttributeList getAttributes() const
Return the attributes for this call.
Definition: InstrTypes.h:1425
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
void setTailCall(bool IsTc=true)
static Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
Definition: Constants.cpp:1472
This is an important base class in LLVM.
Definition: Constant.h:42
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:373
bool isZeroValue() const
Return true if the value is negative zero or null value.
Definition: Constants.cpp:76
PreservedAnalyses run(Module &M, ModuleAnalysisManager &)
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:311
Value * CreateFSub(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1583
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2503
Value * CreateFDiv(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1637
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2491
Value * CreateFAdd(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1556
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
Definition: IRBuilder.cpp:1152
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:890
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1048
Value * CreateFCmpUNE(Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2378
void setFastMathFlags(FastMathFlags NewFMF)
Set the fast-math flags to be used with generated fp-math operators.
Definition: IRBuilder.h:308
Value * CreateFCmpOLT(Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2328
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2277
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1367
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2048
Value * CreateFCmpOEQ(Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2313
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1498
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1350
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1520
Value * CreateICmpSLT(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2305
Value * CreateFMul(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1610
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1384
Value * CreateFCmpOGE(Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2323
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2697
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:94
FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:251
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: Analysis.h:114
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:270
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:184
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:237
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:355
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1859
Value * getOperand(unsigned i) const
Definition: User.h:228
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
constexpr double inv_pi
Definition: MathExtras.h:54
constexpr float ln10f
Definition: MathExtras.h:65
constexpr float log2ef
Definition: MathExtras.h:66
constexpr double pi
Definition: MathExtras.h:53
constexpr float ln2f
Definition: MathExtras.h:64
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:657
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
ModulePass * createDXILIntrinsicExpansionLegacyPass()
Pass to expand intrinsic operations that lack DXIL opCodes.