LLVM 23.0.0git
DXILOpLowering.cpp
Go to the documentation of this file.
1//===- DXILOpLowering.cpp - Lowering to DXIL operations -------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "DXILOpLowering.h"
10#include "DXILConstants.h"
11#include "DXILOpBuilder.h"
12#include "DXILRootSignature.h"
13#include "DXILShaderFlags.h"
14#include "DirectX.h"
18#include "llvm/CodeGen/Passes.h"
19#include "llvm/IR/Constant.h"
21#include "llvm/IR/IRBuilder.h"
22#include "llvm/IR/Instruction.h"
24#include "llvm/IR/Intrinsics.h"
25#include "llvm/IR/IntrinsicsDirectX.h"
26#include "llvm/IR/Module.h"
27#include "llvm/IR/PassManager.h"
28#include "llvm/IR/Use.h"
30#include "llvm/Pass.h"
33
34#define DEBUG_TYPE "dxil-op-lower"
35
36using namespace llvm;
37using namespace llvm::dxil;
38
39namespace {
40class OpLowerer {
41 Module &M;
42 DXILOpBuilder OpBuilder;
43 DXILResourceMap &DRM;
45 const ModuleMetadataInfo &MMDI;
46 SmallVector<CallInst *> CleanupCasts;
47 Function *CleanupNURI = nullptr;
48
49public:
50 OpLowerer(Module &M, DXILResourceMap &DRM, DXILResourceTypeMap &DRTM,
51 const ModuleMetadataInfo &MMDI)
52 : M(M), OpBuilder(M), DRM(DRM), DRTM(DRTM), MMDI(MMDI) {}
53
54 /// Replace every call to \c F using \c ReplaceCall, and then erase \c F. If
55 /// there is an error replacing a call, we emit a diagnostic and return true.
56 [[nodiscard]] bool
57 replaceFunction(Function &F,
58 llvm::function_ref<Error(CallInst *CI)> ReplaceCall) {
59 for (User *U : make_early_inc_range(F.users())) {
61 if (!CI)
62 continue;
63
64 if (Error E = ReplaceCall(CI)) {
65 std::string Message(toString(std::move(E)));
66 M.getContext().diagnose(DiagnosticInfoUnsupported(
67 *CI->getFunction(), Message, CI->getDebugLoc()));
68
69 return true;
70 }
71 }
72 if (F.user_empty())
73 F.eraseFromParent();
74 return false;
75 }
76
/// Describes how a single argument of a DXIL op is produced when an intrinsic
/// call is rewritten by replaceFunctionWithOp().
77 struct IntrinArgSelect {
// The enumerators are generated from DXILOperation.inc through the
// DXIL_OP_INTRINSIC_ARG_SELECT_TYPE X-macro. replaceFunctionWithOp() handles
// Index (forward the intrinsic argument at position Value), I8 (emit Value as
// an i8 constant) and I32 (emit Value as an i32 constant).
78 enum class Type {
79#define DXIL_OP_INTRINSIC_ARG_SELECT_TYPE(name) name,
80#include "DXILOperation.inc"
81 };
// Kind of selection to perform.
82 Type Type;
// Argument index or constant payload, depending on Type.
83 int Value;
84 };
85
86 /// Replaces uses of a struct with uses of an equivalent named struct.
87 ///
88 /// DXIL operations that return structs give them well known names, so we need
89 /// to update uses when we switch from an LLVM intrinsic to an op.
90 Error replaceNamedStructUses(CallInst *Intrin, CallInst *DXILOp) {
91 auto *IntrinTy = cast<StructType>(Intrin->getType());
92 auto *DXILOpTy = cast<StructType>(DXILOp->getType());
93 if (!IntrinTy->isLayoutIdentical(DXILOpTy))
95 "Type mismatch between intrinsic and DXIL op",
97
98 for (Use &U : make_early_inc_range(Intrin->uses()))
99 if (auto *EVI = dyn_cast<ExtractValueInst>(U.getUser()))
100 EVI->setOperand(0, DXILOp);
101 else if (auto *IVI = dyn_cast<InsertValueInst>(U.getUser()))
102 IVI->setOperand(0, DXILOp);
103 else
104 return make_error<StringError>("DXIL ops that return structs may only "
105 "be used by insert- and extractvalue",
107 return Error::success();
108 }
109
110 bool isFast(FastMathFlags Flags) {
111 // HLSL Fast Math doesn't enable AllowContract flag; This can be
112 // removed when we enable it in the future.
113 return Flags.allowReassoc() && Flags.noNaNs() && Flags.noInfs() &&
114 Flags.noSignedZeros() && Flags.allowReciprocal() &&
115 Flags.approxFunc();
116 }
117
118 void setDxPrecise(CallInst *CI) {
119 const StringRef Key = "dx.precise";
120 Module *M = CI->getModule();
121
122 LLVMContext &Ctx = M->getContext();
123 MDNode *One =
124 llvm::MDNode::get(Ctx, ConstantAsMetadata::get(ConstantInt::get(
125 llvm::Type::getInt32Ty(Ctx), 1)));
126
127 CI->setMetadata(Key, One);
128 }
129
130 [[nodiscard]] bool
131 replaceFunctionWithOp(Function &F, dxil::OpCode DXILOp,
132 ArrayRef<IntrinArgSelect> ArgSelects) {
133 return replaceFunction(F, [&](CallInst *CI) -> Error {
134 OpBuilder.getIRB().SetInsertPoint(CI);
136 if (ArgSelects.size()) {
137 for (const IntrinArgSelect &A : ArgSelects) {
138 switch (A.Type) {
139 case IntrinArgSelect::Type::Index:
140 Args.push_back(CI->getArgOperand(A.Value));
141 break;
142 case IntrinArgSelect::Type::I8:
143 Args.push_back(OpBuilder.getIRB().getInt8((uint8_t)A.Value));
144 break;
145 case IntrinArgSelect::Type::I32:
146 Args.push_back(OpBuilder.getIRB().getInt32(A.Value));
147 break;
148 }
149 }
150 } else {
151 Args.append(CI->arg_begin(), CI->arg_end());
152 }
153
154 Expected<CallInst *> OpCall =
155 OpBuilder.tryCreateOp(DXILOp, Args, CI->getName(), F.getReturnType());
156 if (Error E = OpCall.takeError())
157 return E;
158
159 if (isa<FPMathOperator>(CI) &&
160 !isFast(cast<FPMathOperator>(CI)->getFastMathFlags()))
161 setDxPrecise(*OpCall);
162
163 if (isa<StructType>(CI->getType())) {
164 if (Error E = replaceNamedStructUses(CI, *OpCall))
165 return E;
166 } else
167 CI->replaceAllUsesWith(*OpCall);
168
169 CI->eraseFromParent();
170 return Error::success();
171 });
172 }
173
174 /// Create a cast between a `target("dx")` type and `dx.types.Handle`, which
175 /// is intended to be removed by the end of lowering. This is used to allow
176 /// lowering of ops which need to change their return or argument types in a
177 /// piecemeal way - we can add the casts in to avoid updating all of the uses
178 /// or defs, and by the end all of the casts will be redundant.
179 Value *createTmpHandleCast(Value *V, Type *Ty) {
180 CallInst *Cast = OpBuilder.getIRB().CreateIntrinsic(
181 Intrinsic::dx_resource_casthandle, {Ty, V->getType()}, {V});
182 CleanupCasts.push_back(Cast);
183 return Cast;
184 }
185
186 void cleanupHandleCasts() {
189
190 for (CallInst *Cast : CleanupCasts) {
191 // These casts were only put in to ease the move from `target("dx")` types
192 // to `dx.types.Handle in a piecemeal way. At this point, all of the
193 // non-cast uses should now be `dx.types.Handle`, and remaining casts
194 // should all form pairs to and from the now unused `target("dx")` type.
195 CastFns.push_back(Cast->getCalledFunction());
196
197 // If the cast is not to `dx.types.Handle`, it should be the first part of
198 // the pair. Keep track so we can remove it once it has no more uses.
199 if (Cast->getType() != OpBuilder.getHandleType()) {
200 ToRemove.push_back(Cast);
201 continue;
202 }
203 // Otherwise, we're the second handle in a pair. Forward the arguments and
204 // remove the (second) cast.
205 CallInst *Def = cast<CallInst>(Cast->getOperand(0));
206 assert(Def->getIntrinsicID() == Intrinsic::dx_resource_casthandle &&
207 "Unbalanced pair of temporary handle casts");
208 Cast->replaceAllUsesWith(Def->getOperand(0));
209 Cast->eraseFromParent();
210 }
211 for (CallInst *Cast : ToRemove) {
212 assert(Cast->user_empty() && "Temporary handle cast still has users");
213 Cast->eraseFromParent();
214 }
215
216 // Deduplicate the cast functions so that we only erase each one once.
217 llvm::sort(CastFns);
218 CastFns.erase(llvm::unique(CastFns), CastFns.end());
219 for (Function *F : CastFns)
220 F->eraseFromParent();
221
222 CleanupCasts.clear();
223 }
224
225 void cleanupNonUniformResourceIndexCalls() {
226 // Replace all NonUniformResourceIndex calls with their argument.
227 if (!CleanupNURI)
228 return;
229 for (User *U : make_early_inc_range(CleanupNURI->users())) {
230 CallInst *CI = dyn_cast<CallInst>(U);
231 if (!CI)
232 continue;
234 CI->eraseFromParent();
235 }
236 CleanupNURI->eraseFromParent();
237 CleanupNURI = nullptr;
238 }
239
240 // Remove the resource global associated with the handleFromBinding call
241 // instruction and their uses as they aren't needed anymore.
242 // TODO: We should verify that all the globals get removed.
243 // It's expected we'll need a custom pass in the future that will eliminate
244 // the need for this here.
245 void removeResourceGlobals(CallInst *CI) {
246 for (User *User : make_early_inc_range(CI->users())) {
247 if (StoreInst *Store = dyn_cast<StoreInst>(User)) {
248 Value *V = Store->getOperand(1);
249 Store->eraseFromParent();
250 if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
251 if (GV->use_empty()) {
252 GV->removeDeadConstantUsers();
253 GV->eraseFromParent();
254 }
255 }
256 }
257 }
258
259 void replaceHandleFromBindingCall(CallInst *CI, Value *Replacement) {
261 Intrinsic::dx_resource_handlefrombinding);
262
263 removeResourceGlobals(CI);
264
265 auto *NameGlobal = dyn_cast<llvm::GlobalVariable>(CI->getArgOperand(4));
266
267 CI->replaceAllUsesWith(Replacement);
268 CI->eraseFromParent();
269
270 if (NameGlobal && NameGlobal->use_empty())
271 NameGlobal->removeFromParent();
272 }
273
274 bool hasNonUniformIndex(Value *IndexOp) {
275 if (isa<llvm::Constant>(IndexOp))
276 return false;
277
278 SmallVector<Value *, 16> Worklist;
279 SmallPtrSet<Value *, 16> Visited;
280 Worklist.push_back(IndexOp);
281
282 while (!Worklist.empty()) {
283 Value *V = Worklist.pop_back_val();
284
285 if (isa<llvm::Constant>(V))
286 continue;
287
288 if (!Visited.insert(V).second)
289 continue;
290
291 if (auto *CI = dyn_cast<CallInst>(V))
292 if (CI->getIntrinsicID() == Intrinsic::dx_resource_nonuniformindex)
293 return true;
294
295 // If it's a PHI node, check ALL incoming values —
296 // taint from ANY predecessor counts
297 if (auto *Phi = dyn_cast<PHINode>(V)) {
298 for (Value *Incoming : Phi->incoming_values())
299 Worklist.push_back(Incoming);
300 continue;
301 }
302
303 if (auto *Inst = dyn_cast<Instruction>(V))
304 if (Inst->getNumOperands() > 0 && !Inst->isTerminator())
305 for (Value *Op : Inst->operands())
306 Worklist.push_back(Op);
307 }
308 return false;
309 }
310
311 Error validateRawBufferElementIndex(Value *Resource, Value *ElementIndex) {
312 bool IsStructured =
313 cast<RawBufferExtType>(Resource->getType())->isStructured();
314 bool IsPoison = isa<PoisonValue>(ElementIndex);
315
316 if (IsStructured && IsPoison)
318 "Element index of structured buffer may not be poison",
320
321 if (!IsStructured && !IsPoison)
323 "Element index of raw buffer must be poison",
325
326 return Error::success();
327 }
328
329 [[nodiscard]] bool lowerToCreateHandle(Function &F) {
330 IRBuilder<> &IRB = OpBuilder.getIRB();
331 Type *Int8Ty = IRB.getInt8Ty();
332 Type *Int32Ty = IRB.getInt32Ty();
333 Type *Int1Ty = IRB.getInt1Ty();
334
335 return replaceFunction(F, [&](CallInst *CI) -> Error {
336 IRB.SetInsertPoint(CI);
337
338 auto *It = DRM.find(CI);
339 assert(It != DRM.end() && "Resource not in map?");
340 dxil::ResourceInfo &RI = *It;
341
342 const auto &Binding = RI.getBinding();
343 dxil::ResourceClass RC = DRTM[RI.getHandleTy()].getResourceClass();
344
345 Value *IndexOp = CI->getArgOperand(3);
346 if (Binding.LowerBound != 0)
347 IndexOp = IRB.CreateAdd(IndexOp,
348 ConstantInt::get(Int32Ty, Binding.LowerBound));
349
350 bool HasNonUniformIndex =
351 (Binding.Size == 1) ? false : hasNonUniformIndex(IndexOp);
352 std::array<Value *, 4> Args{
353 ConstantInt::get(Int8Ty, llvm::to_underlying(RC)),
354 ConstantInt::get(Int32Ty, Binding.RecordID), IndexOp,
355 ConstantInt::get(Int1Ty, HasNonUniformIndex)};
356 Expected<CallInst *> OpCall =
357 OpBuilder.tryCreateOp(OpCode::CreateHandle, Args, CI->getName());
358 if (Error E = OpCall.takeError())
359 return E;
360
361 Value *Cast = createTmpHandleCast(*OpCall, CI->getType());
362 replaceHandleFromBindingCall(CI, Cast);
363 return Error::success();
364 });
365 }
366
367 [[nodiscard]] bool lowerToBindAndAnnotateHandle(Function &F) {
368 IRBuilder<> &IRB = OpBuilder.getIRB();
369 Type *Int32Ty = IRB.getInt32Ty();
370 Type *Int1Ty = IRB.getInt1Ty();
371
372 return replaceFunction(F, [&](CallInst *CI) -> Error {
373 IRB.SetInsertPoint(CI);
374
375 auto *It = DRM.find(CI);
376 assert(It != DRM.end() && "Resource not in map?");
377 dxil::ResourceInfo &RI = *It;
378
379 const auto &Binding = RI.getBinding();
380 dxil::ResourceTypeInfo &RTI = DRTM[RI.getHandleTy()];
382
383 Value *IndexOp = CI->getArgOperand(3);
384 if (Binding.LowerBound != 0)
385 IndexOp = IRB.CreateAdd(IndexOp,
386 ConstantInt::get(Int32Ty, Binding.LowerBound));
387
388 std::pair<uint32_t, uint32_t> Props =
389 RI.getAnnotateProps(*F.getParent(), RTI);
390
391 // For `CreateHandleFromBinding` we need the upper bound rather than the
392 // size, so we need to be careful about the difference for "unbounded".
393 uint32_t UpperBound = Binding.Size == 0
394 ? std::numeric_limits<uint32_t>::max()
395 : Binding.LowerBound + Binding.Size - 1;
396 Constant *ResBind = OpBuilder.getResBind(Binding.LowerBound, UpperBound,
397 Binding.Space, RC);
398 bool NonUniformIndex =
399 (Binding.Size == 1) ? false : hasNonUniformIndex(IndexOp);
400 Constant *NonUniformOp = ConstantInt::get(Int1Ty, NonUniformIndex);
401 std::array<Value *, 3> BindArgs{ResBind, IndexOp, NonUniformOp};
402 Expected<CallInst *> OpBind = OpBuilder.tryCreateOp(
403 OpCode::CreateHandleFromBinding, BindArgs, CI->getName());
404 if (Error E = OpBind.takeError())
405 return E;
406
407 std::array<Value *, 2> AnnotateArgs{
408 *OpBind, OpBuilder.getResProps(Props.first, Props.second)};
409 Expected<CallInst *> OpAnnotate = OpBuilder.tryCreateOp(
410 OpCode::AnnotateHandle, AnnotateArgs,
411 CI->hasName() ? CI->getName() + "_annot" : Twine());
412 if (Error E = OpAnnotate.takeError())
413 return E;
414
415 Value *Cast = createTmpHandleCast(*OpAnnotate, CI->getType());
416 replaceHandleFromBindingCall(CI, Cast);
417 return Error::success();
418 });
419 }
420
421 /// Lower `dx.resource.handlefrombinding` intrinsics depending on the shader
422 /// model and taking into account binding information from
423 /// DXILResourceAnalysis.
424 bool lowerHandleFromBinding(Function &F) {
425 if (MMDI.DXILVersion < VersionTuple(1, 6))
426 return lowerToCreateHandle(F);
427 return lowerToBindAndAnnotateHandle(F);
428 }
429
430 /// Replace uses of \c Intrin with the values in the `dx.ResRet` of \c Op.
431 /// Since we expect to be post-scalarization, make an effort to avoid vectors.
432 Error replaceResRetUses(CallInst *Intrin, CallInst *Op, bool HasCheckBit) {
433 IRBuilder<> &IRB = OpBuilder.getIRB();
434
435 Instruction *OldResult = Intrin;
436 Type *OldTy = Intrin->getType();
437
438 if (HasCheckBit) {
439 auto *ST = cast<StructType>(OldTy);
440
441 Value *CheckOp = nullptr;
442 Type *Int32Ty = IRB.getInt32Ty();
443 for (Use &U : make_early_inc_range(OldResult->uses())) {
444 if (auto *EVI = dyn_cast<ExtractValueInst>(U.getUser())) {
445 ArrayRef<unsigned> Indices = EVI->getIndices();
446 assert(Indices.size() == 1);
447 // We're only interested in uses of the check bit for now.
448 if (Indices[0] != 1)
449 continue;
450 if (!CheckOp) {
451 Value *NewEVI = IRB.CreateExtractValue(Op, 4);
452 Expected<CallInst *> OpCall = OpBuilder.tryCreateOp(
453 OpCode::CheckAccessFullyMapped, {NewEVI},
454 OldResult->hasName() ? OldResult->getName() + "_check"
455 : Twine(),
456 Int32Ty);
457 if (Error E = OpCall.takeError())
458 return E;
459 CheckOp = *OpCall;
460 }
461 EVI->replaceAllUsesWith(CheckOp);
462 EVI->eraseFromParent();
463 }
464 }
465
466 if (OldResult->use_empty()) {
467 // Only the check bit was used, so we're done here.
468 OldResult->eraseFromParent();
469 return Error::success();
470 }
471
472 assert(OldResult->hasOneUse() &&
473 isa<ExtractValueInst>(*OldResult->user_begin()) &&
474 "Expected only use to be extract of first element");
475 OldResult = cast<Instruction>(*OldResult->user_begin());
476 OldTy = ST->getElementType(0);
477 }
478
479 // For scalars, we just extract the first element.
480 if (!isa<FixedVectorType>(OldTy)) {
481 Value *EVI = IRB.CreateExtractValue(Op, 0);
482 OldResult->replaceAllUsesWith(EVI);
483 OldResult->eraseFromParent();
484 if (OldResult != Intrin) {
485 assert(Intrin->use_empty() && "Intrinsic still has uses?");
486 Intrin->eraseFromParent();
487 }
488 return Error::success();
489 }
490
491 std::array<Value *, 4> Extracts = {};
492 SmallVector<ExtractElementInst *> DynamicAccesses;
493
494 // The users of the operation should all be scalarized, so we attempt to
495 // replace the extractelements with extractvalues directly.
496 for (Use &U : make_early_inc_range(OldResult->uses())) {
497 if (auto *EEI = dyn_cast<ExtractElementInst>(U.getUser())) {
498 if (auto *IndexOp = dyn_cast<ConstantInt>(EEI->getIndexOperand())) {
499 size_t IndexVal = IndexOp->getZExtValue();
500 assert(IndexVal < 4 && "Index into buffer load out of range");
501 if (!Extracts[IndexVal])
502 Extracts[IndexVal] = IRB.CreateExtractValue(Op, IndexVal);
503 EEI->replaceAllUsesWith(Extracts[IndexVal]);
504 EEI->eraseFromParent();
505 } else {
506 DynamicAccesses.push_back(EEI);
507 }
508 }
509 }
510
511 const auto *VecTy = cast<FixedVectorType>(OldTy);
512 const unsigned N = VecTy->getNumElements();
513
514 // If there's a dynamic access we need to round trip through stack memory so
515 // that we don't leave vectors around.
516 if (!DynamicAccesses.empty()) {
517 Type *Int32Ty = IRB.getInt32Ty();
518 Constant *Zero = ConstantInt::get(Int32Ty, 0);
519
520 Type *ElTy = VecTy->getElementType();
521 Type *ArrayTy = ArrayType::get(ElTy, N);
522 Value *Alloca = IRB.CreateAlloca(ArrayTy);
523
524 for (int I = 0, E = N; I != E; ++I) {
525 if (!Extracts[I])
526 Extracts[I] = IRB.CreateExtractValue(Op, I);
528 ArrayTy, Alloca, {Zero, ConstantInt::get(Int32Ty, I)});
529 IRB.CreateStore(Extracts[I], GEP);
530 }
531
532 for (ExtractElementInst *EEI : DynamicAccesses) {
533 Value *GEP = IRB.CreateInBoundsGEP(ArrayTy, Alloca,
534 {Zero, EEI->getIndexOperand()});
535 Value *Load = IRB.CreateLoad(ElTy, GEP);
536 EEI->replaceAllUsesWith(Load);
537 EEI->eraseFromParent();
538 }
539 }
540
541 // If we still have uses, then we're not fully scalarized and need to
542 // recreate the vector. This should only happen for things like exported
543 // functions from libraries.
544 if (!OldResult->use_empty()) {
545 for (int I = 0, E = N; I != E; ++I)
546 if (!Extracts[I])
547 Extracts[I] = IRB.CreateExtractValue(Op, I);
548
549 Value *Vec = PoisonValue::get(OldTy);
550 for (int I = 0, E = N; I != E; ++I)
551 Vec = IRB.CreateInsertElement(Vec, Extracts[I], I);
552 OldResult->replaceAllUsesWith(Vec);
553 }
554
555 OldResult->eraseFromParent();
556 if (OldResult != Intrin) {
557 assert(Intrin->use_empty() && "Intrinsic still has uses?");
558 Intrin->eraseFromParent();
559 }
560
561 return Error::success();
562 }
563
564 [[nodiscard]] bool lowerTypedBufferLoad(Function &F, bool HasCheckBit) {
565 IRBuilder<> &IRB = OpBuilder.getIRB();
566 Type *Int32Ty = IRB.getInt32Ty();
567
568 return replaceFunction(F, [&](CallInst *CI) -> Error {
569 IRB.SetInsertPoint(CI);
570
571 Value *Handle =
572 createTmpHandleCast(CI->getArgOperand(0), OpBuilder.getHandleType());
573 Value *Index0 = CI->getArgOperand(1);
574 Value *Index1 = UndefValue::get(Int32Ty);
575
576 Type *OldTy = CI->getType();
577 if (HasCheckBit)
578 OldTy = cast<StructType>(OldTy)->getElementType(0);
579 Type *NewRetTy = OpBuilder.getResRetType(OldTy->getScalarType());
580
581 std::array<Value *, 3> Args{Handle, Index0, Index1};
582 Expected<CallInst *> OpCall = OpBuilder.tryCreateOp(
583 OpCode::BufferLoad, Args, CI->getName(), NewRetTy);
584 if (Error E = OpCall.takeError())
585 return E;
586 if (Error E = replaceResRetUses(CI, *OpCall, HasCheckBit))
587 return E;
588
589 return Error::success();
590 });
591 }
592
593 // Copies `Src` into `Args` starting at `ArgIdx`. If `Src` is a vector, its
594 // elements are extracted and stored in consecutive slots; otherwise `Src`
595 // is stored directly. At most `MaxElements` elements are expected.
596 static void extractElementsIntoArgs(IRBuilder<> &IRB,
598 unsigned ArgIdx, Value *Src,
599 unsigned MaxElements) {
600 Type *Ty = Src->getType();
601 if (auto *VecTy = dyn_cast<FixedVectorType>(Ty)) {
602 unsigned Count = VecTy->getNumElements();
603 assert(Count <= MaxElements && "Expected at most 3 elements in vector");
604 for (unsigned I = 0; I < Count; ++I)
605 Args[ArgIdx + I] = IRB.CreateExtractElement(Src, uint64_t(I));
606 } else {
607 Args[ArgIdx] = Src;
608 }
609 }
610
611 /// Copy offsets into the argument list at the given index, unless
612 /// the offsets are known to be zero (i.e., a null constant).
613 static void extractNonZeroOffsets(IRBuilder<> &IRB,
615 unsigned ArgIdx, Value *Offsets,
616 unsigned MaxElements) {
617 auto *COff = dyn_cast<Constant>(Offsets);
618 bool OffsetsAreZero = COff && COff->isNullValue();
619 if (!OffsetsAreZero)
620 extractElementsIntoArgs(IRB, Args, ArgIdx, Offsets, MaxElements);
621 }
622
623 [[nodiscard]] bool lowerTextureLoad(Function &F) {
624 IRBuilder<> &IRB = OpBuilder.getIRB();
625 Type *Int32Ty = IRB.getInt32Ty();
626
627 return replaceFunction(F, [&](CallInst *CI) -> Error {
628 IRB.SetInsertPoint(CI);
629
630 Value *Handle =
631 createTmpHandleCast(CI->getArgOperand(0), OpBuilder.getHandleType());
632 Value *Coords = CI->getArgOperand(1);
633 Value *MipLevel = CI->getArgOperand(2);
634 Value *Offsets = CI->getArgOperand(3);
635
636 Type *OldTy = CI->getType();
637 Type *NewRetTy = OpBuilder.getResRetType(OldTy->getScalarType());
638
640 std::array<Value *, 8> Args{Handle, MipLevel, Undef, Undef,
642
643 // Copy coordinates and offsets into Args.
644 extractElementsIntoArgs(IRB, Args, 2, Coords, 3);
645 extractNonZeroOffsets(IRB, Args, 5, Offsets, 3);
646
647 Expected<CallInst *> OpCall = OpBuilder.tryCreateOp(
648 OpCode::TextureLoad, Args, CI->getName(), NewRetTy);
649 if (Error E = OpCall.takeError())
650 return E;
651 if (Error E = replaceResRetUses(CI, *OpCall, /*HasCheckBit=*/false))
652 return E;
653
654 return Error::success();
655 });
656 }
657
658 /// Common helper for lowering sample operations (SampleBias, SampleGrad,
659 /// etc.) that share the same pattern: extract handle/sampler, unpack
660 /// coordinates and offsets, build the DXIL arg list, and replace uses.
661 [[nodiscard]] bool lowerSampleOp(
662 Function &F, OpCode Op, unsigned CoordsIdx, unsigned OffsetsIdx,
663 llvm::function_ref<void(IRBuilder<> &, CallInst *,
664 SmallVectorImpl<Value *> &)> EmitExtraArgs) {
665 IRBuilder<> &IRB = OpBuilder.getIRB();
666 return replaceFunction(F, [&](CallInst *CI) -> Error {
667 IRB.SetInsertPoint(CI);
668
669 Value *Handle =
670 createTmpHandleCast(CI->getArgOperand(0), OpBuilder.getHandleType());
671 Value *Sampler =
672 createTmpHandleCast(CI->getArgOperand(1), OpBuilder.getHandleType());
673 Value *Coords = CI->getArgOperand(CoordsIdx);
674 Value *Offsets = CI->getArgOperand(OffsetsIdx);
675
676 Type *OldTy = CI->getType();
677 Type *NewRetTy = OpBuilder.getResRetType(OldTy->getScalarType());
678
679 Value *UndefF = UndefValue::get(IRB.getFloatTy());
680 Value *UndefI = UndefValue::get(IRB.getInt32Ty());
681 // Common prefix: Handle, Sampler, Coord0..3, Offset0..2
682 SmallVector<Value *, 17> Args{Handle, Sampler, UndefF, UndefF, UndefF,
683 UndefF, UndefI, UndefI, UndefI};
684
685 // Copy coordinates and offsets into Args.
686 extractElementsIntoArgs(IRB, Args, 2, Coords, 4);
687 extractNonZeroOffsets(IRB, Args, 6, Offsets, 3);
688
689 // Emit op-specific trailing arguments (e.g. Bias+Clamp, DDX+DDY+Clamp).
690 EmitExtraArgs(IRB, CI, Args);
691
692 Expected<CallInst *> OpCall =
693 OpBuilder.tryCreateOp(Op, Args, CI->getName(), NewRetTy);
694 if (Error E = OpCall.takeError())
695 return E;
696 if (Error E = replaceResRetUses(CI, *OpCall, /*HasCheckBit=*/false))
697 return E;
698
699 return Error::success();
700 });
701 }
702
703 [[nodiscard]] bool lowerSample(Function &F, bool HasClamp) {
704 return lowerSampleOp(F, OpCode::Sample, /*CoordsIdx=*/2, /*OffsetsIdx=*/3,
705 [HasClamp](IRBuilder<> &IRB, CallInst *CI,
706 SmallVectorImpl<Value *> &Args) {
707 // Clamp
708 Args.push_back(
709 HasClamp ? CI->getArgOperand(4)
710 : UndefValue::get(IRB.getFloatTy()));
711 });
712 }
713
714 [[nodiscard]] bool lowerSampleBias(Function &F, bool HasClamp) {
715 return lowerSampleOp(
716 F, OpCode::SampleBias, /*CoordsIdx=*/2, /*OffsetsIdx=*/4,
717 [HasClamp](IRBuilder<> &IRB, CallInst *CI,
718 SmallVectorImpl<Value *> &Args) {
719 // Bias is operand 3.
720 Args.push_back(CI->getArgOperand(3));
721 // Clamp
722 Args.push_back(HasClamp ? CI->getArgOperand(5)
723 : UndefValue::get(IRB.getFloatTy()));
724 });
725 }
726
727 [[nodiscard]] bool lowerSampleLevel(Function &F) {
728 return lowerSampleOp(
729 F, OpCode::SampleLevel, /*CoordsIdx=*/2, /*OffsetsIdx=*/4,
730 [](IRBuilder<> &, CallInst *CI, SmallVectorImpl<Value *> &Args) {
731 // LOD is operand 3.
732 Args.push_back(CI->getArgOperand(3));
733 });
734 }
735
736 [[nodiscard]] bool lowerSampleGrad(Function &F, bool HasClamp) {
737 return lowerSampleOp(
738 F, OpCode::SampleGrad, /*CoordsIdx=*/2, /*OffsetsIdx=*/5,
739 [HasClamp](IRBuilder<> &IRB, CallInst *CI,
740 SmallVectorImpl<Value *> &Args) {
741 Value *DDX = CI->getArgOperand(3);
742 Value *DDY = CI->getArgOperand(4);
743 Value *UndefF = UndefValue::get(IRB.getFloatTy());
744 // DDX0..2
745 size_t DDXStart = Args.size();
746 Args.append(3, UndefF);
747 extractElementsIntoArgs(IRB, Args, DDXStart, DDX, 3);
748 // DDY0..2
749 size_t DDYStart = Args.size();
750 Args.append(3, UndefF);
751 extractElementsIntoArgs(IRB, Args, DDYStart, DDY, 3);
752 // Clamp
753 Args.push_back(HasClamp ? CI->getArgOperand(6) : UndefF);
754 });
755 }
756
757 [[nodiscard]] bool lowerRawBufferLoad(Function &F) {
758 const DataLayout &DL = F.getDataLayout();
759 IRBuilder<> &IRB = OpBuilder.getIRB();
760 Type *Int8Ty = IRB.getInt8Ty();
761 Type *Int32Ty = IRB.getInt32Ty();
762
763 return replaceFunction(F, [&](CallInst *CI) -> Error {
764 IRB.SetInsertPoint(CI);
765
766 Type *OldTy = cast<StructType>(CI->getType())->getElementType(0);
767 Type *ScalarTy = OldTy->getScalarType();
768 Type *NewRetTy = OpBuilder.getResRetType(ScalarTy);
769
770 Value *Handle =
771 createTmpHandleCast(CI->getArgOperand(0), OpBuilder.getHandleType());
772 Value *Index0 = CI->getArgOperand(1);
773 Value *Index1 = CI->getArgOperand(2);
774 uint64_t NumElements =
775 DL.getTypeSizeInBits(OldTy) / DL.getTypeSizeInBits(ScalarTy);
776 Value *Mask = ConstantInt::get(Int8Ty, ~(~0U << NumElements));
777 Value *Align =
778 ConstantInt::get(Int32Ty, DL.getPrefTypeAlign(ScalarTy).value());
779
780 if (Error E = validateRawBufferElementIndex(CI->getOperand(0), Index1))
781 return E;
782 if (isa<PoisonValue>(Index1))
783 Index1 = UndefValue::get(Index1->getType());
784
785 Expected<CallInst *> OpCall =
786 MMDI.DXILVersion >= VersionTuple(1, 2)
787 ? OpBuilder.tryCreateOp(OpCode::RawBufferLoad,
788 {Handle, Index0, Index1, Mask, Align},
789 CI->getName(), NewRetTy)
790 : OpBuilder.tryCreateOp(OpCode::BufferLoad,
791 {Handle, Index0, Index1}, CI->getName(),
792 NewRetTy);
793 if (Error E = OpCall.takeError())
794 return E;
795 if (Error E = replaceResRetUses(CI, *OpCall, /*HasCheckBit=*/true))
796 return E;
797
798 return Error::success();
799 });
800 }
801
802 [[nodiscard]] bool lowerCBufferLoad(Function &F) {
803 IRBuilder<> &IRB = OpBuilder.getIRB();
804
805 return replaceFunction(F, [&](CallInst *CI) -> Error {
806 IRB.SetInsertPoint(CI);
807
808 Type *OldTy = cast<StructType>(CI->getType())->getElementType(0);
809 Type *ScalarTy = OldTy->getScalarType();
810 Type *NewRetTy = OpBuilder.getCBufRetType(ScalarTy);
811
812 Value *Handle =
813 createTmpHandleCast(CI->getArgOperand(0), OpBuilder.getHandleType());
814 Value *Index = CI->getArgOperand(1);
815
816 Expected<CallInst *> OpCall = OpBuilder.tryCreateOp(
817 OpCode::CBufferLoadLegacy, {Handle, Index}, CI->getName(), NewRetTy);
818 if (Error E = OpCall.takeError())
819 return E;
820 if (Error E = replaceNamedStructUses(CI, *OpCall))
821 return E;
822
823 CI->eraseFromParent();
824 return Error::success();
825 });
826 }
827
828 [[nodiscard]] bool lowerUpdateCounter(Function &F) {
829 IRBuilder<> &IRB = OpBuilder.getIRB();
830 Type *Int32Ty = IRB.getInt32Ty();
831
832 return replaceFunction(F, [&](CallInst *CI) -> Error {
833 IRB.SetInsertPoint(CI);
834 Value *Handle =
835 createTmpHandleCast(CI->getArgOperand(0), OpBuilder.getHandleType());
836 Value *Op1 = CI->getArgOperand(1);
837
838 std::array<Value *, 2> Args{Handle, Op1};
839
840 Expected<CallInst *> OpCall = OpBuilder.tryCreateOp(
841 OpCode::UpdateCounter, Args, CI->getName(), Int32Ty);
842
843 if (Error E = OpCall.takeError())
844 return E;
845
846 CI->replaceAllUsesWith(*OpCall);
847 CI->eraseFromParent();
848 return Error::success();
849 });
850 }
851
852 [[nodiscard]] bool lowerGetDimensionsX(Function &F) {
853 IRBuilder<> &IRB = OpBuilder.getIRB();
854 Type *Int32Ty = IRB.getInt32Ty();
855
856 return replaceFunction(F, [&](CallInst *CI) -> Error {
857 IRB.SetInsertPoint(CI);
858 Value *Handle =
859 createTmpHandleCast(CI->getArgOperand(0), OpBuilder.getHandleType());
861
862 Expected<CallInst *> OpCall = OpBuilder.tryCreateOp(
863 OpCode::GetDimensions, {Handle, Undef}, CI->getName(), Int32Ty);
864 if (Error E = OpCall.takeError())
865 return E;
866 Value *Dim = IRB.CreateExtractValue(*OpCall, 0);
867
868 CI->replaceAllUsesWith(Dim);
869 CI->eraseFromParent();
870 return Error::success();
871 });
872 }
873
874 [[nodiscard]] bool lowerGetPointer(Function &F) {
875 // These should have already been handled in DXILResourceAccess, so we can
876 // just clean up the dead prototype.
877 assert(F.user_empty() && "getpointer operations should have been removed");
878 F.eraseFromParent();
879 return false;
880 }
881
882 [[nodiscard]] bool lowerBufferStore(Function &F, bool IsRaw) {
883 const DataLayout &DL = F.getDataLayout();
884 IRBuilder<> &IRB = OpBuilder.getIRB();
885 Type *Int8Ty = IRB.getInt8Ty();
886 Type *Int32Ty = IRB.getInt32Ty();
887
888 return replaceFunction(F, [&](CallInst *CI) -> Error {
889 IRB.SetInsertPoint(CI);
890
891 Value *Handle =
892 createTmpHandleCast(CI->getArgOperand(0), OpBuilder.getHandleType());
893 Value *Index0 = CI->getArgOperand(1);
894 Value *Index1 = IsRaw ? CI->getArgOperand(2) : UndefValue::get(Int32Ty);
895
896 if (IsRaw) {
897 if (Error E = validateRawBufferElementIndex(CI->getOperand(0), Index1))
898 return E;
899 if (isa<PoisonValue>(Index1))
900 Index1 = UndefValue::get(Index1->getType());
901 }
902
903 Value *Data = CI->getArgOperand(IsRaw ? 3 : 2);
904 Type *DataTy = Data->getType();
905 Type *ScalarTy = DataTy->getScalarType();
906
907 uint64_t NumElements =
908 DL.getTypeSizeInBits(DataTy) / DL.getTypeSizeInBits(ScalarTy);
909 Value *Mask =
910 ConstantInt::get(Int8Ty, IsRaw ? ~(~0U << NumElements) : 15U);
911
912 // TODO: check that we only have vector or scalar...
913 if (NumElements > 4)
915 "Buffer store data must have at most 4 elements",
917
918 std::array<Value *, 4> DataElements{nullptr, nullptr, nullptr, nullptr};
919 if (DataTy == ScalarTy)
920 DataElements[0] = Data;
921 else {
922 // Since we're post-scalarizer, if we see a vector here it's likely
923 // constructed solely for the argument of the store. Just use the scalar
924 // values from before they're inserted into the temporary.
926 while (IEI) {
927 auto *IndexOp = dyn_cast<ConstantInt>(IEI->getOperand(2));
928 if (!IndexOp)
929 break;
930 size_t IndexVal = IndexOp->getZExtValue();
931 assert(IndexVal < 4 && "Too many elements for buffer store");
932 DataElements[IndexVal] = IEI->getOperand(1);
933 IEI = dyn_cast<InsertElementInst>(IEI->getOperand(0));
934 }
935 }
936
937 // If for some reason we weren't able to forward the arguments from the
938 // scalarizer artifact, then we may need to actually extract elements from
939 // the vector.
940 for (int I = 0, E = NumElements; I < E; ++I)
941 if (DataElements[I] == nullptr)
942 DataElements[I] =
943 IRB.CreateExtractElement(Data, ConstantInt::get(Int32Ty, I));
944
945 // For any elements beyond the length of the vector, we should fill it up
946 // with undef - however, for typed buffers we repeat the first element to
947 // match DXC.
948 for (int I = NumElements, E = 4; I < E; ++I)
949 if (DataElements[I] == nullptr)
950 DataElements[I] = IsRaw ? UndefValue::get(ScalarTy) : DataElements[0];
951
952 dxil::OpCode Op = OpCode::BufferStore;
954 Handle, Index0, Index1, DataElements[0],
955 DataElements[1], DataElements[2], DataElements[3], Mask};
956 if (IsRaw && MMDI.DXILVersion >= VersionTuple(1, 2)) {
957 Op = OpCode::RawBufferStore;
958 // RawBufferStore requires the alignment
959 Args.push_back(
960 ConstantInt::get(Int32Ty, DL.getPrefTypeAlign(ScalarTy).value()));
961 }
962 Expected<CallInst *> OpCall =
963 OpBuilder.tryCreateOp(Op, Args, CI->getName());
964 if (Error E = OpCall.takeError())
965 return E;
966
967 CI->eraseFromParent();
968 // Clean up any leftover `insertelement`s
970 while (IEI && IEI->use_empty()) {
971 InsertElementInst *Tmp = IEI;
972 IEI = dyn_cast<InsertElementInst>(IEI->getOperand(0));
973 Tmp->eraseFromParent();
974 }
975
976 return Error::success();
977 });
978 }
979
980 [[nodiscard]] bool lowerCtpopToCountBits(Function &F) {
981 IRBuilder<> &IRB = OpBuilder.getIRB();
982 Type *Int32Ty = IRB.getInt32Ty();
983
984 return replaceFunction(F, [&](CallInst *CI) -> Error {
985 IRB.SetInsertPoint(CI);
987 Args.append(CI->arg_begin(), CI->arg_end());
988
989 Type *RetTy = Int32Ty;
990 Type *FRT = F.getReturnType();
991 if (const auto *VT = dyn_cast<VectorType>(FRT))
992 RetTy = VectorType::get(RetTy, VT);
993
994 Expected<CallInst *> OpCall = OpBuilder.tryCreateOp(
995 dxil::OpCode::CountBits, Args, CI->getName(), RetTy);
996 if (Error E = OpCall.takeError())
997 return E;
998
999 // If the result type is 32 bits we can do a direct replacement.
1000 if (FRT->isIntOrIntVectorTy(32)) {
1001 CI->replaceAllUsesWith(*OpCall);
1002 CI->eraseFromParent();
1003 return Error::success();
1004 }
1005
1006 unsigned CastOp;
1007 unsigned CastOp2;
1008 if (FRT->isIntOrIntVectorTy(16)) {
1009 CastOp = Instruction::ZExt;
1010 CastOp2 = Instruction::SExt;
1011 } else { // must be 64 bits
1012 assert(FRT->isIntOrIntVectorTy(64) &&
1013 "Currently only lowering 16, 32, or 64 bit ctpop to CountBits \
1014 is supported.");
1015 CastOp = Instruction::Trunc;
1016 CastOp2 = Instruction::Trunc;
1017 }
1018
1019 // It is correct to replace the ctpop with the dxil op and
1020 // remove all casts to i32
1021 bool NeedsCast = false;
1022 for (User *User : make_early_inc_range(CI->users())) {
1024 if (I && (I->getOpcode() == CastOp || I->getOpcode() == CastOp2) &&
1025 I->getType() == RetTy) {
1026 I->replaceAllUsesWith(*OpCall);
1027 I->eraseFromParent();
1028 } else
1029 NeedsCast = true;
1030 }
1031
1032 // It is correct to replace a ctpop with the dxil op and
1033 // a cast from i32 to the return type of the ctpop
1034 // the cast is emitted here if there is a non-cast to i32
1035 // instr which uses the ctpop
1036 if (NeedsCast) {
1037 Value *Cast =
1038 IRB.CreateZExtOrTrunc(*OpCall, F.getReturnType(), "ctpop.cast");
1039 CI->replaceAllUsesWith(Cast);
1040 }
1041
1042 CI->eraseFromParent();
1043 return Error::success();
1044 });
1045 }
1046
1047 [[nodiscard]] bool lowerLifetimeIntrinsic(Function &F) {
1048 IRBuilder<> &IRB = OpBuilder.getIRB();
1049 return replaceFunction(F, [&](CallInst *CI) -> Error {
1050 IRB.SetInsertPoint(CI);
1051 Value *Ptr = CI->getArgOperand(0);
1052 assert(Ptr->getType()->isPointerTy() &&
1053 "Expected operand of lifetime intrinsic to be a pointer");
1054
1055 auto ZeroOrUndef = [&](Type *Ty) {
1056 return MMDI.ValidatorVersion < VersionTuple(1, 6)
1058 : UndefValue::get(Ty);
1059 };
1060
1061 Value *Val = nullptr;
1062 if (auto *GV = dyn_cast<GlobalVariable>(Ptr)) {
1063 if (GV->hasInitializer() || GV->isExternallyInitialized())
1064 return Error::success();
1065 Val = ZeroOrUndef(GV->getValueType());
1066 } else if (auto *AI = dyn_cast<AllocaInst>(Ptr))
1067 Val = ZeroOrUndef(AI->getAllocatedType());
1068
1069 assert(Val && "Expected operand of lifetime intrinsic to be a global "
1070 "variable or alloca instruction");
1071 IRB.CreateStore(Val, Ptr, false);
1072
1073 CI->eraseFromParent();
1074 return Error::success();
1075 });
1076 }
1077
1078 [[nodiscard]] bool lowerIsFPClass(Function &F) {
1079 IRBuilder<> &IRB = OpBuilder.getIRB();
1080 Type *RetTy = IRB.getInt1Ty();
1081
1082 return replaceFunction(F, [&](CallInst *CI) -> Error {
1083 IRB.SetInsertPoint(CI);
1085 Value *Fl = CI->getArgOperand(0);
1086 Args.push_back(Fl);
1087
1089 Value *T = CI->getArgOperand(1);
1090 auto *TCI = dyn_cast<ConstantInt>(T);
1091 switch (TCI->getZExtValue()) {
1092 case FPClassTest::fcInf:
1093 OpCode = dxil::OpCode::IsInf;
1094 break;
1095 case FPClassTest::fcNan:
1096 OpCode = dxil::OpCode::IsNaN;
1097 break;
1098 case FPClassTest::fcNormal:
1099 OpCode = dxil::OpCode::IsNormal;
1100 break;
1101 case FPClassTest::fcFinite:
1102 OpCode = dxil::OpCode::IsFinite;
1103 break;
1104 default:
1105 SmallString<128> Msg =
1106 formatv("Unsupported FPClassTest {0} for DXIL Op Lowering",
1107 TCI->getZExtValue());
1109 }
1110
1111 Expected<CallInst *> OpCall =
1112 OpBuilder.tryCreateOp(OpCode, Args, CI->getName(), RetTy);
1113 if (Error E = OpCall.takeError())
1114 return E;
1115
1116 CI->replaceAllUsesWith(*OpCall);
1117 CI->eraseFromParent();
1118 return Error::success();
1119 });
1120 }
1121
1122 bool lowerIntrinsics() {
1123 bool Updated = false;
1124 bool HasErrors = false;
1125
1126 for (Function &F : make_early_inc_range(M.functions())) {
1127 if (!F.isDeclaration())
1128 continue;
1129 Intrinsic::ID ID = F.getIntrinsicID();
1130 switch (ID) {
1131 // NOTE: Skip dx_resource_casthandle here. They are
1132 // resolved after this loop in cleanupHandleCasts.
1133 case Intrinsic::dx_resource_casthandle:
1134 // NOTE: llvm.dbg.value is supported as is in DXIL.
1135 case Intrinsic::dbg_value:
1137 if (F.use_empty())
1138 F.eraseFromParent();
1139 continue;
1140 default:
1141 if (F.use_empty())
1142 F.eraseFromParent();
1143 else {
1144 SmallString<128> Msg = formatv(
1145 "Unsupported intrinsic {0} for DXIL lowering", F.getName());
1146 M.getContext().emitError(Msg);
1147 HasErrors |= true;
1148 }
1149 break;
1150
1151#define DXIL_OP_INTRINSIC(OpCode, Intrin, ...) \
1152 case Intrin: \
1153 HasErrors |= replaceFunctionWithOp( \
1154 F, OpCode, ArrayRef<IntrinArgSelect>{__VA_ARGS__}); \
1155 break;
1156#include "DXILOperation.inc"
1157 case Intrinsic::dx_resource_handlefrombinding:
1158 HasErrors |= lowerHandleFromBinding(F);
1159 break;
1160 case Intrinsic::dx_resource_getpointer:
1161 HasErrors |= lowerGetPointer(F);
1162 break;
1163 case Intrinsic::dx_resource_nonuniformindex:
1164 assert(!CleanupNURI &&
1165 "overloaded llvm.dx.resource.nonuniformindex intrinsics?");
1166 CleanupNURI = &F;
1167 break;
1168 case Intrinsic::dx_resource_load_typedbuffer:
1169 HasErrors |= lowerTypedBufferLoad(F, /*HasCheckBit=*/true);
1170 break;
1171 case Intrinsic::dx_resource_load_level:
1172 HasErrors |= lowerTextureLoad(F);
1173 break;
1174 case Intrinsic::dx_resource_sample:
1175 HasErrors |= lowerSample(F, /*HasClamp=*/false);
1176 break;
1177 case Intrinsic::dx_resource_sample_clamp:
1178 HasErrors |= lowerSample(F, /*HasClamp=*/true);
1179 break;
1180 case Intrinsic::dx_resource_samplebias:
1181 HasErrors |= lowerSampleBias(F, /*HasClamp=*/false);
1182 break;
1183 case Intrinsic::dx_resource_samplebias_clamp:
1184 HasErrors |= lowerSampleBias(F, /*HasClamp=*/true);
1185 break;
1186 case Intrinsic::dx_resource_samplelevel:
1187 HasErrors |= lowerSampleLevel(F);
1188 break;
1189 case Intrinsic::dx_resource_samplegrad:
1190 HasErrors |= lowerSampleGrad(F, /*HasClamp=*/false);
1191 break;
1192 case Intrinsic::dx_resource_samplegrad_clamp:
1193 HasErrors |= lowerSampleGrad(F, /*HasClamp=*/true);
1194 break;
1195 case Intrinsic::dx_resource_store_typedbuffer:
1196 HasErrors |= lowerBufferStore(F, /*IsRaw=*/false);
1197 break;
1198 case Intrinsic::dx_resource_load_rawbuffer:
1199 HasErrors |= lowerRawBufferLoad(F);
1200 break;
1201 case Intrinsic::dx_resource_store_rawbuffer:
1202 HasErrors |= lowerBufferStore(F, /*IsRaw=*/true);
1203 break;
1204 case Intrinsic::dx_resource_load_cbufferrow_2:
1205 case Intrinsic::dx_resource_load_cbufferrow_4:
1206 case Intrinsic::dx_resource_load_cbufferrow_8:
1207 HasErrors |= lowerCBufferLoad(F);
1208 break;
1209 case Intrinsic::dx_resource_updatecounter:
1210 HasErrors |= lowerUpdateCounter(F);
1211 break;
1212 case Intrinsic::dx_resource_getdimensions_x:
1213 HasErrors |= lowerGetDimensionsX(F);
1214 break;
1215 case Intrinsic::ctpop:
1216 HasErrors |= lowerCtpopToCountBits(F);
1217 break;
1218 case Intrinsic::lifetime_start:
1219 case Intrinsic::lifetime_end:
1220 if (F.use_empty())
1221 F.eraseFromParent();
1222 else {
1223 if (MMDI.DXILVersion < VersionTuple(1, 6))
1224 HasErrors |= lowerLifetimeIntrinsic(F);
1225 else
1226 continue;
1227 }
1228 break;
1229 case Intrinsic::is_fpclass:
1230 HasErrors |= lowerIsFPClass(F);
1231 break;
1232 }
1233 Updated = true;
1234 }
1235 if (Updated && !HasErrors) {
1236 cleanupHandleCasts();
1237 cleanupNonUniformResourceIndexCalls();
1238 }
1239
1240 return Updated;
1241 }
1242};
1243} // namespace
1244
1246 DXILResourceMap &DRM = MAM.getResult<DXILResourceAnalysis>(M);
1247 DXILResourceTypeMap &DRTM = MAM.getResult<DXILResourceTypeAnalysis>(M);
1248 const ModuleMetadataInfo MMDI = MAM.getResult<DXILMetadataAnalysis>(M);
1249
1250 const bool MadeChanges = OpLowerer(M, DRM, DRTM, MMDI).lowerIntrinsics();
1251 if (!MadeChanges)
1252 return PreservedAnalyses::all();
1258 return PA;
1259}
1260
1261namespace {
1262class DXILOpLoweringLegacy : public ModulePass {
1263public:
1264 bool runOnModule(Module &M) override {
1265 DXILResourceMap &DRM =
1266 getAnalysis<DXILResourceWrapperPass>().getResourceMap();
1267 DXILResourceTypeMap &DRTM =
1268 getAnalysis<DXILResourceTypeWrapperPass>().getResourceTypeMap();
1269 const ModuleMetadataInfo MMDI =
1270 getAnalysis<DXILMetadataAnalysisWrapperPass>().getModuleMetadata();
1271
1272 return OpLowerer(M, DRM, DRTM, MMDI).lowerIntrinsics();
1273 }
1274 StringRef getPassName() const override { return "DXIL Op Lowering"; }
1275 DXILOpLoweringLegacy() : ModulePass(ID) {}
1276
1277 static char ID; // Pass identification.
1278 void getAnalysisUsage(llvm::AnalysisUsage &AU) const override {
1279 AU.addRequired<DXILResourceTypeWrapperPass>();
1280 AU.addRequired<DXILResourceWrapperPass>();
1281 AU.addRequired<DXILMetadataAnalysisWrapperPass>();
1282 AU.addPreserved<DXILResourceWrapperPass>();
1283 AU.addPreserved<DXILMetadataAnalysisWrapperPass>();
1284 AU.addPreserved<ShaderFlagsAnalysisWrapper>();
1285 AU.addPreserved<RootSignatureAnalysisWrapper>();
1286 }
1287};
1288char DXILOpLoweringLegacy::ID = 0;
1289} // end anonymous namespace
1290
1291INITIALIZE_PASS_BEGIN(DXILOpLoweringLegacy, DEBUG_TYPE, "DXIL Op Lowering",
1292 false, false)
1295INITIALIZE_PASS_END(DXILOpLoweringLegacy, DEBUG_TYPE, "DXIL Op Lowering", false,
1296 false)
1297
1299 return new DXILOpLoweringLegacy();
1300}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
ReachingDefInfo InstSet & ToRemove
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
DXIL Resource Implicit Binding
#define DEBUG_TYPE
Hexagon Common GEP
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
This defines the Use class.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Machine Check Debug Module
#define T
ModuleAnalysisManager MAM
if(PassOpts->AAPipeline)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file defines the SmallVector class.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
size_t size() const
Get the array size.
Definition ArrayRef.h:141
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Value * getArgOperand(unsigned i) const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
LLVM_ABI Intrinsic::ID getIntrinsicID() const
Returns the intrinsic ID of the intrinsic called or Intrinsic::not_intrinsic if the called function i...
This class represents a function call, abstracting a target machine's calling convention.
static ConstantAsMetadata * get(Constant *C)
Definition Metadata.h:537
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
Diagnostic information for unsupported feature in backend.
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
static ErrorSuccess success()
Create a success value.
Definition Error.h:336
Error takeError()
Take ownership of the stored error.
Definition Error.h:612
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
Definition Function.h:246
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Definition IRBuilder.h:2637
AllocaInst * CreateAlloca(Type *Ty, unsigned AddrSpace, Value *ArraySize=nullptr, const Twine &Name="")
Definition IRBuilder.h:1901
IntegerType * getInt1Ty()
Fetch the type representing a single bit.
Definition IRBuilder.h:571
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition IRBuilder.h:2625
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Definition IRBuilder.h:2148
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition IRBuilder.h:2684
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition IRBuilder.h:586
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Definition IRBuilder.h:2028
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Definition IRBuilder.h:1928
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Definition IRBuilder.h:1941
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1444
Type * getFloatTy()
Fetch the type representing a 32-bit floating point value.
Definition IRBuilder.h:614
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition IRBuilder.h:207
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition IRBuilder.h:576
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1561
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition Pass.h:255
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
LLVMContext & getContext() const
Get the global data context.
Definition Module.h:287
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition Analysis.h:132
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
iterator erase(const_iterator CI)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:309
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition Type.h:263
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:282
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:368
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Value * getOperand(unsigned i) const
Definition User.h:207
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
user_iterator user_begin()
Definition Value.h:402
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:553
iterator_range< user_iterator > users()
Definition Value.h:426
bool use_empty() const
Definition Value.h:346
iterator_range< use_iterator > uses()
Definition Value.h:380
bool hasName() const
Definition Value.h:261
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:319
bool user_empty() const
Definition Value.h:389
TargetExtType * getHandleTy() const
LLVM_ABI std::pair< uint32_t, uint32_t > getAnnotateProps(Module &M, dxil::ResourceTypeInfo &RTI) const
const ResourceBinding & getBinding() const
dxil::ResourceClass getResourceClass() const
An efficient, type-erasing, non-owning reference to a callable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
Offsets
Offsets in bytes from the start of the input buffer.
NodeAddr< DefNode * > Def
Definition RDFGraph.h:384
NodeAddr< PhiNode * > Phi
Definition RDFGraph.h:390
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
@ Undef
Value of the register doesn't matter.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
Definition InstrProf.h:328
LLVM_ABI std::error_code inconvertibleErrorCode()
The value returned by this function can be returned from convertToErrorCode for Error values where no...
Definition Error.cpp:94
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:633
auto unique(Range &&R, Predicate P)
Definition STLExtras.h:2133
auto formatv(bool Validate, const char *Fmt, Ts &&...Vals)
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1635
constexpr std::underlying_type_t< Enum > to_underlying(Enum E)
Returns underlying integer value of an enum.
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
Error make_error(ArgTs &&... Args)
Make a Error instance representing failure using the given error info type.
Definition Error.h:340
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:221
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
ModulePass * createDXILOpLoweringLegacyPass()
Pass to lowering LLVM intrinsic call to DXIL op function call.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
Definition MIRParser.h:39
#define N