File: build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
Warning: line 763, column 36: Called C++ object pointer is null
1 | //===- Sparsification.cpp - Implementation of sparsification --------------===//
2 | //
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 | // See https://llvm.org/LICENSE.txt for license information.
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 | //
7 | //===----------------------------------------------------------------------===//
8 | //
9 | // This file implements converting sparse tensor types to actual sparse code.
10 | //
11 | //===----------------------------------------------------------------------===//
12 |
13 | #include "CodegenUtils.h"
14 |
15 | #include "mlir/Dialect/Affine/IR/AffineOps.h"
16 | #include "mlir/Dialect/Arith/IR/Arith.h"
17 | #include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
18 | #include "mlir/Dialect/Bufferization/IR/Bufferization.h"
19 | #include "mlir/Dialect/Func/IR/FuncOps.h"
20 | #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
21 | #include "mlir/Dialect/Linalg/IR/Linalg.h"
22 | #include "mlir/Dialect/Linalg/Utils/Utils.h"
23 | #include "mlir/Dialect/MemRef/IR/MemRef.h"
24 | #include "mlir/Dialect/SCF/IR/SCF.h"
25 | #include "mlir/Dialect/SCF/Transforms/Transforms.h"
26 | #include "mlir/Dialect/SparseTensor/IR/SparseTensor.h"
27 | #include "mlir/Dialect/SparseTensor/Transforms/Passes.h"
28 | #include "mlir/Dialect/SparseTensor/Utils/Merger.h"
29 | #include "mlir/Dialect/Tensor/IR/Tensor.h"
30 | #include "mlir/Dialect/Vector/IR/VectorOps.h"
31 | #include "mlir/IR/Matchers.h"
32 | #include "mlir/IR/TensorEncoding.h"
33 | #include "llvm/ADT/SmallBitVector.h"
34 |
35 | using namespace mlir;
36 | using namespace mlir::sparse_tensor;
37 |
38 | //===----------------------------------------------------------------------===//
39 | // Declarations of data structures.
40 | //===----------------------------------------------------------------------===//
41 |
42 | namespace {
43 |
44 | // Iteration graph sorting.
45 | enum SortMask {
46 |   kSparseOnly = 0x0,
47 |   kIncludeDense = 0x1,
48 |   kIncludeUndef = 0x2,
49 |   kIncludeAll = 0x3
50 | };
51 |
52 | // Reduction kinds.
53 | enum Reduction { kNoReduc, kSum, kProduct, kAnd, kOr, kXor, kCustom };
54 |
55 | // Code generation.
56 | struct CodeGen {
57 |   CodeGen(SparsificationOptions o, unsigned numTensors, unsigned numLoops,
58 |           OpOperand *op, unsigned nest, std::vector<unsigned> &ts)
59 |       : options(o), loops(numLoops), sizes(numLoops), buffers(numTensors),
60 |         pointers(numTensors, std::vector<Value>(numLoops)),
61 |         indices(numTensors, std::vector<Value>(numLoops)),
62 |         highs(numTensors, std::vector<Value>(numLoops)),
63 |         pidxs(numTensors, std::vector<Value>(numLoops)),
64 |         idxs(numTensors, std::vector<Value>(numLoops)), sparseOut(op),
65 |         outerParNest(nest), topSort(ts) {}
66 |   /// Sparsification options.
67 |   SparsificationOptions options;
68 |   /// Universal dense indices and upper bounds (by index). The loops array
69 |   /// is updated with the value of the universal dense index in the current
70 |   /// loop. The sizes array is set once with the inferred dimension sizes.
71 |   std::vector<Value> loops;
72 |   std::vector<Value> sizes;
73 |   /// Buffers for storing dense and sparse numerical values (by tensor).
74 |   /// This array is set once during bufferization of all tensors.
75 |   std::vector<Value> buffers;
76 |   /// Sparse storage schemes (1-D): pointers and indices (by tensor and index).
77 |   /// This array is set once during bufferization of all sparse tensors.
78 |   std::vector<std::vector<Value>> pointers;
79 |   std::vector<std::vector<Value>> indices;
80 |   /// Sparse iteration information (by tensor and index). These arrays
81 |   /// are updated to remain current within the current loop.
82 |   std::vector<std::vector<Value>> highs;
83 |   std::vector<std::vector<Value>> pidxs;
84 |   std::vector<std::vector<Value>> idxs;
85 |   /// Current reduction, updated during code generation. When indices of a
86 |   /// reduction are exhausted, all inner loops can use a scalarized reduction.
87 |   unsigned redExp = -1u;
88 |   Value redVal;
89 |   Reduction redKind = kNoReduc;
90 |   unsigned redCustom = -1u;
91 |   // Sparse tensor as output. Implemented either through direct injective
92 |   // insertion in lexicographic index order or through access pattern expansion
93 |   // in the innermost loop nest (`expValues` through `expCount`).
94 |   OpOperand *sparseOut;
95 |   unsigned outerParNest;
96 |   Value expValues;
97 |   Value expFilled;
98 |   Value expAdded;
99 |   Value expCount;
100 |   // Current vector length and mask.
101 |   unsigned curVecLength = 1;
102 |   Value curVecMask;
103 |   // Topsort (reference should remain in scope).
104 |   std::vector<unsigned> &topSort;
105 | };
106 |
107 | } // namespace
108 |
109 | //===----------------------------------------------------------------------===//
110 | // Sparse compiler analysis methods.
111 | //===----------------------------------------------------------------------===//
112 |
113 | /// Helper method to construct a permuted dimension ordering
114 | /// that adheres to the given topological sort.
115 | static AffineMap permute(MLIRContext *context, AffineMap m,
116 |                          std::vector<unsigned> &topSort) {
117 |   unsigned sz = topSort.size();
118 |   assert(m.getNumResults() == sz && "TopoSort/AffineMap size mismatch");
119 |   // Construct the inverse of `m`, to avoid the asymptotic complexity
120 |   // of calling `m.getPermutedPosition` repeatedly.
121 |   SmallVector<unsigned, 4> inv(sz);
122 |   for (unsigned i = 0; i < sz; i++)
123 |     inv[i] = m.getDimPosition(i);
124 |   // Construct the permutation.
125 |   SmallVector<unsigned, 4> perm(sz);
126 |   for (unsigned i = 0; i < sz; i++)
127 |     perm[i] = inv[topSort[i]];
128 |   return AffineMap::getPermutationMap(perm, context);
129 | }
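    | // Editor's note (illustrative trace, not part of the original source):
    | // with m = (d0, d1) -> (d1, d0) and topSort = [1, 0], the inverse is
    | // inv = [1, 0] (inv[i] = m.getDimPosition(i)), so the permutation becomes
    | // perm = [inv[topSort[0]], inv[topSort[1]]] = [inv[1], inv[0]] = [0, 1],
    | // i.e. the identity permutation map over the topologically sorted loops.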
130 |
131 | /// Helper method to obtain the dimension level format from the encoding.
132 | //
133 | // TODO: note that we store, but currently completely *ignore* the properties
134 | //
135 | static DimLevelFormat toDimLevelFormat(const SparseTensorEncodingAttr &enc,
136 |                                        unsigned d) {
137 |   if (enc) {
138 |     switch (enc.getDimLevelType()[d]) {
139 |     case SparseTensorEncodingAttr::DimLevelType::Dense:
140 |       return DimLevelFormat(DimLvlType::kDense);
141 |     case SparseTensorEncodingAttr::DimLevelType::Compressed:
142 |       return DimLevelFormat(DimLvlType::kCompressed);
143 |     case SparseTensorEncodingAttr::DimLevelType::CompressedNu:
144 |       return DimLevelFormat(DimLvlType::kCompressed, true, false);
145 |     case SparseTensorEncodingAttr::DimLevelType::CompressedNo:
146 |       return DimLevelFormat(DimLvlType::kCompressed, false, true);
147 |     case SparseTensorEncodingAttr::DimLevelType::CompressedNuNo:
148 |       return DimLevelFormat(DimLvlType::kCompressed, false, false);
149 |     case SparseTensorEncodingAttr::DimLevelType::Singleton:
150 |       return DimLevelFormat(DimLvlType::kSingleton);
151 |     case SparseTensorEncodingAttr::DimLevelType::SingletonNu:
152 |       return DimLevelFormat(DimLvlType::kSingleton, true, false);
153 |     case SparseTensorEncodingAttr::DimLevelType::SingletonNo:
154 |       return DimLevelFormat(DimLvlType::kSingleton, false, true);
155 |     case SparseTensorEncodingAttr::DimLevelType::SingletonNuNo:
156 |       return DimLevelFormat(DimLvlType::kSingleton, false, false);
157 |     }
158 |   }
159 |   return DimLevelFormat(DimLvlType::kDense);
160 | }
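    | // Editor's note: in the level-type names above, "Nu" marks a level whose
    | // stored indices are not unique and "No" marks a level whose indices are
    | // not ordered; the two boolean arguments to DimLevelFormat appear to encode
    | // exactly these ordered/unique properties, which (per the TODO) are stored
    | // but currently ignored by the sparsifier.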
161 |
162 | /// Helper method to inspect affine expressions. Rejects cases where the
163 | /// same index is used more than once. Also rejects compound affine
164 | /// expressions in sparse dimensions.
165 | static bool findAffine(Merger &merger, unsigned tensor, AffineExpr a,
166 |                        DimLevelFormat dim) {
167 |   switch (a.getKind()) {
168 |   case AffineExprKind::DimId: {
169 |     unsigned idx = a.cast<AffineDimExpr>().getPosition();
170 |     if (!merger.isDimLevelType(tensor, idx, DimLvlType::kUndef))
171 |       return false; // used more than once
172 |     merger.setDimLevelFormat(tensor, idx, dim);
173 |     return true;
174 |   }
175 |   case AffineExprKind::Add:
176 |   case AffineExprKind::Mul: {
177 |     if (dim.levelType != DimLvlType::kDense)
178 |       return false; // compound only in dense dim
179 |     auto binOp = a.cast<AffineBinaryOpExpr>();
180 |     return findAffine(merger, tensor, binOp.getLHS(), dim) &&
181 |            findAffine(merger, tensor, binOp.getRHS(), dim);
182 |   }
183 |   case AffineExprKind::Constant:
184 |     return dim.levelType == DimLvlType::kDense; // const only in dense dim
185 |   default:
186 |     return false;
187 |   }
188 | }
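    | // Editor's note (illustrative): a subscript such as A(i, j) is admissible
    | // for any level format, A(i + j, k) is admissible only when the first
    | // dimension of A is dense, and A(i, i) is always rejected, since index i is
    | // visited twice and its dim level type is no longer kUndef on that second
    | // visit.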
189 |
190 | /// Helper method to inspect sparse encodings in the tensor types.
191 | /// Fills the per-dimension sparsity information for all tensors.
192 | /// Returns true if the sparse annotations and affine subscript
193 | /// expressions of all tensors are admissible. Returns false if
194 | /// no annotations are found or inadmissible constructs occur.
195 | static bool findSparseAnnotations(Merger &merger, linalg::GenericOp op) {
196 |   bool annotated = false;
197 |   for (OpOperand *t : op.getInputAndOutputOperands()) {
198 |     auto map = op.getMatchingIndexingMap(t);
199 |     auto enc = getSparseTensorEncoding(t->get().getType());
200 |     if (enc)
201 |       annotated = true;
202 |     assert(map.getNumResults() == op.getRank(t));
203 |     for (unsigned d = 0, rank = map.getNumResults(); d < rank; d++) {
204 |       unsigned tensor = t->getOperandNumber();
205 |       AffineExpr a = map.getResult(toOrigDim(enc, d));
206 |       if (!findAffine(merger, tensor, a, toDimLevelFormat(enc, d)))
207 |         return false; // inadmissible affine expression
208 |     }
209 |   }
210 |   return annotated;
211 | }
212 |
213 | /// A helper to compute a topological sort. O(n^2) time complexity
214 | /// since we use an adjacency matrix for the graph.
215 | /// The sorted result will put the first reduction iterator at the
216 | /// latest possible index.
217 | static bool topSortOptimal(unsigned n, ArrayRef<StringRef> iteratorTypes,
218 |                            std::vector<unsigned> &topSort,
219 |                            std::vector<unsigned> &inDegree,
220 |                            std::vector<std::vector<bool>> &adjM) {
221 |   std::vector<unsigned> redIt; // reduction iterators with in-degree 0
222 |   std::vector<unsigned> parIt; // parallel iterators with in-degree 0
223 |   for (unsigned i = 0; i < n; i++) {
224 |     if (inDegree[i] == 0) {
225 |       if (linalg::isReductionIterator(iteratorTypes[i]))
226 |         redIt.push_back(i);
227 |       else
228 |         parIt.push_back(i);
229 |     }
230 |   }
231 |
232 |   while (!redIt.empty() || !parIt.empty()) {
233 |     // We always choose a parallel iterator if there is any.
234 |     auto &it = !parIt.empty() ? parIt : redIt;
235 |     auto src = it.back();
236 |     topSort.push_back(src);
237 |     it.pop_back();
238 |     // Update in-degrees, and push 0-degree nodes into the worklist.
239 |     for (unsigned dst = 0; dst < n; dst++)
240 |       if (adjM[src][dst] && --inDegree[dst] == 0) {
241 |         if (linalg::isReductionIterator(iteratorTypes[dst]))
242 |           redIt.push_back(dst);
243 |         else
244 |           parIt.push_back(dst);
245 |       }
246 |   }
247 |   return topSort.size() == n;
248 | }
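    | // Editor's note (illustrative trace): with iterator types
    | // [reduction, parallel] and no ordering edges, both nodes start with
    | // in-degree 0; the parallel worklist is drained first, so the result is
    | // topSort = [1, 0], which pushes the reduction loop innermost, as the
    | // comment above promises.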
249 |
250 | /// Helper method to add all constraints from the indices in one affine
251 | /// expression before all indices in the other affine expression. For
252 | /// example i0+i1 < i2+i3+1 yields i0<i2, i0<i3, i1<i2, and i1<i3.
253 | static void addAffineOrderings(std::vector<std::vector<bool>> &adjM,
254 |                                std::vector<unsigned> &inDegree, AffineExpr a,
255 |                                AffineExpr b, unsigned fidx) {
256 |   switch (a.getKind()) {
257 |   case AffineExprKind::DimId: {
258 |     unsigned idx = a.cast<AffineDimExpr>().getPosition();
259 |     if (b)
260 |       addAffineOrderings(adjM, inDegree, b, AffineExpr(), idx);
261 |     else if (!adjM[fidx][idx]) {
262 |       adjM[fidx][idx] = true;
263 |       inDegree[idx]++;
264 |     }
265 |     break;
266 |   }
267 |   case AffineExprKind::Add:
268 |   case AffineExprKind::Mul: {
269 |     auto binOp = a.cast<AffineBinaryOpExpr>();
270 |     addAffineOrderings(adjM, inDegree, binOp.getLHS(), b, fidx);
271 |     addAffineOrderings(adjM, inDegree, binOp.getRHS(), b, fidx);
272 |     break;
273 |   }
274 |   default:
275 |     break;
276 |   }
277 | }
278 |
279 | /// Computes a topologically sorted iteration graph for the linalg operation.
280 | /// Ensures all tensors are visited in natural index order. This is essential
281 | /// for sparse storage formats since these only support access along fixed
282 | /// dimensions. Even for dense storage formats, however, the natural index
283 | /// order yields innermost unit-stride access with better spatial locality.
284 | static bool computeIterationGraph(Merger &merger, linalg::GenericOp op,
285 |                                   std::vector<unsigned> &topSort, unsigned mask,
286 |                                   OpOperand *skip = nullptr) {
287 |   // Set up an n x n from/to adjacency matrix of the iteration graph
288 |   // for the implicit loop indices i_0 .. i_n-1.
289 |   unsigned n = op.getNumLoops();
290 |   std::vector<std::vector<bool>> adjM(n, std::vector<bool>(n, false));
291 |   std::vector<unsigned> inDegree(n, 0); // in-degree of each node.
292 |   auto iteratorTypes = op.getIteratorTypesArray();
293 |   // Iterate over the indexing maps of every tensor in the tensor expression.
294 |   for (OpOperand *t : op.getInputAndOutputOperands()) {
295 |     // Skip tensor during cycle resolution.
296 |     if (t == skip)
297 |       continue;
298 |     // Get map and encoding.
299 |     auto map = op.getMatchingIndexingMap(t);
300 |     auto enc = getSparseTensorEncoding(t->get().getType());
301 |     assert(map.getNumDims() == n);
302 |     // Skip dense tensor constraints when not requested.
303 |     if (!(mask & SortMask::kIncludeDense) && !enc)
304 |       continue;
305 |     // Each tensor expression and optional dimension ordering (row-major
306 |     // by default) puts an ordering constraint on the loop indices. For
307 |     // example, the tensor expression A_ijk forces the ordering i < j < k
308 |     // on the loop indices if no explicit dimension ordering is given.
309 |     for (unsigned d = 1, rank = map.getNumResults(); d < rank; d++) {
310 |       AffineExpr f = map.getResult(toOrigDim(enc, d - 1));
311 |       AffineExpr t = map.getResult(toOrigDim(enc, d));
312 |       addAffineOrderings(adjM, inDegree, f, t, 0);
313 |     }
314 |     // Push unrelated loops into sparse iteration space, so these
315 |     // will be skipped more often.
316 |     if (mask & SortMask::kIncludeUndef) {
317 |       unsigned tensor = t->getOperandNumber();
318 |       for (unsigned i = 0; i < n; i++)
319 |         if (merger.isDimLevelType(tensor, i, DimLvlType::kCompressed) ||
320 |             merger.isDimLevelType(tensor, i, DimLvlType::kSingleton)) {
321 |           for (unsigned j = 0; j < n; j++)
322 |             if (merger.isDimLevelType(tensor, j, DimLvlType::kUndef)) {
323 |               adjM[i][j] = true;
324 |               inDegree[j]++;
325 |             }
326 |         } else {
327 |           assert(merger.isDimLevelType(tensor, i, DimLvlType::kDense) ||
328 |                  merger.isDimLevelType(tensor, i, DimLvlType::kUndef));
329 |         }
330 |     }
331 |   }
332 |   // Topologically sort the iteration graph to determine loop order.
333 |   // Report failure for a cyclic iteration graph.
334 |   topSort.clear();
335 |   topSort.reserve(n);
336 |   return topSortOptimal(n, iteratorTypes, topSort, inDegree, adjM);
337 | }
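    | // Editor's note (illustrative): for matrix-vector multiplication
    | // x(i) = A(i, j) * b(j) with a row-major sparse A, the subscript A(i, j)
    | // contributes the edge i -> j, so every topological order runs loop i
    | // before loop j, matching the fixed access order of the sparse storage.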
338 |
339 | /// Returns true if tensor materializes uninitialized into the computation.
340 | static bool isMaterializing(Value val) {
341 |   return val.getDefiningOp<linalg::InitTensorOp>() ||
342 |          val.getDefiningOp<bufferization::AllocTensorOp>();
343 | }
344 |
345 | /// Returns true when the tensor expression is admissible for codegen.
346 | /// Since all sparse input tensors are admissible, we just need to check
347 | /// whether the out tensor in the tensor expression codegen is admissible.
348 | /// Sets `sparseOut` to the tensor and `outerParNest` to the outer injective
349 | /// nesting depth when a "truly dynamic" sparse tensor output occurs.
350 | static bool isAdmissibleTensorExp(Merger &merger, linalg::GenericOp op,
351 |                                   std::vector<unsigned> &topSort, unsigned exp,
352 |                                   OpOperand **sparseOut,
353 |                                   unsigned &outerParNest) {
354 |   OpOperand *lhs = op.getOutputOperand(0);
355 |   unsigned tensor = lhs->getOperandNumber();
356 |   auto enc = getSparseTensorEncoding(lhs->get().getType());
357 |   // A non-annotated output tensor is assumed dense, and becomes a random
358 |   // access n-dim memref. Admissible since insertions cannot occur.
359 |   if (!enc)
360 |     return true;
361 |   // An all-dense annotated "sparse" output tensor becomes a linearized random
362 |   // access 1-dim memref. Also admissible since insertions cannot occur.
363 |   bool allDense = true;
364 |   auto iteratorTypes = op.getIteratorTypesArray();
365 |   unsigned numLoops = iteratorTypes.size();
366 |   for (unsigned i = 0; i < numLoops; i++)
367 |     if (merger.isDimLevelType(tensor, i, DimLvlType::kCompressed) ||
368 |         merger.isDimLevelType(tensor, i, DimLvlType::kSingleton)) {
369 |       allDense = false;
370 |       break;
371 |     } else {
372 |       assert(merger.isDimLevelType(tensor, i, DimLvlType::kDense) ||
373 |              merger.isDimLevelType(tensor, i, DimLvlType::kUndef));
374 |     }
375 |   if (allDense)
376 |     return true;
377 |   // A tensor expression with a sparse output tensor that changes its values
378 |   // but not its nonzero structure, an operation called "simply dynamic" in
379 |   // [Bik96,Ch9], is also admissible without special codegen.
380 |   if (merger.isSingleCondition(tensor, exp))
381 |     return true;
382 |   // Accept "truly dynamic" if the output tensor materializes uninitialized
383 |   // into the computation and insertions occur in lexicographic index order.
384 |   if (isMaterializing(lhs->get())) {
385 |     unsigned nest = 0;
386 |     for (unsigned i = 0; i < numLoops; i++) {
387 |       if (linalg::isReductionIterator(iteratorTypes[topSort[i]]))
388 |         break; // terminate at first reduction
389 |       nest++;
390 |     }
391 |     // Determine admissible dynamic insertion situations:
392 |     // (1) fully injective, since there are no reductions,
393 |     // (2) admissible 1-d expansion in innermost dimension.
394 |     if (nest >= op.getRank(lhs) - 1) {
395 |       *sparseOut = lhs;
396 |       outerParNest = nest;
397 |       return true;
398 |     }
399 |   }
400 |   return false;
401 | }
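    | // Editor's note (illustrative): scaling a sparse tensor in place, as in
    | // x(i) = x(i) * 2.0, changes only stored values and is "simply dynamic",
    | // whereas y(i) = a(i) + b(i) into an empty sparse y requires true
    | // insertions and is accepted only when y materializes uninitialized
    | // (isMaterializing) and insertions occur in lexicographic index order.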
402 |
403 | //===----------------------------------------------------------------------===//
404 | // Sparse compiler synthesis methods (reductions).
405 | //===----------------------------------------------------------------------===//
406 |
407 | /// Maps reduction kind to vector::CombiningKind.
408 | static vector::CombiningKind getCombiningKind(Reduction kind) {
409 |   switch (kind) {
410 |   case kNoReduc:
411 |   case kCustom:
412 |     break;
413 |   case kSum:
414 |     return vector::CombiningKind::ADD;
415 |   case kProduct:
416 |     return vector::CombiningKind::MUL;
417 |   case kAnd:
418 |     return vector::CombiningKind::AND;
419 |   case kOr:
420 |     return vector::CombiningKind::OR;
421 |   case kXor:
422 |     return vector::CombiningKind::XOR;
423 |   }
424 |   llvm_unreachable("unknown reduction kind");
425 | }
426 |
427 | /// Maps operation to reduction.
428 | static Reduction getReduction(Kind kind) {
429 |   switch (kind) {
430 |   case Kind::kAddF:
431 |   case Kind::kAddC:
432 |   case Kind::kAddI:
433 |   case Kind::kSubF:
434 |   case Kind::kSubC:
435 |   case Kind::kSubI:
436 |     return kSum;
437 |   case Kind::kMulF:
438 |   case Kind::kMulC:
439 |   case Kind::kMulI:
440 |     return kProduct;
441 |   case Kind::kAndI:
442 |     return kAnd;
443 |   case Kind::kOrI:
444 |     return kOr;
445 |   case Kind::kXorI:
446 |     return kXor;
447 |   case Kind::kReduce:
448 |     return kCustom;
449 |   default:
450 |     llvm_unreachable("unexpected reduction operator");
451 |   }
452 | }
453 |
454 | /// Generates an initial value for a vector reduction, following the scheme
455 | /// given in Chapter 5 of "The Software Vectorization Handbook", where the
456 | /// initial scalar value is correctly embedded in the vector reduction value,
457 | /// and a straightforward horizontal reduction will complete the operation.
458 | static Value genVectorReducInit(CodeGen &codegen, OpBuilder &builder,
459 |                                 Location loc, VectorType vtp) {
460 |   Value r = codegen.redVal;
461 |   switch (codegen.redKind) {
462 |   case kNoReduc:
463 |   case kCustom:
464 |     break;
465 |   case kSum:
466 |   case kXor:
467 |     // Initialize reduction vector to: | 0 | .. | 0 | r |
468 |     return builder.create<vector::InsertElementOp>(
469 |         loc, r, constantZero(builder, loc, vtp),
470 |         constantIndex(builder, loc, 0));
471 |   case kProduct:
472 |     // Initialize reduction vector to: | 1 | .. | 1 | r |
473 |     return builder.create<vector::InsertElementOp>(
474 |         loc, r, constantOne(builder, loc, vtp), constantIndex(builder, loc, 0));
475 |   case kAnd:
476 |   case kOr:
477 |     // Initialize reduction vector to: | r | .. | r | r |
478 |     return builder.create<vector::BroadcastOp>(loc, vtp, r);
479 |   }
480 |   llvm_unreachable("unknown reduction kind");
481 | }
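    | // Editor's note (illustrative): for a sum with incoming scalar value r and
    | // vector length 4, the initial vector is <r, 0, 0, 0>; the zero lanes are
    | // neutral for addition, so the final horizontal add yields exactly the
    | // accumulated result. For kAnd/kOr, r is broadcast to every lane instead,
    | // since and/or are idempotent (r AND r == r), which again keeps the
    | // horizontal reduction correct.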
482 |
483 | /// Generates final value for a vector reduction.
484 | static Value genVectorReducEnd(CodeGen &codegen, OpBuilder &builder,
485 |                                Location loc, VectorType vtp) {
486 |   vector::CombiningKind kind = getCombiningKind(codegen.redKind);
487 |   return builder.create<vector::ReductionOp>(loc, kind, codegen.redVal);
488 | }
489 |
490 | /// Updates scalarized reduction value.
491 | static void updateReduc(Merger &merger, CodeGen &codegen, Value reduc) {
492 |   assert(codegen.redKind != kNoReduc);
493 |   codegen.redVal = merger.exp(codegen.redExp).val = reduc;
494 | }
495 |
496 | /// Extracts identity from custom reduce.
497 | static Value getCustomRedId(Operation *op) {
498 |   return dyn_cast<sparse_tensor::ReduceOp>(op).getIdentity();
499 | }
500 |
501 | //===----------------------------------------------------------------------===//
502 | // Sparse compiler synthesis methods (statements and expressions).
503 | //===----------------------------------------------------------------------===//
504 |
505 | /// Generates buffer for the output tensor. Note that all sparse kernels
506 | /// assume that when all elements are written to (viz. x(i) = y(i) * z(i)),
507 | /// the output buffer is already initialized to all zeroes and only nonzero
508 | /// values are computed and written out. For updates (viz. x(i) += y(i) * z(i)),
509 | /// only nonzero values are used for the updates and no assumption on the
510 | /// original contents of the output buffer is necessary.
511 | static Value genOutputBuffer(CodeGen &codegen, OpBuilder &builder,
512 |                              linalg::GenericOp op, MemRefType denseTp,
513 |                              ArrayRef<Value> args) {
514 |   Location loc = op.getLoc();
515 |   OpOperand *lhs = op.getOutputOperand(0);
516 |   Value tensor = lhs->get();
517 |   bool isInit = op.isInitTensor(lhs);
518 |   // An output tensor can simply materialize from the buffer of the tensor that
519 |   // appears in the outs() clause. For updates, this has the advantage that only
520 |   // the nonzero values are involved in the computation, keeping the operation
521 |   // O(nnz). In all other cases, we are forced to zero out the buffer to enforce
522 |   // the assumption above, which may negatively impact running complexity
523 |   // (viz. O(n^2 + nnz) vs. O(nnz) for matrices).
524 |   // TODO: use better analysis to avoid zeroing out the buffer?
525 |   Value init = builder.create<bufferization::ToMemrefOp>(loc, denseTp, tensor);
526 |   if (!isInit) {
527 |     Value zero = constantZero(builder, loc, denseTp.getElementType());
528 |     builder.create<linalg::FillOp>(loc, ValueRange{zero}, ValueRange{init});
529 |   }
530 |   return init;
531 | }
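    | // Editor's note (illustrative): for an update x(i) += y(i) * z(i) the
    | // outs() buffer is reused as-is and the kernel stays O(nnz); for
    | // x(i) = y(i) * z(i) on an uninitialized output, the linalg::FillOp above
    | // first zeros the buffer, adding the O(n) (or O(n^2) for matrices)
    | // initialization term mentioned in the complexity remark.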
532 |
533 | /// Local bufferization of all dense and sparse data structures.
534 | static void genBuffers(Merger &merger, CodeGen &codegen, OpBuilder &builder,
535 |                        linalg::GenericOp op) {
536 |   Location loc = op.getLoc();
537 |   assert(op.getNumInputsAndOutputs() == op.getNumInputs() + 1);
538 |   // For every tensor, find lower and upper bound on dimensions, set the
539 |   // same bounds on loop indices, and obtain dense or sparse buffer(s).
540 |   auto dynShape = {ShapedType::kDynamicSize};
541 |   SmallVector<Value, 4> args;
542 |   for (OpOperand *t : op.getInputAndOutputOperands()) {
543 |     unsigned tensor = t->getOperandNumber();
544 |     auto shape = op.getShape(t);
545 |     auto map = op.getMatchingIndexingMap(t);
546 |     auto enc = getSparseTensorEncoding(t->get().getType());
547 |     // Scan all dimensions of current tensor.
548 |     args.clear();
549 |     for (unsigned d = 0, rank = map.getNumResults(); d < rank; d++) {
550 |       AffineExpr a = map.getResult(toOrigDim(enc, d));
551 |       if (a.getKind() != AffineExprKind::DimId)
552 |         continue; // compound
553 |       unsigned idx = a.cast<AffineDimExpr>().getPosition();
554 |       // Handle the different storage schemes.
555 |       if (merger.isDimLevelType(tensor, idx, DimLvlType::kCompressed)) {
556 |         // Compressed dimension, fetch pointer and indices.
557 |         auto ptrTp =
558 |             MemRefType::get(dynShape, getPointerOverheadType(builder, enc));
559 |         auto indTp =
560 |             MemRefType::get(dynShape, getIndexOverheadType(builder, enc));
561 |         auto dim = builder.getIndexAttr(d);
562 |         codegen.pointers[tensor][idx] =
563 |             builder.create<ToPointersOp>(loc, ptrTp, t->get(), dim);
564 |         codegen.indices[tensor][idx] =
565 |             builder.create<ToIndicesOp>(loc, indTp, t->get(), dim);
566 |       } else if (merger.isDimLevelType(tensor, idx, DimLvlType::kSingleton)) {
567 |         // Singleton dimension, fetch indices.
568 |         auto indTp =
569 |             MemRefType::get(dynShape, getIndexOverheadType(builder, enc));
570 |         auto dim = builder.getIndexAttr(d);
571 |         codegen.indices[tensor][idx] =
572 |             builder.create<ToIndicesOp>(loc, indTp, t->get(), dim);
573 |       } else {
574 |         // Dense dimension, nothing to fetch.
575 |         assert(merger.isDimLevelType(tensor, idx, DimLvlType::kDense));
576 |       }
577 |       // Find upper bound in current dimension.
578 |       unsigned p = toOrigDim(enc, d);
579 |       Value up = linalg::createOrFoldDimOp(builder, loc, t->get(), p);
580 |       if (ShapedType::isDynamic(shape[p]))
581 |         args.push_back(up);
582 |       assert(codegen.highs[tensor][idx] == nullptr);
583 |       codegen.sizes[idx] = codegen.highs[tensor][idx] = up;
584 |     }
585 |     // Perform the required bufferization. Dense inputs materialize
586 |     // from the input tensors. Dense outputs need special handling.
587 |     // Sparse inputs use sparse primitives to obtain the values.
588 |     Type elementType = getElementTypeOrSelf(t->get().getType());
589 |     if (!enc) {
590 |       // Non-annotated dense tensors.
591 |       auto denseTp = MemRefType::get(shape, elementType);
592 |       if (tensor < op.getNumInputs())
593 |         codegen.buffers[tensor] =
594 |             builder.create<bufferization::ToMemrefOp>(loc, denseTp, t->get());
595 |       else
596 |         codegen.buffers[tensor] =
597 |             genOutputBuffer(codegen, builder, op, denseTp, args);
598 |     } else if (t != codegen.sparseOut) {
599 |       // Annotated sparse tensors (not involved in output).
600 |       auto sparseTp = MemRefType::get(dynShape, elementType);
601 |       codegen.buffers[tensor] =
602 |           builder.create<ToValuesOp>(loc, sparseTp, t->get());
603 |     }
604 |   }
605 | }
606 |
607 | /// Constructs vector type.
608 | static VectorType vectorType(CodeGen &codegen, Type etp) {
609 |   unsigned numScalableDims = codegen.options.enableVLAVectorization;
610 |   return VectorType::get(codegen.curVecLength, etp, numScalableDims);
611 | }
612 |
613 | /// Constructs vector type from pointer.
614 | static VectorType vectorType(CodeGen &codegen, Value ptr) {
615 |   return vectorType(codegen, ptr.getType().cast<MemRefType>().getElementType());
616 | }
617 |
618 | /// Constructs vector iteration mask.
619 | static Value genVectorMask(CodeGen &codegen, OpBuilder &builder, Value iv,
620 |                            Value lo, Value hi, Value step) {
621 |   Location loc = iv.getLoc();
622 |   VectorType mtp = vectorType(codegen, builder.getI1Type());
623 |   // Special case if the vector length evenly divides the trip count (for
624 |   // example, "for i = 0, 128, 16"). A constant all-true mask is generated
625 |   // so that all subsequent masked memory operations are immediately folded
626 |   // into unconditional memory operations.
627 |   IntegerAttr loInt, hiInt, stepInt;
628 |   if (matchPattern(lo, m_Constant(&loInt)) &&
629 |       matchPattern(hi, m_Constant(&hiInt)) &&
630 |       matchPattern(step, m_Constant(&stepInt))) {
631 |     if (((hiInt.getInt() - loInt.getInt()) % stepInt.getInt()) == 0)
632 |       return builder.create<vector::BroadcastOp>(
633 |           loc, mtp, constantI1(builder, loc, true));
634 |   }
635 |   // Otherwise, generate a vector mask that avoids overrunning the upperbound
636 |   // during vector execution. Here we rely on subsequent loop optimizations to
637 |   // avoid executing the mask in all iterations, for example, by splitting the
638 |   // loop into an unconditional vector loop and a scalar cleanup loop.
639 |   auto minMap = AffineMap::get(
640 |       /*dimCount=*/2, /*symbolCount=*/1,
641 |       {builder.getAffineSymbolExpr(0),
642 |        builder.getAffineDimExpr(0) - builder.getAffineDimExpr(1)},
643 |       builder.getContext());
644 |   Value end =
645 |       builder.createOrFold<AffineMinOp>(loc, minMap, ValueRange{hi, iv, step});
646 |   return builder.create<vector::CreateMaskOp>(loc, mtp, end);
647 | }
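    | // Editor's note (illustrative): for "for i = 0, 128, 16" the trip count is
    | // an exact multiple of the step, so a constant all-true mask is emitted;
    | // with an upper bound of 100 instead, the iteration starting at i = 96 gets
    | // the mask length min(step, hi - iv) = min(16, 100 - 96) = 4, enabling only
    | // the first four lanes and protecting the loop tail.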
648 |
649 | /// Generates a vectorized load lhs = a[ind[lo:hi]] or lhs = a[lo:hi].
650 | static Value genVectorLoad(CodeGen &codegen, OpBuilder &builder, Value ptr,
651 |                            ArrayRef<Value> args) {
652 |   Location loc = ptr.getLoc();
653 |   VectorType vtp = vectorType(codegen, ptr);
654 |   Value pass = constantZero(builder, loc, vtp);
655 |   if (args.back().getType().isa<VectorType>()) {
656 |     SmallVector<Value, 4> scalarArgs(args.begin(), args.end());
657 |     Value indexVec = args.back();
658 |     scalarArgs.back() = constantIndex(builder, loc, 0);
659 |     return builder.create<vector::GatherOp>(loc, vtp, ptr, scalarArgs, indexVec,
660 |                                             codegen.curVecMask, pass);
661 |   }
662 |   return builder.create<vector::MaskedLoadOp>(loc, vtp, ptr, args,
663 |                                               codegen.curVecMask, pass);
664 | }
665 |
666 | /// Generates a vectorized store a[ind[lo:hi]] = rhs or a[lo:hi] = rhs.
667 | static void genVectorStore(CodeGen &codegen, OpBuilder &builder, Value rhs,
668 |                            Value ptr, ArrayRef<Value> args) {
669 |   Location loc = ptr.getLoc();
670 |   if (args.back().getType().isa<VectorType>()) {
671 |     SmallVector<Value, 4> scalarArgs(args.begin(), args.end());
672 |     Value indexVec = args.back();
673 |     scalarArgs.back() = constantIndex(builder, loc, 0);
674 |     builder.create<vector::ScatterOp>(loc, ptr, scalarArgs, indexVec,
675 |                                       codegen.curVecMask, rhs);
676 |     return;
677 |   }
678 |   builder.create<vector::MaskedStoreOp>(loc, ptr, args, codegen.curVecMask,
679 |                                         rhs);
680 | }
681 |
682 | /// Generates a vectorized invariant. Here we rely on subsequent loop
683 | /// optimizations to hoist the invariant broadcast out of the vector loop.
684 | static Value genVectorInvariantValue(CodeGen &codegen, OpBuilder &builder,
685 |                                      Value val) {
686 |   VectorType vtp = vectorType(codegen, val.getType());
687 |   return builder.create<vector::BroadcastOp>(val.getLoc(), vtp, val);
688 | }
689 |
690 | /// Generates an affine expression.
691 | //
692 | // TODO: generalize for sparse tensor subscripts
693 | //
694 | static Value genAffine(CodeGen &codegen, OpBuilder &builder, AffineExpr a,
695 |                        Location loc) {
696 |   switch (a.getKind()) {
697 |   case AffineExprKind::DimId: {
698 |     unsigned idx = a.cast<AffineDimExpr>().getPosition();
699 |     return codegen.loops[idx]; // universal dense index
700 |   }
701 |   case AffineExprKind::Add: {
702 |     auto binOp = a.cast<AffineBinaryOpExpr>();
703 |     return builder.create<arith::AddIOp>(
704 |         loc, genAffine(codegen, builder, binOp.getLHS(), loc),
705 |         genAffine(codegen, builder, binOp.getRHS(), loc));
706 |   }
707 |   case AffineExprKind::Mul: {
708 |     auto binOp = a.cast<AffineBinaryOpExpr>();
709 |     return builder.create<arith::MulIOp>(
710 |         loc, genAffine(codegen, builder, binOp.getLHS(), loc),
711 |         genAffine(codegen, builder, binOp.getRHS(), loc));
712 |   }
713 |   case AffineExprKind::Constant: {
714 |     int64_t c = a.cast<AffineConstantExpr>().getValue();
715 |     return constantIndex(builder, loc, c);
716 |   }
717 |   default:
718 |     llvm_unreachable("unexpected affine subscript");
719 |   }
720 | }
721 |
722 | /// Generates index for load/store on sparse tensor.
723 | static Value genIndex(CodeGen &codegen, linalg::GenericOp op, OpOperand *t) {
724 |   auto map = op.getMatchingIndexingMap(t);
725 |   auto enc = getSparseTensorEncoding(t->get().getType());
726 |   AffineExpr a = map.getResult(toOrigDim(enc, map.getNumResults() - 1));
727 |   assert(a.getKind() == AffineExprKind::DimId);
728 |   unsigned idx = a.cast<AffineDimExpr>().getPosition();
729 |   return codegen.loops[idx];
730 | }
731 |
732 | /// Generates subscript for load/store on a dense or sparse tensor.
733 | static Value genSubscript(CodeGen &codegen, OpBuilder &builder,
734 |                           linalg::GenericOp op, OpOperand *t,
735 |                           SmallVector<Value, 4> &args) {
736 |   unsigned tensor = t->getOperandNumber();
737 |   auto map = op.getMatchingIndexingMap(t);
738 |   auto enc = getSparseTensorEncoding(t->get().getType());
739 |   unsigned rank = map.getNumResults();
740 |   if (enc) {
741 |     // Note that currently, all sparse subscripts are simple.
742 |     // TODO: accept affine too?
743 |     AffineExpr a = map.getResult(toOrigDim(enc, rank - 1));
744 |     assert(a.getKind() == AffineExprKind::DimId);
745 |     unsigned idx = a.cast<AffineDimExpr>().getPosition();
746 |     assert(codegen.pidxs[tensor][idx] != nullptr);
747 |     args.push_back(codegen.pidxs[tensor][idx]); // position index
748 |   } else {
749 |     for (unsigned d = 0; d < rank; d++) {
750 |       AffineExpr a = map.getResult(d);
751 |       args.push_back(genAffine(codegen, builder, a, op.getLoc()));
752 |     }
753 |   }
754 |   return codegen.buffers[tensor];
755 | }
756 |
757 | /// Generates insertion code to implement dynamic tensor load.
758 | static Value genInsertionLoad(CodeGen &codegen, OpBuilder &builder,
759 |                               linalg::GenericOp op, OpOperand *t) {
760 |   Location loc = op.getLoc();
761 |   // Direct lexicographic index order, tensor loads as zero.
762 |   if (!codegen.expValues) {
763 |     Type tp = getElementTypeOrSelf(t->get().getType());
    |                                    ^ warning: Called C++ object pointer is null
764 |     return constantZero(builder, loc, tp);
765 |   }
766 |   // Load from expanded access pattern.
767 |   Value index = genIndex(codegen, op, t);
768 |   return builder.create<memref::LoadOp>(loc, codegen.expValues, index);
769 | }
770 |
771 | /// Generates insertion code to implement dynamic tensor load for reduction.
772 | static Value genInsertionLoadReduce(Merger &merger, CodeGen &codegen,
773 |                                     OpBuilder &builder, linalg::GenericOp op,
774 |                                     OpOperand *t) {
775 |   Location loc = op.getLoc();
776 |   Value identity = getCustomRedId(merger.exp(codegen.redCustom).op);
777 |   // Direct lexicographic index order, tensor loads as identity.
778 |   if (!codegen.expValues) {
779 |     return identity;
780 |   }
781 |   // Load from expanded access pattern if filled, identity otherwise.
782 |   Value index = genIndex(codegen, op, t);
783 |   Value isFilled =
784 |       builder.create<memref::LoadOp>(loc, codegen.expFilled, index);
785 |   Value valAtIndex =
786 |       builder.create<memref::LoadOp>(loc, codegen.expValues, index);
787 |   return builder.create<arith::SelectOp>(loc, isFilled, valAtIndex, identity);
788 | }
789 |
790 | /// Generates insertion code to implement dynamic tensor store.
791 | static void genInsertionStore(CodeGen &codegen, OpBuilder &builder,
792 |                               linalg::GenericOp op, OpOperand *t, Value rhs) {
793 |   Location loc = op.getLoc();
794 |   // Direct insertion in lexicographic index order.
795 |   if (!codegen.expValues) {
796 |     unsigned rank = op.getRank(t);
797 |     SmallVector<Value, 4> indices;
798 |     for (unsigned i = 0; i < rank; i++) {
799 |       assert(codegen.loops[codegen.topSort[i]]);
800 |       indices.push_back(codegen.loops[codegen.topSort[i]]);
801 |     }
802 |     builder.create<InsertOp>(loc, rhs, t->get(), indices);
803 |     return;
804 |   }
805 |   // Generates insertion code along expanded access pattern.
806 |   //   if (!expFilled[i]) then
807 |   //     expFilled[i] = true
808 |   //     expAdded[inserts++] = i
809 |   //   endif
810 |   //   values[i] = rhs
811 |   Value index = genIndex(codegen, op, t);
812 |   Value fval = constantI1(builder, loc, false);
813 |   Value tval = constantI1(builder, loc, true);
814 |   // If statement.
815 |   Value filled = builder.create<memref::LoadOp>(loc, codegen.expFilled, index);
816 |   Value cond = builder.create<arith::CmpIOp>(loc, arith::CmpIPredicate::eq,
817 |                                              filled, fval);
818 |   scf::IfOp ifOp = builder.create<scf::IfOp>(loc, builder.getIndexType(), cond,
819 |                                              /*else=*/true);
820 |   // True branch.
821 |   builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
822 |   builder.create<memref::StoreOp>(loc, tval, codegen.expFilled, index);
823 |   builder.create<memref::StoreOp>(loc, index, codegen.expAdded,
824 |                                   codegen.expCount);
825 |   Value one = constantIndex(builder, loc, 1);
826 |   Value add = builder.create<arith::AddIOp>(loc, codegen.expCount, one);
827 |   builder.create<scf::YieldOp>(loc, add);
828 |   // False branch.
829 |   builder.setInsertionPointToStart(&ifOp.getElseRegion().front());
830 |   builder.create<scf::YieldOp>(loc, codegen.expCount);
831 |   builder.setInsertionPointAfter(ifOp);
832 |   // Value assignment.
833 |   codegen.expCount = ifOp.getResult(0);
834 |   builder.create<memref::StoreOp>(loc, rhs, codegen.expValues, index);
835 | }
836 |
837 | /// Generates a load on a dense or sparse tensor.
838 | static Value genTensorLoad(Merger &merger, CodeGen &codegen, OpBuilder &builder,
839 |                            linalg::GenericOp op, unsigned exp) {
840 |   // Test if the load was hoisted to a higher loop nest.
841 |   Value val = merger.exp(exp).val;
842 |   if (val) {
843 |     if (codegen.curVecLength > 1 && !val.getType().isa<VectorType>())
844 |       return genVectorInvariantValue(codegen, builder, val);
845 |     return val;
846 |   }
847 |   // Load during insertion.
848 |   OpOperand *t = op.getInputAndOutputOperands()[merger.exp(exp).tensor];
849 |   if (t == codegen.sparseOut) {
850 |     if (codegen.redCustom != -1u)
851 |       return genInsertionLoadReduce(merger, codegen, builder, op, t);
852 |     return genInsertionLoad(codegen, builder, op, t);
853 |   }
854 |   // Actual load.
855 |   SmallVector<Value, 4> args;
856 |   Value ptr = genSubscript(codegen, builder, op, t, args);
857 |   if (codegen.curVecLength > 1)
858 |     return genVectorLoad(codegen, builder, ptr, args);
859 |   return builder.create<memref::LoadOp>(op.getLoc(), ptr, args);
860 | }
861 |
862 | /// Generates a store on a dense or sparse tensor.
863 | static void genTensorStore(Merger &merger, CodeGen &codegen, OpBuilder &builder,
864 |                            linalg::GenericOp op, unsigned exp, Value rhs) {
865 |   Location loc = op.getLoc();
866 |   // Test if this is a scalarized reduction.
867 |   if (codegen.redVal) {
868 |     if (codegen.curVecLength > 1)
869 |       rhs = builder.create<arith::SelectOp>(loc, codegen.curVecMask, rhs,
870 |                                             codegen.redVal);
871 |     updateReduc(merger, codegen, rhs);
872 |     return;
873 |   }
874 |   // Store during insertion.
875 |   OpOperand *t = op.getOutputOperand(0);
876 |   if (t == codegen.sparseOut) {
877 |     if (!rhs) {
878 |       // Only unary and binary are allowed to return uninitialized rhs
879 |       // to indicate missing output.
880 |       assert(merger.exp(exp).kind == kUnary || merger.exp(exp).kind == kBinary);
881 |     } else {
882 |       genInsertionStore(codegen, builder, op, t, rhs);
883 |     }
884 |     return;
885 |   }
886 |   // Actual store.
887 |   SmallVector<Value, 4> args;
888 |   Value ptr = genSubscript(codegen, builder, op, t, args);
889 |   if (codegen.curVecLength > 1)
890 |     genVectorStore(codegen, builder, rhs, ptr, args);
891 |   else
892 |     builder.create<memref::StoreOp>(loc, rhs, ptr, args);
893 | }
894 |
895 | /// Generates a pointer/index load from the sparse storage scheme. Narrower
896 | /// data types need to be zero extended before casting the value into the
897 | /// index type used for looping and indexing.
898 | static Value genLoad(CodeGen &codegen, OpBuilder &builder, Location loc,
899 |                      Value ptr, Value s) {
900 |   // See https://llvm.org/docs/GetElementPtr.html for some background on
901 |   // the complications described below.
902 |   if (codegen.curVecLength > 1) {
903 |     // Since the index vector is used in subsequent gather/scatter operations,
904 |     // which effectively define an unsigned pointer + signed index, we must
905 |     // zero extend the vector to an index width. For 8-bit and 16-bit values,
906 |     // a 32-bit index width suffices. For 32-bit values, zero extending the
907 |     // elements into 64-bit loses some performance since the 32-bit indexed
908 |     // gather/scatter is more efficient than the 64-bit index variant (if the
909 |     // negative 32-bit index space is unused, the enableSIMDIndex32 flag can
910 |     // preserve this performance). For 64-bit values, there is no good way
911 |     // to state that the indices are unsigned, which creates the potential for
912 |     // incorrect address calculations in the unlikely case we need such
913 |     // extremely large offsets.
914 |     Type etp = ptr.getType().cast<MemRefType>().getElementType();
915 |     Value vload = genVectorLoad(codegen, builder, ptr, {s});
916 |     if (!etp.isa<IndexType>()) {
917 |       if (etp.getIntOrFloatBitWidth() < 32)
918 |         vload = builder.create<arith::ExtUIOp>(
919 |             loc, vectorType(codegen, builder.getI32Type()), vload);
920 |       else if (etp.getIntOrFloatBitWidth() < 64 &&
921 |                !codegen.options.enableSIMDIndex32)
922 |         vload = builder.create<arith::ExtUIOp>(
923 |             loc, vectorType(codegen, builder.getI64Type()), vload);
924 |     }
925 |     return vload;
926 |   }
927 |   // For the scalar case, we simply zero extend narrower indices into 64-bit
928 |   // values before casting to index without a performance penalty. Here too,
929 |   // however, indices that already are 64-bit, in theory, cannot express the
930 |   // full range as explained above.
931 |   Value load = builder.create<memref::LoadOp>(loc, ptr, s);
932 |   if (!load.getType().isa<IndexType>()) {
933 |     if (load.getType().getIntOrFloatBitWidth() < 64)
934 |       load = builder.create<arith::ExtUIOp>(loc, builder.getI64Type(), load);
935 |     load =
936 |         builder.create<arith::IndexCastOp>(loc, builder.getIndexType(), load);
937 |   }
938 |   return load;
939 | }
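    | // Editor's note (illustrative): the zero extension above matters because a
    | // 16-bit stored index 0xFFFF must become 65535, not -1; a sign extension
    | // (arith::ExtSIOp) would turn large unsigned indices into negative offsets
    | // and corrupt the address computation described in the comments.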
940 |
941 | /// Generates an invariant value.
942 | static Value genInvariantValue(Merger &merger, CodeGen &codegen,
943 |                                OpBuilder &builder, unsigned exp) {
944 |   Value val = merger.exp(exp).val;
945 |   if (codegen.curVecLength > 1)
946 |     return genVectorInvariantValue(codegen, builder, val);
947 |   return val;
948 | }
949 |
950 | /// Generates an address computation "sz * p + i".
951 | static Value genAddress(CodeGen &codegen, OpBuilder &builder, Location loc,
952 |                         Value size, Value p, Value i) {
953 |   Value mul = builder.create<arith::MulIOp>(loc, size, p);
954 |   if (auto vtp = i.getType().dyn_cast<VectorType>()) {
955 |     Value inv =
956 |         builder.create<arith::IndexCastOp>(loc, vtp.getElementType(), mul);
957 |     mul = genVectorInvariantValue(codegen, builder, inv);
958 |   }
959 |   return builder.create<arith::AddIOp>(loc, mul, i);
960 | }
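    | // Editor's note (illustrative): genAddress linearizes an access into the
    | // 1-D values buffer of an all-dense annotated tensor: for position p in the
    | // enclosing dimension and index i with dimension size sz, the address is
    | // sz * p + i, e.g. row 2, column 3 of a 10-column matrix lands at
    | // 2 * 10 + 3 = 23.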
961 |
962 | /// Generates an index value.
963 | static Value genIndexValue(CodeGen &codegen, OpBuilder &builder, unsigned idx,
964 |                            unsigned ldx) {
965 |   Value ival = codegen.loops[idx];
966 |   Type itype = ival.getType();
967 |   // During vectorization, we either encounter:
968 |   // (1) indices already in vector form, as in ... = ind[lo:hi], good to go, or
969 |   // (2) single index, as in ... = i, must convert to [i, i+1, ...] for inner i.
970 |   unsigned vl = codegen.curVecLength;
971 |   if (vl > 1 && !itype.isa<VectorType>()) {
972 |     Location loc = ival.getLoc();
973 |     VectorType vtp = vectorType(codegen, itype);
974 |     ival = builder.create<vector::BroadcastOp>(loc, vtp, ival);
975 |     if (idx == ldx) {
976 |       Value incr;
977 |       if (vtp.isScalable()) {
978 |         Type stepvty = vectorType(codegen, builder.getI64Type());
979 |         Value stepv = builder.create<LLVM::StepVectorOp>(loc, stepvty);
980 |         incr = builder.create<arith::IndexCastOp>(loc, vtp, stepv);
981 |       } else {
982 |         SmallVector<APInt, 4> integers;
983 |         for (unsigned i = 0; i < vl; i++)
984 |           integers.push_back(APInt(/*width=*/64, i));
985 |         auto values = DenseElementsAttr::get(vtp, integers);
986 |         incr = builder.create<arith::ConstantOp>(loc, vtp, values);
987 |       }
988 |       ival = builder.create<arith::AddIOp>(loc, ival, incr);
989 |     }
990 |   }
991 |   return ival;
992 | }
993 | ||||
994 | /// Semi-ring branches are simply inlined by the sparse compiler. Prior | |||
995 | /// analysis has verified that all computations are "local" to the inlined | |||
996 | /// branch or otherwise invariantly defined outside the loop nest, with the | |||
997 | /// exception of index computations, which need to be relinked to actual | |||
998 | /// inlined cloned code. | |||
999 | static Value relinkBranch(CodeGen &codegen, RewriterBase &rewriter, | |||
1000 | Block *block, Value e, unsigned ldx) { | |||
1001 | if (Operation *def = e.getDefiningOp()) { | |||
1002 | if (auto indexOp = dyn_cast<linalg::IndexOp>(def)) | |||
1003 | return genIndexValue(codegen, rewriter, indexOp.getDim(), ldx); | |||
1004 | if (def->getBlock() == block) { | |||
1005 | for (unsigned i = 0, n = def->getNumOperands(); i < n; i++) | |||
1006 | def->setOperand( | |||
1007 | i, relinkBranch(codegen, rewriter, block, def->getOperand(i), ldx)); | |||
1008 | } | |||
1009 | } | |||
1010 | return e; | |||
1011 | } | |||
1012 | ||||
1013 | /// Recursively generates tensor expression. | |||
1014 | static Value genExp(Merger &merger, CodeGen &codegen, RewriterBase &rewriter, | |||
1015 | linalg::GenericOp op, unsigned exp, unsigned ldx) { | |||
1016 | Location loc = op.getLoc(); | |||
1017 | if (exp == -1u) | |||
1018 | return Value(); | |||
1019 | if (merger.exp(exp).kind == Kind::kTensor) | |||
1020 | return genTensorLoad(merger, codegen, rewriter, op, exp); | |||
1021 | if (merger.exp(exp).kind == Kind::kInvariant) | |||
1022 | return genInvariantValue(merger, codegen, rewriter, exp); | |||
1023 | if (merger.exp(exp).kind == Kind::kIndex) | |||
1024 | return genIndexValue(codegen, rewriter, merger.exp(exp).index, ldx); | |||
1025 | ||||
1026 | if (merger.exp(exp).kind == Kind::kReduce) { | |||
1027 | // Make custom reduction identity accessible for expanded access pattern. | |||
1028 | assert(codegen.redCustom == -1u)(static_cast <bool> (codegen.redCustom == -1u) ? void ( 0) : __assert_fail ("codegen.redCustom == -1u", "mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp" , 1028, __extension__ __PRETTY_FUNCTION__)); | |||
1029 | codegen.redCustom = exp; | |||
1030 | } | |||
1031 | ||||
1032 | Value v0 = | |||
1033 | genExp(merger, codegen, rewriter, op, merger.exp(exp).children.e0, ldx); | |||
1034 | Value v1 = | |||
1035 | genExp(merger, codegen, rewriter, op, merger.exp(exp).children.e1, ldx); | |||
1036 | Value ee = merger.buildExp(rewriter, loc, exp, v0, v1); | |||
1037 | if (ee && (merger.exp(exp).kind == Kind::kUnary || | |||
1038 | merger.exp(exp).kind == Kind::kBinary || | |||
1039 | merger.exp(exp).kind == Kind::kBinaryBranch || | |||
1040 | merger.exp(exp).kind == Kind::kReduce)) | |||
1041 | ee = relinkBranch(codegen, rewriter, ee.getParentBlock(), ee, ldx); | |||
1042 | ||||
1043 | if (merger.exp(exp).kind == Kind::kReduce) { | |||
1044 | assert(codegen.redCustom != -1u); | |||
1045 | codegen.redCustom = -1u; | |||
1046 | } | |||
1047 | ||||
1048 | return ee; | |||
1049 | } | |||
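// Schematic recursion for the function above: a tensor expression such
// as x(i) = a(i) * b(i) + c(i) is visited bottom-up,
//   kTensor(a)   kTensor(b)   kTensor(c)
//        \          /            /
//         mul(v0, v1)           /
//              \               /
//               add(v0, v1)
// with genTensorLoad at the leaves and merger.buildExp combining the
// intermediate values at each inner node (names are schematic).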
1050 | ||||
1051 | /// Determines if affine expression is invariant. | |||
1052 | static bool isInvariantAffine(const CodeGen &codegen, AffineExpr a, | |||
1053 | unsigned ldx, bool &atLevel) { | |||
1054 | switch (a.getKind()) { | |||
1055 | case AffineExprKind::DimId: { | |||
1056 | unsigned idx = a.cast<AffineDimExpr>().getPosition(); | |||
1057 | if (idx == ldx) | |||
1058 | atLevel = true; | |||
1059 | return codegen.loops[idx] != nullptr; // no longer in play? | |||
1060 | } | |||
1061 | case AffineExprKind::Add: | |||
1062 | case AffineExprKind::Mul: { | |||
1063 | auto binOp = a.cast<AffineBinaryOpExpr>(); | |||
1064 | return isInvariantAffine(codegen, binOp.getLHS(), ldx, atLevel) && | |||
1065 | isInvariantAffine(codegen, binOp.getRHS(), ldx, atLevel); | |||
1066 | } | |||
1067 | default: | |||
1068 | return true; | |||
1069 | } | |||
1070 | } | |||
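// Example for the invariance test above (a sketch): with loop order
// (i, j) and ldx = j, the subscript expression i + j is invariant once
// codegen.loops[i] and codegen.loops[j] are both set, and atLevel is
// flagged because dimension j participates exactly at this loop level.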
1071 | ||||
1072 | /// Hoists loop invariant tensor loads for which indices have been exhausted. | |||
1073 | static void genInvariants(Merger &merger, CodeGen &codegen, OpBuilder &builder, | |||
1074 | linalg::GenericOp op, unsigned exp, unsigned ldx, | |||
1075 | bool atStart, unsigned last = -1u) { | |||
1076 | if (exp == -1u) | |||
1077 | return; | |||
1078 | if (merger.exp(exp).kind == Kind::kTensor) { | |||
1079 | // Inspect tensor indices. | |||
1080 | bool atLevel = ldx == -1u; | |||
1081 | OpOperand *t = op.getInputAndOutputOperands()[merger.exp(exp).tensor]; | |||
1082 | auto map = op.getMatchingIndexingMap(t); | |||
1083 | auto enc = getSparseTensorEncoding(t->get().getType()); | |||
1084 | for (unsigned d = 0, rank = map.getNumResults(); d < rank; d++) { | |||
1085 | AffineExpr a = map.getResult(toOrigDim(enc, d)); | |||
1086 | if (!isInvariantAffine(codegen, a, ldx, atLevel)) | |||
1087 | return; // still in play | |||
1088 | } | |||
1089 | // All exhausted at this level (atLevel denotes exactly at this level). | |||
1090 | if (!atLevel) | |||
1091 | return; | |||
1092 | OpOperand *lhs = op.getOutputOperand(0); | |||
1093 | if (lhs == t) { | |||
1094 | // Start or end a scalarized reduction | |||
1095 | if (atStart) { | |||
1096 | Kind kind = merger.exp(last).kind; | |||
1097 | Value load = kind == Kind::kReduce | |||
1098 | ? getCustomRedId(merger.exp(last).op) | |||
1099 | : genTensorLoad(merger, codegen, builder, op, exp); | |||
1100 | codegen.redKind = getReduction(kind); | |||
1101 | codegen.redExp = exp; | |||
1102 | updateReduc(merger, codegen, load); | |||
1103 | } else { | |||
1104 | Value redVal = codegen.redVal; | |||
1105 | updateReduc(merger, codegen, Value()); | |||
1106 | codegen.redExp = -1u; | |||
1107 | codegen.redKind = kNoReduc; | |||
1108 | genTensorStore(merger, codegen, builder, op, exp, redVal); | |||
1109 | } | |||
1110 | } else { | |||
1111 | // Start or end loop invariant hoisting of a tensor load. | |||
1112 | merger.exp(exp).val = | |||
1113 | atStart ? genTensorLoad(merger, codegen, builder, op, exp) : Value(); | |||
1114 | } | |||
1115 | } else if (merger.exp(exp).kind != Kind::kInvariant && | |||
1116 | merger.exp(exp).kind != Kind::kIndex) { | |||
1117 | // Traverse into the binary operations. Note that we only hoist | |||
1118 | // tensor loads, since subsequent MLIR/LLVM passes know how to | |||
1119 | // deal with all other kinds of derived loop invariants. | |||
1120 | unsigned e0 = merger.exp(exp).children.e0; | |||
1121 | unsigned e1 = merger.exp(exp).children.e1; | |||
1122 | genInvariants(merger, codegen, builder, op, e0, ldx, atStart, exp); | |||
1123 | genInvariants(merger, codegen, builder, op, e1, ldx, atStart, exp); | |||
1124 | } | |||
1125 | } | |||
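// Hoisting example for the function above (a sketch): for
// x(j, i) = a(j) * b(j, i) with loop order (j, i), the indices of a(j)
// are exhausted at level j, so its load is generated once at the start
// of the j loop sequence and cached in merger.exp(exp).val, then
// cleared again when the sequence ends (atStart == false).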
1126 | ||||
1127 | /// Generates an expanded access pattern in innermost dimension. | |||
1128 | static void genExpansion(Merger &merger, CodeGen &codegen, OpBuilder &builder, | |||
1129 | linalg::GenericOp op, unsigned at, bool atStart) { | |||
1130 | OpOperand *lhs = codegen.sparseOut; | |||
1131 | if (!lhs || codegen.outerParNest != op.getRank(lhs) - 1 || | |||
1132 | at != codegen.outerParNest) | |||
1133 | return; // not needed at this level | |||
1134 | // Generate start or end of an expanded access pattern. | |||
1135 | Value tensor = lhs->get(); | |||
1136 | Location loc = op.getLoc(); | |||
1137 | if (atStart) { | |||
1138 | auto dynShape = {ShapedType::kDynamicSize}; | |||
1139 | Type etp = tensor.getType().cast<ShapedType>().getElementType(); | |||
1140 | Type t1 = MemRefType::get(dynShape, etp); | |||
1141 | Type t2 = MemRefType::get(dynShape, builder.getI1Type()); | |||
1142 | Type t3 = MemRefType::get(dynShape, builder.getIndexType()); | |||
1143 | Type t4 = builder.getIndexType(); | |||
1144 | auto res = | |||
1145 | builder.create<ExpandOp>(loc, TypeRange({t1, t2, t3, t4}), tensor); | |||
1146 | assert(res.getNumResults() == 4); | |||
1147 | assert(!codegen.expValues); | |||
1148 | codegen.expValues = res.getResult(0); | |||
1149 | codegen.expFilled = res.getResult(1); | |||
1150 | codegen.expAdded = res.getResult(2); | |||
1151 | codegen.expCount = res.getResult(3); | |||
1152 | } else { | |||
1153 | assert(codegen.expValues); | |||
1154 | SmallVector<Value, 4> indices; | |||
1155 | for (unsigned i = 0; i < at; i++) { | |||
1156 | assert(codegen.loops[codegen.topSort[i]]); | |||
1157 | indices.push_back(codegen.loops[codegen.topSort[i]]); | |||
1158 | } | |||
1159 | builder.create<CompressOp>(loc, codegen.expValues, codegen.expFilled, | |||
1160 | codegen.expAdded, codegen.expCount, tensor, | |||
1161 | indices); | |||
1162 | codegen.expValues = codegen.expFilled = codegen.expAdded = | |||
1163 | codegen.expCount = Value(); | |||
1164 | } | |||
1165 | } | |||
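// Schematically (illustrative assembly, exact syntax may differ), the
// expansion above brackets the innermost loop nest with
//   %values, %filled, %added, %count = sparse_tensor.expand %tensor
//   ...
//   sparse_tensor.compress %values, %filled, %added, %count, %tensor[...]
// so that random access into the expanded row is compressed back into
// the sparse output when the nest finishes.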
1166 | ||||
1167 | /// Generates initialization code for the subsequent loop sequence at | |||
1168 | /// current index level. Returns true if the loop sequence needs to | |||
1169 | /// maintain the universal index. | |||
1170 | static bool genInit(Merger &merger, CodeGen &codegen, OpBuilder &builder, | |||
1171 | linalg::GenericOp op, unsigned at, BitVector &inits) { | |||
1172 | std::vector<unsigned> &topSort(codegen.topSort); | |||
1173 | bool needsUniv = false; | |||
1174 | Location loc = op.getLoc(); | |||
1175 | unsigned idx = topSort[at]; | |||
1176 | ||||
1177 | // Initialize sparse positions. | |||
1178 | for (unsigned b = 0, be = inits.size(); b < be; b++) { | |||
1179 | if (!inits[b]) | |||
1180 | continue; | |||
1181 | unsigned tensor = merger.tensor(b); | |||
1182 | assert(idx == merger.index(b)); | |||
1183 | if (merger.isDimLevelType(b, DimLvlType::kCompressed)) { | |||
1184 | // Initialize sparse index that will implement the iteration: | |||
1185 | // for pidx_idx = pointers(pidx_idx-1), pointers(1+pidx_idx-1) | |||
1186 | unsigned pat = at; | |||
1187 | for (; pat != 0; pat--) { | |||
1188 | if (codegen.pidxs[tensor][topSort[pat - 1]]) | |||
1189 | break; | |||
1190 | } | |||
1191 | Value ptr = codegen.pointers[tensor][idx]; | |||
1192 | Value one = constantIndex(builder, loc, 1); | |||
1193 | Value p0 = (pat == 0) ? constantIndex(builder, loc, 0) | |||
1194 | : codegen.pidxs[tensor][topSort[pat - 1]]; | |||
1195 | codegen.pidxs[tensor][idx] = genLoad(codegen, builder, loc, ptr, p0); | |||
1196 | Value p1 = builder.create<arith::AddIOp>(loc, p0, one); | |||
1197 | codegen.highs[tensor][idx] = genLoad(codegen, builder, loc, ptr, p1); | |||
1198 | } else if (merger.isDimLevelType(b, DimLvlType::kSingleton)) { | |||
1199 | // Initialize sparse index that will implement the "iteration": | |||
1200 | // for pidx_idx = pidx_idx-1, 1+pidx_idx-1 | |||
1201 | // We rely on subsequent loop unrolling to get rid of the loop | |||
1202 | // if it is not involved in co-iteration with anything else. | |||
1203 | unsigned pat = at; | |||
1204 | for (; pat != 0; pat--) { | |||
1205 | if (codegen.pidxs[tensor][topSort[pat - 1]]) | |||
1206 | break; | |||
1207 | } | |||
1208 | Value one = constantIndex(builder, loc, 1); | |||
1209 | Value p0 = (pat == 0) ? constantIndex(builder, loc, 0) | |||
1210 | : codegen.pidxs[tensor][topSort[pat - 1]]; | |||
1211 | codegen.pidxs[tensor][idx] = p0; | |||
1212 | codegen.highs[tensor][idx] = builder.create<arith::AddIOp>(loc, p0, one); | |||
1213 | } else { | |||
1214 | assert(merger.isDimLevelType(b, DimLvlType::kDense) || | |||
1215 |        merger.isDimLevelType(b, DimLvlType::kUndef)); | |||
1216 | // Dense index still in play. | |||
1217 | needsUniv = true; | |||
1218 | } | |||
1219 | } | |||
1220 | ||||
1221 | // Initialize the universal dense index. | |||
1222 | codegen.loops[idx] = constantIndex(builder, loc, 0); | |||
1223 | return needsUniv; | |||
1224 | } | |||
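// Concrete illustration for the compressed case above (a sketch for a
// CSR matrix, parent position %p0):
//   %lo = memref.load %pointers[%p0]            // codegen.pidxs[t][idx]
//   %p1 = arith.addi %p0, %c1
//   %hi = memref.load %pointers[%p1]            // codegen.highs[t][idx]
// so the subsequent loop for this index iterates over positions
// [%lo, %hi) of the indices/values arrays.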
1225 | ||||
1226 | /// Returns vectorization strategy. Any implicit inner loop in the Linalg | |||
1227 | /// operation is a candidate. Whether it is actually converted to SIMD code | |||
1228 | /// depends on the requested strategy. | |||
1229 | static bool isVectorFor(CodeGen &codegen, bool isInner, bool isReduction, | |||
1230 | bool isSparse) { | |||
1231 | // Reject vectorization of sparse output, unless innermost is reduction. | |||
1232 | if (codegen.sparseOut && !isReduction) | |||
1233 | return false; | |||
1234 | // Inspect strategy. | |||
1235 | switch (codegen.options.vectorizationStrategy) { | |||
1236 | case SparseVectorizationStrategy::kNone: | |||
1237 | return false; | |||
1238 | case SparseVectorizationStrategy::kDenseInnerLoop: | |||
1239 | return isInner && !isSparse; | |||
1240 | case SparseVectorizationStrategy::kAnyStorageInnerLoop: | |||
1241 | return isInner; | |||
1242 | } | |||
1243 | llvm_unreachable("unexpected vectorization strategy"); | |||
1244 | } | |||
1245 | ||||
1246 | /// Returns parallelization strategy. Any implicit loop in the Linalg operation | |||
1247 | /// that is marked "parallel" is a candidate. Whether it is actually converted | |||
1248 | /// to a parallel operation depends on the requested strategy. | |||
1249 | static bool isParallelFor(CodeGen &codegen, bool isOuter, bool isReduction, | |||
1250 | bool isSparse, bool isVector) { | |||
1251 | // Reject parallelization of sparse output. | |||
1252 | if (codegen.sparseOut) | |||
1253 | return false; | |||
1254 | // Inspect strategy. | |||
1255 | switch (codegen.options.parallelizationStrategy) { | |||
1256 | case SparseParallelizationStrategy::kNone: | |||
1257 | return false; | |||
1258 | case SparseParallelizationStrategy::kDenseOuterLoop: | |||
1259 | return isOuter && !isSparse && !isReduction && !isVector; | |||
1260 | case SparseParallelizationStrategy::kAnyStorageOuterLoop: | |||
1261 | return isOuter && !isReduction && !isVector; | |||
1262 | case SparseParallelizationStrategy::kDenseAnyLoop: | |||
1263 | return !isSparse && !isReduction && !isVector; | |||
1264 | case SparseParallelizationStrategy::kAnyStorageAnyLoop: | |||
1265 | return !isReduction && !isVector; | |||
1266 | } | |||
1267 | llvm_unreachable("unexpected parallelization strategy"); | |||
1268 | } | |||
1269 | ||||
1270 | /// Checks unit stride for dense tensors. The iteration graph may have ignored | |||
1271 | /// dense access patterns in order to avoid cycles (sparse access patterns are | |||
1272 | /// always placed innermost), but that means dense access has become strided. | |||
1273 | /// This prevents effective vectorization. | |||
1274 | static bool denseUnitStrides(Merger &merger, linalg::GenericOp op, | |||
1275 | unsigned idx) { | |||
1276 | for (OpOperand *t : op.getInputAndOutputOperands()) { | |||
1277 | if (!getSparseTensorEncoding(t->get().getType())) { | |||
1278 | auto map = op.getMatchingIndexingMap(t); | |||
1279 | for (unsigned d = 0, rank = map.getNumResults(); d < rank; d++) { | |||
1280 | AffineExpr a = map.getResult(d); | |||
1281 | // Report non-unit stride if innermost index appears at an outer | |||
1282 | // dimension (true non-unit stride) or if the innermost index appears | |||
1283 | // in a compound subscript in the innermost dimension. Even if the | |||
1284 | // latter is unit stride, it does not play well with scatter/gather. | |||
1285 | // TODO: accept unit stride affine innermost like a[i,j+k+1]? | |||
1286 | if (a.isFunctionOfDim(idx) && | |||
1287 | ((d != rank - 1) || (a.getKind() != AffineExprKind::DimId))) | |||
1288 | return false; | |||
1289 | } | |||
1290 | } | |||
1291 | } | |||
1292 | return true; | |||
1293 | } | |||
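// Examples for the stride check above (schematic): with innermost loop
// index i, a dense access a[i][j] is rejected because i occurs at an
// outer dimension (d != rank - 1), and a[j][i + k] is rejected because
// the innermost subscript is a compound expression rather than a plain
// DimId, even though its stride happens to be one.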
1294 | ||||
1295 | /// Generates a for-loop on a single index. | |||
1296 | static Operation *genFor(Merger &merger, CodeGen &codegen, OpBuilder &builder, | |||
1297 | linalg::GenericOp op, bool isOuter, bool isInner, | |||
1298 | unsigned idx, BitVector &indices) { | |||
1299 | unsigned fb = indices.find_first(); | |||
1300 | unsigned tensor = merger.tensor(fb); | |||
1301 | assert(idx == merger.index(fb)); | |||
1302 | auto iteratorTypes = op.getIteratorTypesArray(); | |||
1303 | bool isReduction = linalg::isReductionIterator(iteratorTypes[idx]); | |||
1304 | bool isSparse = merger.isDimLevelType(fb, DimLvlType::kCompressed) || | |||
1305 | merger.isDimLevelType(fb, DimLvlType::kSingleton); | |||
1306 | bool isVector = isVectorFor(codegen, isInner, isReduction, isSparse) && | |||
1307 | denseUnitStrides(merger, op, idx); | |||
1308 | bool isParallel = | |||
1309 | isParallelFor(codegen, isOuter, isReduction, isSparse, isVector); | |||
1310 | ||||
1311 | // Prepare vector length. | |||
1312 | if (isVector) | |||
1313 | codegen.curVecLength = codegen.options.vectorLength; | |||
1314 | ||||
1315 | // Loop bounds and increment. | |||
1316 | Location loc = op.getLoc(); | |||
1317 | Value lo = isSparse ? codegen.pidxs[tensor][idx] : codegen.loops[idx]; | |||
1318 | Value hi = isSparse ? codegen.highs[tensor][idx] : codegen.sizes[idx]; | |||
1319 | Value step = constantIndex(builder, loc, codegen.curVecLength); | |||
1320 | if (isVector && codegen.options.enableVLAVectorization) { | |||
1321 | Value vscale = builder.create<vector::VectorScaleOp>( | |||
1322 | loc, IndexType::get(builder.getContext())); | |||
1323 | step = builder.create<arith::MulIOp>(loc, vscale, step); | |||
1324 | } | |||
1325 | ||||
1326 | // Emit a parallel loop. | |||
1327 | if (isParallel) { | |||
1328 | assert(!isVector); | |||
1329 | scf::ParallelOp parOp = builder.create<scf::ParallelOp>(loc, lo, hi, step); | |||
1330 | if (isSparse) | |||
1331 | codegen.pidxs[tensor][idx] = parOp.getInductionVars()[0]; | |||
1332 | else | |||
1333 | codegen.loops[idx] = parOp.getInductionVars()[0]; | |||
1334 | builder.setInsertionPointToStart(parOp.getBody()); | |||
1335 | return parOp; | |||
1336 | } | |||
1337 | ||||
1338 | // Emit a sequential or vector loop. | |||
1339 | SmallVector<Value, 4> operands; | |||
1340 | if (codegen.redVal) { | |||
1341 | // In a vector loop, bring reduction into SIMD form, if not already. | |||
1342 | if (isVector && !codegen.redVal.getType().isa<VectorType>()) { | |||
1343 | VectorType vtp = vectorType(codegen, codegen.redVal.getType()); | |||
1344 | Value vred = genVectorReducInit(codegen, builder, loc, vtp); | |||
1345 | updateReduc(merger, codegen, vred); | |||
1346 | } | |||
1347 | operands.push_back(codegen.redVal); | |||
1348 | } | |||
1349 | if (codegen.expValues) | |||
1350 | operands.push_back(codegen.expCount); | |||
1351 | scf::ForOp forOp = builder.create<scf::ForOp>(loc, lo, hi, step, operands); | |||
1352 | if (codegen.redVal) | |||
1353 | updateReduc(merger, codegen, forOp.getRegionIterArgs().front()); | |||
1354 | if (codegen.expValues) | |||
1355 | codegen.expCount = forOp.getRegionIterArgs().back(); | |||
1356 | // Assign induction variable to sparse or dense index. | |||
1357 | Value iv = forOp.getInductionVar(); | |||
1358 | if (isSparse) | |||
1359 | codegen.pidxs[tensor][idx] = iv; | |||
1360 | else | |||
1361 | codegen.loops[idx] = iv; | |||
1362 | builder.setInsertionPointToStart(forOp.getBody()); | |||
1363 | // Share vector iteration mask between all subsequent loads/stores. | |||
1364 | if (isVector) | |||
1365 | codegen.curVecMask = genVectorMask(codegen, builder, iv, lo, hi, step); | |||
1366 | return forOp; | |||
1367 | } | |||
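// A sketch of the sequential output of the function above when a scalar
// reduction is live (illustrative IR):
//   %r = scf.for %iv = %lo to %hi step %step iter_args(%red = %r0) {
//     ...
//     scf.yield %red_next
//   }
// The vector variant first widens %r0 via genVectorReducInit and guards
// subsequent loads/stores with the shared mask in codegen.curVecMask.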
1368 | ||||
1369 | /// Emits a while-loop for co-iteration over multiple indices. | |||
1370 | static Operation *genWhile(Merger &merger, CodeGen &codegen, OpBuilder &builder, | |||
1371 | linalg::GenericOp op, unsigned idx, bool needsUniv, | |||
1372 | BitVector &indices) { | |||
1373 | SmallVector<Type, 4> types; | |||
1374 | SmallVector<Value, 4> operands; | |||
1375 | // Construct the while-loop with a parameter for each index. | |||
1376 | Type indexType = builder.getIndexType(); | |||
1377 | for (unsigned b = 0, be = indices.size(); b < be; b++) { | |||
1378 | if (!indices[b]) | |||
1379 | continue; | |||
1380 | if (merger.isDimLevelType(b, DimLvlType::kCompressed) || | |||
1381 | merger.isDimLevelType(b, DimLvlType::kSingleton)) { | |||
1382 | unsigned tensor = merger.tensor(b); | |||
1383 | assert(idx == merger.index(b)); | |||
1384 | types.push_back(indexType); | |||
1385 | operands.push_back(codegen.pidxs[tensor][idx]); | |||
1386 | } else { | |||
1387 | assert(merger.isDimLevelType(b, DimLvlType::kDense) || | |||
1388 |        merger.isDimLevelType(b, DimLvlType::kUndef)); | |||
1389 | } | |||
1390 | } | |||
1391 | if (codegen.redVal) { | |||
1392 | types.push_back(codegen.redVal.getType()); | |||
1393 | operands.push_back(codegen.redVal); | |||
1394 | } | |||
1395 | if (codegen.expValues) { | |||
1396 | types.push_back(indexType); | |||
1397 | operands.push_back(codegen.expCount); | |||
1398 | } | |||
1399 | if (needsUniv) { | |||
1400 | types.push_back(indexType); | |||
1401 | operands.push_back(codegen.loops[idx]); | |||
1402 | } | |||
1403 | assert(types.size() == operands.size()); | |||
1404 | Location loc = op.getLoc(); | |||
1405 | scf::WhileOp whileOp = builder.create<scf::WhileOp>(loc, types, operands); | |||
1406 | ||||
1407 | SmallVector<Location> locs(types.size(), loc); | |||
1408 | Block *before = builder.createBlock(&whileOp.getBefore(), {}, types, locs); | |||
1409 | Block *after = builder.createBlock(&whileOp.getAfter(), {}, types, locs); | |||
1410 | ||||
1411 | // Build the "before" region, which effectively consists | |||
1412 | // of a conjunction of "i < upper" tests on all induction variables. | |||
1413 | builder.setInsertionPointToStart(&whileOp.getBefore().front()); | |||
1414 | Value cond; | |||
1415 | unsigned o = 0; | |||
1416 | for (unsigned b = 0, be = indices.size(); b < be; b++) { | |||
1417 | if (!indices[b]) | |||
1418 | continue; | |||
1419 | if (merger.isDimLevelType(b, DimLvlType::kCompressed) || | |||
1420 | merger.isDimLevelType(b, DimLvlType::kSingleton)) { | |||
1421 | unsigned tensor = merger.tensor(b); | |||
1422 | assert(idx == merger.index(b)); | |||
1423 | Value op1 = before->getArgument(o); | |||
1424 | Value op2 = codegen.highs[tensor][idx]; | |||
1425 | Value opc = builder.create<arith::CmpIOp>(loc, arith::CmpIPredicate::ult, | |||
1426 | op1, op2); | |||
1427 | cond = cond ? builder.create<arith::AndIOp>(loc, cond, opc) : opc; | |||
1428 | codegen.pidxs[tensor][idx] = after->getArgument(o++); | |||
1429 | } else { | |||
1430 | assert(merger.isDimLevelType(b, DimLvlType::kDense) || | |||
1431 |        merger.isDimLevelType(b, DimLvlType::kUndef)); | |||
1432 | } | |||
1433 | } | |||
1434 | if (codegen.redVal) | |||
1435 | updateReduc(merger, codegen, after->getArgument(o++)); | |||
1436 | if (codegen.expValues) | |||
1437 | codegen.expCount = after->getArgument(o++); | |||
1438 | if (needsUniv) | |||
1439 | codegen.loops[idx] = after->getArgument(o++); | |||
1440 | assert(o == operands.size()); | |||
1441 | builder.create<scf::ConditionOp>(loc, cond, before->getArguments()); | |||
1442 | builder.setInsertionPointToStart(&whileOp.getAfter().front()); | |||
1443 | return whileOp; | |||
1444 | } | |||
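// Sketch of the while-loop emitted above for co-iterating two sparse
// tensors a and b (schematic names):
//   %res:2 = scf.while (%pa = %pa0, %pb = %pb0) {
//     %ca = arith.cmpi ult, %pa, %ha
//     %cb = arith.cmpi ult, %pb, %hb
//     %c = arith.andi %ca, %cb
//     scf.condition(%c) %pa, %pb
//   } do { ... }
// i.e. the loop keeps running while every active position is in bounds.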
1445 | ||||
1446 | /// Generates a for-loop or a while-loop, depending on whether it implements | |||
1447 | /// singleton iteration or co-iteration over the given conjunction. | |||
1448 | static Operation *genLoop(Merger &merger, CodeGen &codegen, OpBuilder &builder, | |||
1449 | linalg::GenericOp op, unsigned at, bool needsUniv, | |||
1450 | BitVector &indices) { | |||
1451 | unsigned idx = codegen.topSort[at]; | |||
1452 | if (indices.count() == 1) { | |||
1453 | bool isOuter = at == 0; | |||
1454 | bool isInner = at == codegen.topSort.size() - 1; | |||
1455 | return genFor(merger, codegen, builder, op, isOuter, isInner, idx, indices); | |||
1456 | } | |||
1457 | return genWhile(merger, codegen, builder, op, idx, needsUniv, indices); | |||
1458 | } | |||
1459 | ||||
1460 | /// Generates the local variables for this loop, consisting of the sparse | |||
1461 | /// indices, restored universal dense index, and dense positions. | |||
1462 | static void genLocals(Merger &merger, CodeGen &codegen, OpBuilder &builder, | |||
1463 | linalg::GenericOp op, unsigned at, bool needsUniv, | |||
1464 | BitVector &locals) { | |||
1465 | std::vector<unsigned> &topSort(codegen.topSort); | |||
1466 | Location loc = op.getLoc(); | |||
1467 | unsigned idx = topSort[at]; | |||
1468 | ||||
1469 | // Initialize sparse indices. | |||
1470 | Value min; | |||
1471 | for (unsigned b = 0, be = locals.size(); b < be; b++) { | |||
1472 | if (!locals[b]) | |||
1473 | continue; | |||
1474 | if (merger.isDimLevelType(b, DimLvlType::kCompressed) || | |||
1475 | merger.isDimLevelType(b, DimLvlType::kSingleton)) { | |||
1476 | unsigned tensor = merger.tensor(b); | |||
1477 | assert(idx == merger.index(b)); | |||
1478 | Value ptr = codegen.indices[tensor][idx]; | |||
1479 | Value s = codegen.pidxs[tensor][idx]; | |||
1480 | Value load = genLoad(codegen, builder, loc, ptr, s); | |||
1481 | codegen.idxs[tensor][idx] = load; | |||
1482 | if (!needsUniv) { | |||
1483 | if (min) { | |||
1484 | Value cmp = builder.create<arith::CmpIOp>( | |||
1485 | loc, arith::CmpIPredicate::ult, load, min); | |||
1486 | min = builder.create<arith::SelectOp>(loc, cmp, load, min); | |||
1487 | } else { | |||
1488 | min = load; | |||
1489 | } | |||
1490 | } | |||
1491 | } else { | |||
1492 | assert(merger.isDimLevelType(b, DimLvlType::kDense) || | |||
1493 |        merger.isDimLevelType(b, DimLvlType::kUndef)); | |||
1494 | } | |||
1495 | } | |||
1496 | ||||
1497 | // Merge dense universal index over minimum. | |||
1498 | if (min) { | |||
1499 | assert(!needsUniv); | |||
1500 | codegen.loops[idx] = min; | |||
1501 | } | |||
1502 | ||||
1503 | // Initialize dense positions. Note that we generate dense indices of the | |||
1504 | // output tensor unconditionally, since they may not appear in the lattice, | |||
1505 | // but may be needed for linearized codegen. | |||
1506 | for (unsigned b = 0, be = locals.size(); b < be; b++) { | |||
1507 | if ((locals[b] || merger.isOutTensor(b, idx)) && | |||
1508 | merger.isDimLevelType(b, DimLvlType::kDense)) { | |||
1509 | unsigned tensor = merger.tensor(b); | |||
1510 | assert(idx == merger.index(b)); | |||
1511 | unsigned pat = at; | |||
1512 | for (; pat != 0; pat--) | |||
1513 | if (codegen.pidxs[tensor][topSort[pat - 1]]) | |||
1514 | break; | |||
1515 | Value p = (pat == 0) ? constantIndex(builder, loc, 0) | |||
1516 | : codegen.pidxs[tensor][topSort[pat - 1]]; | |||
1517 | codegen.pidxs[tensor][idx] = genAddress( | |||
1518 | codegen, builder, loc, codegen.sizes[idx], p, codegen.loops[idx]); | |||
1519 | } | |||
1520 | } | |||
1521 | } | |||
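// Continuing the two-tensor co-iteration sketch, the locals generated
// above amount to
//   %ia = memref.load %indices_a[%pa]
//   %ib = memref.load %indices_b[%pb]
//   %c = arith.cmpi ult, %ia, %ib
//   %min = arith.select %c, %ia, %ib        // becomes codegen.loops[idx]
// recovering the current loop index as the minimum of all sparse indices
// (unless the universal index is carried instead).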
1522 | ||||
1523 | /// Generates the induction structure for a while-loop. | |||
1524 | static void genWhileInduction(Merger &merger, CodeGen &codegen, | |||
1525 | OpBuilder &builder, linalg::GenericOp op, | |||
1526 | unsigned idx, bool needsUniv, | |||
1527 | BitVector &induction, scf::WhileOp whileOp) { | |||
1528 | Location loc = op.getLoc(); | |||
1529 | // Finalize each else branch of all if statements. | |||
1530 | if (codegen.redVal || codegen.expValues) { | |||
1531 | while (auto ifOp = dyn_cast_or_null<scf::IfOp>( | |||
1532 | builder.getInsertionBlock()->getParentOp())) { | |||
1533 | unsigned y = 0; | |||
1534 | SmallVector<Value, 4> yields; | |||
1535 | if (codegen.redVal) { | |||
1536 | yields.push_back(codegen.redVal); | |||
1537 | updateReduc(merger, codegen, ifOp.getResult(y++)); | |||
1538 | } | |||
1539 | if (codegen.expValues) { | |||
1540 | yields.push_back(codegen.expCount); | |||
1541 | codegen.expCount = ifOp->getResult(y++); | |||
1542 | } | |||
1543 | assert(y == yields.size()); | |||
1544 | builder.create<scf::YieldOp>(loc, yields); | |||
1545 | builder.setInsertionPointAfter(ifOp); | |||
1546 | } | |||
1547 | } | |||
1548 | builder.setInsertionPointToEnd(&whileOp.getAfter().front()); | |||
1549 | // Finalize the induction. Note that the induction could be performed | |||
1550 | // in the individual if-branches to avoid re-evaluating the conditions. | |||
1551 | // However, that would result in a rather elaborate forest of yield | |||
1552 | // instructions during code generation. Moreover, performing the induction | |||
1553 | // after the if-statements more closely resembles code generated by TACO. | |||
1554 | unsigned o = 0; | |||
1555 | SmallVector<Value, 4> operands; | |||
1556 | Value one = constantIndex(builder, loc, 1); | |||
1557 | for (unsigned b = 0, be = induction.size(); b < be; b++) { | |||
1558 | if (!induction[b]) | |||
1559 | continue; | |||
1560 | if (merger.isDimLevelType(b, DimLvlType::kCompressed) || | |||
1561 | merger.isDimLevelType(b, DimLvlType::kSingleton)) { | |||
1562 | unsigned tensor = merger.tensor(b); | |||
1563 | assert(idx == merger.index(b)); | |||
1564 | Value op1 = codegen.idxs[tensor][idx]; | |||
1565 | Value op2 = codegen.loops[idx]; | |||
1566 | Value op3 = codegen.pidxs[tensor][idx]; | |||
1567 | Value cmp = builder.create<arith::CmpIOp>(loc, arith::CmpIPredicate::eq, | |||
1568 | op1, op2); | |||
1569 | Value add = builder.create<arith::AddIOp>(loc, op3, one); | |||
1570 | operands.push_back(builder.create<arith::SelectOp>(loc, cmp, add, op3)); | |||
1571 | codegen.pidxs[tensor][idx] = whileOp->getResult(o++); | |||
1572 | } else { | |||
1573 | assert(merger.isDimLevelType(b, DimLvlType::kDense) || | |||
1574 |        merger.isDimLevelType(b, DimLvlType::kUndef)); | |||
1575 | } | |||
1576 | } | |||
1577 | if (codegen.redVal) { | |||
1578 | operands.push_back(codegen.redVal); | |||
1579 | updateReduc(merger, codegen, whileOp->getResult(o++)); | |||
1580 | } | |||
1581 | if (codegen.expValues) { | |||
1582 | operands.push_back(codegen.expCount); | |||
1583 | codegen.expCount = whileOp->getResult(o++); | |||
1584 | } | |||
1585 | if (needsUniv) { | |||
1586 | operands.push_back( | |||
1587 | builder.create<arith::AddIOp>(loc, codegen.loops[idx], one)); | |||
1588 | codegen.loops[idx] = whileOp->getResult(o++); | |||
1589 | } | |||
1590 | assert(o == operands.size()); | |||
1591 | builder.create<scf::YieldOp>(loc, operands); | |||
1592 | builder.setInsertionPointAfter(whileOp); | |||
1593 | } | |||
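// In the same sketch, the induction emitted above advances only the
// positions whose index matched at this iteration:
//   %eq = arith.cmpi eq, %ia, %min
//   %inc = arith.addi %pa, %c1
//   %pa_next = arith.select %eq, %inc, %pa
// with one such compare/add/select triple per active sparse tensor.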
1594 | ||||
1595 | /// Generates the induction structure for a for-loop. | |||
1596 | static void genForInduction(Merger &merger, CodeGen &codegen, | |||
1597 | OpBuilder &builder, linalg::GenericOp op, | |||
1598 | Operation *loop) { | |||
1599 | Location loc = op.getLoc(); | |||
1600 | unsigned o = 0; | |||
1601 | SmallVector<Value, 4> operands; | |||
1602 | if (codegen.redVal) { | |||
1603 | operands.push_back(codegen.redVal); | |||
1604 | updateReduc(merger, codegen, loop->getResult(o++)); | |||
1605 | } | |||
1606 | if (codegen.expValues) { | |||
1607 | operands.push_back(codegen.expCount); | |||
1608 | codegen.expCount = loop->getResult(o++); | |||
1609 | } | |||
1610 | assert(o == operands.size()); | |||
1611 | if (o > 0) | |||
1612 | builder.create<scf::YieldOp>(loc, operands); | |||
1613 | builder.setInsertionPointAfter(loop); | |||
1614 | } | |||
1615 | ||||
1616 | /// Generates a single if-statement within a while-loop. | |||
1617 | static scf::IfOp genIf(Merger &merger, CodeGen &codegen, OpBuilder &builder, | |||
1618 | linalg::GenericOp op, unsigned idx, | |||
1619 | BitVector &conditions) { | |||
1620 | Location loc = op.getLoc(); | |||
1621 | SmallVector<Type, 4> types; | |||
1622 | Value cond; | |||
1623 | for (unsigned b = 0, be = conditions.size(); b < be; b++) { | |||
1624 | if (!conditions[b]) | |||
1625 | continue; | |||
1626 | unsigned tensor = merger.tensor(b); | |||
1627 | assert(idx == merger.index(b)); | |||
1628 | Value clause; | |||
1629 | if (merger.isDimLevelType(b, DimLvlType::kCompressed) || | |||
1630 | merger.isDimLevelType(b, DimLvlType::kSingleton)) { | |||
1631 | Value op1 = codegen.idxs[tensor][idx]; | |||
1632 | Value op2 = codegen.loops[idx]; | |||
1633 | clause = builder.create<arith::CmpIOp>(loc, arith::CmpIPredicate::eq, op1, | |||
1634 | op2); | |||
1635 | } else { | |||
1636 | assert(merger.isDimLevelType(b, DimLvlType::kDense) || | |||
1637 |        merger.isDimLevelType(b, DimLvlType::kUndef)); | |||
1638 | clause = constantI1(builder, loc, true); | |||
1639 | } | |||
1640 | cond = cond ? builder.create<arith::AndIOp>(loc, cond, clause) : clause; | |||
1641 | } | |||
1642 | if (codegen.redVal) | |||
1643 | types.push_back(codegen.redVal.getType()); | |||
1644 | if (codegen.expValues) | |||
1645 | types.push_back(builder.getIndexType()); | |||
1646 | scf::IfOp ifOp = builder.create<scf::IfOp>(loc, types, cond, /*else=*/true); | |||
1647 | builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); | |||
1648 | return ifOp; | |||
1649 | } | |||
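// Sketch of a branch produced above: a case that only applies when
// tensor a participates at the current index tests
//   %p = arith.cmpi eq, %ia, %min
//   scf.if %p -> (...) { ... } else { ... }
// where the optional results thread the reduction value and the
// expansion count through both branches.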
1650 | ||||
1651 | /// Generates end of true branch of if-statement within a while-loop. | |||
1652 | static void endIf(Merger &merger, CodeGen &codegen, OpBuilder &builder, | |||
1653 | linalg::GenericOp op, scf::IfOp ifOp, Operation *loop, | |||
1654 | Value redInput, Value cntInput) { | |||
1655 | SmallVector<Value, 4> operands; | |||
1656 | if (codegen.redVal) { | |||
1657 | operands.push_back(codegen.redVal); | |||
1658 | updateReduc(merger, codegen, redInput); | |||
1659 | } | |||
1660 | if (codegen.expValues) { | |||
1661 | operands.push_back(codegen.expCount); | |||
1662 | codegen.expCount = cntInput; | |||
1663 | } | |||
1664 | if (!operands.empty()) | |||
1665 | builder.create<scf::YieldOp>(op.getLoc(), operands); | |||
1666 | builder.setInsertionPointToStart(&ifOp.getElseRegion().front()); | |||
1667 | } | |||
1668 | ||||
1669 | //===----------------------------------------------------------------------===// | |||
1670 | // Sparse compiler synthesis methods (loop sequence). | |||
1671 | //===----------------------------------------------------------------------===// | |||
1672 | ||||
1673 | /// Starts a loop sequence at given level. Returns true if | |||
1674 | /// the universal loop index must be maintained at this level. | |||
1675 | static bool startLoopSeq(Merger &merger, CodeGen &codegen, OpBuilder &builder, | |||
1676 | linalg::GenericOp op, unsigned exp, unsigned at, | |||
1677 | unsigned idx, unsigned ldx, unsigned lts) { | |||
1678 | assert(codegen.curVecLength == 1); | |||
1679 | assert(!codegen.loops[idx]); | |||
1680 | // Emit invariants at this loop sequence level. | |||
1681 | genInvariants(merger, codegen, builder, op, exp, ldx, /*atStart=*/true); | |||
1682 | // Emit access pattern expansion for sparse tensor output. | |||
1683 | genExpansion(merger, codegen, builder, op, at, /*atStart=*/true); | |||
1684 | // Emit further initialization at this loop sequence level. | |||
1685 | unsigned l0 = merger.set(lts)[0]; | |||
1686 | bool needsUniv = | |||
1687 | genInit(merger, codegen, builder, op, at, merger.lat(l0).bits); | |||
1688 | // Maintain the universal index only if it is actually | |||
1689 | // consumed by a subsequent lattice point. | |||
1690 | if (needsUniv) { | |||
1691 | unsigned lsize = merger.set(lts).size(); | |||
1692 | for (unsigned i = 1; i < lsize; i++) { | |||
1693 | unsigned li = merger.set(lts)[i]; | |||
1694 | if (!merger.hasAnySparse(merger.lat(li).simple)) | |||
1695 | return true; | |||
1696 | } | |||
1697 | } | |||
1698 | return false; | |||
1699 | } | |||
1700 | ||||
1701 | /// Starts a single loop in the current sequence. | |||
1702 | static Operation *startLoop(Merger &merger, CodeGen &codegen, | |||
1703 | OpBuilder &builder, linalg::GenericOp op, | |||
1704 | unsigned at, unsigned li, bool needsUniv) { | |||
1705 | assert(codegen.curVecLength == 1); | |||
1706 | // Emit the for/while-loop control. | |||
1707 | Operation *loop = genLoop(merger, codegen, builder, op, at, needsUniv, | |||
1708 | merger.lat(li).simple); | |||
1709 | // Emit the locals for this loop. | |||
1710 | genLocals(merger, codegen, builder, op, at, needsUniv, merger.lat(li).bits); | |||
1711 | return loop; | |||
1712 | } | |||
1713 | ||||
1714 | /// Ends a single loop in the current sequence. Returns the new value for needsUniv. | |||
1715 | static bool endLoop(Merger &merger, CodeGen &codegen, OpBuilder &builder, | |||
1716 | linalg::GenericOp op, Operation *loop, unsigned idx, | |||
1717 | unsigned li, bool needsUniv) { | |||
1718 | codegen.curVecLength = 1; | |||
1719 | // End a while-loop. | |||
1720 | if (auto whileOp = dyn_cast<scf::WhileOp>(loop)) { | |||
1721 | genWhileInduction(merger, codegen, builder, op, idx, needsUniv, | |||
1722 | merger.lat(li).bits, whileOp); | |||
1723 | return needsUniv; | |||
1724 | } | |||
1725 | // End a for-loop. | |||
1726 | genForInduction(merger, codegen, builder, op, loop); | |||
1727 | return false; | |||
1728 | } | |||
1729 | ||||
1730 | /// Ends a loop sequence at given level. | |||
1731 | static void endLoopSeq(Merger &merger, CodeGen &codegen, OpBuilder &builder, | |||
1732 | linalg::GenericOp op, unsigned exp, unsigned at, | |||
1733 | unsigned idx, unsigned ldx) { | |||
1734 | assert(codegen.curVecLength == 1); | |||
1735 | assert(codegen.loops[idx]); | |||
1736 | codegen.loops[idx] = Value(); | |||
1737 | // Bring a pending reduction back from SIMD form when sequence ends. | |||
1738 | if (codegen.redVal) | |||
1739 | if (auto vtp = codegen.redVal.getType().dyn_cast<VectorType>()) | |||
1740 | updateReduc(merger, codegen, | |||
1741 | genVectorReducEnd(codegen, builder, op.getLoc(), vtp)); | |||
1742 | // Unmark bookkeeping of invariants and loop index. | |||
1743 | genInvariants(merger, codegen, builder, op, exp, ldx, /*atStart=*/false); | |||
1744 | // Finalize access pattern expansion for sparse tensor output. | |||
1745 | genExpansion(merger, codegen, builder, op, at, /*atStart=*/false); | |||
1746 | } | |||
1747 | ||||
1748 | /// Recursively generates code while computing iteration lattices in order | |||
1749 | /// to manage the complexity of implementing co-iteration over unions | |||
1750 | /// and intersections of sparse iteration spaces. | |||
1751 | static void genStmt(Merger &merger, CodeGen &codegen, RewriterBase &rewriter, | |||
1752 | linalg::GenericOp op, unsigned exp, unsigned at) { | |||
1753 | // At each leaf, assign remaining tensor (sub)expression to output tensor. | |||
1754 | if (at == codegen.topSort.size()) { | |||
1755 | unsigned ldx = codegen.topSort[at - 1]; | |||
1756 | Value rhs = genExp(merger, codegen, rewriter, op, exp, ldx); | |||
1757 | genTensorStore(merger, codegen, rewriter, op, exp, rhs); | |||
1758 | return; | |||
1759 | } | |||
1760 | ||||
1761 | // Construct iteration lattices for current loop index, with L0 at top. | |||
1762 | unsigned idx = codegen.topSort[at]; | |||
1763 | unsigned ldx = at == 0 ? -1u : codegen.topSort[at - 1]; | |||
1764 | unsigned lts = merger.optimizeSet(merger.buildLattices(exp, idx)); | |||
1765 | ||||
1766 | // Start a loop sequence. | |||
1767 | bool needsUniv = | |||
1768 | startLoopSeq(merger, codegen, rewriter, op, exp, at, idx, ldx, lts); | |||
1769 | ||||
1770 | // Emit a loop for every lattice point L0 >= Li in this loop sequence. | |||
1771 | unsigned lsize = merger.set(lts).size(); | |||
1772 | for (unsigned i = 0; i < lsize; i++) { | |||
1773 | // Start a loop. | |||
1774 | unsigned li = merger.set(lts)[i]; | |||
1775 | Operation *loop = | |||
1776 | startLoop(merger, codegen, rewriter, op, at, li, needsUniv); | |||
1777 | ||||
1778 | // Visit all lattices points with Li >= Lj to generate the | |||
1779 | // loop-body, possibly with if statements for coiteration. | |||
1780 | Value redInput = codegen.redVal; | |||
1781 | Value cntInput = codegen.expCount; | |||
1782 | bool isWhile = dyn_cast<scf::WhileOp>(loop) != nullptr; | |||
1783 | for (unsigned j = 0; j < lsize; j++) { | |||
1784 | unsigned lj = merger.set(lts)[j]; | |||
1785 | unsigned ej = merger.lat(lj).exp; | |||
1786 | if (li == lj || merger.latGT(li, lj)) { | |||
1787 | // Recurse into body of each branch. | |||
1788 | if (isWhile) { | |||
1789 | scf::IfOp ifOp = | |||
1790 | genIf(merger, codegen, rewriter, op, idx, merger.lat(lj).simple); | |||
1791 | genStmt(merger, codegen, rewriter, op, ej, at + 1); | |||
1792 | endIf(merger, codegen, rewriter, op, ifOp, loop, redInput, cntInput); | |||
1793 | } else { | |||
1794 | genStmt(merger, codegen, rewriter, op, ej, at + 1); | |||
1795 | } | |||
1796 | } | |||
1797 | } | |||
1798 | ||||
1799 | // End a loop. | |||
1800 | needsUniv = | |||
1801 | endLoop(merger, codegen, rewriter, op, loop, idx, li, needsUniv); | |||
1802 | } | |||
1803 | ||||
1804 | // End a loop sequence. | |||
1805 | endLoopSeq(merger, codegen, rewriter, op, exp, at, idx, ldx); | |||
1806 | } | |||
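// End-to-end illustration for the recursion above (a sketch): sparse
// vector addition x(i) = a(i) + b(i) yields the familiar loop sequence
// over the lattice points {a,b}, {a}, {b}:
//   while (pa < ha && pb < hb) {
//     if (ia == min && ib == min) ...     // both present
//     else if (ia == min) ...             // only a
//     else ...                            // only b
//   }
//   while (pa < ha) { ... }   // remaining a-only entries
//   while (pb < hb) { ... }   // remaining b-only entries
// where each lattice point contributes one loop and the if-statements
// implement the co-iteration cases within the first loop.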
1807 | ||||
1808 | /// Converts the result computed by the sparse kernel into the required form. | |||
1809 | static void genResult(Merger &merger, CodeGen &codegen, RewriterBase &rewriter, | |||
1810 | linalg::GenericOp op) { | |||
1811 | OpOperand *lhs = op.getOutputOperand(0); | |||
1812 | Type resType = lhs->get().getType(); | |||
1813 | if (getSparseTensorEncoding(resType)) { | |||
1814 | // The sparse tensor rematerializes from the original sparse tensor's | |||
1815 | // underlying sparse storage format. | |||
1816 | rewriter.replaceOpWithNewOp<LoadOp>(op, resType, lhs->get(), | |||
1817 | codegen.sparseOut == lhs); | |||
1818 | } else { | |||
1819 | // To rematerialize a non-annotated tensor, simply load it | |||
1820 | // from the bufferized value. | |||
1821 | Value val = codegen.buffers.back(); // value array | |||
1822 | rewriter.replaceOpWithNewOp<bufferization::ToTensorOp>(op, resType, val); | |||
1823 | } | |||
1824 | } | |||
1825 | ||||
1826 | //===----------------------------------------------------------------------===// | |||
1827 | // Sparse compiler rewriting methods. | |||
1828 | //===----------------------------------------------------------------------===// | |||
1829 | ||||
1830 | namespace { | |||
1831 | ||||
1832 | /// Sparse rewriting rule for generic Linalg operation. | |||
1833 | struct GenericOpSparsifier : public OpRewritePattern<linalg::GenericOp> { | |||
1834 | public: | |||
1835 | GenericOpSparsifier(MLIRContext *context, SparsificationOptions o) | |||
1836 | : OpRewritePattern<linalg::GenericOp>(context), options(o) {} | |||
1837 | ||||
1838 | LogicalResult matchAndRewrite(linalg::GenericOp op, | |||
1839 | PatternRewriter &rewriter) const override { | |||
1840 | // Detects sparse annotations and translates the per-dimension sparsity | |||
1841 | // information for all tensors to loop indices in the kernel. | |||
1842 | assert(op.getNumOutputs() == 1); | |||
1843 | unsigned numTensors = op.getNumInputsAndOutputs(); | |||
1844 | unsigned numLoops = op.iterator_types().getValue().size(); | |||
1845 | Merger merger(numTensors, numLoops); | |||
1846 | if (!findSparseAnnotations(merger, op)) | |||
1847 | return failure(); | |||
1848 | ||||
1849 | // Builds the tensor expression for the Linalg operation in SSA form. | |||
1850 | Optional<unsigned> optExp = merger.buildTensorExpFromLinalg(op); | |||
1851 | if (!optExp.has_value()) | |||
1852 | return failure(); | |||
1853 | ||||
1854 | unsigned exp = optExp.value(); | |||
1855 | OpOperand *sparseOut = nullptr; | |||
1856 | unsigned outerParNest = 0; | |||
1857 | // Computes a topologically sorted iteration graph to ensure tensors | |||
1858 | // are visited in natural index order. Gradually relaxes the considered | |||
1859 | // constraints until an acyclic iteration graph results, such that sparse | |||
1860 | // code generation can proceed. As a last resort, an attempt is made | |||
1861 | // to resolve cycles by inserting a conversion. | |||
1862 | std::vector<unsigned> topSort; | |||
1863 | // Whether the current GenericOp is admissible. | |||
1864 | bool isAdmissible = false; | |||
1865 | bool hasCycle = true; | |||
1866 | // A const list of all masks used for iteration graph computation. | |||
1867 | // Must be ordered from strict to loose. | |||
1868 | const auto allMask = {SortMask::kIncludeAll, SortMask::kIncludeUndef, | |||
1869 | SortMask::kIncludeDense, SortMask::kSparseOnly}; | |||
1870 | for (auto mask : allMask) | |||
1871 | if (computeIterationGraph(merger, op, topSort, mask)) { | |||
1872 | hasCycle = false; | |||
1873 | if (isAdmissibleTensorExp(merger, op, topSort, exp, &sparseOut, | |||
1874 | outerParNest)) { | |||
1875 | isAdmissible = true; | |||
1876 | break; | |||
1877 | } | |||
1878 | // else try a set of less strict constraints. | |||
1879 | } | |||
1880 | ||||
1881 | if (hasCycle) | |||
1882 | // Give it one last shot to resolve the cycle. | |||
1883 | return resolveCycle(merger, rewriter, op); | |||
1884 | if (!isAdmissible) | |||
1885 | // Inadmissible expression, reject. | |||
1886 | return failure(); | |||
1887 | ||||
1888 | // Recursively generates code if admissible. | |||
1889 | merger.setHasSparseOut(sparseOut != nullptr); | |||
1890 | CodeGen codegen(options, numTensors, numLoops, sparseOut, outerParNest, | |||
1891 | topSort); | |||
1892 | genBuffers(merger, codegen, rewriter, op); | |||
1893 | genStmt(merger, codegen, rewriter, op, exp, 0); | |||
1894 | genResult(merger, codegen, rewriter, op); | |||
1895 | return success(); | |||
1896 | } | |||
1897 | ||||
1898 | private: | |||
1899 | // Last resort cycle resolution. | |||
1900 | LogicalResult resolveCycle(Merger &merger, PatternRewriter &rewriter, | |||
1901 | linalg::GenericOp op) const { | |||
1902 | // Compute topological sort while leaving out every | |||
1903 | // sparse input tensor in succession until an acyclic | |||
1904 | // iteration graph results. | |||
1905 | std::vector<unsigned> topSort; | |||
1906 | for (OpOperand *t : op.getInputOperands()) { | |||
1907 | unsigned tensor = t->getOperandNumber(); | |||
1908 | Value tval = t->get(); | |||
1909 | auto srcEnc = getSparseTensorEncoding(tval.getType()); | |||
1910 | if (!srcEnc || | |||
1911 | !computeIterationGraph(merger, op, topSort, SortMask::kSparseOnly, t)) | |||
1912 | continue; | |||
1913 | // Found an input tensor that resolves the cycle by inserting a | |||
1914 | // conversion into a sparse tensor that adheres to the iteration | |||
1915 | // graph order. Also releases the temporary sparse tensor. | |||
1916 | // | |||
1917 | // TODO: investigate fusing the conversion with computation, | |||
1918 | // especially if it is a direct yield! | |||
1919 | // | |||
1920 | auto srcTp = tval.getType().cast<RankedTensorType>(); | |||
1921 | auto dstEnc = SparseTensorEncodingAttr::get( | |||
1922 | op->getContext(), srcEnc.getDimLevelType(), | |||
1923 | permute(getContext(), op.getMatchingIndexingMap(t), | |||
1924 | topSort), // new order | |||
1925 | srcEnc.getPointerBitWidth(), srcEnc.getIndexBitWidth()); | |||
1926 | auto dstTp = RankedTensorType::get(srcTp.getShape(), | |||
1927 | srcTp.getElementType(), dstEnc); | |||
1928 | auto convert = rewriter.create<ConvertOp>(tval.getLoc(), dstTp, tval); | |||
1929 | op->setOperand(tensor, convert); | |||
1930 | rewriter.setInsertionPointAfter(op); | |||
1931 | rewriter.create<bufferization::DeallocTensorOp>(tval.getLoc(), convert); | |||
1932 | return success(); | |||
1933 | } | |||
1934 | // Cannot be resolved with a single conversion. | |||
1935 | // TODO: convert more than one? | |||
1936 | return failure(); | |||
1937 | } | |||
1938 | ||||
1939 | /// Options to control sparse code generation. | |||
1940 | SparsificationOptions options; | |||
1941 | }; | |||
1942 | ||||
1943 | } // namespace | |||
1944 | ||||
1945 | /// Populates the given patterns list with rewriting rules required for | |||
1946 | /// the sparsification of linear algebra operations. | |||
1947 | void mlir::populateSparsificationPatterns( | |||
1948 | RewritePatternSet &patterns, const SparsificationOptions &options) { | |||
1949 | patterns.add<GenericOpSparsifier>(patterns.getContext(), options); | |||
1950 | } |