LCOV - code coverage report
Current view: top level - clang/tools/extra/clangd/index - Index.h (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 0 18 0.0 %
Date: 2018-07-13 00:08:38 Functions: 0 9 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===--- Index.h ------------------------------------------------*- C++-*-===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===---------------------------------------------------------------------===//
       9             : 
      10             : #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_INDEX_H
      11             : #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_INDEX_H
      12             : 
      13             : #include "clang/Index/IndexSymbol.h"
      14             : #include "clang/Lex/Lexer.h"
      15             : #include "llvm/ADT/DenseMap.h"
      16             : #include "llvm/ADT/DenseSet.h"
      17             : #include "llvm/ADT/Hashing.h"
      18             : #include "llvm/ADT/Optional.h"
      19             : #include "llvm/ADT/StringExtras.h"
      20             : #include <array>
      21             : #include <string>
      22             : 
      23             : namespace clang {
      24             : namespace clangd {
      25             : 
      26             : struct SymbolLocation {
      27             :   // Specify a position (Line, Column) of symbol. Using Line/Column allows us to
      28             :   // build LSP responses without reading the file content.
      29           0 :   struct Position {
      30             :     uint32_t Line = 0; // 0-based
      31             :     // Using UTF-16 code units.
      32             :     uint32_t Column = 0; // 0-based
      33             :   };
      34             : 
      35             :   // The URI of the source file where a symbol occurs.
      36             :   llvm::StringRef FileURI;
      37             : 
      38             :   /// The symbol range, using half-open range [Start, End).
      39             :   Position Start;
      40             :   Position End;
      41             : 
      42             :   operator bool() const { return !FileURI.empty(); }
      43             : };
      44             : llvm::raw_ostream &operator<<(llvm::raw_ostream &, const SymbolLocation &);
      45             : 
      46             : // The class identifies a particular C++ symbol (class, function, method, etc).
      47             : //
      48             : // As USRs (Unified Symbol Resolution) could be large, especially for functions
      49             : // with long type arguments, SymbolID uses 160-bit SHA1(USR) values to
      50             : // guarantee the uniqueness of symbols while using a relatively small amount of
      51             : // memory (vs storing USRs directly).
      52             : //
      53             : // SymbolID can be used as key in the symbol indexes to lookup the symbol.
      54             : class SymbolID {
      55             : public:
      56             :   SymbolID() = default;
      57             :   explicit SymbolID(llvm::StringRef USR);
      58             : 
      59             :   bool operator==(const SymbolID &Sym) const {
      60             :     return HashValue == Sym.HashValue;
      61             :   }
      62             :   bool operator<(const SymbolID &Sym) const {
      63             :     return HashValue < Sym.HashValue;
      64             :   }
      65             : 
      66             :   // Returns a 40-character hex-encoded string.
      67             :   std::string str() const;
      68             : 
      69             : private:
      70             :   static constexpr unsigned HashByteLength = 20;
      71             : 
      72             :   friend llvm::hash_code hash_value(const SymbolID &ID) {
      73             :     // We already have a good hash, just return the first bytes.
      74             :     static_assert(sizeof(size_t) <= HashByteLength, "size_t longer than SHA1!");
      75             :     size_t Result;
      76           0 :     memcpy(&Result, ID.HashValue.data(), sizeof(size_t));
      77             :     return llvm::hash_code(Result);
      78             :   }
      79             :   friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
      80             :                                        const SymbolID &ID);
      81             :   friend void operator>>(llvm::StringRef Str, SymbolID &ID);
      82             : 
      83             :   std::array<uint8_t, HashByteLength> HashValue;
      84             : };
      85             : 
      86             : // Write SymbolID into the given stream. SymbolID is encoded as a 40-character
      87             : // hex string.
      88             : llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const SymbolID &ID);
      89             : 
      90             : // Construct SymbolID from a hex string.
      91             : // The HexStr is required to be a 40-character hex string, as produced by the
      92             : // "<<" operator.
      93             : void operator>>(llvm::StringRef HexStr, SymbolID &ID);
      94             : 
      95             : } // namespace clangd
      96             : } // namespace clang
      97             : namespace llvm {
      98             : // Support SymbolIDs as DenseMap keys.
      99             : template <> struct DenseMapInfo<clang::clangd::SymbolID> {
     100           0 :   static inline clang::clangd::SymbolID getEmptyKey() {
     101           0 :     static clang::clangd::SymbolID EmptyKey("EMPTYKEY");
     102           0 :     return EmptyKey;
     103             :   }
     104           0 :   static inline clang::clangd::SymbolID getTombstoneKey() {
     105           0 :     static clang::clangd::SymbolID TombstoneKey("TOMBSTONEKEY");
     106           0 :     return TombstoneKey;
     107             :   }
     108             :   static unsigned getHashValue(const clang::clangd::SymbolID &Sym) {
     109           0 :     return hash_value(Sym);
     110             :   }
     111             :   static bool isEqual(const clang::clangd::SymbolID &LHS,
     112             :                       const clang::clangd::SymbolID &RHS) {
     113             :     return LHS == RHS;
     114             :   }
     115             : };
     116             : } // namespace llvm
     117             : namespace clang {
     118             : namespace clangd {
     119             : 
     120             : // Describes the source of information about a symbol.
     121             : // Mainly useful for debugging, e.g. understanding code completion results.
     122             : // This is a bitfield as information can be combined from several sources.
     123             : enum class SymbolOrigin : uint8_t {
     124             :   Unknown = 0,
     125             :   AST = 1 << 0,     // Directly from the AST (indexes should not set this).
     126             :   Dynamic = 1 << 1, // From the dynamic index of opened files.
     127             :   Static = 1 << 2,  // From the static, externally-built index.
     128             :   Merge = 1 << 3,   // A non-trivial index merge was performed.
     129             :   // Remaining bits reserved for index implementations.
     130             : };
     131             : inline SymbolOrigin operator|(SymbolOrigin A, SymbolOrigin B) {
     132             :   return static_cast<SymbolOrigin>(static_cast<uint8_t>(A) |
     133             :                                    static_cast<uint8_t>(B));
     134             : }
     135             : inline SymbolOrigin &operator|=(SymbolOrigin &A, SymbolOrigin B) {
     136             :   return A = A | B;
     137             : }
     138             : inline SymbolOrigin operator&(SymbolOrigin A, SymbolOrigin B) {
     139             :   return static_cast<SymbolOrigin>(static_cast<uint8_t>(A) &
     140             :                                    static_cast<uint8_t>(B));
     141             : }
     142             : raw_ostream &operator<<(raw_ostream &, SymbolOrigin);
     143             : 
     144             : // The class represents a C++ symbol, e.g. class, function.
     145             : //
     146             : // WARNING: Symbols do not own much of their underlying data - typically strings
     147             : // are owned by a SymbolSlab. They should be treated as non-owning references.
     148             : // Copies are shallow.
     149             : // When adding new unowned data fields to Symbol, remember to update:
     150             : //   - SymbolSlab::Builder in Index.cpp, to copy them to the slab's storage.
     151             : //   - mergeSymbol in Merge.cpp, to properly combine two Symbols.
     152             : //
     153             : // A fully documented symbol can be split as:
     154             : // size_type std::map<k, t>::count(const K& key) const
     155             : // | Return  |     Scope     |Name|    Signature     |
     156             : // We split up these components to allow display flexibility later.
     157           0 : struct Symbol {
     158             :   // The ID of the symbol.
     159             :   SymbolID ID;
     160             :   // The symbol information, like symbol kind.
     161             :   index::SymbolInfo SymInfo;
     162             :   // The unqualified name of the symbol, e.g. "bar" (for ns::bar).
     163             :   llvm::StringRef Name;
     164             :   // The containing namespace. e.g. "" (global), "ns::" (top-level namespace).
     165             :   llvm::StringRef Scope;
     166             :   // The location of the symbol's definition, if one was found.
     167             :   // This just covers the symbol name (e.g. without class/function body).
     168             :   SymbolLocation Definition;
     169             :   // The location of the preferred declaration of the symbol.
     170             :   // This just covers the symbol name.
     171             :   // This may be the same as Definition.
     172             :   //
     173             :   // A C++ symbol may have multiple declarations, and we pick one to prefer.
     174             :   //   * For classes, the canonical declaration should be the definition.
     175             :   //   * For non-inline functions, the canonical declaration typically appears
     176             :   //     in the ".h" file corresponding to the definition.
     177             :   SymbolLocation CanonicalDeclaration;
     178             :   // The number of translation units that reference this symbol from their main
     179             :   // file. This number is only meaningful if aggregated in an index.
     180             :   unsigned References = 0;
     181             :   /// Whether or not this symbol is meant to be used for the code completion.
     182             :   /// See also isIndexedForCodeCompletion().
     183             :   bool IsIndexedForCodeCompletion = false;
     184             :   /// Where this symbol came from. Usually an index provides a constant value.
     185             :   SymbolOrigin Origin = SymbolOrigin::Unknown;
     186             :   /// A brief description of the symbol that can be appended in the completion
     187             :   /// candidate list. For example, "(X x, Y y) const" is a function signature.
     188             :   llvm::StringRef Signature;
     189             :   /// What to insert when completing this symbol, after the symbol name.
     190             :   /// This is in LSP snippet syntax (e.g. "({$0})" for a no-args function).
     191             :   /// (When snippets are disabled, the symbol name alone is used).
     192             :   llvm::StringRef CompletionSnippetSuffix;
     193             : 
     194             :   /// Optional symbol details that are not required to be set. For example, an
     195             :   /// index fuzzy match can return a large number of symbol candidates, and it
     196             :   /// is preferable to send only core symbol information in the batched results
     197             :   /// and have clients resolve full symbol information for a specific candidate
     198             :   /// if needed.
     199             :   struct Details {
     200             :     /// Documentation including comment for the symbol declaration.
     201             :     llvm::StringRef Documentation;
     202             :     /// Type when this symbol is used in an expression. (Short display form).
     203             :     /// e.g. return type of a function, or type of a variable.
     204             :     llvm::StringRef ReturnType;
     205             :     /// This can be either a URI of the header to be #include'd for this symbol,
     206             :     /// or a literal header quoted with <> or "" that is suitable to be included
     207             :     /// directly. When this is a URI, the exact #include path needs to be
     208             :     /// calculated according to the URI scheme.
     209             :     ///
     210             :     /// This is a canonical include for the symbol and can be different from
     211             :     /// FileURI in the CanonicalDeclaration.
     212             :     llvm::StringRef IncludeHeader;
     213             :   };
     214             : 
     215             :   // Optional details of the symbol.
     216             :   const Details *Detail = nullptr;
     217             : 
     218             :   // FIXME: add all occurrences support.
     219             :   // FIXME: add extra fields for index scoring signals.
     220             : };
     221             : llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Symbol &S);
     222             : 
     223             : // Computes query-independent quality score for a Symbol.
     224             : // This currently falls in the range [1, ln(#indexed documents)].
     225             : // FIXME: this should probably be split into symbol -> signals
     226             : //        and signals -> score, so it can be reused for Sema completions.
     227             : double quality(const Symbol &S);
     228             : 
     229             : // An immutable symbol container that stores a set of symbols.
     230             : // The container will maintain the lifetime of the symbols.
     231           0 : class SymbolSlab {
     232             : public:
     233             :   using const_iterator = std::vector<Symbol>::const_iterator;
     234             :   using iterator = const_iterator;
     235             : 
     236             :   SymbolSlab() = default;
     237             : 
     238           0 :   const_iterator begin() const { return Symbols.begin(); }
     239           0 :   const_iterator end() const { return Symbols.end(); }
     240             :   const_iterator find(const SymbolID &SymID) const;
     241             : 
     242             :   size_t size() const { return Symbols.size(); }
     243             :   // Estimates the total memory usage.
     244             :   size_t bytes() const {
     245             :     return sizeof(*this) + Arena.getTotalMemory() +
     246             :            Symbols.capacity() * sizeof(Symbol);
     247             :   }
     248             : 
     249             :   // SymbolSlab::Builder is a mutable container that can 'freeze' to SymbolSlab.
     250             :   // The frozen SymbolSlab will use less memory.
     251           0 :   class Builder {
     252             :   public:
     253             :     // Adds a symbol, overwriting any existing one with the same ID.
     254             :     // This is a deep copy: underlying strings will be owned by the slab.
     255             :     void insert(const Symbol &S);
     256             : 
     257             :     // Returns the symbol with an ID, if it exists. Valid until next insert().
     258           0 :     const Symbol *find(const SymbolID &ID) {
     259           0 :       auto I = SymbolIndex.find(ID);
     260           0 :       return I == SymbolIndex.end() ? nullptr : &Symbols[I->second];
     261             :     }
     262             : 
     263             :     // Consumes the builder to finalize the slab.
     264             :     SymbolSlab build() &&;
     265             : 
     266             :   private:
     267             :     llvm::BumpPtrAllocator Arena;
     268             :     // Intern table for strings. Contents are on the arena.
     269             :     llvm::DenseSet<llvm::StringRef> Strings;
     270             :     std::vector<Symbol> Symbols;
     271             :     // Values are indices into Symbols vector.
     272             :     llvm::DenseMap<SymbolID, size_t> SymbolIndex;
     273             :   };
     274             : 
     275             : private:
     276             :   SymbolSlab(llvm::BumpPtrAllocator Arena, std::vector<Symbol> Symbols)
     277             :       : Arena(std::move(Arena)), Symbols(std::move(Symbols)) {}
     278             : 
     279             :   llvm::BumpPtrAllocator Arena; // Owns Symbol data that the Symbols do not.
     280             :   std::vector<Symbol> Symbols;  // Sorted by SymbolID to allow lookup.
     281             : };
     282             : 
     283           0 : struct FuzzyFindRequest {
     284             :   /// \brief A query string for the fuzzy find. This is matched against symbols'
     285             :   /// un-qualified identifiers and should not contain qualifiers like "::".
     286             :   std::string Query;
     287             :   /// \brief If this is non-empty, symbols must be in at least one of the scopes
     288             :   /// (e.g. namespaces) excluding nested scopes. For example, if a scope "xyz::"
     289             :   /// is provided, the matched symbols must be defined in namespace xyz but not
     290             :   /// namespace xyz::abc.
     291             :   ///
     292             :   /// The global scope is "", a top level scope is "foo::", etc.
     293             :   std::vector<std::string> Scopes;
     294             :   /// \brief The number of top candidates to return. The index may choose to
     295             :   /// return more than this, e.g. if it doesn't know which candidates are best.
     296             :   size_t MaxCandidateCount = UINT_MAX;
     297             :   /// If set to true, only symbols for completion support will be considered.
     298             :   bool RestrictForCodeCompletion = false;
     299             :   /// Contextually relevant files (e.g. the file we're code-completing in).
     300             :   /// Paths should be absolute.
     301             :   std::vector<std::string> ProximityPaths;
     302             : };
     303             : 
     304             : struct LookupRequest {
     305             :   llvm::DenseSet<SymbolID> IDs;
     306             : };
     307             : 
     308             : /// \brief Interface for symbol indexes that can be used for searching or
     309             : /// matching symbols among a set of symbols based on names or unique IDs.
     310             : class SymbolIndex {
     311             : public:
     312             :   virtual ~SymbolIndex() = default;
     313             : 
     314             :   /// \brief Matches symbols in the index fuzzily and applies \p Callback on
     315             :   /// each matched symbol before returning.
     316             :   /// If returned Symbols are used outside Callback, they must be deep-copied!
     317             :   ///
     318             :   /// Returns true if there may be more results (limited by MaxCandidateCount).
     319             :   virtual bool
     320             :   fuzzyFind(const FuzzyFindRequest &Req,
     321             :             llvm::function_ref<void(const Symbol &)> Callback) const = 0;
     322             : 
     323             :   /// Looks up symbols with any of the given symbol IDs and applies \p Callback
     324             :   /// on each matched symbol.
     325             :   /// The returned symbol must be deep-copied if it's used outside Callback.
     326             :   virtual void
     327             :   lookup(const LookupRequest &Req,
     328             :          llvm::function_ref<void(const Symbol &)> Callback) const = 0;
     329             : 
     330             :   // FIXME: add interfaces for more index use cases:
     331             :   //  - getAllOccurrences(SymbolID);
     332             : };
     333             : 
     334             : } // namespace clangd
     335             : } // namespace clang
     336             : #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_INDEX_H

Generated by: LCOV version 1.13