From 8c73bbc3c721afe1e0f6f6e27b588579a857e001 Mon Sep 17 00:00:00 2001
From: Fangrui Song <i@maskray.me>
Date: Sat, 17 Nov 2018 10:23:12 -0800
Subject: [PATCH] Use clang::isIdentifierBody and clean up utils/working_files

---
 src/clang_tu.cc         |   2 +-
 src/include_complete.cc |  13 ++++-
 src/indexer.cc          |  11 ++--
 src/pipeline.cc         |   2 +-
 src/project.cc          |   2 +-
 src/serializer.cc       |   2 +-
 src/test.cc             |  56 ++++++++++--------
 src/utils.cc            |  47 +--------------
 src/utils.hh            |  11 ----
 src/working_files.cc    | 124 +++++++---------------------------------
 src/working_files.hh    |  27 ++-------
 11 files changed, 76 insertions(+), 221 deletions(-)

diff --git a/src/clang_tu.cc b/src/clang_tu.cc
index 5a87153a..e6fe3ef9 100644
--- a/src/clang_tu.cc
+++ b/src/clang_tu.cc
@@ -32,7 +32,7 @@ std::string PathFromFileEntry(const FileEntry &file) {
   std::string ret = NormalizePath(Name);
   // Resolve /usr/include/c++/7.3.0 symlink.
   if (!llvm::any_of(g_config->workspaceFolders, [&](const std::string &root) {
-        return StartsWith(ret, root);
+        return StringRef(ret).startswith(root);
       })) {
     SmallString<256> dest;
     llvm::sys::fs::real_path(ret, dest);
diff --git a/src/include_complete.cc b/src/include_complete.cc
index fab2279b..7f293336 100644
--- a/src/include_complete.cc
+++ b/src/include_complete.cc
@@ -156,7 +156,11 @@ void IncludeComplete::InsertCompletionItem(const std::string &absolute_path,
 }
 
 void IncludeComplete::AddFile(const std::string &path) {
-  if (!EndsWithAny(path, g_config->completion.include.suffixWhitelist))
+  bool ok = false;
+  for (StringRef suffix : g_config->completion.include.suffixWhitelist)
+    if (StringRef(path).endswith(suffix))
+      ok = true;
+  if (!ok)
     return;
   if (match_ && !match_->IsMatch(path))
     return;
@@ -183,8 +187,11 @@ void IncludeComplete::InsertIncludesFromDirectory(std::string directory,
   GetFilesInFolder(
       directory, true /*recursive*/, false /*add_folder_to_path*/,
       [&](const std::string &path) {
-        if (!include_cpp &&
-            !EndsWithAny(path, g_config->completion.include.suffixWhitelist))
+        bool ok = include_cpp;
+        for (StringRef suffix : g_config->completion.include.suffixWhitelist)
+          if (StringRef(path).endswith(suffix))
+            ok = true;
+        if (!ok)
           return;
         if (match_ && !match_->IsMatch(directory + path))
           return;
diff --git a/src/indexer.cc b/src/indexer.cc
index f980ad62..203b463c 100644
--- a/src/indexer.cc
+++ b/src/indexer.cc
@@ -451,11 +451,10 @@ public:
     }
     while (ret.size() && isspace(ret.back()))
       ret.pop_back();
-    if (EndsWith(ret, "*/")) {
+    if (StringRef(ret).endswith("*/"))
       ret.resize(ret.size() - 2);
-    } else if (EndsWith(ret, "\n/")) {
+    else if (StringRef(ret).endswith("\n/"))
       ret.resize(ret.size() - 2);
-    }
     while (ret.size() && isspace(ret.back()))
       ret.pop_back();
     return ret;
@@ -521,8 +520,8 @@ public:
     }
     auto i = name.find(short_name);
     if (short_name.size())
-      while (i != std::string::npos && ((i && isalnum(name[i - 1])) ||
-                                        isalnum(name[i + short_name.size()])))
+      while (i != std::string::npos && ((i && isIdentifierBody(name[i - 1])) ||
+                                        isIdentifierBody(name[i + short_name.size()])))
         i = name.find(short_name, i + short_name.size());
     if (i == std::string::npos) {
       // e.g. operator type-parameter-1
@@ -541,7 +540,7 @@ public:
         paren++;
       else if (name[i - 1] == '(')
         paren--;
-      else if (!(paren > 0 || isalnum(name[i - 1]) || name[i - 1] == '_' ||
+      else if (!(paren > 0 || isIdentifierBody(name[i - 1]) ||
                  name[i - 1] == ':'))
         break;
     }
diff --git a/src/pipeline.cc b/src/pipeline.cc
index 5baab1c0..054584ac 100644
--- a/src/pipeline.cc
+++ b/src/pipeline.cc
@@ -141,7 +141,7 @@ std::string AppendSerializationFormat(const std::string &base) {
 
 std::string GetCachePath(const std::string &source_file) {
   for (auto &root : g_config->workspaceFolders)
-    if (StartsWith(source_file, root)) {
+    if (StringRef(source_file).startswith(root)) {
       auto len = root.size();
       return g_config->cacheDirectory +
              EscapeFileName(root.substr(0, len - 1)) + '/' +
diff --git a/src/project.cc b/src/project.cc
index 8ea71643..f899ec75 100644
--- a/src/project.cc
+++ b/src/project.cc
@@ -447,7 +447,7 @@ Project::Entry Project::FindEntry(const std::string &path,
         best_entry = &entry;
       }
     }
-    if (StartsWith(path, root))
+    if (StringRef(path).startswith(root))
       result.root = root;
   }
   if (result.root.empty())
diff --git a/src/serializer.cc b/src/serializer.cc
index 3c152ebc..97af7271 100644
--- a/src/serializer.cc
+++ b/src/serializer.cc
@@ -213,7 +213,7 @@ void Reflect(Writer &visitor, IndexInclude &value) {
   REFLECT_MEMBER(line);
   if (gTestOutputMode) {
     std::string basename = llvm::sys::path::filename(value.resolved_path);
-    if (!StartsWith(value.resolved_path, "&"))
+    if (value.resolved_path[0] != '&')
       basename = "&" + basename;
     REFLECT_MEMBER2("resolved_path", basename);
   } else {
diff --git a/src/test.cc b/src/test.cc
index 44ebe7e1..49183339 100644
--- a/src/test.cc
+++ b/src/test.cc
@@ -25,7 +25,6 @@ limitations under the License.
 
 #include <llvm/Config/llvm-config.h>
 #include <llvm/ADT/StringRef.h>
-using namespace llvm;
 
 #include <rapidjson/document.h>
 #include <rapidjson/prettywriter.h>
@@ -42,6 +41,8 @@ using namespace llvm;
 #include <unistd.h>
 #endif
 
+using namespace llvm;
+
 extern bool gTestOutputMode;
 
 namespace ccls {
@@ -90,6 +91,24 @@ void TrimInPlace(std::string &s) {
   s.erase(std::find_if(s.rbegin(), s.rend(), f).base(), s.end());
 }
 
+std::vector<std::string> SplitString(const std::string &str,
+                                     const std::string &delimiter) {
+  // Splits |str| on every occurrence of |delimiter|; empty fields are kept.
+  // An empty |delimiter| never matches, so the result is just {str}.
+  std::vector<std::string> strings;
+  std::string::size_type pos = 0;
+  std::string::size_type prev = 0;
+  while (!delimiter.empty() &&
+         (pos = str.find(delimiter, prev)) != std::string::npos) {
+    strings.push_back(str.substr(prev, pos - prev));
+    // Skip the whole delimiter, not just its first character.
+    prev = pos + delimiter.size();
+  }
+  // To get the last substring (or only, if delimiter is not found)
+  strings.push_back(str.substr(prev));
+  return strings;
+}
+
 void ParseTestExpectation(
     const std::string &filename,
     const std::vector<std::string> &lines_with_endings, TextReplacer *replacer,
@@ -98,10 +117,10 @@ void ParseTestExpectation(
   // Scan for EXTRA_FLAGS:
   {
     bool in_output = false;
-    for (std::string line : lines_with_endings) {
-      line = StringRef(line).trim().str();
+    for (StringRef line : lines_with_endings) {
+      line = line.trim();
 
-      if (StartsWith(line, "EXTRA_FLAGS:")) {
+      if (line.startswith("EXTRA_FLAGS:")) {
         assert(!in_output && "multiple EXTRA_FLAGS sections");
         in_output = true;
         continue;
@@ -111,7 +130,7 @@ void ParseTestExpectation(
         break;
 
       if (in_output)
-        flags->push_back(line);
+        flags->push_back(line.str());
     }
   }
 
@@ -121,11 +140,11 @@ void ParseTestExpectation(
     std::string active_output_contents;
 
     bool in_output = false;
-    for (std::string line_with_ending : lines_with_endings) {
-      if (StartsWith(line_with_ending, "*/"))
+    for (StringRef line_with_ending : lines_with_endings) {
+      if (line_with_ending.startswith("*/"))
         break;
 
-      if (StartsWith(line_with_ending, "OUTPUT:")) {
+      if (line_with_ending.startswith("OUTPUT:")) {
         // Terminate the previous output section if we found a new one.
         if (in_output) {
           (*output_sections)[active_output_filename] = active_output_contents;
@@ -133,9 +152,10 @@ void ParseTestExpectation(
 
         // Try to tokenize OUTPUT: based one whitespace. If there is more than
         // one token assume it is a filename.
-        std::vector<std::string> tokens = SplitString(line_with_ending, " ");
+        SmallVector<StringRef, 2> tokens;
+        line_with_ending.split(tokens, ' ');
         if (tokens.size() > 1) {
-          active_output_filename = StringRef(tokens[1]).trim().str();
+          active_output_filename = tokens[1].trim().str(); // drop EOL/space
         } else {
           active_output_filename = filename;
         }
@@ -229,7 +249,7 @@ std::string FindExpectedOutputForFilename(
     std::string filename,
     const std::unordered_map<std::string, std::string> &expected) {
   for (const auto &entry : expected) {
-    if (EndsWith(entry.first, filename))
+    if (StringRef(entry.first).endswith(filename))
       return entry.second;
   }
 
@@ -243,7 +263,7 @@ IndexFile *
 FindDbForPathEnding(const std::string &path,
                     const std::vector<std::unique_ptr<IndexFile>> &dbs) {
   for (auto &db : dbs) {
-    if (EndsWith(db->path, path))
+    if (StringRef(db->path).endswith(path))
       return db.get();
   }
   return nullptr;
@@ -276,18 +296,6 @@ bool RunIndexTests(const std::string &filter_path, bool enable_update) {
       [&](const std::string &path) {
         bool is_fail_allowed = false;
 
-        if (EndsWithAny(path, {".m", ".mm"})) {
-#ifndef __APPLE__
-          return;
-#endif
-
-          // objective-c tests are often not updated right away. do not bring
-          // down
-          // CI if they fail.
-          if (!enable_update)
-            is_fail_allowed = true;
-        }
-
         if (path.find(filter_path) == std::string::npos)
           return;
 
diff --git a/src/utils.cc b/src/utils.cc
index 8e2eb4d6..3100c32d 100644
--- a/src/utils.cc
+++ b/src/utils.cc
@@ -34,7 +34,7 @@ using namespace llvm;
 #include <unordered_map>
 
 namespace ccls {
-uint64_t HashUsr(std::string_view s) {
+uint64_t HashUsr(llvm::StringRef s) {
   union {
     uint64_t ret;
     uint8_t out[8];
@@ -47,51 +47,6 @@ uint64_t HashUsr(std::string_view s) {
   return ret;
 }
 
-uint64_t HashUsr(llvm::StringRef s) {
-  return HashUsr(std::string_view(s.data(), s.size()));
-}
-
-bool EndsWith(std::string_view s, std::string_view suffix) {
-  return s.size() >= suffix.size() &&
-         std::equal(suffix.rbegin(), suffix.rend(), s.rbegin());
-}
-
-bool StartsWith(std::string_view s, std::string_view prefix) {
-  return s.size() >= prefix.size() &&
-         std::equal(prefix.begin(), prefix.end(), s.begin());
-}
-
-bool EndsWithAny(std::string_view s, const std::vector<std::string> &ss) {
-  return std::any_of(ss.begin(), ss.end(),
-                     std::bind(EndsWith, s, std::placeholders::_1));
-}
-
-bool FindAnyPartial(const std::string &value,
-                    const std::vector<std::string> &values) {
-  return std::any_of(std::begin(values), std::end(values),
-                     [&value](const std::string &v) {
-                       return value.find(v) != std::string::npos;
-                     });
-}
-
-std::vector<std::string> SplitString(const std::string &str,
-                                     const std::string &delimiter) {
-  // http://stackoverflow.com/a/13172514
-  std::vector<std::string> strings;
-
-  std::string::size_type pos = 0;
-  std::string::size_type prev = 0;
-  while ((pos = str.find(delimiter, prev)) != std::string::npos) {
-    strings.push_back(str.substr(prev, pos - prev));
-    prev = pos + 1;
-  }
-
-  // To get the last substring (or only, if delimiter is not found)
-  strings.push_back(str.substr(prev));
-
-  return strings;
-}
-
 std::string LowerPathIfInsensitive(const std::string &path) {
 #if defined(_WIN32)
   std::string ret = path;
diff --git a/src/utils.hh b/src/utils.hh
index 1bf9ed37..1c00b2a6 100644
--- a/src/utils.hh
+++ b/src/utils.hh
@@ -27,19 +27,8 @@ class StringRef;
 }
 
 namespace ccls {
-uint64_t HashUsr(std::string_view s);
 uint64_t HashUsr(llvm::StringRef s);
 
-// Returns true if |value| starts/ends with |start| or |ending|.
-bool StartsWith(std::string_view value, std::string_view start);
-bool EndsWith(std::string_view value, std::string_view ending);
-bool EndsWithAny(std::string_view s, const std::vector<std::string> &ss);
-bool FindAnyPartial(const std::string &value,
-                    const std::vector<std::string> &values);
-
-std::vector<std::string> SplitString(const std::string &str,
-                                     const std::string &delimiter);
-
 std::string LowerPathIfInsensitive(const std::string &path);
 
 // Ensures that |path| ends in a slash.
diff --git a/src/working_files.cc b/src/working_files.cc
index 6c7ac463..48004e9c 100644
--- a/src/working_files.cc
+++ b/src/working_files.cc
@@ -18,11 +18,16 @@ limitations under the License.
 #include "log.hh"
 #include "position.hh"
 
+#include <clang/Basic/CharInfo.h>
+
 #include <algorithm>
 #include <climits>
 #include <numeric>
 #include <sstream>
 
+using namespace clang;
+using namespace llvm;
+
 namespace ccls {
 namespace {
 
@@ -248,7 +253,7 @@ void WorkingFile::ComputeLineMapping() {
 
   // For index line i, set index_to_buffer[i] to -1 if line i is duplicated.
   int i = 0;
-  for (auto &line : index_lines) {
+  for (StringRef line : index_lines) {
     uint64_t h = HashUsr(line);
     auto it = hash_to_unique.find(h);
     if (it == hash_to_unique.end()) {
@@ -265,7 +270,7 @@ void WorkingFile::ComputeLineMapping() {
   // For buffer line i, set buffer_to_index[i] to -1 if line i is duplicated.
   i = 0;
   hash_to_unique.clear();
-  for (auto &line : buffer_lines) {
+  for (StringRef line : buffer_lines) {
     uint64_t h = HashUsr(line);
     auto it = hash_to_unique.find(h);
     if (it == hash_to_unique.end()) {
@@ -341,80 +346,22 @@ std::optional<int> WorkingFile::GetIndexPosFromBufferPos(int line, int *column,
                           index_lines, is_end);
 }
 
-std::string
-WorkingFile::FindClosestCallNameInBuffer(Position position,
-                                         int *active_parameter,
-                                         Position *completion_position) const {
-  *active_parameter = 0;
-
-  int offset = GetOffsetForPosition(position, buffer_content);
-
-  // If vscode auto-inserts closing ')' we will begin on ')' token in foo()
-  // which will make the below algorithm think it's a nested call.
-  if (offset > 0 && buffer_content[offset] == ')')
-    --offset;
-
-  // Scan back out of call context.
-  int balance = 0;
-  while (offset > 0) {
-    char c = buffer_content[offset];
-    if (c == ')')
-      ++balance;
-    else if (c == '(')
-      --balance;
-
-    if (balance == 0 && c == ',')
-      *active_parameter += 1;
-
-    --offset;
-
-    if (balance == -1)
-      break;
-  }
-
-  if (offset < 0)
-    return "";
-
-  // Scan back entire identifier.
-  int start_offset = offset;
-  while (offset > 0) {
-    char c = buffer_content[offset - 1];
-    if (isalnum(c) == false && c != '_')
-      break;
-    --offset;
-  }
-
-  if (completion_position)
-    *completion_position = GetPositionForOffset(buffer_content, offset);
-
-  return buffer_content.substr(offset, start_offset - offset + 1);
-}
-
 Position
 WorkingFile::FindStableCompletionSource(Position position,
                                         std::string *existing_completion,
                                         Position *replace_end_pos) const {
-  int start_offset = GetOffsetForPosition(position, buffer_content);
-  int offset = start_offset;
-
-  while (offset > 0) {
-    char c = buffer_content[offset - 1];
-    if (!isalnum(c) && c != '_')
-      break;
-    --offset;
-  }
+  int start = GetOffsetForPosition(position, buffer_content);
+  int i = start;
+  while (i > 0 && isIdentifierBody(buffer_content[i - 1]))
+    --i;
 
   *replace_end_pos = position;
-  for (int i = start_offset; i < buffer_content.size(); i++) {
-    char c = buffer_content[i];
-    if (!isalnum(c) && c != '_')
-      break;
-    // We know that replace_end_pos and position are on the same line.
+  for (size_t j = start;
+       j < buffer_content.size() && isIdentifierBody(buffer_content[j]); j++)
+    replace_end_pos->character++; // stays on the same line as |position|
-  }
 
-  *existing_completion = buffer_content.substr(offset, start_offset - offset);
-  return GetPositionForOffset(buffer_content, offset);
+  *existing_completion = buffer_content.substr(i, start - i);
+  return GetPositionForOffset(buffer_content, i);
 }
 
 WorkingFile *WorkingFiles::GetFileByFilename(const std::string &filename) {
@@ -439,19 +386,6 @@ std::string WorkingFiles::GetContent(const std::string &filename) {
   return "";
 }
 
-void WorkingFiles::DoAction(const std::function<void()> &action) {
-  std::lock_guard<std::mutex> lock(files_mutex);
-  action();
-}
-
-void WorkingFiles::DoActionOnFile(
-    const std::string &filename,
-    const std::function<void(WorkingFile *file)> &action) {
-  std::lock_guard<std::mutex> lock(files_mutex);
-  WorkingFile *file = GetFileByFilenameNoLock(filename);
-  action(file);
-}
-
 WorkingFile *WorkingFiles::OnOpen(const TextDocumentItem &open) {
   std::lock_guard<std::mutex> lock(files_mutex);
 
@@ -522,19 +456,6 @@ void WorkingFiles::OnClose(const TextDocumentIdentifier &close) {
                  << " because it was not open";
 }
 
-WorkingFiles::Snapshot
-WorkingFiles::AsSnapshot(const std::vector<std::string> &filter_paths) {
-  std::lock_guard<std::mutex> lock(files_mutex);
-
-  Snapshot result;
-  result.files.reserve(files.size());
-  for (const auto &file : files) {
-    if (filter_paths.empty() || FindAnyPartial(file->filename, filter_paths))
-      result.files.push_back({file->filename, file->buffer_content});
-  }
-  return result;
-}
-
 // VSCode (UTF-16) disagrees with Emacs lsp-mode (UTF-8) on how to represent
 // text documents.
 // We use a UTF-8 iterator to approximate UTF-16 in the specification (weird).
@@ -557,24 +478,19 @@ int GetOffsetForPosition(Position pos, std::string_view content) {
 
 std::string_view LexIdentifierAroundPos(Position position,
                                         std::string_view content) {
-  int start = GetOffsetForPosition(position, content);
-  int end = start + 1;
+  int start = GetOffsetForPosition(position, content), end = start + 1;
   char c;
 
   // We search for :: before the cursor but not after to get the qualifier.
   for (; start > 0; start--) {
     c = content[start - 1];
-    if (isalnum(c) || c == '_')
-      ;
-    else if (c == ':' && start > 1 && content[start - 2] == ':')
+    if (c == ':' && start > 1 && content[start - 2] == ':')
       start--;
-    else
+    else if (!isIdentifierBody(c))
       break;
   }
-
-  for (; end < (int)content.size(); end++)
-    if (c = content[end], !(isalnum(c) || c == '_'))
-      break;
+  for (; end < content.size() && isIdentifierBody(content[end]); end++)
+    ;
 
   return content.substr(start, end - start);
 }
diff --git a/src/working_files.hh b/src/working_files.hh
index f0df1c6f..95cdf957 100644
--- a/src/working_files.hh
+++ b/src/working_files.hh
@@ -62,19 +62,6 @@ struct WorkingFile {
   // Also resolves |column| if not NULL.
   std::optional<int> GetIndexPosFromBufferPos(int line, int *column,
                                               bool is_end);
-
-  // TODO: Move FindClosestCallNameInBuffer and FindStableCompletionSource into
-  // lex_utils.h/cc
-
-  // Finds the closest 'callable' name prior to position. This is used for
-  // signature help to filter code completion results.
-  //
-  // |completion_position| will be point to a good code completion location to
-  // for fetching signatures.
-  std::string
-  FindClosestCallNameInBuffer(Position position, int *active_parameter,
-                              Position *completion_position = nullptr) const;
-
   // Returns a relatively stable completion position (it jumps back until there
   // is a non-alphanumeric character).
   //
@@ -111,21 +98,15 @@ struct WorkingFiles {
   std::string GetContent(const std::string &filename);
 
   // Run |action| under the lock.
-  void DoAction(const std::function<void()> &action);
-  // Run |action| on the file identified by |filename|. This executes under the
-  // lock.
-  void DoActionOnFile(const std::string &filename,
-                      const std::function<void(WorkingFile *file)> &action);
+  template <typename Fn> void DoAction(Fn &&action) {
+    std::lock_guard<std::mutex> lock(files_mutex);
+    action();
+  }
 
   WorkingFile *OnOpen(const TextDocumentItem &open);
   void OnChange(const TextDocumentDidChangeParam &change);
   void OnClose(const TextDocumentIdentifier &close);
 
-  // If |filter_paths| is non-empty, only files which contain any of the given
-  // strings. For example, {"foo", "bar"} means that every result has either the
-  // string "foo" or "bar" contained within it.
-  Snapshot AsSnapshot(const std::vector<std::string> &filter_paths);
-
   // Use unique_ptrs so we can handout WorkingFile ptrs and not have them
   // invalidated if we resize files.
   std::vector<std::unique_ptr<WorkingFile>> files;