wip

2025-06-07 08:44:55 +00:00 · 2017-02-24 00:39:25 -08:00 · 2017-02-24 00:39:25 -08:00 · d59b7c7379
commit d59b7c7379
parent 5faf9d1f6b
6 changed files with 298 additions and 88 deletions
--- a/indexer.cpp
+++ b/indexer.cpp
@ -71,7 +71,26 @@ std::string IndexedFile::ToString() {
  return output.GetString();
 }

+// Creates the definition record for type |id|. |usr| must be non-empty; this
+// is asserted.
+IndexedTypeDef::IndexedTypeDef(TypeId id, const std::string& usr)
+    : id(id), usr(usr) {
+  assert(!usr.empty());
+  //std::cout << "Creating type with usr " << usr << std::endl;
+}

+// Records |loc| as a usage of this type.
+//
+// Does nothing for system definitions. If an equal location is already stored
+// it is not duplicated; instead its |interesting| flag is raised when |loc| is
+// interesting. A location that is not stored yet is appended only when
+// |insert_if_not_present| is true.
+//
+// NOTE: the default value of |insert_if_not_present| lives on the in-class
+// declaration in indexer.h. Repeating it on this out-of-line definition would
+// redefine the default argument, which is ill-formed.
+void IndexedTypeDef::AddUsage(Location loc, bool insert_if_not_present) {
+  if (is_system_def)
+    return;
+
+  // Scan from the most recently added usage backwards. The size is converted
+  // to a signed value first so an empty vector yields -1 and skips the loop.
+  for (int i = static_cast<int>(uses.size()) - 1; i >= 0; --i) {
+    if (uses[i].IsEqualTo(loc)) {
+      if (loc.interesting)
+        uses[i].interesting = true;
+      return;
+    }
+  }
+
+  if (insert_if_not_present)
+    uses.push_back(loc);
+}


 template<typename T>
--- a/indexer.h
+++ b/indexer.h
@ -19,14 +19,13 @@
 #include <rapidjson/stringbuffer.h>
 #include <rapidjson/document.h>

-struct TypeDef;
-struct FuncDef;
-struct VarDef;
+struct IndexedTypeDef;
+struct IndexedFuncDef;
+struct IndexedVarDef;

 using FileId = int64_t;
 using namespace std::experimental;

-
 // TODO: Move off of this weird wrapper, use struct with custom wrappers
 //       directly.
 BEGIN_BITFIELD_TYPE(Location, uint64_t)
@ -145,9 +144,9 @@ bool operator==(const LocalId<T>& a, const LocalId<T>& b) {
  return a.local_id == b.local_id;
 }

-using TypeId = LocalId<TypeDef>;
-using FuncId = LocalId<FuncDef>;
-using VarId = LocalId<VarDef>;
+using TypeId = LocalId<IndexedTypeDef>;
+using FuncId = LocalId<IndexedFuncDef>;
+using VarId = LocalId<IndexedVarDef>;


 template<typename T>
@ -157,9 +156,9 @@ struct Ref {

  Ref(LocalId<T> id, Location loc) : id(id), loc(loc) {}
 };
-using TypeRef = Ref<TypeDef>;
-using FuncRef = Ref<FuncDef>;
-using VarRef = Ref<VarDef>;
+using TypeRef = Ref<IndexedTypeDef>;
+using FuncRef = Ref<IndexedFuncDef>;
+using VarRef = Ref<IndexedVarDef>;


 // TODO: skip as much forward-processing as possible when |is_system_def| is
@ -204,26 +203,8 @@ struct IndexedTypeDef {
  // NOTE: Do not insert directly! Use AddUsage instead.
  std::vector<Location> uses;

-  IndexedTypeDef(TypeId id, const std::string& usr) : id(id), usr(usr) {
-    assert(usr.size() > 0);
-    //std::cout << "Creating type with usr " << usr << std::endl;
-  }
-
-  void AddUsage(Location loc, bool insert_if_not_present = true) {
-    if (is_system_def)
-      return;
-
-    for (int i = uses.size() - 1; i >= 0; --i) {
-      if (uses[i].IsEqualTo(loc)) {
-        if (loc.interesting)
-          uses[i].interesting = true;
-        return;
-      }
-    }
-
-    if (insert_if_not_present)
-      uses.push_back(loc);
-  }
+  IndexedTypeDef(TypeId id, const std::string& usr);
+  void AddUsage(Location loc, bool insert_if_not_present = true);
 };

 struct IndexedFuncDef {
@ -322,4 +303,30 @@ struct IndexedFile {
  std::string ToString();
 };

+
+
+// TODO: Maybe instead of clearing/adding diffs, we should just clear out the
+//       entire previous index and re-add the new one? That would be simpler.
+// TODO: ^^^ I don't think we can do this. It will probably stall the main
+//       indexer for far too long since we will have to iterate over tons of
+//       data.
+// Per-symbol diff records for types, functions and variables. They are empty
+// placeholders for now.
+struct IndexedTypeDefDiff {};
+struct IndexedFuncDefDiff {};
+struct IndexedVarDefDiff {};
+
+// Groups type/func/var diff records into three buckets: removed, added and
+// changed.
+// NOTE(review): presumably this describes the difference between two indexes
+// of the same file - confirm once the diff records carry data.
+struct IndexedFileDiff {
+  std::vector<IndexedTypeDefDiff> removed_types;
+  std::vector<IndexedFuncDefDiff> removed_funcs;
+  std::vector<IndexedVarDefDiff> removed_vars;
+
+  std::vector<IndexedTypeDefDiff> added_types;
+  std::vector<IndexedFuncDefDiff> added_funcs;
+  std::vector<IndexedVarDefDiff> added_vars;
+
+  // TODO: Instead of change, maybe we just remove and then add again? not sure.
+  std::vector<IndexedTypeDefDiff> changed_types;
+  std::vector<IndexedFuncDefDiff> changed_funcs;
+  std::vector<IndexedVarDefDiff> changed_vars;
+};
+
 IndexedFile Parse(std::string filename, std::vector<std::string> args);
--- a/query.cc
+++ b/query.cc
@ -24,6 +24,55 @@ struct SymbolIdx {
  };
 };

+// Pairs a list of values with a list of FileIds.
+// NOTE(review): presumably |contributors| records which files supplied the
+// entries in |values|; how the two vectors correspond is not visible here -
+// confirm.
+template<typename T>
+struct TrackContributors {
+  std::vector<T> values;
+  std::vector<FileId> contributors;
+};
+
+// See comments in IndexedTypeDef for variable descriptions.
+// |derived| and |uses| are wrapped in TrackContributors, so each also carries
+// a list of FileIds next to its values.
+struct QueryableTypeDef {
+  TypeId id;
+  std::string short_name;
+  std::string qualified_name;
+  optional<Location> definition;
+  optional<TypeId> alias_of;
+  std::vector<TypeId> parents;
+  TrackContributors<TypeId> derived;
+  std::vector<TypeId> types;
+  std::vector<FuncId> funcs;
+  std::vector<VarId> vars;
+  TrackContributors<Location> uses;
+};
+
+// See comments in IndexedFuncDef for variable descriptions.
+// |declarations|, |derived|, |callers| and |uses| are wrapped in
+// TrackContributors, so each also carries a list of FileIds next to its
+// values.
+struct QueryableFuncDef {
+  FuncId id;
+  std::string short_name;
+  std::string qualified_name;
+  TrackContributors<Location> declarations;
+  optional<Location> definition;
+  optional<TypeId> declaring_type;
+  optional<FuncId> base;
+  TrackContributors<FuncId> derived;
+  std::vector<VarId> locals;
+  TrackContributors<FuncRef> callers;
+  std::vector<FuncRef> callees;
+  TrackContributors<Location> uses;
+};
+
+// See comments in IndexedVarDef for variable descriptions.
+// |declaration| and |uses| are wrapped in TrackContributors, so each also
+// carries a list of FileIds next to its values.
+// NOTE(review): |declaration| is named in the singular but TrackContributors
+// stores a vector of values - confirm whether several declarations are meant.
+struct QueryableVarDef {
+  VarId id;
+  std::string short_name;
+  std::string qualified_name;
+  TrackContributors<Location> declaration;
+  optional<Location> definition;
+  optional<TypeId> variable_type;
+  optional<TypeId> declaring_type;
+  TrackContributors<Location> uses;
+};
+
 struct QueryableFile {
  // Symbols declared in the file.
  std::vector<SymbolIdx> declared_symbols;
@ -44,9 +93,9 @@ struct QueryableDatabase {
  std::vector<SymbolIdx> symbols;

  // Raw data storage.
-  std::vector<TypeDef> types;
-  std::vector<FuncDef> funcs;
-  std::vector<VarDef> vars;
+  std::vector<QueryableTypeDef> types;
+  std::vector<QueryableFuncDef> funcs;
+  std::vector<QueryableVarDef> vars;

  // |files| is indexed by FileId. Retrieve a FileId from a path using
  // |file_locator|.
--- a/serializer.cc
+++ b/serializer.cc
@ -2,6 +2,115 @@

 #include "indexer.h"

+#if false
+// Emit reads (Reader) or writes (Writer) a single keyed value. The primary
+// templates exist only to reject types that have no specialization.
+template<typename T>
+void Emit(Reader& a, const char* key, T& v) {
+  // The condition depends on T so the assertion fires only when this primary
+  // template is actually instantiated, not merely parsed.
+  static_assert(sizeof(T) == 0, "Emit(Reader&, ...) must be specialized.");
+}
+template<typename T>
+void Emit(Writer& a, const char* key, T& v) {
+  static_assert(sizeof(T) == 0, "Emit(Writer&, ...) must be specialized.");
+}
+
+// Reads the int stored under |key| into |v|.
+template<>
+void Emit(Reader& r, const char* key, int& v) {
+  v = r[key].GetInt();
+}
+
+// Writes |key| followed by the int |v|.
+template<>
+void Emit(Writer& w, const char* key, int &v) {
+  w.Key(key);
+  w.Int(v);
+}
+
+// Structure markers. The Writer overloads forward to the writer's matching
+// Start*/End* call; the Reader overloads do nothing, which lets the templated
+// Serialize() use the same calls for either stream type.
+void StartObject(Reader&) {}
+void StartObject(Writer& writer) { writer.StartObject(); }
+
+void EndObject(Reader&) {}
+void EndObject(Writer& writer) { writer.EndObject(); }
+
+void StartArray(Reader&) {}
+void StartArray(Writer& writer) { writer.StartArray(); }
+
+void EndArray(Reader&) {}
+void EndArray(Writer& writer) { writer.EndArray(); }
+
+// Small sample type with three int fields, used by Serialize() and main()
+// below to try out the Emit helpers.
+struct Object {
+  //Location l;
+  int a = 0, b = 0, c = 0;
+};
+
+/*
+void EmitKey(Reader& r, const char* key) {
+  w.Key(key);
+}
+void EmitKey(Writer& w, const char* key) {
+  w = w[key];
+}
+*/
+
+// Runs every field of |obj| through Emit on |stream|, wrapped in
+// StartObject/EndObject. With a Writer the fields are written out; with a
+// Reader they are read back. Each field uses its own key so that a write
+// followed by a read round-trips all three values.
+template<typename S>
+void Serialize(S& stream, Object& obj) {
+  StartObject(stream);
+  Emit(stream, "a", obj.a);
+  Emit(stream, "b", obj.b);
+  Emit(stream, "c", obj.c);
+  EndObject(stream);
+}
+
+/*
+template <typename C, typename T>
+C& operator&(C& stream, T& t) {
+t.serialize(stream);
+}
+*/
+
+// Scratch driver (compiled out by the surrounding #if false): serializes an
+// Object through a rapidjson PrettyWriter, prints the resulting text, then
+// parses that text into a rapidjson::Document and runs Serialize over it.
+int main(int argc, char** argv) {
+
+  // Writer side: pretty-printed output accumulated in |output|.
+  rapidjson::StringBuffer output;
+  rapidjson::PrettyWriter<rapidjson::StringBuffer> writer(output);
+  writer.SetFormatOptions(
+    rapidjson::PrettyFormatOptions::kFormatSingleLineArray);
+  writer.SetIndent(' ', 2);
+
+  Object foo;
+  foo.a = 10;
+  Serialize(writer, foo);
+  std::cout << output.GetString() << std::endl;
+
+  std::cout << "----" << std::endl;
+
+  // Reader side: parse the text produced above and read it into |foo2|.
+  rapidjson::Document doc;
+  //doc = doc["foo"];
+  doc.Parse(output.GetString());
+  Object foo2;
+  Serialize(doc, foo2);
+
+  // Wait for input before returning.
+  std::cin.get();
+  //Reader r;
+  //foo.Serialize(r);
+
+  return 0;
+}
+#endif
+
+
+
+
+
+
+
+
+
+
 void Serialize(Writer& writer, const char* key, Location location) {
  if (key) writer.Key(key);
  std::string s = location.ToString();
--- a/serializer.h
+++ b/serializer.h
@ -3,5 +3,6 @@

 struct IndexedFile;
 using Writer = rapidjson::PrettyWriter<rapidjson::StringBuffer>;
+using Reader = rapidjson::Document;

 void Serialize(Writer& writer, IndexedFile* file);
--- a/task.cc
+++ b/task.cc
@ -8,82 +8,107 @@

 #include "third_party/tiny-process-library/process.hpp"

-struct BaseTask {
+#include <condition_variable>
+#include <mutex>
+#include <queue>
+#include <utility>
+
+// A thread-safe FIFO queue. http://stackoverflow.com/a/16075550
+//
+// Every access to the underlying std::queue happens while |mutex_| is held,
+// so enqueue() and dequeue() may be called from multiple threads.
+template <class T>
+class SafeQueue {
+public:
+  // Adds an element to the back of the queue and wakes one waiting consumer.
+  void enqueue(T t) {
+    std::lock_guard<std::mutex> lock(mutex_);
+    // |t| is already this function's own copy, so move it into the queue
+    // instead of copying it a second time.
+    queue_.push(std::move(t));
+    cv_.notify_one();
+  }
+
+  // Removes and returns the front element.
+  // If the queue is empty, blocks until an element is available.
+  T dequeue() {
+    std::unique_lock<std::mutex> lock(mutex_);
+    // wait() releases the lock while blocked and reacquires it before
+    // returning; the predicate re-checks the queue after every wakeup.
+    cv_.wait(lock, [this] { return !queue_.empty(); });
+    // The front element is popped right away, so it can be moved from.
+    T val = std::move(queue_.front());
+    queue_.pop();
+    return val;
+  }
+
+private:
+  std::queue<T> queue_;
+  mutable std::mutex mutex_;
+  std::condition_variable cv_;
+};
+
+// A unit of work passed to worker threads through a SafeQueue<Task>. The
+// former BaseTask subclasses are folded into this single struct; the TODO
+// notes below keep their descriptions.
+struct Task {
  int priority = 0;
  bool writes_to_index = false;
-};
+  // When true, the worker thread that dequeues this task returns from
+  // ThreadMain instead of continuing its loop.
+  bool should_exit = false;

-// Task running in a separate process, parsing a file into something we can
-// import.
-struct IndexCreateTask : public BaseTask {
-  IndexCreateTask() {
-    writes_to_index = true;
-  }
-};
-
-// Completed parse task that wants to import content into the global database.
-// Runs in main process, primary thread. Stops all other threads.
-struct IndexImportTask : public BaseTask {
-  IndexImportTask() {
-    writes_to_index = true;
-  }
-};
-
-// Completed parse task that wants to update content previously imported into
-// the global database. Runs in main process, primary thread. Stops all other
-// threads.
-//
-// Note that this task just contains a set of operations to apply to the global
-// database. The operations come from a diff based on the previously indexed
-// state in comparison to the newly indexed state.
-//
-// TODO: We may be able to run multiple freshen and import tasks in parallel if
-//       we restrict what ranges of the db they may change.
-struct IndexFreshenTask : public BaseTask {
-  IndexFreshenTask() {
-    writes_to_index = true;
-  }
-};
-
-// Task running a query against the global database. Run in main process,
-// separate thread.
-struct QueryTask : public BaseTask {
-  QueryTask() {
-    writes_to_index = false;
+  // Returns a default-initialized task with |should_exit| set.
+  static Task MakeExit() {
+    Task task;
+    task.should_exit = true;
+    return task;
  }

+  // TODO: Create index task.
+  // Task running in a separate process, parsing a file into something we can
+  // import.
+
+  // TODO: Index import task.
+  // Completed parse task that wants to import content into the global database.
+  // Runs in main process, primary thread. Stops all other threads.
+
+  // TODO: Index freshen task.
+  // Completed parse task that wants to update content previously imported into
+  // the global database. Runs in main process, primary thread. Stops all other
+  // threads.
+  //
+  // Note that this task just contains a set of operations to apply to the global
+  // database. The operations come from a diff based on the previously indexed
+  // state in comparison to the newly indexed state.
+  //
+  // TODO: We may be able to run multiple freshen and import tasks in parallel if
+  //       we restrict what ranges of the db they may change.
+
+  // TODO: QueryTask
+  // Task running a query against the global database. Run in main process,
+  // separate thread.
  Command query;
  Location location;
  std::string argument;
 };

-
 // NOTE: When something enters a value into master db, it will have to have a
 //       ref count, since multiple parsings could enter it (unless we require
 //       that it be defined in that declaration unit!)
 struct TaskManager {
-  // Tasks that are currently executing.
-  std::vector<BaseTask> running;
-  std::vector<BaseTask> pending;
+  // Tasks waiting to be dequeued by a worker thread (see ThreadMain).
+  SafeQueue<Task> queued_tasks;

  // Available threads.
  std::vector<std::thread> threads;
-  std::condition_variable wakeup_thread;
-  std::mutex mutex;

+  // Starts |num_threads| worker threads, each running ThreadMain.
  TaskManager(int num_threads);
 };

-static void ThreadMain(int id, std::condition_variable* waiter, std::mutex* mutex) {
-  std::unique_lock<std::mutex> lock(*mutex);
-  waiter->wait(lock);
+// Worker loop for thread |id|: repeatedly takes a task from
+// |tm->queued_tasks| and returns once it receives one with |should_exit| set.
+// Any other task is only logged for now.
+static void ThreadMain(int id, TaskManager* tm) {
+  for (;;) {
+    Task next = tm->queued_tasks.dequeue();
+    if (!next.should_exit) {
+      std::cout << id << ": waking" << std::endl;
+      continue;
+    }
+
+    std::cout << id << ": Exiting" << std::endl;
+    return;
+  }

-  std::cout << id << ": running in thread main" << std::endl;
 }

 TaskManager::TaskManager(int num_threads) {
  for (int i = 0; i < num_threads; ++i) {
-    threads.push_back(std::thread(&ThreadMain, i, &wakeup_thread, &mutex));
+    // Each worker receives its index and a pointer back to this manager.
+    threads.emplace_back(&ThreadMain, i, this);
  }
 }

@ -91,8 +116,8 @@ void Pump(TaskManager* tm) {
  //tm->threads[0].
 }

-int main(int argc, char** argv) {
-  TaskManager tm(10);
+int main4(int argc, char** argv) {
+  TaskManager tm(5);

  // TODO: looks like we will have to write shared memory support.

@ -100,8 +125,8 @@ int main(int argc, char** argv) {
  //       Repeat until we encounter a writer, wait for all threads to signal
  //       they are done.
  // TODO: Let's use a thread safe queue/vector/etc instead.
-  tm.wakeup_thread.notify_one();
-  tm.wakeup_thread.notify_one();
+  for (int i = 0; i < 10; ++i)
+    tm.queued_tasks.enqueue(Task::MakeExit());

  for (std::thread& thread : tm.threads)
    thread.join();