From 79df82d4a169ac6ebd71988ae5b5185ba1f0b4a3 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sat, 14 Apr 2018 16:48:56 -0700 Subject: [PATCH] MessagePack -> custom binary format --- .gitmodules | 3 - CMakeLists.txt | 3 +- src/cache_manager.cc | 4 +- src/config.h | 8 +-- src/file_consumer.cc | 4 +- src/serializer.cc | 101 +++++++++++++---------------- src/serializer.h | 26 +++++--- src/serializers/binary.h | 131 ++++++++++++++++++++++++++++++++++++++ src/serializers/json.h | 12 ++-- src/serializers/msgpack.h | 84 ------------------------ third_party/msgpack-c | 1 - 11 files changed, 208 insertions(+), 169 deletions(-) create mode 100644 src/serializers/binary.h delete mode 100644 src/serializers/msgpack.h delete mode 160000 third_party/msgpack-c diff --git a/.gitmodules b/.gitmodules index e2c0733d..27ff39f0 100644 --- a/.gitmodules +++ b/.gitmodules @@ -10,6 +10,3 @@ [submodule "third_party/loguru"] path = third_party/loguru url = https://github.com/emilk/loguru -[submodule "third_party/msgpack-c"] - path = third_party/msgpack-c - url = https://github.com/msgpack/msgpack-c diff --git a/CMakeLists.txt b/CMakeLists.txt index 36085bd0..f5183c7e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -135,8 +135,7 @@ target_include_directories(ccls PRIVATE third_party/rapidjson/include third_party/sparsepp third_party/loguru - third_party/doctest - third_party/msgpack-c/include) + third_party/doctest) ### Install diff --git a/src/cache_manager.cc b/src/cache_manager.cc index 19e7bb59..beb9186b 100644 --- a/src/cache_manager.cc +++ b/src/cache_manager.cc @@ -59,10 +59,10 @@ struct RealCacheManager : ICacheManager { std::string AppendSerializationFormat(const std::string& base) { switch (g_config->cacheFormat) { + case SerializeFormat::Binary: + return base + ".blob"; case SerializeFormat::Json: return base + ".json"; - case SerializeFormat::MessagePack: - return base + ".mpack"; } } }; diff --git a/src/config.h b/src/config.h index d4cfe163..c0d3774a 100644 --- a/src/config.h +++ b/src/config.h @@ -38,12 +38,10 @@ struct Config { // "json" generates `cacheDirectory/.../xxx.json` files which can be pretty // printed with jq. // - // "msgpack" uses a compact binary serialization format (the underlying wire - // format is [MessagePack](https://msgpack.org/index.html)) which typically - // takes only 60% of the corresponding JSON size, but is difficult to inspect. - // msgpack does not store map keys and you need to re-index whenever a struct + // "binary" uses a compact binary serialization format. + // It is not schema-aware and you need to re-index whenever a struct // member has changed. - SerializeFormat cacheFormat = SerializeFormat::MessagePack; + SerializeFormat cacheFormat = SerializeFormat::Binary; // Value to use for clang -resource-dir if not present in // compile_commands.json. // diff --git a/src/file_consumer.cc b/src/file_consumer.cc index a85c666f..796bb9f0 100644 --- a/src/file_consumer.cc +++ b/src/file_consumer.cc @@ -83,8 +83,8 @@ IndexFile* FileConsumer::TryConsumeFile( if (clang_getFileUniqueID(file, &file_id) != 0) { std::string file_name = FileName(file); if (!file_name.empty()) { - LOG_S(ERROR) << "Could not get unique file id for " << file_name - << " when parsing " << parse_file_; + // LOG_S(ERROR) << "Could not get unique file id for " << file_name + // << " when parsing " << parse_file_; } return nullptr; } diff --git a/src/serializer.cc b/src/serializer.cc index 978e2550..2531c20e 100644 --- a/src/serializer.cc +++ b/src/serializer.cc @@ -1,8 +1,8 @@ #include "serializer.h" #include "filesystem.hh" +#include "serializers/binary.h" #include "serializers/json.h" -#include "serializers/msgpack.h" #include "indexer.h" @@ -15,12 +15,10 @@ bool gTestOutputMode = false; //// Elementary types void Reflect(Reader& visitor, uint8_t& value) { - if (!visitor.IsInt()) - throw std::invalid_argument("uint8_t"); - value = (uint8_t)visitor.GetInt(); + value = visitor.GetUInt8(); } void Reflect(Writer& visitor, uint8_t& value) { - visitor.Int(value); + visitor.UInt8(value); } void Reflect(Reader& visitor, short& value) { @@ -51,12 +49,12 @@ void Reflect(Writer& visitor, int& value) { } void Reflect(Reader& visitor, unsigned& value) { - if (!visitor.IsUint64()) + if (!visitor.IsUInt64()) throw std::invalid_argument("unsigned"); - value = visitor.GetUint32(); + value = visitor.GetUInt32(); } void Reflect(Writer& visitor, unsigned& value) { - visitor.Uint32(value); + visitor.UInt32(value); } void Reflect(Reader& visitor, long& value) { @@ -69,12 +67,12 @@ void Reflect(Writer& visitor, long& value) { } void Reflect(Reader& visitor, unsigned long& value) { - if (!visitor.IsUint64()) + if (!visitor.IsUInt64()) throw std::invalid_argument("unsigned long"); - value = (unsigned long)visitor.GetUint64(); + value = (unsigned long)visitor.GetUInt64(); } void Reflect(Writer& visitor, unsigned long& value) { - visitor.Uint64(value); + visitor.UInt64(value); } void Reflect(Reader& visitor, long long& value) { @@ -87,12 +85,12 @@ void Reflect(Writer& visitor, long long& value) { } void Reflect(Reader& visitor, unsigned long long& value) { - if (!visitor.IsUint64()) + if (!visitor.IsUInt64()) throw std::invalid_argument("unsigned long long"); - value = visitor.GetUint64(); + value = visitor.GetUInt64(); } void Reflect(Writer& visitor, unsigned long long& value) { - visitor.Uint64(value); + visitor.UInt64(value); } void Reflect(Reader& visitor, double& value) { @@ -302,6 +300,7 @@ void Reflect(TVisitor& visitor, IndexFile& value) { } void Reflect(Reader& visitor, std::monostate&) { + assert(visitor.Format() == SerializeFormat::Json); visitor.GetNull(); } @@ -311,22 +310,31 @@ void Reflect(Writer& visitor, std::monostate&) { void Reflect(Reader& visitor, SerializeFormat& value) { std::string fmt = visitor.GetString(); - value = fmt[0] == 'm' ? SerializeFormat::MessagePack : SerializeFormat::Json; + value = fmt[0] == 'b' ? SerializeFormat::Binary : SerializeFormat::Json; } void Reflect(Writer& visitor, SerializeFormat& value) { switch (value) { + case SerializeFormat::Binary: + visitor.String("binary"); + break; case SerializeFormat::Json: visitor.String("json"); break; - case SerializeFormat::MessagePack: - visitor.String("msgpack"); - break; } } std::string Serialize(SerializeFormat format, IndexFile& file) { switch (format) { + case SerializeFormat::Binary: { + BinaryWriter writer; + int major = IndexFile::kMajorVersion; + int minor = IndexFile::kMinorVersion; + Reflect(writer, major); + Reflect(writer, minor); + Reflect(writer, file); + return writer.Take(); + } case SerializeFormat::Json: { rapidjson::StringBuffer output; rapidjson::PrettyWriter writer(output); @@ -343,17 +351,6 @@ std::string Serialize(SerializeFormat format, IndexFile& file) { Reflect(json_writer, file); return output.GetString(); } - case SerializeFormat::MessagePack: { - msgpack::sbuffer buf; - msgpack::packer pk(&buf); - MessagePackWriter msgpack_writer(&pk); - uint64_t magic = IndexFile::kMajorVersion; - int version = IndexFile::kMinorVersion; - Reflect(msgpack_writer, magic); - Reflect(msgpack_writer, version); - Reflect(msgpack_writer, file); - return std::string(buf.data(), buf.size()); - } } return ""; } @@ -369,6 +366,26 @@ std::unique_ptr Deserialize( std::unique_ptr file; switch (format) { + case SerializeFormat::Binary: { + try { + int major, minor; + if (serialized_index_content.size() < 8) + throw std::invalid_argument("Invalid"); + BinaryReader reader(serialized_index_content); + Reflect(reader, major); + Reflect(reader, minor); + if (major != IndexFile::kMajorVersion || + minor != IndexFile::kMinorVersion) + throw std::invalid_argument("Invalid version"); + file = std::make_unique(path, file_content); + Reflect(reader, *file); + } catch (std::invalid_argument& e) { + LOG_S(INFO) << "Failed to deserialize '" << path + << "': " << e.what(); + return nullptr; + } + break; + } case SerializeFormat::Json: { rapidjson::Document reader; if (gTestOutputMode || !expected_version) { @@ -395,32 +412,6 @@ std::unique_ptr Deserialize( } break; } - - case SerializeFormat::MessagePack: { - try { - int major, minor; - if (serialized_index_content.size() < 8) - throw std::invalid_argument("Invalid"); - msgpack::unpacker upk; - upk.reserve_buffer(serialized_index_content.size()); - memcpy(upk.buffer(), serialized_index_content.data(), - serialized_index_content.size()); - upk.buffer_consumed(serialized_index_content.size()); - file = std::make_unique(path, file_content); - MessagePackReader reader(&upk); - Reflect(reader, major); - Reflect(reader, minor); - if (major != IndexFile::kMajorVersion || - minor != IndexFile::kMinorVersion) - throw std::invalid_argument("Invalid version"); - Reflect(reader, *file); - } catch (std::invalid_argument& e) { - LOG_S(INFO) << "Failed to deserialize msgpack '" << path - << "': " << e.what(); - return nullptr; - } - break; - } } // Restore non-serialized state. diff --git a/src/serializer.h b/src/serializer.h index d7a6ed9e..edb35e1f 100644 --- a/src/serializer.h +++ b/src/serializer.h @@ -16,7 +16,7 @@ #include #include -enum class SerializeFormat { Json, MessagePack }; +enum class SerializeFormat { Binary, Json }; class Reader { public: @@ -27,16 +27,17 @@ class Reader { virtual bool IsNull() = 0; virtual bool IsInt() = 0; virtual bool IsInt64() = 0; - virtual bool IsUint64() = 0; + virtual bool IsUInt64() = 0; virtual bool IsDouble() = 0; virtual bool IsString() = 0; virtual void GetNull() = 0; virtual bool GetBool() = 0; + virtual uint8_t GetUInt8() = 0; virtual int GetInt() = 0; - virtual uint32_t GetUint32() = 0; + virtual uint32_t GetUInt32() = 0; virtual int64_t GetInt64() = 0; - virtual uint64_t GetUint64() = 0; + virtual uint64_t GetUInt64() = 0; virtual double GetDouble() = 0; virtual std::string GetString() = 0; @@ -55,9 +56,10 @@ class Writer { virtual void Null() = 0; virtual void Bool(bool x) = 0; virtual void Int(int x) = 0; - virtual void Uint32(uint32_t x) = 0; virtual void Int64(int64_t x) = 0; - virtual void Uint64(uint64_t x) = 0; + virtual void UInt8(uint8_t x) = 0; + virtual void UInt32(uint32_t x) = 0; + virtual void UInt64(uint64_t x) = 0; virtual void Double(double x) = 0; virtual void String(const char* x) = 0; virtual void String(const char* x, size_t len) = 0; @@ -192,9 +194,11 @@ void Reflect(Reader& visitor, std::optional& value) { } template void Reflect(Writer& visitor, std::optional& value) { - if (value) + if (value) { + if (visitor.Format() != SerializeFormat::Json) + visitor.UInt8(1); Reflect(visitor, *value); - else + } else visitor.Null(); } @@ -211,9 +215,11 @@ void Reflect(Reader& visitor, Maybe& value) { } template void Reflect(Writer& visitor, Maybe& value) { - if (value) + if (value) { + if (visitor.Format() != SerializeFormat::Json) + visitor.UInt8(1); Reflect(visitor, *value); - else + } else visitor.Null(); } diff --git a/src/serializers/binary.h b/src/serializers/binary.h new file mode 100644 index 00000000..ad6d64fe --- /dev/null +++ b/src/serializers/binary.h @@ -0,0 +1,131 @@ +#pragma once + +#include "serializer.h" + +#include + +class BinaryReader : public Reader { + const char* p_; + + template + T Get() { + auto ret = *reinterpret_cast(p_); + p_ += sizeof(T); + return ret; + } + + uint64_t VarUInt() { + auto x = *reinterpret_cast(p_++); + if (x < 253) + return x; + if (x == 253) + return Get(); + if (x == 254) + return Get(); + return Get(); + } + int64_t VarInt() { + uint64_t x = VarUInt(); + return int64_t(x >> 1 ^ -(x & 1)); + } + + public: + BinaryReader(std::string_view buf) : p_(buf.data()) {} + SerializeFormat Format() const override { + return SerializeFormat::Binary; + } + + bool IsBool() override { return true; } + // Abuse how the function is called in serializer.h + bool IsNull() override { return !*p_++; } + bool IsInt() override { return true; } + bool IsInt64() override {return true;} + bool IsUInt64() override {return true;} + bool IsDouble() override {return true;}; + bool IsString() override {return true;} + + void GetNull() override {} + bool GetBool() override { return Get(); } + int GetInt() override { return VarInt(); } + int64_t GetInt64() override { return VarInt(); } + uint8_t GetUInt8() override { return Get(); } + uint32_t GetUInt32() override { return VarUInt(); } + uint64_t GetUInt64() override { return VarUInt(); } + double GetDouble() override { return Get(); } + std::string GetString() override { + if (auto n = VarUInt()) { + std::string ret(p_, n); + p_ += n; + return ret; + } + return ""; + } + + bool HasMember(const char* x) override { return true; } + std::unique_ptr operator[](const char* x) override { return {}; } + + void IterArray(std::function fn) override { + for (auto n = VarUInt(); n; n--) + fn(*this); + } + + void DoMember(const char*, std::function fn) override { + fn(*this); + } +}; + +class BinaryWriter : public Writer { + std::string buf_; + + template + void Pack(T x) { + auto i = buf_.size(); + buf_.resize(i + sizeof(x)); + *reinterpret_cast(buf_.data() + i) = x; + } + + void VarUInt(uint64_t n) { + if (n < 253) + Pack(n); + else if (n < 65536) { + Pack(253); + Pack(n); + } else if (n < 4294967296) { + Pack(254); + Pack(n); + } else { + Pack(255); + Pack(n); + } + } + void VarInt(int64_t n) { + VarUInt(uint64_t(n) << 1 ^ n >> 63); + } + + public: + SerializeFormat Format() const override { + return SerializeFormat::Binary; + } + std::string Take() { return std::move(buf_); } + + void Null() override { Pack(uint8_t(0)); } + void Bool(bool x) override { Pack(x); } + void Int(int x) override { VarInt(x); } + void Int64(int64_t x) override { VarInt(x); } + void UInt8(uint8_t x) override { Pack(x); } + void UInt32(uint32_t x) override { VarUInt(x); } + void UInt64(uint64_t x) override { VarUInt(x); } + void Double(double x) override { Pack(x); } + void String(const char* x) override { String(x, strlen(x)); } + void String(const char* x, size_t len) override { + VarUInt(len); + auto i = buf_.size(); + buf_.resize(i + len); + memcpy(buf_.data() + i, x, len); + } + void StartArray(size_t n) override { VarUInt(n); } + void EndArray() override {} + void StartObject() override {} + void EndObject() override {} + void Key(const char* name) override {} +}; diff --git a/src/serializers/json.h b/src/serializers/json.h index aa9d5f0f..d4f25f48 100644 --- a/src/serializers/json.h +++ b/src/serializers/json.h @@ -17,16 +17,17 @@ class JsonReader : public Reader { bool IsNull() override { return m_->IsNull(); } bool IsInt() override { return m_->IsInt(); } bool IsInt64() override { return m_->IsInt64(); } - bool IsUint64() override { return m_->IsUint64(); } + bool IsUInt64() override { return m_->IsUint64(); } bool IsDouble() override { return m_->IsDouble(); } bool IsString() override { return m_->IsString(); } void GetNull() override {} bool GetBool() override { return m_->GetBool(); } int GetInt() override { return m_->GetInt(); } - uint32_t GetUint32() override { return uint32_t(m_->GetUint64()); } int64_t GetInt64() override { return m_->GetInt64(); } - uint64_t GetUint64() override { return m_->GetUint64(); } + uint8_t GetUInt8() override { return uint8_t(m_->GetInt()); } + uint32_t GetUInt32() override { return uint32_t(m_->GetUint64()); } + uint64_t GetUInt64() override { return m_->GetUint64(); } double GetDouble() override { return m_->GetDouble(); } std::string GetString() override { return m_->GetString(); } @@ -83,9 +84,10 @@ class JsonWriter : public Writer { void Null() override { m_->Null(); } void Bool(bool x) override { m_->Bool(x); } void Int(int x) override { m_->Int(x); } - void Uint32(uint32_t x) override { m_->Uint64(x); } void Int64(int64_t x) override { m_->Int64(x); } - void Uint64(uint64_t x) override { m_->Uint64(x); } + void UInt8(uint8_t x) override { m_->Int(x); } + void UInt32(uint32_t x) override { m_->Uint64(x); } + void UInt64(uint64_t x) override { m_->Uint64(x); } void Double(double x) override { m_->Double(x); } void String(const char* x) override { m_->String(x); } void String(const char* x, size_t len) override { m_->String(x, len); } diff --git a/src/serializers/msgpack.h b/src/serializers/msgpack.h deleted file mode 100644 index 83ea5983..00000000 --- a/src/serializers/msgpack.h +++ /dev/null @@ -1,84 +0,0 @@ -#pragma once - -#include "serializer.h" - -#include - -class MessagePackReader : public Reader { - msgpack::unpacker* pk_; - msgpack::object_handle oh_; - - template - T Get() { - T ret = oh_.get().as(); - pk_->next(oh_); - return ret; - } - - public: - MessagePackReader(msgpack::unpacker* pk) : pk_(pk) { pk->next(oh_); } - SerializeFormat Format() const override { - return SerializeFormat::MessagePack; - } - - bool IsBool() override { return oh_.get().type == msgpack::type::BOOLEAN; } - bool IsNull() override { return oh_.get().is_nil(); } - bool IsInt() override { - return oh_.get().type == msgpack::type::POSITIVE_INTEGER || - oh_.get().type == msgpack::type::NEGATIVE_INTEGER; - } - bool IsInt64() override { return IsInt(); } - bool IsUint64() override { return IsInt(); } - bool IsDouble() override { return oh_.get().type == msgpack::type::FLOAT64; }; - bool IsString() override { return oh_.get().type == msgpack::type::STR; } - - void GetNull() override { pk_->next(oh_); } - bool GetBool() override { return Get(); } - int GetInt() override { return Get(); } - uint32_t GetUint32() override { return Get(); } - int64_t GetInt64() override { return Get(); } - uint64_t GetUint64() override { return Get(); } - double GetDouble() override { return Get(); } - std::string GetString() override { return Get(); } - - bool HasMember(const char* x) override { return true; } - std::unique_ptr operator[](const char* x) override { return {}; } - - void IterArray(std::function fn) override { - size_t n = Get(); - for (size_t i = 0; i < n; i++) - fn(*this); - } - - void DoMember(const char*, std::function fn) override { - fn(*this); - } -}; - -class MessagePackWriter : public Writer { - msgpack::packer* m_; - - public: - MessagePackWriter(msgpack::packer* m) : m_(m) {} - SerializeFormat Format() const override { - return SerializeFormat::MessagePack; - } - - void Null() override { m_->pack_nil(); } - void Bool(bool x) override { m_->pack(x); } - void Int(int x) override { m_->pack(x); } - void Uint32(uint32_t x) override { m_->pack(x); } - void Int64(int64_t x) override { m_->pack(x); } - void Uint64(uint64_t x) override { m_->pack(x); } - void Double(double x) override { m_->pack(x); } - void String(const char* x) override { m_->pack(x); } - // TODO Remove std::string - void String(const char* x, size_t len) override { - m_->pack(std::string(x, len)); - } - void StartArray(size_t n) override { m_->pack(n); } - void EndArray() override {} - void StartObject() override {} - void EndObject() override {} - void Key(const char* name) override {} -}; diff --git a/third_party/msgpack-c b/third_party/msgpack-c deleted file mode 160000 index 208595b2..00000000 --- a/third_party/msgpack-c +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 208595b2620cf6260ce3d6d4cf8543f13b206449