MessagePack -> custom binary format

This commit is contained in:
Fangrui Song 2018-04-14 16:48:56 -07:00
parent 73bd987b1a
commit 79df82d4a1
11 changed files with 208 additions and 169 deletions

3
.gitmodules vendored
View File

@ -10,6 +10,3 @@
[submodule "third_party/loguru"] [submodule "third_party/loguru"]
path = third_party/loguru path = third_party/loguru
url = https://github.com/emilk/loguru url = https://github.com/emilk/loguru
[submodule "third_party/msgpack-c"]
path = third_party/msgpack-c
url = https://github.com/msgpack/msgpack-c

View File

@ -135,8 +135,7 @@ target_include_directories(ccls PRIVATE
third_party/rapidjson/include third_party/rapidjson/include
third_party/sparsepp third_party/sparsepp
third_party/loguru third_party/loguru
third_party/doctest third_party/doctest)
third_party/msgpack-c/include)
### Install ### Install

View File

@ -59,10 +59,10 @@ struct RealCacheManager : ICacheManager {
std::string AppendSerializationFormat(const std::string& base) { std::string AppendSerializationFormat(const std::string& base) {
switch (g_config->cacheFormat) { switch (g_config->cacheFormat) {
case SerializeFormat::Binary:
return base + ".blob";
case SerializeFormat::Json: case SerializeFormat::Json:
return base + ".json"; return base + ".json";
case SerializeFormat::MessagePack:
return base + ".mpack";
} }
} }
}; };

View File

@ -38,12 +38,10 @@ struct Config {
// "json" generates `cacheDirectory/.../xxx.json` files which can be pretty // "json" generates `cacheDirectory/.../xxx.json` files which can be pretty
// printed with jq. // printed with jq.
// //
// "msgpack" uses a compact binary serialization format (the underlying wire // "binary" uses a compact binary serialization format.
// format is [MessagePack](https://msgpack.org/index.html)) which typically // It is not schema-aware and you need to re-index whenever a struct
// takes only 60% of the corresponding JSON size, but is difficult to inspect.
// msgpack does not store map keys and you need to re-index whenever a struct
// member has changed. // member has changed.
SerializeFormat cacheFormat = SerializeFormat::MessagePack; SerializeFormat cacheFormat = SerializeFormat::Binary;
// Value to use for clang -resource-dir if not present in // Value to use for clang -resource-dir if not present in
// compile_commands.json. // compile_commands.json.
// //

View File

@ -83,8 +83,8 @@ IndexFile* FileConsumer::TryConsumeFile(
if (clang_getFileUniqueID(file, &file_id) != 0) { if (clang_getFileUniqueID(file, &file_id) != 0) {
std::string file_name = FileName(file); std::string file_name = FileName(file);
if (!file_name.empty()) { if (!file_name.empty()) {
LOG_S(ERROR) << "Could not get unique file id for " << file_name // LOG_S(ERROR) << "Could not get unique file id for " << file_name
<< " when parsing " << parse_file_; // << " when parsing " << parse_file_;
} }
return nullptr; return nullptr;
} }

View File

@ -1,8 +1,8 @@
#include "serializer.h" #include "serializer.h"
#include "filesystem.hh" #include "filesystem.hh"
#include "serializers/binary.h"
#include "serializers/json.h" #include "serializers/json.h"
#include "serializers/msgpack.h"
#include "indexer.h" #include "indexer.h"
@ -15,12 +15,10 @@ bool gTestOutputMode = false;
//// Elementary types //// Elementary types
void Reflect(Reader& visitor, uint8_t& value) { void Reflect(Reader& visitor, uint8_t& value) {
if (!visitor.IsInt()) value = visitor.GetUInt8();
throw std::invalid_argument("uint8_t");
value = (uint8_t)visitor.GetInt();
} }
void Reflect(Writer& visitor, uint8_t& value) { void Reflect(Writer& visitor, uint8_t& value) {
visitor.Int(value); visitor.UInt8(value);
} }
void Reflect(Reader& visitor, short& value) { void Reflect(Reader& visitor, short& value) {
@ -51,12 +49,12 @@ void Reflect(Writer& visitor, int& value) {
} }
void Reflect(Reader& visitor, unsigned& value) { void Reflect(Reader& visitor, unsigned& value) {
if (!visitor.IsUint64()) if (!visitor.IsUInt64())
throw std::invalid_argument("unsigned"); throw std::invalid_argument("unsigned");
value = visitor.GetUint32(); value = visitor.GetUInt32();
} }
void Reflect(Writer& visitor, unsigned& value) { void Reflect(Writer& visitor, unsigned& value) {
visitor.Uint32(value); visitor.UInt32(value);
} }
void Reflect(Reader& visitor, long& value) { void Reflect(Reader& visitor, long& value) {
@ -69,12 +67,12 @@ void Reflect(Writer& visitor, long& value) {
} }
void Reflect(Reader& visitor, unsigned long& value) { void Reflect(Reader& visitor, unsigned long& value) {
if (!visitor.IsUint64()) if (!visitor.IsUInt64())
throw std::invalid_argument("unsigned long"); throw std::invalid_argument("unsigned long");
value = (unsigned long)visitor.GetUint64(); value = (unsigned long)visitor.GetUInt64();
} }
void Reflect(Writer& visitor, unsigned long& value) { void Reflect(Writer& visitor, unsigned long& value) {
visitor.Uint64(value); visitor.UInt64(value);
} }
void Reflect(Reader& visitor, long long& value) { void Reflect(Reader& visitor, long long& value) {
@ -87,12 +85,12 @@ void Reflect(Writer& visitor, long long& value) {
} }
void Reflect(Reader& visitor, unsigned long long& value) { void Reflect(Reader& visitor, unsigned long long& value) {
if (!visitor.IsUint64()) if (!visitor.IsUInt64())
throw std::invalid_argument("unsigned long long"); throw std::invalid_argument("unsigned long long");
value = visitor.GetUint64(); value = visitor.GetUInt64();
} }
void Reflect(Writer& visitor, unsigned long long& value) { void Reflect(Writer& visitor, unsigned long long& value) {
visitor.Uint64(value); visitor.UInt64(value);
} }
void Reflect(Reader& visitor, double& value) { void Reflect(Reader& visitor, double& value) {
@ -302,6 +300,7 @@ void Reflect(TVisitor& visitor, IndexFile& value) {
} }
void Reflect(Reader& visitor, std::monostate&) { void Reflect(Reader& visitor, std::monostate&) {
assert(visitor.Format() == SerializeFormat::Json);
visitor.GetNull(); visitor.GetNull();
} }
@ -311,22 +310,31 @@ void Reflect(Writer& visitor, std::monostate&) {
void Reflect(Reader& visitor, SerializeFormat& value) { void Reflect(Reader& visitor, SerializeFormat& value) {
std::string fmt = visitor.GetString(); std::string fmt = visitor.GetString();
value = fmt[0] == 'm' ? SerializeFormat::MessagePack : SerializeFormat::Json; value = fmt[0] == 'b' ? SerializeFormat::Binary : SerializeFormat::Json;
} }
void Reflect(Writer& visitor, SerializeFormat& value) { void Reflect(Writer& visitor, SerializeFormat& value) {
switch (value) { switch (value) {
case SerializeFormat::Binary:
visitor.String("binary");
break;
case SerializeFormat::Json: case SerializeFormat::Json:
visitor.String("json"); visitor.String("json");
break; break;
case SerializeFormat::MessagePack:
visitor.String("msgpack");
break;
} }
} }
std::string Serialize(SerializeFormat format, IndexFile& file) { std::string Serialize(SerializeFormat format, IndexFile& file) {
switch (format) { switch (format) {
case SerializeFormat::Binary: {
BinaryWriter writer;
int major = IndexFile::kMajorVersion;
int minor = IndexFile::kMinorVersion;
Reflect(writer, major);
Reflect(writer, minor);
Reflect(writer, file);
return writer.Take();
}
case SerializeFormat::Json: { case SerializeFormat::Json: {
rapidjson::StringBuffer output; rapidjson::StringBuffer output;
rapidjson::PrettyWriter<rapidjson::StringBuffer> writer(output); rapidjson::PrettyWriter<rapidjson::StringBuffer> writer(output);
@ -343,17 +351,6 @@ std::string Serialize(SerializeFormat format, IndexFile& file) {
Reflect(json_writer, file); Reflect(json_writer, file);
return output.GetString(); return output.GetString();
} }
case SerializeFormat::MessagePack: {
msgpack::sbuffer buf;
msgpack::packer<msgpack::sbuffer> pk(&buf);
MessagePackWriter msgpack_writer(&pk);
uint64_t magic = IndexFile::kMajorVersion;
int version = IndexFile::kMinorVersion;
Reflect(msgpack_writer, magic);
Reflect(msgpack_writer, version);
Reflect(msgpack_writer, file);
return std::string(buf.data(), buf.size());
}
} }
return ""; return "";
} }
@ -369,6 +366,26 @@ std::unique_ptr<IndexFile> Deserialize(
std::unique_ptr<IndexFile> file; std::unique_ptr<IndexFile> file;
switch (format) { switch (format) {
case SerializeFormat::Binary: {
try {
int major, minor;
if (serialized_index_content.size() < 8)
throw std::invalid_argument("Invalid");
BinaryReader reader(serialized_index_content);
Reflect(reader, major);
Reflect(reader, minor);
if (major != IndexFile::kMajorVersion ||
minor != IndexFile::kMinorVersion)
throw std::invalid_argument("Invalid version");
file = std::make_unique<IndexFile>(path, file_content);
Reflect(reader, *file);
} catch (std::invalid_argument& e) {
LOG_S(INFO) << "Failed to deserialize '" << path
<< "': " << e.what();
return nullptr;
}
break;
}
case SerializeFormat::Json: { case SerializeFormat::Json: {
rapidjson::Document reader; rapidjson::Document reader;
if (gTestOutputMode || !expected_version) { if (gTestOutputMode || !expected_version) {
@ -395,32 +412,6 @@ std::unique_ptr<IndexFile> Deserialize(
} }
break; break;
} }
case SerializeFormat::MessagePack: {
try {
int major, minor;
if (serialized_index_content.size() < 8)
throw std::invalid_argument("Invalid");
msgpack::unpacker upk;
upk.reserve_buffer(serialized_index_content.size());
memcpy(upk.buffer(), serialized_index_content.data(),
serialized_index_content.size());
upk.buffer_consumed(serialized_index_content.size());
file = std::make_unique<IndexFile>(path, file_content);
MessagePackReader reader(&upk);
Reflect(reader, major);
Reflect(reader, minor);
if (major != IndexFile::kMajorVersion ||
minor != IndexFile::kMinorVersion)
throw std::invalid_argument("Invalid version");
Reflect(reader, *file);
} catch (std::invalid_argument& e) {
LOG_S(INFO) << "Failed to deserialize msgpack '" << path
<< "': " << e.what();
return nullptr;
}
break;
}
} }
// Restore non-serialized state. // Restore non-serialized state.

View File

@ -16,7 +16,7 @@
#include <variant> #include <variant>
#include <vector> #include <vector>
enum class SerializeFormat { Json, MessagePack }; enum class SerializeFormat { Binary, Json };
class Reader { class Reader {
public: public:
@ -27,16 +27,17 @@ class Reader {
virtual bool IsNull() = 0; virtual bool IsNull() = 0;
virtual bool IsInt() = 0; virtual bool IsInt() = 0;
virtual bool IsInt64() = 0; virtual bool IsInt64() = 0;
virtual bool IsUint64() = 0; virtual bool IsUInt64() = 0;
virtual bool IsDouble() = 0; virtual bool IsDouble() = 0;
virtual bool IsString() = 0; virtual bool IsString() = 0;
virtual void GetNull() = 0; virtual void GetNull() = 0;
virtual bool GetBool() = 0; virtual bool GetBool() = 0;
virtual uint8_t GetUInt8() = 0;
virtual int GetInt() = 0; virtual int GetInt() = 0;
virtual uint32_t GetUint32() = 0; virtual uint32_t GetUInt32() = 0;
virtual int64_t GetInt64() = 0; virtual int64_t GetInt64() = 0;
virtual uint64_t GetUint64() = 0; virtual uint64_t GetUInt64() = 0;
virtual double GetDouble() = 0; virtual double GetDouble() = 0;
virtual std::string GetString() = 0; virtual std::string GetString() = 0;
@ -55,9 +56,10 @@ class Writer {
virtual void Null() = 0; virtual void Null() = 0;
virtual void Bool(bool x) = 0; virtual void Bool(bool x) = 0;
virtual void Int(int x) = 0; virtual void Int(int x) = 0;
virtual void Uint32(uint32_t x) = 0;
virtual void Int64(int64_t x) = 0; virtual void Int64(int64_t x) = 0;
virtual void Uint64(uint64_t x) = 0; virtual void UInt8(uint8_t x) = 0;
virtual void UInt32(uint32_t x) = 0;
virtual void UInt64(uint64_t x) = 0;
virtual void Double(double x) = 0; virtual void Double(double x) = 0;
virtual void String(const char* x) = 0; virtual void String(const char* x) = 0;
virtual void String(const char* x, size_t len) = 0; virtual void String(const char* x, size_t len) = 0;
@ -192,9 +194,11 @@ void Reflect(Reader& visitor, std::optional<T>& value) {
} }
template <typename T> template <typename T>
void Reflect(Writer& visitor, std::optional<T>& value) { void Reflect(Writer& visitor, std::optional<T>& value) {
if (value) if (value) {
if (visitor.Format() != SerializeFormat::Json)
visitor.UInt8(1);
Reflect(visitor, *value); Reflect(visitor, *value);
else } else
visitor.Null(); visitor.Null();
} }
@ -211,9 +215,11 @@ void Reflect(Reader& visitor, Maybe<T>& value) {
} }
template <typename T> template <typename T>
void Reflect(Writer& visitor, Maybe<T>& value) { void Reflect(Writer& visitor, Maybe<T>& value) {
if (value) if (value) {
if (visitor.Format() != SerializeFormat::Json)
visitor.UInt8(1);
Reflect(visitor, *value); Reflect(visitor, *value);
else } else
visitor.Null(); visitor.Null();
} }

131
src/serializers/binary.h Normal file
View File

@ -0,0 +1,131 @@
#pragma once
#include "serializer.h"
#include <assert.h>
#include <string.h>
#include <stdexcept>
// Reader for the compact binary cache format produced by BinaryWriter.
//
// The format carries no type tags or member names: values are decoded purely
// by position, in the order the Reflect() functions ask for them. Fixed-size
// values are stored as their in-memory bytes. Unsigned integers use a
// variable-length prefix encoding (see VarUInt) and signed integers are
// zigzag-mapped onto it (see VarInt).
//
// Every read is bounds-checked against the end of the input buffer; running
// out of data throws std::invalid_argument instead of reading past the buffer.
// The reader does not own the buffer, so the underlying storage must outlive
// this object.
class BinaryReader : public Reader {
  const char* p_;    // Next unread byte.
  const char* end_;  // One past the last byte of the input buffer.

  // Throws std::invalid_argument unless at least |n| unread bytes remain.
  void Require(uint64_t n) const {
    if (static_cast<uint64_t>(end_ - p_) < n)
      throw std::invalid_argument("unexpected end of binary data");
  }

  // Reads sizeof(T) raw bytes as a T and advances the cursor. memcpy is used
  // because the cursor may sit at any byte offset, so dereferencing a casted
  // pointer would be an unaligned (undefined) access.
  template <typename T>
  T Get() {
    Require(sizeof(T));
    T ret;
    memcpy(&ret, p_, sizeof(T));
    p_ += sizeof(T);
    return ret;
  }

  // Decodes a variable-length unsigned integer. A first byte below 253 is the
  // value itself; 253, 254 and 255 announce that a uint16_t, uint32_t or
  // uint64_t payload follows, respectively.
  uint64_t VarUInt() {
    const uint8_t tag = Get<uint8_t>();
    if (tag < 253)
      return tag;
    if (tag == 253)
      return Get<uint16_t>();
    if (tag == 254)
      return Get<uint32_t>();
    return Get<uint64_t>();
  }

  // Decodes a zigzag-encoded signed integer: the lowest bit holds the sign and
  // the remaining bits hold the magnitude.
  int64_t VarInt() {
    const uint64_t x = VarUInt();
    return static_cast<int64_t>((x >> 1) ^ -(x & 1));
  }

 public:
  // Reads from |buf|; only a view is kept, no copy is made.
  BinaryReader(std::string_view buf)
      : p_(buf.data()), end_(buf.data() + buf.size()) {}

  SerializeFormat Format() const override {
    return SerializeFormat::Binary;
  }

  // The format stores no type information, so the type predicates cannot
  // inspect anything and simply report success.
  bool IsBool() override { return true; }
  // Abuse how the function is called in serializer.h: this CONSUMES one byte
  // and reports whether it was zero, matching the single 0 byte written by
  // BinaryWriter::Null().
  bool IsNull() override { return Get<uint8_t>() == 0; }
  bool IsInt() override { return true; }
  bool IsInt64() override { return true; }
  bool IsUInt64() override { return true; }
  bool IsDouble() override { return true; }
  bool IsString() override { return true; }

  // Nothing to consume here; the marker byte was already eaten by IsNull().
  void GetNull() override {}
  bool GetBool() override { return Get<bool>(); }
  int GetInt() override { return static_cast<int>(VarInt()); }
  int64_t GetInt64() override { return VarInt(); }
  uint8_t GetUInt8() override { return Get<uint8_t>(); }
  uint32_t GetUInt32() override { return static_cast<uint32_t>(VarUInt()); }
  uint64_t GetUInt64() override { return VarUInt(); }
  double GetDouble() override { return Get<double>(); }

  // Strings are stored as a VarUInt length followed by that many raw bytes.
  std::string GetString() override {
    const uint64_t n = VarUInt();
    Require(n);
    std::string ret(p_, static_cast<size_t>(n));
    p_ += n;
    return ret;
  }

  // Members are not keyed in this format, so every member is reported as
  // present and lookup by name yields an empty pointer.
  bool HasMember(const char* x) override { return true; }
  std::unique_ptr<Reader> operator[](const char* x) override { return {}; }

  // Arrays are stored as a VarUInt element count followed by the elements;
  // |fn| is invoked once per element with this reader.
  void IterArray(std::function<void(Reader&)> fn) override {
    for (auto n = VarUInt(); n; n--)
      fn(*this);
  }

  // The member name is ignored; |fn| reads the member at the current position.
  void DoMember(const char*, std::function<void(Reader&)> fn) override {
    fn(*this);
  }
};
// Writer for the compact binary cache format read back by BinaryReader.
//
// Output is accumulated in an internal string. The format has no type tags,
// object delimiters or member names: values are emitted back to back in the
// order the Reflect() functions visit them. Fixed-size values are stored as
// their in-memory bytes. Unsigned integers use a variable-length prefix
// encoding (see VarUInt) and signed integers are zigzag-mapped onto it
// (see VarInt).
class BinaryWriter : public Writer {
  std::string buf_;  // Serialized bytes produced so far.

  // Appends the raw bytes of |x|. Going through append() instead of storing
  // via a casted pointer avoids an unaligned write into the string buffer.
  template <typename T>
  void Pack(T x) {
    buf_.append(reinterpret_cast<const char*>(&x), sizeof(x));
  }

  // Encodes an unsigned integer with a one-byte prefix: values below 253 are
  // stored in that single byte; otherwise the byte is 253, 254 or 255 and is
  // followed by a uint16_t, uint32_t or uint64_t payload, respectively.
  void VarUInt(uint64_t n) {
    if (n < 253) {
      Pack(static_cast<uint8_t>(n));
    } else if (n < 65536) {
      Pack(static_cast<uint8_t>(253));
      Pack(static_cast<uint16_t>(n));
    } else if (n < 4294967296) {
      Pack(static_cast<uint8_t>(254));
      Pack(static_cast<uint32_t>(n));
    } else {
      Pack(static_cast<uint8_t>(255));
      Pack(static_cast<uint64_t>(n));
    }
  }

  // Zigzag-encodes a signed integer (sign moved to the lowest bit) so that
  // values of small magnitude, positive or negative, stay short.
  void VarInt(int64_t n) {
    VarUInt((static_cast<uint64_t>(n) << 1) ^ static_cast<uint64_t>(n >> 63));
  }

 public:
  SerializeFormat Format() const override {
    return SerializeFormat::Binary;
  }

  // Hands the accumulated bytes to the caller by moving them out of the
  // internal buffer.
  std::string Take() { return std::move(buf_); }

  // A null is a single 0 byte.
  void Null() override { Pack(static_cast<uint8_t>(0)); }
  void Bool(bool x) override { Pack(x); }
  void Int(int x) override { VarInt(x); }
  void Int64(int64_t x) override { VarInt(x); }
  void UInt8(uint8_t x) override { Pack(x); }
  void UInt32(uint32_t x) override { VarUInt(x); }
  void UInt64(uint64_t x) override { VarUInt(x); }
  void Double(double x) override { Pack(x); }

  // Strings are written as a VarUInt length followed by the raw bytes, with
  // no terminator.
  void String(const char* x) override { String(x, strlen(x)); }
  void String(const char* x, size_t len) override {
    VarUInt(len);
    if (len)
      buf_.append(x, len);
  }

  // Arrays are written as a VarUInt element count; the elements follow.
  void StartArray(size_t n) override { VarUInt(n); }
  // Array ends, object boundaries and member names produce no output.
  void EndArray() override {}
  void StartObject() override {}
  void EndObject() override {}
  void Key(const char* name) override {}
};

View File

@ -17,16 +17,17 @@ class JsonReader : public Reader {
bool IsNull() override { return m_->IsNull(); } bool IsNull() override { return m_->IsNull(); }
bool IsInt() override { return m_->IsInt(); } bool IsInt() override { return m_->IsInt(); }
bool IsInt64() override { return m_->IsInt64(); } bool IsInt64() override { return m_->IsInt64(); }
bool IsUint64() override { return m_->IsUint64(); } bool IsUInt64() override { return m_->IsUint64(); }
bool IsDouble() override { return m_->IsDouble(); } bool IsDouble() override { return m_->IsDouble(); }
bool IsString() override { return m_->IsString(); } bool IsString() override { return m_->IsString(); }
void GetNull() override {} void GetNull() override {}
bool GetBool() override { return m_->GetBool(); } bool GetBool() override { return m_->GetBool(); }
int GetInt() override { return m_->GetInt(); } int GetInt() override { return m_->GetInt(); }
uint32_t GetUint32() override { return uint32_t(m_->GetUint64()); }
int64_t GetInt64() override { return m_->GetInt64(); } int64_t GetInt64() override { return m_->GetInt64(); }
uint64_t GetUint64() override { return m_->GetUint64(); } uint8_t GetUInt8() override { return uint8_t(m_->GetInt()); }
uint32_t GetUInt32() override { return uint32_t(m_->GetUint64()); }
uint64_t GetUInt64() override { return m_->GetUint64(); }
double GetDouble() override { return m_->GetDouble(); } double GetDouble() override { return m_->GetDouble(); }
std::string GetString() override { return m_->GetString(); } std::string GetString() override { return m_->GetString(); }
@ -83,9 +84,10 @@ class JsonWriter : public Writer {
void Null() override { m_->Null(); } void Null() override { m_->Null(); }
void Bool(bool x) override { m_->Bool(x); } void Bool(bool x) override { m_->Bool(x); }
void Int(int x) override { m_->Int(x); } void Int(int x) override { m_->Int(x); }
void Uint32(uint32_t x) override { m_->Uint64(x); }
void Int64(int64_t x) override { m_->Int64(x); } void Int64(int64_t x) override { m_->Int64(x); }
void Uint64(uint64_t x) override { m_->Uint64(x); } void UInt8(uint8_t x) override { m_->Int(x); }
void UInt32(uint32_t x) override { m_->Uint64(x); }
void UInt64(uint64_t x) override { m_->Uint64(x); }
void Double(double x) override { m_->Double(x); } void Double(double x) override { m_->Double(x); }
void String(const char* x) override { m_->String(x); } void String(const char* x) override { m_->String(x); }
void String(const char* x, size_t len) override { m_->String(x, len); } void String(const char* x, size_t len) override { m_->String(x, len); }

View File

@ -1,84 +0,0 @@
#pragma once
#include "serializer.h"
#include <msgpack.hpp>
// Reader implementation backed by a msgpack::unpacker. The object most
// recently pulled from the unpacker is kept in oh_; the Is*() predicates
// inspect it and the Get*() accessors convert it and then pull the next one.
class MessagePackReader : public Reader {
// Unpacker the objects are pulled from; stored as a raw pointer.
msgpack::unpacker* pk_;
// The object that the next Is*()/Get*() call operates on.
msgpack::object_handle oh_;
// Converts the current object to T, then asks the unpacker for the next
// object so that the following call sees it.
template <typename T>
T Get() {
T ret = oh_.get().as<T>();
pk_->next(oh_);
return ret;
}
public:
// Stores the unpacker and immediately pulls the first object into oh_.
MessagePackReader(msgpack::unpacker* pk) : pk_(pk) { pk->next(oh_); }
SerializeFormat Format() const override {
return SerializeFormat::MessagePack;
}
// Type predicates: look at the current object without consuming it.
bool IsBool() override { return oh_.get().type == msgpack::type::BOOLEAN; }
bool IsNull() override { return oh_.get().is_nil(); }
bool IsInt() override {
return oh_.get().type == msgpack::type::POSITIVE_INTEGER ||
oh_.get().type == msgpack::type::NEGATIVE_INTEGER;
}
// Both 64-bit predicates accept whatever IsInt() accepts.
bool IsInt64() override { return IsInt(); }
bool IsUint64() override { return IsInt(); }
bool IsDouble() override { return oh_.get().type == msgpack::type::FLOAT64; };
bool IsString() override { return oh_.get().type == msgpack::type::STR; }
// Skips the current object by pulling the next one.
void GetNull() override { pk_->next(oh_); }
// Accessors: convert the current object and advance (see Get()).
bool GetBool() override { return Get<bool>(); }
int GetInt() override { return Get<int>(); }
uint32_t GetUint32() override { return Get<uint32_t>(); }
int64_t GetInt64() override { return Get<int64_t>(); }
uint64_t GetUint64() override { return Get<uint64_t>(); }
double GetDouble() override { return Get<double>(); }
std::string GetString() override { return Get<std::string>(); }
// Member names are not consulted: every member is reported as present and
// lookup by name returns an empty pointer.
bool HasMember(const char* x) override { return true; }
std::unique_ptr<Reader> operator[](const char* x) override { return {}; }
// Reads an element count as a plain integer object, then calls fn once per
// element with this reader.
void IterArray(std::function<void(Reader&)> fn) override {
size_t n = Get<size_t>();
for (size_t i = 0; i < n; i++)
fn(*this);
}
// The member name is ignored; fn reads from the current position.
void DoMember(const char*, std::function<void(Reader&)> fn) override {
fn(*this);
}
};
// Writer implementation that forwards every value to a msgpack packer
// writing into a msgpack::sbuffer.
class MessagePackWriter : public Writer {
// Packer that receives the values; stored as a raw pointer.
msgpack::packer<msgpack::sbuffer>* m_;
public:
MessagePackWriter(msgpack::packer<msgpack::sbuffer>* m) : m_(m) {}
SerializeFormat Format() const override {
return SerializeFormat::MessagePack;
}
// Scalars and strings are handed straight to the packer.
void Null() override { m_->pack_nil(); }
void Bool(bool x) override { m_->pack(x); }
void Int(int x) override { m_->pack(x); }
void Uint32(uint32_t x) override { m_->pack(x); }
void Int64(int64_t x) override { m_->pack(x); }
void Uint64(uint64_t x) override { m_->pack(x); }
void Double(double x) override { m_->pack(x); }
void String(const char* x) override { m_->pack(x); }
// TODO Remove std::string
// A temporary std::string is built from the (pointer, length) pair before
// packing.
void String(const char* x, size_t len) override {
m_->pack(std::string(x, len));
}
// The element count is packed as an ordinary value.
void StartArray(size_t n) override { m_->pack(n); }
// Array ends, object boundaries and member names produce no output.
void EndArray() override {}
void StartObject() override {}
void EndObject() override {}
void Key(const char* name) override {}
};

@ -1 +0,0 @@
Subproject commit 208595b2620cf6260ce3d6d4cf8543f13b206449