leveldb源码阅读随笔

  1. 数据结构
    1. slice
    2. Varint
  2. Key
  3. Log

数据结构

slice

1
2
3
4
5
6
// Excerpt of Slice: a (pointer, length) pair describing a run of bytes.
class Slice {

private:
const char* data_; // Points at externally stored bytes; the Slice itself holds no data. Because the length is tracked separately, the bytes may contain '\0'.
size_t size_; // Length of the referenced data.
};

Varint

变长整型: 用一个或多个字节表示一个数字, 32位整数最多占用5个字节。每个字节的低7位存放数据, 最高位为1表示后面的字节也是这个数字的一部分, 为0表示这是最后一个字节。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
// Writes `v` to `dst` as a varint and returns a pointer just past the last
// byte written.
//
// The value is emitted 7 bits at a time, least-significant group first. Every
// byte except the final one has its high bit (0x80) set to signal that more
// bytes follow. A 32-bit value occupies between 1 and 5 bytes, so `dst` must
// have room for up to 5 bytes.
char* EncodeVarint32(char* dst, uint32_t v) {
  // Store through an unsigned byte pointer so each write keeps the low 8 bits.
  uint8_t* out = reinterpret_cast<uint8_t*>(dst);
  const uint32_t kContinuationBit = 0x80;

  // Emit full 7-bit groups while more than 7 significant bits remain.
  while (v >= kContinuationBit) {
    *out++ = static_cast<uint8_t>(v | kContinuationBit);
    v >>= 7;
  }

  // Final group: high bit clear marks the end of the number.
  *out++ = static_cast<uint8_t>(v);
  return reinterpret_cast<char*>(out);
}

对外提供几个接口:

1
2
3
4
5
6
7
8
9
10
11
12
// Put*: take a destination std::string and a value. Presumably they append the
// fixed-width / varint encoding of `value` to *dst (bodies not shown here; confirm).
void PutFixed32(std::string* dst, uint32_t value)
void PutFixed64(std::string* dst, uint64_t value)
void PutVarint32(std::string* dst, uint32_t value)
void PutVarint64(std::string* dst, uint64_t value)

// GetVarint*: take a Slice and an output pointer and return bool. Presumably the
// bool reports whether a varint could be parsed from *input (confirm).
bool GetVarint32(Slice* input, uint32_t* value)
bool GetVarint64(Slice* input, uint64_t* value)

// Encode/DecodeFixed*: operate on a raw character buffer instead of a
// std::string. The caller presumably must supply a buffer large enough for the
// 32/64-bit value (confirm).
void EncodeFixed32(char* dst, uint32_t value)
void EncodeFixed64(char* dst, uint64_t value)
uint32_t DecodeFixed32(const char* ptr)
uint64_t DecodeFixed64(const char* ptr)

Key

三种Key的关系:

InternalKey : 作为sstable中单条record的key

1
2
3
4
5
6
7
8
LookupKey
+-----------------------------------------------------+
|        +------------------------------------------+ |
|        |               Internal Key               | |
| Length +------------------------+----------+------+ |
|        |        User Key        | Sequence | Type | |
|        +------------------------+----------+------+ |
+-----------------------------------------------------+

Log

使用顺序写入类初始化, Log写入流程:

  1. 检查当前block剩余空间(block_size - block_offset) 是否足够写入一个Header(7字节: checksum + length + type), 如果不足, 则切换到一个新block, 上一个block的剩余部分用0填充
  2. 检查record slice是否能够一次写入, 如果不能则分段存储, 使用type标记如何存储
  3. 将数据组成指定格式存到磁盘
    1. 创建一个header buffer char[7] (Header大小为7字节): 前4个字节是根据record type和数据计算出的crc校验码, 后3个字节依次写入length(2字节)和type(1字节)
    2. 用header数组和record数据分别生成slice, 追加到dest_的缓冲区中, 然后flush到磁盘
    3. 更新block内偏移(block_offset_), 增加量为Header大小加上本次写入的数据长度
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
// Appends `slice` to the log as one logical record, splitting it into as many
// physical fragments as needed to respect block boundaries.
//
// Returns the status of the last EmitPhysicalRecord call; emission stops at
// the first fragment that fails. An empty slice still produces exactly one
// zero-length fragment.
Status Writer::AddRecord(const Slice& slice) {
  const char* cursor = slice.data();
  size_t remaining = slice.size();

  Status status;
  bool is_first = true;
  while (true) {
    // Space left in the current block.
    const int room = kBlockSize - block_offset_;
    assert(room >= 0);
    if (room < kHeaderSize) {
      // Not even a header fits: pad the tail with zero bytes and start a new
      // block.
      if (room > 0) {
        // The padding literal below relies on kHeaderSize being 7, so at most
        // 6 bytes are ever needed.
        static_assert(kHeaderSize == 7, "");
        dest_->Append(Slice("\x00\x00\x00\x00\x00\x00", room));
      }
      block_offset_ = 0;
    }

    // Invariant: a block never ends up with fewer than kHeaderSize bytes free.
    assert(kBlockSize - block_offset_ - kHeaderSize >= 0);

    // Payload capacity of the current block once the header is accounted for.
    const size_t capacity = kBlockSize - block_offset_ - kHeaderSize;
    size_t chunk = capacity;
    if (remaining < capacity) {
      chunk = remaining;
    }
    const bool is_last = (remaining == chunk);

    // A record that fits entirely in the available space is emitted as a
    // single kFullType fragment. Otherwise the pieces are tagged kFirstType,
    // zero or more kMiddleType, and finally kLastType.
    RecordType type = kMiddleType;
    if (is_first) {
      type = is_last ? kFullType : kFirstType;
    } else if (is_last) {
      type = kLastType;
    }

    // Frame the fragment (header + payload) and hand it to the destination.
    status = EmitPhysicalRecord(type, cursor, chunk);
    cursor += chunk;
    remaining -= chunk;
    is_first = false;

    if (!status.ok() || remaining == 0) {
      break;
    }
  }
  return status;
}

// Writes one physical record (a kHeaderSize-byte header followed by `length`
// payload bytes from `ptr`) to dest_ and flushes it.
//
// Header bytes 4 and 5 hold the payload length (low byte first) and byte 6
// holds the record type `t`. The masked CRC is stored via EncodeFixed32 at the
// start of the header buffer.
//
// Returns the first non-OK status from Append/Flush, or OK. block_offset_ is
// advanced by the full record size regardless of the outcome.
Status Writer::EmitPhysicalRecord(RecordType t, const char* ptr,
                                  size_t length) {
  assert(length <= 0xffff);  // The length field is only two bytes wide.
  assert(block_offset_ + kHeaderSize + length <= kBlockSize);

  char header[kHeaderSize];

  // Checksum covers the record type (via the precomputed type_crc_ seed) and
  // the payload; it is masked before being stored.
  const uint32_t stored_crc =
      crc32c::Mask(crc32c::Extend(type_crc_[t], ptr, length));
  EncodeFixed32(header, stored_crc);

  // Length (little-endian, two bytes) followed by the record type.
  header[4] = static_cast<char>(length & 0xff);
  header[5] = static_cast<char>(length >> 8);
  header[6] = static_cast<char>(t);

  // Header, then payload, then flush. Each step runs only if the previous one
  // succeeded.
  Status result = dest_->Append(Slice(header, kHeaderSize));
  if (result.ok()) {
    result = dest_->Append(Slice(ptr, length));
  }
  if (result.ok()) {
    result = dest_->Flush();
  }

  block_offset_ += kHeaderSize + length;
  return result;
}
script>