Skip to content

Commit 2924f29

Browse files
committed
perf: rearrange data keys to lead with the table name, improving the cache hit rate for data belonging to the same table
1 parent 7043e74 commit 2924f29

File tree

1 file changed

+42
-39
lines changed

1 file changed

+42
-39
lines changed

src/storage/table_codec.rs

Lines changed: 42 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@ use crate::types::tuple::{Tuple, TupleId};
66
const COLUMNS_MIN: u8 = 0;
77
const COLUMNS_MAX: u8 = 1;
88

9+
const TUPLE_ID_LEN: usize = 16;
10+
const COLUMNS_ID_LEN: usize = 10;
11+
912
#[derive(Clone)]
1013
pub struct TableCodec {
1114
pub table: TableCatalog
@@ -15,10 +18,10 @@ impl TableCodec {
1518
pub fn tuple_bound(&self) -> (Vec<u8>, Vec<u8>) {
1619
let op = |bound_id| {
1720
format!(
18-
"Data_{}_{:0width$}",
21+
"{}_Data_{:0width$}",
1922
self.table.name,
2023
bound_id,
21-
width = std::mem::size_of::<i64>() * 2 + 3
24+
width = TUPLE_ID_LEN
2225
)
2326
};
2427

@@ -28,7 +31,7 @@ impl TableCodec {
2831
pub fn columns_bound(name: &String) -> (Vec<u8>, Vec<u8>) {
2932
let op = |bound_id| {
3033
format!(
31-
"Catalog_{}_{}",
34+
"{}_Catalog_{}",
3235
name,
3336
bound_id
3437
)
@@ -37,7 +40,7 @@ impl TableCodec {
3740
(op(COLUMNS_MIN).into_bytes(), op(COLUMNS_MAX).into_bytes())
3841
}
3942

40-
/// Key: Data_TableName_RowID(Sorted)
43+
/// Key: TableName_Data_RowID(Sorted)
4144
/// Value: Tuple
4245
pub fn encode_tuple(&self, tuple: &Tuple) -> (Bytes, Bytes) {
4346
let key = self.encode_tuple_key(&tuple.id.unwrap());
@@ -47,10 +50,10 @@ impl TableCodec {
4750

4851
pub fn encode_tuple_key(&self, tuple_id: &TupleId) -> Vec<u8> {
4952
format!(
50-
"Data_{}_{:0width$}",
53+
"{}_Data_{:0width$}",
5154
self.table.name,
5255
tuple_id,
53-
width = std::mem::size_of::<i64>() * 2 + 3
56+
width = TUPLE_ID_LEN
5457
).into_bytes()
5558
}
5659

@@ -68,7 +71,7 @@ impl TableCodec {
6871
}))
6972
}
7073

71-
/// Key: Catalog_TableName_0_ColumnName_ColumnId
74+
/// Key: TableName_Catalog_0_ColumnName_ColumnId
7275
/// Value: ColumnCatalog
7376
///
7477
/// Tips: the `0` is used for the bound range
@@ -78,12 +81,12 @@ impl TableCodec {
7881
bincode::serialize(&col).ok()
7982
.map(|bytes| {
8083
let key = format!(
81-
"Catalog_{}_{}_{}_{:0width$}",
84+
"{}_Catalog_{}_{}_{:0width$}",
8285
table_name,
8386
COLUMNS_MIN,
8487
col.name,
8588
col.id,
86-
width = std::mem::size_of::<u32>() * 2 + 2
89+
width = COLUMNS_ID_LEN
8790
);
8891

8992
(Bytes::from(key.into_bytes()), Bytes::from(bytes))
@@ -93,7 +96,7 @@ impl TableCodec {
9396
pub fn decode_column(key: &[u8], bytes: &[u8]) -> Option<(TableName, ColumnCatalog)> {
9497
String::from_utf8(key.to_owned()).ok()?
9598
.split("_")
96-
.nth(1)
99+
.nth(0)
97100
.and_then(|table_name| {
98101
bincode::deserialize::<ColumnCatalog>(bytes).ok()
99102
.and_then(|col| {
@@ -110,7 +113,7 @@ mod tests {
110113
use std::sync::Arc;
111114
use itertools::Itertools;
112115
use crate::catalog::{ColumnCatalog, ColumnDesc, TableCatalog};
113-
use crate::storage::table_codec::TableCodec;
116+
use crate::storage::table_codec::{COLUMNS_ID_LEN, TableCodec, TUPLE_ID_LEN};
114117
use crate::types::LogicalType;
115118
use crate::types::tuple::Tuple;
116119
use crate::types::value::DataValue;
@@ -145,10 +148,10 @@ mod tests {
145148
assert_eq!(
146149
String::from_utf8(key.to_vec()).ok().unwrap(),
147150
format!(
148-
"Data_{}_{:0width$}",
151+
"{}_Data_{:0width$}",
149152
table_catalog.name,
150153
tuple.id.unwrap(),
151-
width = std::mem::size_of::<i64>() * 2 + 3
154+
width = TUPLE_ID_LEN
152155
)
153156
);
154157
assert_eq!(codec.decode_tuple(&key, &bytes).unwrap(), tuple)
@@ -163,11 +166,11 @@ mod tests {
163166
assert_eq!(
164167
String::from_utf8(key.to_vec()).ok().unwrap(),
165168
format!(
166-
"Catalog_{}_0_{}_{:0width$}",
169+
"{}_Catalog_0_{}_{:0width$}",
167170
table_catalog.name,
168171
col.name,
169172
col.id,
170-
width = std::mem::size_of::<u32>() * 2 + 2
173+
width = COLUMNS_ID_LEN
171174
)
172175
);
173176

@@ -184,17 +187,17 @@ mod tests {
184187
str.to_string().into_bytes()
185188
};
186189

187-
set.insert(op("Catalog_T0_0_C0_0"));
188-
set.insert(op("Catalog_T0_0_C1_1"));
189-
set.insert(op("Catalog_T0_0_C2_2"));
190+
set.insert(op("T0_Catalog_0_C0_0"));
191+
set.insert(op("T0_Catalog_0_C1_1"));
192+
set.insert(op("T0_Catalog_0_C2_2"));
190193

191-
set.insert(op("Catalog_T1_0_C0_0"));
192-
set.insert(op("Catalog_T1_0_C1_1"));
193-
set.insert(op("Catalog_T1_0_C2_2"));
194+
set.insert(op("T1_Catalog_0_C0_0"));
195+
set.insert(op("T1_Catalog_0_C1_1"));
196+
set.insert(op("T1_Catalog_0_C2_2"));
194197

195-
set.insert(op("Catalog_T2_0_C0_0"));
196-
set.insert(op("Catalog_T2_0_C1_1"));
197-
set.insert(op("Catalog_T2_0_C2_2"));
198+
set.insert(op("T2_Catalog_0_C0_0"));
199+
set.insert(op("T2_Catalog_0_C1_1"));
200+
set.insert(op("T2_Catalog_0_C2_2"));
198201

199202
let (min, max) = TableCodec::columns_bound(
200203
&Arc::new("T1".to_string())
@@ -204,9 +207,9 @@ mod tests {
204207
.range::<Vec<u8>, (Bound<&Vec<u8>>, Bound<&Vec<u8>>)>((Bound::Included(&min), Bound::Included(&max)))
205208
.collect_vec();
206209

207-
assert_eq!(String::from_utf8(vec[0].clone()).unwrap(), "Catalog_T1_0_C0_0");
208-
assert_eq!(String::from_utf8(vec[1].clone()).unwrap(), "Catalog_T1_0_C1_1");
209-
assert_eq!(String::from_utf8(vec[2].clone()).unwrap(), "Catalog_T1_0_C2_2");
210+
assert_eq!(String::from_utf8(vec[0].clone()).unwrap(), "T1_Catalog_0_C0_0");
211+
assert_eq!(String::from_utf8(vec[1].clone()).unwrap(), "T1_Catalog_0_C1_1");
212+
assert_eq!(String::from_utf8(vec[2].clone()).unwrap(), "T1_Catalog_0_C2_2");
210213
}
211214

212215
#[test]
@@ -216,17 +219,17 @@ mod tests {
216219
str.to_string().into_bytes()
217220
};
218221

219-
set.insert(op("Data_T0_0000000000000000000"));
220-
set.insert(op("Data_T0_0000000000000000001"));
221-
set.insert(op("Data_T0_0000000000000000002"));
222+
set.insert(op("T0_Data_0000000000000000000"));
223+
set.insert(op("T0_Data_0000000000000000001"));
224+
set.insert(op("T0_Data_0000000000000000002"));
222225

223-
set.insert(op("Data_T1_0000000000000000000"));
224-
set.insert(op("Data_T1_0000000000000000001"));
225-
set.insert(op("Data_T1_0000000000000000002"));
226+
set.insert(op("T1_Data_0000000000000000000"));
227+
set.insert(op("T1_Data_0000000000000000001"));
228+
set.insert(op("T1_Data_0000000000000000002"));
226229

227-
set.insert(op("Data_T2_0000000000000000000"));
228-
set.insert(op("Data_T2_0000000000000000001"));
229-
set.insert(op("Data_T2_0000000000000000002"));
230+
set.insert(op("T2_Data_0000000000000000000"));
231+
set.insert(op("T2_Data_0000000000000000001"));
232+
set.insert(op("T2_Data_0000000000000000002"));
230233

231234
let table_codec = TableCodec {
232235
table: TableCatalog::new(Arc::new("T1".to_string()), vec![]).unwrap(),
@@ -237,8 +240,8 @@ mod tests {
237240
.range::<Vec<u8>, (Bound<&Vec<u8>>, Bound<&Vec<u8>>)>((Bound::Included(&min), Bound::Included(&max)))
238241
.collect_vec();
239242

240-
assert_eq!(String::from_utf8(vec[0].clone()).unwrap(), "Data_T1_0000000000000000000");
241-
assert_eq!(String::from_utf8(vec[1].clone()).unwrap(), "Data_T1_0000000000000000001");
242-
assert_eq!(String::from_utf8(vec[2].clone()).unwrap(), "Data_T1_0000000000000000002");
243+
assert_eq!(String::from_utf8(vec[0].clone()).unwrap(), "T1_Data_0000000000000000000");
244+
assert_eq!(String::from_utf8(vec[1].clone()).unwrap(), "T1_Data_0000000000000000001");
245+
assert_eq!(String::from_utf8(vec[2].clone()).unwrap(), "T1_Data_0000000000000000002");
243246
}
244247
}

0 commit comments

Comments
 (0)