Skip to content

Commit e6bfab7

Browse files
authored
Add support to stitch MongoDB frames (#1743)
Summary: This PR adds functionality to stitch MongoDB frames together. It relies on state to iterate over the order of streamIDs parsed and uses a response led matching approach. Related issues: #640 Type of change: /kind feature Test Plan: Added tests --------- Signed-off-by: Kartik Pattaswamy <kpattaswamy@pixielabs.ai>
1 parent 18340f4 commit e6bfab7

File tree

5 files changed

+621
-1
lines changed

5 files changed

+621
-1
lines changed

src/stirling/source_connectors/socket_tracer/protocols/mongodb/BUILD.bazel

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,3 +46,9 @@ pl_cc_test(
4646
srcs = ["parse_test.cc"],
4747
deps = [":cc_library"],
4848
)
49+
50+
pl_cc_test(
51+
name = "stitcher_test",
52+
srcs = ["stitcher_test.cc"],
53+
deps = [":cc_library"],
54+
)
Lines changed: 209 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,209 @@
1+
/*
2+
* Copyright 2018- The Pixie Authors.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*
16+
* SPDX-License-Identifier: Apache-2.0
17+
*/
18+
19+
#include "src/stirling/source_connectors/socket_tracer/protocols/mongodb/stitcher.h"
20+
21+
#include <string>
22+
#include <utility>
23+
#include <variant>
24+
25+
#include <absl/container/flat_hash_map.h>
26+
#include <absl/strings/str_replace.h>
27+
#include "src/common/base/base.h"
28+
29+
#include "src/stirling/source_connectors/socket_tracer/protocols/common/interface.h"
30+
#include "src/stirling/source_connectors/socket_tracer/protocols/mongodb/types.h"
31+
#include "src/stirling/utils/binary_decoder.h"
32+
33+
namespace px {
34+
namespace stirling {
35+
namespace protocols {
36+
namespace mongodb {
37+
38+
void FindMoreToComeResponses(
39+
absl::flat_hash_map<mongodb::stream_id_t, std::deque<mongodb::Frame>>* resps, int* error_count,
40+
mongodb::Frame* resp_frame, uint64_t* latest_resp_ts) {
41+
// In a more to come message, the response frame's responseTo will be the requestID of the prior
42+
// response frame.
43+
44+
// Find and insert all of the more to come frame(s) section data to the head response frame.
45+
auto curr_resp = resp_frame;
46+
47+
while (curr_resp->more_to_come) {
48+
// Find the next response's deque.
49+
auto next_resp_deque_it = resps->find(curr_resp->request_id);
50+
if (next_resp_deque_it == resps->end()) {
51+
VLOG(1) << absl::Substitute(
52+
"Did not find a response deque extending the prior more to come response. "
53+
"requestID: $0",
54+
curr_resp->request_id);
55+
(*error_count)++;
56+
return;
57+
}
58+
59+
// Response deque containing the next more to come response frame.
60+
auto& next_resp_deque = next_resp_deque_it->second;
61+
62+
// Find the next response frame from the deque with a timestamp just greater than the
63+
// current response frame's timestamp.
64+
auto next_resp_it = std::upper_bound(
65+
next_resp_deque.begin(), next_resp_deque.end(), *latest_resp_ts,
66+
[](const uint64_t ts, const mongodb::Frame& frame) { return ts < frame.timestamp_ns; });
67+
if (next_resp_it->timestamp_ns < *latest_resp_ts) {
68+
VLOG(1) << absl::Substitute(
69+
"Did not find a response extending the prior more to come response. RequestID: $0",
70+
curr_resp->request_id);
71+
(*error_count)++;
72+
return;
73+
}
74+
75+
// Insert the next response's section data to the head of the more to come response.
76+
mongodb::Frame& next_resp = *next_resp_it;
77+
resp_frame->sections.insert(std::end(resp_frame->sections), std::begin(next_resp.sections),
78+
std::end(next_resp.sections));
79+
next_resp.consumed = true;
80+
*latest_resp_ts = next_resp.timestamp_ns;
81+
curr_resp = &next_resp;
82+
}
83+
84+
// TODO(kpattaswamy): In the case of "missing" more to come middle/tail frames, determine whether
85+
// they are truly missing or have not been parsed yet.
86+
}
87+
88+
void FlattenSections(mongodb::Frame* frame) {
89+
// Flatten the vector of sections containing vector of documents into a single string.
90+
for (const auto& section : frame->sections) {
91+
for (const auto& doc : section.documents) {
92+
frame->frame_body.append(doc).append(" ");
93+
}
94+
}
95+
frame->sections.clear();
96+
}
97+
98+
RecordsWithErrorCount<mongodb::Record> StitchFrames(
99+
absl::flat_hash_map<mongodb::stream_id_t, std::deque<mongodb::Frame>>* reqs,
100+
absl::flat_hash_map<mongodb::stream_id_t, std::deque<mongodb::Frame>>* resps, State* state) {
101+
std::vector<mongodb::Record> records;
102+
int error_count = 0;
103+
104+
for (auto& stream_id_pair : state->stream_order) {
105+
auto stream_id = stream_id_pair.first;
106+
107+
// Find the stream ID's response deque.
108+
auto resp_it = resps->find(stream_id);
109+
if (resp_it == resps->end()) {
110+
VLOG(1) << absl::Substitute("Did not find a response deque with the stream ID: $0",
111+
stream_id);
112+
continue;
113+
}
114+
115+
// Response deque for the stream ID.
116+
auto& resp_deque = resp_it->second;
117+
118+
// Find the stream ID's request deque.
119+
auto req_it = reqs->find(stream_id);
120+
// The request deque should exist in the reqs map since the state contained the stream ID.
121+
CTX_DCHECK(req_it != reqs->end());
122+
123+
// Request deque for the stream ID.
124+
auto& req_deque = req_it->second;
125+
126+
// Track the latest response timestamp to compare against request frame's timestamp later.
127+
uint64_t latest_resp_ts = 0;
128+
129+
// Stitch the first frame in the response deque with the corresponding request frame.
130+
for (auto& resp_frame : resp_deque) {
131+
if (resp_frame.consumed) {
132+
continue;
133+
}
134+
135+
latest_resp_ts = resp_frame.timestamp_ns;
136+
137+
// Find the corresponding request frame for the head response frame.
138+
auto req_frame_it = std::upper_bound(
139+
req_deque.begin(), req_deque.end(), latest_resp_ts,
140+
[](const uint64_t ts, const mongodb::Frame& frame) { return ts < frame.timestamp_ns; });
141+
142+
if (req_frame_it != req_deque.begin()) {
143+
--req_frame_it;
144+
}
145+
146+
if (req_frame_it->timestamp_ns > latest_resp_ts) {
147+
VLOG(1) << absl::Substitute(
148+
"Did not find a request frame that is earlier than the response. Response's "
149+
"responseTo: $0",
150+
resp_frame.response_to);
151+
resp_frame.consumed = true;
152+
error_count++;
153+
break;
154+
}
155+
156+
mongodb::Frame& req_frame = *req_frame_it;
157+
158+
FindMoreToComeResponses(resps, &error_count, &resp_frame, &latest_resp_ts);
159+
160+
// Stitch the request/response and add it to the records.
161+
req_frame.consumed = true;
162+
resp_frame.consumed = true;
163+
FlattenSections(&req_frame);
164+
FlattenSections(&resp_frame);
165+
records.push_back({std::move(req_frame), std::move(resp_frame)});
166+
break;
167+
}
168+
169+
auto erase_until_iter = req_deque.begin();
170+
while (erase_until_iter != req_deque.end() &&
171+
(erase_until_iter->consumed || erase_until_iter->timestamp_ns < latest_resp_ts)) {
172+
if (!erase_until_iter->consumed) {
173+
error_count++;
174+
}
175+
++erase_until_iter;
176+
}
177+
178+
req_deque.erase(req_deque.begin(), erase_until_iter);
179+
stream_id_pair.second = true;
180+
}
181+
182+
// Clear the response deques.
183+
for (auto it = resps->begin(); it != resps->end(); it++) {
184+
auto& resp_deque = it->second;
185+
for (auto& resp : resp_deque) {
186+
if (!resp.consumed) {
187+
error_count++;
188+
}
189+
}
190+
resp_deque.clear();
191+
}
192+
193+
// Clear the state.
194+
auto it = state->stream_order.begin();
195+
while (it != state->stream_order.end()) {
196+
if (it->second) {
197+
it = state->stream_order.erase(it);
198+
} else {
199+
it++;
200+
}
201+
}
202+
203+
return {records, error_count};
204+
}
205+
206+
} // namespace mongodb
207+
} // namespace protocols
208+
} // namespace stirling
209+
} // namespace px
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
/*
2+
* Copyright 2018- The Pixie Authors.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*
16+
* SPDX-License-Identifier: Apache-2.0
17+
*/
18+
19+
#pragma once
20+
21+
#include <absl/container/flat_hash_map.h>
22+
#include <deque>
23+
#include <utility>
24+
#include <vector>
25+
26+
#include "src/common/base/base.h"
27+
#include "src/stirling/source_connectors/socket_tracer/protocols/common/interface.h"
28+
#include "src/stirling/source_connectors/socket_tracer/protocols/mongodb/types.h"
29+
30+
namespace px {
31+
namespace stirling {
32+
namespace protocols {
33+
namespace mongodb {
34+
35+
void FindMoreToComeResponses(
36+
absl::flat_hash_map<mongodb::stream_id_t, std::deque<mongodb::Frame>>* resps, int* error_count,
37+
mongodb::Frame* resp_frame, uint64_t* latest_resp_ts);
38+
39+
void FlattenSections(mongodb::Frame* frame);
40+
41+
RecordsWithErrorCount<mongodb::Record> StitchFrames(
42+
absl::flat_hash_map<mongodb::stream_id_t, std::deque<mongodb::Frame>>* reqs,
43+
absl::flat_hash_map<mongodb::stream_id_t, std::deque<mongodb::Frame>>* resps, State* state);
44+
} // namespace mongodb
45+
46+
template <>
47+
inline RecordsWithErrorCount<mongodb::Record> StitchFrames(
48+
absl::flat_hash_map<mongodb::stream_id_t, std::deque<mongodb::Frame>>* reqs,
49+
absl::flat_hash_map<mongodb::stream_id_t, std::deque<mongodb::Frame>>* resps,
50+
mongodb::StateWrapper* state) {
51+
return mongodb::StitchFrames(reqs, resps, &state->global);
52+
}
53+
54+
} // namespace protocols
55+
} // namespace stirling
56+
} // namespace px

0 commit comments

Comments
 (0)