|
| 1 | +/* |
| 2 | + * Copyright 2018- The Pixie Authors. |
| 3 | + * |
| 4 | + * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | + * you may not use this file except in compliance with the License. |
| 6 | + * You may obtain a copy of the License at |
| 7 | + * |
| 8 | + * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | + * |
| 10 | + * Unless required by applicable law or agreed to in writing, software |
| 11 | + * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | + * See the License for the specific language governing permissions and |
| 14 | + * limitations under the License. |
| 15 | + * |
| 16 | + * SPDX-License-Identifier: Apache-2.0 |
| 17 | + */ |
| 18 | + |
| 19 | +#include "src/stirling/source_connectors/socket_tracer/protocols/mongodb/stitcher.h" |
| 20 | + |
| 21 | +#include <string> |
| 22 | +#include <utility> |
| 23 | +#include <variant> |
| 24 | + |
| 25 | +#include <absl/container/flat_hash_map.h> |
| 26 | +#include <absl/strings/str_replace.h> |
| 27 | +#include "src/common/base/base.h" |
| 28 | + |
| 29 | +#include "src/stirling/source_connectors/socket_tracer/protocols/common/interface.h" |
| 30 | +#include "src/stirling/source_connectors/socket_tracer/protocols/mongodb/types.h" |
| 31 | +#include "src/stirling/utils/binary_decoder.h" |
| 32 | + |
| 33 | +namespace px { |
| 34 | +namespace stirling { |
| 35 | +namespace protocols { |
| 36 | +namespace mongodb { |
| 37 | + |
| 38 | +void FindMoreToComeResponses( |
| 39 | + absl::flat_hash_map<mongodb::stream_id_t, std::deque<mongodb::Frame>>* resps, int* error_count, |
| 40 | + mongodb::Frame* resp_frame, uint64_t* latest_resp_ts) { |
| 41 | + // In a more to come message, the response frame's responseTo will be the requestID of the prior |
| 42 | + // response frame. |
| 43 | + |
| 44 | + // Find and insert all of the more to come frame(s) section data to the head response frame. |
| 45 | + auto curr_resp = resp_frame; |
| 46 | + |
| 47 | + while (curr_resp->more_to_come) { |
| 48 | + // Find the next response's deque. |
| 49 | + auto next_resp_deque_it = resps->find(curr_resp->request_id); |
| 50 | + if (next_resp_deque_it == resps->end()) { |
| 51 | + VLOG(1) << absl::Substitute( |
| 52 | + "Did not find a response deque extending the prior more to come response. " |
| 53 | + "requestID: $0", |
| 54 | + curr_resp->request_id); |
| 55 | + (*error_count)++; |
| 56 | + return; |
| 57 | + } |
| 58 | + |
| 59 | + // Response deque containing the next more to come response frame. |
| 60 | + auto& next_resp_deque = next_resp_deque_it->second; |
| 61 | + |
| 62 | + // Find the next response frame from the deque with a timestamp just greater than the |
| 63 | + // current response frame's timestamp. |
| 64 | + auto next_resp_it = std::upper_bound( |
| 65 | + next_resp_deque.begin(), next_resp_deque.end(), *latest_resp_ts, |
| 66 | + [](const uint64_t ts, const mongodb::Frame& frame) { return ts < frame.timestamp_ns; }); |
| 67 | + if (next_resp_it->timestamp_ns < *latest_resp_ts) { |
| 68 | + VLOG(1) << absl::Substitute( |
| 69 | + "Did not find a response extending the prior more to come response. RequestID: $0", |
| 70 | + curr_resp->request_id); |
| 71 | + (*error_count)++; |
| 72 | + return; |
| 73 | + } |
| 74 | + |
| 75 | + // Insert the next response's section data to the head of the more to come response. |
| 76 | + mongodb::Frame& next_resp = *next_resp_it; |
| 77 | + resp_frame->sections.insert(std::end(resp_frame->sections), std::begin(next_resp.sections), |
| 78 | + std::end(next_resp.sections)); |
| 79 | + next_resp.consumed = true; |
| 80 | + *latest_resp_ts = next_resp.timestamp_ns; |
| 81 | + curr_resp = &next_resp; |
| 82 | + } |
| 83 | + |
| 84 | + // TODO(kpattaswamy): In the case of "missing" more to come middle/tail frames, determine whether |
| 85 | + // they are truly missing or have not been parsed yet. |
| 86 | +} |
| 87 | + |
| 88 | +void FlattenSections(mongodb::Frame* frame) { |
| 89 | + // Flatten the vector of sections containing vector of documents into a single string. |
| 90 | + for (const auto& section : frame->sections) { |
| 91 | + for (const auto& doc : section.documents) { |
| 92 | + frame->frame_body.append(doc).append(" "); |
| 93 | + } |
| 94 | + } |
| 95 | + frame->sections.clear(); |
| 96 | +} |
| 97 | + |
| 98 | +RecordsWithErrorCount<mongodb::Record> StitchFrames( |
| 99 | + absl::flat_hash_map<mongodb::stream_id_t, std::deque<mongodb::Frame>>* reqs, |
| 100 | + absl::flat_hash_map<mongodb::stream_id_t, std::deque<mongodb::Frame>>* resps, State* state) { |
| 101 | + std::vector<mongodb::Record> records; |
| 102 | + int error_count = 0; |
| 103 | + |
| 104 | + for (auto& stream_id_pair : state->stream_order) { |
| 105 | + auto stream_id = stream_id_pair.first; |
| 106 | + |
| 107 | + // Find the stream ID's response deque. |
| 108 | + auto resp_it = resps->find(stream_id); |
| 109 | + if (resp_it == resps->end()) { |
| 110 | + VLOG(1) << absl::Substitute("Did not find a response deque with the stream ID: $0", |
| 111 | + stream_id); |
| 112 | + continue; |
| 113 | + } |
| 114 | + |
| 115 | + // Response deque for the stream ID. |
| 116 | + auto& resp_deque = resp_it->second; |
| 117 | + |
| 118 | + // Find the stream ID's request deque. |
| 119 | + auto req_it = reqs->find(stream_id); |
| 120 | + // The request deque should exist in the reqs map since the state contained the stream ID. |
| 121 | + CTX_DCHECK(req_it != reqs->end()); |
| 122 | + |
| 123 | + // Request deque for the stream ID. |
| 124 | + auto& req_deque = req_it->second; |
| 125 | + |
| 126 | + // Track the latest response timestamp to compare against request frame's timestamp later. |
| 127 | + uint64_t latest_resp_ts = 0; |
| 128 | + |
| 129 | + // Stitch the first frame in the response deque with the corresponding request frame. |
| 130 | + for (auto& resp_frame : resp_deque) { |
| 131 | + if (resp_frame.consumed) { |
| 132 | + continue; |
| 133 | + } |
| 134 | + |
| 135 | + latest_resp_ts = resp_frame.timestamp_ns; |
| 136 | + |
| 137 | + // Find the corresponding request frame for the head response frame. |
| 138 | + auto req_frame_it = std::upper_bound( |
| 139 | + req_deque.begin(), req_deque.end(), latest_resp_ts, |
| 140 | + [](const uint64_t ts, const mongodb::Frame& frame) { return ts < frame.timestamp_ns; }); |
| 141 | + |
| 142 | + if (req_frame_it != req_deque.begin()) { |
| 143 | + --req_frame_it; |
| 144 | + } |
| 145 | + |
| 146 | + if (req_frame_it->timestamp_ns > latest_resp_ts) { |
| 147 | + VLOG(1) << absl::Substitute( |
| 148 | + "Did not find a request frame that is earlier than the response. Response's " |
| 149 | + "responseTo: $0", |
| 150 | + resp_frame.response_to); |
| 151 | + resp_frame.consumed = true; |
| 152 | + error_count++; |
| 153 | + break; |
| 154 | + } |
| 155 | + |
| 156 | + mongodb::Frame& req_frame = *req_frame_it; |
| 157 | + |
| 158 | + FindMoreToComeResponses(resps, &error_count, &resp_frame, &latest_resp_ts); |
| 159 | + |
| 160 | + // Stitch the request/response and add it to the records. |
| 161 | + req_frame.consumed = true; |
| 162 | + resp_frame.consumed = true; |
| 163 | + FlattenSections(&req_frame); |
| 164 | + FlattenSections(&resp_frame); |
| 165 | + records.push_back({std::move(req_frame), std::move(resp_frame)}); |
| 166 | + break; |
| 167 | + } |
| 168 | + |
| 169 | + auto erase_until_iter = req_deque.begin(); |
| 170 | + while (erase_until_iter != req_deque.end() && |
| 171 | + (erase_until_iter->consumed || erase_until_iter->timestamp_ns < latest_resp_ts)) { |
| 172 | + if (!erase_until_iter->consumed) { |
| 173 | + error_count++; |
| 174 | + } |
| 175 | + ++erase_until_iter; |
| 176 | + } |
| 177 | + |
| 178 | + req_deque.erase(req_deque.begin(), erase_until_iter); |
| 179 | + stream_id_pair.second = true; |
| 180 | + } |
| 181 | + |
| 182 | + // Clear the response deques. |
| 183 | + for (auto it = resps->begin(); it != resps->end(); it++) { |
| 184 | + auto& resp_deque = it->second; |
| 185 | + for (auto& resp : resp_deque) { |
| 186 | + if (!resp.consumed) { |
| 187 | + error_count++; |
| 188 | + } |
| 189 | + } |
| 190 | + resp_deque.clear(); |
| 191 | + } |
| 192 | + |
| 193 | + // Clear the state. |
| 194 | + auto it = state->stream_order.begin(); |
| 195 | + while (it != state->stream_order.end()) { |
| 196 | + if (it->second) { |
| 197 | + it = state->stream_order.erase(it); |
| 198 | + } else { |
| 199 | + it++; |
| 200 | + } |
| 201 | + } |
| 202 | + |
| 203 | + return {records, error_count}; |
| 204 | +} |
| 205 | + |
| 206 | +} // namespace mongodb |
| 207 | +} // namespace protocols |
| 208 | +} // namespace stirling |
| 209 | +} // namespace px |
0 commit comments