From e7a27730572d3a73add219e8256ab7c5f7aa2d64 Mon Sep 17 00:00:00 2001 From: Scott Hart Date: Sat, 6 Dec 2025 13:53:45 -0500 Subject: [PATCH 01/11] initial implementation --- google/cloud/bigtable/CMakeLists.txt | 2 + .../bigtable/google_cloud_cpp_bigtable.bzl | 2 + ...igtable_random_two_least_used_decorator.cc | 354 ++++++++++++++++++ ...bigtable_random_two_least_used_decorator.h | 147 ++++++++ google/cloud/google_cloud_cpp_grpc_utils.bzl | 2 + .../cloud/google_cloud_cpp_grpc_utils.cmake | 3 + ...google_cloud_cpp_grpc_utils_unit_tests.bzl | 1 + google/cloud/internal/channel_pool.cc | 23 ++ google/cloud/internal/channel_pool.h | 244 ++++++++++++ google/cloud/internal/channel_pool_test.cc | 25 ++ 10 files changed, 803 insertions(+) create mode 100644 google/cloud/bigtable/internal/bigtable_random_two_least_used_decorator.cc create mode 100644 google/cloud/bigtable/internal/bigtable_random_two_least_used_decorator.h create mode 100644 google/cloud/internal/channel_pool.cc create mode 100644 google/cloud/internal/channel_pool.h create mode 100644 google/cloud/internal/channel_pool_test.cc diff --git a/google/cloud/bigtable/CMakeLists.txt b/google/cloud/bigtable/CMakeLists.txt index 48436804617f3..0463bf11af9b6 100644 --- a/google/cloud/bigtable/CMakeLists.txt +++ b/google/cloud/bigtable/CMakeLists.txt @@ -166,6 +166,8 @@ add_library( internal/bigtable_logging_decorator.h internal/bigtable_metadata_decorator.cc internal/bigtable_metadata_decorator.h + internal/bigtable_random_two_least_used_decorator.cc + internal/bigtable_random_two_least_used_decorator.h internal/bigtable_round_robin_decorator.cc internal/bigtable_round_robin_decorator.h internal/bigtable_stub.cc diff --git a/google/cloud/bigtable/google_cloud_cpp_bigtable.bzl b/google/cloud/bigtable/google_cloud_cpp_bigtable.bzl index c5d4f47beff0b..2003b64b86b99 100644 --- a/google/cloud/bigtable/google_cloud_cpp_bigtable.bzl +++ b/google/cloud/bigtable/google_cloud_cpp_bigtable.bzl @@ -80,6 +80,7 @@ google_cloud_cpp_bigtable_hdrs = [ "internal/bigtable_channel_refresh.h", "internal/bigtable_logging_decorator.h", "internal/bigtable_metadata_decorator.h", + "internal/bigtable_random_two_least_used_decorator.h", "internal/bigtable_round_robin_decorator.h", "internal/bigtable_stub.h", "internal/bigtable_stub_factory.h", @@ -204,6 +205,7 @@ google_cloud_cpp_bigtable_srcs = [ "internal/bigtable_channel_refresh.cc", "internal/bigtable_logging_decorator.cc", "internal/bigtable_metadata_decorator.cc", + "internal/bigtable_random_two_least_used_decorator.cc", "internal/bigtable_round_robin_decorator.cc", "internal/bigtable_stub.cc", "internal/bigtable_stub_factory.cc", diff --git a/google/cloud/bigtable/internal/bigtable_random_two_least_used_decorator.cc b/google/cloud/bigtable/internal/bigtable_random_two_least_used_decorator.cc new file mode 100644 index 0000000000000..7efead4db5ab6 --- /dev/null +++ b/google/cloud/bigtable/internal/bigtable_random_two_least_used_decorator.cc @@ -0,0 +1,354 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Generated by the Codegen C++ plugin. +// If you make any local changes, they will be lost. +// source: google/bigtable/v2/bigtable.proto + +#include "google/cloud/bigtable/internal/bigtable_random_two_least_used_decorator.h" +#include +#include +#include + +namespace google { +namespace cloud { +namespace bigtable_internal { +GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_BEGIN +namespace { + +template +class StreamingReadRpcTracking + : public google::cloud::internal::StreamingReadRpc { + public: + StreamingReadRpcTracking( + std::unique_ptr> child, + std::function on_destruction) + : child_(std::move(child)), on_destruction_(std::move(on_destruction)) {} + + ~StreamingReadRpcTracking() override { on_destruction_(); } + + void Cancel() override { child_->Cancel(); } + absl::optional Read(T* response) override { + return child_->Read(response); + } + RpcMetadata GetRequestMetadata() const override { + return child_->GetRequestMetadata(); + } + + private: + std::unique_ptr> child_; + std::function on_destruction_; +}; + +template +class AsyncStreamingReadRpcTracking + : public google::cloud::internal::AsyncStreamingReadRpc { + public: + AsyncStreamingReadRpcTracking( + std::unique_ptr> child, + std::function on_destruction) + : child_(std::move(child)), on_destruction_(std::move(on_destruction)) {} + + ~AsyncStreamingReadRpcTracking() override { on_destruction_(); } + + void Cancel() override { child_->Cancel(); } + future Start() override { return child_->Start(); } + future> Read() override { return child_->Read(); } + future Finish() override { return child_->Finish(); } + RpcMetadata GetRequestMetadata() const override { + return child_->GetRequestMetadata(); + } + + private: + std::unique_ptr> child_; + std::function on_destruction_; +}; + +} // namespace + +BigtableRandomTwoLeastUsed::BigtableRandomTwoLeastUsed( + CompletionQueue cq, + internal::DynamicChannelPool::StubFactoryFn factory_fn, + std::vector> children) + : pool_(internal::DynamicChannelPool::Create( + std::move(cq), std::move(children), std::move(factory_fn))) {} + +std::unique_ptr> +BigtableRandomTwoLeastUsed::ReadRows( + std::shared_ptr context, Options const& options, + google::bigtable::v2::ReadRowsRequest const& request) { + auto child = Child(); + auto stub = child->AcquireStub(); + auto result = stub->ReadRows(std::move(context), options, request); + std::weak_ptr> weak = child; + auto release_fn = [weak = std::move(weak)]() { + auto child = weak.lock(); + if (child) child->ReleaseStub(); + }; + return std::make_unique< + StreamingReadRpcTracking>( + std::move(result), std::move(release_fn)); +} + +std::unique_ptr> +BigtableRandomTwoLeastUsed::SampleRowKeys( + std::shared_ptr context, Options const& options, + google::bigtable::v2::SampleRowKeysRequest const& request) { + auto child = Child(); + auto stub = child->AcquireStub(); + auto result = stub->SampleRowKeys(std::move(context), options, request); + std::weak_ptr> weak = child; + auto release_fn = [weak = std::move(weak)]() { + auto child = weak.lock(); + if (child) child->ReleaseStub(); + }; + return std::make_unique< + StreamingReadRpcTracking>( + std::move(result), std::move(release_fn)); +} + +StatusOr +BigtableRandomTwoLeastUsed::MutateRow( + grpc::ClientContext& context, Options const& options, + google::bigtable::v2::MutateRowRequest const& request) { + auto child = Child(); + auto stub = child->AcquireStub(); + auto result = stub->MutateRow(context, options, request); + child->ReleaseStub(); + return result; +} + +std::unique_ptr> +BigtableRandomTwoLeastUsed::MutateRows( + std::shared_ptr context, Options const& options, + google::bigtable::v2::MutateRowsRequest const& request) { + auto child = Child(); + auto stub = child->AcquireStub(); + auto result = stub->MutateRows(std::move(context), options, request); + std::weak_ptr> weak = child; + auto release_fn = [weak = std::move(weak)]() { + auto child = weak.lock(); + if (child) child->ReleaseStub(); + }; + return std::make_unique< + StreamingReadRpcTracking>( + std::move(result), std::move(release_fn)); +} + +StatusOr +BigtableRandomTwoLeastUsed::CheckAndMutateRow( + grpc::ClientContext& context, Options const& options, + google::bigtable::v2::CheckAndMutateRowRequest const& request) { + auto child = Child(); + auto stub = child->AcquireStub(); + auto result = stub->CheckAndMutateRow(context, options, request); + child->ReleaseStub(); + return result; +} + +StatusOr +BigtableRandomTwoLeastUsed::PingAndWarm( + grpc::ClientContext& context, Options const& options, + google::bigtable::v2::PingAndWarmRequest const& request) { + auto child = Child(); + auto stub = child->AcquireStub(); + auto result = stub->PingAndWarm(context, options, request); + child->ReleaseStub(); + return result; +} + +StatusOr +BigtableRandomTwoLeastUsed::ReadModifyWriteRow( + grpc::ClientContext& context, Options const& options, + google::bigtable::v2::ReadModifyWriteRowRequest const& request) { + auto child = Child(); + auto stub = child->AcquireStub(); + auto result = stub->ReadModifyWriteRow(context, options, request); + child->ReleaseStub(); + return result; +} + +StatusOr +BigtableRandomTwoLeastUsed::PrepareQuery( + grpc::ClientContext& context, Options const& options, + google::bigtable::v2::PrepareQueryRequest const& request) { + auto child = Child(); + auto stub = child->AcquireStub(); + auto result = stub->PrepareQuery(context, options, request); + child->ReleaseStub(); + return result; +} + +std::unique_ptr> +BigtableRandomTwoLeastUsed::ExecuteQuery( + std::shared_ptr context, Options const& options, + google::bigtable::v2::ExecuteQueryRequest const& request) { + auto child = Child(); + auto stub = child->AcquireStub(); + auto result = stub->ExecuteQuery(std::move(context), options, request); + std::weak_ptr> weak = child; + auto release_fn = [weak = std::move(weak)]() { + auto child = weak.lock(); + if (child) child->ReleaseStub(); + }; + return std::make_unique< + StreamingReadRpcTracking>( + std::move(result), std::move(release_fn)); +} + +std::unique_ptr> +BigtableRandomTwoLeastUsed::AsyncReadRows( + google::cloud::CompletionQueue const& cq, + std::shared_ptr context, + google::cloud::internal::ImmutableOptions options, + google::bigtable::v2::ReadRowsRequest const& request) { + auto child = Child(); + auto stub = child->AcquireStub(); + auto result = + stub->AsyncReadRows(cq, std::move(context), std::move(options), request); + std::weak_ptr> weak = child; + auto release_fn = [weak = std::move(weak)]() { + auto child = weak.lock(); + if (child) child->ReleaseStub(); + }; + return std::make_unique< + AsyncStreamingReadRpcTracking>( + std::move(result), std::move(release_fn)); +} + +std::unique_ptr> +BigtableRandomTwoLeastUsed::AsyncSampleRowKeys( + google::cloud::CompletionQueue const& cq, + std::shared_ptr context, + google::cloud::internal::ImmutableOptions options, + google::bigtable::v2::SampleRowKeysRequest const& request) { + auto child = Child(); + auto stub = child->AcquireStub(); + auto result = stub->AsyncSampleRowKeys(cq, std::move(context), + std::move(options), request); + std::weak_ptr> weak = child; + auto release_fn = [weak = std::move(weak)]() { + auto child = weak.lock(); + if (child) child->ReleaseStub(); + }; + return std::make_unique>(std::move(result), + std::move(release_fn)); +} + +future> +BigtableRandomTwoLeastUsed::AsyncMutateRow( + google::cloud::CompletionQueue& cq, + std::shared_ptr context, + google::cloud::internal::ImmutableOptions options, + google::bigtable::v2::MutateRowRequest const& request) { + auto child = Child(); + auto stub = child->AcquireStub(); + auto result = + stub->AsyncMutateRow(cq, std::move(context), std::move(options), request); + child->ReleaseStub(); + return result; +} + +std::unique_ptr> +BigtableRandomTwoLeastUsed::AsyncMutateRows( + google::cloud::CompletionQueue const& cq, + std::shared_ptr context, + google::cloud::internal::ImmutableOptions options, + google::bigtable::v2::MutateRowsRequest const& request) { + auto child = Child(); + auto stub = child->AcquireStub(); + auto result = stub->AsyncMutateRows(cq, std::move(context), + std::move(options), request); + + std::weak_ptr> weak = child; + auto release_fn = [weak = std::move(weak)]() { + auto child = weak.lock(); + if (child) child->ReleaseStub(); + }; + + return std::make_unique< + AsyncStreamingReadRpcTracking>( + std::move(result), std::move(release_fn)); +} + +future> +BigtableRandomTwoLeastUsed::AsyncCheckAndMutateRow( + google::cloud::CompletionQueue& cq, + std::shared_ptr context, + google::cloud::internal::ImmutableOptions options, + google::bigtable::v2::CheckAndMutateRowRequest const& request) { + auto child = Child(); + auto stub = child->AcquireStub(); + auto result = stub->AsyncCheckAndMutateRow(cq, std::move(context), + std::move(options), request); + child->ReleaseStub(); + return result; +} + +future> +BigtableRandomTwoLeastUsed::AsyncReadModifyWriteRow( + google::cloud::CompletionQueue& cq, + std::shared_ptr context, + google::cloud::internal::ImmutableOptions options, + google::bigtable::v2::ReadModifyWriteRowRequest const& request) { + auto child = Child(); + auto stub = child->AcquireStub(); + auto result = stub->AsyncReadModifyWriteRow(cq, std::move(context), + std::move(options), request); + child->ReleaseStub(); + return result; +} + +future> +BigtableRandomTwoLeastUsed::AsyncPrepareQuery( + google::cloud::CompletionQueue& cq, + std::shared_ptr context, + google::cloud::internal::ImmutableOptions options, + google::bigtable::v2::PrepareQueryRequest const& request) { + auto child = Child(); + auto stub = child->AcquireStub(); + auto result = stub->AsyncPrepareQuery(cq, std::move(context), + std::move(options), request); + child->ReleaseStub(); + return result; +} + +std::shared_ptr> +BigtableRandomTwoLeastUsed::Child() { + return pool_->GetChannelRandomTwoLeastUsed(); + // std::unique_lock lk(mu_); + // std::vector indices(pool_->size(lk) - 1); + // // TODO(sdhart): Maybe use iota on iterators instead of indices + // std::iota(indices.begin(), indices.end(), 0); + // std::shuffle(indices.begin(), indices.end(), rng_); + // auto channel_1 = pool_->GetChannel(lk, indices[0]); + // auto channel_2 = pool_->GetChannel(lk, indices[1]); + // + // return channel_1->outstanding_rpcs(lk) < channel_2->outstanding_rpcs(lk) + // ? channel_1 + // : channel_2; +} + +GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_END +} // namespace bigtable_internal +} // namespace cloud +} // namespace google diff --git a/google/cloud/bigtable/internal/bigtable_random_two_least_used_decorator.h b/google/cloud/bigtable/internal/bigtable_random_two_least_used_decorator.h new file mode 100644 index 0000000000000..7aec08e067d3b --- /dev/null +++ b/google/cloud/bigtable/internal/bigtable_random_two_least_used_decorator.h @@ -0,0 +1,147 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Generated by the Codegen C++ plugin. +// If you make any local changes, they will be lost. +// source: google/bigtable/v2/bigtable.proto + +#ifndef GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_INTERNAL_BIGTABLE_RANDOM_TWO_LEAST_USED_DECORATOR_H +#define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_INTERNAL_BIGTABLE_RANDOM_TWO_LEAST_USED_DECORATOR_H + +#include "google/cloud/bigtable/internal/bigtable_stub.h" +#include "google/cloud/internal/channel_pool.h" +#include "google/cloud/version.h" +#include +#include +#include + +namespace google { +namespace cloud { +namespace bigtable_internal { +GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_BEGIN + +class BigtableRandomTwoLeastUsed : public BigtableStub { + public: + BigtableRandomTwoLeastUsed( + CompletionQueue cq, + internal::DynamicChannelPool::StubFactoryFn factory_fn, + std::vector> children); + ~BigtableRandomTwoLeastUsed() override = default; + + std::unique_ptr> + ReadRows(std::shared_ptr context, Options const& options, + google::bigtable::v2::ReadRowsRequest const& request) override; + + std::unique_ptr> + SampleRowKeys( + std::shared_ptr context, Options const& options, + google::bigtable::v2::SampleRowKeysRequest const& request) override; + + StatusOr MutateRow( + grpc::ClientContext& context, Options const& options, + google::bigtable::v2::MutateRowRequest const& request) override; + + std::unique_ptr> + MutateRows(std::shared_ptr context, + Options const& options, + google::bigtable::v2::MutateRowsRequest const& request) override; + + StatusOr CheckAndMutateRow( + grpc::ClientContext& context, Options const& options, + google::bigtable::v2::CheckAndMutateRowRequest const& request) override; + + StatusOr PingAndWarm( + grpc::ClientContext& context, Options const& options, + google::bigtable::v2::PingAndWarmRequest const& request) override; + + StatusOr ReadModifyWriteRow( + grpc::ClientContext& context, Options const& options, + google::bigtable::v2::ReadModifyWriteRowRequest const& request) override; + + StatusOr PrepareQuery( + grpc::ClientContext& context, Options const& options, + google::bigtable::v2::PrepareQueryRequest const& request) override; + + std::unique_ptr> + ExecuteQuery( + std::shared_ptr context, Options const& options, + google::bigtable::v2::ExecuteQueryRequest const& request) override; + + std::unique_ptr<::google::cloud::internal::AsyncStreamingReadRpc< + google::bigtable::v2::ReadRowsResponse>> + AsyncReadRows(google::cloud::CompletionQueue const& cq, + std::shared_ptr context, + google::cloud::internal::ImmutableOptions options, + google::bigtable::v2::ReadRowsRequest const& request) override; + + std::unique_ptr<::google::cloud::internal::AsyncStreamingReadRpc< + google::bigtable::v2::SampleRowKeysResponse>> + AsyncSampleRowKeys( + google::cloud::CompletionQueue const& cq, + std::shared_ptr context, + google::cloud::internal::ImmutableOptions options, + google::bigtable::v2::SampleRowKeysRequest const& request) override; + + future> AsyncMutateRow( + google::cloud::CompletionQueue& cq, + std::shared_ptr context, + google::cloud::internal::ImmutableOptions options, + google::bigtable::v2::MutateRowRequest const& request) override; + + std::unique_ptr<::google::cloud::internal::AsyncStreamingReadRpc< + google::bigtable::v2::MutateRowsResponse>> + AsyncMutateRows( + google::cloud::CompletionQueue const& cq, + std::shared_ptr context, + google::cloud::internal::ImmutableOptions options, + google::bigtable::v2::MutateRowsRequest const& request) override; + + future> + AsyncCheckAndMutateRow( + google::cloud::CompletionQueue& cq, + std::shared_ptr context, + google::cloud::internal::ImmutableOptions options, + google::bigtable::v2::CheckAndMutateRowRequest const& request) override; + + future> + AsyncReadModifyWriteRow( + google::cloud::CompletionQueue& cq, + std::shared_ptr context, + google::cloud::internal::ImmutableOptions options, + google::bigtable::v2::ReadModifyWriteRowRequest const& request) override; + + future> + AsyncPrepareQuery( + google::cloud::CompletionQueue& cq, + std::shared_ptr context, + google::cloud::internal::ImmutableOptions options, + google::bigtable::v2::PrepareQueryRequest const& request) override; + + private: + std::shared_ptr> Child(); + + // std::mutex mu_; + std::shared_ptr> pool_; +}; + +GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_END +} // namespace bigtable_internal +} // namespace cloud +} // namespace google + +#endif // GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_INTERNAL_BIGTABLE_RANDOM_TWO_LEAST_USED_DECORATOR_H diff --git a/google/cloud/google_cloud_cpp_grpc_utils.bzl b/google/cloud/google_cloud_cpp_grpc_utils.bzl index d54df10510c40..c6d83ae5f3eeb 100644 --- a/google/cloud/google_cloud_cpp_grpc_utils.bzl +++ b/google/cloud/google_cloud_cpp_grpc_utils.bzl @@ -54,6 +54,7 @@ google_cloud_cpp_grpc_utils_hdrs = [ "internal/async_streaming_write_rpc_timeout.h", "internal/async_streaming_write_rpc_tracing.h", "internal/background_threads_impl.h", + "internal/channel_pool.h", "internal/completion_queue_impl.h", "internal/debug_string_protobuf.h", "internal/debug_string_status.h", @@ -95,6 +96,7 @@ google_cloud_cpp_grpc_utils_srcs = [ "internal/async_connection_ready.cc", "internal/async_polling_loop.cc", "internal/background_threads_impl.cc", + "internal/channel_pool.cc", "internal/debug_string_protobuf.cc", "internal/debug_string_status.cc", "internal/default_completion_queue_impl.cc", diff --git a/google/cloud/google_cloud_cpp_grpc_utils.cmake b/google/cloud/google_cloud_cpp_grpc_utils.cmake index f73a7d81e09b2..f9ce47a1bf486 100644 --- a/google/cloud/google_cloud_cpp_grpc_utils.cmake +++ b/google/cloud/google_cloud_cpp_grpc_utils.cmake @@ -61,6 +61,8 @@ add_library( internal/async_streaming_write_rpc_tracing.h internal/background_threads_impl.cc internal/background_threads_impl.h + internal/channel_pool.cc + internal/channel_pool.h internal/completion_queue_impl.h internal/debug_string_protobuf.cc internal/debug_string_protobuf.h @@ -256,6 +258,7 @@ if (BUILD_TESTING) internal/async_streaming_write_rpc_timeout_test.cc internal/async_streaming_write_rpc_tracing_test.cc internal/background_threads_impl_test.cc + internal/channel_pool_test.cc internal/debug_string_protobuf_test.cc internal/debug_string_status_test.cc internal/extract_long_running_result_test.cc diff --git a/google/cloud/google_cloud_cpp_grpc_utils_unit_tests.bzl b/google/cloud/google_cloud_cpp_grpc_utils_unit_tests.bzl index 95e6a7aa6f643..cd8a97c763b34 100644 --- a/google/cloud/google_cloud_cpp_grpc_utils_unit_tests.bzl +++ b/google/cloud/google_cloud_cpp_grpc_utils_unit_tests.bzl @@ -42,6 +42,7 @@ google_cloud_cpp_grpc_utils_unit_tests = [ "internal/async_streaming_write_rpc_timeout_test.cc", "internal/async_streaming_write_rpc_tracing_test.cc", "internal/background_threads_impl_test.cc", + "internal/channel_pool_test.cc", "internal/debug_string_protobuf_test.cc", "internal/debug_string_status_test.cc", "internal/extract_long_running_result_test.cc", diff --git a/google/cloud/internal/channel_pool.cc b/google/cloud/internal/channel_pool.cc new file mode 100644 index 0000000000000..943a68b8a2340 --- /dev/null +++ b/google/cloud/internal/channel_pool.cc @@ -0,0 +1,23 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "google/cloud/internal/channel_pool.h" +#include "google/cloud/version.h" + +namespace google { +namespace cloud { +GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_BEGIN +GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_END +} // namespace cloud +} // namespace google diff --git a/google/cloud/internal/channel_pool.h b/google/cloud/internal/channel_pool.h new file mode 100644 index 0000000000000..4e2c8b6b3deca --- /dev/null +++ b/google/cloud/internal/channel_pool.h @@ -0,0 +1,244 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_INTERNAL_CHANNEL_POOL_H +#define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_INTERNAL_CHANNEL_POOL_H + +#include "google/cloud/completion_queue.h" +#include "google/cloud/internal/random.h" +#include "google/cloud/version.h" +#include +#include +#include +#include + +namespace google { +namespace cloud { +GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_BEGIN +namespace internal { + +// StubWrapper could maybe be a Decorator on the Stub +// May have to be a Decorator on Stub to handle accounting destructor for +// streaming +template +class StubWrapper { + public: + explicit StubWrapper(std::shared_ptr stub) + : stub_(std::move(stub)), outstanding_rpcs_(0) {} + + int outstanding_rpcs(std::unique_lock const&) const { + return outstanding_rpcs_; + } + + std::shared_ptr AcquireStub() { + std::unique_lock lk(mu_); + ++outstanding_rpcs_; + return stub_; + } + + void ReleaseStub() { + std::unique_lock lk(mu_); + --outstanding_rpcs_; + } + + private: + mutable std::mutex mu_; + std::shared_ptr stub_; + int outstanding_rpcs_; +}; + +template +class StaticChannelPool { + public: + explicit StaticChannelPool(std::size_t size); + + std::shared_ptr GetChannel(); + std::shared_ptr GetChannel(std::size_t index); + + private: + std::vector channels_; +}; + +template +class DynamicChannelPool + : public std::enable_shared_from_this> { + public: + using StubFactoryFn = std::function()>; + struct SizingPolicy { + // To avoid channel churn, the pool will not add or remove channels more + // frequently that this period. + std::chrono::milliseconds resize_cooldown_interval; + + // If the average number of outstanding RPCs is below this threshold, + // the pool size will be decreased. + std::size_t minimum_average_outstanding_rpcs_per_channel; + // If the average number of outstanding RPCs is above this threshold, + // the pool size will be increased. + std::size_t maximum_average_outstanding_rpcs_per_channel; + + // When channels are removed from the pool, we have to wait until all + // outstanding RPCs on that channel are completed before destroying it. + std::chrono::milliseconds decommissioned_channel_polling_interval; + }; + + static std::shared_ptr Create( + CompletionQueue cq, std::size_t initial_size, StubFactoryFn factory_fn, + SizingPolicy sizing_policy = {}) { + std::vector>> initial_wrapped_channels; + for (std::size_t i = 0; i < initial_size; ++i) { + initial_wrapped_channels.emplace_back(factory_fn()); + } + auto pool = std::shared_ptr(new DynamicChannelPool( + std::move(cq), std::move(initial_wrapped_channels), + std::move(factory_fn), std::move(sizing_policy))); + } + static std::shared_ptr Create( + CompletionQueue cq, std::vector> initial_channels, + StubFactoryFn factory_fn, SizingPolicy sizing_policy = {}) { + auto pool = std::shared_ptr(new DynamicChannelPool( + std::move(cq), std::move(initial_channels), std::move(factory_fn), + std::move(sizing_policy))); + return pool; + } + + // This is a snapshot aka dirty read as the size could immediately change + // after this function returns. + std::size_t size() const { + std::unique_lock lk(mu_); + return channels_.size(); + } + + // std::shared_ptr> GetChannel( + // std::unique_lock const&) { + // // TODO: check for empty + // return channels_[0]; + // } + // + // std::shared_ptr> GetChannel( + // std::unique_lock const&, std::size_t index) { + // // TODO: bounds check + // return channels_[index]; + // } + + std::shared_ptr> GetChannelRandomTwoLeastUsed() { + std::unique_lock lk(mu_); + + // TODO: check if resize is needed. + + std::vector indices(channels_.size()); + // TODO(sdhart): Maybe use iota on iterators instead of indices + std::iota(indices.begin(), indices.end(), 0); + std::shuffle(indices.begin(), indices.end(), rng_); + auto channel_1 = channels_[indices[0]]; + auto channel_2 = channels_[indices[1]]; + + return channel_1->outstanding_rpcs(lk) < channel_2->outstanding_rpcs(lk) + ? channel_1 + : channel_2; + } + + private: + DynamicChannelPool( + CompletionQueue cq, + std::vector>> initial_wrapped_channels, + StubFactoryFn factory_fn, SizingPolicy sizing_policy) + : cq_(std::move(cq)), + factory_fn_(std::move(factory_fn)), + channels_(std::move(initial_wrapped_channels)), + sizing_policy_(std::move(sizing_policy)) {} + + DynamicChannelPool(CompletionQueue cq, + std::vector> initial_channels, + StubFactoryFn factory_fn, SizingPolicy sizing_policy) + : cq_(std::move(cq)), + factory_fn_(std::move(factory_fn)), + channels_(initial_channels.size()), + sizing_policy_(std::move(sizing_policy)) { + for (auto& channel : initial_channels) { + channels_.push_back(std::make_shared>(channel)); + } + } + + void ScheduleAddChannel() {} + void AddChannel() {} + + void ScheduleRemoveChannel() { + // std::weak_ptr> weak = this->shared_from_this(); + decommission_timer_ = + cq_.MakeRelativeTimer( + sizing_policy_.decommissioned_channel_polling_interval) + .then([weak = std::weak_ptr>( + this->shared_from_this())]() { + if (weak.lock()) { + weak->RemoveChannel(); + } + }); + } + + void RemoveChannel() { + std::unique_lock lk(mu_); + std::sort(decommissioned_channels_.begin(), decommissioned_channels_.end(), + [](std::shared_ptr> const& a, + std::shared_ptr> b) { + return a->outstanding_rpcs() > b->outstanding_rpcs(); + }); + while (!decommissioned_channels_.empty()) { + if (decommissioned_channels_.back()->outstanding_rpcs() != 0) { + ScheduleRemoveChannel(); + return; + } + decommissioned_channels_.pop_back(); + } + } + + void CheckPoolChannelHealth(std::unique_lock const&) { + auto average_rpc_per_channel = + std::accumulate(channels_.begin(), channels_.end(), + [](std::shared_ptr> const& s) { + return s->outstanding_rpcs(); + }) / + channels_.size(); + if (average_rpc_per_channel < + sizing_policy_.minimum_average_outstanding_rpcs_per_channel) { + // TODO(sdhart): Is there a downside to always removing the most recently + // created channel? + decommissioned_channels_.push_back(std::move(channels_.back())); + channels_.pop_back(); + ScheduleRemoveChannel(); + } + if (average_rpc_per_channel > + sizing_policy_.maximum_average_outstanding_rpcs_per_channel) { + // Channel/stub creation is expensive, instead of making the current RPC + // wait on this, use an existing channel right now, and schedule a channel + // to be added. + ScheduleAddChannel(); + } + } + + mutable std::mutex mu_; + CompletionQueue cq_; + google::cloud::internal::DefaultPRNG rng_; + StubFactoryFn factory_fn_; + std::vector>> channels_; + SizingPolicy sizing_policy_; + std::vector>> decommissioned_channels_; + future> decommission_timer_; +}; + +} // namespace internal +GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_END +} // namespace cloud +} // namespace google + +#endif // GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_INTERNAL_CHANNEL_POOL_H diff --git a/google/cloud/internal/channel_pool_test.cc b/google/cloud/internal/channel_pool_test.cc new file mode 100644 index 0000000000000..46137a70d01b5 --- /dev/null +++ b/google/cloud/internal/channel_pool_test.cc @@ -0,0 +1,25 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "google/cloud/internal/channel_pool.h" +#include "google/cloud/testing_util/status_matchers.h" +#include + +namespace google { +namespace cloud { +GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_BEGIN +namespace internal {} // namespace internal +GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_END +} // namespace cloud +} // namespace google From 984599cdb2a02374dc0946fba64a7401eb7b8b23 Mon Sep 17 00:00:00 2001 From: Scott Hart Date: Sat, 6 Dec 2025 17:21:19 -0500 Subject: [PATCH 02/11] working integration tests with new stub and pool --- ...igtable_random_two_least_used_decorator.cc | 3 ++ .../cloud/bigtable/internal/bigtable_stub.cc | 2 + .../internal/bigtable_stub_factory.cc | 18 ++++++++- .../cloud/bigtable/internal/bulk_mutator.cc | 3 ++ .../bigtable/internal/data_connection_impl.cc | 6 +++ google/cloud/bigtable/table.cc | 1 + .../testing/table_integration_test.cc | 1 + .../bigtable/tests/data_integration_test.cc | 1 + google/cloud/internal/channel_pool.h | 40 ++++++++++++++----- 9 files changed, 63 insertions(+), 12 deletions(-) diff --git a/google/cloud/bigtable/internal/bigtable_random_two_least_used_decorator.cc b/google/cloud/bigtable/internal/bigtable_random_two_least_used_decorator.cc index 7efead4db5ab6..f051edea75294 100644 --- a/google/cloud/bigtable/internal/bigtable_random_two_least_used_decorator.cc +++ b/google/cloud/bigtable/internal/bigtable_random_two_least_used_decorator.cc @@ -89,6 +89,7 @@ std::unique_ptr context, Options const& options, google::bigtable::v2::ReadRowsRequest const& request) { + std::cout << __PRETTY_FUNCTION__ << std::endl; auto child = Child(); auto stub = child->AcquireStub(); auto result = stub->ReadRows(std::move(context), options, request); @@ -136,6 +137,7 @@ std::unique_ptr context, Options const& options, google::bigtable::v2::MutateRowsRequest const& request) { + std::cout << __PRETTY_FUNCTION__ << std::endl; auto child = Child(); auto stub = child->AcquireStub(); auto result = stub->MutateRows(std::move(context), options, request); @@ -334,6 +336,7 @@ BigtableRandomTwoLeastUsed::AsyncPrepareQuery( std::shared_ptr> BigtableRandomTwoLeastUsed::Child() { + std::cout << __PRETTY_FUNCTION__ << std::endl; return pool_->GetChannelRandomTwoLeastUsed(); // std::unique_lock lk(mu_); // std::vector indices(pool_->size(lk) - 1); diff --git a/google/cloud/bigtable/internal/bigtable_stub.cc b/google/cloud/bigtable/internal/bigtable_stub.cc index cb00ccb83ffe8..7d7cec8d88552 100644 --- a/google/cloud/bigtable/internal/bigtable_stub.cc +++ b/google/cloud/bigtable/internal/bigtable_stub.cc @@ -36,6 +36,7 @@ std::unique_ptr context, Options const&, google::bigtable::v2::ReadRowsRequest const& request) { + std::cout << __PRETTY_FUNCTION__ << std::endl; auto stream = grpc_stub_->ReadRows(context.get(), request); return std::make_unique>(std::move(context), @@ -70,6 +71,7 @@ std::unique_ptr context, Options const&, google::bigtable::v2::MutateRowsRequest const& request) { + std::cout << __PRETTY_FUNCTION__ << std::endl; auto stream = grpc_stub_->MutateRows(context.get(), request); return std::make_unique>(std::move(context), diff --git a/google/cloud/bigtable/internal/bigtable_stub_factory.cc b/google/cloud/bigtable/internal/bigtable_stub_factory.cc index 9d5add4a61c44..5ab5b8c583c13 100644 --- a/google/cloud/bigtable/internal/bigtable_stub_factory.cc +++ b/google/cloud/bigtable/internal/bigtable_stub_factory.cc @@ -17,6 +17,7 @@ #include "google/cloud/bigtable/internal/bigtable_channel_refresh.h" #include "google/cloud/bigtable/internal/bigtable_logging_decorator.h" #include "google/cloud/bigtable/internal/bigtable_metadata_decorator.h" +#include "google/cloud/bigtable/internal/bigtable_random_two_least_used_decorator.h" #include "google/cloud/bigtable/internal/bigtable_round_robin_decorator.h" #include "google/cloud/bigtable/internal/bigtable_tracing_stub.h" #include "google/cloud/bigtable/internal/connection_refresh_state.h" @@ -73,6 +74,18 @@ std::shared_ptr CreateBigtableStubRoundRobin( return std::make_shared(std::move(children)); } +std::shared_ptr CreateBigtableStubRandomTwoLeastUsed( + Options const& options, CompletionQueue cq, + std::function(int)> child_factory) { + std::vector> children( + (std::max)(1, options.get())); + int id = 0; + std::generate(children.begin(), children.end(), + [&id, &child_factory] { return child_factory(id++); }); + return std::make_shared(cq, child_factory, + std::move(children)); +} + std::shared_ptr CreateDecoratedStubs( std::shared_ptr auth, CompletionQueue const& cq, Options const& options, @@ -87,7 +100,10 @@ std::shared_ptr CreateDecoratedStubs( if (refresh->enabled()) ScheduleChannelRefresh(cq_impl, refresh, channel); return base_factory(std::move(channel)); }; - auto stub = CreateBigtableStubRoundRobin(options, std::move(child_factory)); + // auto stub = CreateBigtableStubRoundRobin(options, + // std::move(child_factory)); + auto stub = CreateBigtableStubRandomTwoLeastUsed(options, cq, + std::move(child_factory)); if (refresh->enabled()) { stub = std::make_shared(std::move(stub), std::move(refresh)); diff --git a/google/cloud/bigtable/internal/bulk_mutator.cc b/google/cloud/bigtable/internal/bulk_mutator.cc index 7203d23fbd14d..481cab7fe3ce2 100644 --- a/google/cloud/bigtable/internal/bulk_mutator.cc +++ b/google/cloud/bigtable/internal/bulk_mutator.cc @@ -214,6 +214,7 @@ grpc::Status BulkMutator::MakeOneRequest(bigtable::DataClient& client, Status BulkMutator::MakeOneRequest(BigtableStub& stub, MutateRowsLimiter& limiter, Options const& options) { + std::cout << __PRETTY_FUNCTION__ << std::endl; // Send the request to the server. auto const& mutations = state_.BeforeStart(); @@ -226,8 +227,10 @@ Status BulkMutator::MakeOneRequest(BigtableStub& stub, // Potentially throttle the request limiter.Acquire(); + std::cout << __PRETTY_FUNCTION__ << ": pre-stub" << std::endl; // Read the stream of responses. auto stream = stub.MutateRows(client_context, options, mutations); + std::cout << __PRETTY_FUNCTION__ << ": post-stub" << std::endl; absl::optional status; while (true) { btproto::MutateRowsResponse response; diff --git a/google/cloud/bigtable/internal/data_connection_impl.cc b/google/cloud/bigtable/internal/data_connection_impl.cc index 87aeeadd0f7c4..d7dc6b48b262a 100644 --- a/google/cloud/bigtable/internal/data_connection_impl.cc +++ b/google/cloud/bigtable/internal/data_connection_impl.cc @@ -102,6 +102,7 @@ bigtable::RowReader ReadRowsHelper( params, // NOLINT(performance-unnecessary-value-param) std::shared_ptr operation_context) { // NOLINT(performance-unnecessary-value-param) + std::cout << __PRETTY_FUNCTION__ << std::endl; auto impl = std::make_shared( stub, std::move(params.app_profile_id), std::move(params.table_name), std::move(params.row_set), params.rows_limit, std::move(params.filter), @@ -338,6 +339,7 @@ future DataConnectionImpl::AsyncApply(std::string const& table_name, std::vector DataConnectionImpl::BulkApply( std::string const& table_name, bigtable::BulkMutation mut) { + std::cout << __PRETTY_FUNCTION__ << std::endl; auto current = google::cloud::internal::SaveCurrentOptions(); if (mut.empty()) return {}; auto operation_context = operation_context_factory_->MutateRows( @@ -350,6 +352,7 @@ std::vector DataConnectionImpl::BulkApply( std::unique_ptr retry; std::unique_ptr backoff; Status status; + std::cout << __PRETTY_FUNCTION__ << ": pre-loop" << std::endl; while (true) { status = mutator.MakeOneRequest(*stub_, *limiter_, *current); if (!mutator.HasPendingMutations()) break; @@ -361,6 +364,7 @@ std::vector DataConnectionImpl::BulkApply( if (!delay) break; std::this_thread::sleep_for(*delay); } + std::cout << __PRETTY_FUNCTION__ << ": post-loop" << std::endl; operation_context->OnDone(status); return std::move(mutator).OnRetryDone(); } @@ -380,6 +384,7 @@ DataConnectionImpl::AsyncBulkApply(std::string const& table_name, bigtable::RowReader DataConnectionImpl::ReadRowsFull( bigtable::ReadRowsParams params) { + std::cout << __PRETTY_FUNCTION__ << std::endl; auto current = google::cloud::internal::SaveCurrentOptions(); auto operation_context = operation_context_factory_->ReadRows( params.table_name, params.app_profile_id); @@ -660,6 +665,7 @@ void DataConnectionImpl::AsyncReadRows( std::function(bigtable::Row)> on_row, std::function on_finish, bigtable::RowSet row_set, std::int64_t rows_limit, bigtable::Filter filter) { + std::cout << __PRETTY_FUNCTION__ << std::endl; auto current = google::cloud::internal::SaveCurrentOptions(); auto operation_context = operation_context_factory_->ReadRows( table_name, app_profile_id(*current)); diff --git a/google/cloud/bigtable/table.cc b/google/cloud/bigtable/table.cc index 14b96ef6715d3..0bba7b934d7a5 100644 --- a/google/cloud/bigtable/table.cc +++ b/google/cloud/bigtable/table.cc @@ -222,6 +222,7 @@ RowReader Table::ReadRows(RowSet row_set, Filter filter, Options opts) { RowReader Table::ReadRows(RowSet row_set, std::int64_t rows_limit, Filter filter, Options opts) { + std::cout << __PRETTY_FUNCTION__ << std::endl; if (connection_) { OptionsSpan span(MergeOptions(std::move(opts), options_)); return connection_->ReadRows(table_name_, std::move(row_set), rows_limit, diff --git a/google/cloud/bigtable/testing/table_integration_test.cc b/google/cloud/bigtable/testing/table_integration_test.cc index d9f8156437f7b..0cfabf9c990f0 100644 --- a/google/cloud/bigtable/testing/table_integration_test.cc +++ b/google/cloud/bigtable/testing/table_integration_test.cc @@ -122,6 +122,7 @@ void TableAdminTestEnvironment::TearDown() { } void TableIntegrationTest::SetUp() { + std::cout << __PRETTY_FUNCTION__ << std::endl; data_connection_ = MakeDataConnection(); data_client_ = bigtable::MakeDataClient(TableTestEnvironment::project_id(), TableTestEnvironment::instance_id()); diff --git a/google/cloud/bigtable/tests/data_integration_test.cc b/google/cloud/bigtable/tests/data_integration_test.cc index 9a835f61176e3..f81fc1472b4de 100644 --- a/google/cloud/bigtable/tests/data_integration_test.cc +++ b/google/cloud/bigtable/tests/data_integration_test.cc @@ -206,6 +206,7 @@ TEST_P(DataIntegrationTest, TableReadRowNotExistTest) { } TEST_P(DataIntegrationTest, TableReadRowsAllRows) { + std::cout << __PRETTY_FUNCTION__ << std::endl; auto table = GetTable(GetParam()); std::string const row_key1 = "row-key-1"; std::string const row_key2 = "row-key-2"; diff --git a/google/cloud/internal/channel_pool.h b/google/cloud/internal/channel_pool.h index 4e2c8b6b3deca..a4c74e17b4c7e 100644 --- a/google/cloud/internal/channel_pool.h +++ b/google/cloud/internal/channel_pool.h @@ -74,27 +74,30 @@ template class DynamicChannelPool : public std::enable_shared_from_this> { public: - using StubFactoryFn = std::function()>; + using StubFactoryFn = std::function(int id)>; struct SizingPolicy { // To avoid channel churn, the pool will not add or remove channels more // frequently that this period. - std::chrono::milliseconds resize_cooldown_interval; + std::chrono::milliseconds resize_cooldown_interval = + std::chrono::milliseconds(60 * 1000); // If the average number of outstanding RPCs is below this threshold, // the pool size will be decreased. - std::size_t minimum_average_outstanding_rpcs_per_channel; + std::size_t minimum_average_outstanding_rpcs_per_channel = 20; // If the average number of outstanding RPCs is above this threshold, // the pool size will be increased. - std::size_t maximum_average_outstanding_rpcs_per_channel; + std::size_t maximum_average_outstanding_rpcs_per_channel = 80; // When channels are removed from the pool, we have to wait until all // outstanding RPCs on that channel are completed before destroying it. - std::chrono::milliseconds decommissioned_channel_polling_interval; + std::chrono::milliseconds decommissioned_channel_polling_interval = + std::chrono::milliseconds(30 * 1000); }; static std::shared_ptr Create( CompletionQueue cq, std::size_t initial_size, StubFactoryFn factory_fn, SizingPolicy sizing_policy = {}) { + std::cout << __PRETTY_FUNCTION__ << std::endl; std::vector>> initial_wrapped_channels; for (std::size_t i = 0; i < initial_size; ++i) { initial_wrapped_channels.emplace_back(factory_fn()); @@ -103,9 +106,11 @@ class DynamicChannelPool std::move(cq), std::move(initial_wrapped_channels), std::move(factory_fn), std::move(sizing_policy))); } + static std::shared_ptr Create( CompletionQueue cq, std::vector> initial_channels, StubFactoryFn factory_fn, SizingPolicy sizing_policy = {}) { + std::cout << __PRETTY_FUNCTION__ << std::endl; auto pool = std::shared_ptr(new DynamicChannelPool( std::move(cq), std::move(initial_channels), std::move(factory_fn), std::move(sizing_policy))); @@ -132,16 +137,20 @@ class DynamicChannelPool // } std::shared_ptr> GetChannelRandomTwoLeastUsed() { + std::cout << __PRETTY_FUNCTION__ << std::endl; std::unique_lock lk(mu_); + std::cout << __PRETTY_FUNCTION__ << ": channels_size()=" << channels_.size() + << std::endl; // TODO: check if resize is needed. std::vector indices(channels_.size()); // TODO(sdhart): Maybe use iota on iterators instead of indices std::iota(indices.begin(), indices.end(), 0); std::shuffle(indices.begin(), indices.end(), rng_); - auto channel_1 = channels_[indices[0]]; - auto channel_2 = channels_[indices[1]]; + + std::shared_ptr> channel_1 = channels_[indices[0]]; + std::shared_ptr> channel_2 = channels_[indices[1]]; return channel_1->outstanding_rpcs(lk) < channel_2->outstanding_rpcs(lk) ? channel_1 @@ -156,18 +165,26 @@ class DynamicChannelPool : cq_(std::move(cq)), factory_fn_(std::move(factory_fn)), channels_(std::move(initial_wrapped_channels)), - sizing_policy_(std::move(sizing_policy)) {} + sizing_policy_(std::move(sizing_policy)), + next_channel_id_(channels_.size()) {} DynamicChannelPool(CompletionQueue cq, std::vector> initial_channels, StubFactoryFn factory_fn, SizingPolicy sizing_policy) : cq_(std::move(cq)), factory_fn_(std::move(factory_fn)), - channels_(initial_channels.size()), - sizing_policy_(std::move(sizing_policy)) { + channels_(), + sizing_policy_(std::move(sizing_policy)), + next_channel_id_(initial_channels.size()) { + std::cout << __PRETTY_FUNCTION__ << ": wrap initial_channels" << std::endl; + channels_.reserve(initial_channels.size()); for (auto& channel : initial_channels) { - channels_.push_back(std::make_shared>(channel)); + channels_.push_back(std::make_shared>(std::move(channel))); } + // for (auto i = 0; i < channels_.size(); ++i) { + // std::cout << __PRETTY_FUNCTION__ << ": channels_[" << i << + // "].get()=" << channels_[i].get() << std::endl; + // } } void ScheduleAddChannel() {} @@ -234,6 +251,7 @@ class DynamicChannelPool SizingPolicy sizing_policy_; std::vector>> decommissioned_channels_; future> decommission_timer_; + int next_channel_id_; }; } // namespace internal From 4f261553f19b1986c68708fddc3f4cdad6557799 Mon Sep 17 00:00:00 2001 From: Scott Hart Date: Sun, 7 Dec 2025 14:25:16 -0500 Subject: [PATCH 03/11] add and remove implemented --- ...igtable_random_two_least_used_decorator.cc | 41 ++-- ...bigtable_random_two_least_used_decorator.h | 5 +- .../internal/bigtable_stub_factory.cc | 45 ++-- .../bigtable/internal/bigtable_stub_factory.h | 17 +- google/cloud/internal/channel_pool.h | 203 +++++++++++------- 5 files changed, 180 insertions(+), 131 deletions(-) diff --git a/google/cloud/bigtable/internal/bigtable_random_two_least_used_decorator.cc b/google/cloud/bigtable/internal/bigtable_random_two_least_used_decorator.cc index f051edea75294..9aec9fde8169f 100644 --- a/google/cloud/bigtable/internal/bigtable_random_two_least_used_decorator.cc +++ b/google/cloud/bigtable/internal/bigtable_random_two_least_used_decorator.cc @@ -79,10 +79,12 @@ class AsyncStreamingReadRpcTracking BigtableRandomTwoLeastUsed::BigtableRandomTwoLeastUsed( CompletionQueue cq, - internal::DynamicChannelPool::StubFactoryFn factory_fn, + internal::DynamicChannelPool::StubFactoryFn + refreshing_channel_stub_factory_fn, std::vector> children) : pool_(internal::DynamicChannelPool::Create( - std::move(cq), std::move(children), std::move(factory_fn))) {} + std::move(cq), std::move(children), + std::move(refreshing_channel_stub_factory_fn))) {} std::unique_ptr> @@ -93,8 +95,7 @@ BigtableRandomTwoLeastUsed::ReadRows( auto child = Child(); auto stub = child->AcquireStub(); auto result = stub->ReadRows(std::move(context), options, request); - std::weak_ptr> weak = child; - auto release_fn = [weak = std::move(weak)]() { + auto release_fn = [weak = child->MakeWeak()] { auto child = weak.lock(); if (child) child->ReleaseStub(); }; @@ -111,8 +112,7 @@ BigtableRandomTwoLeastUsed::SampleRowKeys( auto child = Child(); auto stub = child->AcquireStub(); auto result = stub->SampleRowKeys(std::move(context), options, request); - std::weak_ptr> weak = child; - auto release_fn = [weak = std::move(weak)]() { + auto release_fn = [weak = child->MakeWeak()] { auto child = weak.lock(); if (child) child->ReleaseStub(); }; @@ -141,8 +141,7 @@ BigtableRandomTwoLeastUsed::MutateRows( auto child = Child(); auto stub = child->AcquireStub(); auto result = stub->MutateRows(std::move(context), options, request); - std::weak_ptr> weak = child; - auto release_fn = [weak = std::move(weak)]() { + auto release_fn = [weak = child->MakeWeak()] { auto child = weak.lock(); if (child) child->ReleaseStub(); }; @@ -203,8 +202,7 @@ BigtableRandomTwoLeastUsed::ExecuteQuery( auto child = Child(); auto stub = child->AcquireStub(); auto result = stub->ExecuteQuery(std::move(context), options, request); - std::weak_ptr> weak = child; - auto release_fn = [weak = std::move(weak)]() { + auto release_fn = [weak = child->MakeWeak()] { auto child = weak.lock(); if (child) child->ReleaseStub(); }; @@ -224,8 +222,7 @@ BigtableRandomTwoLeastUsed::AsyncReadRows( auto stub = child->AcquireStub(); auto result = stub->AsyncReadRows(cq, std::move(context), std::move(options), request); - std::weak_ptr> weak = child; - auto release_fn = [weak = std::move(weak)]() { + auto release_fn = [weak = child->MakeWeak()] { auto child = weak.lock(); if (child) child->ReleaseStub(); }; @@ -245,8 +242,7 @@ BigtableRandomTwoLeastUsed::AsyncSampleRowKeys( auto stub = child->AcquireStub(); auto result = stub->AsyncSampleRowKeys(cq, std::move(context), std::move(options), request); - std::weak_ptr> weak = child; - auto release_fn = [weak = std::move(weak)]() { + auto release_fn = [weak = child->MakeWeak()] { auto child = weak.lock(); if (child) child->ReleaseStub(); }; @@ -280,9 +276,7 @@ BigtableRandomTwoLeastUsed::AsyncMutateRows( auto stub = child->AcquireStub(); auto result = stub->AsyncMutateRows(cq, std::move(context), std::move(options), request); - - std::weak_ptr> weak = child; - auto release_fn = [weak = std::move(weak)]() { + auto release_fn = [weak = child->MakeWeak()] { auto child = weak.lock(); if (child) child->ReleaseStub(); }; @@ -334,21 +328,10 @@ BigtableRandomTwoLeastUsed::AsyncPrepareQuery( return result; } -std::shared_ptr> +std::shared_ptr> BigtableRandomTwoLeastUsed::Child() { std::cout << __PRETTY_FUNCTION__ << std::endl; return pool_->GetChannelRandomTwoLeastUsed(); - // std::unique_lock lk(mu_); - // std::vector indices(pool_->size(lk) - 1); - // // TODO(sdhart): Maybe use iota on iterators instead of indices - // std::iota(indices.begin(), indices.end(), 0); - // std::shuffle(indices.begin(), indices.end(), rng_); - // auto channel_1 = pool_->GetChannel(lk, indices[0]); - // auto channel_2 = pool_->GetChannel(lk, indices[1]); - // - // return channel_1->outstanding_rpcs(lk) < channel_2->outstanding_rpcs(lk) - // ? channel_1 - // : channel_2; } GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_END diff --git a/google/cloud/bigtable/internal/bigtable_random_two_least_used_decorator.h b/google/cloud/bigtable/internal/bigtable_random_two_least_used_decorator.h index 7aec08e067d3b..70791b4d0f131 100644 --- a/google/cloud/bigtable/internal/bigtable_random_two_least_used_decorator.h +++ b/google/cloud/bigtable/internal/bigtable_random_two_least_used_decorator.h @@ -35,7 +35,8 @@ class BigtableRandomTwoLeastUsed : public BigtableStub { public: BigtableRandomTwoLeastUsed( CompletionQueue cq, - internal::DynamicChannelPool::StubFactoryFn factory_fn, + internal::DynamicChannelPool::StubFactoryFn + refreshing_channel_stub_factory_fn, std::vector> children); ~BigtableRandomTwoLeastUsed() override = default; @@ -133,7 +134,7 @@ class BigtableRandomTwoLeastUsed : public BigtableStub { google::bigtable::v2::PrepareQueryRequest const& request) override; private: - std::shared_ptr> Child(); + std::shared_ptr> Child(); // std::mutex mu_; std::shared_ptr> pool_; diff --git a/google/cloud/bigtable/internal/bigtable_stub_factory.cc b/google/cloud/bigtable/internal/bigtable_stub_factory.cc index 5ab5b8c583c13..00bc436f66207 100644 --- a/google/cloud/bigtable/internal/bigtable_stub_factory.cc +++ b/google/cloud/bigtable/internal/bigtable_stub_factory.cc @@ -64,46 +64,59 @@ std::string FeaturesMetadata() { } // namespace std::shared_ptr CreateBigtableStubRoundRobin( - Options const& options, - std::function(int)> child_factory) { + Options const& options, std::function(int)> + refreshing_channel_stub_factory) { std::vector> children( (std::max)(1, options.get())); int id = 0; std::generate(children.begin(), children.end(), - [&id, &child_factory] { return child_factory(id++); }); + [&id, &refreshing_channel_stub_factory] { + return refreshing_channel_stub_factory(id++); + }); return std::make_shared(std::move(children)); } std::shared_ptr CreateBigtableStubRandomTwoLeastUsed( - Options const& options, CompletionQueue cq, - std::function(int)> child_factory) { + CompletionQueue cq, Options const& options, + std::function(int)> + refreshing_channel_stub_factory) { std::vector> children( (std::max)(1, options.get())); int id = 0; std::generate(children.begin(), children.end(), - [&id, &child_factory] { return child_factory(id++); }); - return std::make_shared(cq, child_factory, - std::move(children)); + [&id, &refreshing_channel_stub_factory] { + return refreshing_channel_stub_factory(id++); + }); + return std::make_shared( + cq, refreshing_channel_stub_factory, std::move(children)); } std::shared_ptr CreateDecoratedStubs( std::shared_ptr auth, CompletionQueue const& cq, Options const& options, - BaseBigtableStubFactory const& base_factory) { + BaseBigtableStubFactory const& stub_factory) { auto cq_impl = internal::GetCompletionQueueImpl(cq); auto refresh = std::make_shared( cq_impl, options.get(), options.get()); - auto child_factory = [base_factory, cq_impl, refresh, &auth, - options](int id) { + auto refreshing_channel_stub_factory = [stub_factory, cq_impl, refresh, &auth, + options](int id) { auto channel = CreateGrpcChannel(*auth, options, id); if (refresh->enabled()) ScheduleChannelRefresh(cq_impl, refresh, channel); - return base_factory(std::move(channel)); + return stub_factory(std::move(channel)); }; - // auto stub = CreateBigtableStubRoundRobin(options, - // std::move(child_factory)); - auto stub = CreateBigtableStubRandomTwoLeastUsed(options, cq, - std::move(child_factory)); + + std::shared_ptr stub; + if (options.has() && + options.get() == + ChannelSelectionStrategy::kRandomTwoLeastUsed) { + stub = CreateBigtableStubRandomTwoLeastUsed( + cq, options, std::move(refreshing_channel_stub_factory)); + } else { + stub = CreateBigtableStubRoundRobin( + options, std::move(refreshing_channel_stub_factory)); + } + if (refresh->enabled()) { stub = std::make_shared(std::move(stub), std::move(refresh)); diff --git a/google/cloud/bigtable/internal/bigtable_stub_factory.h b/google/cloud/bigtable/internal/bigtable_stub_factory.h index cdf7a633b0f42..df69d79de7b32 100644 --- a/google/cloud/bigtable/internal/bigtable_stub_factory.h +++ b/google/cloud/bigtable/internal/bigtable_stub_factory.h @@ -28,18 +28,29 @@ namespace cloud { namespace bigtable_internal { GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_BEGIN +enum class ChannelSelectionStrategy { kNone, kRoundRobin, kRandomTwoLeastUsed }; + +struct ChannelSelectionStrategyOption { + using Type = ChannelSelectionStrategy; +}; + using BaseBigtableStubFactory = std::function( std::shared_ptr)>; std::shared_ptr CreateBigtableStubRoundRobin( - Options const& options, - std::function(int)> child_factory); + Options const& options, std::function(int)> + refreshing_channel_stub_factory); + +std::shared_ptr CreateBigtableStubRandomTwoLeastUsed( + CompletionQueue cq, Options const& options, + std::function(int)> + refreshing_channel_stub_factory); /// Used in testing to create decorated mocks. std::shared_ptr CreateDecoratedStubs( std::shared_ptr auth, CompletionQueue const& cq, Options const& options, - BaseBigtableStubFactory const& base_factory); + BaseBigtableStubFactory const& stub_factory); /// Default function used by `DataConnectionImpl`. std::shared_ptr CreateBigtableStub( diff --git a/google/cloud/internal/channel_pool.h b/google/cloud/internal/channel_pool.h index a4c74e17b4c7e..75ca09653719b 100644 --- a/google/cloud/internal/channel_pool.h +++ b/google/cloud/internal/channel_pool.h @@ -28,19 +28,23 @@ namespace cloud { GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_BEGIN namespace internal { -// StubWrapper could maybe be a Decorator on the Stub -// May have to be a Decorator on Stub to handle accounting destructor for -// streaming template -class StubWrapper { +class StubUsageWrapper + : public std::enable_shared_from_this> { public: - explicit StubWrapper(std::shared_ptr stub) - : stub_(std::move(stub)), outstanding_rpcs_(0) {} + explicit StubUsageWrapper(std::shared_ptr stub) : stub_(std::move(stub)) {} - int outstanding_rpcs(std::unique_lock const&) const { + // This value is a snapshot and can change immediately after the lock is + // released. + int outstanding_rpcs() const { + std::unique_lock lk(mu_); return outstanding_rpcs_; } + std::weak_ptr> MakeWeak() { + return this->shared_from_this(); + } + std::shared_ptr AcquireStub() { std::unique_lock lk(mu_); ++outstanding_rpcs_; @@ -55,7 +59,7 @@ class StubWrapper { private: mutable std::mutex mu_; std::shared_ptr stub_; - int outstanding_rpcs_; + int outstanding_rpcs_ = 0; }; template @@ -78,41 +82,41 @@ class DynamicChannelPool struct SizingPolicy { // To avoid channel churn, the pool will not add or remove channels more // frequently that this period. - std::chrono::milliseconds resize_cooldown_interval = + std::chrono::milliseconds pool_resize_cooldown_interval = std::chrono::milliseconds(60 * 1000); // If the average number of outstanding RPCs is below this threshold, // the pool size will be decreased. - std::size_t minimum_average_outstanding_rpcs_per_channel = 20; + int minimum_average_outstanding_rpcs_per_channel = 20; // If the average number of outstanding RPCs is above this threshold, // the pool size will be increased. - std::size_t maximum_average_outstanding_rpcs_per_channel = 80; + int maximum_average_outstanding_rpcs_per_channel = 80; // When channels are removed from the pool, we have to wait until all // outstanding RPCs on that channel are completed before destroying it. - std::chrono::milliseconds decommissioned_channel_polling_interval = + std::chrono::milliseconds remove_channel_polling_interval = std::chrono::milliseconds(30 * 1000); }; static std::shared_ptr Create( - CompletionQueue cq, std::size_t initial_size, StubFactoryFn factory_fn, - SizingPolicy sizing_policy = {}) { + CompletionQueue cq, std::size_t initial_size, + StubFactoryFn stub_factory_fn, SizingPolicy sizing_policy = {}) { std::cout << __PRETTY_FUNCTION__ << std::endl; - std::vector>> initial_wrapped_channels; + std::vector>> initial_wrapped_channels; for (std::size_t i = 0; i < initial_size; ++i) { - initial_wrapped_channels.emplace_back(factory_fn()); + initial_wrapped_channels.emplace_back(stub_factory_fn()); } auto pool = std::shared_ptr(new DynamicChannelPool( std::move(cq), std::move(initial_wrapped_channels), - std::move(factory_fn), std::move(sizing_policy))); + std::move(stub_factory_fn), std::move(sizing_policy))); } static std::shared_ptr Create( CompletionQueue cq, std::vector> initial_channels, - StubFactoryFn factory_fn, SizingPolicy sizing_policy = {}) { + StubFactoryFn stub_factory_fn, SizingPolicy sizing_policy = {}) { std::cout << __PRETTY_FUNCTION__ << std::endl; auto pool = std::shared_ptr(new DynamicChannelPool( - std::move(cq), std::move(initial_channels), std::move(factory_fn), + std::move(cq), std::move(initial_channels), std::move(stub_factory_fn), std::move(sizing_policy))); return pool; } @@ -124,133 +128,170 @@ class DynamicChannelPool return channels_.size(); } - // std::shared_ptr> GetChannel( + // std::shared_ptr> GetChannel( // std::unique_lock const&) { // // TODO: check for empty // return channels_[0]; // } // - // std::shared_ptr> GetChannel( + // std::shared_ptr> GetChannel( // std::unique_lock const&, std::size_t index) { // // TODO: bounds check // return channels_[index]; // } - std::shared_ptr> GetChannelRandomTwoLeastUsed() { - std::cout << __PRETTY_FUNCTION__ << std::endl; + std::shared_ptr> GetChannelRandomTwoLeastUsed() { std::unique_lock lk(mu_); - std::cout << __PRETTY_FUNCTION__ << ": channels_size()=" << channels_.size() << std::endl; - // TODO: check if resize is needed. - std::vector indices(channels_.size()); - // TODO(sdhart): Maybe use iota on iterators instead of indices - std::iota(indices.begin(), indices.end(), 0); - std::shuffle(indices.begin(), indices.end(), rng_); + if (!pool_resize_cooldown_timer_) { + CheckPoolChannelHealth(lk); + } else if (pool_resize_cooldown_timer_->is_ready()) { + (void)pool_resize_cooldown_timer_->get(); + pool_resize_cooldown_timer_ = absl::nullopt; + CheckPoolChannelHealth(lk); + } - std::shared_ptr> channel_1 = channels_[indices[0]]; - std::shared_ptr> channel_2 = channels_[indices[1]]; + if (channels_.size() == 1) { + return channels_[0]; + } - return channel_1->outstanding_rpcs(lk) < channel_2->outstanding_rpcs(lk) + std::shared_ptr> channel_1; + std::shared_ptr> channel_2; + if (channels_.size() == 2) { + channel_1 = channels_[0]; + channel_2 = channels_[1]; + } else { + std::vector indices(channels_.size()); + // TODO(sdhart): Maybe use iota on iterators instead of indices + std::iota(indices.begin(), indices.end(), 0); + std::shuffle(indices.begin(), indices.end(), rng_); + + channel_1 = channels_[indices[0]]; + channel_2 = channels_[indices[1]]; + } + return channel_1->outstanding_rpcs() < channel_2->outstanding_rpcs() ? channel_1 : channel_2; } private: - DynamicChannelPool( - CompletionQueue cq, - std::vector>> initial_wrapped_channels, - StubFactoryFn factory_fn, SizingPolicy sizing_policy) + DynamicChannelPool(CompletionQueue cq, + std::vector>> + initial_wrapped_channels, + StubFactoryFn stub_factory_fn, SizingPolicy sizing_policy) : cq_(std::move(cq)), - factory_fn_(std::move(factory_fn)), + stub_factory_fn_(std::move(stub_factory_fn)), channels_(std::move(initial_wrapped_channels)), sizing_policy_(std::move(sizing_policy)), next_channel_id_(channels_.size()) {} DynamicChannelPool(CompletionQueue cq, std::vector> initial_channels, - StubFactoryFn factory_fn, SizingPolicy sizing_policy) + StubFactoryFn stub_factory_fn, SizingPolicy sizing_policy) : cq_(std::move(cq)), - factory_fn_(std::move(factory_fn)), + stub_factory_fn_(std::move(stub_factory_fn)), channels_(), sizing_policy_(std::move(sizing_policy)), next_channel_id_(initial_channels.size()) { std::cout << __PRETTY_FUNCTION__ << ": wrap initial_channels" << std::endl; channels_.reserve(initial_channels.size()); for (auto& channel : initial_channels) { - channels_.push_back(std::make_shared>(std::move(channel))); + channels_.push_back( + std::make_shared>(std::move(channel))); } - // for (auto i = 0; i < channels_.size(); ++i) { - // std::cout << __PRETTY_FUNCTION__ << ": channels_[" << i << - // "].get()=" << channels_[i].get() << std::endl; - // } } - void ScheduleAddChannel() {} - void AddChannel() {} - - void ScheduleRemoveChannel() { - // std::weak_ptr> weak = this->shared_from_this(); - decommission_timer_ = - cq_.MakeRelativeTimer( - sizing_policy_.decommissioned_channel_polling_interval) - .then([weak = std::weak_ptr>( - this->shared_from_this())]() { - if (weak.lock()) { - weak->RemoveChannel(); - } - }); + void ScheduleAddChannel(std::unique_lock const&) { + std::weak_ptr> foo = this->shared_from_this(); + cq_.RunAsync( + [new_channel_id = next_channel_id_++, weak = std::move(foo)]() { + if (auto self = weak.lock()) { + self->AddChannel(new_channel_id); + } + }); + } + + void AddChannel(int new_channel_id) { + auto new_stub = stub_factory_fn_(new_channel_id); + std::unique_lock lk(mu_); + channels_.push_back( + std::make_shared>(std::move(new_stub))); + } + + void ScheduleRemoveChannel(std::unique_lock const&) { + std::weak_ptr> foo = this->shared_from_this(); + remove_channel_poll_timer_ = + cq_.MakeRelativeTimer(sizing_policy_.remove_channel_polling_interval) + .then( + [weak = std::move(foo)]( + future> f) { + if (f.get().ok()) { + if (auto self = weak.lock()) { + self->RemoveChannel(); + } + } + }); } void RemoveChannel() { std::unique_lock lk(mu_); - std::sort(decommissioned_channels_.begin(), decommissioned_channels_.end(), - [](std::shared_ptr> const& a, - std::shared_ptr> b) { + std::sort(draining_channels_.begin(), draining_channels_.end(), + [](std::shared_ptr> const& a, + std::shared_ptr> b) { return a->outstanding_rpcs() > b->outstanding_rpcs(); }); - while (!decommissioned_channels_.empty()) { - if (decommissioned_channels_.back()->outstanding_rpcs() != 0) { - ScheduleRemoveChannel(); + while (!draining_channels_.empty()) { + if (draining_channels_.back()->outstanding_rpcs() != 0) { + ScheduleRemoveChannel(lk); return; } - decommissioned_channels_.pop_back(); + draining_channels_.pop_back(); } } - void CheckPoolChannelHealth(std::unique_lock const&) { - auto average_rpc_per_channel = - std::accumulate(channels_.begin(), channels_.end(), - [](std::shared_ptr> const& s) { - return s->outstanding_rpcs(); - }) / - channels_.size(); + void SetResizeCooldownTimer(std::unique_lock const&) { + pool_resize_cooldown_timer_ = + cq_.MakeRelativeTimer(sizing_policy_.pool_resize_cooldown_interval); + } + + void CheckPoolChannelHealth(std::unique_lock const& lk) { + int average_rpc_per_channel = + std::accumulate( + channels_.begin(), channels_.end(), 0, + [](int a, auto const& b) { return a + b->outstanding_rpcs(); }) / + static_cast(channels_.size()); if (average_rpc_per_channel < sizing_policy_.minimum_average_outstanding_rpcs_per_channel) { - // TODO(sdhart): Is there a downside to always removing the most recently - // created channel? - decommissioned_channels_.push_back(std::move(channels_.back())); + auto random_channel = std::uniform_int_distribution( + 0, channels_.size() - 1)(rng_); + std::swap(channels_[random_channel], channels_.back()); + draining_channels_.push_back(std::move(channels_.back())); channels_.pop_back(); - ScheduleRemoveChannel(); + ScheduleRemoveChannel(lk); + SetResizeCooldownTimer(lk); } if (average_rpc_per_channel > sizing_policy_.maximum_average_outstanding_rpcs_per_channel) { // Channel/stub creation is expensive, instead of making the current RPC // wait on this, use an existing channel right now, and schedule a channel // to be added. - ScheduleAddChannel(); + ScheduleAddChannel(lk); + SetResizeCooldownTimer(lk); } } mutable std::mutex mu_; CompletionQueue cq_; google::cloud::internal::DefaultPRNG rng_; - StubFactoryFn factory_fn_; - std::vector>> channels_; + StubFactoryFn stub_factory_fn_; + std::vector>> channels_; SizingPolicy sizing_policy_; - std::vector>> decommissioned_channels_; - future> decommission_timer_; + std::vector>> draining_channels_; + future remove_channel_poll_timer_; + absl::optional>> + pool_resize_cooldown_timer_ = absl::nullopt; int next_channel_id_; }; From 4b11b1698c95b33dfdf2e12dca830917bfc68236 Mon Sep 17 00:00:00 2001 From: Scott Hart Date: Sun, 7 Dec 2025 19:43:23 -0500 Subject: [PATCH 04/11] fixed or percentage channel add --- .../internal/bigtable_stub_factory.cc | 2 +- google/cloud/internal/channel_pool.h | 62 +++++++++++++++---- 2 files changed, 51 insertions(+), 13 deletions(-) diff --git a/google/cloud/bigtable/internal/bigtable_stub_factory.cc b/google/cloud/bigtable/internal/bigtable_stub_factory.cc index 00bc436f66207..88477fb861407 100644 --- a/google/cloud/bigtable/internal/bigtable_stub_factory.cc +++ b/google/cloud/bigtable/internal/bigtable_stub_factory.cc @@ -88,7 +88,7 @@ std::shared_ptr CreateBigtableStubRandomTwoLeastUsed( return refreshing_channel_stub_factory(id++); }); return std::make_shared( - cq, refreshing_channel_stub_factory, std::move(children)); + std::move(cq), refreshing_channel_stub_factory, std::move(children)); } std::shared_ptr CreateDecoratedStubs( diff --git a/google/cloud/internal/channel_pool.h b/google/cloud/internal/channel_pool.h index 75ca09653719b..23b0cf2146fb1 100644 --- a/google/cloud/internal/channel_pool.h +++ b/google/cloud/internal/channel_pool.h @@ -18,6 +18,7 @@ #include "google/cloud/completion_queue.h" #include "google/cloud/internal/random.h" #include "google/cloud/version.h" +#include #include #include #include @@ -85,6 +86,15 @@ class DynamicChannelPool std::chrono::milliseconds pool_resize_cooldown_interval = std::chrono::milliseconds(60 * 1000); + struct DiscreteChannels { + int number; + }; + struct PercentageOfPoolSize { + double percentage; + }; + absl::variant + channels_to_add_per_resize = DiscreteChannels{1}; + // If the average number of outstanding RPCs is below this threshold, // the pool size will be decreased. int minimum_average_outstanding_rpcs_per_channel = 20; @@ -154,7 +164,7 @@ class DynamicChannelPool } if (channels_.size() == 1) { - return channels_[0]; + return channels_.front(); } std::shared_ptr> channel_1; @@ -194,7 +204,7 @@ class DynamicChannelPool stub_factory_fn_(std::move(stub_factory_fn)), channels_(), sizing_policy_(std::move(sizing_policy)), - next_channel_id_(initial_channels.size()) { + next_channel_id_(static_cast(initial_channels.size())) { std::cout << __PRETTY_FUNCTION__ << ": wrap initial_channels" << std::endl; channels_.reserve(initial_channels.size()); for (auto& channel : initial_channels) { @@ -203,21 +213,49 @@ class DynamicChannelPool } } + struct ChannelAddVisitor { + std::size_t pool_size; + explicit ChannelAddVisitor(std::size_t pool_size) : pool_size(pool_size) {} + int operator()(typename SizingPolicy::DiscreteChannels const& c) { + return c.number; + } + + int operator()(typename SizingPolicy::PercentageOfPoolSize const& c) { + return static_cast( + std::floor(static_cast(pool_size) * c.percentage)); + } + }; + void ScheduleAddChannel(std::unique_lock const&) { + auto num_channels_to_add = + absl::visit(ChannelAddVisitor(channels_.size()), + sizing_policy_.channels_to_add_per_resize); + std::vector new_channel_ids; + new_channel_ids.reserve(num_channels_to_add); + for (int i = 0; i < num_channels_to_add; ++i) { + new_channel_ids.push_back(next_channel_id_++); + } + std::weak_ptr> foo = this->shared_from_this(); - cq_.RunAsync( - [new_channel_id = next_channel_id_++, weak = std::move(foo)]() { - if (auto self = weak.lock()) { - self->AddChannel(new_channel_id); - } - }); + cq_.RunAsync([new_channel_ids = std::move(new_channel_ids), + weak = std::move(foo)]() { + if (auto self = weak.lock()) { + self->AddChannel(new_channel_ids); + } + }); } - void AddChannel(int new_channel_id) { - auto new_stub = stub_factory_fn_(new_channel_id); + void AddChannel(std::vector const& new_channel_ids) { + std::vector>> new_stubs; + new_stubs.reserve(new_channel_ids.size()); + for (auto const& id : new_channel_ids) { + new_stubs.push_back( + std::make_shared>(stub_factory_fn_(id))); + } std::unique_lock lk(mu_); - channels_.push_back( - std::make_shared>(std::move(new_stub))); + channels_.insert(channels_.end(), + std::make_move_iterator(new_stubs.begin()), + std::make_move_iterator(new_stubs.end())); } void ScheduleRemoveChannel(std::unique_lock const&) { From 97d4f82b41b8a9a91997816c3b129ad64625ceff Mon Sep 17 00:00:00 2001 From: Scott Hart Date: Mon, 8 Dec 2025 18:20:54 -0500 Subject: [PATCH 05/11] undo some of the renaming --- google/cloud/bigtable/CMakeLists.txt | 3 ++ .../bigtable/bigtable_client_unit_tests.bzl | 1 + .../bigtable/google_cloud_cpp_bigtable.bzl | 2 ++ ...igtable_random_two_least_used_decorator.cc | 10 ++---- ...bigtable_random_two_least_used_decorator.h | 12 +++---- .../internal/dynamic_channel_pool.cc} | 4 ++- .../internal/dynamic_channel_pool.h} | 34 +++---------------- .../internal/dynamic_channel_pool_test.cc} | 4 ++- google/cloud/google_cloud_cpp_grpc_utils.bzl | 2 -- .../cloud/google_cloud_cpp_grpc_utils.cmake | 3 -- ...google_cloud_cpp_grpc_utils_unit_tests.bzl | 1 - 11 files changed, 24 insertions(+), 52 deletions(-) rename google/cloud/{internal/channel_pool.cc => bigtable/internal/dynamic_channel_pool.cc} (85%) rename google/cloud/{internal/channel_pool.h => bigtable/internal/dynamic_channel_pool.h} (93%) rename google/cloud/{internal/channel_pool_test.cc => bigtable/internal/dynamic_channel_pool_test.cc} (87%) diff --git a/google/cloud/bigtable/CMakeLists.txt b/google/cloud/bigtable/CMakeLists.txt index 0463bf11af9b6..b8c0781f75b7a 100644 --- a/google/cloud/bigtable/CMakeLists.txt +++ b/google/cloud/bigtable/CMakeLists.txt @@ -196,6 +196,8 @@ add_library( internal/default_row_reader.h internal/defaults.cc internal/defaults.h + internal/dynamic_channel_pool.cc + internal/dynamic_channel_pool.h internal/google_bytes_traits.cc internal/google_bytes_traits.h internal/legacy_async_bulk_apply.cc @@ -493,6 +495,7 @@ if (BUILD_TESTING) internal/data_tracing_connection_test.cc internal/default_row_reader_test.cc internal/defaults_test.cc + internal/dynamic_channel_pool_test.cc internal/google_bytes_traits_test.cc internal/legacy_async_bulk_apply_test.cc internal/legacy_async_row_reader_test.cc diff --git a/google/cloud/bigtable/bigtable_client_unit_tests.bzl b/google/cloud/bigtable/bigtable_client_unit_tests.bzl index c5dda043da0a4..7c6ec80782a75 100644 --- a/google/cloud/bigtable/bigtable_client_unit_tests.bzl +++ b/google/cloud/bigtable/bigtable_client_unit_tests.bzl @@ -56,6 +56,7 @@ bigtable_client_unit_tests = [ "internal/data_tracing_connection_test.cc", "internal/default_row_reader_test.cc", "internal/defaults_test.cc", + "internal/dynamic_channel_pool_test.cc", "internal/google_bytes_traits_test.cc", "internal/legacy_async_bulk_apply_test.cc", "internal/legacy_async_row_reader_test.cc", diff --git a/google/cloud/bigtable/google_cloud_cpp_bigtable.bzl b/google/cloud/bigtable/google_cloud_cpp_bigtable.bzl index 2003b64b86b99..eccb8f17ba000 100644 --- a/google/cloud/bigtable/google_cloud_cpp_bigtable.bzl +++ b/google/cloud/bigtable/google_cloud_cpp_bigtable.bzl @@ -96,6 +96,7 @@ google_cloud_cpp_bigtable_hdrs = [ "internal/data_tracing_connection.h", "internal/default_row_reader.h", "internal/defaults.h", + "internal/dynamic_channel_pool.h", "internal/google_bytes_traits.h", "internal/legacy_async_bulk_apply.h", "internal/legacy_async_row_reader.h", @@ -219,6 +220,7 @@ google_cloud_cpp_bigtable_srcs = [ "internal/data_tracing_connection.cc", "internal/default_row_reader.cc", "internal/defaults.cc", + "internal/dynamic_channel_pool.cc", "internal/google_bytes_traits.cc", "internal/legacy_async_bulk_apply.cc", "internal/legacy_async_row_reader.cc", diff --git a/google/cloud/bigtable/internal/bigtable_random_two_least_used_decorator.cc b/google/cloud/bigtable/internal/bigtable_random_two_least_used_decorator.cc index 9aec9fde8169f..8c8381841436a 100644 --- a/google/cloud/bigtable/internal/bigtable_random_two_least_used_decorator.cc +++ b/google/cloud/bigtable/internal/bigtable_random_two_least_used_decorator.cc @@ -12,10 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -// Generated by the Codegen C++ plugin. -// If you make any local changes, they will be lost. -// source: google/bigtable/v2/bigtable.proto - #include "google/cloud/bigtable/internal/bigtable_random_two_least_used_decorator.h" #include #include @@ -79,10 +75,10 @@ class AsyncStreamingReadRpcTracking BigtableRandomTwoLeastUsed::BigtableRandomTwoLeastUsed( CompletionQueue cq, - internal::DynamicChannelPool::StubFactoryFn + DynamicChannelPool::StubFactoryFn refreshing_channel_stub_factory_fn, std::vector> children) - : pool_(internal::DynamicChannelPool::Create( + : pool_(DynamicChannelPool::Create( std::move(cq), std::move(children), std::move(refreshing_channel_stub_factory_fn))) {} @@ -328,7 +324,7 @@ BigtableRandomTwoLeastUsed::AsyncPrepareQuery( return result; } -std::shared_ptr> +std::shared_ptr> BigtableRandomTwoLeastUsed::Child() { std::cout << __PRETTY_FUNCTION__ << std::endl; return pool_->GetChannelRandomTwoLeastUsed(); diff --git a/google/cloud/bigtable/internal/bigtable_random_two_least_used_decorator.h b/google/cloud/bigtable/internal/bigtable_random_two_least_used_decorator.h index 70791b4d0f131..52a20c78a47e5 100644 --- a/google/cloud/bigtable/internal/bigtable_random_two_least_used_decorator.h +++ b/google/cloud/bigtable/internal/bigtable_random_two_least_used_decorator.h @@ -12,15 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. -// Generated by the Codegen C++ plugin. -// If you make any local changes, they will be lost. -// source: google/bigtable/v2/bigtable.proto - #ifndef GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_INTERNAL_BIGTABLE_RANDOM_TWO_LEAST_USED_DECORATOR_H #define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_INTERNAL_BIGTABLE_RANDOM_TWO_LEAST_USED_DECORATOR_H #include "google/cloud/bigtable/internal/bigtable_stub.h" -#include "google/cloud/internal/channel_pool.h" +#include "google/cloud/bigtable/internal/dynamic_channel_pool.h" #include "google/cloud/version.h" #include #include @@ -35,7 +31,7 @@ class BigtableRandomTwoLeastUsed : public BigtableStub { public: BigtableRandomTwoLeastUsed( CompletionQueue cq, - internal::DynamicChannelPool::StubFactoryFn + DynamicChannelPool::StubFactoryFn refreshing_channel_stub_factory_fn, std::vector> children); ~BigtableRandomTwoLeastUsed() override = default; @@ -134,10 +130,10 @@ class BigtableRandomTwoLeastUsed : public BigtableStub { google::bigtable::v2::PrepareQueryRequest const& request) override; private: - std::shared_ptr> Child(); + std::shared_ptr> Child(); // std::mutex mu_; - std::shared_ptr> pool_; + std::shared_ptr> pool_; }; GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_END diff --git a/google/cloud/internal/channel_pool.cc b/google/cloud/bigtable/internal/dynamic_channel_pool.cc similarity index 85% rename from google/cloud/internal/channel_pool.cc rename to google/cloud/bigtable/internal/dynamic_channel_pool.cc index 943a68b8a2340..d47303482257a 100644 --- a/google/cloud/internal/channel_pool.cc +++ b/google/cloud/bigtable/internal/dynamic_channel_pool.cc @@ -12,12 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "google/cloud/internal/channel_pool.h" +#include "google/cloud/bigtable/internal/dynamic_channel_pool.h" #include "google/cloud/version.h" namespace google { namespace cloud { +namespace bigtable_internal { GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_BEGIN GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_END +} // namespace bigtable_internal } // namespace cloud } // namespace google diff --git a/google/cloud/internal/channel_pool.h b/google/cloud/bigtable/internal/dynamic_channel_pool.h similarity index 93% rename from google/cloud/internal/channel_pool.h rename to google/cloud/bigtable/internal/dynamic_channel_pool.h index 23b0cf2146fb1..b491a0b38e81a 100644 --- a/google/cloud/internal/channel_pool.h +++ b/google/cloud/bigtable/internal/dynamic_channel_pool.h @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -#ifndef GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_INTERNAL_CHANNEL_POOL_H -#define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_INTERNAL_CHANNEL_POOL_H +#ifndef GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_INTERNAL_DYNAMIC_CHANNEL_POOL_H +#define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_INTERNAL_DYNAMIC_CHANNEL_POOL_H #include "google/cloud/completion_queue.h" #include "google/cloud/internal/random.h" @@ -26,8 +26,8 @@ namespace google { namespace cloud { +namespace bigtable_internal { GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_BEGIN -namespace internal { template class StubUsageWrapper @@ -63,18 +63,6 @@ class StubUsageWrapper int outstanding_rpcs_ = 0; }; -template -class StaticChannelPool { - public: - explicit StaticChannelPool(std::size_t size); - - std::shared_ptr GetChannel(); - std::shared_ptr GetChannel(std::size_t index); - - private: - std::vector channels_; -}; - template class DynamicChannelPool : public std::enable_shared_from_this> { @@ -138,18 +126,6 @@ class DynamicChannelPool return channels_.size(); } - // std::shared_ptr> GetChannel( - // std::unique_lock const&) { - // // TODO: check for empty - // return channels_[0]; - // } - // - // std::shared_ptr> GetChannel( - // std::unique_lock const&, std::size_t index) { - // // TODO: bounds check - // return channels_[index]; - // } - std::shared_ptr> GetChannelRandomTwoLeastUsed() { std::unique_lock lk(mu_); std::cout << __PRETTY_FUNCTION__ << ": channels_size()=" << channels_.size() @@ -333,9 +309,9 @@ class DynamicChannelPool int next_channel_id_; }; -} // namespace internal GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_END +} // namespace bigtable_internal } // namespace cloud } // namespace google -#endif // GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_INTERNAL_CHANNEL_POOL_H +#endif // GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_INTERNAL_DYNAMIC_CHANNEL_POOL_H diff --git a/google/cloud/internal/channel_pool_test.cc b/google/cloud/bigtable/internal/dynamic_channel_pool_test.cc similarity index 87% rename from google/cloud/internal/channel_pool_test.cc rename to google/cloud/bigtable/internal/dynamic_channel_pool_test.cc index 46137a70d01b5..ed4333fd8e010 100644 --- a/google/cloud/internal/channel_pool_test.cc +++ b/google/cloud/bigtable/internal/dynamic_channel_pool_test.cc @@ -12,14 +12,16 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "google/cloud/internal/channel_pool.h" +#include "google/cloud/bigtable/internal/dynamic_channel_pool.h" #include "google/cloud/testing_util/status_matchers.h" #include namespace google { namespace cloud { +namespace bigtable_internal { GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_BEGIN namespace internal {} // namespace internal GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_END +} // namespace bigtable_internal } // namespace cloud } // namespace google diff --git a/google/cloud/google_cloud_cpp_grpc_utils.bzl b/google/cloud/google_cloud_cpp_grpc_utils.bzl index c6d83ae5f3eeb..d54df10510c40 100644 --- a/google/cloud/google_cloud_cpp_grpc_utils.bzl +++ b/google/cloud/google_cloud_cpp_grpc_utils.bzl @@ -54,7 +54,6 @@ google_cloud_cpp_grpc_utils_hdrs = [ "internal/async_streaming_write_rpc_timeout.h", "internal/async_streaming_write_rpc_tracing.h", "internal/background_threads_impl.h", - "internal/channel_pool.h", "internal/completion_queue_impl.h", "internal/debug_string_protobuf.h", "internal/debug_string_status.h", @@ -96,7 +95,6 @@ google_cloud_cpp_grpc_utils_srcs = [ "internal/async_connection_ready.cc", "internal/async_polling_loop.cc", "internal/background_threads_impl.cc", - "internal/channel_pool.cc", "internal/debug_string_protobuf.cc", "internal/debug_string_status.cc", "internal/default_completion_queue_impl.cc", diff --git a/google/cloud/google_cloud_cpp_grpc_utils.cmake b/google/cloud/google_cloud_cpp_grpc_utils.cmake index f9ce47a1bf486..f73a7d81e09b2 100644 --- a/google/cloud/google_cloud_cpp_grpc_utils.cmake +++ b/google/cloud/google_cloud_cpp_grpc_utils.cmake @@ -61,8 +61,6 @@ add_library( internal/async_streaming_write_rpc_tracing.h internal/background_threads_impl.cc internal/background_threads_impl.h - internal/channel_pool.cc - internal/channel_pool.h internal/completion_queue_impl.h internal/debug_string_protobuf.cc internal/debug_string_protobuf.h @@ -258,7 +256,6 @@ if (BUILD_TESTING) internal/async_streaming_write_rpc_timeout_test.cc internal/async_streaming_write_rpc_tracing_test.cc internal/background_threads_impl_test.cc - internal/channel_pool_test.cc internal/debug_string_protobuf_test.cc internal/debug_string_status_test.cc internal/extract_long_running_result_test.cc diff --git a/google/cloud/google_cloud_cpp_grpc_utils_unit_tests.bzl b/google/cloud/google_cloud_cpp_grpc_utils_unit_tests.bzl index cd8a97c763b34..95e6a7aa6f643 100644 --- a/google/cloud/google_cloud_cpp_grpc_utils_unit_tests.bzl +++ b/google/cloud/google_cloud_cpp_grpc_utils_unit_tests.bzl @@ -42,7 +42,6 @@ google_cloud_cpp_grpc_utils_unit_tests = [ "internal/async_streaming_write_rpc_timeout_test.cc", "internal/async_streaming_write_rpc_tracing_test.cc", "internal/background_threads_impl_test.cc", - "internal/channel_pool_test.cc", "internal/debug_string_protobuf_test.cc", "internal/debug_string_status_test.cc", "internal/extract_long_running_result_test.cc", From 434a1a8b932842c13b86b648cf659f92ab21fc93 Mon Sep 17 00:00:00 2001 From: Scott Hart Date: Mon, 8 Dec 2025 18:32:36 -0500 Subject: [PATCH 06/11] remove printf from generated stub --- google/cloud/bigtable/internal/bigtable_stub.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/google/cloud/bigtable/internal/bigtable_stub.cc b/google/cloud/bigtable/internal/bigtable_stub.cc index 7d7cec8d88552..cb00ccb83ffe8 100644 --- a/google/cloud/bigtable/internal/bigtable_stub.cc +++ b/google/cloud/bigtable/internal/bigtable_stub.cc @@ -36,7 +36,6 @@ std::unique_ptr context, Options const&, google::bigtable::v2::ReadRowsRequest const& request) { - std::cout << __PRETTY_FUNCTION__ << std::endl; auto stream = grpc_stub_->ReadRows(context.get(), request); return std::make_unique>(std::move(context), @@ -71,7 +70,6 @@ std::unique_ptr context, Options const&, google::bigtable::v2::MutateRowsRequest const& request) { - std::cout << __PRETTY_FUNCTION__ << std::endl; auto stream = grpc_stub_->MutateRows(context.get(), request); return std::make_unique>(std::move(context), From a4503f6d845cce2b03b975e25271e0e67aa22251 Mon Sep 17 00:00:00 2001 From: Scott Hart Date: Mon, 8 Dec 2025 18:56:35 -0500 Subject: [PATCH 07/11] make refresh_state a member of channel pool --- ...igtable_random_two_least_used_decorator.cc | 4 +-- ...bigtable_random_two_least_used_decorator.h | 2 +- .../internal/bigtable_stub_factory.cc | 35 ++++++++++++------- .../bigtable/internal/bigtable_stub_factory.h | 4 ++- .../bigtable/internal/dynamic_channel_pool.h | 18 +++++++--- 5 files changed, 42 insertions(+), 21 deletions(-) diff --git a/google/cloud/bigtable/internal/bigtable_random_two_least_used_decorator.cc b/google/cloud/bigtable/internal/bigtable_random_two_least_used_decorator.cc index 8c8381841436a..9a3129ff951b9 100644 --- a/google/cloud/bigtable/internal/bigtable_random_two_least_used_decorator.cc +++ b/google/cloud/bigtable/internal/bigtable_random_two_least_used_decorator.cc @@ -74,12 +74,12 @@ class AsyncStreamingReadRpcTracking } // namespace BigtableRandomTwoLeastUsed::BigtableRandomTwoLeastUsed( - CompletionQueue cq, + CompletionQueue cq, std::shared_ptr refresh_state, DynamicChannelPool::StubFactoryFn refreshing_channel_stub_factory_fn, std::vector> children) : pool_(DynamicChannelPool::Create( - std::move(cq), std::move(children), + std::move(cq), std::move(children), std::move(refresh_state), std::move(refreshing_channel_stub_factory_fn))) {} std::unique_ptr refresh_state, DynamicChannelPool::StubFactoryFn refreshing_channel_stub_factory_fn, std::vector> children); diff --git a/google/cloud/bigtable/internal/bigtable_stub_factory.cc b/google/cloud/bigtable/internal/bigtable_stub_factory.cc index 88477fb861407..4551cc1db1a04 100644 --- a/google/cloud/bigtable/internal/bigtable_stub_factory.cc +++ b/google/cloud/bigtable/internal/bigtable_stub_factory.cc @@ -79,7 +79,8 @@ std::shared_ptr CreateBigtableStubRoundRobin( std::shared_ptr CreateBigtableStubRandomTwoLeastUsed( CompletionQueue cq, Options const& options, std::function(int)> - refreshing_channel_stub_factory) { + refreshing_channel_stub_factory, + std::shared_ptr refresh_state) { std::vector> children( (std::max)(1, options.get())); int id = 0; @@ -88,7 +89,8 @@ std::shared_ptr CreateBigtableStubRandomTwoLeastUsed( return refreshing_channel_stub_factory(id++); }); return std::make_shared( - std::move(cq), refreshing_channel_stub_factory, std::move(children)); + std::move(cq), std::move(refresh_state), refreshing_channel_stub_factory, + std::move(children)); } std::shared_ptr CreateDecoratedStubs( @@ -99,28 +101,35 @@ std::shared_ptr CreateDecoratedStubs( auto refresh = std::make_shared( cq_impl, options.get(), options.get()); - auto refreshing_channel_stub_factory = [stub_factory, cq_impl, refresh, &auth, - options](int id) { - auto channel = CreateGrpcChannel(*auth, options, id); - if (refresh->enabled()) ScheduleChannelRefresh(cq_impl, refresh, channel); - return stub_factory(std::move(channel)); - }; std::shared_ptr stub; if (options.has() && options.get() == ChannelSelectionStrategy::kRandomTwoLeastUsed) { + auto refreshing_channel_stub_factory = [stub_factory, cq_impl, refresh, + &auth, options](int id) { + auto channel = CreateGrpcChannel(*auth, options, id); + ScheduleChannelRefresh(cq_impl, refresh, channel); + return stub_factory(std::move(channel)); + }; stub = CreateBigtableStubRandomTwoLeastUsed( - cq, options, std::move(refreshing_channel_stub_factory)); + cq, options, std::move(refreshing_channel_stub_factory), + std::move(refresh)); } else { + auto refreshing_channel_stub_factory = [stub_factory, cq_impl, refresh, + &auth, options](int id) { + auto channel = CreateGrpcChannel(*auth, options, id); + if (refresh->enabled()) ScheduleChannelRefresh(cq_impl, refresh, channel); + return stub_factory(std::move(channel)); + }; stub = CreateBigtableStubRoundRobin( options, std::move(refreshing_channel_stub_factory)); + if (refresh->enabled()) { + stub = std::make_shared(std::move(stub), + std::move(refresh)); + } } - if (refresh->enabled()) { - stub = std::make_shared(std::move(stub), - std::move(refresh)); - } if (auth->RequiresConfigureContext()) { stub = std::make_shared(std::move(auth), std::move(stub)); } diff --git a/google/cloud/bigtable/internal/bigtable_stub_factory.h b/google/cloud/bigtable/internal/bigtable_stub_factory.h index df69d79de7b32..7d71f47b97612 100644 --- a/google/cloud/bigtable/internal/bigtable_stub_factory.h +++ b/google/cloud/bigtable/internal/bigtable_stub_factory.h @@ -16,6 +16,7 @@ #define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_INTERNAL_BIGTABLE_STUB_FACTORY_H #include "google/cloud/bigtable/internal/bigtable_stub.h" +#include "google/cloud/bigtable/internal/connection_refresh_state.h" #include "google/cloud/completion_queue.h" #include "google/cloud/internal/unified_grpc_credentials.h" #include "google/cloud/options.h" @@ -44,7 +45,8 @@ std::shared_ptr CreateBigtableStubRoundRobin( std::shared_ptr CreateBigtableStubRandomTwoLeastUsed( CompletionQueue cq, Options const& options, std::function(int)> - refreshing_channel_stub_factory); + refreshing_channel_stub_factory, + std::shared_ptr refresh_state); /// Used in testing to create decorated mocks. std::shared_ptr CreateDecoratedStubs( diff --git a/google/cloud/bigtable/internal/dynamic_channel_pool.h b/google/cloud/bigtable/internal/dynamic_channel_pool.h index b491a0b38e81a..a425406fdf839 100644 --- a/google/cloud/bigtable/internal/dynamic_channel_pool.h +++ b/google/cloud/bigtable/internal/dynamic_channel_pool.h @@ -15,6 +15,7 @@ #ifndef GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_INTERNAL_DYNAMIC_CHANNEL_POOL_H #define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_INTERNAL_DYNAMIC_CHANNEL_POOL_H +#include "google/cloud/bigtable/internal/connection_refresh_state.h" #include "google/cloud/completion_queue.h" #include "google/cloud/internal/random.h" #include "google/cloud/version.h" @@ -98,7 +99,9 @@ class DynamicChannelPool static std::shared_ptr Create( CompletionQueue cq, std::size_t initial_size, - StubFactoryFn stub_factory_fn, SizingPolicy sizing_policy = {}) { + StubFactoryFn stub_factory_fn, + std::shared_ptr refresh_state, + SizingPolicy sizing_policy = {}) { std::cout << __PRETTY_FUNCTION__ << std::endl; std::vector>> initial_wrapped_channels; for (std::size_t i = 0; i < initial_size; ++i) { @@ -106,16 +109,18 @@ class DynamicChannelPool } auto pool = std::shared_ptr(new DynamicChannelPool( std::move(cq), std::move(initial_wrapped_channels), - std::move(stub_factory_fn), std::move(sizing_policy))); + std::move(refresh_state), std::move(stub_factory_fn), + std::move(sizing_policy))); } static std::shared_ptr Create( CompletionQueue cq, std::vector> initial_channels, + std::shared_ptr refresh_state, StubFactoryFn stub_factory_fn, SizingPolicy sizing_policy = {}) { std::cout << __PRETTY_FUNCTION__ << std::endl; auto pool = std::shared_ptr(new DynamicChannelPool( - std::move(cq), std::move(initial_channels), std::move(stub_factory_fn), - std::move(sizing_policy))); + std::move(cq), std::move(initial_channels), std::move(refresh_state), + std::move(stub_factory_fn), std::move(sizing_policy))); return pool; } @@ -166,8 +171,10 @@ class DynamicChannelPool DynamicChannelPool(CompletionQueue cq, std::vector>> initial_wrapped_channels, + std::shared_ptr refresh_state, StubFactoryFn stub_factory_fn, SizingPolicy sizing_policy) : cq_(std::move(cq)), + refresh_state_(std::move(refresh_state)), stub_factory_fn_(std::move(stub_factory_fn)), channels_(std::move(initial_wrapped_channels)), sizing_policy_(std::move(sizing_policy)), @@ -175,8 +182,10 @@ class DynamicChannelPool DynamicChannelPool(CompletionQueue cq, std::vector> initial_channels, + std::shared_ptr refresh_state, StubFactoryFn stub_factory_fn, SizingPolicy sizing_policy) : cq_(std::move(cq)), + refresh_state_(std::move(refresh_state)), stub_factory_fn_(std::move(stub_factory_fn)), channels_(), sizing_policy_(std::move(sizing_policy)), @@ -299,6 +308,7 @@ class DynamicChannelPool mutable std::mutex mu_; CompletionQueue cq_; google::cloud::internal::DefaultPRNG rng_; + std::shared_ptr refresh_state_; StubFactoryFn stub_factory_fn_; std::vector>> channels_; SizingPolicy sizing_policy_; From c2d51e4d1591ae349687e55e9628528d9d7987eb Mon Sep 17 00:00:00 2001 From: Scott Hart Date: Tue, 9 Dec 2025 17:18:50 -0500 Subject: [PATCH 08/11] kinda working with iota iterators --- .../builds/integration-production.sh | 8 +- ci/cloudbuild/builds/lib/integration.sh | 2 +- ...igtable_random_two_least_used_decorator.cc | 16 +- ...bigtable_random_two_least_used_decorator.h | 2 +- .../internal/bigtable_stub_factory.cc | 9 +- .../bigtable/internal/bigtable_stub_factory.h | 2 +- .../internal/connection_refresh_state.cc | 29 ++- .../internal/connection_refresh_state.h | 3 +- .../bigtable/internal/dynamic_channel_pool.h | 232 ++++++++++++++---- .../internal/dynamic_channel_pool_test.cc | 34 ++- google/cloud/bigtable/options.h | 9 + .../bigtable/test_proxy/cbt_test_proxy.cc | 2 + .../testing/table_integration_test.cc | 6 +- .../bigtable/tests/data_integration_test.cc | 3 +- 14 files changed, 285 insertions(+), 72 deletions(-) diff --git a/ci/cloudbuild/builds/integration-production.sh b/ci/cloudbuild/builds/integration-production.sh index 66ca4496d87fa..3edee6d29aa8f 100755 --- a/ci/cloudbuild/builds/integration-production.sh +++ b/ci/cloudbuild/builds/integration-production.sh @@ -27,7 +27,7 @@ export CC=clang export CXX=clang++ mapfile -t args < <(bazel::common_args) -io::run bazel test "${args[@]}" --test_tag_filters=-integration-test "${BAZEL_TARGETS[@]}" +#io::run bazel test "${args[@]}" --test_tag_filters=-integration-test "${BAZEL_TARGETS[@]}" excluded_rules=( "-//examples:grpc_credential_types" @@ -41,6 +41,6 @@ excluded_rules=( io::log_h2 "Running the integration tests against prod" mapfile -t integration_args < <(integration::bazel_args) -io::run bazel test "${args[@]}" "${integration_args[@]}" \ - --cache_test_results="auto" --test_tag_filters="integration-test,-ud-only" \ - -- "${BAZEL_TARGETS[@]}" "${excluded_rules[@]}" +io::run bazel test "${args[@]}" "${integration_args[@]}" --test_output=all \ + --test_filter="*ReadRowsAllRows*" --test_timeout=30 \ + //google/cloud/bigtable/tests:data_integration_test diff --git a/ci/cloudbuild/builds/lib/integration.sh b/ci/cloudbuild/builds/lib/integration.sh index 4ab702369f232..ebb8f71b240c0 100644 --- a/ci/cloudbuild/builds/lib/integration.sh +++ b/ci/cloudbuild/builds/lib/integration.sh @@ -51,7 +51,7 @@ function integration::bazel_args() { # Integration tests are inherently flaky. Make up to three attempts to get the # test passing. - args+=(--flaky_test_attempts=3) + #args+=(--flaky_test_attempts=3) args+=( # Common settings diff --git a/google/cloud/bigtable/internal/bigtable_random_two_least_used_decorator.cc b/google/cloud/bigtable/internal/bigtable_random_two_least_used_decorator.cc index 9a3129ff951b9..a70a77f74e546 100644 --- a/google/cloud/bigtable/internal/bigtable_random_two_least_used_decorator.cc +++ b/google/cloud/bigtable/internal/bigtable_random_two_least_used_decorator.cc @@ -105,6 +105,7 @@ std::unique_ptr context, Options const& options, google::bigtable::v2::SampleRowKeysRequest const& request) { + std::cout << __PRETTY_FUNCTION__ << std::endl; auto child = Child(); auto stub = child->AcquireStub(); auto result = stub->SampleRowKeys(std::move(context), options, request); @@ -121,6 +122,7 @@ StatusOr BigtableRandomTwoLeastUsed::MutateRow( grpc::ClientContext& context, Options const& options, google::bigtable::v2::MutateRowRequest const& request) { + std::cout << __PRETTY_FUNCTION__ << std::endl; auto child = Child(); auto stub = child->AcquireStub(); auto result = stub->MutateRow(context, options, request); @@ -150,6 +152,7 @@ StatusOr BigtableRandomTwoLeastUsed::CheckAndMutateRow( grpc::ClientContext& context, Options const& options, google::bigtable::v2::CheckAndMutateRowRequest const& request) { + std::cout << __PRETTY_FUNCTION__ << std::endl; auto child = Child(); auto stub = child->AcquireStub(); auto result = stub->CheckAndMutateRow(context, options, request); @@ -161,6 +164,7 @@ StatusOr BigtableRandomTwoLeastUsed::PingAndWarm( grpc::ClientContext& context, Options const& options, google::bigtable::v2::PingAndWarmRequest const& request) { + std::cout << __PRETTY_FUNCTION__ << std::endl; auto child = Child(); auto stub = child->AcquireStub(); auto result = stub->PingAndWarm(context, options, request); @@ -172,6 +176,7 @@ StatusOr BigtableRandomTwoLeastUsed::ReadModifyWriteRow( grpc::ClientContext& context, Options const& options, google::bigtable::v2::ReadModifyWriteRowRequest const& request) { + std::cout << __PRETTY_FUNCTION__ << std::endl; auto child = Child(); auto stub = child->AcquireStub(); auto result = stub->ReadModifyWriteRow(context, options, request); @@ -183,6 +188,7 @@ StatusOr BigtableRandomTwoLeastUsed::PrepareQuery( grpc::ClientContext& context, Options const& options, google::bigtable::v2::PrepareQueryRequest const& request) { + std::cout << __PRETTY_FUNCTION__ << std::endl; auto child = Child(); auto stub = child->AcquireStub(); auto result = stub->PrepareQuery(context, options, request); @@ -195,6 +201,7 @@ std::unique_ptr context, Options const& options, google::bigtable::v2::ExecuteQueryRequest const& request) { + std::cout << __PRETTY_FUNCTION__ << std::endl; auto child = Child(); auto stub = child->AcquireStub(); auto result = stub->ExecuteQuery(std::move(context), options, request); @@ -214,6 +221,7 @@ BigtableRandomTwoLeastUsed::AsyncReadRows( std::shared_ptr context, google::cloud::internal::ImmutableOptions options, google::bigtable::v2::ReadRowsRequest const& request) { + std::cout << __PRETTY_FUNCTION__ << std::endl; auto child = Child(); auto stub = child->AcquireStub(); auto result = @@ -234,6 +242,7 @@ BigtableRandomTwoLeastUsed::AsyncSampleRowKeys( std::shared_ptr context, google::cloud::internal::ImmutableOptions options, google::bigtable::v2::SampleRowKeysRequest const& request) { + std::cout << __PRETTY_FUNCTION__ << std::endl; auto child = Child(); auto stub = child->AcquireStub(); auto result = stub->AsyncSampleRowKeys(cq, std::move(context), @@ -253,6 +262,7 @@ BigtableRandomTwoLeastUsed::AsyncMutateRow( std::shared_ptr context, google::cloud::internal::ImmutableOptions options, google::bigtable::v2::MutateRowRequest const& request) { + std::cout << __PRETTY_FUNCTION__ << std::endl; auto child = Child(); auto stub = child->AcquireStub(); auto result = @@ -268,6 +278,7 @@ BigtableRandomTwoLeastUsed::AsyncMutateRows( std::shared_ptr context, google::cloud::internal::ImmutableOptions options, google::bigtable::v2::MutateRowsRequest const& request) { + std::cout << __PRETTY_FUNCTION__ << std::endl; auto child = Child(); auto stub = child->AcquireStub(); auto result = stub->AsyncMutateRows(cq, std::move(context), @@ -288,6 +299,7 @@ BigtableRandomTwoLeastUsed::AsyncCheckAndMutateRow( std::shared_ptr context, google::cloud::internal::ImmutableOptions options, google::bigtable::v2::CheckAndMutateRowRequest const& request) { + std::cout << __PRETTY_FUNCTION__ << std::endl; auto child = Child(); auto stub = child->AcquireStub(); auto result = stub->AsyncCheckAndMutateRow(cq, std::move(context), @@ -302,6 +314,7 @@ BigtableRandomTwoLeastUsed::AsyncReadModifyWriteRow( std::shared_ptr context, google::cloud::internal::ImmutableOptions options, google::bigtable::v2::ReadModifyWriteRowRequest const& request) { + std::cout << __PRETTY_FUNCTION__ << std::endl; auto child = Child(); auto stub = child->AcquireStub(); auto result = stub->AsyncReadModifyWriteRow(cq, std::move(context), @@ -316,6 +329,7 @@ BigtableRandomTwoLeastUsed::AsyncPrepareQuery( std::shared_ptr context, google::cloud::internal::ImmutableOptions options, google::bigtable::v2::PrepareQueryRequest const& request) { + std::cout << __PRETTY_FUNCTION__ << std::endl; auto child = Child(); auto stub = child->AcquireStub(); auto result = stub->AsyncPrepareQuery(cq, std::move(context), @@ -324,7 +338,7 @@ BigtableRandomTwoLeastUsed::AsyncPrepareQuery( return result; } -std::shared_ptr> +std::shared_ptr> BigtableRandomTwoLeastUsed::Child() { std::cout << __PRETTY_FUNCTION__ << std::endl; return pool_->GetChannelRandomTwoLeastUsed(); diff --git a/google/cloud/bigtable/internal/bigtable_random_two_least_used_decorator.h b/google/cloud/bigtable/internal/bigtable_random_two_least_used_decorator.h index 0f4f1a5964464..b1d35d1010653 100644 --- a/google/cloud/bigtable/internal/bigtable_random_two_least_used_decorator.h +++ b/google/cloud/bigtable/internal/bigtable_random_two_least_used_decorator.h @@ -130,7 +130,7 @@ class BigtableRandomTwoLeastUsed : public BigtableStub { google::bigtable::v2::PrepareQueryRequest const& request) override; private: - std::shared_ptr> Child(); + std::shared_ptr> Child(); // std::mutex mu_; std::shared_ptr> pool_; diff --git a/google/cloud/bigtable/internal/bigtable_stub_factory.cc b/google/cloud/bigtable/internal/bigtable_stub_factory.cc index 4551cc1db1a04..6fa4b5af2046f 100644 --- a/google/cloud/bigtable/internal/bigtable_stub_factory.cc +++ b/google/cloud/bigtable/internal/bigtable_stub_factory.cc @@ -81,6 +81,7 @@ std::shared_ptr CreateBigtableStubRandomTwoLeastUsed( std::function(int)> refreshing_channel_stub_factory, std::shared_ptr refresh_state) { + std::cout << __PRETTY_FUNCTION__ << std::endl; std::vector> children( (std::max)(1, options.get())); int id = 0; @@ -88,6 +89,8 @@ std::shared_ptr CreateBigtableStubRandomTwoLeastUsed( [&id, &refreshing_channel_stub_factory] { return refreshing_channel_stub_factory(id++); }); + std::cout << __PRETTY_FUNCTION__ << ": children.size()=" << children.size() + << std::endl; return std::make_shared( std::move(cq), std::move(refresh_state), refreshing_channel_stub_factory, std::move(children)); @@ -103,9 +106,9 @@ std::shared_ptr CreateDecoratedStubs( options.get()); std::shared_ptr stub; - if (options.has() && - options.get() == - ChannelSelectionStrategy::kRandomTwoLeastUsed) { + if (options.has() && + options.get() == + bigtable::experimental::ChannelPoolType::kDynamic) { auto refreshing_channel_stub_factory = [stub_factory, cq_impl, refresh, &auth, options](int id) { auto channel = CreateGrpcChannel(*auth, options, id); diff --git a/google/cloud/bigtable/internal/bigtable_stub_factory.h b/google/cloud/bigtable/internal/bigtable_stub_factory.h index 7d71f47b97612..f9111abbb5e88 100644 --- a/google/cloud/bigtable/internal/bigtable_stub_factory.h +++ b/google/cloud/bigtable/internal/bigtable_stub_factory.h @@ -29,7 +29,7 @@ namespace cloud { namespace bigtable_internal { GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_BEGIN -enum class ChannelSelectionStrategy { kNone, kRoundRobin, kRandomTwoLeastUsed }; +enum class ChannelSelectionStrategy { kRoundRobin, kRandomTwoLeastUsed }; struct ChannelSelectionStrategyOption { using Type = ChannelSelectionStrategy; diff --git a/google/cloud/bigtable/internal/connection_refresh_state.cc b/google/cloud/bigtable/internal/connection_refresh_state.cc index 2a6cdd052b6bc..db6a0fbeec76e 100644 --- a/google/cloud/bigtable/internal/connection_refresh_state.cc +++ b/google/cloud/bigtable/internal/connection_refresh_state.cc @@ -53,10 +53,20 @@ bool ConnectionRefreshState::enabled() const { return max_conn_refresh_period_.count() != 0; } +void LogFailedConnectionRefresh(Status const& conn_status) { + if (!conn_status.ok()) { + GCP_LOG(WARNING) << "Failed to refresh connection. Error: " << conn_status; + } +} + void ScheduleChannelRefresh( std::shared_ptr const& cq_impl, std::shared_ptr const& state, - std::shared_ptr const& channel) { + std::shared_ptr const& channel, + std::function connection_status_fn) { + if (!connection_status_fn) { + connection_status_fn = LogFailedConnectionRefresh; + } // The timers will only hold weak pointers to the channel or to the // completion queue, so if either of them are destroyed, the timer chain // will simply not continue. @@ -66,7 +76,9 @@ void ScheduleChannelRefresh( using TimerFuture = future>; auto timer_future = cq.MakeRelativeTimer(state->RandomizedRefreshDelay()) - .then([weak_channel, weak_cq_impl, state](TimerFuture fut) { + .then([weak_channel, weak_cq_impl, state, + connection_status_fn = + std::move(connection_status_fn)](TimerFuture fut) { if (!fut.get()) { // Timer cancelled. return; @@ -79,17 +91,18 @@ void ScheduleChannelRefresh( cq.AsyncWaitConnectionReady( channel, std::chrono::system_clock::now() + kConnectionReadyTimeout) - .then([weak_channel, weak_cq_impl, state](future fut) { + .then([weak_channel, weak_cq_impl, state, + connection_status_fn = std::move(connection_status_fn)]( + future fut) { auto conn_status = fut.get(); - if (!conn_status.ok()) { - GCP_LOG(WARNING) << "Failed to refresh connection. Error: " - << conn_status; - } + if (connection_status_fn) connection_status_fn(conn_status); + auto channel = weak_channel.lock(); if (!channel) return; auto cq_impl = weak_cq_impl.lock(); if (!cq_impl) return; - ScheduleChannelRefresh(cq_impl, state, channel); + ScheduleChannelRefresh(cq_impl, state, channel, + std::move(connection_status_fn)); }); }); state->timers().RegisterTimer(std::move(timer_future)); diff --git a/google/cloud/bigtable/internal/connection_refresh_state.h b/google/cloud/bigtable/internal/connection_refresh_state.h index 4e4f9f3dcfdd5..7bfc1e807bb27 100644 --- a/google/cloud/bigtable/internal/connection_refresh_state.h +++ b/google/cloud/bigtable/internal/connection_refresh_state.h @@ -86,7 +86,8 @@ class ConnectionRefreshState { void ScheduleChannelRefresh( std::shared_ptr const& cq, std::shared_ptr const& state, - std::shared_ptr const& channel); + std::shared_ptr const& channel, + std::function connection_status_fn = {}); GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_END } // namespace bigtable_internal diff --git a/google/cloud/bigtable/internal/dynamic_channel_pool.h b/google/cloud/bigtable/internal/dynamic_channel_pool.h index a425406fdf839..c6838db547630 100644 --- a/google/cloud/bigtable/internal/dynamic_channel_pool.h +++ b/google/cloud/bigtable/internal/dynamic_channel_pool.h @@ -31,19 +31,31 @@ namespace bigtable_internal { GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_BEGIN template -class StubUsageWrapper - : public std::enable_shared_from_this> { +class ChannelUsageWrapper + : public std::enable_shared_from_this> { public: - explicit StubUsageWrapper(std::shared_ptr stub) : stub_(std::move(stub)) {} + explicit ChannelUsageWrapper(std::shared_ptr stub) + : stub_(std::move(stub)) {} // This value is a snapshot and can change immediately after the lock is // released. - int outstanding_rpcs() const { + StatusOr outstanding_rpcs() const { std::unique_lock lk(mu_); + if (!last_refresh_status_.ok()) return last_refresh_status_; return outstanding_rpcs_; } - std::weak_ptr> MakeWeak() { + Status LastRefreshStatus() const { + std::unique_lock lk(mu_); + return last_refresh_status_; + } + + void SetLastRefreshStatus(Status s) { + std::unique_lock lk(mu_); + last_refresh_status_ = std::move(s); + } + + std::weak_ptr> MakeWeak() { return this->shared_from_this(); } @@ -62,6 +74,7 @@ class StubUsageWrapper mutable std::mutex mu_; std::shared_ptr stub_; int outstanding_rpcs_ = 0; + Status last_refresh_status_; }; template @@ -95,6 +108,12 @@ class DynamicChannelPool // outstanding RPCs on that channel are completed before destroying it. std::chrono::milliseconds remove_channel_polling_interval = std::chrono::milliseconds(30 * 1000); + + // Limits how large the pool can grow. + std::size_t maximum_channel_pool_size; + + // This is set to the initial channel pool size. + std::size_t minimum_channel_pool_size; }; static std::shared_ptr Create( @@ -102,8 +121,9 @@ class DynamicChannelPool StubFactoryFn stub_factory_fn, std::shared_ptr refresh_state, SizingPolicy sizing_policy = {}) { - std::cout << __PRETTY_FUNCTION__ << std::endl; - std::vector>> initial_wrapped_channels; + std::cout << __PRETTY_FUNCTION__ << ": enter" << std::endl; + std::vector>> + initial_wrapped_channels; for (std::size_t i = 0; i < initial_size; ++i) { initial_wrapped_channels.emplace_back(stub_factory_fn()); } @@ -111,19 +131,34 @@ class DynamicChannelPool std::move(cq), std::move(initial_wrapped_channels), std::move(refresh_state), std::move(stub_factory_fn), std::move(sizing_policy))); + std::cout << __PRETTY_FUNCTION__ << ": return pool" << std::endl; + return pool; } static std::shared_ptr Create( CompletionQueue cq, std::vector> initial_channels, std::shared_ptr refresh_state, StubFactoryFn stub_factory_fn, SizingPolicy sizing_policy = {}) { - std::cout << __PRETTY_FUNCTION__ << std::endl; + std::cout << __PRETTY_FUNCTION__ << ": enter" << std::endl; auto pool = std::shared_ptr(new DynamicChannelPool( std::move(cq), std::move(initial_channels), std::move(refresh_state), std::move(stub_factory_fn), std::move(sizing_policy))); + std::cout << __PRETTY_FUNCTION__ << ": return pool" << std::endl; return pool; } + ~DynamicChannelPool() { + std::unique_lock lk(mu_); + // Eventually the channel refresh chain will terminate after this class is + // destroyed. But only after the timer futures expire on the CompletionQueue + // performing this work. We might as well cancel those timer futures now. + refresh_state_->timers().CancelAll(); + if (remove_channel_poll_timer_.valid()) remove_channel_poll_timer_.cancel(); + if (pool_resize_cooldown_timer_ && remove_channel_poll_timer_.valid()) { + pool_resize_cooldown_timer_->cancel(); + } + } + // This is a snapshot aka dirty read as the size could immediately change // after this function returns. std::size_t size() const { @@ -131,10 +166,14 @@ class DynamicChannelPool return channels_.size(); } - std::shared_ptr> GetChannelRandomTwoLeastUsed() { + std::shared_ptr> GetChannelRandomTwoLeastUsed() { std::unique_lock lk(mu_); std::cout << __PRETTY_FUNCTION__ << ": channels_size()=" << channels_.size() << std::endl; + // for (auto const& c : channels_) { + // std::cout << __PRETTY_FUNCTION__ << ": channel=" << c.get() << + // std::endl; + // } if (!pool_resize_cooldown_timer_) { CheckPoolChannelHealth(lk); @@ -144,32 +183,95 @@ class DynamicChannelPool CheckPoolChannelHealth(lk); } - if (channels_.size() == 1) { - return channels_.front(); + // std::cout << __PRETTY_FUNCTION__ << ": finished + // CheckPoolChannelHealth" + // << std::endl; + std::vector< + typename std::vector>>::iterator> + iterators(channels_.size()); + + std::iota(iterators.begin(), iterators.end(), channels_.begin()); + std::shuffle(iterators.begin(), iterators.end(), rng_); + + // std::cout << __PRETTY_FUNCTION__ << ": shuffled iterators" << + // std::endl; + // std::vector< + // typename + // std::vector>>::iterator> + // iterators; + typename std::vector>>::iterator>::iterator + shuffle_iter = iterators.begin(); + typename std::vector>>::iterator + channel_1 = *shuffle_iter; + std::shared_ptr> c = *channel_1; + // std::cout << __PRETTY_FUNCTION__ + // << ": check channel 1=" << c.get() << std::endl; + auto channel_1_rpcs = shuffle_iter != iterators.end() + ? (*channel_1)->outstanding_rpcs() + : Status{StatusCode::kNotFound, ""}; + ++shuffle_iter; + typename std::vector>>::iterator + channel_2 = *shuffle_iter; + // We want to snapshot these outstanding_rpcs values. + // std::cout << __PRETTY_FUNCTION__ + // << ": check channel 2=" << (channel_2)->get() << std::endl; + auto channel_2_rpcs = shuffle_iter != iterators.end() + ? (*channel_2)->outstanding_rpcs() + : Status{StatusCode::kNotFound, ""}; + // This is the ideal (and most common ) case so we try it first. + // std::cout << __PRETTY_FUNCTION__ << ": compare channel rpcs" << + // std::endl; + if (channel_1_rpcs.ok() && channel_2_rpcs.ok()) { + std::cout << __PRETTY_FUNCTION__ << ": 2 ok channels, returning smaller" + << std::endl; + return *channel_1_rpcs < *channel_2_rpcs ? *channel_1 : *channel_2; } - std::shared_ptr> channel_1; - std::shared_ptr> channel_2; - if (channels_.size() == 2) { - channel_1 = channels_[0]; - channel_2 = channels_[1]; - } else { - std::vector indices(channels_.size()); - // TODO(sdhart): Maybe use iota on iterators instead of indices - std::iota(indices.begin(), indices.end(), 0); - std::shuffle(indices.begin(), indices.end(), rng_); + // We have one or more bad channels. Spending time finding a good channel + // will be cheaper than trying to use a bad channel in the long run. + std::vector< + typename std::vector>>::iterator> + bad_channels; + while (!channel_1_rpcs.ok() && shuffle_iter != iterators.end()) { + bad_channels.push_back(channel_1); + ++shuffle_iter; + channel_1 = *shuffle_iter; + channel_1_rpcs = shuffle_iter != iterators.end() + ? (*channel_1)->outstanding_rpcs() + : Status{StatusCode::kNotFound, ""}; + } - channel_1 = channels_[indices[0]]; - channel_2 = channels_[indices[1]]; + while (!channel_2_rpcs.ok() && shuffle_iter != iterators.end()) { + bad_channels.push_back(channel_2); + ++shuffle_iter; + channel_2 = *shuffle_iter; + channel_2_rpcs = shuffle_iter != iterators.end() + ? (*channel_2)->outstanding_rpcs() + : Status{StatusCode::kNotFound, ""}; + } + + if (channel_1_rpcs.ok() && channel_2_rpcs.ok()) { + std::cout << __PRETTY_FUNCTION__ << ": 2 ok channels" << std::endl; + return *channel_1_rpcs < *channel_2_rpcs ? *channel_1 : *channel_2; + } + if (channel_1_rpcs.ok()) { + std::cout << __PRETTY_FUNCTION__ << ": ONLY channel_1 ok" << std::endl; + return *channel_1; } - return channel_1->outstanding_rpcs() < channel_2->outstanding_rpcs() - ? channel_1 - : channel_2; + if (channel_2_rpcs.ok()) { + std::cout << __PRETTY_FUNCTION__ << ": ONLY channel_2 ok" << std::endl; + return *channel_2; + } + + // TODO: we have no usable channels in the entire pool; this is bad. + std::cout << __PRETTY_FUNCTION__ << ": NO USABLE CHANNELS" << std::endl; + return nullptr; } private: DynamicChannelPool(CompletionQueue cq, - std::vector>> + std::vector>> initial_wrapped_channels, std::shared_ptr refresh_state, StubFactoryFn stub_factory_fn, SizingPolicy sizing_policy) @@ -178,7 +280,9 @@ class DynamicChannelPool stub_factory_fn_(std::move(stub_factory_fn)), channels_(std::move(initial_wrapped_channels)), sizing_policy_(std::move(sizing_policy)), - next_channel_id_(channels_.size()) {} + next_channel_id_(channels_.size()) { + sizing_policy_.minimum_channel_pool_size = channels_.size(); + } DynamicChannelPool(CompletionQueue cq, std::vector> initial_channels, @@ -194,30 +298,40 @@ class DynamicChannelPool channels_.reserve(initial_channels.size()); for (auto& channel : initial_channels) { channels_.push_back( - std::make_shared>(std::move(channel))); + std::make_shared>(std::move(channel))); } + sizing_policy_.minimum_channel_pool_size = channels_.size(); } struct ChannelAddVisitor { std::size_t pool_size; explicit ChannelAddVisitor(std::size_t pool_size) : pool_size(pool_size) {} - int operator()(typename SizingPolicy::DiscreteChannels const& c) { + std::size_t operator()(typename SizingPolicy::DiscreteChannels const& c) { return c.number; } - int operator()(typename SizingPolicy::PercentageOfPoolSize const& c) { - return static_cast( + std::size_t operator()( + typename SizingPolicy::PercentageOfPoolSize const& c) { + return static_cast( std::floor(static_cast(pool_size) * c.percentage)); } }; void ScheduleAddChannel(std::unique_lock const&) { - auto num_channels_to_add = - absl::visit(ChannelAddVisitor(channels_.size()), - sizing_policy_.channels_to_add_per_resize); + std::size_t num_channels_to_add; + // If we're undersized due to bad channels, get us back to the minimum size. + if (channels_.size() < sizing_policy_.minimum_channel_pool_size) { + num_channels_to_add = + sizing_policy_.minimum_channel_pool_size - channels_.size(); + } else { + num_channels_to_add = + std::min(sizing_policy_.maximum_channel_pool_size - channels_.size(), + absl::visit(ChannelAddVisitor(channels_.size()), + sizing_policy_.channels_to_add_per_resize)); + } std::vector new_channel_ids; new_channel_ids.reserve(num_channels_to_add); - for (int i = 0; i < num_channels_to_add; ++i) { + for (std::size_t i = 0; i < num_channels_to_add; ++i) { new_channel_ids.push_back(next_channel_id_++); } @@ -231,11 +345,11 @@ class DynamicChannelPool } void AddChannel(std::vector const& new_channel_ids) { - std::vector>> new_stubs; + std::vector>> new_stubs; new_stubs.reserve(new_channel_ids.size()); for (auto const& id : new_channel_ids) { new_stubs.push_back( - std::make_shared>(stub_factory_fn_(id))); + std::make_shared>(stub_factory_fn_(id))); } std::unique_lock lk(mu_); channels_.insert(channels_.end(), @@ -261,17 +375,25 @@ class DynamicChannelPool void RemoveChannel() { std::unique_lock lk(mu_); std::sort(draining_channels_.begin(), draining_channels_.end(), - [](std::shared_ptr> const& a, - std::shared_ptr> b) { - return a->outstanding_rpcs() > b->outstanding_rpcs(); + [](std::shared_ptr> const& a, + std::shared_ptr> b) { + auto rpcs_a = a->outstanding_rpcs(); + auto rpcs_b = b->outstanding_rpcs(); + if (!rpcs_a.ok()) return false; + if (!rpcs_b.ok()) return true; + return *rpcs_a > *rpcs_b; }); while (!draining_channels_.empty()) { - if (draining_channels_.back()->outstanding_rpcs() != 0) { + auto outstanding_rpcs = draining_channels_.back()->outstanding_rpcs(); + if (outstanding_rpcs.ok() && *outstanding_rpcs != 0) { ScheduleRemoveChannel(lk); return; } draining_channels_.pop_back(); } + // TODO: If iterators becomes a member variable perhaps add logic to call + // shrink_to_fit on iterators_ if there's a large + // difference between iterators_.capacity and channels_.size } void SetResizeCooldownTimer(std::unique_lock const&) { @@ -281,12 +403,19 @@ class DynamicChannelPool void CheckPoolChannelHealth(std::unique_lock const& lk) { int average_rpc_per_channel = - std::accumulate( - channels_.begin(), channels_.end(), 0, - [](int a, auto const& b) { return a + b->outstanding_rpcs(); }) / + std::accumulate(channels_.begin(), channels_.end(), 0, + [](int a, auto const& b) { + auto rpcs_b = b->outstanding_rpcs(); + return a + (rpcs_b.ok() ? *rpcs_b : 0); + }) / static_cast(channels_.size()); + std::cout << __PRETTY_FUNCTION__ + << ": channels_.size()=" << channels_.size() + << "; sizing_policy_.minimum_channel_pool_size=" + << sizing_policy_.minimum_channel_pool_size << std::endl; if (average_rpc_per_channel < - sizing_policy_.minimum_average_outstanding_rpcs_per_channel) { + sizing_policy_.minimum_average_outstanding_rpcs_per_channel && + channels_.size() > sizing_policy_.minimum_channel_pool_size) { auto random_channel = std::uniform_int_distribution( 0, channels_.size() - 1)(rng_); std::swap(channels_[random_channel], channels_.back()); @@ -296,7 +425,8 @@ class DynamicChannelPool SetResizeCooldownTimer(lk); } if (average_rpc_per_channel > - sizing_policy_.maximum_average_outstanding_rpcs_per_channel) { + sizing_policy_.maximum_average_outstanding_rpcs_per_channel && + channels_.size() < sizing_policy_.maximum_channel_pool_size) { // Channel/stub creation is expensive, instead of making the current RPC // wait on this, use an existing channel right now, and schedule a channel // to be added. @@ -310,13 +440,17 @@ class DynamicChannelPool google::cloud::internal::DefaultPRNG rng_; std::shared_ptr refresh_state_; StubFactoryFn stub_factory_fn_; - std::vector>> channels_; + std::vector>> channels_; SizingPolicy sizing_policy_; - std::vector>> draining_channels_; + std::vector>> draining_channels_; future remove_channel_poll_timer_; absl::optional>> pool_resize_cooldown_timer_ = absl::nullopt; int next_channel_id_; + // std::vector< + // typename + // std::vector>>::iterator> + // iterators_; }; GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_END diff --git a/google/cloud/bigtable/internal/dynamic_channel_pool_test.cc b/google/cloud/bigtable/internal/dynamic_channel_pool_test.cc index ed4333fd8e010..38c7deb2afb7c 100644 --- a/google/cloud/bigtable/internal/dynamic_channel_pool_test.cc +++ b/google/cloud/bigtable/internal/dynamic_channel_pool_test.cc @@ -13,6 +13,8 @@ // limitations under the License. #include "google/cloud/bigtable/internal/dynamic_channel_pool.h" +#include "google/cloud/bigtable/testing/mock_bigtable_stub.h" +#include "google/cloud/testing_util/fake_completion_queue_impl.h" #include "google/cloud/testing_util/status_matchers.h" #include @@ -20,7 +22,37 @@ namespace google { namespace cloud { namespace bigtable_internal { GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_BEGIN -namespace internal {} // namespace internal +namespace { + +using ::google::cloud::bigtable::testing::MockBigtableStub; +using ::google::cloud::testing_util::FakeCompletionQueueImpl; + +TEST(DynamicChannelPoolTest, GetChannelRandomTwoLeastUsed) { + auto fake_cq_impl = std::make_shared(); + + auto refresh_state = std::make_shared( + fake_cq_impl, std::chrono::milliseconds(1), + std::chrono::milliseconds(10)); + + auto stub_factory_fn = [](int) -> std::shared_ptr { + return std::make_shared(); + }; + + DynamicChannelPool::SizingPolicy sizing_policy; + + std::vector> channels(10); + int id = 0; + std::generate(channels.begin(), channels.end(), + [&]() { return stub_factory_fn(id++); }); + + auto pool = DynamicChannelPool::Create( + CompletionQueue(fake_cq_impl), channels, refresh_state, stub_factory_fn, + sizing_policy); + + auto selected_stub = pool->GetChannelRandomTwoLeastUsed(); +} + +} // namespace GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_END } // namespace bigtable_internal } // namespace cloud diff --git a/google/cloud/bigtable/options.h b/google/cloud/bigtable/options.h index 865d5c009547e..90ab14f1e5ec9 100644 --- a/google/cloud/bigtable/options.h +++ b/google/cloud/bigtable/options.h @@ -203,6 +203,15 @@ struct QueryPlanRefreshFunctionRetryPolicyOption { using Type = std::shared_ptr; }; +/** + * Option to select between a static sized or dynamically resizing channel pool. + * The default is the static sized pool. + */ +enum class ChannelPoolType { kStatic, kDynamic }; +struct ChannelPoolTypeOption { + using Type = ChannelPoolType; +}; + } // namespace experimental /// The complete list of options accepted by `bigtable::*Client` diff --git a/google/cloud/bigtable/test_proxy/cbt_test_proxy.cc b/google/cloud/bigtable/test_proxy/cbt_test_proxy.cc index 0a390eaee1528..6df93aa3c2c6f 100644 --- a/google/cloud/bigtable/test_proxy/cbt_test_proxy.cc +++ b/google/cloud/bigtable/test_proxy/cbt_test_proxy.cc @@ -93,6 +93,8 @@ grpc::Status CbtTestProxy::CreateClient( auto options = Options{} + .set( + bigtable::experimental::ChannelPoolType::kDynamic) .set(request->data_target()) .set(grpc::InsecureChannelCredentials()) .set(std::chrono::milliseconds(0)) diff --git a/google/cloud/bigtable/testing/table_integration_test.cc b/google/cloud/bigtable/testing/table_integration_test.cc index 0cfabf9c990f0..e41e1369e385b 100644 --- a/google/cloud/bigtable/testing/table_integration_test.cc +++ b/google/cloud/bigtable/testing/table_integration_test.cc @@ -123,7 +123,11 @@ void TableAdminTestEnvironment::TearDown() { void TableIntegrationTest::SetUp() { std::cout << __PRETTY_FUNCTION__ << std::endl; - data_connection_ = MakeDataConnection(); + auto options = Options{} + .set( + experimental::ChannelPoolType::kDynamic) + .set(10); + data_connection_ = MakeDataConnection(options); data_client_ = bigtable::MakeDataClient(TableTestEnvironment::project_id(), TableTestEnvironment::instance_id()); diff --git a/google/cloud/bigtable/tests/data_integration_test.cc b/google/cloud/bigtable/tests/data_integration_test.cc index f81fc1472b4de..f373fd5bf57e3 100644 --- a/google/cloud/bigtable/tests/data_integration_test.cc +++ b/google/cloud/bigtable/tests/data_integration_test.cc @@ -206,7 +206,7 @@ TEST_P(DataIntegrationTest, TableReadRowNotExistTest) { } TEST_P(DataIntegrationTest, TableReadRowsAllRows) { - std::cout << __PRETTY_FUNCTION__ << std::endl; + std::cout << __PRETTY_FUNCTION__ << ": ENTER TEST CASE" << std::endl; auto table = GetTable(GetParam()); std::string const row_key1 = "row-key-1"; std::string const row_key2 = "row-key-2"; @@ -220,6 +220,7 @@ TEST_P(DataIntegrationTest, TableReadRowsAllRows) { CreateCells(table, created); + std::cout << __PRETTY_FUNCTION__ << ": START READING ROWS" << std::endl; // Some equivalent ways to read the three rows auto read1 = table.ReadRows(RowSet(RowRange::InfiniteRange()), Filter::PassAllFilter()); From 81631aaa9f683a751271f43f8ff632403e526974 Mon Sep 17 00:00:00 2001 From: Scott Hart Date: Tue, 9 Dec 2025 19:14:30 -0500 Subject: [PATCH 09/11] connection refresh callback sets status --- .../builds/integration-production.sh | 2 +- ...igtable_random_two_least_used_decorator.cc | 9 -- ...bigtable_random_two_least_used_decorator.h | 9 +- .../internal/bigtable_stub_factory.cc | 53 ++++++++---- .../bigtable/internal/bigtable_stub_factory.h | 8 +- .../internal/connection_refresh_state.cc | 3 +- .../bigtable/internal/dynamic_channel_pool.h | 85 ++++++------------- .../internal/dynamic_channel_pool_test.cc | 8 +- 8 files changed, 78 insertions(+), 99 deletions(-) diff --git a/ci/cloudbuild/builds/integration-production.sh b/ci/cloudbuild/builds/integration-production.sh index 3edee6d29aa8f..e0783aa8559bc 100755 --- a/ci/cloudbuild/builds/integration-production.sh +++ b/ci/cloudbuild/builds/integration-production.sh @@ -39,8 +39,8 @@ excluded_rules=( "-//google/cloud/storagecontrol:v2_samples_storage_control_anywhere_cache_samples" ) +# --test_filter="*ReadRowsAllRows*" --test_timeout=30 \ io::log_h2 "Running the integration tests against prod" mapfile -t integration_args < <(integration::bazel_args) io::run bazel test "${args[@]}" "${integration_args[@]}" --test_output=all \ - --test_filter="*ReadRowsAllRows*" --test_timeout=30 \ //google/cloud/bigtable/tests:data_integration_test diff --git a/google/cloud/bigtable/internal/bigtable_random_two_least_used_decorator.cc b/google/cloud/bigtable/internal/bigtable_random_two_least_used_decorator.cc index a70a77f74e546..22cabf4a6e63d 100644 --- a/google/cloud/bigtable/internal/bigtable_random_two_least_used_decorator.cc +++ b/google/cloud/bigtable/internal/bigtable_random_two_least_used_decorator.cc @@ -73,15 +73,6 @@ class AsyncStreamingReadRpcTracking } // namespace -BigtableRandomTwoLeastUsed::BigtableRandomTwoLeastUsed( - CompletionQueue cq, std::shared_ptr refresh_state, - DynamicChannelPool::StubFactoryFn - refreshing_channel_stub_factory_fn, - std::vector> children) - : pool_(DynamicChannelPool::Create( - std::move(cq), std::move(children), std::move(refresh_state), - std::move(refreshing_channel_stub_factory_fn))) {} - std::unique_ptr> BigtableRandomTwoLeastUsed::ReadRows( diff --git a/google/cloud/bigtable/internal/bigtable_random_two_least_used_decorator.h b/google/cloud/bigtable/internal/bigtable_random_two_least_used_decorator.h index b1d35d1010653..e83b70dc2418d 100644 --- a/google/cloud/bigtable/internal/bigtable_random_two_least_used_decorator.h +++ b/google/cloud/bigtable/internal/bigtable_random_two_least_used_decorator.h @@ -29,11 +29,10 @@ GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_BEGIN class BigtableRandomTwoLeastUsed : public BigtableStub { public: - BigtableRandomTwoLeastUsed( - CompletionQueue cq, std::shared_ptr refresh_state, - DynamicChannelPool::StubFactoryFn - refreshing_channel_stub_factory_fn, - std::vector> children); + explicit BigtableRandomTwoLeastUsed( + std::shared_ptr> pool) + : pool_(std::move(pool)) {} + ~BigtableRandomTwoLeastUsed() override = default; std::unique_ptr CreateBigtableStubRoundRobin( } std::shared_ptr CreateBigtableStubRandomTwoLeastUsed( - CompletionQueue cq, Options const& options, - std::function(int)> - refreshing_channel_stub_factory, + std::shared_ptr auth, + std::shared_ptr cq_impl, + Options const& options, BaseBigtableStubFactory stub_factory, std::shared_ptr refresh_state) { std::cout << __PRETTY_FUNCTION__ << std::endl; - std::vector> children( - (std::max)(1, options.get())); - int id = 0; + + auto refreshing_channel_stub_factory = + [stub_factory = std::move(stub_factory), cq_impl, refresh_state, + auth = std::move(auth), options](std::uint32_t id) + -> std::shared_ptr> { + auto wrapper = std::make_shared>(); + auto connection_status_fn = [weak = wrapper->MakeWeak()](Status const& s) { + if (auto self = weak.lock()) { + self->SetLastRefreshStatus(s); + } + if (!s.ok()) { + GCP_LOG(WARNING) << "Failed to refresh connection. Error: " << s; + } + }; + auto channel = CreateGrpcChannel(*auth, options, id); + ScheduleChannelRefresh(cq_impl, refresh_state, channel, + std::move(connection_status_fn)); + wrapper->set_channel(stub_factory(std::move(channel))); + return wrapper; + }; + + std::vector>> children( + std::max(1, options.get())); + std::uint32_t id = 0; std::generate(children.begin(), children.end(), [&id, &refreshing_channel_stub_factory] { return refreshing_channel_stub_factory(id++); }); - std::cout << __PRETTY_FUNCTION__ << ": children.size()=" << children.size() - << std::endl; + return std::make_shared( - std::move(cq), std::move(refresh_state), refreshing_channel_stub_factory, - std::move(children)); + DynamicChannelPool::Create( + CompletionQueue(std::move(cq_impl)), std::move(children), + std::move(refresh_state), + std::move(refreshing_channel_stub_factory))); + // CompletionQueue(cq_impl), std::move(refresh_state), + // std::move(refreshing_channel_stub_factory), std::move(children)); } std::shared_ptr CreateDecoratedStubs( @@ -109,14 +133,9 @@ std::shared_ptr CreateDecoratedStubs( if (options.has() && options.get() == bigtable::experimental::ChannelPoolType::kDynamic) { - auto refreshing_channel_stub_factory = [stub_factory, cq_impl, refresh, - &auth, options](int id) { - auto channel = CreateGrpcChannel(*auth, options, id); - ScheduleChannelRefresh(cq_impl, refresh, channel); - return stub_factory(std::move(channel)); - }; stub = CreateBigtableStubRandomTwoLeastUsed( - cq, options, std::move(refreshing_channel_stub_factory), + auth, std::move(cq_impl), options, stub_factory, + // std::move(refreshing_channel_stub_factory), std::move(refresh)); } else { auto refreshing_channel_stub_factory = [stub_factory, cq_impl, refresh, diff --git a/google/cloud/bigtable/internal/bigtable_stub_factory.h b/google/cloud/bigtable/internal/bigtable_stub_factory.h index f9111abbb5e88..346014fb85b97 100644 --- a/google/cloud/bigtable/internal/bigtable_stub_factory.h +++ b/google/cloud/bigtable/internal/bigtable_stub_factory.h @@ -43,9 +43,11 @@ std::shared_ptr CreateBigtableStubRoundRobin( refreshing_channel_stub_factory); std::shared_ptr CreateBigtableStubRandomTwoLeastUsed( - CompletionQueue cq, Options const& options, - std::function(int)> - refreshing_channel_stub_factory, + std::shared_ptr auth, + std::shared_ptr cq_impl, + Options const& options, BaseBigtableStubFactory stub_factory, + // std::function(int)> + // refreshing_channel_stub_factory, std::shared_ptr refresh_state); /// Used in testing to create decorated mocks. diff --git a/google/cloud/bigtable/internal/connection_refresh_state.cc b/google/cloud/bigtable/internal/connection_refresh_state.cc index db6a0fbeec76e..603878a13ffc0 100644 --- a/google/cloud/bigtable/internal/connection_refresh_state.cc +++ b/google/cloud/bigtable/internal/connection_refresh_state.cc @@ -95,8 +95,7 @@ void ScheduleChannelRefresh( connection_status_fn = std::move(connection_status_fn)]( future fut) { auto conn_status = fut.get(); - if (connection_status_fn) connection_status_fn(conn_status); - + connection_status_fn(conn_status); auto channel = weak_channel.lock(); if (!channel) return; auto cq_impl = weak_cq_impl.lock(); diff --git a/google/cloud/bigtable/internal/dynamic_channel_pool.h b/google/cloud/bigtable/internal/dynamic_channel_pool.h index c6838db547630..dbe5983223c41 100644 --- a/google/cloud/bigtable/internal/dynamic_channel_pool.h +++ b/google/cloud/bigtable/internal/dynamic_channel_pool.h @@ -34,6 +34,7 @@ template class ChannelUsageWrapper : public std::enable_shared_from_this> { public: + ChannelUsageWrapper() = default; explicit ChannelUsageWrapper(std::shared_ptr stub) : stub_(std::move(stub)) {} @@ -55,6 +56,11 @@ class ChannelUsageWrapper last_refresh_status_ = std::move(s); } + ChannelUsageWrapper& set_channel(std::shared_ptr channel) { + stub_ = std::move(channel); + return *this; + } + std::weak_ptr> MakeWeak() { return this->shared_from_this(); } @@ -81,7 +87,8 @@ template class DynamicChannelPool : public std::enable_shared_from_this> { public: - using StubFactoryFn = std::function(int id)>; + using StubFactoryFn = + std::function>(std::uint32_t id)>; struct SizingPolicy { // To avoid channel churn, the pool will not add or remove channels more // frequently that this period. @@ -117,26 +124,8 @@ class DynamicChannelPool }; static std::shared_ptr Create( - CompletionQueue cq, std::size_t initial_size, - StubFactoryFn stub_factory_fn, - std::shared_ptr refresh_state, - SizingPolicy sizing_policy = {}) { - std::cout << __PRETTY_FUNCTION__ << ": enter" << std::endl; - std::vector>> - initial_wrapped_channels; - for (std::size_t i = 0; i < initial_size; ++i) { - initial_wrapped_channels.emplace_back(stub_factory_fn()); - } - auto pool = std::shared_ptr(new DynamicChannelPool( - std::move(cq), std::move(initial_wrapped_channels), - std::move(refresh_state), std::move(stub_factory_fn), - std::move(sizing_policy))); - std::cout << __PRETTY_FUNCTION__ << ": return pool" << std::endl; - return pool; - } - - static std::shared_ptr Create( - CompletionQueue cq, std::vector> initial_channels, + CompletionQueue cq, + std::vector>> initial_channels, std::shared_ptr refresh_state, StubFactoryFn stub_factory_fn, SizingPolicy sizing_policy = {}) { std::cout << __PRETTY_FUNCTION__ << ": enter" << std::endl; @@ -193,17 +182,12 @@ class DynamicChannelPool std::iota(iterators.begin(), iterators.end(), channels_.begin()); std::shuffle(iterators.begin(), iterators.end(), rng_); - // std::cout << __PRETTY_FUNCTION__ << ": shuffled iterators" << - // std::endl; - // std::vector< - // typename - // std::vector>>::iterator> - // iterators; - typename std::vector>>::iterator>::iterator - shuffle_iter = iterators.begin(); - typename std::vector>>::iterator - channel_1 = *shuffle_iter; + // typename std::vector>>::iterator>::iterator + auto shuffle_iter = iterators.begin(); + // typename + // std::vector>>::iterator + auto channel_1 = *shuffle_iter; std::shared_ptr> c = *channel_1; // std::cout << __PRETTY_FUNCTION__ // << ": check channel 1=" << c.get() << std::endl; @@ -211,8 +195,9 @@ class DynamicChannelPool ? (*channel_1)->outstanding_rpcs() : Status{StatusCode::kNotFound, ""}; ++shuffle_iter; - typename std::vector>>::iterator - channel_2 = *shuffle_iter; + // typename + // std::vector>>::iterator + auto channel_2 = *shuffle_iter; // We want to snapshot these outstanding_rpcs values. // std::cout << __PRETTY_FUNCTION__ // << ": check channel 2=" << (channel_2)->get() << std::endl; @@ -264,7 +249,7 @@ class DynamicChannelPool return *channel_2; } - // TODO: we have no usable channels in the entire pool; this is bad. + // TODO(sdhart): we have no usable channels in the entire pool; this is bad. std::cout << __PRETTY_FUNCTION__ << ": NO USABLE CHANNELS" << std::endl; return nullptr; } @@ -280,26 +265,7 @@ class DynamicChannelPool stub_factory_fn_(std::move(stub_factory_fn)), channels_(std::move(initial_wrapped_channels)), sizing_policy_(std::move(sizing_policy)), - next_channel_id_(channels_.size()) { - sizing_policy_.minimum_channel_pool_size = channels_.size(); - } - - DynamicChannelPool(CompletionQueue cq, - std::vector> initial_channels, - std::shared_ptr refresh_state, - StubFactoryFn stub_factory_fn, SizingPolicy sizing_policy) - : cq_(std::move(cq)), - refresh_state_(std::move(refresh_state)), - stub_factory_fn_(std::move(stub_factory_fn)), - channels_(), - sizing_policy_(std::move(sizing_policy)), - next_channel_id_(static_cast(initial_channels.size())) { - std::cout << __PRETTY_FUNCTION__ << ": wrap initial_channels" << std::endl; - channels_.reserve(initial_channels.size()); - for (auto& channel : initial_channels) { - channels_.push_back( - std::make_shared>(std::move(channel))); - } + next_channel_id_(static_cast(channels_.size())) { sizing_policy_.minimum_channel_pool_size = channels_.size(); } @@ -348,8 +314,8 @@ class DynamicChannelPool std::vector>> new_stubs; new_stubs.reserve(new_channel_ids.size()); for (auto const& id : new_channel_ids) { - new_stubs.push_back( - std::make_shared>(stub_factory_fn_(id))); + new_stubs.push_back(stub_factory_fn_(id)); + // std::make_shared>(stub_factory_fn_(id))); } std::unique_lock lk(mu_); channels_.insert(channels_.end(), @@ -391,7 +357,8 @@ class DynamicChannelPool } draining_channels_.pop_back(); } - // TODO: If iterators becomes a member variable perhaps add logic to call + // TODO(sdhart): If iterators becomes a member variable perhaps add logic to + // call // shrink_to_fit on iterators_ if there's a large // difference between iterators_.capacity and channels_.size } @@ -446,7 +413,7 @@ class DynamicChannelPool future remove_channel_poll_timer_; absl::optional>> pool_resize_cooldown_timer_ = absl::nullopt; - int next_channel_id_; + std::uint32_t next_channel_id_; // std::vector< // typename // std::vector>>::iterator> diff --git a/google/cloud/bigtable/internal/dynamic_channel_pool_test.cc b/google/cloud/bigtable/internal/dynamic_channel_pool_test.cc index 38c7deb2afb7c..c43c3c2b677b2 100644 --- a/google/cloud/bigtable/internal/dynamic_channel_pool_test.cc +++ b/google/cloud/bigtable/internal/dynamic_channel_pool_test.cc @@ -34,13 +34,15 @@ TEST(DynamicChannelPoolTest, GetChannelRandomTwoLeastUsed) { fake_cq_impl, std::chrono::milliseconds(1), std::chrono::milliseconds(10)); - auto stub_factory_fn = [](int) -> std::shared_ptr { - return std::make_shared(); + auto stub_factory_fn = + [](int) -> std::shared_ptr> { + auto mock = std::make_shared(); + return std::make_shared>(mock); }; DynamicChannelPool::SizingPolicy sizing_policy; - std::vector> channels(10); + std::vector>> channels(10); int id = 0; std::generate(channels.begin(), channels.end(), [&]() { return stub_factory_fn(id++); }); From e5a1ff8726e4e9775356c9a28ce4a903fec6fb80 Mon Sep 17 00:00:00 2001 From: Scott Hart Date: Tue, 9 Dec 2025 20:36:43 -0500 Subject: [PATCH 10/11] factor out EvictBadChannels --- .../bigtable/internal/dynamic_channel_pool.h | 67 ++++++++++++++----- 1 file changed, 50 insertions(+), 17 deletions(-) diff --git a/google/cloud/bigtable/internal/dynamic_channel_pool.h b/google/cloud/bigtable/internal/dynamic_channel_pool.h index dbe5983223c41..4155c5a748153 100644 --- a/google/cloud/bigtable/internal/dynamic_channel_pool.h +++ b/google/cloud/bigtable/internal/dynamic_channel_pool.h @@ -187,22 +187,22 @@ class DynamicChannelPool auto shuffle_iter = iterators.begin(); // typename // std::vector>>::iterator - auto channel_1 = *shuffle_iter; - std::shared_ptr> c = *channel_1; + auto channel_1_iter = *shuffle_iter; + std::shared_ptr> c = *channel_1_iter; // std::cout << __PRETTY_FUNCTION__ // << ": check channel 1=" << c.get() << std::endl; auto channel_1_rpcs = shuffle_iter != iterators.end() - ? (*channel_1)->outstanding_rpcs() + ? (*channel_1_iter)->outstanding_rpcs() : Status{StatusCode::kNotFound, ""}; ++shuffle_iter; // typename // std::vector>>::iterator - auto channel_2 = *shuffle_iter; + auto channel_2_iter = *shuffle_iter; // We want to snapshot these outstanding_rpcs values. // std::cout << __PRETTY_FUNCTION__ // << ": check channel 2=" << (channel_2)->get() << std::endl; auto channel_2_rpcs = shuffle_iter != iterators.end() - ? (*channel_2)->outstanding_rpcs() + ? (*channel_2_iter)->outstanding_rpcs() : Status{StatusCode::kNotFound, ""}; // This is the ideal (and most common ) case so we try it first. // std::cout << __PRETTY_FUNCTION__ << ": compare channel rpcs" << @@ -210,43 +210,49 @@ class DynamicChannelPool if (channel_1_rpcs.ok() && channel_2_rpcs.ok()) { std::cout << __PRETTY_FUNCTION__ << ": 2 ok channels, returning smaller" << std::endl; - return *channel_1_rpcs < *channel_2_rpcs ? *channel_1 : *channel_2; + return *channel_1_rpcs < *channel_2_rpcs ? *channel_1_iter + : *channel_2_iter; } // We have one or more bad channels. Spending time finding a good channel // will be cheaper than trying to use a bad channel in the long run. std::vector< typename std::vector>>::iterator> - bad_channels; + bad_channel_iters; + while (!channel_1_rpcs.ok() && shuffle_iter != iterators.end()) { - bad_channels.push_back(channel_1); + bad_channel_iters.push_back(channel_1_iter); ++shuffle_iter; - channel_1 = *shuffle_iter; + channel_1_iter = *shuffle_iter; channel_1_rpcs = shuffle_iter != iterators.end() - ? (*channel_1)->outstanding_rpcs() + ? (*channel_1_iter)->outstanding_rpcs() : Status{StatusCode::kNotFound, ""}; } while (!channel_2_rpcs.ok() && shuffle_iter != iterators.end()) { - bad_channels.push_back(channel_2); + bad_channel_iters.push_back(channel_2_iter); ++shuffle_iter; - channel_2 = *shuffle_iter; + channel_2_iter = *shuffle_iter; channel_2_rpcs = shuffle_iter != iterators.end() - ? (*channel_2)->outstanding_rpcs() + ? (*channel_2_iter)->outstanding_rpcs() : Status{StatusCode::kNotFound, ""}; } + EvictBadChannels(lk, bad_channel_iters); + ScheduleRemoveChannel(lk); + if (channel_1_rpcs.ok() && channel_2_rpcs.ok()) { std::cout << __PRETTY_FUNCTION__ << ": 2 ok channels" << std::endl; - return *channel_1_rpcs < *channel_2_rpcs ? *channel_1 : *channel_2; + return *channel_1_rpcs < *channel_2_rpcs ? *channel_1_iter + : *channel_2_iter; } if (channel_1_rpcs.ok()) { std::cout << __PRETTY_FUNCTION__ << ": ONLY channel_1 ok" << std::endl; - return *channel_1; + return *channel_1_iter; } if (channel_2_rpcs.ok()) { std::cout << __PRETTY_FUNCTION__ << ": ONLY channel_2 ok" << std::endl; - return *channel_2; + return *channel_2_iter; } // TODO(sdhart): we have no usable channels in the entire pool; this is bad. @@ -324,6 +330,9 @@ class DynamicChannelPool } void ScheduleRemoveChannel(std::unique_lock const&) { + if (remove_channel_poll_timer_.valid()) return; + std::cout << __PRETTY_FUNCTION__ << ": set remove_channel_poll_timer" + << std::endl; std::weak_ptr> foo = this->shared_from_this(); remove_channel_poll_timer_ = cq_.MakeRelativeTimer(sizing_policy_.remove_channel_polling_interval) @@ -351,7 +360,7 @@ class DynamicChannelPool }); while (!draining_channels_.empty()) { auto outstanding_rpcs = draining_channels_.back()->outstanding_rpcs(); - if (outstanding_rpcs.ok() && *outstanding_rpcs != 0) { + if (outstanding_rpcs.ok() && *outstanding_rpcs > 0) { ScheduleRemoveChannel(lk); return; } @@ -363,6 +372,28 @@ class DynamicChannelPool // difference between iterators_.capacity and channels_.size } + void EvictBadChannels(std::unique_lock const&, + std::vector>>::iterator>& + bad_channel_iters) { + auto back_iter = channels_.rbegin(); + for (auto& bad_channel_iter : bad_channel_iters) { + bool swapped = false; + while (!swapped) { + auto b = (*back_iter)->outstanding_rpcs(); + if (b.ok()) { + std::swap(*back_iter, *bad_channel_iter); + draining_channels_.push_back(std::move(*back_iter)); + swapped = true; + } + ++back_iter; + } + } + for (std::size_t i = 0; i < bad_channel_iters.size(); ++i) { + channels_.pop_back(); + } + } + void SetResizeCooldownTimer(std::unique_lock const&) { pool_resize_cooldown_timer_ = cq_.MakeRelativeTimer(sizing_policy_.pool_resize_cooldown_interval); @@ -380,6 +411,7 @@ class DynamicChannelPool << ": channels_.size()=" << channels_.size() << "; sizing_policy_.minimum_channel_pool_size=" << sizing_policy_.minimum_channel_pool_size << std::endl; + // TODO(sdhart): do we need to check if we're over max pool size here? if (average_rpc_per_channel < sizing_policy_.minimum_average_outstanding_rpcs_per_channel && channels_.size() > sizing_policy_.minimum_channel_pool_size) { @@ -391,6 +423,7 @@ class DynamicChannelPool ScheduleRemoveChannel(lk); SetResizeCooldownTimer(lk); } + // TODO(sdhart): do we need to check if we're under min pool size here? if (average_rpc_per_channel > sizing_policy_.maximum_average_outstanding_rpcs_per_channel && channels_.size() < sizing_policy_.maximum_channel_pool_size) { From 59d22fbcd6b03b58b25bbf5b1b6339cfb8be2138 Mon Sep 17 00:00:00 2001 From: Scott Hart Date: Thu, 11 Dec 2025 20:16:23 -0500 Subject: [PATCH 11/11] average per minute with tests --- ...igtable_random_two_least_used_decorator.cc | 2 +- ...bigtable_random_two_least_used_decorator.h | 4 +- .../internal/bigtable_stub_factory.cc | 16 +- .../internal/connection_refresh_state.cc | 12 +- .../bigtable/internal/dynamic_channel_pool.h | 238 ++++++++++-------- .../internal/dynamic_channel_pool_test.cc | 101 +++++++- google/cloud/bigtable/options.h | 38 +++ 7 files changed, 290 insertions(+), 121 deletions(-) diff --git a/google/cloud/bigtable/internal/bigtable_random_two_least_used_decorator.cc b/google/cloud/bigtable/internal/bigtable_random_two_least_used_decorator.cc index 22cabf4a6e63d..8203234f6325d 100644 --- a/google/cloud/bigtable/internal/bigtable_random_two_least_used_decorator.cc +++ b/google/cloud/bigtable/internal/bigtable_random_two_least_used_decorator.cc @@ -329,7 +329,7 @@ BigtableRandomTwoLeastUsed::AsyncPrepareQuery( return result; } -std::shared_ptr> +std::shared_ptr> BigtableRandomTwoLeastUsed::Child() { std::cout << __PRETTY_FUNCTION__ << std::endl; return pool_->GetChannelRandomTwoLeastUsed(); diff --git a/google/cloud/bigtable/internal/bigtable_random_two_least_used_decorator.h b/google/cloud/bigtable/internal/bigtable_random_two_least_used_decorator.h index e83b70dc2418d..8ee6a7504fb90 100644 --- a/google/cloud/bigtable/internal/bigtable_random_two_least_used_decorator.h +++ b/google/cloud/bigtable/internal/bigtable_random_two_least_used_decorator.h @@ -129,9 +129,7 @@ class BigtableRandomTwoLeastUsed : public BigtableStub { google::bigtable::v2::PrepareQueryRequest const& request) override; private: - std::shared_ptr> Child(); - - // std::mutex mu_; + std::shared_ptr> Child(); std::shared_ptr> pool_; }; diff --git a/google/cloud/bigtable/internal/bigtable_stub_factory.cc b/google/cloud/bigtable/internal/bigtable_stub_factory.cc index 3803f359f4332..e0f0537c5c8a8 100644 --- a/google/cloud/bigtable/internal/bigtable_stub_factory.cc +++ b/google/cloud/bigtable/internal/bigtable_stub_factory.cc @@ -85,30 +85,34 @@ std::shared_ptr CreateBigtableStubRandomTwoLeastUsed( auto refreshing_channel_stub_factory = [stub_factory = std::move(stub_factory), cq_impl, refresh_state, - auth = std::move(auth), options](std::uint32_t id) - -> std::shared_ptr> { - auto wrapper = std::make_shared>(); + auth = std::move(auth), options]( + std::uint32_t id, + bool prime_channel) -> std::shared_ptr> { + auto wrapper = std::make_shared>(); auto connection_status_fn = [weak = wrapper->MakeWeak()](Status const& s) { if (auto self = weak.lock()) { - self->SetLastRefreshStatus(s); + self->set_last_refresh_status(s); } if (!s.ok()) { GCP_LOG(WARNING) << "Failed to refresh connection. Error: " << s; } }; auto channel = CreateGrpcChannel(*auth, options, id); + if (prime_channel) { + (void)channel->GetState(true); + } ScheduleChannelRefresh(cq_impl, refresh_state, channel, std::move(connection_status_fn)); wrapper->set_channel(stub_factory(std::move(channel))); return wrapper; }; - std::vector>> children( + std::vector>> children( std::max(1, options.get())); std::uint32_t id = 0; std::generate(children.begin(), children.end(), [&id, &refreshing_channel_stub_factory] { - return refreshing_channel_stub_factory(id++); + return refreshing_channel_stub_factory(id++, false); }); return std::make_shared( diff --git a/google/cloud/bigtable/internal/connection_refresh_state.cc b/google/cloud/bigtable/internal/connection_refresh_state.cc index 603878a13ffc0..04ed8afe1650e 100644 --- a/google/cloud/bigtable/internal/connection_refresh_state.cc +++ b/google/cloud/bigtable/internal/connection_refresh_state.cc @@ -30,6 +30,12 @@ namespace { */ auto constexpr kConnectionReadyTimeout = std::chrono::seconds(10); +void LogFailedConnectionRefresh(Status const& conn_status) { + if (!conn_status.ok()) { + GCP_LOG(WARNING) << "Failed to refresh connection. Error: " << conn_status; + } +} + } // namespace ConnectionRefreshState::ConnectionRefreshState( @@ -53,12 +59,6 @@ bool ConnectionRefreshState::enabled() const { return max_conn_refresh_period_.count() != 0; } -void LogFailedConnectionRefresh(Status const& conn_status) { - if (!conn_status.ok()) { - GCP_LOG(WARNING) << "Failed to refresh connection. Error: " << conn_status; - } -} - void ScheduleChannelRefresh( std::shared_ptr const& cq_impl, std::shared_ptr const& state, diff --git a/google/cloud/bigtable/internal/dynamic_channel_pool.h b/google/cloud/bigtable/internal/dynamic_channel_pool.h index 4155c5a748153..f36d97ec6148d 100644 --- a/google/cloud/bigtable/internal/dynamic_channel_pool.h +++ b/google/cloud/bigtable/internal/dynamic_channel_pool.h @@ -16,7 +16,9 @@ #define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_INTERNAL_DYNAMIC_CHANNEL_POOL_H #include "google/cloud/bigtable/internal/connection_refresh_state.h" +#include "google/cloud/bigtable/options.h" #include "google/cloud/completion_queue.h" +#include "google/cloud/internal/clock.h" #include "google/cloud/internal/random.h" #include "google/cloud/version.h" #include @@ -31,103 +33,98 @@ namespace bigtable_internal { GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_BEGIN template -class ChannelUsageWrapper - : public std::enable_shared_from_this> { +class ChannelUsage : public std::enable_shared_from_this> { public: - ChannelUsageWrapper() = default; - explicit ChannelUsageWrapper(std::shared_ptr stub) - : stub_(std::move(stub)) {} + using Clock = ::google::cloud::internal::SteadyClock; + ChannelUsage() : clock_(std::make_shared()) {} + explicit ChannelUsage(std::shared_ptr stub, std::shared_ptr clock = + std::make_shared()) + : stub_(std::move(stub)), clock_(std::move(clock)) {} - // This value is a snapshot and can change immediately after the lock is - // released. - StatusOr outstanding_rpcs() const { + StatusOr average_outstanding_rpcs() { std::unique_lock lk(mu_); if (!last_refresh_status_.ok()) return last_refresh_status_; - return outstanding_rpcs_; + if (measurements_.empty()) return 0; + auto time_point = clock_->Now() - std::chrono::seconds(60); + auto iter = std::find_if( + measurements_.begin(), measurements_.end(), + [&](Measurement const& m) { return m.timestamp >= time_point; }); + int num_measurements = 0; + int last_minute_sum = std::accumulate( + iter, measurements_.end(), 0, [&](int a, Measurement const& b) mutable { + ++num_measurements; + return a + b.outstanding_rpcs; + }); + measurements_.erase(measurements_.begin(), iter); + return num_measurements > 0 ? last_minute_sum / num_measurements : 0; } - Status LastRefreshStatus() const { + StatusOr instant_outstanding_rpcs() { std::unique_lock lk(mu_); - return last_refresh_status_; + if (!last_refresh_status_.ok()) return last_refresh_status_; + return outstanding_rpcs_; } - void SetLastRefreshStatus(Status s) { + ChannelUsage& set_last_refresh_status(Status s) { std::unique_lock lk(mu_); last_refresh_status_ = std::move(s); + return *this; } - ChannelUsageWrapper& set_channel(std::shared_ptr channel) { - stub_ = std::move(channel); + // A channel can only be set if the current value is nullptr. This mutator + // exists only so that we can obtain a std::weak_ptr to the ChannelUsage + // object that will eventually hold the channel. + ChannelUsage& set_channel(std::shared_ptr channel) { + std::unique_lock lk(mu_); + if (!stub_) stub_ = std::move(channel); return *this; } - std::weak_ptr> MakeWeak() { - return this->shared_from_this(); - } + std::weak_ptr> MakeWeak() { return this->shared_from_this(); } std::shared_ptr AcquireStub() { std::unique_lock lk(mu_); ++outstanding_rpcs_; + auto time = clock_->Now(); + measurements_.emplace_back(outstanding_rpcs_, time); return stub_; } void ReleaseStub() { std::unique_lock lk(mu_); --outstanding_rpcs_; + measurements_.emplace_back(outstanding_rpcs_, clock_->Now()); } private: mutable std::mutex mu_; std::shared_ptr stub_; + std::shared_ptr clock_; int outstanding_rpcs_ = 0; Status last_refresh_status_; + struct Measurement { + Measurement(int outstanding_rpcs, std::chrono::steady_clock::time_point p) + : outstanding_rpcs(outstanding_rpcs), timestamp(p) {} + int outstanding_rpcs; + std::chrono::steady_clock::time_point timestamp; + }; + std::deque measurements_; }; template class DynamicChannelPool : public std::enable_shared_from_this> { public: - using StubFactoryFn = - std::function>(std::uint32_t id)>; - struct SizingPolicy { - // To avoid channel churn, the pool will not add or remove channels more - // frequently that this period. - std::chrono::milliseconds pool_resize_cooldown_interval = - std::chrono::milliseconds(60 * 1000); - - struct DiscreteChannels { - int number; - }; - struct PercentageOfPoolSize { - double percentage; - }; - absl::variant - channels_to_add_per_resize = DiscreteChannels{1}; - - // If the average number of outstanding RPCs is below this threshold, - // the pool size will be decreased. - int minimum_average_outstanding_rpcs_per_channel = 20; - // If the average number of outstanding RPCs is above this threshold, - // the pool size will be increased. - int maximum_average_outstanding_rpcs_per_channel = 80; - - // When channels are removed from the pool, we have to wait until all - // outstanding RPCs on that channel are completed before destroying it. - std::chrono::milliseconds remove_channel_polling_interval = - std::chrono::milliseconds(30 * 1000); - - // Limits how large the pool can grow. - std::size_t maximum_channel_pool_size; - - // This is set to the initial channel pool size. - std::size_t minimum_channel_pool_size; - }; + using StubFactoryFn = std::function>( + std::uint32_t id, bool prime_channel)>; static std::shared_ptr Create( CompletionQueue cq, - std::vector>> initial_channels, + std::vector>> initial_channels, std::shared_ptr refresh_state, - StubFactoryFn stub_factory_fn, SizingPolicy sizing_policy = {}) { + StubFactoryFn stub_factory_fn, + bigtable::experimental::DynamicChannelPoolSizingPolicy sizing_policy = + {}) { std::cout << __PRETTY_FUNCTION__ << ": enter" << std::endl; auto pool = std::shared_ptr(new DynamicChannelPool( std::move(cq), std::move(initial_channels), std::move(refresh_state), @@ -143,8 +140,8 @@ class DynamicChannelPool // performing this work. We might as well cancel those timer futures now. refresh_state_->timers().CancelAll(); if (remove_channel_poll_timer_.valid()) remove_channel_poll_timer_.cancel(); - if (pool_resize_cooldown_timer_ && remove_channel_poll_timer_.valid()) { - pool_resize_cooldown_timer_->cancel(); + if (remove_channel_poll_timer_.valid()) { + pool_resize_cooldown_timer_.cancel(); } } @@ -155,7 +152,18 @@ class DynamicChannelPool return channels_.size(); } - std::shared_ptr> GetChannelRandomTwoLeastUsed() { + // If the pool is not under a pool_resize_cooldown_timer_, call + // CheckPoolChannelHealth. + // Pick two random channels from channels_ and return the channel with the + // lower number of outstanding_rpcs. This is the "quick" path. + // If one or both of the random channels have been marked unhealthy after a + // refresh, continue choosing random channels to find a pair of healthy + // channels to compare. Any channels found to be unhealthy are moved from + // channels_ to draining_channels_ and call ScheduleRemoveChannel. + // If there is only one health channel in the pool, use it. + // If there are no healthy channels in channels_, create a new channel and + // use that one. Also call ScheduleAddChannel to replenish channels_. + std::shared_ptr> GetChannelRandomTwoLeastUsed() { std::unique_lock lk(mu_); std::cout << __PRETTY_FUNCTION__ << ": channels_size()=" << channels_.size() << std::endl; @@ -164,11 +172,10 @@ class DynamicChannelPool // std::endl; // } - if (!pool_resize_cooldown_timer_) { + if (!pool_resize_cooldown_timer_.valid()) { CheckPoolChannelHealth(lk); - } else if (pool_resize_cooldown_timer_->is_ready()) { - (void)pool_resize_cooldown_timer_->get(); - pool_resize_cooldown_timer_ = absl::nullopt; + } else if (pool_resize_cooldown_timer_.is_ready()) { + (void)pool_resize_cooldown_timer_.get(); CheckPoolChannelHealth(lk); } @@ -176,33 +183,33 @@ class DynamicChannelPool // CheckPoolChannelHealth" // << std::endl; std::vector< - typename std::vector>>::iterator> + typename std::vector>>::iterator> iterators(channels_.size()); std::iota(iterators.begin(), iterators.end(), channels_.begin()); std::shuffle(iterators.begin(), iterators.end(), rng_); // typename std::vector>>::iterator>::iterator + // std::shared_ptr>>::iterator>::iterator auto shuffle_iter = iterators.begin(); // typename - // std::vector>>::iterator + // std::vector>>::iterator auto channel_1_iter = *shuffle_iter; - std::shared_ptr> c = *channel_1_iter; + std::shared_ptr> c = *channel_1_iter; // std::cout << __PRETTY_FUNCTION__ // << ": check channel 1=" << c.get() << std::endl; auto channel_1_rpcs = shuffle_iter != iterators.end() - ? (*channel_1_iter)->outstanding_rpcs() + ? (*channel_1_iter)->average_outstanding_rpcs() : Status{StatusCode::kNotFound, ""}; ++shuffle_iter; // typename - // std::vector>>::iterator + // std::vector>>::iterator auto channel_2_iter = *shuffle_iter; // We want to snapshot these outstanding_rpcs values. // std::cout << __PRETTY_FUNCTION__ // << ": check channel 2=" << (channel_2)->get() << std::endl; auto channel_2_rpcs = shuffle_iter != iterators.end() - ? (*channel_2_iter)->outstanding_rpcs() + ? (*channel_2_iter)->average_outstanding_rpcs() : Status{StatusCode::kNotFound, ""}; // This is the ideal (and most common ) case so we try it first. // std::cout << __PRETTY_FUNCTION__ << ": compare channel rpcs" << @@ -217,7 +224,7 @@ class DynamicChannelPool // We have one or more bad channels. Spending time finding a good channel // will be cheaper than trying to use a bad channel in the long run. std::vector< - typename std::vector>>::iterator> + typename std::vector>>::iterator> bad_channel_iters; while (!channel_1_rpcs.ok() && shuffle_iter != iterators.end()) { @@ -225,7 +232,7 @@ class DynamicChannelPool ++shuffle_iter; channel_1_iter = *shuffle_iter; channel_1_rpcs = shuffle_iter != iterators.end() - ? (*channel_1_iter)->outstanding_rpcs() + ? (*channel_1_iter)->average_outstanding_rpcs() : Status{StatusCode::kNotFound, ""}; } @@ -234,7 +241,7 @@ class DynamicChannelPool ++shuffle_iter; channel_2_iter = *shuffle_iter; channel_2_rpcs = shuffle_iter != iterators.end() - ? (*channel_2_iter)->outstanding_rpcs() + ? (*channel_2_iter)->average_outstanding_rpcs() : Status{StatusCode::kNotFound, ""}; } @@ -248,24 +255,34 @@ class DynamicChannelPool } if (channel_1_rpcs.ok()) { std::cout << __PRETTY_FUNCTION__ << ": ONLY channel_1 ok" << std::endl; + // Schedule repopulating the pool. + ScheduleAddChannel(lk); return *channel_1_iter; } if (channel_2_rpcs.ok()) { std::cout << __PRETTY_FUNCTION__ << ": ONLY channel_2 ok" << std::endl; + // Schedule repopulating the pool. + ScheduleAddChannel(lk); return *channel_2_iter; } - // TODO(sdhart): we have no usable channels in the entire pool; this is bad. + // We have no usable channels in the entire pool; this is bad. + // Create a channel immediately to unblock application. std::cout << __PRETTY_FUNCTION__ << ": NO USABLE CHANNELS" << std::endl; - return nullptr; + channels_.push_back(stub_factory_fn_(next_channel_id_++, true)); + std::swap(channels_.front(), channels_.back()); + // Schedule repopulating the pool. + ScheduleAddChannel(lk); + return channels_.front(); } private: - DynamicChannelPool(CompletionQueue cq, - std::vector>> - initial_wrapped_channels, - std::shared_ptr refresh_state, - StubFactoryFn stub_factory_fn, SizingPolicy sizing_policy) + DynamicChannelPool( + CompletionQueue cq, + std::vector>> initial_wrapped_channels, + std::shared_ptr refresh_state, + StubFactoryFn stub_factory_fn, + bigtable::experimental::DynamicChannelPoolSizingPolicy sizing_policy) : cq_(std::move(cq)), refresh_state_(std::move(refresh_state)), stub_factory_fn_(std::move(stub_factory_fn)), @@ -278,17 +295,23 @@ class DynamicChannelPool struct ChannelAddVisitor { std::size_t pool_size; explicit ChannelAddVisitor(std::size_t pool_size) : pool_size(pool_size) {} - std::size_t operator()(typename SizingPolicy::DiscreteChannels const& c) { + std::size_t operator()( + typename bigtable::experimental::DynamicChannelPoolSizingPolicy:: + DiscreteChannels const& c) { return c.number; } std::size_t operator()( - typename SizingPolicy::PercentageOfPoolSize const& c) { + typename bigtable::experimental::DynamicChannelPoolSizingPolicy:: + PercentageOfPoolSize const& c) { return static_cast( std::floor(static_cast(pool_size) * c.percentage)); } }; + // Determines the number of channels to add and reserves the channel ids to + // be used. Lastly, it calls CompletionQueue::RunAsync with a callback that + // executes AddChannel with the reserved ids. void ScheduleAddChannel(std::unique_lock const&) { std::size_t num_channels_to_add; // If we're undersized due to bad channels, get us back to the minimum size. @@ -316,12 +339,13 @@ class DynamicChannelPool }); } + // Creates the new channels using the stub_factory_fn and only after that + // locks the mutex to add the new channels. void AddChannel(std::vector const& new_channel_ids) { - std::vector>> new_stubs; + std::vector>> new_stubs; new_stubs.reserve(new_channel_ids.size()); for (auto const& id : new_channel_ids) { - new_stubs.push_back(stub_factory_fn_(id)); - // std::make_shared>(stub_factory_fn_(id))); + new_stubs.push_back(stub_factory_fn_(id, true)); } std::unique_lock lk(mu_); channels_.insert(channels_.end(), @@ -329,6 +353,9 @@ class DynamicChannelPool std::make_move_iterator(new_stubs.end())); } + // Calls CompletionQueuer::MakeRelativeTimer using + // remove_channel_polling_interval with a callback that executes + // RemoveChannel. void ScheduleRemoveChannel(std::unique_lock const&) { if (remove_channel_poll_timer_.valid()) return; std::cout << __PRETTY_FUNCTION__ << ": set remove_channel_poll_timer" @@ -347,19 +374,24 @@ class DynamicChannelPool }); } + // Locks the mutex, reverse sorts draining_channels_, calling pop_back until + // either draining_channels_ is empty or a channel with outstanding_rpcs is + // encountered. Calls ScheduleRemoveChannel if draining_channels_ is + // non-empty. void RemoveChannel() { std::unique_lock lk(mu_); std::sort(draining_channels_.begin(), draining_channels_.end(), - [](std::shared_ptr> const& a, - std::shared_ptr> b) { - auto rpcs_a = a->outstanding_rpcs(); - auto rpcs_b = b->outstanding_rpcs(); + [](std::shared_ptr> const& a, + std::shared_ptr> b) { + auto rpcs_a = a->instant_outstanding_rpcs(); + auto rpcs_b = b->instant_outstanding_rpcs(); if (!rpcs_a.ok()) return false; if (!rpcs_b.ok()) return true; return *rpcs_a > *rpcs_b; }); while (!draining_channels_.empty()) { - auto outstanding_rpcs = draining_channels_.back()->outstanding_rpcs(); + auto outstanding_rpcs = + draining_channels_.back()->instant_outstanding_rpcs(); if (outstanding_rpcs.ok() && *outstanding_rpcs > 0) { ScheduleRemoveChannel(lk); return; @@ -372,15 +404,16 @@ class DynamicChannelPool // difference between iterators_.capacity and channels_.size } - void EvictBadChannels(std::unique_lock const&, - std::vector>>::iterator>& - bad_channel_iters) { + void EvictBadChannels( + std::unique_lock const&, + std::vector< + typename std::vector>>::iterator>& + bad_channel_iters) { auto back_iter = channels_.rbegin(); for (auto& bad_channel_iter : bad_channel_iters) { bool swapped = false; while (!swapped) { - auto b = (*back_iter)->outstanding_rpcs(); + auto b = (*back_iter)->instant_outstanding_rpcs(); if (b.ok()) { std::swap(*back_iter, *bad_channel_iter); draining_channels_.push_back(std::move(*back_iter)); @@ -399,11 +432,16 @@ class DynamicChannelPool cq_.MakeRelativeTimer(sizing_policy_.pool_resize_cooldown_interval); } + // Computes the average_rpcs_pre_channel across all channels in the pool, + // excluding any channels that are awaiting removal in draining_channels_. + // The computed average is compared to the thresholds in the sizing policy + // and calls either ScheduleRemoveChannel or ScheduleAddChannel as + // appropriate. If either is called the resize_cooldown_timer is also set. void CheckPoolChannelHealth(std::unique_lock const& lk) { int average_rpc_per_channel = std::accumulate(channels_.begin(), channels_.end(), 0, [](int a, auto const& b) { - auto rpcs_b = b->outstanding_rpcs(); + auto rpcs_b = b->average_outstanding_rpcs(); return a + (rpcs_b.ok() ? *rpcs_b : 0); }) / static_cast(channels_.size()); @@ -440,16 +478,16 @@ class DynamicChannelPool google::cloud::internal::DefaultPRNG rng_; std::shared_ptr refresh_state_; StubFactoryFn stub_factory_fn_; - std::vector>> channels_; - SizingPolicy sizing_policy_; - std::vector>> draining_channels_; + std::vector>> channels_; + bigtable::experimental::DynamicChannelPoolSizingPolicy sizing_policy_; + std::vector>> draining_channels_; future remove_channel_poll_timer_; - absl::optional>> - pool_resize_cooldown_timer_ = absl::nullopt; + future> + pool_resize_cooldown_timer_; std::uint32_t next_channel_id_; // std::vector< // typename - // std::vector>>::iterator> + // std::vector>>::iterator> // iterators_; }; diff --git a/google/cloud/bigtable/internal/dynamic_channel_pool_test.cc b/google/cloud/bigtable/internal/dynamic_channel_pool_test.cc index c43c3c2b677b2..176c40ef78aaa 100644 --- a/google/cloud/bigtable/internal/dynamic_channel_pool_test.cc +++ b/google/cloud/bigtable/internal/dynamic_channel_pool_test.cc @@ -14,6 +14,8 @@ #include "google/cloud/bigtable/internal/dynamic_channel_pool.h" #include "google/cloud/bigtable/testing/mock_bigtable_stub.h" +#include "google/cloud/internal/make_status.h" +#include "google/cloud/testing_util/fake_clock.h" #include "google/cloud/testing_util/fake_completion_queue_impl.h" #include "google/cloud/testing_util/status_matchers.h" #include @@ -26,6 +28,95 @@ namespace { using ::google::cloud::bigtable::testing::MockBigtableStub; using ::google::cloud::testing_util::FakeCompletionQueueImpl; +using ::google::cloud::testing_util::IsOkAndHolds; +using ::google::cloud::testing_util::StatusIs; +using ::testing::Eq; + +TEST(ChannelUsageTest, SetChannel) { + auto mock = std::make_shared(); + auto channel = std::make_shared>(); + EXPECT_THAT(channel->AcquireStub(), Eq(nullptr)); + channel->set_channel(mock); + EXPECT_THAT(channel->AcquireStub(), Eq(mock)); + auto mock2 = std::make_shared(); + channel->set_channel(mock2); + EXPECT_THAT(channel->AcquireStub(), Eq(mock)); +} + +TEST(ChannelUsageTest, InstantOutstandingRpcs) { + // auto clock = std::make_shared(); + auto mock = std::make_shared(); + auto channel = std::make_shared>(mock); + + auto stub = channel->AcquireStub(); + EXPECT_THAT(stub, Eq(mock)); + EXPECT_THAT(channel->instant_outstanding_rpcs(), IsOkAndHolds(1)); + stub = channel->AcquireStub(); + EXPECT_THAT(stub, Eq(mock)); + stub = channel->AcquireStub(); + EXPECT_THAT(stub, Eq(mock)); + EXPECT_THAT(channel->instant_outstanding_rpcs(), IsOkAndHolds(3)); + channel->ReleaseStub(); + EXPECT_THAT(channel->instant_outstanding_rpcs(), IsOkAndHolds(2)); + channel->ReleaseStub(); + channel->ReleaseStub(); + EXPECT_THAT(channel->instant_outstanding_rpcs(), IsOkAndHolds(0)); +} + +TEST(ChannelUsageTest, SetLastRefreshStatus) { + auto mock = std::make_shared(); + auto channel = std::make_shared>(mock); + Status expected_status = internal::InternalError("uh oh"); + (void)channel->AcquireStub(); + EXPECT_THAT(channel->instant_outstanding_rpcs(), IsOkAndHolds(1)); + channel->set_last_refresh_status(expected_status); + EXPECT_THAT(channel->instant_outstanding_rpcs(), + StatusIs(expected_status.code())); + EXPECT_THAT(channel->average_outstanding_rpcs(), + StatusIs(expected_status.code())); +} + +TEST(ChannelUsageTest, AverageOutstandingRpcs) { + auto clock = std::make_shared(); + auto mock = std::make_shared(); + auto channel = std::make_shared>(mock, clock); + EXPECT_THAT(channel->instant_outstanding_rpcs(), IsOkAndHolds(0)); + + auto start = std::chrono::steady_clock::now(); + clock->SetTime(start); + // measurements: 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 + for (int i = 0; i < 10; ++i) (void)channel->AcquireStub(); + + clock->AdvanceTime(std::chrono::seconds(20)); + // measurements: 11, 12, 13, 14, 15, 16, 17, 18, 19, 20 + for (int i = 0; i < 10; ++i) (void)channel->AcquireStub(); + + clock->AdvanceTime(std::chrono::seconds(30)); + // measurements: 21, 22, 23, 24, 25, 26, 27, 28, 29, 30 + for (int i = 0; i < 10; ++i) (void)channel->AcquireStub(); + + clock->AdvanceTime(std::chrono::seconds(20)); + // FLOOR(SUM(11...30) / 20) = 20 + EXPECT_THAT(channel->average_outstanding_rpcs(), IsOkAndHolds(20)); + + clock->AdvanceTime(std::chrono::seconds(20)); + // FLOOR(SUM(21...30) / 10) = 25 + EXPECT_THAT(channel->average_outstanding_rpcs(), IsOkAndHolds(25)); + // measurements: 29, 28, 27, 26, 25, 24, 23, 22, 21, 20 + for (int i = 0; i < 10; ++i) channel->ReleaseStub(); + // FLOOR((SUM(21...30) + SUM(20...29)) / 20) = 25 + EXPECT_THAT(channel->average_outstanding_rpcs(), IsOkAndHolds(25)); + + clock->AdvanceTime(std::chrono::seconds(61)); + // All measurements have aged out. + EXPECT_THAT(channel->average_outstanding_rpcs(), IsOkAndHolds(0)); +} + +TEST(ChannelUsageTest, MakeWeak) { + auto channel = std::make_shared>(); + auto weak = channel->MakeWeak(); + EXPECT_THAT(weak.lock(), Eq(channel)); +} TEST(DynamicChannelPoolTest, GetChannelRandomTwoLeastUsed) { auto fake_cq_impl = std::make_shared(); @@ -35,17 +126,17 @@ TEST(DynamicChannelPoolTest, GetChannelRandomTwoLeastUsed) { std::chrono::milliseconds(10)); auto stub_factory_fn = - [](int) -> std::shared_ptr> { + [](int, bool) -> std::shared_ptr> { auto mock = std::make_shared(); - return std::make_shared>(mock); + return std::make_shared>(mock); }; - DynamicChannelPool::SizingPolicy sizing_policy; + bigtable::experimental::DynamicChannelPoolSizingPolicy sizing_policy; - std::vector>> channels(10); + std::vector>> channels(10); int id = 0; std::generate(channels.begin(), channels.end(), - [&]() { return stub_factory_fn(id++); }); + [&]() { return stub_factory_fn(id++, false); }); auto pool = DynamicChannelPool::Create( CompletionQueue(fake_cq_impl), channels, refresh_state, stub_factory_fn, diff --git a/google/cloud/bigtable/options.h b/google/cloud/bigtable/options.h index 90ab14f1e5ec9..11e03700a2a31 100644 --- a/google/cloud/bigtable/options.h +++ b/google/cloud/bigtable/options.h @@ -212,6 +212,44 @@ struct ChannelPoolTypeOption { using Type = ChannelPoolType; }; +struct DynamicChannelPoolSizingPolicy { + // To avoid channel churn, the pool will not add or remove channels more + // frequently that this period. + std::chrono::milliseconds pool_resize_cooldown_interval = + std::chrono::seconds(60); + + struct DiscreteChannels { + int number; + }; + struct PercentageOfPoolSize { + double percentage; + }; + absl::variant + channels_to_add_per_resize = DiscreteChannels{1}; + + // If the average number of outstanding RPCs is below this threshold, + // the pool size will be decreased. + int minimum_average_outstanding_rpcs_per_channel = 1; + // If the average number of outstanding RPCs is above this threshold, + // the pool size will be increased. + int maximum_average_outstanding_rpcs_per_channel = 25; + + // When channels are removed from the pool, we have to wait until all + // outstanding RPCs on that channel are completed before destroying it. + std::chrono::milliseconds remove_channel_polling_interval = + std::chrono::seconds(30); + + // Limits how large the pool can grow. Default is twice the minimum_pool_size. + std::size_t maximum_channel_pool_size; + + // This is set to the value of GrpcNumChannelsOption. + std::size_t minimum_channel_pool_size; +}; + +struct DynamicChannelPoolSizingPolicyOption { + using Type = DynamicChannelPoolSizingPolicy; +}; + } // namespace experimental /// The complete list of options accepted by `bigtable::*Client`