Skip to content

Commit 06fa2ce

Browse files
authored
fix: add cleanup of expired metadata for ReplicaSets and Deployments (#1621)
Summary: The existing cleanup of resources does not include these kinds. When kelvin encounters them in a cluster, a fatal error is raised and a traceback is printed as kelvin crashes (in debug builds). In non-debug builds, the metadata service simply continues to accumulate these expired resources, never removing them. This patch adds cleanup of these resources to prevent crashing (in debug builds) and to reclaim resources and perform housekeeping in non-debug builds. Relevant Issues: #1620 Type of change: /kind bugfix Test Plan: The CleanupExpiredMetadata tests in metadata_state_test were enhanced to cover these two kinds. Signed-off-by: Tim Rupp <caphrim007@gmail.com>
1 parent 2e7b467 commit 06fa2ce

File tree

2 files changed

+78
-0
lines changed

2 files changed

+78
-0
lines changed

src/shared/metadata/metadata_state.cc

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -532,6 +532,18 @@ Status K8sMetadataState::CleanupExpiredMetadata(int64_t now, int64_t retention_t
532532
services_by_name_.erase({k8s_object->ns(), k8s_object->name()});
533533
}
534534
break;
535+
case K8sObjectType::kReplicaSet:
536+
if (ReplicaSetIDByName(std::make_pair(k8s_object->ns(), k8s_object->name())) ==
537+
k8s_object->uid()) {
538+
replica_sets_by_name_.erase({k8s_object->ns(), k8s_object->name()});
539+
}
540+
break;
541+
case K8sObjectType::kDeployment:
542+
if (DeploymentIDByName(std::make_pair(k8s_object->ns(), k8s_object->name())) ==
543+
k8s_object->uid()) {
544+
deployments_by_name_.erase({k8s_object->ns(), k8s_object->name()});
545+
}
546+
break;
535547
default:
536548
LOG(DFATAL) << absl::Substitute("Unexpected object type: $0",
537549
static_cast<int>(k8s_object->type()));

src/shared/metadata/metadata_state_test.cc

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,29 @@ constexpr char kReplicaSetUpdatePbTxt[] = R"(
172172
}
173173
)";
174174

175+
// Protobuf text-format fixture: a terminated ReplicaSet update
// (stop_timestamp_ns is set), owned by "deployment1". Used by the
// CleanupExpiredMetadata test to verify that expired ReplicaSet
// metadata is removed.
// NOTE(review): reconstructed from a diff view — internal whitespace may
// differ from the original file; protobuf text format is
// whitespace-insensitive, so the parsed message is unaffected.
constexpr char kReplicaSetUpdatePbTxt00[] = R"(
uid: "rs0_uid"
name: "rs0"
start_timestamp_ns: 101
stop_timestamp_ns: 1
namespace: "ns0"
replicas: 5
fully_labeled_replicas: 5
ready_replicas: 3
available_replicas: 3
observed_generation: 5
requested_replicas: 5
conditions: {
type: "ready"
status: CONDITION_STATUS_TRUE
}
owner_references: {
kind: "Deployment"
name: "deployment1"
uid: "deployment_uid"
}
)";
197+
175198
constexpr char kDeploymentUpdatePbTxt00[] = R"(
176199
uid: "deployment_uid"
177200
name: "deployment1"
@@ -250,6 +273,29 @@ constexpr char kDeploymentUpdatePbTxt02[] = R"(
250273
}
251274
)";
252275

276+
// Protobuf text-format fixture: a terminated Deployment update
// (stop_timestamp_ns is set) for "deployment1". Used by the
// CleanupExpiredMetadata test to verify that expired Deployment
// metadata is removed.
// NOTE(review): reconstructed from a diff view — internal whitespace may
// differ from the original file; protobuf text format is
// whitespace-insensitive, so the parsed message is unaffected.
constexpr char kDeploymentUpdatePbTxt03[] = R"(
uid: "deployment_uid"
name: "deployment1"
start_timestamp_ns: 101
stop_timestamp_ns: 1
namespace: "ns0"
replicas: 5
updated_replicas: 5
ready_replicas: 3
available_replicas: 3
unavailable_replicas: 2
observed_generation: 5
requested_replicas: 5
conditions: {
type: 1
status: CONDITION_STATUS_TRUE
}
conditions: {
type: 2
status: CONDITION_STATUS_TRUE
}
)";
298+
253299
TEST(K8sMetadataStateTest, CloneCopiedCIDR) {
254300
K8sMetadataState state;
255301

@@ -591,12 +637,20 @@ TEST(K8sMetadataStateTest, CleanupExpiredMetadata) {
591637
K8sMetadataState::ServiceUpdate service_update;
592638
ASSERT_TRUE(TextFormat::MergeFromString(kRunningServiceUpdatePbTxt, &service_update));
593639

640+
K8sMetadataState::ReplicaSetUpdate replica_set_update;
641+
ASSERT_TRUE(TextFormat::MergeFromString(kReplicaSetUpdatePbTxt00, &replica_set_update));
642+
643+
K8sMetadataState::DeploymentUpdate deployment_update;
644+
ASSERT_TRUE(TextFormat::MergeFromString(kDeploymentUpdatePbTxt03, &deployment_update));
645+
594646
EXPECT_OK(state.HandleNamespaceUpdate(ns_update));
595647
EXPECT_OK(state.HandleContainerUpdate(container_update));
596648
EXPECT_OK(state.HandlePodUpdate(pod_update));
597649
EXPECT_OK(state.HandlePodUpdate(terminated_ip_pod_update));
598650
EXPECT_OK(state.HandlePodUpdate(running_ip_pod_update));
599651
EXPECT_OK(state.HandleServiceUpdate(service_update));
652+
EXPECT_OK(state.HandleReplicaSetUpdate(replica_set_update));
653+
EXPECT_OK(state.HandleDeploymentUpdate(deployment_update));
600654

601655
int64_t current_time = 150ULL;
602656

@@ -625,6 +679,12 @@ TEST(K8sMetadataStateTest, CleanupExpiredMetadata) {
625679

626680
const ContainerInfo* container_info = state.ContainerInfoByID("container0_uid");
627681
ASSERT_NE(container_info, nullptr);
682+
683+
const ReplicaSetInfo* replica_set_info = state.ReplicaSetInfoByID("rs0_uid");
684+
ASSERT_NE(replica_set_info, nullptr);
685+
686+
const DeploymentInfo* deployment_info = state.DeploymentInfoByID("deployment_uid");
687+
ASSERT_NE(deployment_info, nullptr);
628688
}
629689

630690
// Now we give them an expiry time that is short
@@ -652,6 +712,12 @@ TEST(K8sMetadataStateTest, CleanupExpiredMetadata) {
652712

653713
const ContainerInfo* container_info = state.ContainerInfoByID("container0_uid");
654714
ASSERT_EQ(container_info, nullptr);
715+
716+
const ReplicaSetInfo* replica_set_info = state.ReplicaSetInfoByID("rs0_uid");
717+
ASSERT_EQ(replica_set_info, nullptr);
718+
719+
const DeploymentInfo* deployment_info = state.DeploymentInfoByID("deployment_uid");
720+
ASSERT_EQ(deployment_info, nullptr);
655721
}
656722
}
657723

0 commit comments

Comments
 (0)