Skip to content

Commit 8cd432a

Browse files
authored
Use pod dns addrs instead of IPs to check status (#1489)
Summary: Using the pod advertised DNS addr instead of the IP address ensures that the SSL cert is valid and that we can scrape with TLS. Relevant Issues: Followup to a breakage caused by #1480 Type of change: /kind bug Test Plan: skaffold the operator to test. --------- Signed-off-by: Vihang Mehta <vihang@pixielabs.ai>
1 parent 19e3280 commit 8cd432a

File tree

6 files changed

+126
-19
lines changed

6 files changed

+126
-19
lines changed

src/operator/controllers/monitor.go

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -328,7 +328,7 @@ func getNATSState(client HTTPClient, pods *concurrentPodMap) *vizierState {
328328

329329
u := url.URL{
330330
Scheme: "http",
331-
Host: net.JoinHostPort(natsPod.pod.Status.PodIP, "8222"),
331+
Host: net.JoinHostPort(k8s.GetPodAddr(*natsPod.pod), "8222"),
332332
}
333333

334334
resp, err := client.Get(u.String())
@@ -780,7 +780,6 @@ func (m *VizierMonitor) runReconciler() {
780780

781781
// queryPodStatusz returns a pod's self-reported status as served by its statusz endpoint.
782782
func queryPodStatusz(client HTTPClient, pod *v1.Pod) (bool, string) {
783-
podIP := pod.Status.PodIP
784783
// Assume that the statusz endpoint is on the first port in the first container.
785784
var port int32
786785
if len(pod.Spec.Containers) > 0 && len(pod.Spec.Containers[0].Ports) > 0 {
@@ -789,7 +788,7 @@ func queryPodStatusz(client HTTPClient, pod *v1.Pod) (bool, string) {
789788

790789
u := url.URL{
791790
Scheme: "https",
792-
Host: net.JoinHostPort(podIP, fmt.Sprintf("%d", port)),
791+
Host: net.JoinHostPort(k8s.GetPodAddr(*pod), fmt.Sprintf("%d", port)),
793792
Path: "statusz",
794793
}
795794
resp, err := client.Get(u.String())

src/operator/controllers/monitor_test.go

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -74,22 +74,24 @@ func (f *FakeHTTPClient) Get(url string) (*http.Response, error) {
7474
func TestMonitor_queryPodStatusz(t *testing.T) {
7575
httpClient := &FakeHTTPClient{
7676
responses: map[string]string{
77-
"https://127.0.0.1:8080/statusz": "",
78-
"https://127.0.0.3:50100/statusz": "CloudConnectFailed",
77+
"https://127-0-0-1.pl.pod.cluster.local:8080/statusz": "",
78+
"https://127-0-0-3.pl2.pod.cluster.local:50100/statusz": "CloudConnectFailed",
7979
},
8080
}
8181

8282
tests := []struct {
8383
name string
8484
podPort int32
8585
podIP string
86+
podNamespace string
8687
expectedStatus string
8788
expectedOK bool
8889
}{
8990
{
9091
name: "OK",
9192
podPort: 8080,
9293
podIP: "127.0.0.1",
94+
podNamespace: "pl",
9395
expectedStatus: "",
9496
expectedOK: true,
9597
},
@@ -104,6 +106,7 @@ func TestMonitor_queryPodStatusz(t *testing.T) {
104106
name: "unhealthy",
105107
podPort: 50100,
106108
podIP: "127.0.0.3",
109+
podNamespace: "pl2",
107110
expectedStatus: "CloudConnectFailed",
108111
expectedOK: false,
109112
},
@@ -115,6 +118,9 @@ func TestMonitor_queryPodStatusz(t *testing.T) {
115118
Status: v1.PodStatus{
116119
PodIP: test.podIP,
117120
},
121+
ObjectMeta: metav1.ObjectMeta{
122+
Namespace: test.podNamespace,
123+
},
118124
Spec: v1.PodSpec{
119125
Containers: []v1.Container{
120126
{
@@ -169,7 +175,7 @@ func TestMonitor_getCloudConnState(t *testing.T) {
169175
t.Run(test.name, func(t *testing.T) {
170176
httpClient := &FakeHTTPClient{
171177
responses: map[string]string{
172-
"https://127.0.0.1:8080/statusz": test.cloudConnStatusz,
178+
"https://127-0-0-1.pl.pod.cluster.local:8080/statusz": test.cloudConnStatusz,
173179
},
174180
}
175181

@@ -183,6 +189,9 @@ func TestMonitor_getCloudConnState(t *testing.T) {
183189
PodIP: "127.0.0.1",
184190
Phase: test.cloudConnPhase,
185191
},
192+
ObjectMeta: metav1.ObjectMeta{
193+
Namespace: "pl",
194+
},
186195
Spec: v1.PodSpec{
187196
Containers: []v1.Container{
188197
{
@@ -500,9 +509,7 @@ func TestMonitor_repairVizier_PVC(t *testing.T) {
500509

501510
func TestMonitor_getCloudConnState_SeveralCloudConns(t *testing.T) {
502511
httpClient := &FakeHTTPClient{
503-
responses: map[string]string{
504-
"https://127.0.0.1:8080/statusz": "",
505-
},
512+
responses: map[string]string{},
506513
}
507514

508515
pods := &concurrentPodMap{unsafeMap: make(map[string]map[string]*podWrapper)}
@@ -545,8 +552,8 @@ func TestMonitor_getCloudConnState_SeveralCloudConns(t *testing.T) {
545552
func TestMonitor_NATSPods(t *testing.T) {
546553
httpClient := &FakeHTTPClient{
547554
responses: map[string]string{
548-
"http://127.0.0.1:8222": "",
549-
"http://127.0.0.3:8222": "NATS Failed",
555+
"http://127-0-0-1.pl.pod.cluster.local:8222": "",
556+
"http://127-0-0-3.pl.pod.cluster.local:8222": "NATS Failed",
550557
},
551558
}
552559

@@ -624,6 +631,9 @@ func TestMonitor_NATSPods(t *testing.T) {
624631
PodIP: test.natsIP,
625632
Phase: test.natsPhase,
626633
},
634+
ObjectMeta: metav1.ObjectMeta{
635+
Namespace: "pl",
636+
},
627637
},
628638
},
629639
)

src/utils/shared/k8s/BUILD.bazel

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ go_library(
2323
"apply.go",
2424
"auth.go",
2525
"delete.go",
26+
"dns_addr.go",
2627
"kubectl.go",
2728
"logs.go",
2829
"secrets.go",
@@ -64,10 +65,15 @@ go_library(
6465

6566
pl_go_test(
6667
name = "k8s_test",
67-
srcs = ["apply_test.go"],
68+
srcs = [
69+
"apply_test.go",
70+
"dns_addr_test.go",
71+
],
6872
deps = [
6973
":k8s",
7074
"@com_github_stretchr_testify//assert",
7175
"@com_github_stretchr_testify//require",
76+
"@io_k8s_api//core/v1:core",
77+
"@io_k8s_apimachinery//pkg/apis/meta/v1:meta",
7278
],
7379
)

src/utils/shared/k8s/dns_addr.go

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
/*
2+
* Copyright 2018- The Pixie Authors.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*
16+
* SPDX-License-Identifier: Apache-2.0
17+
*/
18+
19+
package k8s
20+
21+
import (
22+
"fmt"
23+
"strings"
24+
25+
v1 "k8s.io/api/core/v1"
26+
)
27+
28+
// GetPodAddr returns the DNS-style address for the given pod, formatted as
// "<dashed-pod-ip>.<namespace>.pod.cluster.local". Every "." and ":" in
// pod.Status.PodIP is replaced with "-" so that both IPv4 and IPv6 addresses
// become a single DNS label.
// NOTE(review): an empty PodIP yields a name that starts with "." — confirm
// callers only pass pods that already have an IP assigned.
func GetPodAddr(pod v1.Pod) string {
29+
// IPv4: dots separate the octets.
30+
podIP := strings.ReplaceAll(pod.Status.PodIP, ".", "-")
31+
// IPv6: colons separate the groups.
32+
podIP = strings.ReplaceAll(podIP, ":", "-")
33+
34+
return fmt.Sprintf("%s.%s.pod.cluster.local", podIP, pod.Namespace)
35+
}

src/utils/shared/k8s/dns_addr_test.go

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
/*
2+
* Copyright 2018- The Pixie Authors.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*
16+
* SPDX-License-Identifier: Apache-2.0
17+
*/
18+
19+
package k8s_test
20+
21+
import (
22+
"testing"
23+
24+
"github.com/stretchr/testify/assert"
25+
v1 "k8s.io/api/core/v1"
26+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
27+
28+
"px.dev/pixie/src/utils/shared/k8s"
29+
)
30+
31+
// TestGetPodAddr checks that k8s.GetPodAddr produces the expected
// "<dashed-ip>.<namespace>.pod.cluster.local" string for one IPv4 and one
// fully expanded IPv6 pod IP.
func TestGetPodAddr(t *testing.T) {
32+
// NOTE(review): the table name mentions key/value string-to-map tests, but
// the entries are GetPodAddr cases — presumably carried over from another
// test; consider renaming.
keyValueStringToMapTests := []struct {
33+
pod v1.Pod
34+
expectedAddr string
35+
}{
36+
{
37+
pod: v1.Pod{
38+
Status: v1.PodStatus{
39+
PodIP: "1.2.3.4",
40+
},
41+
ObjectMeta: metav1.ObjectMeta{
42+
Namespace: "test-ns",
43+
},
44+
},
45+
expectedAddr: "1-2-3-4.test-ns.pod.cluster.local",
46+
},
47+
{
48+
pod: v1.Pod{
49+
Status: v1.PodStatus{
50+
PodIP: "2001:0db8:85a3:0000:0000:8a2e:0370:7334",
51+
},
52+
ObjectMeta: metav1.ObjectMeta{
53+
Namespace: "test-ns",
54+
},
55+
},
56+
expectedAddr: "2001-0db8-85a3-0000-0000-8a2e-0370-7334.test-ns.pod.cluster.local",
57+
},
58+
}
59+
60+
// Cases run inline rather than as named subtests, so a failure is identified
// only by the expected/actual strings in the assertion output.
for _, tc := range keyValueStringToMapTests {
61+
assert.Equal(t, tc.expectedAddr, k8s.GetPodAddr(tc.pod))
62+
}
63+
}

src/vizier/services/cloud_connector/vzmetrics/scrape.go

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,11 @@ import (
2222
"context"
2323
"crypto/tls"
2424
"crypto/x509"
25-
"fmt"
2625
"io"
2726
"net"
2827
"net/http"
2928
"net/url"
3029
"os"
31-
"strings"
3230
"time"
3331

3432
log "github.com/sirupsen/logrus"
@@ -124,12 +122,8 @@ func (s *scraperImpl) getEndpointsToScrape() ([]endpoint, error) {
124122
continue
125123
}
126124

127-
// IPv4
128-
podIP := strings.ReplaceAll(p.Status.PodIP, ".", "-")
129-
// IPv6
130-
podIP = strings.ReplaceAll(podIP, ":", "-")
131125
// Use k8s pod DNS format instead of just the pod IP, so that the cert is valid.
132-
host := fmt.Sprintf("%s.%s.pod.cluster.local", podIP, s.namespace)
126+
host := k8s.GetPodAddr(p)
133127

134128
u := url.URL{
135129
Scheme: "https",

0 commit comments

Comments
 (0)