Skip to content

Commit 5ed6968

Browse files
0xgoudapashagolub
andauthored
[*] deprecate --direct-os-stats flag (#1024)
* deprecate `--direct-os-stats` flag. Direct OS stats are now fetched automatically when pgwatch runs on the same host as PostgreSQL. - `IsDirectlyFetchableMetric()` now auto-detects same-host - Removed `r.Metrics.DirectOSStats` flag checks from reaper - Added `SourceConn.IsClientOnSameHost()` helper method * Improve comment clarity. * Tag `--direct-os-stats` with `hidden` due to deprecation. * Log warning if deprecated `--direct-os-stats` flag is used. * Remove `--direct-os-stats` from docs. * Mention that pgwatch now auto-detects this. * Add integration test for non-direct os stats via postgresql. * Improve comments in `IsClientOnSameHost()` * remove trailing whitespaces * Ensure `jsonSink` is truncated before test. --------- Co-authored-by: Pavlo Golub <pavlo.golub@gmail.com>
1 parent 78bd4bd commit 5ed6968

File tree

11 files changed

+62
-33
lines changed

11 files changed

+62
-33
lines changed

cmd/pgwatch/doc.go

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -27,18 +27,13 @@
2727
// all monitored DBs when missing. Main
2828
// usage - pg_stat_statements
2929
// [$PW_TRY_CREATE_LISTED_EXTS_IF_MISSING]
30-
//
30+
// --create-helpers Create helper database objects from
31+
// metric definitions
32+
// [$PW_CREATE_HELPERS]
3133
// Metrics:
3234
//
3335
// -m, --metrics= File or folder of YAML files with
3436
// metrics definitions [$PW_METRICS]
35-
// --create-helpers Create helper database objects from
36-
// metric definitions
37-
// [$PW_CREATE_HELPERS]
38-
// --direct-os-stats Extract OS related psutil statistics
39-
// not via PL/Python wrappers but
40-
// directly on host
41-
// [$PW_DIRECT_OS_STATS]
4237
// --instance-level-cache-max-seconds= Max allowed staleness for instance
4338
// level metric data shared between DBs
4439
// of an instance. Affects 'continuous'

cmd/pgwatch/main.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,10 @@ func main() {
9898
return
9999
}
100100

101+
if opts.Metrics.DirectOSStats {
102+
logger.Warning("--direct-os-stats flag is deprecated, direct OS access is now applied automatically for relevant metrics if on same host.")
103+
}
104+
101105
reaper := reaper.NewReaper(mainCtx, opts)
102106

103107
if _, err = webserver.Init(mainCtx, opts.WebUI, webui.WebUIFs, opts.MetricsReaderWriter,

cmd/pgwatch/main_integration_test.go

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,9 @@ metrics:
5050
test_metric:
5151
sqls:
5252
11: select (extract(epoch from now()) * 1e9)::int8 as epoch_ns, 42 as test_metric
53+
cpu_load:
54+
sqls:
55+
11: select 'metric fetched via PostgreSQL not OS'
5356
`), 0644))
5457

5558
pg, err := initTestContainer()
@@ -66,6 +69,7 @@ metrics:
6669
is_enabled: true
6770
custom_metrics:
6871
test_metric: 60
72+
cpu_load: 60
6973
`), 0644))
7074

7175
// Mock Exit to capture exit code
@@ -127,4 +131,24 @@ metrics:
127131
<-mainCtx.Done() // Wait for main to finish
128132
assert.Equal(t, cmdopts.ExitCodeWebUIError, gotExit, "port should be busy and fail to bind")
129133
})
134+
135+
t.Run("non-direct os stats", func(t *testing.T) {
136+
os.Remove(jsonSink) // truncate output file
137+
os.Args = []string{
138+
"pgwatch",
139+
"--sources", sourcesYaml,
140+
"--metrics", metricsYaml,
141+
"--sink", "jsonfile://" + jsonSink,
142+
"--web-disable",
143+
}
144+
145+
go main()
146+
<-time.After(5 * time.Second) // Wait for main to fetch metric
147+
cancel()
148+
<-mainCtx.Done() // Wait for main to finish
149+
150+
datab, err := os.ReadFile(jsonSink)
151+
assert.NoError(t, err)
152+
assert.Contains(t, string(datab), "metric fetched via PostgreSQL not OS")
153+
})
130154
}

docs/reference/cli_env.md

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -63,11 +63,6 @@ It reads the configuration from the specified sources and metrics, then begins c
6363
Postgres URI, YAML file or folder of YAML files with metrics definitions.
6464
ENV: `$PW_METRICS`
6565

66-
- `--direct-os-stats`
67-
68-
Extract OS related psutil statistics not via PL/Python wrappers but directly on host.
69-
ENV: `$PW_DIRECT_OS_STATS`
70-
7166
- `--instance-level-cache-max-seconds=`
7267

7368
Max allowed staleness for instance level metric data shared between DBs of an instance. Set to 0 to disable (default: 30).

docs/tutorial/preparing_databases.md

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -190,21 +190,21 @@ pgwatch metric print-init psutil_cpu psutil_mem psutil_disk psutil_disk_io_total
190190
based on the Linux distro / Kernel version used, so small
191191
adjustments might be needed there (e.g. to remove a non-existent
192192
column). Minimum usable Kernel version required is 3.3.
193-
- When running the gatherer locally one can enable the `--direct-os-stats`
194-
parameter to signal that we can fetch the data for the default `psutil*` metrics
195-
directly from OS counters. If direct OS fetching fails though, the
196-
fallback is still to try via PL/Python wrappers.
193+
- When pgwatch runs on the same host as a monitored source,
194+
it auto-detects this and tries to fetch the default `psutil*`
195+
metrics data directly from OS counters. If this direct OS fetch fails,
196+
it falls back to using PL/Python wrappers.
197197
- In rare cases when some "helpers" have been installed, and when
198198
doing a binary PostgreSQL upgrade at some later point in time via
199199
`pg_upgrade`, this could result in error messages
200200
thrown. Then just drop those failing helpers on the "to be
201201
upgraded" cluster and re-create them after the upgrade process.
202202

203203
!!! Info
204-
If despite all the warnings you still want to run the pgwatch
205-
with a sufficient user account (e.g. a superuser) you can also
204+
If, despite all the warnings, you still want to run pgwatch
205+
with a sufficient user account (e.g., a superuser), you can
206206
use the `--create-helpers` parameter to automatically create all
207-
needed helper functions in the monitored databases.
207+
needed helper functions in the monitored databases on startup.
208208

209209
## Different source types explained
210210

internal/db/conn.go

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -71,14 +71,16 @@ func MarshallParamToJSONB(v any) any {
7171
func IsClientOnSameHost(conn PgxIface) (bool, error) {
7272
ctx := context.Background()
7373

74-
// Step 1: Check connection type using SQL
74+
// Option 1: Check if connection type is unix socket
7575
var isUnixSocket bool
7676
err := conn.QueryRow(ctx, "SELECT COALESCE(inet_client_addr(), inet_server_addr()) IS NULL").Scan(&isUnixSocket)
7777
if err != nil || isUnixSocket {
7878
return isUnixSocket, err
7979
}
8080

81-
// Step 2: Retrieve unique cluster identifier
81+
// Option 2: Compare system identifier from file system vs cluster identifier
82+
83+
// Step 1: Retrieve unique cluster identifier
8284
var dataDirectory string
8385
if err := conn.QueryRow(ctx, "SHOW data_directory").Scan(&dataDirectory); err != nil {
8486
return false, err
@@ -89,7 +91,7 @@ func IsClientOnSameHost(conn PgxIface) (bool, error) {
8991
return false, err
9092
}
9193

92-
// Step 3: Compare system identifier from file system
94+
// Step 2: Retrieve system identifier from file system
9395
pgControlFile := filepath.Join(dataDirectory, "global", "pg_control")
9496
file, err := os.Open(pgControlFile)
9597
if err != nil {
@@ -102,5 +104,6 @@ func IsClientOnSameHost(conn PgxIface) (bool, error) {
102104
return false, err
103105
}
104106

107+
// Compare file system identifier and cluster identifier
105108
return fileSystemIdentifier == systemIdentifier, nil
106109
}

internal/metrics/cmdopts.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ import (
77
// CmdOpts specifies metric command-line options
88
type CmdOpts struct {
99
Metrics string `short:"m" long:"metrics" mapstructure:"metrics" description:"Postgres URI or path to YAML file with metrics definitions" env:"PW_METRICS"`
10-
DirectOSStats bool `long:"direct-os-stats" mapstructure:"direct-os-stats" description:"Extract OS related psutil statistics not via PL/Python wrappers but directly on host" env:"PW_DIRECT_OS_STATS"`
10+
DirectOSStats bool `hidden:"true" long:"direct-os-stats" mapstructure:"direct-os-stats" description:"Extract OS related psutil statistics not via PL/Python wrappers but directly on host" env:"PW_DIRECT_OS_STATS"`
1111
InstanceLevelCacheMaxSeconds int64 `long:"instance-level-cache-max-seconds" mapstructure:"instance-level-cache-max-seconds" description:"Max allowed staleness for instance level metric data shared between DBs of an instance. Set to 0 to disable" env:"PW_INSTANCE_LEVEL_CACHE_MAX_SECONDS" default:"30"`
1212
EmergencyPauseTriggerfile string `long:"emergency-pause-triggerfile" mapstructure:"emergency-pause-triggerfile" description:"When the file exists no metrics will be temporarily fetched" env:"PW_EMERGENCY_PAUSE_TRIGGERFILE" default:"/tmp/pgwatch-emergency-pause"`
1313
}

internal/reaper/file.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,8 @@ const (
3737

3838
var directlyFetchableOSMetrics = []string{metricPsutilCPU, metricPsutilDisk, metricPsutilDiskIoTotal, metricPsutilMem, metricCPULoad}
3939

40-
func IsDirectlyFetchableMetric(metric string) bool {
41-
return slices.Contains(directlyFetchableOSMetrics, metric)
40+
func IsDirectlyFetchableMetric(md *sources.SourceConn, metric string) bool {
41+
return slices.Contains(directlyFetchableOSMetrics, metric) && md.IsClientOnSameHost()
4242
}
4343

4444
func (r *Reaper) FetchStatsDirectlyFromOS(ctx context.Context, md *sources.SourceConn, metricName string) (*metrics.MeasurementEnvelope, error) {

internal/reaper/metric_test.go

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,13 @@ var (
2929
func TestReaper_FetchStatsDirectlyFromOS(t *testing.T) {
3030
a := assert.New(t)
3131
r := &Reaper{}
32-
conn, _ := pgxmock.NewPool()
32+
conn, _ := pgxmock.NewPool(pgxmock.QueryMatcherOption(pgxmock.QueryMatcherEqual))
33+
expq := conn.ExpectQuery("SELECT COALESCE(inet_client_addr(), inet_server_addr()) IS NULL")
34+
expq.Times(uint(len(directlyFetchableOSMetrics)))
3335
md := &sources.SourceConn{Conn: conn}
3436
for _, m := range directlyFetchableOSMetrics {
35-
a.True(IsDirectlyFetchableMetric(m), "Expected %s to be directly fetchable", m)
37+
expq.WillReturnRows(pgxmock.NewRows([]string{"is_unix_socket"}).AddRow(true))
38+
a.True(IsDirectlyFetchableMetric(md, m), "Expected %s to be directly fetchable", m)
3639
a.NotPanics(func() {
3740
_, _ = r.FetchStatsDirectlyFromOS(context.Background(), md, m)
3841
})

internal/reaper/reaper.go

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -312,14 +312,13 @@ func (r *Reaper) reapMetricMeasurements(ctx context.Context, md *sources.SourceC
312312

313313
var metricStoreMessages *metrics.MeasurementEnvelope
314314

315-
// 1st try local overrides for some metrics if operating in push mode
316-
if r.Metrics.DirectOSStats && IsDirectlyFetchableMetric(metricName) {
317-
metricStoreMessages, err = r.FetchStatsDirectlyFromOS(ctx, md, metricName)
318-
if err != nil {
315+
t1 := time.Now()
316+
// 1st try local overrides for system metrics if operating on the same host
317+
if IsDirectlyFetchableMetric(md, metricName) {
318+
if metricStoreMessages, err = r.FetchStatsDirectlyFromOS(ctx, md, metricName); err != nil {
319319
l.WithError(err).Errorf("Could not read metric directly from OS")
320320
}
321321
}
322-
t1 := time.Now()
323322
if metricStoreMessages == nil {
324323
metricStoreMessages, err = r.FetchMetric(ctx, md, metricName)
325324
}

0 commit comments

Comments
 (0)