Skip to content

Commit ae12228

Browse files
authored
Add user tag to running job metrics (#22072)
* Add user tag to running job metrics
* Add changelog
1 parent 45918e7 commit ae12228

File tree

6 files changed, with 39 additions and 16 deletions.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Add a user tag to running job metrics.

ibm_spectrum_lsf/datadog_checks/ibm_spectrum_lsf/client.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ def bjobs(self) -> tuple[str, str, int]:
5252
return self._run_command(
5353
'bjobs',
5454
'-o',
55-
"jobid stat queue from_host:80 exec_host:80 run_time cpu_used mem time_left swap idle_factor %complete delimiter='|'", # noqa: E501
55+
"jobid stat queue user:80 from_host:80 exec_host:80 run_time cpu_used mem time_left swap idle_factor %complete delimiter='|'", # noqa: E501
5656
"-u",
5757
"all",
5858
)

ibm_spectrum_lsf/datadog_checks/ibm_spectrum_lsf/processors.py

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -360,7 +360,7 @@ def __init__(self, client: LSFClient, config: InstanceConfig, logger: CheckLoggi
360360
super().__init__(
361361
name='bjobs',
362362
prefix='job',
363-
expected_columns=12,
363+
expected_columns=13,
364364
delimiter='|',
365365
client=client,
366366
config=config,
@@ -377,17 +377,18 @@ def process_metrics(self) -> list[LSFMetric]:
377377
LSFTagMapping('full_job_id', 0, transform_tag),
378378
LSFTagMapping('status', 1, transform_tag),
379379
LSFTagMapping('queue', 2, transform_tag),
380-
LSFTagMapping('from_host', 3, transform_tag),
381-
LSFTagMapping('exec_host', 4, transform_tag),
380+
LSFTagMapping('user', 3, transform_tag),
381+
LSFTagMapping('from_host', 4, transform_tag),
382+
LSFTagMapping('exec_host', 5, transform_tag),
382383
]
383384
metrics = [
384-
LSFMetricMapping('run_time', 5, transform_float),
385-
LSFMetricMapping('cpu_used', 6, transform_float),
386-
LSFMetricMapping('mem', 7, transform_float),
387-
LSFMetricMapping('time_left', 8, transform_time_left),
388-
LSFMetricMapping('swap', 9, transform_float),
389-
LSFMetricMapping('idle_factor', 10, transform_float),
390-
LSFMetricMapping('percent_complete', 11, transform_float),
385+
LSFMetricMapping('run_time', 6, transform_float),
386+
LSFMetricMapping('cpu_used', 7, transform_float),
387+
LSFMetricMapping('mem', 8, transform_float),
388+
LSFMetricMapping('time_left', 9, transform_time_left),
389+
LSFMetricMapping('swap', 10, transform_float),
390+
LSFMetricMapping('idle_factor', 11, transform_float),
391+
LSFMetricMapping('percent_complete', 12, transform_float),
391392
]
392393

393394
return self.parse_table_command(metrics, tags)

ibm_spectrum_lsf/tests/common.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -656,6 +656,7 @@
656656
'lsf_cluster_name:test-cluster',
657657
'queue:normal',
658658
'status:RUN',
659+
'user:test-user',
659660
],
660661
"val": 42,
661662
},
@@ -669,6 +670,7 @@
669670
'lsf_cluster_name:test-cluster',
670671
'queue:normal',
671672
'status:RUN',
673+
'user:test-user',
672674
],
673675
"val": 0.54,
674676
},
@@ -682,6 +684,7 @@
682684
'lsf_cluster_name:test-cluster',
683685
'queue:normal',
684686
'status:RUN',
687+
'user:test-user',
685688
],
686689
"val": 7,
687690
},
@@ -695,6 +698,7 @@
695698
'lsf_cluster_name:test-cluster',
696699
'queue:normal',
697700
'status:RUN',
701+
'user:test-user',
698702
],
699703
"val": 22,
700704
},
@@ -708,6 +712,7 @@
708712
'lsf_cluster_name:test-cluster',
709713
'queue:normal',
710714
'status:RUN',
715+
'user:test-user',
711716
],
712717
"val": 79,
713718
},
@@ -721,6 +726,7 @@
721726
'lsf_cluster_name:test-cluster',
722727
'queue:normal',
723728
'status:RUN',
729+
'user:test-user',
724730
],
725731
"val": 0,
726732
},
@@ -734,6 +740,7 @@
734740
'lsf_cluster_name:test-cluster',
735741
'queue:normal',
736742
'status:RUN',
743+
'user:test-user',
737744
],
738745
"val": 60,
739746
},
@@ -747,6 +754,7 @@
747754
'lsf_cluster_name:test-cluster',
748755
'queue:normal',
749756
'status:PEND',
757+
'user:test-user',
750758
],
751759
"val": -1,
752760
},
@@ -759,6 +767,7 @@
759767
'lsf_cluster_name:test-cluster',
760768
'queue:normal',
761769
'status:PEND',
770+
'user:test-user',
762771
],
763772
"val": -1,
764773
},
@@ -771,6 +780,7 @@
771780
'lsf_cluster_name:test-cluster',
772781
'queue:normal',
773782
'status:PEND',
783+
'user:test-user',
774784
],
775785
"val": -1,
776786
},
@@ -783,6 +793,7 @@
783793
'lsf_cluster_name:test-cluster',
784794
'queue:normal',
785795
'status:PEND',
796+
'user:test-user',
786797
],
787798
"val": -1,
788799
},
@@ -795,6 +806,7 @@
795806
'lsf_cluster_name:test-cluster',
796807
'queue:normal',
797808
'status:PEND',
809+
'user:test-user',
798810
],
799811
"val": 0,
800812
},
@@ -807,6 +819,7 @@
807819
'lsf_cluster_name:test-cluster',
808820
'queue:normal',
809821
'status:PEND',
822+
'user:test-user',
810823
],
811824
"val": -1,
812825
},
@@ -819,6 +832,7 @@
819832
'lsf_cluster_name:test-cluster',
820833
'queue:normal',
821834
'status:PEND',
835+
'user:test-user',
822836
],
823837
"val": 120,
824838
},
@@ -832,6 +846,7 @@
832846
'lsf_cluster_name:test-cluster',
833847
'queue:normal',
834848
'status:PEND',
849+
'user:test-user',
835850
],
836851
"val": -1,
837852
},
@@ -844,6 +859,7 @@
844859
'lsf_cluster_name:test-cluster',
845860
'queue:normal',
846861
'status:PEND',
862+
'user:test-user',
847863
],
848864
"val": -1,
849865
},
@@ -856,6 +872,7 @@
856872
'lsf_cluster_name:test-cluster',
857873
'queue:normal',
858874
'status:PEND',
875+
'user:test-user',
859876
],
860877
"val": -1,
861878
},
@@ -868,6 +885,7 @@
868885
'lsf_cluster_name:test-cluster',
869886
'queue:normal',
870887
'status:PEND',
888+
'user:test-user',
871889
],
872890
"val": -1,
873891
},
@@ -880,6 +898,7 @@
880898
'lsf_cluster_name:test-cluster',
881899
'queue:normal',
882900
'status:PEND',
901+
'user:test-user',
883902
],
884903
"val": 0,
885904
},
@@ -892,6 +911,7 @@
892911
'lsf_cluster_name:test-cluster',
893912
'queue:normal',
894913
'status:PEND',
914+
'user:test-user',
895915
],
896916
"val": -1,
897917
},
@@ -904,6 +924,7 @@
904924
'lsf_cluster_name:test-cluster',
905925
'queue:normal',
906926
'status:PEND',
927+
'user:test-user',
907928
],
908929
"val": -1,
909930
},
Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
JOBID|STAT|QUEUE|FROM_HOST |EXEC_HOST |RUN_TIME|CPU_USED|MEM|TIME_LEFT|SWAP|IDLE_FACTOR|%COMPLETE
2-
173|RUN|normal|ip-11-21-111-198.ec2.internal |ip-11-21-111-198.ec2.internal|79 second(s)|42 second(s)|7 Mbytes|0:1 L|0 Mbytes|0.54|22.00% L
3-
174|PEND|normal|ip-11-21-111-198.ec2.internal | - |0 second(s)|-|-|02|-|-|-
4-
175|PEND|normal|ip-11-21-111-198.ec2.internal | - |0 second(s)|-|-|-|-|-|-
1+
JOBID|STAT|QUEUE|USER|FROM_HOST |EXEC_HOST |RUN_TIME|CPU_USED|MEM|TIME_LEFT|SWAP|IDLE_FACTOR|%COMPLETE
2+
173|RUN|normal|test-user|ip-11-21-111-198.ec2.internal |ip-11-21-111-198.ec2.internal|79 second(s)|42 second(s)|7 Mbytes|0:1 L|0 Mbytes|0.54|22.00% L
3+
174|PEND|normal|test-user|ip-11-21-111-198.ec2.internal | - |0 second(s)|-|-|02|-|-|-
4+
175|PEND|normal|test-user|ip-11-21-111-198.ec2.internal | - |0 second(s)|-|-|-|-|-|-

ibm_spectrum_lsf/tests/test_unit.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,7 @@ def test_bjobs_no_output(mock_client, dd_run_check, aggregator, instance, caplog
169169

170170
assert_metrics(ALL_DEFAULT_METRICS, BJOBS_METRICS, aggregator)
171171

172-
assert "Skipping bjobs metrics; unexpected cli command output. Number of columns: 1, expected: 12" in caplog.text
172+
assert "Skipping bjobs metrics; unexpected cli command output. Number of columns: 1, expected: 13" in caplog.text
173173

174174
aggregator.assert_all_metrics_covered()
175175
aggregator.assert_metrics_using_metadata(get_metadata_metrics())

0 commit comments

Comments
 (0)