
Commit 904d09e

Update notebook analysis statistics and tests: refine calculations and add percentile metrics
1 parent 2233e31 commit 904d09e

3 files changed (+55, -23 lines)

docs/reference/api-full.md

Lines changed: 1 addition & 1 deletion
@@ -10,7 +10,7 @@ For a curated, example-driven API guide, see **[api.md](api.md)**.
 > - **[CLI Reference](cli.md)** - Command-line interface
 > - **[DSL Reference](dsl.md)** - YAML syntax guide
 
-**Generated from source code on:** June 16, 2025 at 20:37 UTC
+**Generated from source code on:** June 16, 2025 at 21:12 UTC
 
 **Modules auto-discovered:** 42

ngraph/workflow/notebook_analysis.py

Lines changed: 43 additions & 19 deletions
@@ -168,14 +168,34 @@ def _calculate_statistics(self, capacity_matrix: pd.DataFrame) -> Dict[str, Any]
         if len(non_zero_values) == 0:
             return {"has_data": False}
 
+        # Count all non-self-loop connections for flow analysis
+        non_self_loop_connections = 0
+
+        for source in capacity_matrix.index:
+            for dest in capacity_matrix.columns:
+                if source != dest:  # Exclude self-loops
+                    non_self_loop_connections += 1
+
+        # Calculate meaningful connection density
+        num_nodes = len(capacity_matrix.index)
+        total_possible_connections = num_nodes * (num_nodes - 1)  # Exclude self-loops
+        connection_density = (
+            non_self_loop_connections / total_possible_connections * 100
+            if total_possible_connections > 0
+            else 0
+        )
+
         return {
             "has_data": True,
-            "total_connections": len(non_zero_values),
-            "total_possible": capacity_matrix.size,
-            "connection_density": len(non_zero_values) / capacity_matrix.size * 100,
+            "total_connections": non_self_loop_connections,
+            "total_possible": total_possible_connections,
+            "connection_density": connection_density,
             "capacity_min": float(non_zero_values.min()),
             "capacity_max": float(non_zero_values.max()),
             "capacity_mean": float(non_zero_values.mean()),
+            "capacity_p25": float(pd.Series(non_zero_values).quantile(0.25)),
+            "capacity_p50": float(pd.Series(non_zero_values).quantile(0.50)),
+            "capacity_p75": float(pd.Series(non_zero_values).quantile(0.75)),
             "num_sources": len(capacity_matrix.index),
             "num_destinations": len(capacity_matrix.columns),
         }
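Note on the hunk above: the connection count no longer depends on which cells of the matrix are non-zero; it now counts every off-diagonal position, so the density describes the matrix shape, while the min/max/mean and new percentile figures are still computed from the non-zero capacities. A minimal, self-contained sketch of that behaviour, using a hypothetical 3-node matrix (node names and capacity values here are invented for illustration, not taken from the repository):

import pandas as pd

# Hypothetical 3x3 capacity matrix with four non-zero off-diagonal entries.
nodes = ["A", "B", "C"]
capacity_matrix = pd.DataFrame(
    [[0, 100, 0], [50, 0, 150], [0, 200, 0]], index=nodes, columns=nodes
)

# Capacity statistics still come from the non-zero values only.
non_zero_values = capacity_matrix.values[capacity_matrix.values > 0]

# Connection counting mirrors the new code: every off-diagonal position counts.
num_nodes = len(capacity_matrix.index)
total_possible = num_nodes * (num_nodes - 1)  # 6 for three nodes
non_self_loop = sum(
    1 for s in capacity_matrix.index for d in capacity_matrix.columns if s != d
)  # also 6, regardless of zeros
density = non_self_loop / total_possible * 100  # 100.0

print(density, float(pd.Series(non_zero_values).quantile(0.50)))  # 100.0 125.0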
@@ -207,15 +227,19 @@ def display_analysis(self, analysis: Dict[str, Any], **kwargs) -> None:
             return
 
         print("Matrix Statistics:")
-        print(f" Sources: {stats['num_sources']} nodes")
-        print(f" Destinations: {stats['num_destinations']} nodes")
+        print(f" Sources: {stats['num_sources']:,} nodes")
+        print(f" Destinations: {stats['num_destinations']:,} nodes")
         print(
-            f" Connections: {stats['total_connections']}/{stats['total_possible']} ({stats['connection_density']:.1f}%)"
+            f" Connections: {stats['total_connections']:,}/{stats['total_possible']:,} ({stats['connection_density']:.1f}%)"
         )
         print(
-            f" Capacity range: {stats['capacity_min']:.2f} - {stats['capacity_max']:.2f}"
+            f" Capacity range: {stats['capacity_min']:,.2f} - {stats['capacity_max']:,.2f}"
         )
-        print(f" Average capacity: {stats['capacity_mean']:.2f}")
+        print(" Capacity statistics:")
+        print(f" Mean: {stats['capacity_mean']:,.2f}")
+        print(f" P25: {stats['capacity_p25']:,.2f}")
+        print(f" P50 (median): {stats['capacity_p50']:,.2f}")
+        print(f" P75: {stats['capacity_p75']:,.2f}")
 
         viz_data = analysis["visualization_data"]
         if viz_data["has_data"]:
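The display changes in this hunk and the remaining hunks of this file are the same mechanical edit: adding Python's thousands-separator format spec. A quick illustration with placeholder numbers (variable names and values are invented for the example):

# ":," groups digits with commas; ":,.2f" combines grouping with two decimals.
total_flows = 1234567
avg_flow = 10480.5
print(f"  Total flows: {total_flows:,}")   # Total flows: 1,234,567
print(f"  Average flow: {avg_flow:,.2f}")  # Average flow: 10,480.50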
@@ -316,11 +340,11 @@ def display_analysis(self, analysis: Dict[str, Any], **kwargs) -> None:
 
         stats = analysis["statistics"]
         print("Flow Statistics:")
-        print(f" Total flows: {stats['total_flows']}")
-        print(f" Analysis steps: {stats['unique_steps']}")
-        print(f" Flow range: {stats['min_flow']:.2f} - {stats['max_flow']:.2f}")
-        print(f" Average flow: {stats['avg_flow']:.2f}")
-        print(f" Total capacity: {stats['total_capacity']:.2f}")
+        print(f" Total flows: {stats['total_flows']:,}")
+        print(f" Analysis steps: {stats['unique_steps']:,}")
+        print(f" Flow range: {stats['min_flow']:,.2f} - {stats['max_flow']:,.2f}")
+        print(f" Average flow: {stats['avg_flow']:,.2f}")
+        print(f" Total capacity: {stats['total_capacity']:,.2f}")
 
         flow_df = analysis["dataframe"]

@@ -477,7 +501,7 @@ def load_results(json_path: Union[str, Path]) -> Dict[str, Any]:
         {
             "success": True,
             "results": results,
-            "message": f"Loaded {len(results)} analysis steps from {json_path.name}",
+            "message": f"Loaded {len(results):,} analysis steps from {json_path.name}",
             "step_count": len(results),
             "step_names": list(results.keys()),
         }
@@ -531,14 +555,14 @@ def display_analysis(self, analysis: Dict[str, Any], **kwargs) -> None:
         print("=" * 40)
 
         stats = analysis
-        print(f"Total Analysis Steps: {stats['total_steps']}")
-        print(f"Capacity Envelope Steps: {stats['capacity_steps']}")
-        print(f"Flow Analysis Steps: {stats['flow_steps']}")
-        print(f"Other Data Steps: {stats['other_steps']}")
+        print(f"Total Analysis Steps: {stats['total_steps']:,}")
+        print(f"Capacity Envelope Steps: {stats['capacity_steps']:,}")
+        print(f"Flow Analysis Steps: {stats['flow_steps']:,}")
+        print(f"Other Data Steps: {stats['other_steps']:,}")
 
         if stats["total_steps"] > 0:
             print(
-                f"\n✅ Analysis complete. Processed {stats['total_steps']} workflow steps."
+                f"\n✅ Analysis complete. Processed {stats['total_steps']:,} workflow steps."
             )
         else:
             print("\n❌ No analysis results found.")

tests/workflow/test_notebook_analysis.py

Lines changed: 11 additions & 3 deletions
@@ -244,11 +244,16 @@ def test_calculate_statistics_with_data(self) -> None:
         stats = self.analyzer._calculate_statistics(capacity_matrix)
 
         assert stats["has_data"] is True
-        assert stats["total_connections"] == 4  # Non-zero values
-        assert stats["total_possible"] == 9  # 3x3 matrix
+        assert (
+            stats["total_connections"] == 6
+        )  # All non-self-loop positions: A->B, A->C, B->A, B->C, C->A, C->B
+        assert stats["total_possible"] == 6  # 3x(3-1) excluding self-loops
         assert stats["capacity_min"] == 50.0
-        assert stats["capacity_max"] == 200.0
+        assert stats["capacity_max"] == 200.0  # Includes all non-zero values
         assert "capacity_mean" in stats
+        assert "capacity_p25" in stats
+        assert "capacity_p50" in stats
+        assert "capacity_p75" in stats
         assert stats["num_sources"] == 3
         assert stats["num_destinations"] == 3

@@ -312,6 +317,9 @@ def test_display_analysis_success(
                 "capacity_min": 50.0,
                 "capacity_max": 200.0,
                 "capacity_mean": 125.0,
+                "capacity_p25": 75.0,
+                "capacity_p50": 125.0,
+                "capacity_p75": 175.0,
             },
             "visualization_data": {
                 "has_data": True,
