Skip to content

Commit e19ce85

Browse files
author
Gin
committed
add onnx model script
1 parent a5ebdb7 commit e19ce85

File tree

1 file changed

+126
-0
lines changed

1 file changed

+126
-0
lines changed
Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Remove privacy-sensitive metadata (file paths in stack trace) from ONNX models.
4+
5+
Usage:
6+
python fix_onnx_model_metadata.py <input_onnx_file> [output_onnx_file]
7+
8+
If output file is not specified, it will overwrite the input file.
9+
"""
10+
11+
import onnx
12+
import sys
13+
import os
14+
15+
16+
# Node metadata keys that may carry file paths (stack traces and name scopes).
_PATH_METADATA_KEYS = ('pkg.torch.onnx.stack_trace', 'pkg.torch.onnx.name_scopes')


def _iter_nodes(container):
    """Yield every node of a graph/function, descending into subgraph attributes."""
    for node in container.node:
        yield node
        for attr in node.attribute:
            if attr.type == onnx.AttributeProto.GRAPH:
                yield from _iter_nodes(attr.g)
            elif attr.type == onnx.AttributeProto.GRAPHS:
                for subgraph in attr.graphs:
                    yield from _iter_nodes(subgraph)


def _strip_path_props(node, removed_values):
    """Delete path-bearing metadata_props entries from ``node`` in place.

    Each removed value is added to ``removed_values``. Returns the number of
    entries deleted.
    """
    doomed = []
    for index, prop in enumerate(node.metadata_props):
        # Only drop the known keys, and only when the value looks like a path.
        if prop.key in _PATH_METADATA_KEYS and ('/' in prop.value or '\\' in prop.value):
            removed_values.add(prop.value)
            doomed.append(index)

    # A repeated protobuf field cannot be reassigned, so delete in place.
    # Going from the highest index down keeps the remaining indices valid.
    for index in reversed(doomed):
        del node.metadata_props[index]
    return len(doomed)


def clean_onnx_metadata(model_path, output_path=None):
    """
    Remove privacy-sensitive metadata from an ONNX model.

    Entries of ``metadata_props`` keyed ``pkg.torch.onnx.stack_trace`` or
    ``pkg.torch.onnx.name_scopes`` whose value contains ``/`` or ``\\`` are
    removed from every node of the main graph, of nested subgraphs, and of
    the model's functions. The model is then validated with
    ``onnx.checker.check_model`` and saved, and the saved file is scanned for
    a few common path prefixes (a warning is printed if any remain).

    Args:
        model_path: Path to input ONNX model
        output_path: Path to save cleaned model (defaults to overwriting input)

    Returns:
        True if the cleaned model was saved; False if validation failed, in
        which case nothing is written.
    """
    print(f"Loading ONNX model from: {model_path}")
    model = onnx.load(model_path)

    # What we don't clean currently:
    #   model.doc_string: top-level doc string
    #   model.metadata_props: top-level metadata
    #   model.graph.doc_string: graph doc string
    #   model.graph.node[...].doc_string: node's doc string
    #   model.graph.initializer[...].metadata_props: initializer (tensor)'s metadata
    #   model.functions[...].doc_string: function doc string

    # Track what we're cleaning
    changes_made = []
    removed_values = set()

    # Clean metadata_props from nodes (this is where PyTorch stores stack traces).
    # Function bodies and control-flow subgraphs hold nodes too, so walk them as well.
    node_metadata_cleaned = 0
    for container in (model.graph, *model.functions):
        for node in _iter_nodes(container):
            if node.metadata_props:
                node_metadata_cleaned += _strip_path_props(node, removed_values)

    if node_metadata_cleaned > 0:
        print(f" Removed {node_metadata_cleaned} metadata_props entries from nodes")
        changes_made.append(f"{node_metadata_cleaned} node metadata_props")

    # Verify the model is still valid before touching anything on disk
    print(" Verifying cleaned model...")
    try:
        onnx.checker.check_model(model)
    except Exception as e:
        print(f" ✗ Model validation failed: {e}")
        print(" Aborting - model may be corrupted")
        return False
    print(" ✓ Model validation passed")

    # Save the cleaned model
    if output_path is None:
        output_path = model_path

    print(f"Saving cleaned model to: {output_path}")
    onnx.save(model, output_path)

    # Report what was cleaned
    if changes_made:
        print("\nCleaned metadata:")
        for change in changes_made:
            print(f" - {change}")

        print(f"Removed {len(removed_values)} values")
    else:
        print("\nNo privacy-sensitive filepath metadata found")

    # Verify the paths are gone
    print("\nVerifying paths are removed...")
    with open(output_path, 'rb') as f:
        content = f.read()

    # Check for common path indicators
    if b'/Users/' in content or b'C:\\' in content or b'/home/' in content:
        print(" ⚠ Warning: Some path-like strings may still be present")
        print(" (This could be in tensor names or other non-metadata)")
    else:
        print(" ✓ No obvious file paths detected in binary")

    print("\n✓ Successfully cleaned ONNX model!")
    return True
106+
107+
108+
def main():
    """Command-line entry point: read paths from ``sys.argv`` and clean the model.

    ``sys.argv[1]`` is the input model and the optional ``sys.argv[2]`` is the
    output path. Exits with status 0 when cleaning succeeds, and with status 1
    when no input is given, the input is not an existing regular file, or
    cleaning reports failure.
    """
    if len(sys.argv) < 2:
        print(__doc__)
        print("\nError: No input file specified")
        sys.exit(1)

    input_path = sys.argv[1]
    output_path = sys.argv[2] if len(sys.argv) > 2 else None

    # isfile rather than exists: a directory would pass exists() and only fail
    # later, with a traceback, when the model is loaded.
    if not os.path.isfile(input_path):
        print(f"Error: Input file not found: {input_path}")
        sys.exit(1)

    success = clean_onnx_metadata(input_path, output_path)
    sys.exit(0 if success else 1)
123+
124+
125+
# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)