Skip to content

Commit e80a2bb

Browse files
committed
Fix malformed schema traversal to report validation errors instead of internal exceptions
1 parent 1b3e90a commit e80a2bb

File tree

4 files changed

+198
-38
lines changed

4 files changed

+198
-38
lines changed

openapi_spec_validator/validation/keywords.py

Lines changed: 45 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
11
import string
2-
from collections.abc import Iterator
32
from collections.abc import Callable
3+
from collections.abc import Iterator
4+
from collections.abc import Mapping
45
from collections.abc import Sequence
56
from typing import TYPE_CHECKING
67
from typing import Any
78
from typing import cast
89

910
from jsonschema._format import FormatChecker
11+
from jsonschema.exceptions import SchemaError
1012
from jsonschema.exceptions import ValidationError
1113
from jsonschema.protocols import Validator
1214
from jsonschema_path.paths import SchemaPath
@@ -19,6 +21,7 @@
1921
DuplicateOperationIDError,
2022
)
2123
from openapi_spec_validator.validation.exceptions import ExtraParametersError
24+
from openapi_spec_validator.validation.exceptions import OpenAPIValidationError
2225
from openapi_spec_validator.validation.exceptions import (
2326
ParameterDuplicateError,
2427
)
@@ -67,7 +70,11 @@ class SchemaValidator(KeywordValidator):
6770
def __init__(self, registry: "KeywordValidatorRegistry"):
6871
super().__init__(registry)
6972

70-
self.schema_ids_registry: list[int] | None = []
73+
# recursion/visit dedupe registry
74+
self.visited_schema_ids: list[int] | None = []
75+
# meta-schema-check dedupe registry
76+
# to avoid validating the same schema multiple times
77+
self.meta_checked_schema_ids: list[int] | None = []
7178

7279
@property
7380
def default_validator(self) -> ValueValidator:
@@ -95,23 +102,48 @@ def _collect_properties(self, schema: SchemaPath) -> set[str]:
95102
return props
96103

97104
def __call__(
98-
self, schema: SchemaPath, require_properties: bool = True
105+
self,
106+
schema: SchemaPath,
107+
require_properties: bool = True,
108+
meta_checked: bool = False,
99109
) -> Iterator[ValidationError]:
100110
schema_value = schema.read_value()
101-
if not hasattr(schema_value, "__getitem__"):
111+
if not isinstance(schema_value, (Mapping, bool)):
112+
yield OpenAPIValidationError(
113+
f"{schema_value!r} is not of type 'object', 'boolean'"
114+
)
102115
return
103116

104-
assert self.schema_ids_registry is not None
117+
if not meta_checked:
118+
assert self.meta_checked_schema_ids is not None
119+
schema_id = id(schema_value)
120+
if schema_id not in self.meta_checked_schema_ids:
121+
try:
122+
schema_check = getattr(
123+
self.default_validator.value_validator_cls,
124+
"check_schema",
125+
)
126+
schema_check(schema_value)
127+
except (SchemaError, ValidationError) as err:
128+
yield OpenAPIValidationError.create_from(err)
129+
return
130+
self.meta_checked_schema_ids.append(schema_id)
131+
132+
assert self.visited_schema_ids is not None
105133
schema_id = id(schema_value)
106-
if schema_id in self.schema_ids_registry:
134+
if schema_id in self.visited_schema_ids:
107135
return
108-
self.schema_ids_registry.append(schema_id)
136+
self.visited_schema_ids.append(schema_id)
109137

110138
nested_properties = []
111139
if "allOf" in schema:
112140
all_of = schema / "allOf"
113141
for inner_schema in all_of:
114-
yield from self(inner_schema, require_properties=False)
142+
yield from self(
143+
inner_schema,
144+
require_properties=False,
145+
meta_checked=True,
146+
)
115147
nested_properties += list(
116148
self._collect_properties(inner_schema)
117149
)
@@ -122,6 +154,7 @@ def __call__(
122154
yield from self(
123155
inner_schema,
124156
require_properties=False,
157+
meta_checked=True,
125158
)
126159

127160
if "oneOf" in schema:
@@ -130,20 +163,23 @@ def __call__(
130163
yield from self(
131164
inner_schema,
132165
require_properties=False,
166+
meta_checked=True,
133167
)
134168

135169
if "not" in schema:
136170
not_schema = schema / "not"
137171
yield from self(
138172
not_schema,
139173
require_properties=False,
174+
meta_checked=True,
140175
)
141176

142177
if "items" in schema:
143178
array_schema = schema / "items"
144179
yield from self(
145180
array_schema,
146181
require_properties=False,
182+
meta_checked=True,
147183
)
148184

149185
if "properties" in schema:
@@ -152,6 +188,7 @@ def __call__(
152188
yield from self(
153189
prop_schema,
154190
require_properties=False,
191+
meta_checked=True,
155192
)
156193

157194
required = (

tests/bench/runner.py

Lines changed: 61 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -10,23 +10,23 @@
1010
import argparse
1111
import cProfile
1212
import gc
13-
from io import StringIO
1413
import json
1514
import pstats
1615
import statistics
1716
import time
17+
from collections.abc import Iterator
1818
from dataclasses import dataclass
1919
from functools import cached_property
20+
from io import StringIO
2021
from pathlib import Path
2122
from typing import Any
22-
from collections.abc import Iterator
2323

2424
from jsonschema_path import SchemaPath
2525
from jsonschema_path.typing import Schema
2626

27+
from openapi_spec_validator import schemas
2728
from openapi_spec_validator import validate
2829
from openapi_spec_validator.readers import read_from_filename
29-
from openapi_spec_validator import schemas
3030
from openapi_spec_validator.shortcuts import get_validator_cls
3131

3232

@@ -128,7 +128,10 @@ def benchmark_spec_file(
128128
spec_size_kb = spec_path.stat().st_size / 1024
129129
spec, _ = read_from_filename(str(spec_path))
130130
return benchmark_spec(
131-
spec, repeats, warmup, no_gc,
131+
spec,
132+
repeats,
133+
warmup,
134+
no_gc,
132135
spec_name=spec_name,
133136
spec_size_kb=spec_size_kb,
134137
)
@@ -148,15 +151,17 @@ def benchmark_spec(
148151
spec_version = get_spec_version(spec)
149152
paths_count = count_paths(spec)
150153
schemas_count = count_schemas(spec)
151-
print(f"⚡ Benchmarking {spec_name} spec (version {spec_version}, {paths_count} paths, {schemas_count} schemas)...")
152-
154+
print(
155+
f"⚡ Benchmarking {spec_name} spec (version {spec_version}, {paths_count} paths, {schemas_count} schemas)..."
156+
)
157+
153158
if no_gc:
154159
gc.disable()
155-
160+
156161
# Warmup
157162
for _ in range(warmup):
158163
run_once(spec)
159-
164+
160165
pr: cProfile.Profile | None = None
161166
if profile:
162167
print("\n🔬 Profiling mode enabled...")
@@ -174,18 +179,18 @@ def benchmark_spec(
174179

175180
# Print profile stats
176181
s = StringIO()
177-
ps = pstats.Stats(pr, stream=s).sort_stats('cumulative')
182+
ps = pstats.Stats(pr, stream=s).sort_stats("cumulative")
178183
ps.print_stats(30)
179184
print(s.getvalue())
180-
185+
181186
# Save profile data
182187
pr.dump_stats(profile)
183188
print(f"💾 Profile data saved to {profile}")
184189
print(f" View with: python -m pstats {profile}")
185190

186191
if no_gc:
187192
gc.enable()
188-
193+
189194
return BenchResult(
190195
spec_name=spec_name,
191196
spec_version=spec_version,
@@ -197,7 +202,7 @@ def benchmark_spec(
197202
seconds=seconds,
198203
success=True,
199204
)
200-
205+
201206
except Exception as e:
202207
return BenchResult(
203208
spec_name=spec_name,
@@ -228,30 +233,37 @@ def generate_synthetic_spec(
228233
"description": "Success",
229234
"content": {
230235
"application/json": {
231-
"schema": {"$ref": f"#/components/schemas/Schema{i % schemas}"}
236+
"schema": {
237+
"$ref": f"#/components/schemas/Schema{i % schemas}"
238+
}
232239
}
233-
}
240+
},
234241
}
235242
}
236243
}
237244
}
238-
245+
239246
schemas_obj = {}
240247
for i in range(schemas):
241248
schemas_obj[f"Schema{i}"] = {
242249
"type": "object",
243250
"properties": {
244251
"id": {"type": "integer"},
245252
"name": {"type": "string"},
246-
"nested": {"$ref": f"#/components/schemas/Schema{(i + 1) % schemas}"}
247-
}
253+
"nested": {
254+
"$ref": f"#/components/schemas/Schema{(i + 1) % schemas}"
255+
},
256+
},
248257
}
249-
258+
250259
return {
251260
"openapi": version,
252-
"info": {"title": f"Synthetic API ({paths} paths, {schemas} schemas)", "version": "1.0.0"},
261+
"info": {
262+
"title": f"Synthetic API ({paths} paths, {schemas} schemas)",
263+
"version": "1.0.0",
264+
},
253265
"paths": paths_obj,
254-
"components": {"schemas": schemas_obj}
266+
"components": {"schemas": schemas_obj},
255267
}
256268

257269

@@ -274,13 +286,28 @@ def get_specs_iterator(
274286

275287

276288
def main():
277-
parser = argparse.ArgumentParser(description="Benchmark openapi-spec-validator")
278-
parser.add_argument("specs", type=Path, nargs='*', help="File(s) with custom specs to benchmark, otherwise use synthetic specs.")
279-
parser.add_argument("--repeats", type=int, default=1, help="Number of benchmark repeats")
280-
parser.add_argument("--warmup", type=int, default=0, help="Number of warmup runs")
281-
parser.add_argument("--no-gc", action="store_true", help="Disable GC during benchmark")
289+
parser = argparse.ArgumentParser(
290+
description="Benchmark openapi-spec-validator"
291+
)
292+
parser.add_argument(
293+
"specs",
294+
type=Path,
295+
nargs="*",
296+
help="File(s) with custom specs to benchmark, otherwise use synthetic specs.",
297+
)
298+
parser.add_argument(
299+
"--repeats", type=int, default=1, help="Number of benchmark repeats"
300+
)
301+
parser.add_argument(
302+
"--warmup", type=int, default=0, help="Number of warmup runs"
303+
)
304+
parser.add_argument(
305+
"--no-gc", action="store_true", help="Disable GC during benchmark"
306+
)
282307
parser.add_argument("--output", type=str, help="Output JSON file path")
283-
parser.add_argument("--profile", type=str, help="Profile file path (cProfile)")
308+
parser.add_argument(
309+
"--profile", type=str, help="Profile file path (cProfile)"
310+
)
284311
args = parser.parse_args()
285312

286313
results: list[dict[str, Any]] = []
@@ -291,7 +318,9 @@ def main():
291318

292319
# Benchmark custom specs
293320
if args.specs:
294-
print(f"\n🔍 Testing with custom specs {[str(spec) for spec in args.specs]}")
321+
print(
322+
f"\n🔍 Testing with custom specs {[str(spec) for spec in args.specs]}"
323+
)
295324
spec_iterator = get_specs_iterator(args.specs)
296325

297326
# Synthetic specs for stress testing
@@ -318,7 +347,9 @@ def main():
318347
)
319348
results.append(result.as_dict())
320349
if result.success:
321-
print(f" ✅ {result.median_s:.4f}s, {result.validations_per_sec:.2f} val/s")
350+
print(
351+
f" ✅ {result.median_s:.4f}s, {result.validations_per_sec:.2f} val/s"
352+
)
322353
else:
323354
print(f" ❌ Error: {result.error}")
324355

@@ -331,10 +362,10 @@ def main():
331362
},
332363
"results": results,
333364
}
334-
365+
335366
print(f"\n📊 Summary: {len(results)} specs benchmarked")
336367
print(json.dumps(output, indent=2))
337-
368+
338369
if args.output:
339370
with open(args.output, "w") as f:
340371
json.dump(output, f, indent=2)

tests/integration/test_main.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,34 @@ def test_schema_stdin(capsys):
193193
assert "stdin: OK\n" in out
194194

195195

196+
def test_malformed_schema_stdin(capsys):
197+
"""Malformed schema from STDIN reports validation error."""
198+
spec_io = StringIO(
199+
"""
200+
openapi: 3.1.0
201+
info:
202+
version: "1"
203+
title: "Title"
204+
components:
205+
schemas:
206+
Component:
207+
type: object
208+
properties:
209+
name: string
210+
"""
211+
)
212+
213+
testargs = ["--schema", "3.1.0", "-"]
214+
with mock.patch("openapi_spec_validator.__main__.sys.stdin", spec_io):
215+
with pytest.raises(SystemExit):
216+
main(testargs)
217+
218+
out, err = capsys.readouterr()
219+
assert not err
220+
assert "stdin: Validation Error:" in out
221+
assert "stdin: OK" not in out
222+
223+
196224
def test_version(capsys):
197225
"""Test --version flag outputs correct version."""
198226
testargs = ["--version"]

0 commit comments

Comments
 (0)