NOTE(review): line breaks and indentation were reconstructed from a whitespace-collapsed copy of this patch.
NOTE(review): context-line indentation in the hybrid.py hunk is assumed (method body at 8 spaces) - confirm against the target file before applying.

diff --git a/camelot/parsers/hybrid.py b/camelot/parsers/hybrid.py
index bfa4cf38..5bf70c0e 100644
--- a/camelot/parsers/hybrid.py
+++ b/camelot/parsers/hybrid.py
@@ -247,6 +247,7 @@ def _merge_bbox_analysis(self, lattice_bbox, network_bbox):
         self.table_bbox_parses[augmented_bbox] = self.network_parser
 
     def _generate_table_bbox(self):
+        self.table_bbox_parses = {}  # start from an empty mapping on every call
         # Collect bboxes from both parsers
         self.lattice_parser._generate_table_bbox()
         _lattice_bboxes = sorted(
diff --git a/tests/files/hybrid_multipage.pdf b/tests/files/hybrid_multipage.pdf
new file mode 100644
index 00000000..3d0f256d
Binary files /dev/null and b/tests/files/hybrid_multipage.pdf differ
diff --git a/tests/test_hybrid.py b/tests/test_hybrid.py
index a33f71ea..4a398737 100644
--- a/tests/test_hybrid.py
+++ b/tests/test_hybrid.py
@@ -142,3 +142,10 @@ def test_hybrid_keyerror(testdir):
     filename = os.path.join(testdir, "tabula/schools.pdf")
     tables = camelot.read_pdf(filename, flavor="hybrid", pages="4-5")
     assert len(tables) >= 1
+
+
+def test_hybrid_multipage(testdir):
+    """Hybrid parser should clear table bboxes on each new page."""
+    filename = os.path.join(testdir, "hybrid_multipage.pdf")
+    tables = camelot.read_pdf(filename, flavor="hybrid", pages="1-2")
+    assert len(tables) == 2  # a 3rd table would mean bboxes leaked across pages