Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,6 @@ python/benchmark/*.html
.env
.vscode
env
# pixi environments
.pixi/*
!.pixi/config.toml
3 changes: 3 additions & 0 deletions python/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,6 @@ build
.*.swp
.*.swo
*/.ipynb_checkpoints
# pixi environments
.pixi/*
!.pixi/config.toml
31 changes: 23 additions & 8 deletions python/tests/test_file_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,12 +261,12 @@ def test_format_too_old_raised_for_hdf5(self):

with pytest.raises(
exceptions.FileFormatError,
match="appears to be in HDF5 format",
match=f"{filename}.*appears to be in HDF5 format",
):
tskit.load(path)
with pytest.raises(
exceptions.FileFormatError,
match="appears to be in HDF5 format",
match=f"{filename}.*appears to be in HDF5 format",
):
tskit.TableCollection.load(path)

Expand All @@ -284,9 +284,15 @@ class TestErrors(TestFileFormat):
def test_tszip_file(self):
ts = msprime.simulate(5)
tszip.compress(ts, self.temp_file)
with pytest.raises(tskit.FileFormatError, match="appears to be in zip format"):
with pytest.raises(
tskit.FileFormatError,
match=f"{self.temp_file}.*appears to be in zip format",
):
tskit.load(self.temp_file)
with pytest.raises(tskit.FileFormatError, match="appears to be in zip format"):
with pytest.raises(
tskit.FileFormatError,
match=f"{self.temp_file}.*appears to be in zip format",
):
tskit.TableCollection.load(self.temp_file)


Expand Down Expand Up @@ -897,7 +903,10 @@ def test_format_name_error(self):
data = dict(store)
data["format/name"] = np.array(bytearray(bad_name.encode()), dtype=np.int8)
kastore.dump(data, self.temp_file)
with pytest.raises(exceptions.FileFormatError):
with pytest.raises(
exceptions.FileFormatError,
match=f"While trying to load {self.temp_file}",
):
tskit.load(self.temp_file)

def test_load_bad_formats(self):
Expand All @@ -908,12 +917,16 @@ def test_load_bad_formats(self):
# Now some ascii text
with open(self.temp_file, "wb") as f:
f.write(b"Some ASCII text")
with pytest.raises(exceptions.FileFormatError):
with pytest.raises(
exceptions.FileFormatError, match=f"While trying to load {self.temp_file}"
):
tskit.load(self.temp_file)
# Now write 8k of random bytes
with open(self.temp_file, "wb") as f:
f.write(os.urandom(8192))
with pytest.raises(exceptions.FileFormatError):
with pytest.raises(
exceptions.FileFormatError, match=f"While trying to load {self.temp_file}"
):
tskit.load(self.temp_file)

def test_load_bad_formats_fileobj(self):
Expand All @@ -925,7 +938,9 @@ def load():
load()
with open(self.temp_file, "wb") as f:
f.write(b"Some ASCII text")
with pytest.raises(exceptions.FileFormatError):
with pytest.raises(
exceptions.FileFormatError, match=f"While trying to load {self.temp_file}"
):
load()


Expand Down
9 changes: 5 additions & 4 deletions python/tskit/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -878,6 +878,7 @@ def raise_known_file_format_errors(open_file, existing_exception):
Sniffs the file for PK-zip or HDF5 header bytes and raises a descriptive
exception if either is detected; otherwise re-raises the existing exception.
"""
existing_exception.add_note(f"While trying to load {open_file.name}")
# Check for HDF5 header bytes
try:
open_file.seek(0)
Expand All @@ -887,16 +888,16 @@ def raise_known_file_format_errors(open_file, existing_exception):
raise existing_exception
if header == b"\x89HDF":
raise tskit.FileFormatError(
"The specified file appears to be in HDF5 format. This file "
f"The file {open_file.name} appears to be in HDF5 format. This file "
"may have been generated by msprime < 0.6.0 (June 2018) which "
"can no longer be read directly. Please convert to the new "
"kastore format using the ``tskit upgrade`` command from tskit version<0.6.2"
) from existing_exception
if header[:2] == b"\x50\x4b":
raise tskit.FileFormatError(
"The specified file appears to be in zip format, so may be a compressed "
"tree sequence. Try using the tszip module to decompress this file before "
"loading. `pip install tszip; tsunzip <filename>` or use "
f"The file {open_file.name} appears to be in zip format, so may be a "
"compressed tree sequence. Try using the tszip module to decompress this "
"file before loading. `pip install tszip; tsunzip <filename>` or use "
"`tszip.decompress` in Python code."
) from existing_exception
raise existing_exception
Loading