Skip to content

Commit 3fc9279

Browse files
talmo and claude authored
fix(io): raise helpful error when SLP metadata JSON attribute is missing (#429) (#446)
`read_metadata` previously surfaced a raw h5py `KeyError` ("Can't open attribute (can't locate attribute: 'json')") when an `.slp` file's `metadata` group exists but is missing its `json` attribute, giving users no context or recovery path. This has been seen on user-corrupted files (talmolab/sleap#2146).

Wrap only the `read_hdf5_attrs` call in `read_metadata` in a `try/except KeyError` and re-raise a `ValueError(...) from e` that names the file, explains it is likely corrupt, and gives an h5py recovery snippet. The decode/`json.loads` lines stay outside the try so a present-but-malformed blob still surfaces as its own decode error rather than being mislabeled as corruption.

The blanket `except KeyError` also covers a fully-absent `metadata` group; the "likely corrupt" wording stays accurate for both. The error propagates automatically through `read_labels` via `read_skeletons`, so no extra plumbing is added.

Adds four focused tests covering the missing-attribute, missing-group, read_labels-propagation, and malformed-blob-not-remasked cases.

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent ad6aadb commit 3fc9279

2 files changed

Lines changed: 70 additions & 1 deletion

File tree

sleap_io/io/slp.py

Lines changed: 21 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1496,8 +1496,28 @@ def read_metadata(labels_path: str, *, _hdf5_file: h5py.File | None = None) -> d
14961496
14971497
Returns:
14981498
A dict containing the metadata from a SLEAP labels file.
1499+
1500+
Raises:
1501+
ValueError: If the ``metadata`` group is missing its ``json`` attribute
1502+
(or the group itself is absent), indicating the file is likely
1503+
corrupt.
14991504
"""
1500-
md = read_hdf5_attrs(labels_path, "metadata", "json", _hdf5_file=_hdf5_file)
1505+
try:
1506+
md = read_hdf5_attrs(labels_path, "metadata", "json", _hdf5_file=_hdf5_file)
1507+
except KeyError as e:
1508+
raise ValueError(
1509+
f"The SLEAP labels file {labels_path!r} is missing its required "
1510+
"metadata JSON blob (the 'metadata' HDF5 group has no readable 'json' "
1511+
"attribute) and is likely corrupt. If you have a working .slp file "
1512+
"with the same skeleton, you can copy the attribute into a BACKUP "
1513+
"COPY of the corrupt file with h5py (back up first):\n"
1514+
" import h5py\n"
1515+
" with h5py.File('working.slp', 'r') as src, "
1516+
"h5py.File('corrupt_copy.slp', 'a') as dst:\n"
1517+
" dst['metadata'].attrs['json'] = src['metadata'].attrs['json']\n"
1518+
"Only do this if the skeletons match exactly, otherwise the loaded "
1519+
"data will be wrong."
1520+
) from e
15011521
if isinstance(md, bytes):
15021522
md = md.decode()
15031523
elif isinstance(md, np.ndarray):

tests/io/test_slp.py

Lines changed: 49 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5414,6 +5414,55 @@ def test_read_metadata_ndarray(tmp_path):
54145414
assert result == metadata
54155415

54165416

5417+
def test_read_metadata_missing_json_attr_raises_valueerror(tmp_path):
5418+
"""Test read_metadata raises ValueError when 'json' attr is missing."""
5419+
path = str(tmp_path / "missing_json.slp")
5420+
with h5py.File(path, "w") as f:
5421+
grp = f.require_group("metadata")
5422+
# Group exists but has no 'json' attribute (only an unrelated attr).
5423+
grp.attrs["format_id"] = 1.1
5424+
5425+
with pytest.raises(ValueError, match="missing its required metadata JSON blob"):
5426+
read_metadata(path)
5427+
5428+
5429+
def test_read_labels_missing_json_attr_raises_valueerror(slp_minimal, tmp_path):
5430+
"""Test ValueError propagates to read_labels when 'json' attr is missing."""
5431+
# Start from a real .slp file so all other datasets are present, then
5432+
# corrupt a copy by deleting only the 'metadata/json' attribute.
5433+
path = str(tmp_path / "corrupt.slp")
5434+
shutil.copy(slp_minimal, path)
5435+
with h5py.File(path, "a") as f:
5436+
del f["metadata"].attrs["json"]
5437+
5438+
with pytest.raises(ValueError, match="likely corrupt"):
5439+
read_labels(path)
5440+
5441+
5442+
def test_read_metadata_missing_metadata_group_raises_valueerror(tmp_path):
5443+
"""Test read_metadata raises ValueError when 'metadata' group is absent."""
5444+
path = str(tmp_path / "no_metadata_group.slp")
5445+
with h5py.File(path, "w") as f:
5446+
# No 'metadata' group at all.
5447+
f.require_group("other")
5448+
5449+
with pytest.raises(ValueError, match="likely corrupt"):
5450+
read_metadata(path)
5451+
5452+
5453+
def test_read_metadata_malformed_json_not_remasked(tmp_path):
5454+
"""Test malformed (present) JSON surfaces as a decode error, not corruption."""
5455+
path = str(tmp_path / "malformed_json.slp")
5456+
with h5py.File(path, "w") as f:
5457+
grp = f.require_group("metadata")
5458+
grp.attrs["json"] = b"{not valid json"
5459+
5460+
# The corruption message must NOT be raised here; json parsing should fail.
5461+
with pytest.raises(ValueError) as exc_info:
5462+
read_metadata(path)
5463+
assert "missing its required metadata JSON blob" not in str(exc_info.value)
5464+
5465+
54175466
def test_read_h5wasm_instances_float64_indices(tmp_path):
54185467
"""Test that float64 index values from h5wasm are handled in read_instances."""
54195468
path = str(tmp_path / "instances.h5")

0 commit comments

Comments
 (0)