Source code for sleap.io.format.filehandle
"""
File object which can be passed to adaptors.
We use this since multiple file adaptors may need to open/read the file while
dispatch is determining which adaptor to use, and the `FileHandle` allows us
to keep any results from previous reads.
"""
import os
from typing import Optional
import attr
import h5py
from sleap.util import json_loads
@attr.s(auto_attribs=True)
class FileHandle(object):
    """Reference to a file; can hold loaded data so it needn't be read twice.

    The underlying file is opened lazily: accessing `file`, `text`, `json`,
    `is_json`, `is_hdf5` or `format_id` opens it on demand. Text and parsed
    JSON are cached on the instance, so they remain available after the file
    has been closed.

    Can be used as a context manager; the file is opened on entry and closed
    on exit.
    """

    filename: str
    # Set by `open()`: True if h5py could open the file, False otherwise.
    _is_hdf5: bool = False
    # Cached result of the JSON probe; None means "not checked yet".
    _is_json: Optional[bool] = None
    # True while a file object is held open by this handle.
    _is_open: bool = False
    # The open `h5py.File` or text file object (None while closed).
    _file: object = None
    # Cached text contents (None until first read).
    _text: Optional[str] = None
    # Cached result of `json_loads` on the text (None until first parsed).
    _json: object = None

    def __enter__(self):
        self.open()
        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        self.close()

    def open(self):
        """Opens the file (if it's not already open).

        The file is first tried as HDF5 (read-only) via h5py; if h5py rejects
        it, the file is opened as a plain text file instead.

        Raises:
            FileNotFoundError: If the file is not open yet and `filename`
                does not exist.
        """
        # Already holding a handle: nothing to do. Checking this first avoids
        # a filesystem stat on every `file` access and avoids failing on a
        # perfectly valid handle if the path was unlinked after opening.
        if self._file is not None:
            return

        if not os.path.exists(self.filename):
            raise FileNotFoundError(f"Could not find {self.filename}")

        try:
            self._file = h5py.File(self.filename, "r")
            self._is_hdf5 = True
        except OSError:
            # We get OSError when trying to read non-HDF5 file with h5py
            pass

        if self._file is None:
            self._file = open(self.filename, "r")
            self._is_hdf5 = False

        self._is_open = True

    def close(self):
        """Closes the file.

        The handle is dropped so that a later `open()` (or any property that
        opens the file on demand) gets a fresh file object rather than the
        closed one. Cached text/JSON are kept.
        """
        # Detach the handle before closing so the state is consistent even if
        # the underlying `close()` raises.
        handle, self._file = self._file, None
        self._is_open = False
        if handle is not None:
            handle.close()

    @property
    def file(self):
        """The raw file object."""
        self.open()
        return self._file

    @property
    def text(self):
        """The text from a text file."""
        if self._text is None:
            self._text = self.file.read()
        return self._text

    @property
    def json(self):
        """The loaded JSON dictionary (for a JSON file)."""
        if self._json is None:
            self._json = json_loads(self.text)
        return self._json

    @property
    def is_json(self):
        """Whether file is JSON."""
        if self._is_json is None:
            try:
                # Accessed only for its side effect: reading and parsing the
                # file. Any failure along the way means "not JSON".
                self.json
                self._is_json = True
            except Exception:
                self._is_json = False
        return self._is_json

    @property
    def is_hdf5(self):
        """Whether file is HDF5."""
        self.open()
        return self._is_hdf5

    @property
    def format_id(self):
        """
        Returns an ID from the metadata we store in some HDF5 or JSON formats.

        This can be used if we need to distinguish multiple formats with a
        common underlying file type, e.g., HDF5-based file formats. See
        `LabelsV1Adaptor` for an example (the format id is here used to
        determine whether to convert from "gridline" to "midpixel" coordinates).

        Returns:
            The `format_id` attribute of the "metadata" group (HDF5) or the
            top-level "format_id" entry (JSON); None if the file has neither.
        """
        if self.is_hdf5:
            if "metadata" in self.file:
                meta_group = self.file.require_group("metadata")
                if "format_id" in meta_group.attrs:
                    return meta_group.attrs["format_id"]
        elif self.is_json:
            if "format_id" in self.json:
                return self.json["format_id"]
        return None