Source code for singlejson.fileutils

"""Utils for handling IO and JSON operations."""

from __future__ import annotations

import json
import logging
import os
import shutil
import threading
import warnings
from copy import deepcopy
from dataclasses import dataclass
from json import dumps
from json import load as json_load
from pathlib import Path
from tempfile import NamedTemporaryFile
from types import TracebackType
from typing import Any, TypeAlias

JSONSerializable: TypeAlias = (
    dict[str, "JSONSerializable"]
    | list["JSONSerializable"]
    | str
    | int
    | float
    | bool
    | None
)

PathOrSimilar = str | os.PathLike[str]

logger = logging.getLogger(__name__)


[docs] @dataclass(frozen=True) class JsonSerializationSettings: indent: int = 4 sort_keys: bool = True ensure_ascii: bool = False encoding: str = "utf-8"
[docs] def abs_filename(file: PathOrSimilar) -> Path: """ Return the absolute path of a file as :class:`pathlib.Path`. :param file: File to get the absolute path of :return: Absolute Path of file """ return Path(file).expanduser().resolve()
def _atomic_write_text(path: Path, text: str, encoding: str = "utf-8") -> None: """ Write text to a path atomically by writing to a temp file and then replacing. Ensures the directory exists. Uses os.replace for atomicity so readers never see a partial write. """ try: if str(path.parent): # Avoid creating '' path.parent.mkdir(parents=True, exist_ok=True) # write to a temp file in same directory then replace with NamedTemporaryFile( "w", encoding=encoding, dir=path.parent, delete=False, suffix=".tmp" ) as tf: tf.write(text) temp_name = tf.name os.replace(temp_name, path) except Exception as e: raise FileAccessError( f"Could not atomically write data to file '{path}'.\nError: {e}" ) from e def _atomic_copy_file(src: Path, dest: Path) -> None: """Copy a file into dest atomically by copying to a temp file and then replacing.""" dest.parent.mkdir(parents=True, exist_ok=True) # create temp file name in destination dir with NamedTemporaryFile("wb", dir=dest.parent, delete=False, suffix=".tmp") as tf: temp_name = tf.name try: shutil.copyfile(src, temp_name) os.replace(temp_name, dest) except Exception as orig_e: # best-effort cleanup try: if os.path.exists(temp_name): os.remove(temp_name) except Exception as e: raise FileAccessError( f"Error while copying '{src}' (default of a file) " f"to '{dest}'. Could not remove temporary file because of {e}!\n" f"Original error: {orig_e}" ) from orig_e raise FileAccessError( f"Error while copying '{src}' (default of a file) to '{dest}'.\n" f"Error: {orig_e}" ) from orig_e
[docs] class FileAccessError(Exception): """Raised when the file cannot be accessed due to permissions or IO errors."""
[docs] class DefaultNotJSONSerializableError(Exception): """Raised when the provided default data is not JSON-serializable."""
[docs] class JSONDeserializationError(Exception): """Raised when JSON data loaded from a file cannot be deserialized."""
[docs] class JSONFile: """A .json file on the disk.""" __path: Path # Full absolute path json: Any """Python representation of the JSON data.""" __default_data: JSONSerializable | None = None #: If not None, default data to use when the file at path is missing or corrupted __default_path: PathOrSimilar | None = None #: If not None, path to JSON file to use as default data settings: JsonSerializationSettings """Serialization settings of this instance""" __auto_save: bool def __init__( self, path: PathOrSimilar, default_data: JSONSerializable = None, default_path: PathOrSimilar | None = None, *, settings: JsonSerializationSettings | None = None, auto_save: bool = True, strict: bool = True, load_file: bool = True, ) -> None: """ Create a new JSONFile instance and load data from disk Specify defaults preferably with default_data or default_path. :param path: path to file (str or PathLike) :param default_data: Default data to use if file at path is nonexistent or corrupted. Keep in mind that None is serializable as JSON "null" - will not throw an error if not specified. :param default_path: **Overrides** default_data if provided. Path to a JSON file to use as default data. :param settings: JsonSerializationSettings object :param auto_save: if True, context manager will save on exit :param strict: if True, will throw error if file cannot be read or if default_data is not JSON-serializable :param load_file: True by default, causes file to be loaded on init. Set to False to suppress loading. :raises ~singlejson.fileutils.FileAccessError: if file cannot be accessed (always) :raises ~singlejson.fileutils.JSONDeserializationError: if strict is True and an error occurs during loading :raises ~singlejson.fileutils.DefaultNotJSONSerializableError: if strict is True and default_data is not JSON-serializable """ self.__path = abs_filename(path) self.settings = settings or DEFAULT_SERIALIZATION_SETTINGS self.__auto_save = auto_save # Per-instance reentrant lock to make file operations thread-safe self._lock = threading.RLock() if default_path: if strict: # Ensure default file can be loaded with json.loads path = Path(default_path) if path.exists(): # Load from file try: with path.open("r", encoding=self.settings.encoding) as file: json_load(file) # If this works without errors, fine! except (PermissionError, OSError) as e: raise FileAccessError( f"Cannot access default JSON file '{path}': {e}" ) from e except Exception as e: raise DefaultNotJSONSerializableError( f"Cannot load default JSON from file '{path}': {e}" ) from e else: raise DefaultNotJSONSerializableError( f"Default JSON file '{path}' does not exist." ) # Whether checked or not, use default_path default initialization method. self.__default_path = default_path else: # Default data and no default_path if strict: try: dumps( default_data, indent=self.settings.indent, sort_keys=self.settings.sort_keys, ensure_ascii=self.settings.ensure_ascii, ) # If this works without errors, fine! except (TypeError, ValueError, json.JSONDecodeError) as e: raise DefaultNotJSONSerializableError( f"default_data for '{self.__path}' isn't JSON-serializable: {e}" ) from e # No matter the validity, set default data. self.__default_data = deepcopy(default_data) # Load from disk (this will create the file if needed and apply defaults) if load_file: self.reload(recover=strict) else: self.json = None def __reinstantiate_default(self, recover: bool) -> None: """ Revert the file to the default. :param recover: If True, recover when an error occurs during default loading. Otherwise will throw DefaultNotJSONSerializableError. """ with self._lock: if self.__default_path: default_path = Path(self.__default_path) if default_path.exists(): # Valid default file, copy _atomic_copy_file(default_path, self.__path) else: # Default file does not exist, create empty file if not recover: raise DefaultNotJSONSerializableError( f"Default JSON file '{default_path}' does not exist!" ) _atomic_write_text( self.__path, "{}", encoding=self.settings.encoding ) else: # Default is dict, write it to file and then open it. # No need to deepcopy again as default is # saved to file and then re-constructed try: text = dumps( self.__default_data, indent=self.settings.indent, sort_keys=self.settings.sort_keys, ensure_ascii=self.settings.ensure_ascii, ) _atomic_write_text( self.__path, text, encoding=self.settings.encoding ) except (TypeError, ValueError, json.JSONDecodeError) as e: if not recover: raise DefaultNotJSONSerializableError( f"Default for file '{self.__path}' is not serializable!" f"\nError: {e}" ) from e _atomic_write_text( self.__path, "{}", encoding=self.settings.encoding ) # Continue to load file as normal @property def path(self) -> Path: """ Return the absolute path of the file. :return: absolute path """ return self.__path
[docs] def reload(self, *, recover: bool = True) -> None: """ Reload from disk, recovering to default on invalid JSON. Always raises FileAccessError on permission issues. :param recover: If True, recover when an error occurs during default loading. If False {} will be used if default loading fails. :type recover: bool :raises ~singlejson.fileutils.FileAccessError: if file cannot be accessed (always) :raises ~singlejson.fileutils.DefaultNotJSONSerializableError: if recover is False and JSON is invalid """ # Use the per-instance lock to guard load/recovery operations with self._lock: # 1: See if file exists if not self.__path.exists(): # Create file with no data self.__reinstantiate_default(recover) # 2: File now surely exists try: with self.__path.open("r", encoding=self.settings.encoding) as file: self.json = json_load(file) except (PermissionError, OSError) as e: raise FileAccessError(f"Cannot read file '{self.__path}': {e}") from e except json.JSONDecodeError as e: # Loading failed. Recover to default if allowed. if not recover: # If a default_path is configured, # the error likely came from copying an invalid default file. if self.__default_path: raise DefaultNotJSONSerializableError( f"Default JSON file '{self.__default_path}' " f"is not valid JSON: {e}" ) from e raise JSONDeserializationError( f"Cannot read json from file '{self.__path}': {e}" ) from e logger.warning( "Cannot read json from file '%s'. Using default!\n" "Decoding error: %s", self.__path, e, ) self.__reinstantiate_default(recover) # Try loading again (single safe retry to avoid infinite recursion) try: with self.__path.open("r", encoding=self.settings.encoding) as file: self.json = json_load(file) except json.JSONDecodeError as e2: # No need to check for recover=False here, we are already recovering logger.warning( "Recovery also failed for '%s'. Falling back to empty object." "Decoding error: %s", self.__path, e2, ) _atomic_write_text( self.__path, "{}", encoding=self.settings.encoding ) self.json = {}
[docs] def save(self, settings: JsonSerializationSettings | None = None) -> None: """ Save the data to the disk (atomically by default). :param settings: :class:`JsonSerializationSettings` object (``None`` for instance settings) """ settings = settings or self.settings # guard save with the per-instance lock with self._lock: try: # Ensure directory exists self.__path.parent.mkdir(parents=True, exist_ok=True) # Serialize to text then atomically write data_to_save = self.json text = dumps( data_to_save, indent=settings.indent, sort_keys=settings.sort_keys, ensure_ascii=settings.ensure_ascii, ) _atomic_write_text(self.__path, text, encoding=self.settings.encoding) except (PermissionError, OSError) as e: raise FileAccessError(f"Cannot write file '{self.__path}': {e}") from e
[docs] def save_atomic(self, tmp_suffix: str = ".tmp") -> None: """ Deprecated alias for `save()` — saves atomically by default. """ warnings.warn( "JSONFile.save_atomic is deprecated; use JSONFile.save() " "which is atomic by default", DeprecationWarning, stacklevel=2, ) # delegate to new save implementation return self.save()
# Context manager support def __enter__(self) -> JSONFile: """ Enter the context manager. :return: self """ return self def __exit__( self, exc_type: type[BaseException] | None, exc: BaseException | None, tb: TracebackType | None, ) -> None: """ Exit the context manager and save if auto_save is True and no exception occurred. :param exc_type: exception type :param exc: exception instance :param tb: traceback """ if exc_type is None and self.__auto_save: self.save()
# Default settings instance used by JSONFile.save() when not provided DEFAULT_SERIALIZATION_SETTINGS = JsonSerializationSettings() """Default JsonSerializationSettings used by JSONFile instances with indent=4, sort_keys=True, ensure_ascii=False"""