Source code for singlejson.fileutils

"""Utils for handling IO and JSON operations."""

from __future__ import annotations

import json
import logging
import os
import shutil
import threading
from copy import deepcopy
from dataclasses import dataclass
from json import dumps
from json import load as json_load
from json import loads as json_loads
from pathlib import Path
from tempfile import NamedTemporaryFile
from types import TracebackType
from typing import Any, TypeAlias

# Recursive description of any value that may appear inside a JSON document.
JSONFields: TypeAlias = (
    dict[str, "JSONFields"] | list["JSONFields"] | str | int | float | bool | None
)
"""
A type alias for valid JSON fields (inside a json).
"""

# Accepted shapes for ``default_data``. A ``str`` default is treated as raw JSON
# text (it is written to disk as-is), not as a JSON string value.
SensibleTopLevelJSON: TypeAlias = dict[str, "JSONFields"] | list["JSONFields"] | str
"""
A type alias for valid top level JSON objects (only for use in default_data)
"""
# Note: floats, ints etc. are also valid top level JSON but unsupported with strict=True

# Anything accepted where a filesystem path is expected: a plain string or an
# ``os.PathLike`` that yields a string path.
PathOrSimilar: TypeAlias = str | os.PathLike[str]

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


@dataclass(frozen=True)
class JsonSerializationSettings:
    """
    Immutable bundle of options describing how JSON text is produced and stored.

    :param indent: Indentation width passed to ``json.dumps``
    :param sort_keys: Whether object keys are sorted when serializing
    :param ensure_ascii: Whether non-ASCII characters are escaped when serializing
    :param encoding: Text encoding used when reading and writing files
    """

    indent: int = 4
    sort_keys: bool = True
    ensure_ascii: bool = False
    encoding: str = "utf-8"
def abs_filename(file: PathOrSimilar) -> Path:
    """
    Turn a path-like value into an absolute :class:`pathlib.Path`.

    A leading ``~`` is expanded to the user's home directory first, then the
    result is resolved to an absolute path.

    :param file: File to get the absolute path of
    :return: Absolute Path of file
    """
    expanded = Path(file).expanduser()
    return expanded.resolve()
def _atomic_write_text(path: Path, text: str, encoding: str = "utf-8") -> None:
    """
    Write text to a path atomically by writing to a temp file and then replacing.

    Ensures the directory exists. Uses os.replace for atomicity so readers never
    see a partial write. If anything fails, the temporary file is removed again
    (best effort) so no ``*.tmp`` residue is left next to the target.

    :param path: Path to write to
    :param text: Text content to write to the file
    :param encoding: Encoding to use
    :raises FileAccessError: if the directory cannot be created or the file
        cannot be written or replaced
    """
    temp_name: str | None = None
    try:
        path.parent.mkdir(parents=True, exist_ok=True)
        # The temp file lives in the target directory so os.replace never has
        # to cross a filesystem boundary.
        with NamedTemporaryFile(
            "w", encoding=encoding, dir=path.parent, delete=False, suffix=".tmp"
        ) as tf:
            # Remember the name before writing so a failed write is cleaned up too.
            temp_name = tf.name
            tf.write(text)
        os.replace(temp_name, path)
    except Exception as e:
        if temp_name is not None:
            try:
                os.remove(temp_name)
            except OSError:
                # Best-effort cleanup: the original error is the one to report.
                pass
        raise FileAccessError(
            f"Could not atomically write data to file '{path}'.\nError: {e}"
        ) from e


def _atomic_copy_file(src: Path, dest: Path) -> None:
    """
    Copy a file into dest atomically by copying to a temp file and then replacing.

    Directory creation and temp-file creation are part of the guarded section,
    so every failure surfaces as :class:`FileAccessError`.

    :param src: filepath to copy from
    :param dest: filepath to copy to
    :raises FileAccessError: if the copy fails at any stage; the message also
        mentions a failed cleanup of the temporary file
    """
    temp_name: str | None = None
    try:
        dest.parent.mkdir(parents=True, exist_ok=True)
        # Reserve a unique temp file name in the destination directory.
        with NamedTemporaryFile(
            "wb", dir=dest.parent, delete=False, suffix=".tmp"
        ) as tf:
            temp_name = tf.name
        shutil.copyfile(src, temp_name)
        os.replace(temp_name, dest)
    except Exception as orig_e:
        # best-effort cleanup
        try:
            if temp_name is not None and os.path.exists(temp_name):
                os.remove(temp_name)
        except Exception as e:
            raise FileAccessError(
                f"Error while copying '{src}' (default of a file) "
                f"to '{dest}'. Could not remove temporary file because of {e}!\n"
                f"Original error: {orig_e}"
            ) from orig_e
        raise FileAccessError(
            f"Error while copying '{src}' (default of a file) to '{dest}'.\n"
            f"Error: {orig_e}"
        ) from orig_e
class FileAccessError(Exception):
    """
    Raised when the file cannot be accessed due to permissions or IO errors.

    The underlying error, when there is one, is chained as ``__cause__``.
    """
class DefaultNotJSONSerializableError(Exception):
    """
    Raised when the provided default data is not JSON-serializable.

    This covers both in-memory defaults and default files that are missing or
    do not contain loadable JSON.
    """
class JSONDeserializationError(Exception):
    """
    Raised when JSON data loaded from a file cannot be deserialized.

    The decoding error that triggered it is chained as ``__cause__``.
    """
class JSONFile:
    """
    A .json file on the disk.

    The parsed content is exposed as :attr:`json`. Loading, restoring defaults
    and saving are guarded by a per-instance reentrant lock. The instance can be
    used as a context manager, which saves on a clean exit when ``auto_save`` is
    enabled.
    """

    __path: Path  # Full absolute path
    json: Any
    """Python representation of the JSON data."""
    #: If not None, default data to use when the file at path is missing or corrupted
    __default_data: SensibleTopLevelJSON | None = None
    #: If not None, path to JSON file to use as default data
    __default_path: PathOrSimilar | None = None
    settings: JsonSerializationSettings
    """Serialization settings of this instance"""
    __auto_save: bool
    __preserve: bool

    def __init__(
        self,
        path: PathOrSimilar,
        default_data: SensibleTopLevelJSON | None = None,
        default_path: PathOrSimilar | None = None,
        *,
        settings: JsonSerializationSettings | None = None,
        auto_save: bool = True,
        preserve: bool | None = None,
        strict: bool = False,
        load_file: bool = True,
    ) -> None:
        """
        Create a new JSONFile instance and load data from disk

        Specify defaults preferably with default_data or default_path.

        :param path: path to file (str or PathLike)
        :param default_data: Default data to use if file at path is nonexistent
            or corrupted. Keep in mind that None is serializable as JSON "null" -
            will not throw an error if not specified.
        :param default_path: **Overrides** default_data if provided.
            Path to a JSON file to use as default data.
        :param settings: JsonSerializationSettings object
        :param auto_save: if True, context manager will save on exit
        :param preserve: Preserve the existing file by renaming it to
            <filename>.old.x.ext before writing defaults during recovery.
            ``None`` uses the instance default (False unless set later).
        :param strict: if True, will throw error if file cannot be read
            or if default_data or json in default_path is not JSON-serializable
            if False, will recover gracefully.
            Read :ref:`error_handling` for more info
        :param load_file: True by default, causes file to be loaded on init.
            Set to False to suppress loading.

        :raises ~singlejson.fileutils.FileAccessError:
            if file cannot be accessed (always)
        :raises ~singlejson.fileutils.JSONDeserializationError:
            if ``strict`` is True and an error occurs during loading
        :raises ~singlejson.fileutils.DefaultNotJSONSerializableError:
            if ``strict`` is True and default_data is not JSON-serializable
        """
        self.__path = abs_filename(path)
        self.settings = settings or DEFAULT_SERIALIZATION_SETTINGS
        self.__auto_save = auto_save
        self.__preserve = bool(preserve)  # None means "not requested" -> False
        # Per-instance reentrant lock to make file operations thread-safe
        self._lock = threading.RLock()

        if default_path:
            # Resolve once so that the strict check looks at exactly the file
            # that restore_default() will copy later (e.g. '~' is expanded).
            resolved_default = abs_filename(default_path)
            if strict:
                if not resolved_default.exists():
                    raise DefaultNotJSONSerializableError(
                        f"Default JSON file '{resolved_default}' does not exist."
                    )
                self._validate_default_file(resolved_default)
            # Whether checked or not, the default file is the default source.
            self.__default_path = resolved_default
        elif default_data is not None:
            if strict:
                self._check_default_data(default_data)
                if not isinstance(default_data, str):
                    # list or dict: valid unless it contains
                    # non-serializable values somewhere inside
                    try:
                        self._to_text(default_data, self.settings)
                    except (TypeError, ValueError) as e:
                        raise DefaultNotJSONSerializableError(
                            f"default_data for '{self.__path}' is not "
                            f"JSON-serializable: {e}"
                        ) from e
            # No matter the validity (when not strict), keep a private copy.
            self.__default_data = deepcopy(default_data)
        else:
            # No default specified, use empty dict
            self.__default_data = {}

        # Load from disk (this will create the file if needed and apply defaults)
        if load_file:
            self.reload(strict=strict, preserve=preserve)
        else:
            self.json = None

    @property
    def preserve(self) -> bool:
        """Whether to keep backups of existing files during recovery."""
        return self.__preserve

    @preserve.setter
    def preserve(self, value: bool) -> None:
        self.__preserve = bool(value)

    @staticmethod
    def _to_text(data: Any, settings: JsonSerializationSettings) -> str:
        """Serialize ``data`` to JSON text using the given settings."""
        return dumps(
            data,
            indent=settings.indent,
            sort_keys=settings.sort_keys,
            ensure_ascii=settings.ensure_ascii,
        )

    @staticmethod
    def _next_preserved_path(path: Path) -> Path:
        """Return the first free ``<stem>.old.<n><suffixes>`` sibling of ``path``."""
        suffix = "".join(path.suffixes)
        name = path.name
        stem = name[: -len(suffix)] if suffix else name
        counter = 1
        while True:
            candidate = path.with_name(f"{stem}.old.{counter}{suffix}")
            if not candidate.exists():
                return candidate
            counter += 1

    def _preserve_current_file(self, enabled: bool) -> None:
        """Rename the current file to a backup name if ``enabled`` and it exists."""
        if not enabled or not self.__path.exists():
            return
        try:
            self.__path.rename(self._next_preserved_path(self.__path))
        except OSError as e:
            raise FileAccessError(
                f"Could not preserve existing file '{self.__path}': {e}"
            ) from e

    def _validate_default_file(self, default_file: Path) -> None:
        """Raise unless ``default_file`` can be opened and parsed as JSON."""
        try:
            with default_file.open("r", encoding=self.settings.encoding) as file:
                json_load(file)  # Only the absence of an error matters
        except OSError as e:
            raise FileAccessError(
                f"Cannot access default JSON file '{default_file}': {e}"
            ) from e
        except Exception as e:
            raise DefaultNotJSONSerializableError(
                f"Cannot load default JSON from file '{default_file}': {e}"
            ) from e

    def _check_default_data(self, data: Any) -> None:
        """Strict check: ``data`` is a dict/list, or a str holding valid JSON."""
        if not isinstance(data, (str, list, dict)):
            raise DefaultNotJSONSerializableError(
                f"Default data for '{self.__path}' is not JSON-serializable! \n"
                "It must be a dict, list or string! \n"
                f"Got type: {type(data)}"
            )
        if isinstance(data, str):
            # Validate str defaults ('{"a":1}' etc)
            try:
                json_loads(data)
            except (TypeError, ValueError) as e:
                raise DefaultNotJSONSerializableError(
                    f"default_data for '{self.__path}' isn't JSON-serializable!"
                ) from e

    def restore_default(
        self, strict: bool = False, preserve: bool | None = None
    ) -> None:
        """
        Revert the file to the default either by copying the default
        to the file path or by writing the default data to the file.

        Afterwards the freshly written file is loaded into :attr:`json`. If
        that content cannot be decoded, an empty object is written and used.

        :param strict: if True, will throw error if file cannot be read
            or if default_data or json in default_path is not JSON-serializable
            if False, will recover gracefully.
            Read :ref:`error_handling` for more info
        :param preserve: Preserve the existing file by renaming it to
            <filename>.old.x.ext before writing defaults during recovery.
            ``None`` uses the instance setting.
        :raises ~singlejson.fileutils.DefaultNotJSONSerializableError:
            if default data is not JSON-serializable and ``strict`` is true
        :raises ~singlejson.fileutils.FileAccessError:
            if file cannot be accessed (always)
        """
        keep_backup = self.__preserve if preserve is None else preserve
        encoding = self.settings.encoding

        with self._lock:
            if self.__default_path:
                default_file = Path(self.__default_path)
                if default_file.exists():
                    if strict:
                        self._validate_default_file(default_file)
                    self._preserve_current_file(keep_backup)
                    _atomic_copy_file(default_file, self.__path)
                else:
                    # Default file does not exist, create empty file
                    if strict:
                        raise DefaultNotJSONSerializableError(
                            f"Default JSON file '{default_file}' does not exist!"
                        )
                    logger.warning(
                        "Default JSON file '%s' does not exist!\nWriting empty {}!",
                        default_file,
                    )
                    self._preserve_current_file(keep_backup)
                    _atomic_write_text(self.__path, "{}", encoding=encoding)
            else:
                if strict:
                    self._check_default_data(self.__default_data)
                try:
                    if isinstance(self.__default_data, str):
                        # String defaults are JSON text and are written as-is;
                        # validity was checked above when strict=True.
                        text = self.__default_data
                    else:
                        text = self._to_text(self.__default_data, self.settings)
                except (TypeError, ValueError) as e:
                    if strict:
                        raise DefaultNotJSONSerializableError(
                            f"Default for file '{self.__path}' is not serializable!"
                            f"\nError: {e}"
                        ) from e
                    logger.warning(
                        "Default data for json file '%s' is not serializable!\n"
                        "Got error: %s\n"
                        "Writing empty {}!",
                        self.__path,
                        e,
                    )
                    text = "{}"
                self._preserve_current_file(keep_backup)
                _atomic_write_text(self.__path, text, encoding=encoding)

            # Now try loading the default we just wrote
            try:
                with self.__path.open("r", encoding=encoding) as file:
                    self.json = json_load(file)
            except OSError as e:
                raise FileAccessError(f"Cannot read file '{self.__path}': {e}") from e
            except (json.JSONDecodeError, UnicodeDecodeError) as e2:
                # No need to check for strict here: with strict=True an invalid
                # default has already raised above, so this is recovery only.
                logger.warning(
                    "Recovery also failed for '%s'. Falling back to empty object.\n"
                    "Decoding error: %s",
                    self.__path,
                    e2,
                )
                _atomic_write_text(self.__path, "{}", encoding=encoding)
                self.json = {}

    def reload(self, strict: bool = False, preserve: bool | None = None) -> None:
        """
        Reload from disk, recovering to default on invalid JSON.
        Always raises FileAccessError on permission issues.

        Content that cannot be decoded with the configured encoding is treated
        like invalid JSON.

        :param strict: if True, will throw error if file cannot be read
            or if default_data or json in default_path is not JSON-serializable
            if False, will recover gracefully.
            Read :ref:`error_handling` for more info
        :param preserve: Preserve the existing file by renaming it to
            <filename>.old.x.ext before writing defaults during recovery.
            ``None`` uses the instance setting (False unless changed).
        :type strict: bool
        :raises ~singlejson.fileutils.FileAccessError:
            if file cannot be accessed (always)
        :raises ~singlejson.fileutils.JSONDeserializationError:
            if strict is True and the file content is invalid
        :raises ~singlejson.fileutils.DefaultNotJSONSerializableError:
            if strict is True, the file is missing and the default is invalid
        """
        # Use the per-instance lock to guard load/recovery operations
        with self._lock:
            keep_backup = self.__preserve if preserve is None else preserve

            if not self.__path.exists():
                # restore_default() creates the file and loads it into
                # self.json, so there is nothing left to read here.
                self.restore_default(strict, preserve=keep_backup)
                return

            try:
                with self.__path.open("r", encoding=self.settings.encoding) as file:
                    self.json = json_load(file)
            except OSError as e:
                raise FileAccessError(f"Cannot read file '{self.__path}': {e}") from e
            except (json.JSONDecodeError, UnicodeDecodeError) as e:
                # Loading failed. Recover to default if allowed.
                if strict:
                    raise JSONDeserializationError(
                        f"Cannot read json from file '{self.__path}': {e}"
                    ) from e
                logger.warning(
                    "Cannot read json from file '%s'. Using default!\n"
                    "Decoding error: %s",
                    self.__path,
                    e,
                )
                # Don't retry loading here; restore_default() handles recovery
                self.restore_default(strict, preserve=keep_backup)

    def save(self, settings: JsonSerializationSettings | None = None) -> None:
        """
        Save the data to the disk atomically.

        The data is serialized before anything is written, so a serialization
        error leaves the existing file untouched.

        :param settings: :class:`JsonSerializationSettings` object
            (``None`` for instance settings)
        :raises ~singlejson.fileutils.FileAccessError:
            if the file cannot be written
        """
        settings = settings or self.settings
        # guard save with the per-instance lock
        with self._lock:
            try:
                # Ensure directory exists
                self.__path.parent.mkdir(parents=True, exist_ok=True)
                # Serialize to text then atomically write
                text = self._to_text(self.json, settings)
                _atomic_write_text(self.__path, text, encoding=settings.encoding)
            except OSError as e:
                raise FileAccessError(f"Cannot write file '{self.__path}': {e}") from e

    # Context manager support
    def __enter__(self) -> JSONFile:
        """
        Enter the context manager.

        :return: self
        """
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc: BaseException | None,
        tb: TracebackType | None,
    ) -> None:
        """
        Exit the context manager and save if auto_save is True
        and no exception occurred.

        :param exc_type: exception type
        :param exc: exception instance
        :param tb: traceback
        """
        if exc_type is None and self.__auto_save:
            self.save()
# Default settings instance used by JSONFile.save() when not provided DEFAULT_SERIALIZATION_SETTINGS = JsonSerializationSettings() """Default JsonSerializationSettings used by JSONFile instances with indent=4, sort_keys=True, ensure_ascii=False"""