# Source code for pyretis.core.provenance
# Copyright (c) 2026, PyRETIS Development Team.
# Distributed under the LGPLv2.1+ License. See LICENSE for more info.
"""Run provenance metadata (P7.3).
Collects enough metadata to detect "same directory, different
code/settings" mistakes in production runs. Not a full environment
capture — just the fields that matter for scientific auditing.
"""
import hashlib
import logging
import os
import subprocess # nosec B404
import sys
import numpy as np
# Module-level logger named after this module. NullHandler is a no-op
# handler; records still propagate to whatever handlers the application
# has configured on ancestor loggers.
logger = logging.getLogger(__name__)
logger.addHandler(logging.NullHandler())
def collect_provenance(input_file=None):
    """Return a dict of run provenance metadata.

    Parameters
    ----------
    input_file : str or path-like, optional
        Path to the TOML/rst input file. Its SHA-256 hash is recorded
        so a restart can detect if the settings changed underneath.

    Returns
    -------
    info : dict
        Always contains the keys ``pyretis_version``,
        ``python_version`` (``"major.minor.micro"``),
        ``numpy_version`` and ``git_commit``. The key ``input_hash``
        is added only when `input_file` was given and could be hashed.
        Otherwise the key is absent (it is never stored as ``None``),
        so callers should read it with ``info.get('input_hash')``.

    """
    # The first three fields of sys.version_info are major, minor, micro.
    python_version = '.'.join(map(str, sys.version_info[:3]))
    info = {
        'pyretis_version': _pyretis_version(),
        'python_version': python_version,
        'numpy_version': np.__version__,
        'git_commit': _git_commit(),
    }
    if input_file is None:
        return info
    digest = _file_sha256(input_file)
    # A failed hash leaves the key out instead of storing None.
    if digest is not None:
        info['input_hash'] = digest
    return info
def _pyretis_version():
    """Return the PyRETIS version string.

    Returns
    -------
    out : object
        The value of ``pyretis.version.VERSION``, or the string
        ``'unknown'`` when that name cannot be imported.

    """
    # Imported lazily so that a missing version module is not fatal.
    try:
        from pyretis.version import VERSION as installed_version
    except ImportError:
        return 'unknown'
    return installed_version
def _git_commit():
    """Return an abbreviated (12 character) git revision for the code.

    ``git rev-parse HEAD`` is first run in the directory that holds
    this module. If it cannot be run, times out, or exits with a
    non-zero status, ``pyretis.version.GIT_REVISION`` is used instead.

    Returns
    -------
    out : str
        The first 12 characters of the revision, or ``'unknown'`` when
        neither source is available.

    """
    # NOTE(review): git locates the repository by searching upwards
    # from cwd, so an install nested inside an unrelated checkout would
    # presumably report that checkout's HEAD -- confirm if this matters.
    completed = None
    try:
        completed = subprocess.run(  # nosec B603 B607
            ['git', 'rev-parse', 'HEAD'],
            capture_output=True,
            text=True,
            timeout=5,
            cwd=os.path.dirname(os.path.abspath(__file__)),
        )
    except (OSError, subprocess.TimeoutExpired):
        # A missing git executable raises FileNotFoundError, which is
        # a subclass of OSError; fall through to the packaged revision.
        completed = None
    if completed is not None and completed.returncode == 0:
        return completed.stdout.strip()[:12]
    try:
        from pyretis.version import GIT_REVISION as packaged_revision
    except ImportError:
        return 'unknown'
    return packaged_revision[:12]
def _file_sha256(path):
    """Return the SHA-256 hex digest of a file.

    The file is read in binary mode in 8192 byte pieces, so it is
    never loaded into memory as a whole.

    Parameters
    ----------
    path : str or path-like
        The file to hash.

    Returns
    -------
    out : str or None
        The hexadecimal digest, or None if the file could not be
        opened or read (a warning is logged in that case).

    """
    digest = hashlib.sha256()
    try:
        with open(path, 'rb') as stream:
            while True:
                piece = stream.read(8192)
                if not piece:
                    # An empty read signals the end of the file.
                    break
                digest.update(piece)
    except OSError:
        logger.warning('Could not hash input file: %s', path)
        return None
    return digest.hexdigest()