app.hasher

Evidence hashing utilities for forensic integrity verification.

Provides functions to compute SHA-256 and MD5 digests of evidence files in a single streaming pass. These hashes are recorded during evidence intake and re-verified before report generation to ensure that the evidence has not been modified during analysis.

The file is read in chunks of CHUNK_SIZE bytes to keep memory usage bounded even for multi-gigabyte disk images. An optional progress callback is supported for UI feedback during long-running hash operations.

Attributes:
  • CHUNK_SIZE: Number of bytes read per iteration (4 MiB).
  1"""Evidence hashing utilities for forensic integrity verification.
  2
  3Provides functions to compute SHA-256 and MD5 digests of evidence files in
  4a single streaming pass.  These hashes are recorded during evidence intake
  5and re-verified before report generation to ensure that the evidence has
  6not been modified during analysis.
  7
  8The file is read in chunks of :data:`CHUNK_SIZE` bytes to keep memory
  9usage bounded even for multi-gigabyte disk images.  An optional progress
 10callback is supported for UI feedback during long-running hash operations.
 11
 12Attributes:
 13    CHUNK_SIZE: Number of bytes read per iteration (4 MiB).
 14"""
 15
 16from __future__ import annotations
 17
 18from hashlib import md5, sha256
 19from pathlib import Path
 20from typing import Callable, Protocol, TypedDict
 21
 22__all__ = [
 23    "compute_hashes",
 24    "compute_hashes_multi",
 25    "verify_hash",
 26    "verify_hashes_multi",
 27]
 28
 29CHUNK_SIZE = 4 * 1024 * 1024
 30
 31
 32class HashResult(TypedDict):
 33    """Hash output produced for one evidence file."""
 34
 35    sha256: str
 36    md5: str
 37    size_bytes: int
 38
 39
 40class _Hasher(Protocol):
 41    """Structural protocol matching :mod:`hashlib` hash objects."""
 42
 43    def update(self, data: bytes, /) -> None: ...
 44    def hexdigest(self) -> str: ...
 45
 46
 47def _compute_digests(
 48    filepath: str | Path,
 49    hashers: dict[str, _Hasher],
 50    progress_callback: Callable[[int, int], None] | None = None,
 51) -> tuple[dict[str, str], int]:
 52    """Stream a file through one or more hash algorithms simultaneously.
 53
 54    Args:
 55        filepath: Path to the file to hash.
 56        hashers: Mapping of algorithm name to hasher instance
 57            (e.g. ``{"sha256": sha256()}``).
 58        progress_callback: Optional ``(bytes_read, total_bytes)`` callback
 59            invoked after each chunk.
 60
 61    Returns:
 62        A tuple of ``(digests, total_bytes)`` where *digests* maps each
 63        algorithm name to its hex digest string.
 64    """
 65    path = Path(filepath)
 66    total_bytes = path.stat().st_size
 67    bytes_read = 0
 68
 69    if progress_callback is not None:
 70        progress_callback(0, total_bytes)
 71
 72    with path.open("rb") as evidence_file:
 73        while True:
 74            chunk = evidence_file.read(CHUNK_SIZE)
 75            if not chunk:
 76                break
 77
 78            for hasher in hashers.values():
 79                hasher.update(chunk)
 80            bytes_read += len(chunk)
 81
 82            if progress_callback is not None:
 83                progress_callback(bytes_read, total_bytes)
 84
 85    return {name: hasher.hexdigest() for name, hasher in hashers.items()}, total_bytes
 86
 87
 88def compute_hashes(
 89    filepath: str | Path,
 90    progress_callback: Callable[[int, int], None] | None = None,
 91) -> HashResult:
 92    """Compute SHA-256 and MD5 digests in a single streaming pass.
 93
 94    Args:
 95        filepath: Path to the evidence file.
 96        progress_callback: Optional ``(bytes_read, total_bytes)`` callback
 97            invoked after each 4 MiB chunk for progress reporting.
 98
 99    Returns:
100        A :class:`HashResult` dictionary containing ``sha256``, ``md5``,
101        and ``size_bytes`` keys.
102    """
103    digests, total_bytes = _compute_digests(
104        filepath,
105        {"sha256": sha256(), "md5": md5()},
106        progress_callback=progress_callback,
107    )
108    return {
109        "sha256": digests["sha256"],
110        "md5": digests["md5"],
111        "size_bytes": total_bytes,
112    }
113
114
def compute_sha256(filepath: str | Path) -> str:
    """Return the SHA-256 hex digest of one file.

    Delegates the streaming read to :func:`_compute_digests` with a single
    SHA-256 hasher and discards the byte count it reports.

    Args:
        filepath: Path to the file to hash.

    Returns:
        The hex-encoded SHA-256 digest string.
    """
    sha_only = {"sha256": sha256()}
    hex_by_name, _size = _compute_digests(filepath, sha_only)
    return hex_by_name["sha256"]
126
127
def verify_hash(
    filepath: str | Path,
    expected_sha256: str,
    return_computed: bool = False,
) -> bool | tuple[bool, str]:
    """Check a file's current SHA-256 against a previously recorded digest.

    The file is re-hashed via :func:`compute_sha256`.  The expected value
    is stripped of surrounding whitespace and lower-cased before the
    comparison, so differences in case or padding do not cause a mismatch.

    Args:
        filepath: Path to the evidence file.
        expected_sha256: The SHA-256 digest recorded at intake.
        return_computed: When *True*, also return the freshly computed
            digest.

    Returns:
        The boolean match result when *return_computed* is *False*;
        otherwise the tuple ``(match, computed_sha256)``.
    """
    # Hash first, normalise second: same evaluation order as before.
    actual = compute_sha256(filepath)
    wanted = expected_sha256.strip().lower()
    is_match = actual == wanted
    return (is_match, actual) if return_computed else is_match
153
154
def compute_hashes_multi(
    filepaths: list[Path],
    progress_callback: Callable[[int, int], None] | None = None,
) -> list[HashResult]:
    """Hash every file in *filepaths* with SHA-256 and MD5.

    Files are processed one at a time, in input order, through
    :func:`compute_hashes`.  Each result dict additionally receives a
    ``path`` key so callers can tell which file it belongs to.

    Args:
        filepaths: List of evidence file paths to hash.
        progress_callback: Optional ``(bytes_read, total_bytes)`` callback,
            passed to :func:`compute_hashes` for every file.

    Returns:
        One :class:`HashResult` dict per input path, in the same order,
        each extended with ``path`` set to ``str()`` of the input path.
    """

    def _hash_each():
        # Yields results lazily; the caller below materialises them at once.
        for evidence_path in filepaths:
            entry = compute_hashes(evidence_path, progress_callback)
            # "path" is not declared on HashResult, hence the targeted ignore.
            entry["path"] = str(evidence_path)  # type: ignore[typeddict-unknown-key]
            yield entry

    return list(_hash_each())
180
181
def verify_hashes_multi(
    file_hash_entries: list[dict[str, str | int]],
) -> tuple[bool, list[dict[str, object]]]:
    """Verify multiple evidence files against their recorded SHA-256 digests.

    Each entry in *file_hash_entries* must have ``path`` and ``sha256``
    keys.  Missing files are reported as failures with ``computed`` set to
    ``"FILE_MISSING"``; this includes a file that disappears between the
    existence check and the read, so one vanished file no longer aborts
    the whole batch.

    Args:
        file_hash_entries: List of dicts with ``path`` (str) and
            ``sha256`` (str) keys from intake-time hashing.

    Returns:
        A tuple ``(all_passed, details)`` where *all_passed* is ``True``
        only if every file matches (and is ``True`` for an empty list),
        and *details* is a list of per-file result dicts with ``path``,
        ``match``, ``expected``, and ``computed`` keys, in input order.

    Raises:
        KeyError: If an entry lacks the ``path`` or ``sha256`` key.
        OSError: If an existing path cannot be read for a reason other
            than having gone missing (e.g. it is a directory).
    """
    details: list[dict[str, object]] = []
    for entry in file_hash_entries:
        path = Path(str(entry["path"]))
        expected = str(entry["sha256"]).strip().lower()

        computed = "FILE_MISSING"
        if path.exists():
            try:
                computed = compute_sha256(path)
            except FileNotFoundError:
                # The file vanished between the existence check and the
                # read; report it as missing like any other absent file.
                computed = "FILE_MISSING"

        details.append({
            "path": str(path),
            # The upper-case sentinel can never equal the lower-cased
            # expected digest, so a missing file is always a mismatch.
            "match": computed == expected,
            "expected": expected,
            "computed": computed,
        })

    all_ok = all(detail["match"] for detail in details)
    return all_ok, details
def compute_hashes( filepath: str | pathlib.Path, progress_callback: Optional[Callable[[int, int], NoneType]] = None) -> app.hasher.HashResult:
 89def compute_hashes(
 90    filepath: str | Path,
 91    progress_callback: Callable[[int, int], None] | None = None,
 92) -> HashResult:
 93    """Compute SHA-256 and MD5 digests in a single streaming pass.
 94
 95    Args:
 96        filepath: Path to the evidence file.
 97        progress_callback: Optional ``(bytes_read, total_bytes)`` callback
 98            invoked after each 4 MiB chunk for progress reporting.
 99
100    Returns:
101        A :class:`HashResult` dictionary containing ``sha256``, ``md5``,
102        and ``size_bytes`` keys.
103    """
104    digests, total_bytes = _compute_digests(
105        filepath,
106        {"sha256": sha256(), "md5": md5()},
107        progress_callback=progress_callback,
108    )
109    return {
110        "sha256": digests["sha256"],
111        "md5": digests["md5"],
112        "size_bytes": total_bytes,
113    }

Compute SHA-256 and MD5 digests in a single streaming pass.

Arguments:
  • filepath: Path to the evidence file.
  • progress_callback: Optional (bytes_read, total_bytes) callback invoked after each 4 MiB chunk for progress reporting.
Returns:

A HashResult dictionary containing sha256, md5, and size_bytes keys.

def compute_hashes_multi( filepaths: list[pathlib.Path], progress_callback: Optional[Callable[[int, int], NoneType]] = None) -> list[app.hasher.HashResult]:
def compute_hashes_multi(
    filepaths: list[Path],
    progress_callback: Callable[[int, int], None] | None = None,
) -> list[HashResult]:
    """Hash every file in *filepaths* with SHA-256 and MD5.

    Files are processed one at a time, in input order, through
    :func:`compute_hashes`.  Each result dict additionally receives a
    ``path`` key so callers can tell which file it belongs to.

    Args:
        filepaths: List of evidence file paths to hash.
        progress_callback: Optional ``(bytes_read, total_bytes)`` callback,
            passed to :func:`compute_hashes` for every file.

    Returns:
        One :class:`HashResult` dict per input path, in the same order,
        each extended with ``path`` set to ``str()`` of the input path.
    """

    def _hash_each():
        # Yields results lazily; the caller below materialises them at once.
        for evidence_path in filepaths:
            entry = compute_hashes(evidence_path, progress_callback)
            # "path" is not declared on HashResult, hence the targeted ignore.
            entry["path"] = str(evidence_path)  # type: ignore[typeddict-unknown-key]
            yield entry

    return list(_hash_each())

Compute SHA-256 and MD5 digests for each file in a list.

Each file is hashed independently via compute_hashes(). The returned list preserves the input order and augments each result with a path key so the caller can correlate results back to files.

Arguments:
  • filepaths: List of evidence file paths to hash.
  • progress_callback: Optional (bytes_read, total_bytes) callback forwarded to compute_hashes() for each file.
Returns:

A list of HashResult dicts, each with an additional path key containing the string representation of the file.

def verify_hash( filepath: str | pathlib.Path, expected_sha256: str, return_computed: bool = False) -> bool | tuple[bool, str]:
def verify_hash(
    filepath: str | Path,
    expected_sha256: str,
    return_computed: bool = False,
) -> bool | tuple[bool, str]:
    """Check a file's current SHA-256 against a previously recorded digest.

    The file is re-hashed via :func:`compute_sha256`.  The expected value
    is stripped of surrounding whitespace and lower-cased before the
    comparison, so differences in case or padding do not cause a mismatch.

    Args:
        filepath: Path to the evidence file.
        expected_sha256: The SHA-256 digest recorded at intake.
        return_computed: When *True*, also return the freshly computed
            digest.

    Returns:
        The boolean match result when *return_computed* is *False*;
        otherwise the tuple ``(match, computed_sha256)``.
    """
    # Hash first, normalise second: same evaluation order as before.
    actual = compute_sha256(filepath)
    wanted = expected_sha256.strip().lower()
    is_match = actual == wanted
    return (is_match, actual) if return_computed else is_match

Re-compute SHA-256 for a file and compare against an expected value.

Used before report generation to verify that evidence has not been modified since intake.

Arguments:
  • filepath: Path to the evidence file.
  • expected_sha256: The SHA-256 digest recorded at intake.
  • return_computed: When True, return both the match result and the computed digest.
Returns:

True / False when return_computed is False, or a tuple (match, computed_sha256) when it is True.

def verify_hashes_multi( file_hash_entries: list[dict[str, str | int]]) -> tuple[bool, list[dict[str, object]]]:
def verify_hashes_multi(
    file_hash_entries: list[dict[str, str | int]],
) -> tuple[bool, list[dict[str, object]]]:
    """Verify multiple evidence files against their recorded SHA-256 digests.

    Each entry in *file_hash_entries* must have ``path`` and ``sha256``
    keys.  Missing files are reported as failures with ``computed`` set to
    ``"FILE_MISSING"``; this includes a file that disappears between the
    existence check and the read, so one vanished file no longer aborts
    the whole batch.

    Args:
        file_hash_entries: List of dicts with ``path`` (str) and
            ``sha256`` (str) keys from intake-time hashing.

    Returns:
        A tuple ``(all_passed, details)`` where *all_passed* is ``True``
        only if every file matches (and is ``True`` for an empty list),
        and *details* is a list of per-file result dicts with ``path``,
        ``match``, ``expected``, and ``computed`` keys, in input order.

    Raises:
        KeyError: If an entry lacks the ``path`` or ``sha256`` key.
        OSError: If an existing path cannot be read for a reason other
            than having gone missing (e.g. it is a directory).
    """
    details: list[dict[str, object]] = []
    for entry in file_hash_entries:
        path = Path(str(entry["path"]))
        expected = str(entry["sha256"]).strip().lower()

        computed = "FILE_MISSING"
        if path.exists():
            try:
                computed = compute_sha256(path)
            except FileNotFoundError:
                # The file vanished between the existence check and the
                # read; report it as missing like any other absent file.
                computed = "FILE_MISSING"

        details.append({
            "path": str(path),
            # The upper-case sentinel can never equal the lower-cased
            # expected digest, so a missing file is always a mismatch.
            "match": computed == expected,
            "expected": expected,
            "computed": computed,
        })

    all_ok = all(detail["match"] for detail in details)
    return all_ok, details

Verify multiple evidence files against their recorded SHA-256 digests.

Each entry in file_hash_entries must have path and sha256 keys. Missing files are reported as failures.

Arguments:
  • file_hash_entries: List of dicts with path (str) and sha256 (str) keys from intake-time hashing.
Returns:

A tuple (all_passed, details) where all_passed is True only if every file matches, and details is a list of per-file result dicts with path, match, expected, and computed keys.