app.hasher
Evidence hashing utilities for forensic integrity verification.
Provides functions to compute SHA-256 and MD5 digests of evidence files in a single streaming pass. These hashes are recorded during evidence intake and re-verified before report generation to ensure that the evidence has not been modified during analysis.
The file is read in chunks of CHUNK_SIZE bytes to keep memory
usage bounded even for multi-gigabyte disk images. An optional progress
callback is supported for UI feedback during long-running hash operations.
Attributes:
- CHUNK_SIZE: Number of bytes read per iteration (4 MiB).
"""Evidence hashing utilities for forensic integrity verification.

Provides functions to compute SHA-256 and MD5 digests of evidence files in
a single streaming pass. These hashes are recorded during evidence intake
and re-verified before report generation to ensure that the evidence has
not been modified during analysis.

The file is read in chunks of :data:`CHUNK_SIZE` bytes to keep memory
usage bounded even for multi-gigabyte disk images. An optional progress
callback is supported for UI feedback during long-running hash operations.

Attributes:
    CHUNK_SIZE: Number of bytes read per iteration (4 MiB).
"""

from __future__ import annotations

from hashlib import md5, sha256
from pathlib import Path
from typing import Callable, Protocol, TypedDict

__all__ = [
    "compute_hashes",
    "compute_hashes_multi",
    # compute_sha256 is a public, documented helper (no leading underscore)
    # and was previously missing from __all__ by oversight.
    "compute_sha256",
    "verify_hash",
    "verify_hashes_multi",
]

# 4 MiB per read keeps memory bounded even for multi-gigabyte images.
CHUNK_SIZE = 4 * 1024 * 1024


class HashResult(TypedDict):
    """Hash output produced for one evidence file."""

    sha256: str  # lowercase hex SHA-256 digest
    md5: str  # lowercase hex MD5 digest
    size_bytes: int  # file size reported by stat() at hashing time


class _Hasher(Protocol):
    """Structural protocol matching :mod:`hashlib` hash objects."""

    def update(self, data: bytes, /) -> None: ...
    def hexdigest(self) -> str: ...


def _compute_digests(
    filepath: str | Path,
    hashers: dict[str, _Hasher],
    progress_callback: Callable[[int, int], None] | None = None,
) -> tuple[dict[str, str], int]:
    """Stream a file through one or more hash algorithms simultaneously.

    Args:
        filepath: Path to the file to hash.
        hashers: Mapping of algorithm name to hasher instance
            (e.g. ``{"sha256": sha256()}``).
        progress_callback: Optional ``(bytes_read, total_bytes)`` callback
            invoked once with ``(0, total_bytes)`` before reading starts,
            then again after each chunk.

    Returns:
        A tuple of ``(digests, total_bytes)`` where *digests* maps each
        algorithm name to its hex digest string.

    Raises:
        OSError: Propagated if the file cannot be stat'ed or read
            (including ``FileNotFoundError``).
    """
    path = Path(filepath)
    total_bytes = path.stat().st_size
    bytes_read = 0

    # Emit an initial 0% tick so UIs can show the bar immediately.
    if progress_callback is not None:
        progress_callback(0, total_bytes)

    with path.open("rb") as evidence_file:
        # Single pass: every hasher consumes the same chunk, so the file
        # is traversed only once regardless of how many algorithms run.
        while chunk := evidence_file.read(CHUNK_SIZE):
            for hasher in hashers.values():
                hasher.update(chunk)
            bytes_read += len(chunk)

            if progress_callback is not None:
                progress_callback(bytes_read, total_bytes)

    return {name: hasher.hexdigest() for name, hasher in hashers.items()}, total_bytes


def compute_hashes(
    filepath: str | Path,
    progress_callback: Callable[[int, int], None] | None = None,
) -> HashResult:
    """Compute SHA-256 and MD5 digests in a single streaming pass.

    Args:
        filepath: Path to the evidence file.
        progress_callback: Optional ``(bytes_read, total_bytes)`` callback
            invoked after each 4 MiB chunk for progress reporting.

    Returns:
        A :class:`HashResult` dictionary containing ``sha256``, ``md5``,
        and ``size_bytes`` keys.

    Raises:
        OSError: Propagated if the file cannot be stat'ed or read.
    """
    digests, total_bytes = _compute_digests(
        filepath,
        {"sha256": sha256(), "md5": md5()},
        progress_callback=progress_callback,
    )
    return {
        "sha256": digests["sha256"],
        "md5": digests["md5"],
        "size_bytes": total_bytes,
    }


def compute_sha256(filepath: str | Path) -> str:
    """Compute the SHA-256 hex digest for a single file.

    Args:
        filepath: Path to the file to hash.

    Returns:
        Lowercase hex-encoded SHA-256 digest string.

    Raises:
        OSError: Propagated if the file cannot be stat'ed or read.
    """
    digests, _ = _compute_digests(filepath, {"sha256": sha256()})
    return digests["sha256"]


def verify_hash(
    filepath: str | Path,
    expected_sha256: str,
    return_computed: bool = False,
) -> bool | tuple[bool, str]:
    """Re-compute SHA-256 for a file and compare against an expected value.

    Used before report generation to verify that evidence has not been
    modified since intake. The expected digest is normalized (stripped,
    lowercased) before comparison so case and padding differences from
    intake records do not cause false mismatches.

    Args:
        filepath: Path to the evidence file.
        expected_sha256: The SHA-256 digest recorded at intake.
        return_computed: When *True*, return both the match result and the
            computed digest.

    Returns:
        ``True`` / ``False`` when *return_computed* is *False*, or a tuple
        ``(match, computed_sha256)`` when it is *True*.

    Raises:
        OSError: Propagated if the file cannot be stat'ed or read.
    """
    computed_sha256 = compute_sha256(filepath)
    matches = computed_sha256 == expected_sha256.strip().lower()
    if return_computed:
        return matches, computed_sha256
    return matches


def compute_hashes_multi(
    filepaths: list[Path],
    progress_callback: Callable[[int, int], None] | None = None,
) -> list[HashResult]:
    """Compute SHA-256 and MD5 digests for each file in a list.

    Each file is hashed independently via :func:`compute_hashes`. The
    returned list preserves the input order and augments each result with
    a ``path`` key so the caller can correlate results back to files.

    Args:
        filepaths: List of evidence file paths to hash.
        progress_callback: Optional ``(bytes_read, total_bytes)`` callback
            forwarded to :func:`compute_hashes` for each file (progress is
            per-file, not cumulative across the batch).

    Returns:
        A list of :class:`HashResult` dicts, each with an additional
        ``path`` key containing the string representation of the file.

    Raises:
        OSError: Propagated if any file cannot be stat'ed or read.
    """
    results: list[HashResult] = []
    for filepath in filepaths:
        result = compute_hashes(filepath, progress_callback)
        result["path"] = str(filepath)  # type: ignore[typeddict-unknown-key]
        results.append(result)
    return results


def verify_hashes_multi(
    file_hash_entries: list[dict[str, str | int]],
) -> tuple[bool, list[dict[str, object]]]:
    """Verify multiple evidence files against their recorded SHA-256 digests.

    Each entry in *file_hash_entries* must have ``path`` and ``sha256``
    keys. Missing files are reported as failures (``computed`` is set to
    the sentinel string ``"FILE_MISSING"``) rather than raising.

    Args:
        file_hash_entries: List of dicts with ``path`` (str) and
            ``sha256`` (str) keys from intake-time hashing.

    Returns:
        A tuple ``(all_passed, details)`` where *all_passed* is ``True``
        only if every file matches, and *details* is a list of per-file
        result dicts with ``path``, ``match``, ``expected``, and
        ``computed`` keys.
    """
    all_ok = True
    details: list[dict[str, object]] = []
    for entry in file_hash_entries:
        path = Path(str(entry["path"]))
        expected = str(entry["sha256"]).strip().lower()
        if not path.exists():
            details.append({
                "path": str(path),
                "match": False,
                "expected": expected,
                "computed": "FILE_MISSING",
            })
            all_ok = False
            continue
        computed = compute_sha256(path)
        match = computed == expected
        details.append({
            "path": str(path),
            "match": match,
            "expected": expected,
            "computed": computed,
        })
        if not match:
            all_ok = False
    return all_ok, details
def compute_hashes(
    filepath: str | Path,
    progress_callback: Callable[[int, int], None] | None = None,
) -> HashResult:
    """Hash an evidence file with SHA-256 and MD5 in one streaming read.

    Args:
        filepath: Path to the evidence file.
        progress_callback: Optional ``(bytes_read, total_bytes)`` callback
            invoked after each 4 MiB chunk for progress reporting.

    Returns:
        A :class:`HashResult` dictionary containing ``sha256``, ``md5``,
        and ``size_bytes`` keys.
    """
    # Both algorithms share a single pass over the file.
    digest_map, file_size = _compute_digests(
        filepath,
        {"sha256": sha256(), "md5": md5()},
        progress_callback=progress_callback,
    )
    report: HashResult = {
        "sha256": digest_map["sha256"],
        "md5": digest_map["md5"],
        "size_bytes": file_size,
    }
    return report
Compute SHA-256 and MD5 digests in a single streaming pass.
Arguments:
- filepath: Path to the evidence file.
- progress_callback: Optional `(bytes_read, total_bytes)` callback invoked after each 4 MiB chunk for progress reporting.
Returns:
A `HashResult` dictionary containing `sha256`, `md5`, and `size_bytes` keys.
def compute_hashes_multi(
    filepaths: list[Path],
    progress_callback: Callable[[int, int], None] | None = None,
) -> list[HashResult]:
    """Hash every file in *filepaths*, preserving input order.

    Each file is processed independently through :func:`compute_hashes`,
    and each per-file result is tagged with a ``path`` key so callers can
    map results back to their inputs.

    Args:
        filepaths: List of evidence file paths to hash.
        progress_callback: Optional ``(bytes_read, total_bytes)`` callback
            forwarded to :func:`compute_hashes` for each file.

    Returns:
        A list of :class:`HashResult` dicts, each with an additional
        ``path`` key containing the string representation of the file.
    """
    hashed: list[HashResult] = []
    for path in filepaths:
        entry = compute_hashes(path, progress_callback)
        entry["path"] = str(path)  # type: ignore[typeddict-unknown-key]
        hashed.append(entry)
    return hashed
Compute SHA-256 and MD5 digests for each file in a list.
Each file is hashed independently via `compute_hashes()`. The
returned list preserves the input order and augments each result with
a `path` key so the caller can correlate results back to files.
Arguments:
- filepaths: List of evidence file paths to hash.
- progress_callback: Optional `(bytes_read, total_bytes)` callback forwarded to `compute_hashes()` for each file.
Returns:
A list of `HashResult` dicts, each with an additional `path` key containing the string representation of the file.
def verify_hash(
    filepath: str | Path,
    expected_sha256: str,
    return_computed: bool = False,
) -> bool | tuple[bool, str]:
    """Check a file's current SHA-256 against the digest recorded at intake.

    Used before report generation to verify that evidence has not been
    modified since intake.

    Args:
        filepath: Path to the evidence file.
        expected_sha256: The SHA-256 digest recorded at intake.
        return_computed: When *True*, return both the match result and the
            computed digest.

    Returns:
        ``True`` / ``False`` when *return_computed* is *False*, or a tuple
        ``(match, computed_sha256)`` when it is *True*.
    """
    # Normalize the stored digest so case/whitespace differences from the
    # intake record do not produce false mismatches.
    normalized_expected = expected_sha256.strip().lower()
    actual_sha256 = compute_sha256(filepath)
    is_match = actual_sha256 == normalized_expected
    return (is_match, actual_sha256) if return_computed else is_match
Re-compute SHA-256 for a file and compare against an expected value.
Used before report generation to verify that evidence has not been modified since intake.
Arguments:
- filepath: Path to the evidence file.
- expected_sha256: The SHA-256 digest recorded at intake.
- return_computed: When True, return both the match result and the computed digest.
Returns:
`True` / `False` when *return_computed* is *False*, or a tuple `(match, computed_sha256)` when it is *True*.
183def verify_hashes_multi( 184 file_hash_entries: list[dict[str, str | int]], 185) -> tuple[bool, list[dict[str, object]]]: 186 """Verify multiple evidence files against their recorded SHA-256 digests. 187 188 Each entry in *file_hash_entries* must have ``path`` and ``sha256`` 189 keys. Missing files are reported as failures. 190 191 Args: 192 file_hash_entries: List of dicts with ``path`` (str) and 193 ``sha256`` (str) keys from intake-time hashing. 194 195 Returns: 196 A tuple ``(all_passed, details)`` where *all_passed* is ``True`` 197 only if every file matches, and *details* is a list of per-file 198 result dicts with ``path``, ``match``, ``expected``, and 199 ``computed`` keys. 200 """ 201 all_ok = True 202 details: list[dict[str, object]] = [] 203 for entry in file_hash_entries: 204 path = Path(str(entry["path"])) 205 expected = str(entry["sha256"]).strip().lower() 206 if not path.exists(): 207 details.append({ 208 "path": str(path), 209 "match": False, 210 "expected": expected, 211 "computed": "FILE_MISSING", 212 }) 213 all_ok = False 214 continue 215 computed = compute_sha256(path) 216 match = computed == expected 217 details.append({ 218 "path": str(path), 219 "match": match, 220 "expected": expected, 221 "computed": computed, 222 }) 223 if not match: 224 all_ok = False 225 return all_ok, details
Verify multiple evidence files against their recorded SHA-256 digests.
Each entry in *file_hash_entries* must have `path` and `sha256`
keys. Missing files are reported as failures.
Arguments:
- file_hash_entries: List of dicts with `path` (str) and `sha256` (str) keys from intake-time hashing.
Returns:
A tuple `(all_passed, details)` where *all_passed* is `True` only if every file matches, and *details* is a list of per-file result dicts with `path`, `match`, `expected`, and `computed` keys.