app.parser.registry

Artifact registry and prompt-loading helpers for forensic parsing.

Maintains OS-specific artifact registries that map each supported forensic artifact to its Dissect function name, category, human-readable description, and analysis guidance. Guidance text is loaded from Markdown files in prompts/artifact_instructions/ (Windows) or prompts/artifact_instructions_linux/ (Linux) when available, falling back to the entry's inline analysis_instructions or analysis_hint value.

Attributes:
  • WINDOWS_ARTIFACT_REGISTRY: Windows artifact catalogue.
  • LINUX_ARTIFACT_REGISTRY: Linux artifact catalogue.
  1"""Artifact registry and prompt-loading helpers for forensic parsing.
  2
  3Maintains OS-specific artifact registries that map each supported forensic
  4artifact to its Dissect function name, category, human-readable description,
  5and analysis guidance.  Guidance text is loaded from Markdown files in
  6``prompts/artifact_instructions/`` (Windows) or
  7``prompts/artifact_instructions_linux/`` (Linux) when available, falling back to inline
  8``analysis_hint`` values.
  9
 10Attributes:
 11    WINDOWS_ARTIFACT_REGISTRY: Windows artifact catalogue.
 12    LINUX_ARTIFACT_REGISTRY: Linux artifact catalogue.
 13"""
 14
 15from __future__ import annotations
 16
 17from pathlib import Path
 18
 19from ..os_utils import normalize_os_type
 20
# Public names exported by this module.
__all__ = [
    "LINUX_ARTIFACT_REGISTRY",
    "WINDOWS_ARTIFACT_REGISTRY",
    "get_artifact_registry",
]

# Directories searched for per-artifact Markdown guidance files.
# ``parents[2]`` is the directory two levels above this file's own directory
# (presumably the project root -- TODO confirm against the repository layout).
# Windows guidance lives in ``prompts/artifact_instructions``.
_ARTIFACT_PROMPTS_DIR = Path(__file__).resolve().parents[2] / "prompts" / "artifact_instructions"
# Linux guidance lives in ``prompts/artifact_instructions_linux``.
_LINUX_PROMPTS_DIR = Path(__file__).resolve().parents[2] / "prompts" / "artifact_instructions_linux"
 29
 30
 31def _artifact_prompt_name_candidates(artifact_key: str) -> list[str]:
 32    """Generate candidate file stems for loading an artifact guidance prompt.
 33
 34    Produces variants with dots replaced by underscores and vice-versa so
 35    that ``browser.history`` matches ``browser_history.md``.
 36
 37    Args:
 38        artifact_key: Artifact identifier (e.g. ``"browser.history"``).
 39
 40    Returns:
 41        List of lowercased candidate stems, deduplicated.
 42    """
 43    base = str(artifact_key).strip().lower()
 44    if not base:
 45        return []
 46
 47    candidates: list[str] = []
 48    for value in (base, base.replace(".", "_"), base.replace("_", ".")):
 49        stem = value.strip()
 50        if not stem or stem in candidates:
 51            continue
 52        candidates.append(stem)
 53    return candidates
 54
 55
 56def _load_artifact_guidance_prompt(
 57    artifact_key: str,
 58    prompts_dir: Path | None = None,
 59) -> str:
 60    """Load a Markdown guidance prompt for an artifact from a prompts directory.
 61
 62    Args:
 63        artifact_key: Artifact identifier to look up.
 64        prompts_dir: Directory to search for prompt files.  Defaults to
 65            :data:`_ARTIFACT_PROMPTS_DIR` (Windows prompts).
 66
 67    Returns:
 68        The prompt text, or an empty string if no matching file is found.
 69    """
 70    search_dir = prompts_dir if prompts_dir is not None else _ARTIFACT_PROMPTS_DIR
 71    for prompt_stem in _artifact_prompt_name_candidates(artifact_key):
 72        prompt_path = search_dir / f"{prompt_stem}.md"
 73        try:
 74            if prompt_path.is_file():
 75                prompt_text = prompt_path.read_text(encoding="utf-8").strip()
 76                if prompt_text:
 77                    return prompt_text
 78        except (OSError, UnicodeDecodeError):
 79            continue
 80    return ""
 81
 82
 83def _apply_artifact_guidance_from_prompts(
 84    registry: dict[str, dict[str, str]],
 85    prompts_dir: Path | None = None,
 86) -> None:
 87    """Populate ``artifact_guidance`` on each registry entry from prompt files.
 88
 89    For every artifact, attempts to load a matching Markdown prompt from
 90    the given prompts directory.  Falls back to the inline
 91    ``analysis_instructions`` or ``analysis_hint`` when no file exists.
 92
 93    Args:
 94        registry: The mutable artifact registry dictionary.
 95        prompts_dir: Directory to search for prompt files.  Defaults to
 96            :data:`_ARTIFACT_PROMPTS_DIR` (Windows prompts).
 97    """
 98    for artifact_key, artifact_details in registry.items():
 99        prompt_guidance = _load_artifact_guidance_prompt(artifact_key, prompts_dir)
100        if prompt_guidance:
101            artifact_details["artifact_guidance"] = prompt_guidance
102            continue
103
104        fallback_guidance = str(
105            artifact_details.get("analysis_instructions")
106            or artifact_details.get("analysis_hint")
107            or ""
108        ).strip()
109        if fallback_guidance:
110            artifact_details.setdefault("artifact_guidance", fallback_guidance)
111            artifact_details.setdefault("analysis_instructions", fallback_guidance)
112
113
# Windows artifact catalogue, keyed by artifact identifier.  Every entry holds:
#   name          - short display name
#   category      - grouping label (matches the section comments below)
#   function      - Dissect function name for the artifact
#   description   - what the artifact contains
#   analysis_hint - inline analysis guidance used when no prompt file is found
WINDOWS_ARTIFACT_REGISTRY: dict[str, dict[str, str]] = {
    # -- Persistence --------------------------------------------------------
    "runkeys": {
        "name": "Run/RunOnce Keys",
        "category": "Persistence",
        "function": "runkeys",
        "description": (
            "Registry autorun entries that launch programs at user logon or system boot. "
            "These keys commonly store malware persistence command lines and loader stubs."
        ),
        "analysis_hint": (
            "Prioritize entries launching from user-writable paths like AppData, Temp, or Public. "
            "Flag encoded PowerShell, LOLBins, and commands added near the suspected compromise window."
        ),
    },
    "tasks": {
        "name": "Scheduled Tasks",
        "category": "Persistence",
        "function": "tasks",
        "description": (
            "Windows Task Scheduler definitions including triggers, actions, principals, and timing. "
            "Adversaries frequently use tasks for periodic execution and delayed payload launch."
        ),
        "analysis_hint": (
            "Look for newly created or modified tasks with hidden settings, unusual run accounts, or actions "
            "pointing to scripts/binaries outside Program Files and Windows directories."
        ),
    },
    "services": {
        "name": "Services",
        "category": "Persistence",
        "function": "services",
        "description": (
            "Windows service configuration and startup metadata, including image paths and service accounts. "
            "Malicious services can provide boot persistence and privilege escalation."
        ),
        "analysis_hint": (
            "Investigate auto-start services with suspicious image paths, weakly named binaries, or unexpected "
            "accounts. Correlate install/start times with process creation and event log artifacts."
        ),
    },
    "cim": {
        "name": "WMI Persistence",
        "category": "Persistence",
        "function": "cim",
        "description": (
            "WMI repository data such as event filters, consumers, and bindings used for event-driven execution. "
            "This is a common stealth persistence mechanism in fileless intrusions."
        ),
        "analysis_hint": (
            "Focus on suspicious __EventFilter, CommandLineEventConsumer, and ActiveScriptEventConsumer objects. "
            "Flag PowerShell, cmd, or script host commands triggered by system/user logon events."
        ),
    },
    # -- Execution ----------------------------------------------------------
    "shimcache": {
        "name": "Shimcache",
        "category": "Execution",
        "function": "shimcache",
        "description": (
            "Application Compatibility Cache entries containing executable paths and file metadata observed by the OS. "
            "Entries provide execution context but do not independently prove a successful run."
        ),
        "analysis_hint": (
            "Use Shimcache to surface suspicious paths, then confirm execution with Prefetch, Amcache, or event logs. "
            "Pay attention to unsigned tools, archive extraction paths, and deleted binaries."
        ),
    },
    "amcache": {
        "name": "Amcache",
        "category": "Execution",
        "function": "amcache",
        "description": (
            "Application and file inventory from Amcache.hve, often including path, hash, compile info, and first-seen data. "
            "Useful for identifying executed or installed binaries and their provenance."
        ),
        "analysis_hint": (
            "Prioritize recently introduced executables with unknown publishers or rare install locations. "
            "Compare hashes and file names against threat intelligence and other execution artifacts."
        ),
    },
    "prefetch": {
        "name": "Prefetch",
        "category": "Execution",
        "function": "prefetch",
        "description": (
            "Windows Prefetch artifacts recording executable run metadata such as run counts, last run times, and referenced files. "
            "They are high-value evidence for userland execution on supported systems."
        ),
        "analysis_hint": (
            "Hunt for recently first-run utilities, script hosts, and remote administration tools. "
            "Review loaded file references for dropped DLLs and staging directories."
        ),
    },
    "bam": {
        "name": "BAM/DAM",
        "category": "Execution",
        "function": "bam",
        "description": (
            "Background Activity Moderator and Desktop Activity Moderator execution tracking tied to user SIDs. "
            "These entries help attribute process activity to specific user contexts."
        ),
        "analysis_hint": (
            "Correlate BAM/DAM timestamps with logons and process events to identify who launched suspicious binaries. "
            "Highlight administrative tools and scripts executed outside normal business patterns."
        ),
    },
    "userassist": {
        "name": "UserAssist",
        "category": "Execution",
        "function": "userassist",
        "description": (
            "Per-user Explorer-driven program execution traces stored in ROT13-encoded registry values. "
            "Includes run counts and last execution times for GUI-launched applications."
        ),
        "analysis_hint": (
            "Decode and review rarely used programs, renamed binaries, and LOLBins launched through Explorer. "
            "Use run-count deltas and last-run times to identify unusual user behavior."
        ),
    },
    # -- Event Logs ---------------------------------------------------------
    "evtx": {
        "name": "Windows Event Logs",
        "category": "Event Logs",
        "function": "evtx",
        "description": (
            "Windows event channel records covering authentication, process creation, services, policy changes, and system health. "
            "EVTX is often the backbone for timeline and intrusion reconstruction."
        ),
        "analysis_hint": (
            "Pivot on high-signal event IDs for logon, process creation, service installs, account changes, and log clearing. "
            "Correlate actor account, host, and parent-child process chains across Security/System channels."
        ),
    },
    "defender.evtx": {
        "name": "Defender Logs",
        "category": "Event Logs",
        "function": "defender.evtx",
        "description": (
            "Microsoft Defender event logs describing detections, remediation actions, exclusions, and protection state changes. "
            "These records show what malware was seen and how protection responded."
        ),
        "analysis_hint": (
            "Identify detection names, severity, and action outcomes (blocked, quarantined, allowed, failed). "
            "Flag tamper protection events, exclusion changes, and repeated detections of the same path."
        ),
    },
    # -- File System --------------------------------------------------------
    "mft": {
        "name": "MFT",
        "category": "File System",
        "function": "mft",
        "description": (
            "Master File Table metadata for NTFS files and directories, including timestamps, attributes, and record references. "
            "MFT helps reconstruct file lifecycle and artifact provenance at scale."
        ),
        "analysis_hint": (
            "Focus on executable/script creation in user profile, temp, and startup paths near incident time. "
            "Check for timestamp anomalies and suspicious rename/move patterns suggesting anti-forensics."
        ),
    },
    "usnjrnl": {
        "name": "USN Journal",
        "category": "File System",
        "function": "usnjrnl",
        "description": (
            "NTFS change journal entries capturing create, modify, rename, and delete operations over time. "
            "USN is valuable for short-lived files that no longer exist on disk."
        ),
        "analysis_hint": (
            "Track rapid create-delete or rename chains involving scripts, archives, and binaries. "
            "Correlate change reasons and timestamps with execution and network artifacts for full activity flow."
        ),
    },
    "recyclebin": {
        "name": "Recycle Bin",
        "category": "File System",
        "function": "recyclebin",
        "description": (
            "Deleted-item metadata including original paths, deletion times, and owning user context. "
            "Useful for identifying post-activity cleanup and attempted evidence removal."
        ),
        "analysis_hint": (
            "Prioritize deleted tools, scripts, archives, and credential files tied to suspicious users. "
            "Compare deletion timestamps against detection events and command history."
        ),
    },
    # -- User Activity ------------------------------------------------------
    "browser.history": {
        "name": "Browser History",
        "category": "User Activity",
        "function": "browser.history",
        "description": (
            "Visited URL records with titles and timestamps from supported web browsers. "
            "These entries reveal user browsing intent, reconnaissance, and web-based attack paths."
        ),
        "analysis_hint": (
            "Look for phishing domains, file-sharing links, admin portals, and malware delivery infrastructure. "
            "Align visit times with downloads, process execution, and authentication events."
        ),
    },
    "browser.downloads": {
        "name": "Browser Downloads",
        "category": "User Activity",
        "function": "browser.downloads",
        "description": (
            "Browser download records linking source URLs to local file paths and timing. "
            "This artifact is key for tracing initial payload ingress and user-acquired tools."
        ),
        "analysis_hint": (
            "Flag executable, script, archive, and disk-image downloads from untrusted domains. "
            "Correlate downloaded file names and times with Prefetch, Amcache, and Defender activity."
        ),
    },
    "powershell_history": {
        "name": "PowerShell History",
        "category": "User Activity",
        "function": "powershell_history",
        "description": (
            "PSReadLine command history capturing interactive PowerShell commands entered by users. "
            "Often exposes attacker tradecraft such as reconnaissance, staging, and command-and-control setup."
        ),
        "analysis_hint": (
            "Hunt for encoded commands, download cradles, credential access, and remote execution cmdlets. "
            "Note gaps or abrupt truncation that may indicate history clearing or alternate execution methods."
        ),
    },
    "activitiescache": {
        "name": "Activities Cache",
        "category": "User Activity",
        "function": "activitiescache",
        "description": (
            "Windows Timeline activity records reflecting user interactions with apps, documents, and URLs. "
            "Provides broader behavioral context across applications and time."
        ),
        "analysis_hint": (
            "Use it to build user intent timelines around suspicious periods and identify staging behavior. "
            "Prioritize activity involving remote access tools, cloud storage, and sensitive document paths."
        ),
    },
    # -- Network ------------------------------------------------------------
    "sru.network_data": {
        "name": "SRUM Network Data",
        "category": "Network",
        "function": "sru.network_data",
        "description": (
            "System Resource Usage Monitor network telemetry with per-application usage over time. "
            "Shows which apps consumed network bandwidth and when."
        ),
        "analysis_hint": (
            "Identify unusual outbound-heavy applications, especially unsigned or rarely seen executables. "
            "Correlate spikes with execution artifacts and possible data exfiltration windows."
        ),
    },
    "sru.application": {
        "name": "SRUM Application",
        "category": "Network",
        "function": "sru.application",
        "description": (
            "SRUM application resource usage records that provide process-level activity context across time slices. "
            "Helpful for spotting persistence or background abuse patterns."
        ),
        "analysis_hint": (
            "Surface low-prevalence applications active during the incident period or outside baseline hours. "
            "Cross-check with BAM, Prefetch, and network logs to confirm suspicious sustained activity."
        ),
    },
    # -- Registry -----------------------------------------------------------
    "shellbags": {
        "name": "Shellbags",
        "category": "Registry",
        "function": "shellbags",
        "description": (
            "Registry traces of folders viewed in Explorer, including local, removable, and network paths. "
            "Shellbags can preserve evidence even after files or folders are deleted."
        ),
        "analysis_hint": (
            "Look for access to hidden folders, USB volumes, network shares, and unusual archive locations. "
            "Use viewed-path chronology to support staging and collection hypotheses."
        ),
    },
    "usb": {
        "name": "USB History",
        "category": "Registry",
        "function": "usb",
        "description": (
            "Registry evidence of connected USB devices, including identifiers and connection history metadata. "
            "Useful for tracking removable media usage and potential data transfer vectors."
        ),
        "analysis_hint": (
            "Identify unknown devices and compare first/last seen times with suspicious file and user activity. "
            "Focus on storage-class devices connected near possible exfiltration or staging events."
        ),
    },
    "muicache": {
        "name": "MUIcache",
        "category": "Registry",
        "function": "muicache",
        "description": (
            "Cache of executable display strings written when programs are launched via the shell. "
            "Can provide residual execution clues for binaries no longer present."
        ),
        "analysis_hint": (
            "Hunt for suspicious executable paths and uncommon tool names absent from standard software inventories. "
            "Correlate entries with UserAssist and Shimcache for stronger execution confidence."
        ),
    },
    # -- Security -----------------------------------------------------------
    "sam": {
        "name": "SAM Users",
        "category": "Security",
        "function": "sam",
        "description": (
            "Local Security Account Manager user account records and account state metadata. "
            "This artifact supports detection of unauthorized local account creation and privilege abuse."
        ),
        "analysis_hint": (
            "Flag newly created, enabled, or reactivated local accounts, especially admin-capable users. "
            "Correlate account changes with logon events and lateral movement artifacts."
        ),
    },
    "defender.quarantine": {
        "name": "Defender Quarantine",
        "category": "Security",
        "function": "defender.quarantine",
        "description": (
            "Metadata about items quarantined by Microsoft Defender, including source path and detection context. "
            "Indicates which suspicious files were contained and where they originated."
        ),
        "analysis_hint": (
            "Confirm whether detections were successfully quarantined and whether the same paths reappear later. "
            "Use quarantine artifacts to pivot into file system, execution, and persistence traces."
        ),
    },
}

# Runs at import time: sets ``artifact_guidance`` on every entry from the
# default (Windows) prompts directory, falling back to the inline
# ``analysis_hint`` when no prompt file yields text.
_apply_artifact_guidance_from_prompts(WINDOWS_ARTIFACT_REGISTRY)
443
444# ---------------------------------------------------------------------------
445# Linux artifact registry
446# ---------------------------------------------------------------------------
447
448LINUX_ARTIFACT_REGISTRY: dict[str, dict[str, str]] = {
449    # -- Persistence --------------------------------------------------------
450    "cronjobs": {
451        "name": "Cron Jobs",
452        "category": "Persistence",
453        "function": "cronjobs",
454        "description": (
455            "Scheduled tasks defined in user crontabs and system-wide /etc/cron.* directories. "
456            "Cron is a common persistence and periodic-execution mechanism on Linux systems."
457        ),
458        "analysis_hint": (
459            "Flag cron entries that download or execute from /tmp, /dev/shm, or user-writable paths. "
460            "Look for base64-encoded commands, reverse shells, and entries added near the incident window."
461        ),
462    },
463    "services": {
464        "name": "Systemd Services",
465        "category": "Persistence",
466        "function": "services",
467        "description": (
468            "Systemd unit files describing services, their startup configuration, and current state. "
469            "Dissect's services function is OS-aware and returns Linux systemd units on Linux targets."
470        ),
471        "analysis_hint": (
472            "Identify services with ExecStart pointing to unusual paths (/tmp, /var/tmp, user home dirs). "
473            "Flag recently created or modified unit files, units set to restart on failure, and masked units."
474        ),
475    },
476    # -- Shell History ------------------------------------------------------
477    "bash_history": {
478        "name": "Bash History",
479        "category": "Shell History",
480        "function": "bash_history",
481        "description": (
482            "Per-user .bash_history files recording interactive shell commands. "
483            "Highest-value artifact on Linux for understanding attacker activity."
484        ),
485        "analysis_hint": (
486            "Hunt for curl/wget downloads, base64 encoding/decoding, reverse shells (bash -i, /dev/tcp), "
487            "credential access (cat /etc/shadow), reconnaissance (id, whoami, uname -a), persistence "
488            "installation (crontab -e, systemctl enable), and log tampering (truncate, shred, rm /var/log). "
489            "Sparse or empty history for active accounts may indicate clearing (history -c, HISTFILE=/dev/null)."
490        ),
491    },
492    "zsh_history": {
493        "name": "Zsh History",
494        "category": "Shell History",
495        "function": "zsh_history",
496        "description": (
497            "Per-user .zsh_history files recording Zsh shell commands with optional timestamps. "
498            "Zsh history may include timing data not present in bash history."
499        ),
500        "analysis_hint": (
501            "Apply the same suspicious-command patterns as bash_history. "
502            "Zsh extended history format includes timestamps — use them for timeline correlation."
503        ),
504    },
505    "fish_history": {
506        "name": "Fish History",
507        "category": "Shell History",
508        "function": "fish_history",
509        "description": (
510            "Per-user Fish shell history stored in YAML-like format with timestamps. "
511            "Less common but may capture activity missed by bash/zsh."
512        ),
513        "analysis_hint": (
514            "Apply the same suspicious-command patterns as bash_history. "
515            "Fish history includes timestamps per command — correlate with login records."
516        ),
517    },
518    "python_history": {
519        "name": "Python History",
520        "category": "Shell History",
521        "function": "python_history",
522        "description": (
523            "Python REPL history from interactive interpreter sessions. "
524            "May reveal attacker use of Python for scripting, exploitation, or data manipulation."
525        ),
526        "analysis_hint": (
527            "Look for import of socket/subprocess/os modules, file read/write operations on "
528            "sensitive paths, and network connection attempts. Python is commonly used for "
529            "exploit development and post-exploitation tooling."
530        ),
531    },
532    # -- Authentication -----------------------------------------------------
533    "wtmp": {
534        "name": "Login Records (wtmp)",
535        "category": "Authentication",
536        "function": "wtmp",
537        "description": (
538            "Successful login/logout records including user, terminal, source IP, and timestamps. "
539            "Linux equivalent of Windows logon events."
540        ),
541        "analysis_hint": (
542            "Flag logins from unexpected IPs, logins at unusual hours, root logins via SSH, "
543            "and logins from accounts that should not be interactive. Cross-check with auth logs "
544            "and shell history. wtmp can be tampered with — missing records or time gaps may "
545            "indicate editing."
546        ),
547    },
548    "btmp": {
549        "name": "Failed Logins (btmp)",
550        "category": "Authentication",
551        "function": "btmp",
552        "description": (
553            "Failed login attempt records including user, source IP, and timestamps. "
554            "High volumes indicate brute-force attacks or credential stuffing."
555        ),
556        "analysis_hint": (
557            "Look for high-frequency failures from single IPs (brute force), failures for "
558            "non-existent accounts (enumeration), and failures immediately before a successful "
559            "wtmp login (successful brute force). Correlate source IPs with successful logins."
560        ),
561    },
562    "lastlog": {
563        "name": "Last Login Records",
564        "category": "Authentication",
565        "function": "lastlog",
566        "description": (
567            "Last login timestamp and source for each user account on the system. "
568            "Provides a quick overview of account usage recency."
569        ),
570        "analysis_hint": (
571            "Identify accounts with recent logins that should be dormant or disabled. "
572            "Compare with wtmp for consistency — discrepancies may indicate log tampering."
573        ),
574    },
575    "users": {
576        "name": "User Accounts",
577        "category": "Authentication",
578        "function": "users",
579        "description": (
580            "User account information parsed from /etc/passwd and /etc/shadow, including "
581            "UIDs, shells, home directories, and password metadata."
582        ),
583        "analysis_hint": (
584            "Flag accounts with UID 0 (root-equivalent), accounts with login shells that "
585            "should have /sbin/nologin, recently created accounts (check shadow dates), and "
586            "accounts with empty password fields."
587        ),
588    },
589    "groups": {
590        "name": "Groups",
591        "category": "Authentication",
592        "function": "groups",
593        "description": (
594            "Group definitions from /etc/group including group members. "
595            "Shows privilege group membership such as sudo, wheel, and docker."
596        ),
597        "analysis_hint": (
598            "Check membership of privileged groups (sudo, wheel, docker, adm, root). "
599            "Flag unexpected users in administrative groups."
600        ),
601    },
602    "sudoers": {
603        "name": "Sudoers Config",
604        "category": "Authentication",
605        "function": "sudoers",
606        "description": (
607            "Sudo configuration from /etc/sudoers and /etc/sudoers.d/, defining which "
608            "users can run which commands with elevated privileges."
609        ),
610        "analysis_hint": (
611            "Flag NOPASSWD entries, overly broad command allowances (ALL), and rules for "
612            "unexpected users. Attackers often modify sudoers for passwordless privilege escalation."
613        ),
614    },
615    # -- Network ------------------------------------------------------------
616    "network.interfaces": {
617        "name": "Network Interfaces",
618        "category": "Network",
619        "function": "network.interfaces",
620        "description": (
621            "Network interface configuration including IP addresses, subnets, and interface names. "
622            "Provides context for understanding the system's network position."
623        ),
624        "analysis_hint": (
625            "Document all configured interfaces and IPs for correlation with login source IPs "
626            "and network artifacts from other systems. Flag unexpected interfaces (tunnels, bridges)."
627        ),
628    },
629    # -- Logs ---------------------------------------------------------------
630    "syslog": {
631        "name": "Syslog",
632        "category": "Logs",
633        "function": "syslog",
634        "description": (
635            "System log entries from /var/log/syslog, /var/log/messages, and /var/log/auth.log. "
636            "Central log source for authentication, service, and kernel events on Linux."
637        ),
638        "analysis_hint": (
639            "Filter for sshd, sudo, su, and PAM messages to reconstruct authentication activity. "
640            "Look for service start/stop events, kernel warnings, and log gaps that may indicate "
641            "tampering or system downtime."
642        ),
643    },
644    "journalctl": {
645        "name": "Systemd Journal",
646        "category": "Logs",
647        "function": "journalctl",
648        "description": (
649            "Structured journal entries from systemd-journald, covering services, kernel, and "
650            "user-session events with rich metadata."
651        ),
652        "analysis_hint": (
653            "Use unit and priority fields to filter for security-relevant events. "
654            "Journal entries complement syslog and may contain structured fields not "
655            "present in plain-text logs."
656        ),
657    },
658    "packagemanager": {
659        "name": "Package History",
660        "category": "Logs",
661        "function": "packagemanager",
662        "description": (
663            "Package installation, removal, and update history from apt, yum, dnf, or other "
664            "package managers. Shows software changes over time."
665        ),
666        "analysis_hint": (
667            "Flag recently installed packages, especially compilers (gcc, make), network tools "
668            "(nmap, netcat, socat), and packages installed outside normal maintenance windows. "
669            "Package removal near incident time may indicate cleanup."
670        ),
671    },
672    # -- SSH ----------------------------------------------------------------
673    "ssh.authorized_keys": {
674        "name": "SSH Authorized Keys",
675        "category": "SSH",
676        "function": "ssh.authorized_keys",
677        "description": (
678            "Per-user authorized_keys files listing public keys allowed for SSH authentication. "
679            "A primary persistence mechanism for SSH-based access."
680        ),
681        "analysis_hint": (
682            "Flag keys added recently or for unexpected accounts. Compare key fingerprints "
683            "across systems to identify lateral movement. Look for command-restricted keys "
684            "and keys with 'from=' options limiting source IPs."
685        ),
686    },
687    "ssh.known_hosts": {
688        "name": "SSH Known Hosts",
689        "category": "SSH",
690        "function": "ssh.known_hosts",
691        "description": (
692            "Per-user known_hosts files recording SSH server fingerprints the user has connected to. "
693            "Reveals outbound SSH connections and lateral movement targets."
694        ),
695        "analysis_hint": (
696            "Identify internal hosts the user SSHed to (lateral movement) and external hosts "
697            "(potential C2 or data exfiltration). Hashed known_hosts entries obscure hostnames "
698            "but IP-based entries may still be readable."
699        ),
700    },
701}
702
# Executed at import time. Per the module docstring, guidance text for Linux
# artifacts comes from the Markdown files under ``_LINUX_PROMPTS_DIR`` when
# available, with the inline ``analysis_hint`` values as the fallback.
# NOTE(review): the return value is discarded, so the helper presumably
# updates LINUX_ARTIFACT_REGISTRY in place — confirm against its definition.
_apply_artifact_guidance_from_prompts(LINUX_ARTIFACT_REGISTRY, _LINUX_PROMPTS_DIR)
704
705
def get_artifact_registry(os_type: str) -> dict[str, dict[str, str]]:
    """Select the artifact catalogue that matches a target's operating system.

    The raw identifier is passed through
    :func:`~app.os_utils.normalize_os_type` so that OS detection stays
    consistent with the rest of the codebase.

    Args:
        os_type: Operating system identifier, e.g. the value of Dissect's
            ``target.os`` (``"windows"``, ``"linux"``).

    Returns:
        :data:`LINUX_ARTIFACT_REGISTRY` when the normalised identifier is
        ``"linux"``; otherwise :data:`WINDOWS_ARTIFACT_REGISTRY`, which also
        serves as the fallback for unrecognised OS types.
    """
    is_linux = normalize_os_type(os_type) == "linux"
    # Windows is the catch-all so callers always receive a usable registry.
    return LINUX_ARTIFACT_REGISTRY if is_linux else WINDOWS_ARTIFACT_REGISTRY
LINUX_ARTIFACT_REGISTRY: dict[str, dict[str, str]] = {'cronjobs': {'name': 'Cron Jobs', 'category': 'Persistence', 'function': 'cronjobs', 'description': 'Scheduled tasks defined in user crontabs and system-wide /etc/cron.* directories. Cron is a common persistence and periodic-execution mechanism on Linux systems.', 'analysis_hint': 'Flag cron entries that download or execute from /tmp, /dev/shm, or user-writable paths. Look for base64-encoded commands, reverse shells, and entries added near the incident window.', 'artifact_guidance': 'Scheduled tasks — primary persistence mechanism on Linux.\n- Suspicious: entries running scripts from /tmp, /dev/shm, or user-writable directories; entries executing curl/wget/python/bash with URLs or encoded payloads; entries owned by unexpected users; unusual schedules (every minute, @reboot).\n- Locations: /var/spool/cron/crontabs/ (per-user), /etc/crontab, /etc/cron.d/, /etc/cron.{hourly,daily,weekly,monthly}/.\n- @reboot entries are high-priority — they survive reboots without appearing in regular cron schedules.\n- Cron execution should appear in syslog (CRON entries). Missing log entries for known cron jobs may indicate log tampering.'}, 'services': {'name': 'Systemd Services', 'category': 'Persistence', 'function': 'services', 'description': "Systemd unit files describing services, their startup configuration, and current state. Dissect's services function is OS-aware and returns Linux systemd units on Linux targets.", 'analysis_hint': 'Identify services with ExecStart pointing to unusual paths (/tmp, /var/tmp, user home dirs). 
Flag recently created or modified unit files, units set to restart on failure, and masked units.', 'artifact_guidance': "Systemd service units and init scripts — key persistence and privilege artifact on Linux.\n- Suspicious: unit files in /etc/systemd/system/ referencing unusual binaries, ExecStart pointing to /tmp, /dev/shm, or hidden directories, services with Restart=always that aren't standard, recently created unit files, services running as root with unusual ExecStart paths, Type=oneshot services running scripts.\n- Check for: masked legitimate security services (apparmor, auditd, fail2ban), ExecStartPre/ExecStartPost running additional commands, drop-in overrides in /etc/systemd/system/*.d/ directories.\n- Cross-check: service creation should correlate with systemctl commands in bash_history and file creation timestamps in filesystem artifacts.\n- Expected: standard distro services are common — focus on what doesn't fit the installed package set."}, 'bash_history': {'name': 'Bash History', 'category': 'Shell History', 'function': 'bash_history', 'description': 'Per-user .bash_history files recording interactive shell commands. Highest-value artifact on Linux for understanding attacker activity.', 'analysis_hint': 'Hunt for curl/wget downloads, base64 encoding/decoding, reverse shells (bash -i, /dev/tcp), credential access (cat /etc/shadow), reconnaissance (id, whoami, uname -a), persistence installation (crontab -e, systemctl enable), and log tampering (truncate, shred, rm /var/log). 
Sparse or empty history for active accounts may indicate clearing (history -c, HISTFILE=/dev/null).', 'artifact_guidance': 'Direct record of commands typed by users — highest-value artifact on Linux systems.\n- Suspicious: curl/wget downloads, base64 encoding/decoding, reverse shells (bash -i, /dev/tcp), compiler invocations (gcc, make) for kernel exploits, credential access (cat /etc/shadow, mimipenguin), recon sequences (id, whoami, uname -a, cat /etc/passwd, ss -tlnp, ip a), persistence installation (crontab -e, systemctl enable), log tampering (truncate, shred, rm on /var/log).\n- No timestamps. Sequence matters but timing must come from other artifacts.\n- Sparse or empty history for active accounts may indicate clearing (history -c, HISTFILE=/dev/null, unset HISTFILE).\n- Look for multi-stage patterns: recon → exploitation → persistence. Commands piped to /dev/null or with stderr redirection may indicate output suppression.'}, 'zsh_history': {'name': 'Zsh History', 'category': 'Shell History', 'function': 'zsh_history', 'description': 'Per-user .zsh_history files recording Zsh shell commands with optional timestamps. Zsh history may include timing data not present in bash history.', 'analysis_hint': 'Apply the same suspicious-command patterns as bash_history. 
Zsh extended history format includes timestamps — use them for timeline correlation.', 'artifact_guidance': 'Zsh shell history with timestamps — higher value than bash_history for timeline construction.\n- Format: `: epoch:duration;command` — use the epoch timestamp for direct correlation with other timed artifacts (wtmp, syslog, journalctl).\n- Same threat indicators as bash_history: curl/wget downloads, base64, reverse shells, credential access, recon commands, persistence installation, log tampering.\n- Zsh extended history may record multi-line commands that bash_history splits or truncates.\n- Sparse or empty history for active accounts may indicate clearing or HISTFILE manipulation.'}, 'fish_history': {'name': 'Fish History', 'category': 'Shell History', 'function': 'fish_history', 'description': 'Per-user Fish shell history stored in YAML-like format with timestamps. Less common but may capture activity missed by bash/zsh.', 'analysis_hint': 'Apply the same suspicious-command patterns as bash_history. Fish history includes timestamps per command — correlate with login records.', 'artifact_guidance': 'Fish shell history with timestamps. Same threat indicators as bash_history.\n- Fish stores history with `- cmd:` and `when:` fields — timestamps are Unix epochs, enabling direct timeline correlation.\n- Fish is uncommon on servers. Its presence on a production system may itself be notable — check if it was recently installed.\n- Stored per-user in ~/.local/share/fish/fish_history.'}, 'python_history': {'name': 'Python History', 'category': 'Shell History', 'function': 'python_history', 'description': 'Python REPL history from interactive interpreter sessions. May reveal attacker use of Python for scripting, exploitation, or data manipulation.', 'analysis_hint': 'Look for import of socket/subprocess/os modules, file read/write operations on sensitive paths, and network connection attempts. 
Python is commonly used for exploit development and post-exploitation tooling.', 'artifact_guidance': 'Python interactive REPL history — records commands typed in the Python interpreter.\n- Suspicious: import os/subprocess/socket/pty, eval/exec calls, network connections (socket.connect, urllib, requests), file operations on sensitive paths (/etc/shadow, /root/.ssh), os.system or subprocess.call with shell commands, pty.spawn for shell upgrades.\n- Often used for interactive exploitation after initial access — attacker drops into Python to avoid bash history or leverage Python capabilities.\n- Stored in ~/.python_history by default. No timestamps.\n- Cross-check: Python interpreter execution should appear in bash_history (python/python3 commands) or process logs.'}, 'wtmp': {'name': 'Login Records (wtmp)', 'category': 'Authentication', 'function': 'wtmp', 'description': 'Successful login/logout records including user, terminal, source IP, and timestamps. Linux equivalent of Windows logon events.', 'analysis_hint': 'Flag logins from unexpected IPs, logins at unusual hours, root logins via SSH, and logins from accounts that should not be interactive. Cross-check with auth logs and shell history. wtmp can be tampered with — missing records or time gaps may indicate editing.', 'artifact_guidance': "Login/logout records — Linux equivalent of Windows logon events.\n- Shows: user, terminal (tty/pts), source IP for remote sessions, login/logout timestamps.\n- Suspicious: logins from unexpected IPs, logins at unusual hours, root logins via SSH, logins from accounts that shouldn't be interactive (www-data, nobody, service accounts), logins immediately after account creation.\n- Anti-forensic: wtmp is a binary file that can be tampered with (utmpdump). Missing records or time gaps may indicate editing. Compare with syslog/journalctl auth entries for consistency.\n- Cross-check: correlate with auth logs, bash_history, and btmp to build user activity timeline. 
A successful login here preceded by many failures in btmp indicates compromised credentials."}, 'btmp': {'name': 'Failed Logins (btmp)', 'category': 'Authentication', 'function': 'btmp', 'description': 'Failed login attempt records including user, source IP, and timestamps. High volumes indicate brute-force attacks or credential stuffing.', 'analysis_hint': 'Look for high-frequency failures from single IPs (brute force), failures for non-existent accounts (enumeration), and failures immediately before a successful wtmp login (successful brute force). Correlate source IPs with successful logins.', 'artifact_guidance': 'Failed login attempts — Linux equivalent of Windows Event ID 4625.\n- Patterns: brute force (high volume against one account), password spraying (low volume across many accounts), attempts against disabled or system accounts.\n- Source IPs are key IOCs. A successful login (in wtmp) after many failures here indicates compromised credentials.\n- High volume is normal for internet-facing SSH — focus on attempts against real local accounts rather than dictionary usernames.'}, 'lastlog': {'name': 'Last Login Records', 'category': 'Authentication', 'function': 'lastlog', 'description': 'Last login timestamp and source for each user account on the system. Provides a quick overview of account usage recency.', 'analysis_hint': 'Identify accounts with recent logins that should be dormant or disabled. Compare with wtmp for consistency — discrepancies may indicate log tampering.', 'artifact_guidance': "Last login timestamp and source for each user account — quick-reference artifact.\n- Quick checks: accounts with recent logins that shouldn't be active (service accounts, disabled users), system accounts (UID < 1000) with login records, accounts that have never logged in but were recently created.\n- Only stores the most recent login per user — no history. 
Cross-check against wtmp for full login records.\n- Discrepancies between lastlog and wtmp may indicate tampering with one or both.\n- Small artifact: review all entries."}, 'users': {'name': 'User Accounts', 'category': 'Authentication', 'function': 'users', 'description': 'User account information parsed from /etc/passwd and /etc/shadow, including UIDs, shells, home directories, and password metadata.', 'analysis_hint': 'Flag accounts with UID 0 (root-equivalent), accounts with login shells that should have /sbin/nologin, recently created accounts (check shadow dates), and accounts with empty password fields.', 'artifact_guidance': "User accounts from /etc/passwd and /etc/shadow — Linux equivalent of the SAM artifact.\n- Suspicious: UID 0 accounts besides root, accounts with no password or weak hash type (DES, MD5 instead of SHA-512), recently created accounts (check shadow change dates), accounts with interactive shells (/bin/bash, /bin/sh) that shouldn't have them (www-data, nobody, service accounts), home directories in unusual locations (/tmp, /dev/shm).\n- Key fields: username, UID, GID, shell, home directory, password hash type, last password change, account expiration.\n- Cross-check: new accounts should correlate with useradd commands in bash_history and auth log entries.\n- Small artifact: review all entries. Focus on accounts that don't match the expected system profile."}, 'groups': {'name': 'Groups', 'category': 'Authentication', 'function': 'groups', 'description': 'Group definitions from /etc/group including group members. Shows privilege group membership such as sudo, wheel, and docker.', 'analysis_hint': 'Check membership of privileged groups (sudo, wheel, docker, adm, root). 
Flag unexpected users in administrative groups.', 'artifact_guidance': 'Group memberships from /etc/group — shows privilege assignments.\n- Suspicious: unexpected members of sudo, wheel, adm, docker, lxd, disk, or shadow groups.\n- Docker and lxd group membership effectively grants root access — flag non-admin users in these groups.\n- The adm group grants log file access — membership could enable log review or tampering.\n- Small artifact: review all privileged group memberships completely.'}, 'sudoers': {'name': 'Sudoers Config', 'category': 'Authentication', 'function': 'sudoers', 'description': 'Sudo configuration from /etc/sudoers and /etc/sudoers.d/, defining which users can run which commands with elevated privileges.', 'analysis_hint': 'Flag NOPASSWD entries, overly broad command allowances (ALL), and rules for unexpected users. Attackers often modify sudoers for passwordless privilege escalation.', 'artifact_guidance': 'Sudo configuration defining privilege escalation rules.\n- Suspicious: NOPASSWD entries (sudo without password), overly broad allowances (ALL=(ALL) ALL for non-admin users), entries for unexpected users or groups, entries allowing specific dangerous commands (bash, su, cp, chmod, chown), entries with !authenticate.\n- Check both /etc/sudoers and /etc/sudoers.d/ drop-in files.\n- Recently modified sudoers files are high-priority — correlate modification timestamps with other activity.\n- Attackers commonly add NOPASSWD entries for persistence or privilege escalation.\n- Cross-check: sudoers modifications should correlate with visudo usage in bash_history or file modification timestamps.'}, 'network.interfaces': {'name': 'Network Interfaces', 'category': 'Network', 'function': 'network.interfaces', 'description': "Network interface configuration including IP addresses, subnets, and interface names. 
Provides context for understanding the system's network position.", 'analysis_hint': 'Document all configured interfaces and IPs for correlation with login source IPs and network artifacts from other systems. Flag unexpected interfaces (tunnels, bridges).', 'artifact_guidance': "Network interface configuration — context artifact for understanding the system's network position.\n- Shows: interface names, IP addresses, subnet masks, gateways, DNS servers, VLAN configurations.\n- Useful for: determining reachable networks, identifying multi-homed systems, understanding blast radius of a compromise.\n- Suspicious: unexpected interfaces (tun/tap for VPN tunnels, docker/veth for containers that shouldn't exist), promiscuous mode enabled (potential sniffing), IP addresses outside expected ranges.\n- Primarily a context artifact — use it to inform analysis of other artifacts rather than as a standalone finding source."}, 'syslog': {'name': 'Syslog', 'category': 'Logs', 'function': 'syslog', 'description': 'System log entries from /var/log/syslog, /var/log/messages, and /var/log/auth.log. Central log source for authentication, service, and kernel events on Linux.', 'analysis_hint': 'Filter for sshd, sudo, su, and PAM messages to reconstruct authentication activity. Look for service start/stop events, kernel warnings, and log gaps that may indicate tampering or system downtime.', 'artifact_guidance': 'Primary system log — broadest coverage of system events on Linux.\n- High-signal entries: authentication events (sshd, sudo, su, login), service start/stop, kernel messages (especially module loading via modprobe/insmod), cron execution, package manager activity, OOM kills.\n- Suspicious: timestamp gaps (log deletion/rotation tampering), sshd accepted/failed password entries, sudo command executions, unknown or unexpected service names, kernel module loading for non-standard modules.\n- Volume warning: syslog can have millions of lines. 
Focus on the incident time window and high-signal facility/program combinations.\n- Cross-check: syslog auth entries should be consistent with wtmp/btmp records. Discrepancies indicate tampering with one or both.'}, 'journalctl': {'name': 'Systemd Journal', 'category': 'Logs', 'function': 'journalctl', 'description': 'Structured journal entries from systemd-journald, covering services, kernel, and user-session events with rich metadata.', 'analysis_hint': 'Use unit and priority fields to filter for security-relevant events. Journal entries complement syslog and may contain structured fields not present in plain-text logs.', 'artifact_guidance': "Systemd journal — richer than syslog with structured metadata (unit names, PIDs, priority levels).\n- May capture service stdout/stderr that syslog misses. Same threat indicators: authentication events, service changes, kernel messages, cron execution.\n- Suspicious: journal file truncation or missing time ranges, failed service starts for security tools, kernel module loading, coredumps for exploited processes.\n- Journal persistence depends on config — volatile journals (/run/log/journal/) are lost on reboot. Persistent journals live in /var/log/journal/.\n- If journal has entries that syslog doesn't (or vice versa), one was likely tampered with."}, 'packagemanager': {'name': 'Package History', 'category': 'Logs', 'function': 'packagemanager', 'description': 'Package installation, removal, and update history from apt, yum, dnf, or other package managers. Shows software changes over time.', 'analysis_hint': 'Flag recently installed packages, especially compilers (gcc, make), network tools (nmap, netcat, socat), and packages installed outside normal maintenance windows. 
Package removal near incident time may indicate cleanup.', 'artifact_guidance': 'Package installation and removal history — shows software changes over time.\n- Suspicious: recently installed offensive tools (nmap, netcat/ncat, tcpdump, wireshark, gcc, make, gdb, strace), removed security tools (auditd, fail2ban, rkhunter, clamav), packages from non-standard repositories or PPAs, installations correlating with incident timing.\n- Compiler toolchain installation (build-essential, gcc, make) on a production server is notable — may indicate kernel exploit compilation.\n- Sources vary by distro: dpkg.log and apt history.log (Debian/Ubuntu), yum.log or dnf.log (RHEL/Fedora), pacman.log (Arch), zypper.log (SUSE).\n- Cross-check: package installations should correlate with apt/yum/dnf commands in bash_history.'}, 'ssh.authorized_keys': {'name': 'SSH Authorized Keys', 'category': 'SSH', 'function': 'ssh.authorized_keys', 'description': 'Per-user authorized_keys files listing public keys allowed for SSH authentication. A primary persistence mechanism for SSH-based access.', 'analysis_hint': "Flag keys added recently or for unexpected accounts. Compare key fingerprints across systems to identify lateral movement. Look for command-restricted keys and keys with 'from=' options limiting source IPs.", 'artifact_guidance': 'SSH public keys granting passwordless access — critical persistence mechanism.\n- Suspicious: keys in unexpected user accounts (especially root, service accounts), recently added keys (correlate with file timestamps), keys with forced command restrictions that look like backdoors (command="..." 
prefix), multiple keys for single accounts that don\'t match known administrators, unusual comment fields.\n- Check: ~/.ssh/authorized_keys and ~/.ssh/authorized_keys2 for all users, plus /etc/ssh/sshd_config for AuthorizedKeysFile overrides pointing to non-standard locations.\n- An attacker adding their key is one of the most common Linux persistence techniques — always review thoroughly.\n- Cross-check: key additions should correlate with SSH/SCP activity in auth logs, and echo/cat commands in bash_history writing to authorized_keys files.'}, 'ssh.known_hosts': {'name': 'SSH Known Hosts', 'category': 'SSH', 'function': 'ssh.known_hosts', 'description': 'Per-user known_hosts files recording SSH server fingerprints the user has connected to. Reveals outbound SSH connections and lateral movement targets.', 'analysis_hint': 'Identify internal hosts the user SSHed to (lateral movement) and external hosts (potential C2 or data exfiltration). Hashed known_hosts entries obscure hostnames but IP-based entries may still be readable.', 'artifact_guidance': "SSH host keys for systems this machine has connected to — shows lateral movement paths outward.\n- Suspicious: internal hosts that shouldn't be SSH targets from this system, external IPs or hostnames, large number of known hosts on a system that shouldn't be initiating SSH (web servers, database servers), recently added entries.\n- Hashed known_hosts (HashKnownHosts=yes) obscures hostnames — entry count and file modification time are still useful.\n- Check both per-user (~/.ssh/known_hosts) and system-wide (/etc/ssh/ssh_known_hosts).\n- Cross-check: SSH connections should correlate with ssh commands in bash_history and auth logs on destination systems."}}
WINDOWS_ARTIFACT_REGISTRY: dict[str, dict[str, str]] = {'runkeys': {'name': 'Run/RunOnce Keys', 'category': 'Persistence', 'function': 'runkeys', 'description': 'Registry autorun entries that launch programs at user logon or system boot. These keys commonly store malware persistence command lines and loader stubs.', 'analysis_hint': 'Prioritize entries launching from user-writable paths like AppData, Temp, or Public. Flag encoded PowerShell, LOLBins, and commands added near the suspected compromise window.', 'artifact_guidance': 'Startup persistence. Every entry is worth reviewing — these are typically few.\n- Separate HKLM (machine-wide) from HKCU (user-specific) scope.\n- Suspicious: commands from user-writable paths (AppData, Temp, Public, ProgramData), script hosts (powershell, wscript, mshta, cmd /c), encoded/obfuscated arguments, LOLBins (rundll32, regsvr32, mshta).\n- Expected: enterprise software updaters (Google, Adobe, Teams, OneDrive). If in doubt, flag it — false positives are cheap here.'}, 'tasks': {'name': 'Scheduled Tasks', 'category': 'Persistence', 'function': 'tasks', 'description': 'Windows Task Scheduler definitions including triggers, actions, principals, and timing. 
Adversaries frequently use tasks for periodic execution and delayed payload launch.', 'analysis_hint': 'Look for newly created or modified tasks with hidden settings, unusual run accounts, or actions pointing to scripts/binaries outside Program Files and Windows directories.', 'artifact_guidance': "Scheduled execution and persistence.\n- Suspicious: non-Microsoft authors, hidden tasks, tasks running script hosts or encoded commands, binaries outside trusted system paths, tasks created/modified near the incident window.\n- High-risk triggers: boot/logon triggers with no clear business purpose, high-frequency schedules.\n- Cross-check: task creation should correlate with EVTX and execution artifacts.\n- Expected: Windows maintenance tasks (defrag, diagnostics, updates) are normal — focus on what's new or unusual."}, 'services': {'name': 'Services', 'category': 'Persistence', 'function': 'services', 'description': 'Windows service configuration and startup metadata, including image paths and service accounts. Malicious services can provide boot persistence and privilege escalation.', 'analysis_hint': 'Investigate auto-start services with suspicious image paths, weakly named binaries, or unexpected accounts. 
Correlate install/start times with process creation and event log artifacts.', 'artifact_guidance': "Boot/logon persistence and privilege context.\n- Focus on auto-start and delayed-auto-start services.\n- Suspicious: image paths under user-writable directories, service names mimicking legitimate components but pointing to odd binaries, services running as LocalSystem with unusual paths, quoted-path vulnerabilities.\n- Cross-check: newly installed services should correlate with EVTX Event ID 7045.\n- Expected: vendor software services are common and usually benign — look for what doesn't fit the pattern."}, 'cim': {'name': 'WMI Persistence', 'category': 'Persistence', 'function': 'cim', 'description': 'WMI repository data such as event filters, consumers, and bindings used for event-driven execution. This is a common stealth persistence mechanism in fileless intrusions.', 'analysis_hint': 'Focus on suspicious __EventFilter, CommandLineEventConsumer, and ActiveScriptEventConsumer objects. Flag PowerShell, cmd, or script host commands triggered by system/user logon events.', 'artifact_guidance': "WMI event subscription persistence — a stealthy and often overlooked persistence mechanism.\n- Focus on the three components: EventFilter (trigger), EventConsumer (action), and FilterToConsumerBinding (link between them).\n- Suspicious: CommandLineEventConsumer or ActiveScriptEventConsumer invoking powershell, cmd, wscript, mshta, or referencing external script files. Any consumer executing from user-writable paths.\n- High-risk triggers: logon, startup, or timer-based EventFilters that re-execute payloads automatically.\n- This artifact is rarely used legitimately outside enterprise management tools (SCCM, monitoring agents). 
Any unexpected subscription is worth flagging.\n- Cross-check: execution of the consumer's target command should appear in EVTX process creation, prefetch, or shimcache."}, 'shimcache': {'name': 'Shimcache', 'category': 'Execution', 'function': 'shimcache', 'description': 'Application Compatibility Cache entries containing executable paths and file metadata observed by the OS. Entries provide execution context but do not independently prove a successful run.', 'analysis_hint': 'Use Shimcache to surface suspicious paths, then confirm execution with Prefetch, Amcache, or event logs. Pay attention to unsigned tools, archive extraction paths, and deleted binaries.', 'artifact_guidance': 'Evidence of program presence on disk, not definitive proof of execution.\n- Suspicious: executables in user profiles, temp directories, recycle bin, removable media, or archive extraction paths. Renamed system utilities. Known attacker tools (psexec, mimikatz, procdump, etc.).\n- Important: shimcache alone does not confirm execution. Flag items that need corroboration from Prefetch, Amcache, or EVTX.\n- Use timestamps and entry order to build a likely sequence, but label the uncertainty.\n- Expected: common enterprise software in standard paths is noise — skip it unless relevant to the investigation context.'}, 'amcache': {'name': 'Amcache', 'category': 'Execution', 'function': 'amcache', 'description': 'Application and file inventory from Amcache.hve, often including path, hash, compile info, and first-seen data. Useful for identifying executed or installed binaries and their provenance.', 'analysis_hint': 'Prioritize recently introduced executables with unknown publishers or rare install locations. 
Compare hashes and file names against threat intelligence and other execution artifacts.', 'artifact_guidance': "Program inventory with execution relevance and SHA-1 hashes.\n- Suspicious: newly observed executables near the incident window, uncommon install paths, unknown publishers, product name mismatches, executables without expected publisher metadata.\n- High value: SHA-1 hashes can be cross-referenced with threat intel (note this for the analyst, but don't fabricate lookups).\n- Cross-check: correlate with shimcache and prefetch for execution confirmation.\n- Expected: normal software installs and updates are common — focus on what appeared recently or doesn't belong."}, 'prefetch': {'name': 'Prefetch', 'category': 'Execution', 'function': 'prefetch', 'description': 'Windows Prefetch artifacts recording executable run metadata such as run counts, last run times, and referenced files. They are high-value evidence for userland execution on supported systems.', 'analysis_hint': 'Hunt for recently first-run utilities, script hosts, and remote administration tools. Review loaded file references for dropped DLLs and staging directories.', 'artifact_guidance': "Strong evidence of program execution with run count and timing.\n- Suspicious: low run-count executables (1-3 runs suggest recently introduced tools), script hosts and LOLBins from user-writable paths, known attacker tools, burst execution patterns.\n- Key fields: last run time and run count together tell you when something new appeared.\n- Cross-check: referenced files/directories within prefetch data can reveal staging locations or payload unpacking paths.\n- Expected: system utilities with high run counts are routine — focus on what's new or rare."}, 'bam': {'name': 'BAM/DAM', 'category': 'Execution', 'function': 'bam', 'description': 'Background Activity Moderator and Desktop Activity Moderator execution tracking tied to user SIDs. 
These entries help attribute process activity to specific user contexts.', 'analysis_hint': 'Correlate BAM/DAM timestamps with logons and process events to identify who launched suspicious binaries. Highlight administrative tools and scripts executed outside normal business patterns.', 'artifact_guidance': "Accurate last-execution timestamps per user. Lightweight but precise.\n- Provides user-to-executable mapping with reliable timestamps — useful for attribution.\n- Suspicious: execution of tools from temp/download/public folders, execution timestamps clustering around incident window.\n- Cross-check: correlate with prefetch and amcache to build a fuller execution picture.\n- Limited data: BAM only stores recent entries and lacks historical depth. Absence doesn't mean non-execution."}, 'userassist': {'name': 'UserAssist', 'category': 'Execution', 'function': 'userassist', 'description': 'Per-user Explorer-driven program execution traces stored in ROT13-encoded registry values. Includes run counts and last execution times for GUI-launched applications.', 'analysis_hint': 'Decode and review rarely used programs, renamed binaries, and LOLBins launched through Explorer. 
Use run-count deltas and last-run times to identify unusual user behavior.', 'artifact_guidance': 'GUI-driven program execution via Explorer shell, per user.\n- Shows what users launched interactively — useful for distinguishing user actions from automated/service execution.\n- Suspicious: rarely used or newly appearing applications, script hosts and LOLBins launched from Explorer, tools from atypical folders.\n- Key fields: run count and last execution time together show behavioral changes.\n- Limited scope: only captures Explorer-launched programs, not command-line or service execution.'}, 'evtx': {'name': 'Windows Event Logs', 'category': 'Event Logs', 'function': 'evtx', 'description': 'Windows event channel records covering authentication, process creation, services, policy changes, and system health. EVTX is often the backbone for timeline and intrusion reconstruction.', 'analysis_hint': 'Pivot on high-signal event IDs for logon, process creation, service installs, account changes, and log clearing. Correlate actor account, host, and parent-child process chains across Security/System channels.', 'artifact_guidance': "Primary security telemetry and event timeline. Richest artifact for incident reconstruction.\n- High-signal Event IDs to prioritize:\n - Logon: 4624 (success), 4625 (failure), 4634 (logoff), 4648 (explicit creds), 4672 (special privileges)\n - Process: 4688 (process creation — command lines are gold)\n - Services: 7045 (new service installed), 4697 (service install via Security log)\n - Accounts: 4720 (created), 4722 (enabled), 4724 (password reset), 4726 (deleted), 4732/4733 (group membership)\n - Anti-forensic: 1102 (audit log cleared)\n- Build event chains: logon → process creation → persistence change, with timestamps.\n- Flag: unusual logon types (Type 3 network, Type 10 RDP from unexpected sources), process command lines with encoding or download cradles, log gaps suggesting clearing.\n- Volume warning: EVTX can have millions of records. 
Focus on the incident time window and high-signal IDs. Don't enumerate routine system noise."}, 'defender.evtx': {'name': 'Defender Logs', 'category': 'Event Logs', 'function': 'defender.evtx', 'description': 'Microsoft Defender event logs describing detections, remediation actions, exclusions, and protection state changes. These records show what malware was seen and how protection responded.', 'analysis_hint': 'Identify detection names, severity, and action outcomes (blocked, quarantined, allowed, failed). Flag tamper protection events, exclusion changes, and repeated detections of the same path.', 'artifact_guidance': 'Endpoint protection detection and response events.\n- Key data: threat names, severity, affected file paths, action taken (blocked/quarantined/allowed/failed).\n- Suspicious: detections where remediation failed, repeated detections of the same threat (reinfection), real-time protection disabled, exclusions added near incident window, tamper protection changes.\n- Cross-check: correlate detection timestamps with execution artifacts to assess whether the malware ran before or after detection.\n- Distinguish real malware detections from PUA/adware noise — severity and threat name are the key differentiators.'}, 'mft': {'name': 'MFT', 'category': 'File System', 'function': 'mft', 'description': 'Master File Table metadata for NTFS files and directories, including timestamps, attributes, and record references. MFT helps reconstruct file lifecycle and artifact provenance at scale.', 'analysis_hint': 'Focus on executable/script creation in user profile, temp, and startup paths near incident time. Check for timestamp anomalies and suspicious rename/move patterns suggesting anti-forensics.', 'artifact_guidance': "Complete file metadata with MACB timestamps for every file on the volume.\n- Key technique: compare $STANDARD_INFORMATION timestamps against $FILE_NAME timestamps. 
Discrepancies suggest timestomping (anti-forensic timestamp manipulation).\n- Suspicious: files created in the incident window in temp/staging directories, executables in unexpected locations, files with creation times newer than modification times (copy indicator).\n- Focus on the incident time window — a full MFT can have millions of entries. Don't enumerate routine system files.\n- Cross-check: file paths found here should correlate with execution artifacts (prefetch, amcache) and persistence mechanisms (runkeys, services, tasks)."}, 'usnjrnl': {'name': 'USN Journal', 'category': 'File System', 'function': 'usnjrnl', 'description': 'NTFS change journal entries capturing create, modify, rename, and delete operations over time. USN is valuable for short-lived files that no longer exist on disk.', 'analysis_hint': 'Track rapid create-delete or rename chains involving scripts, archives, and binaries. Correlate change reasons and timestamps with execution and network artifacts for full activity flow.', 'artifact_guidance': 'NTFS change journal recording file creation, deletion, rename, and attribute changes.\n- Suspicious: file creation/rename in staging directories, batch deletions suggesting cleanup, executable files appearing in temp/download paths, rename operations disguising file types.\n- Anti-forensic value: shows files that were created then deleted (even if they no longer exist on disk).\n- Focus on the incident time window. USN journals can be very large.\n- Cross-check: file operations here should correlate with MFT timestamps, execution artifacts, and recycle bin entries.'}, 'recyclebin': {'name': 'Recycle Bin', 'category': 'File System', 'function': 'recyclebin', 'description': 'Deleted-item metadata including original paths, deletion times, and owning user context. Useful for identifying post-activity cleanup and attempted evidence removal.', 'analysis_hint': 'Prioritize deleted tools, scripts, archives, and credential files tied to suspicious users. 
Compare deletion timestamps against detection events and command history.', 'artifact_guidance': 'Intentionally deleted files with original path and deletion timestamp.\n- Suspicious: deleted executables, scripts, archives, credential material, log files — especially shortly after suspicious execution or detection events.\n- Clusters of deletions in a short window suggest deliberate evidence cleanup.\n- Key fields: original file path (reveals where the file lived) and deletion timestamp (reveals when cleanup happened).\n- Cross-check: correlate deletion timing with Defender detections, execution artifacts, and EVTX events.'}, 'browser.history': {'name': 'Browser History', 'category': 'User Activity', 'function': 'browser.history', 'description': 'Visited URL records with titles and timestamps from supported web browsers. These entries reveal user browsing intent, reconnaissance, and web-based attack paths.', 'analysis_hint': 'Look for phishing domains, file-sharing links, admin portals, and malware delivery infrastructure. Align visit times with downloads, process execution, and authentication events.', 'artifact_guidance': 'Web browsing history showing URLs visited with timestamps.\n- Suspicious: phishing domains, file-sharing/paste sites, malware delivery URLs, C2 panel access, remote access tool download pages, raw IP addresses, suspicious TLDs, search queries for hacking tools or techniques.\n- Cross-check: correlate visit timestamps with browser downloads and subsequent execution artifacts.\n- Context: browsing patterns can reveal reconnaissance, tool acquisition, or data exfiltration via web services.\n- Expected: routine business browsing is noise — focus on what stands out relative to the investigation context.'}, 'browser.downloads': {'name': 'Browser Downloads', 'category': 'User Activity', 'function': 'browser.downloads', 'description': 'Browser download records linking source URLs to local file paths and timing. 
This artifact is key for tracing initial payload ingress and user-acquired tools.', 'analysis_hint': 'Flag executable, script, archive, and disk-image downloads from untrusted domains. Correlate downloaded file names and times with Prefetch, Amcache, and Defender activity.', 'artifact_guidance': 'Files downloaded through web browsers with source URL and local save path.\n- Suspicious: downloaded executables, scripts, archives, disk images, office documents with macros — especially from unknown or suspicious URLs.\n- High-value cross-check: a downloaded file that also appears in execution artifacts (prefetch, amcache) confirms the payload was run.\n- Flag: repeated downloads of similarly named files (retry behavior), downloads from raw IP URLs, filename/extension mismatches.\n- Key fields: source URL, local path, download timestamp.'}, 'powershell_history': {'name': 'PowerShell History', 'category': 'User Activity', 'function': 'powershell_history', 'description': 'PSReadLine command history capturing interactive PowerShell commands entered by users. Often exposes attacker tradecraft such as reconnaissance, staging, and command-and-control setup.', 'analysis_hint': 'Hunt for encoded commands, download cradles, credential access, and remote execution cmdlets. Note gaps or abrupt truncation that may indicate history clearing or alternate execution methods.', 'artifact_guidance': 'Direct record of PowerShell commands typed by users. 
High-value tradecraft evidence.\n- Suspicious: encoded commands (-enc / -EncodedCommand), download cradles (IWR, Invoke-WebRequest, Net.WebClient), execution policy bypasses, AMSI bypasses, credential access cmdlets, discovery commands (whoami, net user, Get-ADUser, nltest), lateral movement (Enter-PSSession, Invoke-Command), file staging and archiving.\n- Anti-forensic: sparse or truncated history may indicate clearing (Clear-History, deletion of ConsoleHost_history.txt).\n- No timestamps: PSReadLine history is a plain text file without timestamps. Sequence matters but timing must come from other artifacts.\n- This is often the highest-signal artifact when present. Treat every line as potentially significant.'}, 'activitiescache': {'name': 'Activities Cache', 'category': 'User Activity', 'function': 'activitiescache', 'description': 'Windows Timeline activity records reflecting user interactions with apps, documents, and URLs. Provides broader behavioral context across applications and time.', 'analysis_hint': 'Use it to build user intent timelines around suspicious periods and identify staging behavior. 
Prioritize activity involving remote access tools, cloud storage, and sensitive document paths.', 'artifact_guidance': "Windows Timeline database tracking application focus time and user activity.\n- Provides a timeline of what applications the user was actively working in, with timestamps.\n- Suspicious: remote access tool usage, cloud storage clients during off-hours, admin utilities not part of the user's normal role, sensitive document access patterns.\n- Context value: establishes what the user was doing before, during, and after suspicious events detected in other artifacts.\n- Cross-check: correlate with execution artifacts and browser history to build a complete activity narrative."}, 'sru.network_data': {'name': 'SRUM Network Data', 'category': 'Network', 'function': 'sru.network_data', 'description': 'System Resource Usage Monitor network telemetry with per-application usage over time. Shows which apps consumed network bandwidth and when.', 'analysis_hint': 'Identify unusual outbound-heavy applications, especially unsigned or rarely seen executables. Correlate spikes with execution artifacts and possible data exfiltration windows.', 'artifact_guidance': "Network usage statistics per application from the SRUM database.\n- Suspicious: large data volumes from unexpected applications (potential exfiltration), network activity from known attacker tools, unusual applications making network connections.\n- Key fields: application name, bytes sent/received, timestamps.\n- Context: helps identify which processes were communicating and how much data moved, even if network logs aren't available.\n- Limitation: SRUM aggregates data over time intervals, so precise timing of individual connections isn't available."}, 'sru.application': {'name': 'SRUM Application', 'category': 'Network', 'function': 'sru.application', 'description': 'SRUM application resource usage records that provide process-level activity context across time slices. 
Helpful for spotting persistence or background abuse patterns.', 'analysis_hint': 'Surface low-prevalence applications active during the incident period or outside baseline hours. Cross-check with BAM, Prefetch, and network logs to confirm suspicious sustained activity.', 'artifact_guidance': 'Application resource usage (CPU time, foreground time) from the SRUM database.\n- Suspicious: high resource usage from unexpected or unknown processes, applications running with significant CPU time but zero foreground time (background/hidden execution).\n- Context: helps identify persistent or resource-intensive processes that may indicate crypto mining, data processing, or long-running attacker tools.\n- Cross-check: application names here should correlate with execution artifacts.\n- Limitation: SRUM data is aggregated — it shows that something ran, not exactly what it did.'}, 'shellbags': {'name': 'Shellbags', 'category': 'Registry', 'function': 'shellbags', 'description': 'Registry traces of folders viewed in Explorer, including local, removable, and network paths. Shellbags can preserve evidence even after files or folders are deleted.', 'analysis_hint': 'Look for access to hidden folders, USB volumes, network shares, and unusual archive locations. Use viewed-path chronology to support staging and collection hypotheses.', 'artifact_guidance': "Folder access history from Explorer — shows what directories users browsed.\n- Suspicious: access to network shares, USB/removable media paths, hidden/system directories, archive contents, other users' profiles, credential stores, and sensitive project directories.\n- Context: path access patterns can reveal reconnaissance (browsing through directories looking for data) and collection/staging behavior.\n- Cross-check: correlate accessed folders with file creation/deletion in MFT/USN and data movement to USB devices.\n- Limitation: shows folder access, not individual file access. 
Timestamps may reflect when the shellbag entry was updated, not necessarily first access."}, 'usb': {'name': 'USB History', 'category': 'Registry', 'function': 'usb', 'description': 'Registry evidence of connected USB devices, including identifiers and connection history metadata. Useful for tracking removable media usage and potential data transfer vectors.', 'analysis_hint': 'Identify unknown devices and compare first/last seen times with suspicious file and user activity. Focus on storage-class devices connected near possible exfiltration or staging events.', 'artifact_guidance': 'USB device connection history from the registry.\n- Key for data exfiltration investigations. Shows what removable storage was connected, when, and by which user.\n- Suspicious: USB devices connected during or shortly after the incident window, devices connected during off-hours, new/unknown devices appearing for the first time near suspicious activity.\n- Key fields: device serial number, vendor/product, first and last connection times.\n- Cross-check: correlate USB connection times with shellbag access to removable media paths and file copy operations in USN journal.'}, 'muicache': {'name': 'MUIcache', 'category': 'Registry', 'function': 'muicache', 'description': 'Cache of executable display strings written when programs are launched via the shell. Can provide residual execution clues for binaries no longer present.', 'analysis_hint': 'Hunt for suspicious executable paths and uncommon tool names absent from standard software inventories. Correlate entries with UserAssist and Shimcache for stronger execution confidence.', 'artifact_guidance': "Supplementary execution evidence — records executable descriptions from PE metadata when programs run.\n- Lower-confidence artifact on its own. 
Use primarily to corroborate findings from prefetch, amcache, and shimcache.\n- Suspicious: uncommon executables in user-writable directories, entries suggesting renamed binaries (description doesn't match filename), known attacker tool names.\n- Value: can reveal executables that ran but were later deleted, since the MUIcache entry persists in the registry.\n- Limitation: no timestamps. Only shows that something ran at some point. Always pair with other artifacts for timing."}, 'sam': {'name': 'SAM Users', 'category': 'Security', 'function': 'sam', 'description': 'Local Security Account Manager user account records and account state metadata. This artifact supports detection of unauthorized local account creation and privilege abuse.', 'analysis_hint': 'Flag newly created, enabled, or reactivated local accounts, especially admin-capable users. Correlate account changes with logon events and lateral movement artifacts.', 'artifact_guidance': "Local user accounts from the SAM registry hive.\n- Suspicious: recently created accounts (especially near the incident window), accounts added to the Administrators group, accounts with names mimicking system accounts, re-enabled previously disabled accounts, password changes on accounts that shouldn't change.\n- Key fields: account name, creation date, last password change, group memberships, account flags (enabled/disabled).\n- Cross-check: account creation/modification should correlate with EVTX Event IDs 4720, 4722, 4724, 4732.\n- Small artifact: SAM typically has few entries. Review all of them, not just flagged ones."}, 'defender.quarantine': {'name': 'Defender Quarantine', 'category': 'Security', 'function': 'defender.quarantine', 'description': 'Metadata about items quarantined by Microsoft Defender, including source path and detection context. 
Indicates which suspicious files were contained and where they originated.', 'analysis_hint': 'Confirm whether detections were successfully quarantined and whether the same paths reappear later. Use quarantine artifacts to pivot into file system, execution, and persistence traces.', 'artifact_guidance': "Files quarantined by Windows Defender — direct evidence of detected malware.\n- Every entry here is significant. This is confirmed detection, not a probabilistic indicator.\n- Key fields: original file path, threat name, detection timestamp.\n- Suspicious: quarantined files from startup/persistence locations (suggests malware achieved persistence before detection), repeated quarantine of the same threat (reinfection cycle), quarantine of attacker tools (mimikatz, cobalt strike, etc.).\n- Cross-check: correlate quarantine timestamps with Defender EVTX for remediation success/failure, and with execution artifacts to determine if the malware ran before being caught.\n- Small artifact: review all entries. Don't skip any."}}
def get_artifact_registry(os_type: str) -> dict[str, dict[str, str]]:
def get_artifact_registry(os_type: str) -> dict[str, dict[str, str]]:
    """Select the artifact catalogue that matches an operating system.

    The raw identifier is passed through
    :func:`~app.os_utils.normalize_os_type` so that the OS check here
    uses the same normalisation as the rest of the codebase.

    Args:
        os_type: Operating system identifier as reported by Dissect's
            ``target.os`` (e.g. ``"windows"``, ``"linux"``).

    Returns:
        :data:`LINUX_ARTIFACT_REGISTRY` when the normalised identifier is
        ``"linux"``; otherwise :data:`WINDOWS_ARTIFACT_REGISTRY`, which
        also serves as the fallback for unrecognised OS types.
    """
    # Only Linux has a dedicated catalogue; every other value falls
    # through to the Windows registry.
    is_linux = normalize_os_type(os_type) == "linux"
    return LINUX_ARTIFACT_REGISTRY if is_linux else WINDOWS_ARTIFACT_REGISTRY

Return the artifact registry appropriate for the given OS type.

Uses app.os_utils.normalize_os_type() for consistent normalisation across the codebase.

Arguments:
  • os_type: Operating system identifier as returned by Dissect's target.os (e.g. "windows", "linux"). The value is normalised to lowercase before comparison.
Returns:

The OS-specific artifact registry dictionary. Defaults to WINDOWS_ARTIFACT_REGISTRY for unrecognised OS types.