diff --git a/src/uipath/runtime/governance/_audit/__init__.py b/src/uipath/runtime/governance/_audit/__init__.py
new file mode 100644
index 0000000..b00769c
--- /dev/null
+++ b/src/uipath/runtime/governance/_audit/__init__.py
@@ -0,0 +1,12 @@
+"""Audit sink framework for governance events.
+
+Internal module. Provides a pluggable audit system that emits governance
+events to one or more sinks. The only built-in sink is ``TracesAuditSink``,
+which creates OpenTelemetry spans that uipath-core's exporter ships to the
+Orchestrator Traces UI. This sink is always registered by every
+:class:`AuditManager` and cannot be disabled by application code — it
+carries the governance audit trail.
+
+Callers import from the submodules directly (``_audit.base``, ``_audit.traces``,
+``_audit.factory``). This package exposes no aggregated symbols.
+"""
diff --git a/src/uipath/runtime/governance/_audit/base.py b/src/uipath/runtime/governance/_audit/base.py
new file mode 100644
index 0000000..3364454
--- /dev/null
+++ b/src/uipath/runtime/governance/_audit/base.py
@@ -0,0 +1,729 @@
+"""Base classes and models for the audit sink framework.
+
+This module provides the core abstractions for the governance audit system:
+- AuditEvent: The data model for audit events
+- EventType: Constants for common event types
+- AuditSink: Abstract base class for sink implementations
+- AuditManager: Central hub for routing events to sinks
+
+The AuditManager uses a background thread to process events asynchronously,
+avoiding blocking the main agent execution path during audit trace HTTP calls.
+"""
+
+from __future__ import annotations
+
+import atexit
+import json
+import logging
+import os
+import queue
+import threading
+import weakref
+from abc import ABC, abstractmethod
+from dataclasses import asdict, dataclass, field
+from datetime import datetime, timezone
+from typing import Any
+
+from uipath.core.governance import EnforcementMode
+
+logger = logging.getLogger(__name__)
+
+
+# Process-wide cleanup machinery for AuditManager instances.
+#
+# A single ``atexit`` hook walks a ``WeakSet`` of live managers on exit
+# and flushes/closes each one. Two important properties:
+#
+# 1. **Bounded atexit registrations.** Per-instance ``atexit.register``
+#    grows the interpreter's atexit list without bound — N runtimes →
+#    N hooks → N × shutdown-timeout total exit delay. One process-level
+#    hook is constant work regardless of how many managers were
+#    constructed.
+#
+# 2. **No strong reference to the manager.** ``WeakSet`` lets a disposed
+#    manager get garbage-collected; if it's already gone by exit time,
+#    we just skip it. A per-instance atexit hook would instead keep the
+#    manager alive until process exit, leaking memory in long-running
+#    ``uipath eval`` runs that build many runtimes serially.
+_live_managers: weakref.WeakSet[AuditManager] = weakref.WeakSet()
+_atexit_registered = False
+_atexit_lock = threading.Lock()
+
+
+def _process_cleanup_managers() -> None:
+    """Flush and close each still-live AuditManager at interpreter exit.
+
+    Works on a list copy of the WeakSet because members can vanish while
+    cleanup is running. Errors are logged at debug level, never raised.
+    """
+    survivors = list(_live_managers)
+    for live in survivors:
+        try:
+            live.flush(timeout=2.0)
+            live.close()
+        except Exception as err:  # noqa: BLE001 - exit cleanup must not raise
+            logger.debug("Audit manager process cleanup error: %s", err)
+
+
+def _register_manager_for_cleanup(manager: AuditManager) -> None:
+    """Track ``manager`` for exit cleanup and install the atexit hook once.
+
+    The registered flag is tested before and again inside ``_atexit_lock``
+    so that racing first-time callers register the handler only once.
+    """
+    global _atexit_registered
+    _live_managers.add(manager)
+    if not _atexit_registered:
+        with _atexit_lock:
+            already_wired = _atexit_registered
+            if not already_wired:
+                atexit.register(_process_cleanup_managers)
+                _atexit_registered = True
+
+
+# =============================================================================
+# Audit Event Model
+# =============================================================================
+
+
+@dataclass
+class AuditEvent:
+    """Sink-agnostic record of a single governance audit event.
+
+    No trace id is carried here on purpose. A sink that needs one
+    resolves it at its own boundary: OTel-backed sinks leave it to
+    the SDK / exporter, and HTTP sinks defer to their injected
+    provider, which resolves at HTTP-call time.
+
+    Attributes:
+        event_type: Type of event (e.g., "rule_evaluation", "hook_end")
+        agent_name: Name of the agent being governed
+        hook: Lifecycle hook where event occurred (optional)
+        data: Event-specific data dictionary
+        metadata: Additional metadata for filtering/routing
+        timestamp: When the event occurred (auto-set to the current UTC
+            time if not provided)
+    """
+
+    event_type: str
+    agent_name: str = "unknown"
+    hook: str = ""
+    data: dict[str, Any] = field(default_factory=dict)
+    metadata: dict[str, Any] = field(default_factory=dict)
+    timestamp: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
+
+    def to_dict(self) -> dict[str, Any]:
+        """Return the event as a plain dict with an ISO-8601 timestamp."""
+        payload = asdict(self)
+        payload.update(timestamp=self.timestamp.isoformat())
+        return payload
+
+    def to_json(self) -> str:
+        """Serialize :meth:`to_dict` to a JSON string."""
+        return json.dumps(self.to_dict())
+
+
+class EventType:
+    """String constants used as ``AuditEvent.event_type`` values."""
+
+    RULE_EVALUATION = "rule_evaluation"
+    HOOK_START = "hook_start"
+    HOOK_END = "hook_end"
+    SESSION_START = "session_start"
+    SESSION_END = "session_end"
+    POLICY_VIOLATION = "policy_violation"
+    POLICY_ALLOW = "policy_allow"
+    PACKS_LOADED = "packs_loaded"
+
+
+# =============================================================================
+# Audit Sink Base Class
+# =============================================================================
+
+
+class AuditSink(ABC):
+    """Abstract base class for audit output destinations.
+
+    Subclass this to create custom audit sinks. Each sink is offered
+    every audit event (see :meth:`accepts`) and decides how to handle it.
+
+    Example:
+        class SlackAuditSink(AuditSink):
+            def __init__(self, webhook_url: str):
+                self.webhook_url = webhook_url
+                self._name = "slack"
+
+            @property
+            def name(self) -> str:
+                return self._name
+
+            def emit(self, event: AuditEvent) -> None:
+                if event.data.get("matched") and event.data.get("action") == "deny":
+                    # Send to Slack on violations
+                    requests.post(self.webhook_url, json=event.to_dict())
+
+            def flush(self) -> None:
+                pass
+    """
+
+    @property
+    @abstractmethod
+    def name(self) -> str:
+        """Unique name identifying this sink among registered sinks."""
+        pass
+
+    @abstractmethod
+    def emit(self, event: AuditEvent) -> None:
+        """Emit an audit event to this sink.
+
+        Args:
+            event: The audit event to emit
+
+        Note:
+            Implementations should handle errors gracefully and not
+            raise exceptions that would disrupt governance evaluation.
+        """
+        pass
+
+    def flush(self) -> None:
+        """Flush any buffered events; the default does nothing.
+
+        Override if sink buffers events before writing.
+        """
+        return
+
+    def close(self) -> None:
+        """Clean up resources; the default does nothing.
+
+        Override if sink holds resources that need cleanup.
+        """
+        return
+
+    def accepts(self, event: AuditEvent) -> bool:
+        """Check if this sink should receive the event.
+
+        Override to filter events. Default accepts all events.
+
+        Args:
+            event: The audit event to check
+
+        Returns:
+            True if sink should receive event, False to skip
+        """
+        return True
+
+
+# =============================================================================
+# Audit Manager
+# =============================================================================
+
+
+class AuditManager:
+    """Manages multiple audit sinks and routes events to them.
+
+    Instance-scoped: each :class:`GovernanceRuntime` owns its own
+    manager. Parallel runtimes (``uipath eval``) don't share sinks,
+    workers, or per-sink failure state.
+
+    Constructor automatically registers the always-on ``traces`` sink
+    (OpenTelemetry → Orchestrator audit UI). This sink writes the
+    governance audit trail and cannot be disabled by application code.
+    Additional sinks can be added via :meth:`register_sink`.
+
+    Thread Safety:
+        Events are queued and processed by a background thread, making
+        :meth:`emit` non-blocking. This avoids blocking agent execution
+        during audit trace HTTP calls.
+    """
+
+    # Trip a sink after this many consecutive emit failures (circuit-breaker).
+    _SINK_FAILURE_THRESHOLD = 10
+    # Bound the async queue so a stuck sink can't grow memory without limit.
+    # Matches the order of magnitude of a long-running agent's per-session
+    # audit volume; on overflow the oldest event is dropped to make room.
+    _DEFAULT_QUEUE_MAXSIZE = 10_000
+
+    def __init__(
+        self,
+        async_mode: bool = True,
+        queue_maxsize: int = _DEFAULT_QUEUE_MAXSIZE,
+        register_default_sinks: bool = True,
+    ) -> None:
+        """Set up sink bookkeeping, the event queue and the optional worker.
+
+        Args:
+            async_mode: When True (default) a background thread delivers
+                       events; when False :meth:`emit` delivers inline.
+            queue_maxsize: Capacity of the async event queue. When it is
+                       full, the oldest queued event is discarded.
+            register_default_sinks: When True (default), add the
+                       always-on ``traces`` sink and enrol this manager
+                       in the shared process-exit cleanup. Tests wanting
+                       a bare manager pass ``False`` and add sinks later.
+        """
+        self._async_mode = async_mode
+        # Remembered so emit() can tell when it runs in a forked child.
+        self._pid = os.getpid()
+
+        # Sink registry plus circuit-breaker state (consecutive failures
+        # and tripped names, both keyed by sink name). All three are
+        # protected by _sinks_lock.
+        self._sinks_lock = threading.Lock()
+        self._sinks: list[AuditSink] = []
+        self._sink_failures: dict[str, int] = {}
+        self._tripped_sinks: set[str] = set()
+
+        # Async delivery plumbing; the worker only exists in async mode.
+        self._shutdown = threading.Event()
+        self._worker_thread: threading.Thread | None = None
+        self._queue: queue.Queue[AuditEvent | None] = queue.Queue(maxsize=queue_maxsize)
+        if self._async_mode:
+            self._start_worker()
+
+        if not register_default_sinks:
+            return
+        self._register_traces_sink()
+        # One shared, weakref-based exit hook rather than a per-instance
+        # ``atexit.register``: the atexit list stays bounded and a disposed
+        # manager is not pinned until process exit.
+        _register_manager_for_cleanup(self)
+
+    def _register_traces_sink(self) -> None:
+        """Create the always-on ``traces`` sink and register it.
+
+        Every manager gets this sink (OpenTelemetry spans for the
+        Orchestrator audit UI). ``factory`` is imported lazily because
+        it imports back into this module and would otherwise form a
+        module-load cycle.
+        """
+        from .factory import create_sink
+
+        traces_sink = create_sink("traces")
+        if traces_sink is None:
+            return
+        self.register_sink(traces_sink)
+        logger.info("Governance audit sink registered: traces")
+
+    def _start_worker(self) -> None:
+        """Spawn the daemon delivery thread unless one is already alive."""
+        if self._worker_thread is not None and self._worker_thread.is_alive():
+            return
+        self._shutdown.clear()
+        worker = threading.Thread(
+            target=self._worker_loop,
+            name="governance-audit-worker",
+            daemon=True,
+        )
+        self._worker_thread = worker
+        worker.start()
+        logger.debug("Background audit worker started")
+
+    def _worker_loop(self) -> None:
+        """Deliver queued events until shutdown, then drain what remains."""
+        while not self._shutdown.is_set():
+            # Wait for an event with a timeout so we can re-check shutdown.
+            try:
+                event = self._queue.get(timeout=0.5)
+            except queue.Empty:
+                continue
+            # Every successful get() must be paired with exactly one
+            # task_done() — including the shutdown sentinel and the case
+            # where _emit_sync raises — otherwise unfinished_tasks never
+            # reaches zero and flush() can only end by timing out.
+            try:
+                if event is None:
+                    # Shutdown sentinel enqueued by close()
+                    break
+                self._emit_sync(event)
+            except Exception as e:
+                logger.warning("Audit worker error: %s", e)
+            finally:
+                self._queue.task_done()
+
+        # Drain remaining events on shutdown
+        self._drain_queue()
+
+    def _drain_queue(self) -> None:
+        """Deliver every event still queued, without blocking."""
+        while True:
+            try:
+                leftover = self._queue.get_nowait()
+            except queue.Empty:
+                return
+            # Exactly one task_done() per get(), even if delivery raises.
+            try:
+                if leftover is None:
+                    continue
+                self._emit_sync(leftover)
+            except Exception as err:
+                logger.warning("Audit drain error: %s", err)
+            finally:
+                self._queue.task_done()
+
+    def _emit_sync(self, event: AuditEvent) -> None:
+        """Deliver ``event`` to each non-tripped sink, tracking sink failures."""
+        with self._sinks_lock:
+            sinks = list(self._sinks)
+            tripped = set(self._tripped_sinks)
+        for sink in sinks:
+            if sink.name in tripped:
+                continue
+            try:
+                if sink.accepts(event):
+                    sink.emit(event)
+                    # Delivered (a filtered-out event must not reset the streak).
+                    with self._sinks_lock:
+                        if self._sink_failures.get(sink.name):
+                            self._sink_failures[sink.name] = 0
+            except Exception as e:
+                with self._sinks_lock:
+                    fails = self._sink_failures.get(sink.name, 0) + 1
+                    self._sink_failures[sink.name] = fails
+                    tripped_now = fails >= self._SINK_FAILURE_THRESHOLD
+                    if tripped_now:
+                        self._tripped_sinks.add(sink.name)
+                if tripped_now:
+                    logger.error(
+                        "Audit sink '%s' tripped after %d consecutive failures; "
+                        "will be skipped for the rest of this process. Last error: %s",
+                        sink.name,
+                        fails,
+                        e,
+                    )
+                else:
+                    logger.warning(
+                        "Audit sink '%s' failed to emit event (%d/%d): %s",
+                        sink.name,
+                        fails,
+                        self._SINK_FAILURE_THRESHOLD,
+                        e,
+                    )
+
+    def register_sink(self, sink: AuditSink) -> None:
+        """Add ``sink`` to this manager unless its name is already taken.
+
+        Args:
+            sink: The sink to register
+
+        Note:
+            A sink whose name matches a registered sink is ignored.
+            On a successful registration any circuit-breaker state stored
+            under that name is discarded, so the new sink starts with a
+            clean failure count even if the internal counters were
+            manipulated externally (tests, future callers).
+        """
+        with self._sinks_lock:
+            taken = {existing.name for existing in self._sinks}
+            if sink.name in taken:
+                logger.debug("Sink '%s' already registered, skipping", sink.name)
+                return
+            self._sinks.append(sink)
+            self._sink_failures.pop(sink.name, None)
+            self._tripped_sinks.discard(sink.name)
+        logger.info("Registered audit sink: %s", sink.name)
+
+    def unregister_sink(self, name: str) -> bool:
+        """Remove the sink called ``name`` and close it.
+
+        Args:
+            name: Name of the sink to remove
+
+        Returns:
+            True if sink was removed, False if not found
+        """
+        with self._sinks_lock:
+            position = next(
+                (i for i, s in enumerate(self._sinks) if s.name == name), None
+            )
+            if position is None:
+                return False
+            removed = self._sinks.pop(position)
+            self._sink_failures.pop(name, None)
+            self._tripped_sinks.discard(name)
+        # The sink is closed after the lock is released; a failing
+        # close() is logged and the removal still counts as done.
+        try:
+            removed.close()
+        except Exception as err:
+            logger.warning("Audit sink '%s' failed to close: %s", name, err)
+        logger.info("Unregistered audit sink: %s", name)
+        return True
+
+    def get_sink(self, name: str) -> AuditSink | None:
+        """Return the registered sink called ``name``, or ``None`` if absent."""
+        with self._sinks_lock:
+            return next(
+                (s for s in self._sinks if s.name == name),
+                None,
+            )
+
+    def list_sinks(self) -> list[str]:
+        """Return the names of the registered sinks, in registration order."""
+        with self._sinks_lock:
+            return [registered.name for registered in self._sinks]
+
+    def emit(self, event: AuditEvent) -> None:
+        """Hand ``event`` to the registered sinks.
+
+        In async mode (default) the event is only enqueued and the call
+        returns at once, so sink I/O never stalls the agent's own
+        execution path. In sync mode the sinks run inline.
+
+        When called in a forked child that inherited the parent's
+        manager, the queue and worker are rebuilt first; otherwise the
+        event would sit in a queue that nothing drains.
+
+        Args:
+            event: The audit event to emit
+        """
+        self._ensure_alive_after_fork()
+
+        if not self._async_mode:
+            self._emit_sync(event)
+            return
+
+        # Never block the caller: a full queue means the worker is behind
+        # (e.g. wedged on a slow sink), so evict the oldest entry to keep
+        # memory bounded and retry once.
+        try:
+            self._queue.put_nowait(event)
+            return
+        except queue.Full:
+            pass
+        try:
+            self._queue.get_nowait()
+            self._queue.task_done()
+        except queue.Empty:
+            pass
+        try:
+            self._queue.put_nowait(event)
+        except queue.Full:
+            # The queue refilled between the eviction and the retry;
+            # drop this event instead of blocking.
+            pass
+
+    def _ensure_alive_after_fork(self) -> None:
+        """Rebuild the queue and worker when running in a forked child.
+
+        The pid is compared before and again under ``_sinks_lock`` so that
+        concurrent :meth:`emit` callers in a fresh child rebuild
+        ``_queue`` / ``_shutdown`` / ``_worker_thread`` only once.
+        NOTE(review): ``_sinks_lock`` itself is inherited across fork; if
+        the parent held it at fork time this could block — confirm.
+        """
+        if os.getpid() == self._pid:
+            return  # fast path: same process, no rebuild needed
+        with self._sinks_lock:
+            current_pid = os.getpid()
+            if current_pid == self._pid:
+                return  # another thread won the rebuild race
+            # Child process inherited a dead worker_thread reference and
+            # a queue the parent owned. Rebuild both so child events drain.
+            self._pid = current_pid
+            self._queue = queue.Queue(maxsize=self._queue.maxsize)
+            self._shutdown = threading.Event()
+            self._worker_thread = None
+            if self._async_mode:
+                self._start_worker()
+
+    def emit_rule_evaluation(
+        self,
+        policy_id: str,
+        rule_name: str,
+        pack_name: str,
+        hook: str,
+        matched: bool,
+        action: str,
+        enforcement_mode: EnforcementMode,
+        detail: str = "",
+        agent_name: str = "agent",
+        description: str = "",
+    ) -> None:
+        """Build and emit a ``rule_evaluation`` event for one rule check.
+
+        The per-runtime ``enforcement_mode`` is carried in the event data
+        so sinks need not consult a process-global, which would not be
+        authoritative anyway: instance-scoped runtimes may run in
+        different modes at the same time.
+        """
+        payload: dict[str, Any] = {
+            "policy_id": policy_id,
+            "rule_name": rule_name,
+            "pack_name": pack_name,
+            "matched": matched,
+            "action": action,
+            "enforcement_mode": enforcement_mode,
+            "detail": detail,
+            "description": description,
+            "status": "MATCHED" if matched else "PASS",
+        }
+        event = AuditEvent(
+            event_type=EventType.RULE_EVALUATION,
+            agent_name=agent_name,
+            hook=hook,
+            data=payload,
+        )
+        self.emit(event)
+
+    def emit_hook_summary(
+        self,
+        hook: str,
+        agent_name: str,
+        total_rules: int,
+        matched_rules: int,
+        final_action: str,
+        enforcement_mode: EnforcementMode,
+    ) -> None:
+        """Build and emit a ``hook_end`` event summarizing one hook's rules."""
+        payload: dict[str, Any] = {
+            "total_rules": total_rules,
+            "matched_rules": matched_rules,
+            "final_action": final_action,
+            "enforcement_mode": enforcement_mode,
+        }
+        event = AuditEvent(
+            event_type=EventType.HOOK_END,
+            agent_name=agent_name,
+            hook=hook,
+            data=payload,
+        )
+        self.emit(event)
+
+    def emit_session_start(
+        self,
+        session_id: str,
+        agent_name: str,
+        packs: list[str],
+        enforcement_mode: EnforcementMode,
+    ) -> None:
+        """Build and emit a ``session_start`` event.
+
+        As with :meth:`emit_rule_evaluation` and :meth:`emit_hook_summary`,
+        the per-runtime ``enforcement_mode: EnforcementMode`` travels in
+        the event data, so that sinks never have to depend on a
+        process-global.
+        """
+        payload: dict[str, Any] = {
+            "session_id": session_id,
+            "packs": packs,
+            "enforcement_mode": enforcement_mode,
+        }
+        event = AuditEvent(
+            event_type=EventType.SESSION_START,
+            agent_name=agent_name,
+            data=payload,
+        )
+        self.emit(event)
+
+    def emit_session_end(
+        self,
+        session_id: str,
+        agent_name: str,
+        total_evaluations: int,
+        rules_matched: int,
+        rules_denied: int,
+        enforcement_mode: EnforcementMode,
+    ) -> None:
+        """Build and emit a ``session_end`` event with the session totals."""
+        payload: dict[str, Any] = {
+            "session_id": session_id,
+            "total_evaluations": total_evaluations,
+            "rules_matched": rules_matched,
+            "rules_denied": rules_denied,
+            "enforcement_mode": enforcement_mode,
+        }
+        event = AuditEvent(
+            event_type=EventType.SESSION_END,
+            agent_name=agent_name,
+            data=payload,
+        )
+        self.emit(event)
+
+    def flush(self, timeout: float = 5.0) -> None:
+        """Wait for queued events to be delivered, then flush every sink.
+
+        In async mode, polls ``unfinished_tasks`` until it reaches zero or
+        ``timeout`` seconds elapse, logging a warning on timeout.
+        ``queue.Queue.join`` is avoided because it takes no timeout and
+        would block indefinitely on a wedged sink. Each sink's ``flush``
+        is then called regardless of whether the queue drained; a sink
+        that raises is logged and does not stop the others.
+
+        Args:
+            timeout: Maximum seconds to wait for queue to drain (default 5.0)
+        """
+        if self._async_mode:
+            import time
+
+            deadline = time.monotonic() + max(0.0, timeout)
+            poll_interval = min(0.05, timeout) if timeout > 0 else 0.0
+            while time.monotonic() < deadline:
+                try:
+                    if self._queue.unfinished_tasks == 0:
+                        break
+                except Exception:  # noqa: BLE001 - queue introspection is best-effort
+                    break
+                time.sleep(poll_interval)
+            else:
+                # Loop ended without a break, i.e. the deadline passed.
+                # Log so a wedged sink is surfaced rather than swallowed.
+                try:
+                    pending = self._queue.unfinished_tasks
+                except Exception:  # noqa: BLE001
+                    pending = -1
+                if pending:
+                    logger.warning(
+                        "Audit queue did not drain within %.2fs "
+                        "(unfinished tasks=%s); sink may be wedged",
+                        timeout, pending,
+                    )
+
+        with self._sinks_lock:
+            sinks = list(self._sinks)
+        for sink in sinks:
+            try:
+                sink.flush()
+            except Exception as e:
+                logger.warning("Audit sink '%s' failed to flush: %s", sink.name, e)
+
+    def close(self) -> None:
+        """Stop the worker, then close and forget every registered sink.
+
+        Shutdown is bounded: ``_shutdown`` is the signal the worker
+        polls, and the ``None`` sentinel is a best-effort wake-up whose
+        ``put_nowait`` fails fast when the queue is full. Safe to call
+        repeatedly: the worker reference is cleared after the first
+        call, so later calls enqueue no stray sentinel.
+        """
+        worker = self._worker_thread
+        if self._async_mode and worker is not None:
+            # Signal shutdown first so the worker's loop exits on its next
+            # iteration even if the sentinel cannot be enqueued.
+            self._shutdown.set()
+            try:
+                self._queue.put_nowait(None)  # Wake up worker
+            except queue.Full:
+                # Queue saturated: rely on the flag or the join timeout.
+                logger.debug(
+                    "Audit queue full at shutdown; relying on _shutdown signal"
+                )
+
+            # Wait (bounded) for the worker, then forget it so that a
+            # repeated close() leaves no undrained sentinel behind.
+            if worker.is_alive():
+                worker.join(timeout=2.0)
+            self._worker_thread = None
+            logger.debug("Background audit worker stopped")
+
+        with self._sinks_lock:
+            sinks = list(self._sinks)
+            self._sinks.clear()
+            self._sink_failures.clear()
+            self._tripped_sinks.clear()
+        for sink in sinks:
+            try:
+                sink.close()
+            except Exception as e:
+                logger.warning("Audit sink '%s' failed to close: %s", sink.name, e)
+
+
diff --git a/src/uipath/runtime/governance/_audit/factory.py b/src/uipath/runtime/governance/_audit/factory.py
new file mode 100644
index 0000000..334f867
--- /dev/null
+++ b/src/uipath/runtime/governance/_audit/factory.py
@@ -0,0 +1,33 @@
+"""Factory function for creating audit sinks by name.
+
+Used by :class:`AuditManager` to construct the always-on ``traces``
+sink at initialization.
+"""
+
+from __future__ import annotations
+
+import logging
+
+from .base import AuditSink
+
+logger = logging.getLogger(__name__)
+
+
+def create_sink(name: str) -> AuditSink | None:
+    """Build the audit sink registered under ``name``.
+
+    Args:
+        name: Sink name, matched case-insensitively (only ``traces`` exists).
+
+    Returns:
+        The created sink, or ``None`` if the name is unknown.
+    """
+    normalized = name.lower()
+    if normalized != "traces":
+        logger.warning("Unknown audit sink: %s", normalized)
+        return None
+
+    # Imported lazily, only when the sink is actually requested.
+    from .traces import TracesAuditSink
+
+    return TracesAuditSink()
diff --git a/src/uipath/runtime/governance/_audit/traces.py b/src/uipath/runtime/governance/_audit/traces.py
new file mode 100644
index 0000000..a529996
--- /dev/null
+++ b/src/uipath/runtime/governance/_audit/traces.py
@@ -0,0 +1,334 @@
+"""OpenTelemetry traces audit sink for Orchestrator integration.
+
+This sink creates OpenTelemetry spans for governance events. UiPath's
+OTel exporter (``uipath.tracing._otel_exporters.LlmOpsHttpExporter`` via
+``_SpanUtils.otel_span_to_uipath_span``) is what ships them to the
+Orchestrator Traces UI and is also what reads ``UIPATH_TRACE_ID``,
+``UIPATH_ORGANIZATION_ID``, ``UIPATH_TENANT_ID``, ``UIPATH_FOLDER_KEY``
+and ``UIPATH_JOB_KEY`` from the process environment and stamps them onto
+the outgoing ``UiPathSpan``. We intentionally do **not** duplicate that
+env-reading here — the exporter is the single source of truth for the
+job-execution context.
+"""
+
+from __future__ import annotations
+
+import importlib.metadata
+import logging
+from typing import Any
+
+from uipath.core.governance import EnforcementMode
+
+from .base import AuditEvent, AuditSink, EventType
+
+logger = logging.getLogger(__name__)
+
+
+def _package_version() -> str:
+    """Return the installed ``uipath-runtime`` version, or ``"unknown"`` if absent."""
+    try:
+        return importlib.metadata.version("uipath-runtime")
+    except importlib.metadata.PackageNotFoundError:
+        return "unknown"
+
+
+# Stamped on every governance span as ``uipath_governance.version`` so
+# consumers can correlate the trace payload shape with the runtime
+# release that produced it. Resolved once at import time — the installed
+# package version doesn't change for the life of the process.
+SCHEMA_VERSION = _package_version()
+
+# Value for the ``type`` / ``span_type`` span attributes on every
+# governance span. Matches ``SpanType.AGENT_RUN`` in uipath-agents-python
+# — we use the string literal here (not a cross-package import) to keep
+# uipath-runtime free of a uipath-agents dependency. If the agents-side
+# registry adds new values, this constant is the single place to update.
+SPAN_TYPE_AGENT_RUN = "agentRun"
+
+# Identifies this auditor on every governance span. Lets a downstream
+# consumer distinguish traces emitted by the Python in-runtime governance
+# checker from those produced by the governance-server (or any future
+# language-specific governance SDK). Set as the ``uipath_governance.source``
+# span attribute on every governance trace span.
+GOVERNANCE_SOURCE = "governance-checker-python"
+
+# Shared attribute namespace for every key in the unified governance trace
+# contract (§4 of the cross-product unification doc). Interpolated into each
+# namespaced attribute key (and the rule span name) so the prefix appears in
+# one place and a future rename (or alias) is a one-line change.
+NS = "uipath_governance"
+
+# Unified-contract enum values (UPPER_SNAKE per §3 of the spec).
+EVALUATOR_ALLOW = "ALLOW"
+EVALUATOR_DENY = "DENY"
+EVALUATOR_HITL = "HITL"
+
+ACTION_ALLOW = "ALLOW"
+ACTION_DENY = "DENY"
+ACTION_HITL = "HITL"
+ACTION_AUDIT = "AUDIT"
+ACTION_NONE = "NONE"
+
+def _resolve_mode(event: AuditEvent) -> EnforcementMode:
+    """Return the enforcement mode carried in ``event.data``.
+
+    The mode travels with the event — the emitter stamps it from the
+    per-runtime :attr:`GovernanceRuntime.enforcement_mode` — so the sink
+    never reads a process-global that would not be authoritative when
+    several runtimes run in parallel.
+
+    An :class:`EnforcementMode` value is returned as-is and a string is
+    lower-cased and converted. A missing field or an unrecognised value
+    is a contract violation by the emitter, but it resolves to ``AUDIT``
+    (the safe option) instead of crashing the sink.
+    """
+    raw = event.data.get("enforcement_mode")
+    if isinstance(raw, EnforcementMode):
+        return raw
+    if not isinstance(raw, str):
+        return EnforcementMode.AUDIT
+    try:
+        return EnforcementMode(raw.lower())
+    except ValueError:
+        return EnforcementMode.AUDIT
+
+
+def _derive_results(
+    matched: bool, configured_action: str, mode: EnforcementMode
+) -> tuple[str, str]:
+    """Map a raw evaluation onto the spec's ``(evaluator_result, action_applied)``.
+
+    Args:
+        matched: Whether the rule (or the hook summary) fired.
+        configured_action: Configured action name; compared lower-cased.
+        mode: Enforcement mode of the runtime that produced the event.
+
+    Returns:
+        The ``(evaluator_result, action_applied)`` pair:
+
+        - ``(ALLOW, NONE)`` when nothing matched or the action is ``allow``.
+        - ``evaluator_result`` is otherwise mode-independent: ``HITL`` for
+          ``escalate``, ``DENY`` for any other action (``audit`` included).
+        - ``action_applied`` is ``AUDIT`` for a rule configured as ``audit``
+          (a per-rule override that never blocks, even under ENFORCE) and
+          for every mode other than ENFORCE.
+        - Under ENFORCE, any other matched rule has its decision applied:
+          ``action_applied`` equals ``evaluator_result`` (``DENY``/``HITL``).
+    """
+    action = configured_action.lower()
+
+    if not matched or action == "allow":
+        return EVALUATOR_ALLOW, ACTION_NONE
+
+    escalated = action == "escalate"
+    evaluator = EVALUATOR_HITL if escalated else EVALUATOR_DENY
+
+    # A rule configured as ``audit`` never blocks, whatever the mode, and
+    # nothing blocks outside ENFORCE: both cases report AUDIT.
+    if action == "audit" or mode != EnforcementMode.ENFORCE:
+        return evaluator, ACTION_AUDIT
+
+    # ENFORCE applies the evaluator's own decision.
+    return evaluator, ACTION_HITL if escalated else ACTION_DENY
+
+class TracesAuditSink(AuditSink):
+    """Audit sink that creates OpenTelemetry spans.
+
+    Spans appear in UiPath Orchestrator Traces UI, providing structured
+    data for each governance evaluation.
+    """
+
+    def __init__(self) -> None:
+        """Initialize the sink with a deferred tracer and zero span count."""
+        self._tracer: Any = None  # Can be None, Tracer, or False
+        self._spans_created = 0
+
+    @property
+    def name(self) -> str:
+        """Constant sink identifier."""
+        return "traces"
+
+    def _get_tracer(self) -> Any:
+        """Get or create the OpenTelemetry tracer."""
+        if self._tracer is None:
+            try:
+                from opentelemetry import trace
+
+                self._tracer = trace.get_tracer("uipath.governance")
+                logger.info("OpenTelemetry tracer initialized for governance traces")
+            except ImportError:
+                # OpenTelemetry is supplied transitively by uipath-core; an
+                # ImportError here means the host install is broken or
+                # governance is running outside the UiPath SDK environment.
+                logger.warning(
+                    "OpenTelemetry not available - governance traces disabled. "
+                    "OTel is normally provided by uipath-core; reinstall the SDK."
+                )
+                self._tracer = False
+        return self._tracer if self._tracer else None
+
+    def emit(self, event: AuditEvent) -> None:
+        """Create a span for RULE_EVALUATION or HOOK_END events; drop others."""
+        if event.event_type == EventType.RULE_EVALUATION:
+            self._emit_rule_span(event)
+        elif event.event_type == EventType.HOOK_END:
+            self._emit_hook_span(event)
+
+    def _emit_hook_span(self, event: AuditEvent) -> None:
+        """Create a span for a hook summary (always emitted for each governance check)."""
+        tracer = self._get_tracer()
+        if tracer is None:
+            return
+
+        try:
+            from opentelemetry import context
+
+            data = event.data
+            hook = event.hook or "unknown"
+            span_name = f"governance.{hook.lower()}"
+
+            # Use the current OTel context if one is active; otherwise
+            # start a root span. The governance span appears as a child
+            # of whichever span is current (e.g. the runtime's root
+            # span) or as a fresh root.
+            #
+            # We don't touch org/tenant/folder/job/trace ids here — the
+            # uipath OTel exporter resolves those at export time from the
+            # process env (see module docstring).
+            ctx = context.get_current()
+
+            with tracer.start_as_current_span(span_name, context=ctx) as span:
+                # Required for Orchestrator Traces
+                span.set_attribute("type", SPAN_TYPE_AGENT_RUN)
+                span.set_attribute("span_type", SPAN_TYPE_AGENT_RUN)
+                span.set_attribute("uipath.custom_instrumentation", True)
+
+                # Identifies which agent emitted this audit trace. Lets
+                # downstream consumers (Orchestrator Traces UI, audit
+                # dashboards) filter governance spans by producer when
+                # multiple SDKs / governance backends co-exist.
+                span.set_attribute(f"{NS}.source", GOVERNANCE_SOURCE)
+                # Hook summary attributes. Mode comes from the event — the
+                # evaluator stamps it from the per-runtime instance, so the
+                # sink is correct for parallel runtimes running different
+                # modes.
+                mode = _resolve_mode(event)
+                final_action = data.get("final_action", "allow")
+                _, action_applied = _derive_results(
+                    matched=final_action.lower() != "allow",
+                    configured_action=final_action,
+                    mode=mode,
+                )
+                span.set_attribute(f"{NS}.hook", hook)
+                span.set_attribute(f"{NS}.action_applied", action_applied)
+                span.set_attribute(f"{NS}.mode", mode.value.upper())
+                span.set_attribute(f"{NS}.version", SCHEMA_VERSION)
+
+                # Hook spans are summary containers — they're left at Status.UNSET
+                # regardless of final_action. Severity is carried by the per-rule
+                # spans (see _emit_rule_span); marking the hook span as ERROR would
+                # falsely paint the entire lifecycle phase as failed when only a
+                # specific rule fired underneath.
+
+                self._spans_created += 1
+
+        except Exception as e:
+            logger.warning("Failed to create governance hook span: %s", e)
+
+    def _emit_rule_span(self, event: AuditEvent) -> None:
+        """Create a span for a rule evaluation."""
+        tracer = self._get_tracer()
+        if tracer is None:
+            return
+
+        try:
+            from opentelemetry import context
+
+            data = event.data
+            policy_id = data.get("policy_id", "unknown")
+            span_name = f"{NS}.rule.{policy_id}"
+
+            # See note in _emit_hook_span: rely on the current OTel context
+            # rather than fabricating a remote-parent span_id; and let the
+            # uipath OTel exporter populate the job-execution context.
+            ctx = context.get_current()
+
+            with tracer.start_as_current_span(span_name, context=ctx) as span:
+                # Required for Orchestrator Traces
+                span.set_attribute("type", SPAN_TYPE_AGENT_RUN)
+                span.set_attribute("span_type", SPAN_TYPE_AGENT_RUN)
+                span.set_attribute("uipath.custom_instrumentation", True)
+
+                # Identifies which agent emitted this audit trace. Lets
+                # downstream consumers (Orchestrator Traces UI, audit
+                # dashboards) filter governance spans by producer when
+                # multiple SDKs / governance backends co-exist.
+                span.set_attribute(f"{NS}.source", GOVERNANCE_SOURCE)
+
+                # Derive the spec-vocabulary verdict pair from the raw
+                # (matched, configured action, mode) tuple. Mode comes
+                # from the event (per-runtime instance) so parallel
+                # runtimes running different modes don't cross-contaminate.
+                # Single source of truth for the emitted attributes below
+                # AND the verbosityLevel/Status decision further down.
+                mode = _resolve_mode(event)
+                configured_action = data.get("action", "allow")
+                matched = bool(data.get("matched", False))
+                evaluator_result, action_applied = _derive_results(
+                    matched=matched,
+                    configured_action=configured_action,
+                    mode=mode,
+                )
+
+                # Governance attributes
+                span.set_attribute(f"{NS}.policy_id", policy_id)
+                span.set_attribute(f"{NS}.rule_name", data.get("rule_name", ""))
+                span.set_attribute(f"{NS}.pack_name", data.get("pack_name", ""))
+                span.set_attribute(f"{NS}.hook", event.hook or "unknown")
+                span.set_attribute(f"{NS}.evaluator_result", evaluator_result)
+                span.set_attribute(f"{NS}.action_applied", action_applied)
+                span.set_attribute(f"{NS}.mode", mode.value.upper())
+                span.set_attribute(f"{NS}.version", SCHEMA_VERSION)
+
+                detail = data.get("detail", "")
+                if detail:
+                    span.set_attribute(f"{NS}.evidence", detail[:500])
+
+                # Severity is driven off the derived ``action_applied``:
+                #
+                # - ``DENY`` — runtime actually blocked the agent →
+                #   verbosityLevel=4 (Error) + Status.ERROR. The agent
+                #   span genuinely failed.
+                # - ``AUDIT`` / ``HITL`` — advisory only; runtime did NOT
+                #   block → verbosityLevel=3 (Warning), Status stays
+                #   UNSET. The agent's span shouldn't be marked failed
+                #   just because an advisory rule fired.
+                # - ``ALLOW`` / ``NONE`` — no verbosityLevel attribute
+                #   (Orchestrator default = 2, Information).
+                if action_applied == ACTION_DENY:
+                    span.set_attribute("verbosityLevel", 4)
+                    try:
+                        from opentelemetry.trace import Status, StatusCode
+
+                        span.set_status(
+                            Status(
+                                StatusCode.ERROR,
+                                f"Policy violation: "
+                                f"{data.get('rule_name', policy_id)} "
+                                f"(action={configured_action.lower()})",
+                            )
+                        )
+                    except ImportError:
+                        pass
+                elif action_applied in (ACTION_AUDIT, ACTION_HITL):
+                    span.set_attribute("verbosityLevel", 3)
+
+                self._spans_created += 1
+
+        except Exception as e:
+            logger.warning("Failed to create governance span: %s", e)
+
+    @property
+    def spans_created(self) -> int:
+        """Number of spans created."""
+        return self._spans_created
diff --git a/src/uipath/runtime/governance/native/__init__.py b/src/uipath/runtime/governance/native/__init__.py
new file mode 100644
index 0000000..713a05d
--- /dev/null
+++ b/src/uipath/runtime/governance/native/__init__.py
@@ -0,0 +1,45 @@
+"""Native UiPath governance policy evaluator.
+
+YAML-defined rules evaluated in-process at each agent lifecycle hook.
+The host fetches the policy pack via the
+:class:`GovernancePolicyProvider` protocol and compiles it into a
+:class:`PolicyIndex` with :func:`build_policy_index_from_yaml` *before*
+constructing :class:`GovernanceRuntime` — so the runtime layer never
+performs I/O at construction time.
+
+This subpackage owns:
+
+- :class:`GovernanceEvaluator` – the evaluator implementation.
+- :func:`build_policy_index_from_yaml` – pure YAML → :class:`PolicyIndex`
+  compiler.
+- The native policy model: :class:`Rule`, :class:`Check`, :class:`Condition`,
+  :class:`PolicyIndex`, plus ``PolicyPack``, ``CheckContext`` and ``Severity``.
+
+Shared output types (``Action``, ``AuditRecord``, …) live in
+:mod:`uipath.core.governance`.
+"""
+
+from ._yaml_to_index import build_policy_index_from_yaml
+from .evaluator import GovernanceEvaluator
+from .models import (
+    Check,
+    CheckContext,
+    Condition,
+    PolicyIndex,
+    PolicyPack,
+    Rule,
+    Severity,
+)
+
+__all__ = [
+    "GovernanceEvaluator",
+    "build_policy_index_from_yaml",
+    # Native policy model (re-exported from ``.models``)
+    "Check",
+    "CheckContext",
+    "Condition",
+    "PolicyIndex",
+    "PolicyPack",
+    "Rule",
+    "Severity",
+]
diff --git a/src/uipath/runtime/governance/native/_yaml_to_index.py b/src/uipath/runtime/governance/native/_yaml_to_index.py
index 3bf264c..9abdec3 100644
--- a/src/uipath/runtime/governance/native/_yaml_to_index.py
+++ b/src/uipath/runtime/governance/native/_yaml_to_index.py
@@ -1,10 +1,11 @@
 """Runtime YAML → PolicyIndex parser.
 
-Mirrors the shape produced by ``packs/compile_packs.py`` but builds the
-PolicyIndex directly from parsed YAML data rather than generating Python
-source. Used by :mod:`uipath.runtime.governance.native.loader` to
-compile the YAML body returned by the registered policy provider into
-an in-memory index at startup.
+Mirrors the shape produced by ``packs/compile_packs.py`` but builds
+the :class:`PolicyIndex` directly from parsed YAML data rather than
+generating Python source. The host calls this to compile the YAML
+body returned by :meth:`GovernancePolicyProvider.get_policy_async`
+into an in-memory index, then hands the index to
+:class:`GovernanceRuntime`.
 
 Accepts either a single YAML document (one pack) or a multi-document
 stream (``---``-separated packs). Unknown check types and malformed
diff --git a/src/uipath/runtime/governance/native/evaluator.py b/src/uipath/runtime/governance/native/evaluator.py
new file mode 100644
index 0000000..f629902
--- /dev/null
+++ b/src/uipath/runtime/governance/native/evaluator.py
@@ -0,0 +1,1102 @@
+"""Governance rule evaluator.
+
+Instance-scoped — every :class:`GovernanceRuntime` constructs its own
+evaluator with explicit dependencies (audit manager, compensator,
+enforcement mode). The evaluator does not reach across the runtime
+layer through process-globals; the wiring layer composes the runtime
+graph and the evaluator consumes what it's given.
+"""
+
+from __future__ import annotations
+
+import logging
+import math
+import re
+from collections import Counter
+from datetime import datetime, timezone
+from functools import lru_cache
+from typing import Any
+
+from uipath.core.governance import EnforcementMode
+from uipath.core.governance.exceptions import GovernanceBlockException
+from uipath.core.governance.models import (
+    Action,
+    AuditRecord,
+    LifecycleHook,
+    RuleEvaluation,
+)
+
+from uipath.runtime.governance._audit.base import AuditManager
+from uipath.runtime.governance.native.guardrail_compensation import (
+    GuardrailCompensator,
+    disabled_guardrails,
+)
+from uipath.runtime.governance.native.models import (
+    Check,
+    CheckContext,
+    Condition,
+    PolicyIndex,
+    Rule,
+)
+
+logger = logging.getLogger(__name__)
+
+
+def _compensation_data_for_hook(context: CheckContext) -> dict[str, Any]:
+    """Assemble the ``data`` payload for the /runtime/govern compensating call.
+
+    Only the :class:`CheckContext` fields that belong to ``context.hook``
+    are forwarded, so the server checks the same content the evaluator
+    saw and the payload stays tight. Agent and model hooks send
+    ``content`` (the before-model hook adds ``messages`` when that field
+    is truthy); tool hooks send ``tool_name`` plus their args or result.
+    """
+    hook = context.hook
+    if hook == LifecycleHook.BEFORE_AGENT:
+        return {"content": context.agent_input}
+    if hook == LifecycleHook.AFTER_AGENT:
+        return {"content": context.agent_output}
+    if hook == LifecycleHook.BEFORE_MODEL:
+        body = {"content": context.model_input}
+        return {**body, "messages": context.messages} if context.messages else body
+    if hook == LifecycleHook.AFTER_MODEL:
+        return {"content": context.model_output}
+    if hook == LifecycleHook.TOOL_CALL:
+        return {"tool_name": context.tool_name, "tool_args": context.tool_args}
+    if hook == LifecycleHook.AFTER_TOOL:
+        return {"tool_name": context.tool_name, "tool_result": context.tool_result}
+    # Any other hook (memory writes included) still gets a
+    # structurally valid payload: an empty ``content``.
+    return {"content": ""}
+
+
+@lru_cache(maxsize=256)
+def _compile_regex(pattern: str) -> re.Pattern[str] | None:
+    """Compile ``pattern``; results are memoised by :func:`lru_cache`.
+
+    Returns the compiled pattern, or ``None`` (after logging a warning)
+    when ``pattern`` is not a valid regular expression. Because the
+    ``None`` result is cached too, the warning is logged at most once
+    per pattern while the entry remains in the cache.
+    """
+    try:
+        compiled = re.compile(pattern)
+    except re.error as exc:
+        logger.warning("Invalid regex pattern '%s': %s", pattern, exc)
+        return None
+    return compiled
+
+
+# --- vaderSentiment: lazy-imported singleton ---
+# A hard dependency, yet imported on first use so its cost stays off the
+# import-time critical path. The ``except`` branch guards against a
+# corrupted install (listed in METADATA but not importable): the
+# operator then no-ops instead of crashing the agent.
+_VADER_UNINITIALIZED = object()
+_vader_analyzer: Any = _VADER_UNINITIALIZED
+
+
+def _get_vader_analyzer() -> Any:
+    """Return the shared SentimentIntensityAnalyzer, or None if the import fails."""
+    global _vader_analyzer
+    if _vader_analyzer is not _VADER_UNINITIALIZED:
+        return _vader_analyzer
+    try:
+        from vaderSentiment.vaderSentiment import (  # type: ignore[import-untyped]
+            SentimentIntensityAnalyzer,
+        )
+        _vader_analyzer = SentimentIntensityAnalyzer()
+    except ImportError:
+        logger.error(
+            "vaderSentiment failed to import despite being a hard dependency; "
+            "sentiment_concern checks will not fire. Reinstall uipath-core."
+        )
+        _vader_analyzer = None
+    return _vader_analyzer
+
+
+# --- chardet: lazy-imported module for encoding integrity (A.7.4) ---
+# A hard dependency, imported on first use like the other library
+# wrappers; the ``except`` branch only matters for corrupted installs.
+_CHARDET_UNINITIALIZED = object()
+_chardet_module: Any = _CHARDET_UNINITIALIZED
+
+
+def _get_chardet() -> Any:
+    """Return the lazily imported chardet module, or None if the import fails."""
+    global _chardet_module
+    if _chardet_module is not _CHARDET_UNINITIALIZED:
+        return _chardet_module
+    try:
+        import chardet
+        _chardet_module = chardet
+    except ImportError:
+        logger.error(
+            "chardet failed to import despite being a hard dependency; "
+            "encoding_concern confidence check will not fire (stdlib "
+            "signals still apply). Reinstall uipath-core."
+        )
+        _chardet_module = None
+    return _chardet_module
+
+
+# --- Static patterns for encoding_concern (A.7.4) ---
+# Mojibake sequences (two or three characters, despite the name) — the
+# artefacts left when UTF-8 bytes are decoded as Latin-1 / Windows-1252.
+_MOJIBAKE_BIGRAMS: tuple[str, ...] = (
+    "Ã©",
+    "Ã¨",
+    "Ã¢",
+    "Ã ",
+    "Ã¹",
+    "Ã®",
+    "Ã´",
+    "Ã§",  # accented vowels
+    "Ã„",
+    "Ã–",
+    "Ãœ",
+    "ÃŸ",  # German umlauts / eszett
+    "â€™",
+    "â€œ",
+    "â€\x9d",
+    "â€“",
+    "â€”",
+    "â€¢",  # smart quotes / dashes
+    "Â£",
+    "Â°",
+    "Â§",
+    "Â¶",
+    "Â©",
+    "Â®",  # NBSP-leading symbols
+    "ï¿",
+    "¿½",  # mojibake'd U+FFFD (0xEF 0xBF 0xBD as Latin-1)
+    "ï»",
+    "»¿",  # mojibake'd BOM (0xEF 0xBB 0xBF as Latin-1)
+)
+
+# A backslash, ``x`` and two hex digits appearing as literal text (e.g. the
+# four characters ``\x80``) suggest raw bytes were stringified, not decoded.
+_HEX_ESCAPE_PATTERN = re.compile(r"\\x[0-9a-fA-F]{2}")
+
+
+# --- Static patterns for incident_concern (A.8.4) ---
+# Stdlib-only categorical taxonomy mirroring sentry-sdk's incident shape
+# (categorical types over stack/status), applied to string payloads (model
+# output / tool result), not exceptions. Every textual pattern ignores case.
+_INCIDENT_PATTERNS: dict[str, list[re.Pattern[str]]] = {
+    "safety_refusal": [
+        re.compile(
+            r"(?i)\b(i\s+(?:cannot|can'?t|am\s+unable\s+to|won'?t\s+be\s+able\s+to)"
+            r"\s+(?:help|assist|provide|answer|do\s+that))\b"
+        ),
+        re.compile(r"(?i)\b(i'?m\s+sorry,?\s+but\s+i\s+(?:cannot|can'?t))\b"),
+        re.compile(r"(?i)\b(against\s+my\s+(?:guidelines|policies|programming))\b"),
+    ],
+    "tool_failure": [
+        re.compile(
+            r"(?i)\b(5\d{2})\b\s*(?:internal\s+server\s+error|service\s+unavailable)"
+        ),
+        re.compile(r"(?i)\b(ERR_[A-Z_]+|connection\s+refused|ECONNREFUSED)\b"),
+        re.compile(r"(?i)\b(timed?\s*out|timeout)\b"),
+    ],
+    "auth_failure": [
+        re.compile(r"(?i)\b(401|403)\b\s*(?:unauthori[sz]ed|forbidden)"),
+        re.compile(
+            r"(?i)\b(authentication\s+failed|invalid\s+(?:token|credentials))\b"
+        ),
+    ],
+    "quota_exceeded": [
+        re.compile(r"\b(429)\b"),
+        re.compile(
+            r"(?i)\b(rate\s+limit\s+exceeded|quota\s+exceeded|too\s+many\s+requests)\b"
+        ),
+    ],
+    "hallucination": [
+        re.compile(r"(?i)\b(i\s+(?:made\s+(?:that|this)\s+up|am\s+just\s+guessing))\b"),
+        re.compile(r"(?i)\b(i\s+don'?t\s+actually\s+know|i\s+fabricat(?:ed|ing))\b"),
+    ],
+}
+
+# --- Static patterns for commitment_concern (A.10.4) ---
+# Commitment-language signals. The verb pattern covers both first-person
+# promise verbs ("we will refund") and formal-business commitment markers
+# common in proposal / SOW outputs ("Cost: $X", "fixed scope",
+# "Deliverables", "Timeline: N days", "I propose"). Verb, amount, and
+# deadline signals combine via OR semantics — see
+# :meth:`_check_commitment_concern`.
+_COMMITMENT_VERB_PATTERN = re.compile(
+    r"(?i)("
+    # First-person promise / liability verbs
+    r"\brefund\b|\breimburse\b|"
+    r"\bwarranty\b|\bwarrant(?:y|ed|ies)\b|\bguarante[ed]+\b|"
+    r"\bsla\b|"
+    r"\bwaive[d]?\b|"
+    r"\b(?:we|i)\s+(?:will|shall|promise|commit|guarantee)\b|"
+    r"\b(?:we|i|i'?ll)\s+(?:deliver|provide|complete|finish|"
+    r"handover|hand\s+over|ship)\b|"
+    # Proposal / SOW commitment markers
+    r"\bfixed\s+(?:price|cost|fee|scope|bid|rate)\b|"
+    r"\bcost\s*:\s*\$?\d|"
+    r"\bquote\s*:\s*\$?\d|"
+    r"\bdeliverables?\b|"
+    r"\btimeline\s*:\s*\d+\s*(?:second|minute|hour|day|week|month|year)s?\b|"
+    r"\bI\s+propose\b"
+    r")"
+)
+# Currency-anchored amount detection. Requires a currency marker adjacent
+# to the number so URL fragments (e.g. ``/667851``) don't false-positive.
+# Covers marker-then-number ($780, USD 780) and number-then-code/word
+# (780 USD, 780 dollars). No ``(?i)`` flag, so matching is case-sensitive.
+# Bare percentages (``75%``, ``99.9%``) are deliberately NOT matched: they fire
+# on benign status / progress text ("75% complete", "99.9% uptime") under OR
+# semantics. Commitments that also carry a verb signal ("we will give you a
+# 20% discount", "refund 100%") still fire via the verb pattern.
+_COMMITMENT_AMOUNT_FALLBACK = re.compile(
+    r"(?:\$|€|£|¥|₹|USD|EUR|GBP|JPY|INR)\s*\d[\d,]*(?:\.\d+)?"
+    r"|\b\d[\d,]*(?:\.\d+)?\s*(?:USD|EUR|GBP|JPY|INR|"
+    r"dollars?|euros?|pounds?|yen|rupees?)\b"
+)
+# Deadlines, case-insensitive: "within <n> <unit>" or "by tomorrow|next <word>|<n>/<n>"
+_COMMITMENT_DEADLINE_PATTERN = re.compile(
+    r"(?i)\bwithin\s+\d+\s*(?:second|minute|hour|day|week|month|year)s?\b"
+    r"|\bby\s+(?:tomorrow|next\s+\w+|\d+/\d+(?:/\d+)?)\b"
+)
+
+
+class GovernanceEvaluator:
+    """Evaluates governance rules against check contexts.
+
+    Enforcement modes (under ``DISABLED``, :meth:`evaluate` skips all rules):
+
+    - ``AUDIT``: log all violations but never block (DENY collapses to
+      AUDIT in the final action).
+    - ``ENFORCE``: actually block on DENY rules — raises
+      :class:`GovernanceBlockException` and the agent stops.
+
+    All dependencies (mode, audit manager, compensator) are injected
+    via the constructor. The evaluator does not consult any
+    process-global state — parallel runtimes (``uipath eval``) get
+    their own evaluator with their own audit + compensation pipelines.
+    """
+
+    def __init__(
+        self,
+        policy_index: PolicyIndex,
+        *,
+        enforcement_mode: EnforcementMode = EnforcementMode.AUDIT,
+        audit_manager: AuditManager | None = None,
+        compensator: GuardrailCompensator | None = None,
+    ) -> None:
+        """Store the compiled policy index and the runtime-scoped collaborators.
+
+        Args:
+            policy_index: Compiled :class:`PolicyIndex` whose rules this
+                evaluator runs. The host normally compiles it from the
+                provider's :class:`PolicyResponse` with
+                :func:`build_policy_index_from_yaml`; it is also
+                reachable as :attr:`GovernanceRuntime.policy_index`.
+            enforcement_mode: Mode applied by :meth:`evaluate`. The
+                default, ``AUDIT``, is the safe choice for callers that
+                never opt in to ENFORCE. The wiring layer should pass
+                ``runtime.enforcement_mode`` so that the evaluator and
+                the wrapping :class:`GovernanceRuntime` share a single
+                source of truth.
+            audit_manager: Per-runtime :class:`AuditManager`. With
+                ``None`` the evaluator runs silently and emits no audit
+                events, which suits tests that don't care about
+                emission.
+            compensator: Per-runtime :class:`GuardrailCompensator`
+                that dispatches ``/runtime/govern`` POSTs for
+                guardrail-fallback rules. With ``None`` the dispatch is
+                skipped; matched rules are still recorded in the
+                :class:`AuditRecord`.
+        """
+        self._audit_manager = audit_manager
+        self._compensator = compensator
+        self._enforcement_mode = enforcement_mode
+        self._policy_index = policy_index
+
+    @property
+    def policy_index(self) -> PolicyIndex:
+        """Return the compiled :class:`PolicyIndex` passed to the constructor."""
+        return self._policy_index
+
+    @property
+    def mode(self) -> EnforcementMode:
+        """Enforcement mode set at construction (``AUDIT`` by default)."""
+        return self._enforcement_mode
+
+    def is_audit_mode(self) -> bool:
+        """Return ``True`` when the enforcement mode is ``EnforcementMode.AUDIT``."""
+        return self._enforcement_mode == EnforcementMode.AUDIT
+
+    def evaluate(self, context: CheckContext) -> AuditRecord:
+        """Evaluate rules registered for ``context.hook`` against the context.
+
+        Only rules whose ``hook`` field matches the current lifecycle hook
+        are evaluated — a ``tool_call`` rule does not fire on
+        ``before_model``, and vice versa. This avoids running checks
+        against fields the context cannot provide and keeps the audit
+        stream scoped to the active phase. Rules with ``enabled`` false
+        are skipped and do not appear in the record's evaluations.
+
+        The final action depends on the enforcement mode:
+        - DISABLED mode: Short-circuit; no rules evaluated, no audit emitted.
+        - AUDIT mode: DENY and ESCALATE are downgraded to AUDIT (log only,
+          don't block). A downgraded DENY is flagged in the record
+          metadata as ``audit_mode_would_deny``.
+        - ENFORCE mode: DENY rules result in DENY action AND a
+          :class:`GovernanceBlockException` is raised.
+
+        Audit events (per-rule + hook summary) are emitted via the
+        :class:`AuditManager` injected at construction (skipped when
+        none was supplied). Emission and compensation dispatch both run
+        before the block exception is raised, so a denied call is still
+        recorded.
+
+        Args:
+            context: The check context with hook and content
+
+        Returns:
+            AuditRecord with all evaluations and final action.
+
+        Raises:
+            GovernanceBlockException: In ENFORCE mode when a DENY rule matches.
+        """
+        mode = self._enforcement_mode
+        if mode == EnforcementMode.DISABLED:
+            # Governance is off: return an empty ALLOW record without
+            # touching the policy index, the audit manager or the compensator.
+            return AuditRecord(
+                timestamp=datetime.now(timezone.utc),
+                agent_name=context.agent_name,
+                runtime_id=context.runtime_id,
+                hook=context.hook,
+                evaluations=[],
+                final_action=Action.ALLOW,
+                metadata={**context.metadata, "enforcement_mode": mode.value},
+            )
+
+        rules = self._policy_index.get_rules_for_hook(context.hook)
+
+        evaluations: list[RuleEvaluation] = []
+        raw_action = Action.ALLOW  # The action before mode adjustment
+        deny_would_fire = False  # Track if DENY would have fired
+
+        for rule in rules:
+            if not rule.enabled:
+                continue
+
+            evaluation = self._evaluate_rule(rule, context)
+            evaluations.append(evaluation)
+
+            if evaluation.matched:
+                # Take the most restrictive action. Use evaluation.action
+                # (which already folds in per-check overrides), not
+                # rule.action, so check-level overrides are honored here too.
+                # Precedence: DENY > ESCALATE > AUDIT > ALLOW.
+                eval_action = evaluation.action
+                if eval_action == Action.DENY:
+                    raw_action = Action.DENY
+                    deny_would_fire = True
+                elif eval_action == Action.ESCALATE and raw_action != Action.DENY:
+                    raw_action = Action.ESCALATE
+                elif eval_action == Action.AUDIT and raw_action == Action.ALLOW:
+                    raw_action = Action.AUDIT
+
+        # Apply enforcement mode
+        final_action = self._apply_enforcement_mode(raw_action)
+
+        # Build metadata with mode info. The caller's metadata is copied
+        # so the additions below never leak back into the context.
+        record_metadata = dict(context.metadata)
+        record_metadata["enforcement_mode"] = mode.value
+        if deny_would_fire and self.is_audit_mode():
+            record_metadata["audit_mode_would_deny"] = True
+
+        audit = AuditRecord(
+            timestamp=datetime.now(timezone.utc),
+            agent_name=context.agent_name,
+            runtime_id=context.runtime_id,
+            hook=context.hook,
+            evaluations=evaluations,
+            final_action=final_action,
+            metadata=record_metadata,
+        )
+
+        self._emit_audit(audit, mode)
+
+        # For any guardrail mapped to UiPath but currently disabled, hand
+        # the disabled guardrails to the governance-server's
+        # /runtime/govern endpoint. The SERVER runs the guardrail check
+        # AND writes the trace (the payload carries traceId / src_timestamp
+        # / hook / agent so it can correlate) — the agent does NOT emit a
+        # trace itself, to avoid double-writing. Fire-and-forget: the
+        # compensator owns the background scheduling, so a slow or
+        # unreachable endpoint never blocks the agent.
+        self._dispatch_compensation(audit, context)
+
+        # Raised last so the audit trail above is complete for blocked calls.
+        if final_action == Action.DENY:
+            raise GovernanceBlockException.from_audit_record(audit)
+
+        return audit
+
+    def _dispatch_compensation(
+        self, audit: AuditRecord, context: CheckContext
+    ) -> None:
+        """Schedule compensating governance for any matched fallback rules.
+
+        Delegates to the injected :class:`GuardrailCompensator`. The
+        compensator owns concurrency, queue caps, exception isolation,
+        and graceful process-exit cancellation — this method just
+        builds the payload, logs the summary, and submits.
+
+        No-op when no compensator was supplied at construction (e.g.
+        unit tests that don't care about the dispatch path), and when
+        no disabled guardrail is found for this audit record.
+
+        Fail-open: any exception raised while collecting, logging or
+        submitting is logged as a warning and swallowed, so a dispatch
+        problem never breaks rule evaluation.
+
+        Args:
+            audit: The record produced by :meth:`evaluate` for this hook.
+            context: The check context the record was built from; its
+                hook-specific payload is forwarded to the compensator.
+        """
+        if self._compensator is None:
+            return
+
+        try:
+            disabled = disabled_guardrails(audit, self._policy_index)
+            if not disabled:
+                return
+
+            # Distinct validator names (first-seen order) for the
+            # operator-facing log line. ``str()`` keeps a missing or
+            # non-string validator from raising inside ``join`` — that
+            # error would be swallowed by the fail-open handler below
+            # and silently cancel the submit.
+            validators = list(
+                dict.fromkeys(str(rule.validator) for rule in disabled)
+            )
+
+            # Surface the disabled-guardrail fire-up: how many rules
+            # triggered the compensating call, and which validators
+            # they map to (e.g. pii_detection / prompt_injection /
+            # harmful_content). One line per dispatch so an operator
+            # can see the volume + breakdown at a glance.
+            logger.info(
+                "Compensating governance triggered: hook=%s, count=%d, validators=[%s]",
+                audit.hook.value,
+                len(disabled),
+                ", ".join(validators),
+            )
+
+            self._compensator.submit(
+                rules=disabled,
+                data=_compensation_data_for_hook(context),
+                hook=audit.hook.value,
+                src_timestamp=audit.timestamp.isoformat(),
+                agent_name=audit.agent_name,
+                runtime_id=audit.runtime_id,
+            )
+        except Exception as exc:  # noqa: BLE001 - fail-open
+            logger.warning(
+                "Failed to dispatch compensating governance call: %s", exc
+            )
+
+    def _emit_audit(self, audit: AuditRecord, mode: EnforcementMode) -> None:
+        """Forward one hook's evaluations to the injected audit manager.
+
+        Emits one ``rule_evaluation`` event per reportable rule followed
+        by a single hook summary. Does nothing when no audit manager was
+        supplied at construction.
+
+        Args:
+            audit: The record whose evaluations are reported.
+            mode: Enforcement mode forwarded with every event.
+        """
+        sink = self._audit_manager
+        if sink is None:
+            return
+
+        hook_label = audit.hook.name
+
+        # ``guardrail_fallback`` rules are traced by the governance-server
+        # (it receives the ``/runtime/govern`` POST issued from
+        # :meth:`_dispatch_compensation` and records the validator
+        # verdict). Reporting them here as well would duplicate the trace
+        # without a verdict, so they are dropped from both the per-rule
+        # events and the summary counts.
+        reportable = []
+        for item in audit.evaluations:
+            if self._is_guardrail_fallback_rule(item.rule_id):
+                continue
+            reportable.append(item)
+
+        for item in reportable:
+            # An unmatched rule is always reported as "allow".
+            if item.matched:
+                reported_action = item.action.value
+            else:
+                reported_action = "allow"
+            sink.emit_rule_evaluation(
+                policy_id=item.rule_id,
+                rule_name=item.rule_name,
+                pack_name=item.pack_name,
+                hook=hook_label,
+                matched=item.matched,
+                action=reported_action,
+                enforcement_mode=mode,
+                detail=item.detail,
+                agent_name=audit.agent_name,
+                description=item.description,
+            )
+
+        matched_total = len([item for item in reportable if item.matched])
+        sink.emit_hook_summary(
+            hook=hook_label,
+            agent_name=audit.agent_name,
+            total_rules=len(reportable),
+            matched_rules=matched_total,
+            final_action=audit.final_action.value,
+            enforcement_mode=mode,
+        )
+
+    def _is_guardrail_fallback_rule(self, rule_id: str) -> bool:
+        """Tell whether ``rule_id`` names a UiPath-compensating fallback rule.
+
+        A rule counts as a fallback rule when any condition of any of its
+        checks uses the ``guardrail_fallback`` operator. The audit trace
+        for such rules comes from the governance-server (in response to
+        the ``/runtime/govern`` POST), so the Python evaluator must not
+        emit a second one.
+
+        Returns:
+            True for a fallback rule; False otherwise, including when the
+            policy index does not know the rule.
+        """
+        indexed = self._policy_index.get_rule(rule_id)
+        if indexed is None:
+            return False
+        return any(
+            condition.operator == "guardrail_fallback"
+            for check in indexed.checks
+            for condition in check.conditions
+        )
+
+    def _apply_enforcement_mode(self, raw_action: Action) -> Action:
+        """Translate the aggregated rule action into the enforced action.
+
+        AUDIT mode never blocks or escalates:
+        - DENY and ESCALATE are both reported as AUDIT
+        - AUDIT and ALLOW are returned as-is
+
+        Every other mode (ENFORCE in practice) returns ``raw_action``
+        untouched.
+        """
+        if self._enforcement_mode == EnforcementMode.AUDIT:
+            downgraded = raw_action in (Action.DENY, Action.ESCALATE)
+            return Action.AUDIT if downgraded else raw_action
+        return raw_action
+
+    def evaluate_before_agent(
+        self,
+        agent_input: str,
+        agent_name: str,
+        runtime_id: str,
+        model_name: str = "",
+        **kwargs: Any,
+    ) -> AuditRecord:
+        """Evaluate BEFORE_AGENT rules.
+
+        Builds a :class:`CheckContext` for the hook and delegates to
+        :meth:`evaluate`, so the same return value and
+        ``GovernanceBlockException`` behavior apply.
+
+        Only the ``metadata`` keyword is read from ``kwargs``; a missing
+        or ``None`` value becomes an empty dict so :meth:`evaluate` can
+        always copy it.
+        """
+        context = CheckContext(
+            hook=LifecycleHook.BEFORE_AGENT,
+            agent_name=agent_name,
+            runtime_id=runtime_id,
+            agent_input=agent_input,
+            model_name=model_name,
+            metadata=kwargs.get("metadata") or {},
+        )
+        return self.evaluate(context)
+
+    def evaluate_after_agent(
+        self,
+        agent_output: str,
+        agent_name: str,
+        runtime_id: str,
+        **kwargs: Any,
+    ) -> AuditRecord:
+        """Evaluate AFTER_AGENT rules.
+
+        Builds a :class:`CheckContext` for the hook and delegates to
+        :meth:`evaluate`, so the same return value and
+        ``GovernanceBlockException`` behavior apply.
+
+        Only the ``metadata`` keyword is read from ``kwargs``; a missing
+        or ``None`` value becomes an empty dict so :meth:`evaluate` can
+        always copy it.
+        """
+        context = CheckContext(
+            hook=LifecycleHook.AFTER_AGENT,
+            agent_name=agent_name,
+            runtime_id=runtime_id,
+            agent_output=agent_output,
+            metadata=kwargs.get("metadata") or {},
+        )
+        return self.evaluate(context)
+
+    def evaluate_before_model(
+        self,
+        model_input: str,
+        agent_name: str,
+        runtime_id: str,
+        messages: list[dict[str, Any]] | None = None,
+        model_name: str = "",
+        **kwargs: Any,
+    ) -> AuditRecord:
+        """Evaluate BEFORE_MODEL rules.
+
+        Builds a :class:`CheckContext` for the hook and delegates to
+        :meth:`evaluate`, so the same return value and
+        ``GovernanceBlockException`` behavior apply.
+
+        ``messages`` defaults to an empty list. Only the ``metadata``
+        keyword is read from ``kwargs``; a missing or ``None`` value
+        becomes an empty dict so :meth:`evaluate` can always copy it.
+        """
+        context = CheckContext(
+            hook=LifecycleHook.BEFORE_MODEL,
+            agent_name=agent_name,
+            runtime_id=runtime_id,
+            model_input=model_input,
+            model_name=model_name,
+            messages=messages or [],
+            metadata=kwargs.get("metadata") or {},
+        )
+        return self.evaluate(context)
+
+    def evaluate_after_model(
+        self,
+        model_output: str,
+        agent_name: str,
+        runtime_id: str,
+        **kwargs: Any,
+    ) -> AuditRecord:
+        """Evaluate AFTER_MODEL rules.
+
+        Builds a :class:`CheckContext` for the hook and delegates to
+        :meth:`evaluate`, so the same return value and
+        ``GovernanceBlockException`` behavior apply.
+
+        Only the ``metadata`` keyword is read from ``kwargs``; a missing
+        or ``None`` value becomes an empty dict so :meth:`evaluate` can
+        always copy it.
+        """
+        context = CheckContext(
+            hook=LifecycleHook.AFTER_MODEL,
+            agent_name=agent_name,
+            runtime_id=runtime_id,
+            model_output=model_output,
+            metadata=kwargs.get("metadata") or {},
+        )
+        return self.evaluate(context)
+
+    def evaluate_tool_call(
+        self,
+        tool_name: str,
+        tool_args: dict[str, Any],
+        agent_name: str,
+        runtime_id: str,
+        session_state: dict[str, Any] | None = None,
+        **kwargs: Any,
+    ) -> AuditRecord:
+        """Evaluate TOOL_CALL rules.
+
+        Builds a :class:`CheckContext` for the hook and delegates to
+        :meth:`evaluate`, so the same return value and
+        ``GovernanceBlockException`` behavior apply.
+
+        ``session_state`` defaults to an empty dict. Only the
+        ``metadata`` keyword is read from ``kwargs``; a missing or
+        ``None`` value becomes an empty dict so :meth:`evaluate` can
+        always copy it.
+        """
+        context = CheckContext(
+            hook=LifecycleHook.TOOL_CALL,
+            agent_name=agent_name,
+            runtime_id=runtime_id,
+            tool_name=tool_name,
+            tool_args=tool_args,
+            session_state=session_state or {},
+            metadata=kwargs.get("metadata") or {},
+        )
+        return self.evaluate(context)
+
+    def evaluate_after_tool(
+        self,
+        tool_name: str,
+        tool_result: str,
+        agent_name: str,
+        runtime_id: str,
+        **kwargs: Any,
+    ) -> AuditRecord:
+        """Evaluate AFTER_TOOL rules.
+
+        Builds a :class:`CheckContext` for the hook and delegates to
+        :meth:`evaluate`, so the same return value and
+        ``GovernanceBlockException`` behavior apply.
+
+        Only the ``metadata`` keyword is read from ``kwargs``; a missing
+        or ``None`` value becomes an empty dict so :meth:`evaluate` can
+        always copy it.
+        """
+        context = CheckContext(
+            hook=LifecycleHook.AFTER_TOOL,
+            agent_name=agent_name,
+            runtime_id=runtime_id,
+            tool_name=tool_name,
+            tool_result=tool_result,
+            metadata=kwargs.get("metadata") or {},
+        )
+        return self.evaluate(context)
+
+    def _evaluate_rule(self, rule: Rule, context: CheckContext) -> RuleEvaluation:
+        """Run every check of ``rule`` and fold the outcomes into one result.
+
+        A rule matches when at least one of its checks matches; a rule
+        with no checks always matches and carries ``rule.action``.
+        Otherwise the reported action is the most restrictive action
+        among the matched checks (DENY > ESCALATE > AUDIT > ALLOW), or
+        ALLOW when nothing matched.
+
+        Returns:
+            A :class:`RuleEvaluation` holding the per-check results and
+            the detail message of the first matched check.
+        """
+        checks = rule.checks
+        if not checks:
+            # Unconditional rule (used for audit-only rules).
+            return RuleEvaluation(
+                rule_id=rule.rule_id,
+                rule_name=rule.name,
+                matched=True,
+                detail="Rule has no conditions (always matches)",
+                pack_name=rule.pack_name,
+                action=rule.action,
+                description=rule.description,
+            )
+
+        outcomes: list[dict[str, Any]] = []
+        rule_hit = False
+        # The strongest action is taken from the checks that matched, so
+        # a per-check ``action`` override is honored. The precedence
+        # mirrors the cross-rule aggregation done in evaluate().
+        strongest = Action.ALLOW
+
+        for check in checks:
+            hit, message = self._evaluate_check(check, context)
+            outcome = {
+                "matched": hit,
+                "detail": message,
+                "action": check.action.value,
+            }
+            outcomes.append(outcome)
+            if not hit:
+                continue
+
+            rule_hit = True
+            verdict = check.action
+            if verdict == Action.DENY:
+                strongest = Action.DENY
+            elif verdict == Action.ESCALATE:
+                # ESCALATE never overrides an earlier DENY.
+                if strongest != Action.DENY:
+                    strongest = Action.ESCALATE
+            elif verdict == Action.AUDIT and strongest == Action.ALLOW:
+                strongest = Action.AUDIT
+
+        # Report the message of the first matched check. When nothing
+        # matched, fall back to the first check's detail (or the empty
+        # string if there are no results at all).
+        for outcome in outcomes:
+            if outcome["matched"]:
+                reported_detail = outcome["detail"]
+                break
+        else:
+            reported_detail = outcomes[0]["detail"] if outcomes else ""
+
+        if rule_hit:
+            reported_action = strongest
+        else:
+            reported_action = Action.ALLOW
+
+        return RuleEvaluation(
+            rule_id=rule.rule_id,
+            rule_name=rule.name,
+            matched=rule_hit,
+            detail=reported_detail,
+            pack_name=rule.pack_name,
+            action=reported_action,
+            description=rule.description,
+            check_results=outcomes,
+        )
+
+    def _evaluate_check(self, check: Check, context: CheckContext) -> tuple[bool, str]:
+        """Decide whether ``check`` matches and which message to report.
+
+        Conditions are combined with ``any`` when ``check.logic`` is
+        ``"any"`` and with ``all`` for every other value. A check with
+        no conditions always matches.
+
+        Returns:
+            ``(matched, detail)`` where ``detail`` is ``check.message``
+            on a match and the empty string otherwise.
+        """
+        conditions = check.conditions
+        if not conditions:
+            return True, "No conditions (always matches)"
+
+        # Built as a list on purpose: every condition is evaluated, with
+        # no short-circuiting.
+        outcomes = [self._evaluate_condition(cond, context) for cond in conditions]
+        combine = any if check.logic == "any" else all
+        matched = combine(outcomes)
+        return matched, (check.message if matched else "")
+
+    def _evaluate_condition(self, condition: Condition, context: CheckContext) -> bool:
+        """Resolve the condition's field, apply its operator, honor ``negate``."""
+        actual = self._get_field_value(condition.field, context)
+        outcome = self._apply_operator(condition.operator, actual, condition.value)
+        # ``negate`` flips the operator's verdict.
+        return (not outcome) if condition.negate else outcome
+
+    def _get_field_value(self, field: str, context: CheckContext) -> Any:
+        """Resolve a dotted ``field`` path against the context.
+
+        Each path segment is looked up on the value reached so far: as
+        a key when that value is a dict, as an attribute otherwise.
+
+        Dicts are resolved by key only. Trying ``getattr`` first would
+        make segments such as ``items``, ``keys`` or ``get`` return the
+        dict's bound methods instead of the payload stored under that
+        key.
+
+        Args:
+            field: Dotted path, e.g. ``"tool_args.path"``.
+            context: Root object the path starts from.
+
+        Returns:
+            The resolved value, or ``None`` as soon as a segment cannot
+            be resolved.
+        """
+        value: Any = context
+
+        for part in field.split("."):
+            if isinstance(value, dict):
+                if part not in value:
+                    return None
+                value = value[part]
+            elif hasattr(value, part):
+                value = getattr(value, part)
+            else:
+                return None
+
+        return value
+
+    def _apply_operator(
+        self, operator: str, field_value: Any, check_value: Any
+    ) -> bool:
+        """Apply an operator to compare field value against check value.
+
+        Evaluation order:
+          1. ``exists`` / ``not_exists`` look only at whether the field
+             resolved.
+          2. ``guardrail_fallback`` looks only at ``check_value``.
+          3. A ``None`` field makes every remaining operator False.
+          4. ``gt`` / ``gte`` / ``lt`` / ``lte`` compare both sides as
+             floats; a side that cannot be converted yields False.
+          5. Everything else works on ``str(field_value)``.
+
+        Args:
+            operator: Operator name from the rule condition.
+            field_value: Value resolved from the context (may be None).
+            check_value: Expected value / operator config from the rule.
+
+        Returns:
+            True when the condition holds. Unknown operators are logged
+            at debug level and return False.
+        """
+        # Handle existence checks before the None check
+        if operator == "exists":
+            return field_value is not None
+        if operator == "not_exists":
+            return field_value is None
+
+        # guardrail_fallback fires only when the guardrail is mapped to
+        # UiPath but its policy is disabled. Config travels in
+        # ``check_value``; the rule's ``field`` is unused (so
+        # ``field_value`` is ``None`` here, which is expected — we must
+        # special-case this before the generic ``None`` short-circuit
+        # below).
+        if operator == "guardrail_fallback":
+            cfg = check_value if isinstance(check_value, dict) else {}
+            return bool(cfg.get("mapped_to_uipath", False)) and not bool(
+                cfg.get("policy_enabled", True)
+            )
+
+        if field_value is None:
+            return False
+
+        # Numeric operators don't need stringification — short-circuit
+        # before `str(field_value)` (expensive for dict / large payloads).
+        if operator in ("gt", "gte", "lt", "lte"):
+            try:
+                lhs = float(field_value)
+                rhs = float(check_value)
+            except (ValueError, TypeError):
+                return False
+            if operator == "gt":
+                return lhs > rhs
+            if operator == "gte":
+                return lhs >= rhs
+            if operator == "lt":
+                return lhs < rhs
+            return lhs <= rhs
+
+        field_str = str(field_value)
+
+        match operator:
+            case "equals" | "eq":
+                return field_str == str(check_value)
+
+            case "not_equals" | "ne":
+                return field_str != str(check_value)
+
+            case "contains":
+                return str(check_value).lower() in field_str.lower()
+
+            case "not_contains":
+                return str(check_value).lower() not in field_str.lower()
+
+            case "regex" | "matches":
+                compiled = _compile_regex(str(check_value))
+                if compiled is None:
+                    return False
+                return bool(compiled.search(field_str))
+
+            case "in_list":
+                # Items are stringified like the field itself, so a list
+                # of non-string items (e.g. ``[401, 403]``) can match —
+                # the same string comparison ``equals`` uses.
+                if isinstance(check_value, list):
+                    return any(field_str == str(item) for item in check_value)
+                return False
+
+            case "not_in_list":
+                # Same stringified comparison as ``in_list``; a non-list
+                # config means "nothing is listed", hence True.
+                if isinstance(check_value, list):
+                    return all(field_str != str(item) for item in check_value)
+                return True
+
+            case "vader_concern":
+                # VADER compound score <= threshold.
+                # check_value: dict like {"threshold": -0.3} (default -0.3)
+                return self._check_vader_concern(field_str, check_value)
+
+            case "encoding_concern":
+                # chardet-backed encoding integrity check (A.7.4).
+                # check_value: dict with optional `min_confidence`
+                # (default 0.5), `max_replacement_ratio` (default 0.05)
+                # and `min_corruption_events` (default 2).
+                return self._check_encoding_concern(field_str, check_value)
+
+            case "entropy_concern":
+                # Shannon entropy outside expected range (A.7.4).
+                # check_value: dict with optional `min` (default 1.5) and
+                # `max` (default 7.5) bits/byte. Stdlib only.
+                return self._check_entropy_concern(field_str, check_value)
+
+            case "incident_concern":
+                # Categorical incident detection (A.8.4).
+                # check_value: dict with optional `categories` list
+                # (subset of safety_refusal/tool_failure/auth_failure/
+                # quota_exceeded/hallucination). Default: all categories.
+                return self._check_incident_concern(field_str, check_value)
+
+            case "commitment_concern":
+                # Customer commitment language detection (A.10.4).
+                # check_value: dict with optional `require_amount` (default
+                # True) and `require_deadline` (default False). A
+                # commitment verb always fires; each enabled flag lets
+                # that signal fire on its own as well (OR semantics).
+                return self._check_commitment_concern(field_str, check_value)
+
+            case _:
+                logger.debug("Unknown operator: %s", operator)
+                return False
+
+    @staticmethod
+    def _check_vader_concern(text: str, params: Any) -> bool:
+        """Return True if VADER compound score on `text` is <= threshold.
+
+        Args:
+            text: Text to analyse.
+            params: Either a dict with `threshold` key, or a numeric threshold
+                directly. Default threshold is -0.3 (clearly-negative). A
+                threshold that cannot be converted to float — in either
+                form — falls back to the default instead of raising.
+
+        Returns:
+            True iff vaderSentiment is available AND compound score <= threshold.
+            Returns False on empty input or if the library is not installed —
+            sentiment checks no-op rather than crash.
+        """
+        if not text or not text.strip():
+            return False
+
+        analyzer = _get_vader_analyzer()
+        if analyzer is None:
+            return False
+
+        if isinstance(params, dict):
+            raw_threshold = params.get("threshold", -0.3)
+        else:
+            raw_threshold = params
+        # One guarded conversion for both forms, so a malformed policy
+        # value (e.g. ``threshold: null``) cannot raise out of evaluation.
+        try:
+            threshold = float(raw_threshold)
+        except (TypeError, ValueError):
+            threshold = -0.3
+
+        try:
+            compound = float(analyzer.polarity_scores(text)["compound"])
+        except Exception as exc:  # pragma: no cover - defensive
+            logger.debug("VADER analysis failed: %s", exc)
+            return False
+
+        return compound <= threshold
+
+    @staticmethod
+    def _check_encoding_concern(text: str, params: Any) -> bool:
+        r"""Return True if `text` shows encoding integrity issues.
+
+        Counts several deterministic corruption signals:
+          - U+FFFD replacement characters (already-decoded lossy text)
+          - Literal ``\ufffd`` escape sequences carried through a JSON
+            / repr layer rather than being decoded
+          - Literal ``\xHH`` hex escapes (raw bytes leaked into a string)
+          - Latin-1-as-UTF-8 mojibake bigrams (e.g. ``Ã©``, ``â€™``)
+
+        The check fires when any of the following holds, tested in order:
+          1. the total number of signals reaches ``min_corruption_events``;
+          2. the signals' character span divided by ``len(text)`` exceeds
+             ``max_replacement_ratio``;
+          3. chardet is installed and reports a confidence below
+             ``min_confidence`` for the UTF-8 encoded text.
+
+        Args:
+            text: Text to inspect. Empty / whitespace-only text never fires.
+            params: Dict with optional ``min_confidence`` (default 0.5),
+                ``max_replacement_ratio`` (default 0.05) and
+                ``min_corruption_events`` (default 2). Any non-dict value
+                selects the defaults.
+
+        Returns:
+            True if one of the three tests fired, False otherwise
+            (including when chardet is unavailable or its detection
+            raises).
+        """
+        if not text or not text.strip():
+            return False
+
+        if not isinstance(params, dict):
+            params = {}
+        min_confidence = float(params.get("min_confidence", 0.5))
+        max_replacement_ratio = float(params.get("max_replacement_ratio", 0.05))
+        min_corruption_events = int(params.get("min_corruption_events", 2))
+
+        length = max(len(text), 1)
+
+        # Explicit escape for U+FFFD so the literal cannot be damaged by
+        # an editor or transport that re-encodes the source file.
+        replacement_chars = text.count("\ufffd")
+        literal_ufffd_escapes = text.count("\\ufffd")
+        hex_escapes = len(_HEX_ESCAPE_PATTERN.findall(text))
+        mojibake_bigrams = sum(text.count(bigram) for bigram in _MOJIBAKE_BIGRAMS)
+
+        # Absolute count of distinct corruption *events* (one per
+        # U+FFFD, one per literal escape sequence, one per mojibake
+        # bigram). Even diluted by a lot of clean text, a few of these
+        # in production output is a strong signal.
+        corruption_events = (
+            replacement_chars + literal_ufffd_escapes + hex_escapes + mojibake_bigrams
+        )
+        if corruption_events >= min_corruption_events:
+            return True
+
+        # Ratio-based fallback for cases below the absolute floor: still
+        # catches very short payloads where a single corruption char is
+        # disproportionate.
+        # Weight each event by its source-char span so denser corruption
+        # in shorter text trips the ratio sooner:
+        #   U+FFFD = 1 char, literal "\ufffd" = 6 chars, "\xHH" = 4 chars,
+        #   mojibake bigram = 2 chars.
+        corruption_chars = (
+            replacement_chars
+            + 6 * literal_ufffd_escapes
+            + 4 * hex_escapes
+            + 2 * mojibake_bigrams
+        )
+        if corruption_chars / length > max_replacement_ratio:
+            return True
+
+        # Secondary: chardet on the encoded bytes. For pure str input
+        # this almost always reports high UTF-8/ASCII confidence (the
+        # branch is intentionally permissive), but it does catch bytes
+        # routed through `repr()` or `__str__` of a `bytes` object that
+        # chardet recognises as a non-UTF8 encoding with low confidence.
+        chardet = _get_chardet()
+        if chardet is None:
+            return False
+        try:
+            detection = chardet.detect(text.encode("utf-8", errors="replace"))
+            # A missing / None confidence counts as 0.0, i.e. a concern.
+            confidence = float(detection.get("confidence") or 0.0)
+        except Exception as exc:  # pragma: no cover - defensive
+            logger.debug("chardet detection failed: %s", exc)
+            return False
+
+        return confidence < min_confidence
+
+    @staticmethod
+    def _check_entropy_concern(text: str, params: Any) -> bool:
+        """Return True if the Shannon entropy of `text` leaves the allowed band.
+
+        Stdlib-only. The text is UTF-8 encoded and entropy is computed
+        over byte frequencies, in bits per byte. English prose
+        typically lands ~3.5–4.5 bits/byte; binary noise approaches
+        8 bits/byte; constant/repetitive text approaches 0.
+
+        Args:
+            text: Text to measure. Empty / whitespace-only text never fires.
+            params: Dict with optional ``min`` (default 1.5) and ``max``
+                (default 7.5). Any non-dict value selects the defaults.
+
+        Returns:
+            True when the entropy is below ``min`` or above ``max``.
+        """
+        if not text or not text.strip():
+            return False
+
+        options = params if isinstance(params, dict) else {}
+        floor = float(options.get("min", 1.5))
+        ceiling = float(options.get("max", 7.5))
+
+        encoded = text.encode("utf-8", errors="replace")
+        size = len(encoded)
+        if size == 0:
+            return False
+
+        # H = -sum(p * log2(p)) over the observed byte values.
+        weighted_logs = 0.0
+        for occurrences in Counter(encoded).values():
+            share = occurrences / size
+            weighted_logs += share * math.log2(share)
+        bits = -weighted_logs
+
+        return not (floor <= bits <= ceiling)
+
+    @staticmethod
+    def _check_incident_concern(text: str, params: Any) -> bool:
+        """Return True if `text` matches a pattern of a selected incident category (A.8.4).
+
+        Categories: safety_refusal, tool_failure, auth_failure,
+        quota_exceeded, hallucination.
+
+        Args:
+            text: Text to scan. Empty / whitespace-only text never fires.
+            params: ``{"categories": [...]}`` restricts the scan to the
+                listed categories; names that are not known categories
+                are ignored. A missing or empty list, or a non-dict
+                value, scans every category.
+
+        Returns:
+            True as soon as one pattern of a selected category is found
+            in ``text``.
+        """
+        if not text or not text.strip():
+            return False
+
+        requested = params.get("categories") if isinstance(params, dict) else None
+
+        if requested:
+            selected = [name for name in requested if name in _INCIDENT_PATTERNS]
+        else:
+            selected = list(_INCIDENT_PATTERNS.keys())
+
+        return any(
+            regex.search(text)
+            for name in selected
+            for regex in _INCIDENT_PATTERNS[name]
+        )
+
+    @staticmethod
+    def _check_commitment_concern(text: str, params: Any) -> bool:
+        """Return True if `text` carries customer-commitment language (A.10.4).
+
+        The signals are OR-ed together:
+          - a commitment-verb match always fires;
+          - with ``require_amount`` true (the default), a
+            currency-anchored amount fires on its own;
+          - with ``require_deadline`` true (default false), a deadline
+            phrase fires on its own.
+        With both flags false only the verb pattern is consulted.
+
+        The verb pattern covers first-person promise verbs *and* proposal
+        / SOW commitment markers ("Cost: $X", "fixed scope",
+        "Deliverables", "Timeline: N days", "I propose"). The amount
+        pattern requires a currency marker adjacent to the number so URL
+        fragments don't false-positive.
+
+        Args:
+            text: Text to scan. Empty / whitespace-only text never fires.
+            params: Dict carrying the two optional flags; any non-dict
+                value selects the defaults.
+        """
+        if not text or not text.strip():
+            return False
+
+        options = params if isinstance(params, dict) else {}
+        want_amount = bool(options.get("require_amount", True))
+        want_deadline = bool(options.get("require_deadline", False))
+
+        # The verb signal is independent of both flags.
+        if _COMMITMENT_VERB_PATTERN.search(text):
+            return True
+        # Each supporting signal is consulted only when its flag is set.
+        if want_amount and _COMMITMENT_AMOUNT_FALLBACK.search(text):
+            return True
+        if want_deadline and _COMMITMENT_DEADLINE_PATTERN.search(text):
+            return True
+        return False
diff --git a/src/uipath/runtime/governance/native/guardrail_compensation.py b/src/uipath/runtime/governance/native/guardrail_compensation.py
new file mode 100644
index 0000000..369c1b3
--- /dev/null
+++ b/src/uipath/runtime/governance/native/guardrail_compensation.py
@@ -0,0 +1,312 @@
+"""Compensating governance for disabled centralized guardrails.
+
+When a ``guardrail_fallback`` rule fires (the guardrail is mapped to
+UiPath but the centralized policy is disabled), the framework asks the
+governance-server to run the real guardrail check via its
+``/{org_id}/agenticgovernance_/api/v1/runtime/govern`` endpoint.
+
+This module owns only the **local concerns**: a bounded background
+pool that schedules the call without blocking the agent hook, and a
+``contextvars`` snapshot taken on the caller thread before the worker
+hop (worker threads do not inherit the caller's OpenTelemetry context).
+
+The actual HTTP call — URL composition, auth, headers, JSON
+serialisation, env-backed job-context auto-fill — is the
+:class:`uipath.core.governance.GovernanceCompensationProvider`'s job.
+Callers inject a concrete provider implementation, and this module
+just builds the :class:`GovernRequest` wire model and hands it off.
+
+The call is **fire-and-forget**: the server runs the guardrail AND
+writes the audit trace from its side. The agent doesn't inspect the
+response — it only cares about whether the call reached the server.
+
+The compensator is **instance-scoped**: each :class:`GovernanceRuntime`
+owns its own pool and semaphore. ``uipath eval`` parallel runtimes
+don't share workers, queue slots, or saturation state — one runtime's
+spam can't silently drop another's compensation calls.
+
+The compensator does **not** read host env vars and does not resolve
+trace ids itself. It propagates the caller's ``contextvars`` (which
+hold the live OTel span) across the worker-thread hop via
+:func:`contextvars.copy_context`, so the provider can resolve trace
+context at HTTP-call time inside the captured context.
+"""
+
+from __future__ import annotations
+
+import atexit
+import contextvars
+import logging
+import threading
+import weakref
+from concurrent.futures import ThreadPoolExecutor
+from typing import Any
+
+from uipath.core.governance import (
+    FiredRule,
+    GovernanceCompensationProvider,
+    GovernRequest,
+)
+
+logger = logging.getLogger(__name__)
+
+
+# ----------------------------------------------------------------------------
+# Process-wide cleanup machinery
+#
+# One ``atexit`` hook walks a ``WeakSet`` of live compensators on exit and
+# closes each. This keeps atexit registrations bounded (N runtimes → 1 hook,
+# not N), and weak references let a disposed compensator be GC'd. Same
+# pattern as :class:`uipath.runtime.governance._audit.base.AuditManager`.
+# ----------------------------------------------------------------------------
+
+_live_compensators: weakref.WeakSet[GuardrailCompensator] = weakref.WeakSet()
+_atexit_registered = False
+_atexit_lock = threading.Lock()
+
+
+def _process_cleanup_compensators() -> None:
+    """Process-exit handler: close every live compensator."""
+    for compensator in list(_live_compensators):
+        try:
+            compensator.close()
+        except Exception as exc:  # noqa: BLE001 - exit cleanup must not raise
+            logger.debug("Compensator process cleanup error: %s", exc)
+
+
+def _register_compensator_for_cleanup(compensator: GuardrailCompensator) -> None:
+    """Add ``compensator`` to the cleanup set + ensure atexit is wired once."""
+    global _atexit_registered
+    _live_compensators.add(compensator)
+    if _atexit_registered:
+        return
+    with _atexit_lock:
+        if not _atexit_registered:
+            atexit.register(_process_cleanup_compensators)
+            _atexit_registered = True
+
+
+# ----------------------------------------------------------------------------
+# Stateless helpers
+# ----------------------------------------------------------------------------
+
+
+def disabled_guardrails(audit: Any, policy_index: Any) -> list[FiredRule]:
+    """Return per-rule metadata for each fired guardrail-fallback rule.
+
+    A guardrail rule fires only when it is mapped to UiPath
+    (``mapped_to_uipath`` true) but disabled (``policy_enabled`` false) —
+    see the ``guardrail_fallback`` operator. The validator name (e.g.
+    ``pii_detection``) is read from the rule's ``guardrail_fallback``
+    check config and used as the validator on the compensating call.
+
+    One :class:`FiredRule` entry is emitted per matching
+    ``guardrail_fallback`` condition. Rules in this codebase declare a
+    single fallback condition each, so the returned list has one entry
+    per fired rule in practice; multi-condition rules would emit more
+    than one entry sharing the same ``rule_id``.
+    """
+    out: list[FiredRule] = []
+    for ev in audit.evaluations:
+        if not ev.matched:
+            continue
+        rule = policy_index.get_rule(ev.rule_id)
+        if rule is None:
+            continue
+        for check in rule.checks:
+            for cond in check.conditions:
+                if cond.operator != "guardrail_fallback":
+                    continue
+                if not isinstance(cond.value, dict):
+                    continue
+                # The ``guardrail_fallback`` operator at evaluation time
+                # only matches when ``mapped_to_uipath=True`` AND
+                # ``policy_enabled=False``. We re-check here defensively
+                # so a future code path that bypasses the evaluator (or
+                # a multi-condition rule that fired on a sibling check)
+                # can't trigger a compensation call for a guardrail
+                # that isn't actually disabled.
+                if not bool(cond.value.get("mapped_to_uipath", False)):
+                    continue
+                if bool(cond.value.get("policy_enabled", True)):
+                    continue
+                validator = str(cond.value.get("validator", ""))
+                if validator:
+                    out.append(
+                        FiredRule(
+                            rule_id=ev.rule_id,
+                            rule_name=ev.rule_name,
+                            pack_name=getattr(rule, "pack_name", "") or "",
+                            validator=validator,
+                        )
+                    )
+    return out
+
+
+def _validators(rules: list[FiredRule]) -> list[str]:
+    """Distinct validator names from the fired rules, preserving order."""
+    return list(dict.fromkeys(r.validator for r in rules if r.validator))
+
+
+# ----------------------------------------------------------------------------
+# GuardrailCompensator
+# ----------------------------------------------------------------------------
+
+
+class GuardrailCompensator:
+    """Instance-scoped compensating-governance dispatcher.
+
+    Each :class:`GovernanceRuntime` constructs one. Owns:
+
+    - A :class:`ThreadPoolExecutor` (default 4 workers) that runs the
+      ``/runtime/govern`` POST off the agent's hook thread.
+    - A :class:`threading.BoundedSemaphore` (default cap = workers × 4)
+      that bounds total in-flight submissions (running + queued) so a
+      misbehaving agent firing compensation faster than the server can
+      absorb can't grow memory without limit. Saturated submissions are
+      dropped with a warning.
+
+    Process exit cancels queued work via a single process-level atexit
+    handler (see :func:`_process_cleanup_compensators`); running tasks
+    finish bounded by the provider's HTTP timeout.
+
+    Fire-and-forget: :meth:`submit` returns immediately. The actual HTTP
+    work is delegated to :meth:`GovernanceCompensationProvider.compensate`
+    — this class never touches URL/headers/auth/JSON itself.
+    """
+
+    _DEFAULT_MAX_WORKERS = 4
+    # Queue depth multiplier — total in-flight cap = max_workers × this.
+    _INFLIGHT_OVERSUBSCRIPTION = 4
+
+    def __init__(
+        self,
+        provider: GovernanceCompensationProvider,
+        *,
+        max_workers: int = _DEFAULT_MAX_WORKERS,
+        inflight_oversubscription: int = _INFLIGHT_OVERSUBSCRIPTION,
+    ) -> None:
+        """Construct a compensator bound to one provider.
+
+        The compensator does not carry a trace id. Trace-id resolution
+        is the provider's responsibility at HTTP-call time. To preserve
+        live OTel context across the thread-pool hop (worker threads
+        don't inherit ``contextvars``), :meth:`submit` runs the worker
+        callable inside a snapshot captured via
+        :func:`contextvars.copy_context` — so the caller's OTel span is
+        still visible when the provider runs on the worker.
+
+        Args:
+            provider: The :class:`GovernanceCompensationProvider` that
+                actually fires the ``/runtime/govern`` POST.
+            max_workers: Concurrent worker threads in the pool.
+            inflight_oversubscription: How deep the work queue grows
+                before saturated submissions get dropped. Total cap is
+                ``max_workers * inflight_oversubscription``.
+        """
+        self._provider = provider
+        self._inflight_cap = max_workers * inflight_oversubscription
+        self._pool = ThreadPoolExecutor(
+            max_workers=max_workers,
+            thread_name_prefix="governance-compensation",
+        )
+        self._inflight = threading.BoundedSemaphore(self._inflight_cap)
+        _register_compensator_for_cleanup(self)
+
+    def submit(
+        self,
+        rules: list[FiredRule],
+        data: dict[str, Any],
+        hook: str,
+        src_timestamp: str,
+        agent_name: str,
+        runtime_id: str,
+    ) -> None:
+        """Schedule a /runtime/govern call on the bounded background pool.
+
+        Fire-and-forget. Returns immediately; the call runs on a worker
+        thread. When the in-flight queue is saturated the call is
+        dropped with a warning and the agent continues.
+
+        ``rules`` is the per-rule metadata from :func:`disabled_guardrails`;
+        the validators sent to the guardrail API are derived from it.
+
+        The current :mod:`contextvars` context (which carries the live
+        OpenTelemetry span) is captured here and re-applied inside the
+        worker via :meth:`contextvars.Context.run`. This lets the
+        provider see the live OTel context on the worker thread —
+        without the snapshot the worker would inherit an empty context
+        and the provider could only resolve env-based trace ids.
+
+        Never raises — including when the pool has already been shut down.
+        """
+        if not rules:
+            return
+
+        validators = _validators(rules)
+        if not validators:
+            return
+
+        if not self._inflight.acquire(blocking=False):
+            logger.warning(
+                "Compensation pool saturated (>%d in flight); dropping call "
+                "(validators=[%s])",
+                self._inflight_cap,
+                ", ".join(validators),
+            )
+            return
+
+        request = GovernRequest(
+            validators=validators,
+            rules=rules,
+            data=data,
+            hook=hook,
+            trace_id="",  # the provider fills this from the captured context
+            src_timestamp=src_timestamp,
+            agent_name=agent_name,
+            runtime_id=runtime_id,
+        )
+
+        provider = self._provider
+        inflight = self._inflight
+        # Snapshot the caller's contextvars (OTel span lives in there
+        # for Python OTel >= 1.x). The worker runs inside this snapshot
+        # so the provider sees the live span at HTTP-call time.
+        ctx = contextvars.copy_context()
+
+        def _run() -> None:
+            try:
+                provider.compensate(request)
+            except Exception as exc:  # noqa: BLE001 - fail-open by contract
+                logger.warning(
+                    "Compensation worker failed (validators=[%s]): %s",
+                    ", ".join(validators),
+                    exc,
+                )
+            finally:
+                inflight.release()
+
+        try:
+            self._pool.submit(ctx.run, _run)
+        except RuntimeError as exc:
+            # Pool was shut down (atexit, dispose, or test teardown) —
+            # release the semaphore slot we took and log; never raise.
+            self._inflight.release()
+            logger.warning(
+                "Compensation pool unavailable (validators=[%s]): %s",
+                ", ".join(validators),
+                exc,
+            )
+
+    def close(self) -> None:
+        """Cancel queued tasks. Running tasks finish bounded by the provider HTTP timeout.
+
+        ``wait=False`` returns immediately so caller / process shutdown
+        isn't held up; ``cancel_futures=True`` drops anything not yet
+        running. Idempotent — calling close on an already-closed pool
+        is a no-op; an unexpected shutdown error is logged at debug level.
+        """
+        try:
+            self._pool.shutdown(wait=False, cancel_futures=True)
+        except Exception as exc:  # noqa: BLE001 - shutdown must not raise
+            logger.debug("Compensator shutdown error: %s", exc)
diff --git a/src/uipath/runtime/governance/native/loader.py b/src/uipath/runtime/governance/native/loader.py
deleted file mode 100644
index 5b45d21..0000000
--- a/src/uipath/runtime/governance/native/loader.py
+++ /dev/null
@@ -1,342 +0,0 @@
-"""Policy pack loader.
-
-Per-runtime policy loading: a :class:`PolicyLoader` instance owns one
-provider plus the cached PolicyIndex and prefetch state. The runtime
-never contacts the governance backend directly; the provider owns the
-wire / transport (auth, retries, telemetry). When no provider is
-supplied, or the provider raises / returns an empty body / yields zero
-rules, the loader returns an empty PolicyIndex and the agent runs
-without any rules.
-
-The loader holds **no module-level state**. ``uipath eval`` can spin up
-multiple ``GovernanceRuntime`` instances in the same process and each
-gets its own loader with its own provider, cache, and selector — no
-cross-instance interference.
-"""
-
-from __future__ import annotations
-
-import logging
-import threading
-import time
-from collections import Counter
-
-import yaml
-from uipath.core.governance import (
-    EnforcementMode,
-    GovernancePolicyProvider,
-    PolicyContext,
-)
-
-from uipath.runtime.governance.native._yaml_to_index import build_policy_index_from_yaml
-from uipath.runtime.governance.native.models import PolicyIndex
-
-logger = logging.getLogger(__name__)
-
-
-class PolicyLoader:
-    """Instance-scoped policy loader bound to one provider.
-
-    Owns the policy-index cache, prefetch coordination, and the
-    conversational selector for a single :class:`GovernanceRuntime`
-    instance. Multiple loaders coexist in the same process without
-    clobbering each other.
-
-    Typical lifecycle::
-
-        loader = PolicyLoader(provider, is_conversational=False)
-        loader.prefetch()                  # non-blocking, optional
-        index = loader.get_policy_index()  # cached after first call
-
-    When ``provider`` is ``None``, every load returns an empty
-    PolicyIndex without invoking anything.
-    """
-
-    # Upper bound on how long :meth:`get_policy_index` waits for an
-    # in-flight prefetch before falling back to an empty PolicyIndex.
-    # The provider owns its own transport timeouts; this is the runtime's
-    # ceiling on blocking the first hook fire.
-    _PROVIDER_WAIT_SECONDS = 10.0
-
-    def __init__(
-        self,
-        provider: GovernancePolicyProvider | None,
-        *,
-        is_conversational: bool | None = None,
-    ) -> None:
-        """Construct a per-runtime policy loader.
-
-        Args:
-            provider: Policy source. ``None`` means no policies will be
-                loaded — the loader yields an empty PolicyIndex.
-            is_conversational: Whether the hosted agent is
-                conversational. Travels in the :class:`PolicyContext`
-                so the provider can select the matching policy view.
-                ``None`` leaves the selector unset — the provider
-                applies its default.
-        """
-        self._provider = provider
-        self._is_conversational = is_conversational
-        self._policy_index: PolicyIndex | None = None
-        # Enforcement mode supplied by the provider on the most recent
-        # load. ``None`` until the first load lands (or whenever the
-        # provider omits a mode); :attr:`enforcement_mode` returns
-        # ``AUDIT`` in that case. Instance-scoped so parallel runtimes
-        # (e.g. ``uipath eval``) don't clobber each other.
-        self._enforcement_mode: EnforcementMode | None = None
-        # ``_prefetch_event`` is set once the background load finishes
-        # (success OR failure); callers of ``get_policy_index`` wait on
-        # it. ``_prefetch_lock`` guards the start-once semantics so
-        # concurrent ``prefetch`` calls don't kick off duplicate threads.
-        self._prefetch_event: threading.Event | None = None
-        self._prefetch_lock = threading.Lock()
-
-    def prefetch(self) -> None:
-        """Kick off a background load of the policy index.
-
-        Non-blocking. Designed to be called as early as possible (at
-        :class:`GovernanceRuntime` init) so the policy fetch overlaps
-        with the rest of agent setup. The result lands in this loader's
-        cache; :meth:`get_policy_index` waits on the prefetch when it's
-        in flight.
-
-        Idempotent: subsequent calls while the first is running are
-        no-ops, and calls after completion are no-ops. No-op when no
-        provider is supplied — there's nothing to fetch.
-        """
-        if self._provider is None:
-            return
-
-        with self._prefetch_lock:
-            if self._policy_index is not None:
-                return  # already loaded
-            if self._prefetch_event is not None:
-                return  # already in flight
-            event = threading.Event()
-            self._prefetch_event = event
-
-        def _worker() -> None:
-            try:
-                loaded = self.load_policy_index()
-            except Exception as exc:  # noqa: BLE001 - logged; first hook will retry sync
-                logger.warning("Policy prefetch failed: %s", exc)
-            else:
-                with self._prefetch_lock:
-                    # Only publish if we're still the live prefetch.
-                    # ``clear_cache`` nulls ``_prefetch_event`` to retire
-                    # an in-flight worker; in that case the loaded value
-                    # belongs to a stale generation and must be dropped
-                    # rather than clobbering the just-cleared state.
-                    if self._prefetch_event is event:
-                        self._policy_index = loaded
-            finally:
-                event.set()
-
-        threading.Thread(
-            target=_worker,
-            name="governance-policy-prefetch",
-            daemon=True,
-        ).start()
-
-    def get_policy_index(self) -> PolicyIndex:
-        """Get the cached policy index, loading if necessary.
-
-        Resolution order on first call:
-          1. If a prefetch (see :meth:`prefetch`) is in flight, wait
-             for it to complete (bounded by ``_PROVIDER_WAIT_SECONDS``).
-          2. Synchronously call :meth:`load_policy_index` (which invokes
-             the provider).
-          3. Empty PolicyIndex when no provider is supplied or the
-             provider fails / returns nothing.
-
-        Result is cached for the loader's lifetime; per-hook evaluation
-        never touches the network. Call :meth:`clear_cache` to force a
-        refetch (mainly for tests).
-        """
-        if self._policy_index is not None:
-            return self._policy_index
-
-        event = self._prefetch_event
-        if event is not None:
-            completed = event.wait(timeout=self._PROVIDER_WAIT_SECONDS)
-            if completed and self._policy_index is not None:
-                return self._policy_index
-            if not completed:
-                # Timeout: cache an empty index so we don't re-wait the
-                # full timeout on every subsequent hook.
-                logger.warning(
-                    "Policy prefetch did not complete in %.1fs; "
-                    "agent will run without any policies",
-                    self._PROVIDER_WAIT_SECONDS,
-                )
-                self._policy_index = PolicyIndex()
-                return self._policy_index
-
-            # Completed but produced no PolicyIndex — the worker hit an
-            # unexpected error. Do NOT cache the empty result: caching
-            # would permanently disable governance for the loader's
-            # lifetime even though a later prefetch / clear_cache could
-            # still recover. Return an empty index for this call only.
-            logger.warning(
-                "Policy prefetch completed but produced no PolicyIndex "
-                "(see prior WARN for the root cause); agent will run "
-                "without any policies for this call"
-            )
-            return PolicyIndex()
-
-        # No prefetch was started (direct callers / tests). Sync load.
-        self._policy_index = self.load_policy_index()
-        return self._policy_index
-
-    def load_policy_index(self) -> PolicyIndex:
-        """Synchronously load and parse the policy index.
-
-        Returns:
-            PolicyIndex parsed from the provider response. Empty
-            PolicyIndex when no provider is supplied, the provider
-            raises, the YAML is malformed, or the response yields
-            zero rules.
-        """
-        start = time.perf_counter()
-
-        index = (
-            self._load_from_provider(self._provider)
-            if self._provider is not None
-            else None
-        )
-
-        if index is not None:
-            self._log_index_summary(index)
-            logger.info(
-                "Policy index ready: source=provider, total_ms=%.1f",
-                (time.perf_counter() - start) * 1000,
-            )
-            return index
-
-        reason = self._empty_index_reason()
-        logger.info(
-            "Policy index ready: source=empty (%s), total_ms=%.1f",
-            reason,
-            (time.perf_counter() - start) * 1000,
-        )
-        return PolicyIndex()
-
-    def _empty_index_reason(self) -> str:
-        """Diagnose why policy loading produced nothing."""
-        if self._provider is None:
-            return "no policy provider supplied"
-        return "provider returned no policies (error / empty body / zero rules)"
-
-    def _load_from_provider(
-        self, provider: GovernancePolicyProvider
-    ) -> PolicyIndex | None:
-        """Fetch and parse the policy index via the supplied provider.
-
-        Applies the provider-supplied enforcement mode as a side effect.
-        Returns ``None`` when the provider raises, when the YAML is
-        malformed, or when the resulting index has no rules — caller
-        returns an empty PolicyIndex in those cases.
-
-        Takes ``provider`` as a parameter (rather than reading
-        ``self._provider``) so the type system can prove the call site
-        is non-None — :meth:`load_policy_index` guards on ``None`` and
-        passes the narrowed value through.
-        """
-        start = time.perf_counter()
-
-        ctx = PolicyContext(is_conversational=self._is_conversational)
-
-        try:
-            response = provider.get_policy(ctx)
-        except Exception as exc:  # noqa: BLE001 - fail-open by contract
-            logger.warning("Policy provider get_policy failed: %s", exc)
-            return None
-
-        if response.mode is not None:
-            self._enforcement_mode = response.mode
-            logger.info("Enforcement mode set from provider: %s", response.mode.value)
-
-        if not response.policies:
-            logger.warning(
-                "Policy provider returned empty policies field; "
-                "agent will run without any policies"
-            )
-            return None
-
-        try:
-            index = build_policy_index_from_yaml(response.policies)
-        except yaml.YAMLError as exc:
-            logger.warning("Policy YAML from provider was malformed: %s", exc)
-            return None
-        except Exception as exc:  # noqa: BLE001 - never let load break agent startup
-            logger.warning("Failed to build PolicyIndex from provider YAML: %s", exc)
-            return None
-
-        if index.total_rules == 0:
-            logger.warning(
-                "Policy YAML from provider yielded zero rules; "
-                "agent will run without any policies"
-            )
-            return None
-
-        elapsed_ms = (time.perf_counter() - start) * 1000
-        logger.info(
-            "Loaded policy index from provider: packs=%s, rules=%d, elapsed_ms=%.1f",
-            index.pack_names,
-            index.total_rules,
-            elapsed_ms,
-        )
-        return index
-
-    def _log_index_summary(self, index: PolicyIndex) -> None:
-        """Log summary of loaded policy index."""
-        hook_counts: Counter[str] = Counter()
-        for rule in index.all_rules:
-            hook_counts[rule.hook.value] += 1
-
-        logger.debug(
-            "Policy packs: %s, total rules: %d, by hook: %s",
-            index.pack_names,
-            index.total_rules,
-            dict(hook_counts),
-        )
-
-    @property
-    def enforcement_mode(self) -> EnforcementMode:
-        """Active enforcement mode for this loader.
-
-        The canonical source is whatever the policy provider supplied on
-        the most recent load. Until that load lands (or if the provider
-        omits a mode), the default is :attr:`EnforcementMode.AUDIT` —
-        evaluate and log without blocking. Defaulting to AUDIT avoids
-        the chicken-and-egg where a DISABLED default would short-circuit
-        evaluation before the background load could ever opt the tenant
-        in.
-        """
-        return (
-            self._enforcement_mode
-            if self._enforcement_mode is not None
-            else EnforcementMode.AUDIT
-        )
-
-    @property
-    def available_packs(self) -> list[str]:
-        """Pack names from the currently loaded policy index.
-
-        Returns whatever the provider supplied on the most recent load.
-        Empty list if no index has been loaded yet.
-        """
-        if self._policy_index is None:
-            return []
-        return self._policy_index.pack_names
-
-    def clear_cache(self) -> None:
-        """Clear the cached policy index and any in-flight prefetch state.
-
-        Next call to :meth:`get_policy_index` will reload from the
-        provider.
-        """
-        with self._prefetch_lock:
-            self._policy_index = None
-            self._prefetch_event = None
-        logger.debug("Policy index cache cleared")
diff --git a/src/uipath/runtime/governance/native/models.py b/src/uipath/runtime/governance/native/models.py
index 125e75e..b8d4adc 100644
--- a/src/uipath/runtime/governance/native/models.py
+++ b/src/uipath/runtime/governance/native/models.py
@@ -74,12 +74,16 @@ class Rule:
 
 @dataclass
 class CheckContext:
-    """Context passed to rule evaluation."""
+    """Context passed to rule evaluation.
+
+    ``trace_id`` is intentionally absent — trace correlation is
+    resolved by the wire-side provider at HTTP-call time, not carried
+    through the runtime evaluation context.
+    """
 
     hook: LifecycleHook
     agent_name: str
     runtime_id: str
-    trace_id: str
 
     # Content fields (populated based on hook)
     agent_input: str = ""
diff --git a/src/uipath/runtime/governance/runtime.py b/src/uipath/runtime/governance/runtime.py
index c8f9dd9..ab3d177 100644
--- a/src/uipath/runtime/governance/runtime.py
+++ b/src/uipath/runtime/governance/runtime.py
@@ -1,36 +1,45 @@
 """Governance runtime wrapper.
 
-Wraps a :class:`UiPathRuntimeProtocol` delegate so policy data is sourced
-through a :class:`GovernancePolicyProvider`. The provider owns the wire
-/ transport (auth, retries, telemetry); the runtime only consumes the
-parsed :class:`PolicyResponse`. There is no direct backend fallback —
-when ``policy_provider`` is ``None`` the agent runs without any
-governance policies.
-
-The wiring layer (uipath CLI) decides whether to construct
-``GovernanceRuntime`` at all (feature flag, project config, etc.) and
-passes ``is_conversational`` explicitly when it knows the agent type.
-The runtime layer does not introspect the delegate's private attributes
-to discover that.
-
-**Staging caveat — policy loading only, no enforcement yet.** This
-module is the policy-loading scaffold: ``__init__`` constructs an
-instance-scoped :class:`PolicyLoader` and kicks off a background
-prefetch. ``execute`` / ``stream`` / ``get_schema`` / ``dispose`` are
-pure passthroughs — no per-hook policy evaluation runs. The evaluator
-and framework adapter wiring that consumes the loader's policy index
-lands in a follow-up slice. Customers constructing
-:class:`GovernanceRuntime` today get policy loading without policy
-enforcement; this is intentional and will change when the evaluator
-slice merges.
+Wraps a :class:`UiPathRuntimeProtocol` delegate and carries a resolved
+policy snapshot — a :class:`PolicyIndex` and :class:`EnforcementMode`
+supplied by the caller. The wrapper performs no I/O at construction,
+holds no background thread, retains no policy provider, and reads no
+host environment variables.
+
+The caller (typically the host CLI) is expected to:
+
+- ``await provider.get_policy_async(PolicyContext(...))`` itself,
+- compile the response YAML via
+  :func:`uipath.runtime.governance.native.build_policy_index_from_yaml`,
+- skip wrapping entirely when the response mode is
+  :attr:`EnforcementMode.DISABLED`,
+- pass the resolved ``PolicyIndex`` and ``EnforcementMode`` into the
+  constructor.
+
+The wrapper owns the BEFORE_AGENT / AFTER_AGENT lifecycle boundary
+when an evaluator is supplied at construction. Framework adapters
+intentionally skip chain-level events so nested chain runs don't fire
+duplicate boundary evaluations; the runtime layer is the unambiguous
+"one invocation = one boundary" point, so it owns those hooks. Per-step
+hooks (BEFORE_MODEL, AFTER_MODEL, TOOL_CALL, AFTER_TOOL) are fired by
+adapters that observe per-step events.
+
+Trace-id is intentionally **not** carried on this wrapper. The
+governance compensator captures the live OTel context across the
+thread-pool hop via :func:`contextvars.copy_context`, and the
+injected provider resolves the canonical trace id at HTTP-call time.
+The runtime layer is fully env-free for this path.
 """
 
 from __future__ import annotations
 
+import json
 import logging
 from typing import Any, AsyncGenerator
 
-from uipath.core.governance import GovernancePolicyProvider
+from uipath.core.governance import EnforcementMode
+from uipath.core.governance.exceptions import GovernanceBlockException
+from uipath.core.serialization import serialize_object
 
 from uipath.runtime.base import (
     UiPathExecuteOptions,
@@ -38,89 +47,166 @@
     UiPathStreamOptions,
 )
 from uipath.runtime.events import UiPathRuntimeEvent
-from uipath.runtime.governance.native.loader import PolicyLoader
+from uipath.runtime.governance.native.evaluator import GovernanceEvaluator
+from uipath.runtime.governance.native.models import PolicyIndex
 from uipath.runtime.result import UiPathRuntimeResult
 from uipath.runtime.schema import UiPathRuntimeSchema
 
 logger = logging.getLogger(__name__)
 
 
-class GovernanceRuntime:
+def _serialize_payload(payload: Any) -> str:
+    """Serialize an agent input / output to a string for evaluator checks.
+
+    The native evaluator's BEFORE_AGENT / AFTER_AGENT checks scan a
+    flat string. ``None`` becomes ``""``, ``str`` passes through (so
+    regex / sentiment checks don't see JSON quotes around the bare
+    text), and everything else is normalized via
+    :func:`uipath.core.serialization.serialize_object` (handles
+    Pydantic / dataclass / datetime / nested structures) and then
+    JSON-encoded; on any error it falls back to ``str(payload)``.
+    """
+    if payload is None:
+        return ""
+    if isinstance(payload, str):
+        return payload
+    try:
+        return json.dumps(serialize_object(payload))
+    except Exception:  # noqa: BLE001 — last-resort string fallback
+        return str(payload)
+
+
+class UiPathGovernedRuntime:
     """Governance wrapper over a :class:`UiPathRuntimeProtocol` delegate.
 
-    Constructs an instance-scoped :class:`PolicyLoader` bound to the
-    supplied provider and kicks off a non-blocking prefetch so the
-    policy pack overlaps with the rest of agent setup. When
-    ``policy_provider`` is ``None``, the loader yields an empty
-    PolicyIndex and the agent runs without any governance policies for
-    the lifetime of this instance.
-
-    **Policy loading only — no enforcement yet.** ``execute`` / ``stream``
-    / ``get_schema`` / ``dispose`` are passthroughs to the delegate; no
-    per-hook policy evaluation runs in this slice. The evaluator and
-    framework adapter wiring that consumes the loader's policy index is
-    staged separately.
+    Holds a caller-resolved :class:`PolicyIndex` and
+    :class:`EnforcementMode` for the lifetime of the instance.
+    ``execute`` / ``stream`` / ``get_schema`` / ``dispose`` forward to
+    the delegate.
+
+    When ``evaluator`` is supplied, :meth:`execute` and :meth:`stream`
+    fire ``BEFORE_AGENT`` before delegating and ``AFTER_AGENT`` after a
+    successful return. Without an evaluator the wrapper is a pure
+    pass-through.
     """
 
     def __init__(
         self,
         delegate: UiPathRuntimeProtocol,
-        policy_provider: GovernancePolicyProvider | None,
+        policy_index: PolicyIndex,
+        enforcement_mode: EnforcementMode,
         *,
-        is_conversational: bool | None = None,
+        evaluator: GovernanceEvaluator | None = None,
+        agent_name: str = "",
+        runtime_id: str = "",
     ):
-        """Initialize the governance runtime.
+        """Initialize the governance runtime with a resolved policy snapshot.
 
         Args:
             delegate: The wrapped runtime to forward execution to.
-            policy_provider: Source of the policy pack. ``None`` means
-                no policies will be loaded — the agent runs without
-                governance for the lifetime of this instance.
-            is_conversational: Whether the hosted agent is
-                conversational. Forwarded into the provider's
-                :class:`PolicyContext` so it can pick the right policy
-                view (conversational vs autonomous). ``None`` (default)
-                leaves the selector unset — the provider applies its
-                default. The wiring layer (uipath CLI) is expected to
-                pass the concrete value when it knows the agent type.
+            policy_index: Resolved :class:`PolicyIndex` built from the
+                provider's :class:`PolicyResponse`. Pass an empty
+                ``PolicyIndex()`` to attach the wrapper without any
+                rules (useful when the wrapper exists for audit
+                emission only).
+            enforcement_mode: Resolved :class:`EnforcementMode` from
+                the provider's :class:`PolicyResponse`. The caller is
+                expected to skip wrapping entirely when the response
+                mode is :attr:`EnforcementMode.DISABLED`; this
+                constructor does not check.
+            evaluator: Optional :class:`GovernanceEvaluator` that
+                drives BEFORE_AGENT / AFTER_AGENT inside
+                :meth:`execute` / :meth:`stream`. When ``None`` the
+                wrapper is a pure passthrough — the caller is expected
+                to fire those evaluations itself.
+            agent_name: Name of the agent (the runtime's entrypoint).
+                Passed through to the evaluator's hook methods.
+            runtime_id: Runtime-instance id (conversation id, job id,
+                or a synthetic per-run id). Passed through so
+                per-runtime state routes cleanly.
         """
         self._delegate = delegate
-        self._loader = PolicyLoader(
-            policy_provider,
-            is_conversational=is_conversational,
-        )
-        self._loader.prefetch()
-
-    @property
-    def loader(self) -> PolicyLoader:
-        """The instance-scoped policy loader.
-
-        Exposed so adapters / evaluators wired into this runtime can
-        call :meth:`PolicyLoader.get_policy_index` at hook time.
+        self._policy_index = policy_index
+        self._enforcement_mode = enforcement_mode
+        self._evaluator = evaluator
+        self._agent_name = agent_name
+        self._runtime_id = runtime_id
+
+    def _fire_before_agent(self, input: Any) -> None:
+        """Fire BEFORE_AGENT when an evaluator is wired; otherwise no-op.
+
+        ``GovernanceBlockException`` propagates — that's how
+        ENFORCE-mode DENY rules halt a run. Anything else is logged
+        and swallowed so a governance bug never breaks the agent.
         """
-        return self._loader
+        if self._evaluator is None:
+            return
+        try:
+            self._evaluator.evaluate_before_agent(
+                agent_input=_serialize_payload(input),
+                agent_name=self._agent_name,
+                runtime_id=self._runtime_id,
+            )
+        except GovernanceBlockException:
+            raise
+        except Exception as exc:  # noqa: BLE001 — never break a run on audit failure
+            logger.warning("BEFORE_AGENT governance evaluation failed: %s", exc)
+
+    def _fire_after_agent(self, result: UiPathRuntimeResult) -> None:
+        """Fire AFTER_AGENT against ``result.output``.
+
+        Same exception policy as :meth:`_fire_before_agent`.
+        """
+        if self._evaluator is None:
+            return
+        try:
+            self._evaluator.evaluate_after_agent(
+                agent_output=_serialize_payload(result.output),
+                agent_name=self._agent_name,
+                runtime_id=self._runtime_id,
+            )
+        except GovernanceBlockException:
+            raise
+        except Exception as exc:  # noqa: BLE001
+            logger.warning("AFTER_AGENT governance evaluation failed: %s", exc)
 
     async def execute(
         self,
         input: dict[str, Any] | None = None,
         options: UiPathExecuteOptions | None = None,
     ) -> UiPathRuntimeResult:
-        """Execute the delegate. Policy evaluation hooks are wired separately."""
-        return await self._delegate.execute(input, options=options)
+        """Execute the delegate, firing BEFORE_AGENT / AFTER_AGENT around it.
+
+        AFTER_AGENT fires only on successful return — if the delegate
+        raises, there's no output to evaluate.
+        """
+        self._fire_before_agent(input)
+        result = await self._delegate.execute(input, options=options)
+        self._fire_after_agent(result)
+        return result
 
     async def stream(
         self,
         input: dict[str, Any] | None = None,
         options: UiPathStreamOptions | None = None,
     ) -> AsyncGenerator[UiPathRuntimeEvent, None]:
-        """Stream events from the delegate. Hooks are wired separately."""
+        """Stream events from the delegate, firing BEFORE_AGENT first.
+
+        AFTER_AGENT fires when a :class:`UiPathRuntimeResult` event is
+        observed in the stream, before that event is yielded — that's
+        the runtime's contract for signalling a completed invocation.
+        Intermediate state events pass through untouched.
+        """
+        self._fire_before_agent(input)
         async for event in self._delegate.stream(input, options=options):
+            if isinstance(event, UiPathRuntimeResult):
+                self._fire_after_agent(event)
             yield event
 
     async def get_schema(self) -> UiPathRuntimeSchema:
-        """Passthrough schema for the delegate."""
+        """Forward schema lookup to the delegate."""
         return await self._delegate.get_schema()
 
     async def dispose(self) -> None:
-        """Dispose the delegate."""
+        """Forward disposal to the delegate."""
         await self._delegate.dispose()
diff --git a/tests/_helpers.py b/tests/_helpers.py
deleted file mode 100644
index 2d3d924..0000000
--- a/tests/_helpers.py
+++ /dev/null
@@ -1,46 +0,0 @@
-"""Shared test-only helpers.
-
-Keeps test concerns out of the production governance package: shared
-stubs live here rather than inside the production modules.
-
-The enforcement-mode reset helper is gone because the mode is now
-instance-scoped on :class:`PolicyLoader` — tests that want a clean
-slate just construct a fresh loader instead of touching a global.
-"""
-
-from __future__ import annotations
-
-import time
-
-from uipath.core.governance import PolicyContext, PolicyResponse
-
-
-class StubPolicyProvider:
-    """Minimal in-memory :class:`GovernancePolicyProvider` for tests.
-
-    Records every :class:`PolicyContext` it receives so tests can assert
-    on the selector that travelled to the provider. Either returns a
-    pre-canned :class:`PolicyResponse` or raises a pre-canned exception;
-    the optional ``slow`` knob lets tests exercise the prefetch-wait
-    path.
-    """
-
-    def __init__(
-        self,
-        response: PolicyResponse | None = None,
-        raises: Exception | None = None,
-        slow: float = 0.0,
-    ):
-        self.calls: list[PolicyContext] = []
-        self._response = response
-        self._raises = raises
-        self._slow = slow
-
-    def get_policy(self, context: PolicyContext) -> PolicyResponse:
-        self.calls.append(context)
-        if self._slow:
-            time.sleep(self._slow)
-        if self._raises is not None:
-            raise self._raises
-        assert self._response is not None
-        return self._response
diff --git a/tests/conftest.py b/tests/conftest.py
index ba76eca..a6c5cd5 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -19,7 +19,7 @@ def temp_dir() -> Generator[str, None, None]:
         yield tmp_dir
 
 
-# Governance state — provider, conversational selector, policy cache,
-# enforcement mode — is owned by each :class:`PolicyLoader` instance,
-# so no autouse cross-test reset is needed. Tests that want a clean
-# slate just construct a fresh loader.
+# Governance policy state is held on the :class:`UiPathGovernedRuntime`
+# instance — the host passes a resolved :class:`PolicyIndex` +
+# :class:`EnforcementMode` into the constructor, so there is no
+# module-level policy state and no cross-test reset is needed here.
diff --git a/tests/test_audit_manager_lifecycle.py b/tests/test_audit_manager_lifecycle.py
new file mode 100644
index 0000000..66db0b8
--- /dev/null
+++ b/tests/test_audit_manager_lifecycle.py
@@ -0,0 +1,194 @@
+"""Lifecycle tests for :class:`AuditManager`.
+
+Pins the production-readiness invariants of the audit manager:
+
+- Process cleanup uses a single ``atexit`` handler that walks a
+  ``WeakSet`` — so creating many managers in one process doesn't
+  bloat the atexit list and doesn't pin managers in memory.
+- The fork-rebuild path is lock-protected: two threads in a
+  freshly-forked child can't both rebuild the queue/worker
+  concurrently.
+"""
+
+from __future__ import annotations
+
+import gc
+import os
+import threading
+from typing import Any
+from unittest.mock import patch
+
+import pytest
+
+from uipath.runtime.governance._audit import base as audit_base
+from uipath.runtime.governance._audit.base import AuditManager
+
+
+def _bare_manager() -> AuditManager:
+    """Build a manager with no default sinks (no traces sink, no atexit-set add)."""
+    return AuditManager(async_mode=False, register_default_sinks=False)
+
+
+# ---------------------------------------------------------------------------
+# atexit accounting: one process-level hook, no per-instance accumulation
+# ---------------------------------------------------------------------------
+
+
+def test_default_managers_register_once_in_process_atexit() -> None:
+    """Creating N managers must NOT add N entries to interpreter atexit.
+
+    Regression: per-instance ``atexit.register(self._atexit_cleanup)``
+    grew the atexit list linearly and held a strong ref to each manager.
+    The fix routes everyone through one process-level cleanup hook.
+    """
+    with patch.object(audit_base.atexit, "register") as mock_register:
+        # Reset module state so the assertion is deterministic
+        # regardless of test-order side effects.
+        audit_base._atexit_registered = False
+        try:
+            AuditManager(async_mode=False)  # first → registers
+            AuditManager(async_mode=False)  # second → reuses
+            AuditManager(async_mode=False)  # third  → reuses
+            assert mock_register.call_count == 1, (
+                "Each AuditManager must NOT register its own atexit handler"
+            )
+        finally:
+            # Drop test managers from the cleanup set before leaving.
+            audit_base._live_managers.clear()
+
+
+def test_register_default_sinks_false_skips_cleanup_set() -> None:
+    """Bare managers (tests) are not tracked for process cleanup."""
+    m = _bare_manager()
+    assert m not in audit_base._live_managers
+
+
+def test_disposed_manager_can_be_garbage_collected() -> None:
+    """The WeakSet must NOT keep a disposed manager alive.
+
+    Regression: per-instance atexit held a strong ref → disposed
+    managers leaked until process exit. With ``WeakSet`` + a single
+    process hook, dropping the last reference lets the manager GC.
+    """
+    import weakref
+
+    manager = AuditManager(async_mode=False)
+    ref = weakref.ref(manager)
+
+    # Sanity: it's tracked while alive.
+    assert manager in audit_base._live_managers
+
+    # Drop the local strong ref + force collection.
+    del manager
+    gc.collect()
+
+    # A dead weakref proves nothing still pins the manager.
+    assert ref() is None, (
+        "AuditManager was kept alive — strong reference leak in cleanup machinery"
+    )
+
+
+def test_process_cleanup_handles_already_closed_manager() -> None:
+    """If a manager was explicitly closed, the process hook is a no-op for it.
+
+    A manager that called close() during normal lifecycle should not
+    raise from the process-level cleanup — sink list is empty, worker
+    is already joined.
+    """
+    m = AuditManager(async_mode=False)
+    m.close()
+    # Must not raise.
+    audit_base._process_cleanup_managers()
+
+
+# ---------------------------------------------------------------------------
+# Fork-rebuild safety
+# ---------------------------------------------------------------------------
+
+
+def test_ensure_alive_after_fork_is_idempotent_under_concurrent_emit(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """Two threads in a fresh-fork child must not both rebuild the queue.
+
+    Without the lock, both threads observed the stale ``_pid``, both
+    constructed a new ``queue.Queue`` / ``threading.Event`` /
+    ``threading.Thread``, and the later writer leaked the earlier
+    one's queue+worker. With the lock the loser sees the updated
+    ``_pid`` after acquiring and returns.
+    """
+    m = AuditManager(async_mode=True, register_default_sinks=False)
+
+    # Capture the post-construction queue + worker so we can detect
+    # whether multiple rebuild winners occurred.
+    original_queue = m._queue
+    original_worker = m._worker_thread
+
+    # Simulate a fork by mutating the recorded pid. We do NOT actually
+    # fork; we just put the manager into "I think I'm in a stale
+    # process" state.
+    m._pid = -1
+
+    barrier = threading.Barrier(8)
+    seen_queues: set[int] = set()
+    seen_workers: set[int] = set()
+    lock = threading.Lock()
+
+    def worker() -> None:
+        barrier.wait()
+        m._ensure_alive_after_fork()
+        with lock:
+            seen_queues.add(id(m._queue))
+            seen_workers.add(id(m._worker_thread))
+
+    threads = [threading.Thread(target=worker) for _ in range(8)]
+    for t in threads:
+        t.start()
+    for t in threads:
+        t.join(timeout=5.0)
+
+    # Exactly one queue + worker survived the race.
+    assert len(seen_queues) == 1, (
+        f"Multiple queues survived fork-rebuild race: {seen_queues}"
+    )
+    assert len(seen_workers) == 1, (
+        f"Multiple workers survived fork-rebuild race: {seen_workers}"
+    )
+    # And the survivor is NOT the original (we did rebuild).
+    assert original_queue is not m._queue
+    assert original_worker is not m._worker_thread
+    assert m._pid == os.getpid()
+
+    m.close()
+
+
+def test_ensure_alive_after_fork_fast_path_when_pid_unchanged() -> None:
+    """Same-process call must NOT rebuild — sanity check on the fast path."""
+    m = AuditManager(async_mode=True, register_default_sinks=False)
+    original_queue = m._queue
+    original_worker = m._worker_thread
+
+    m._ensure_alive_after_fork()  # same PID — no-op
+
+    assert m._queue is original_queue
+    assert m._worker_thread is original_worker
+    m.close()
+
+
+# ---------------------------------------------------------------------------
+# Shared helpers
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture(autouse=True)
+def _clean_module_state() -> Any:
+    """Test isolation for the module-level cleanup machinery.
+
+    Sweep the WeakSet between tests so leftovers from one test don't
+    show up in another. Don't reset ``_atexit_registered`` — once
+    Python's ``atexit`` accepts a handler, we shouldn't unregister it
+    just for tests, and the tests above that check registration count
+    do their own reset under a patched ``atexit.register``.
+    """
+    yield
+    audit_base._live_managers.clear()
diff --git a/tests/test_audit_register_sink.py b/tests/test_audit_register_sink.py
new file mode 100644
index 0000000..19c9996
--- /dev/null
+++ b/tests/test_audit_register_sink.py
@@ -0,0 +1,108 @@
+"""Tests for ``AuditManager.register_sink`` failure-counter semantics.
+
+A re-registered same-name sink must NOT inherit the previous instance's
+tripped circuit-breaker state. ``unregister_sink`` already clears these
+counters, but ``register_sink`` also clears them on a successful add as
+defense-in-depth (covers tests / external callers that touch the
+internal counter dicts directly).
+"""
+
+from __future__ import annotations
+
+from typing import Any
+
+import pytest
+
+from uipath.runtime.governance._audit.base import (
+    AuditEvent,
+    AuditManager,
+    AuditSink,
+    EventType,
+)
+
+
+class _NoopSink(AuditSink):
+    """Sink that records emit calls and never raises."""
+
+    def __init__(self, name: str = "test-sink") -> None:
+        self._name = name
+        self.events: list[AuditEvent] = []
+
+    @property
+    def name(self) -> str:
+        return self._name
+
+    def emit(self, event: AuditEvent) -> None:
+        self.events.append(event)
+
+
+def _event() -> AuditEvent:
+    return AuditEvent(event_type=EventType.RULE_EVALUATION, agent_name="a")
+
+
+@pytest.fixture
+def manager() -> Any:
+    """Build a fresh, sync-mode AuditManager with no default sinks.
+
+    ``register_default_sinks=False`` keeps the traces sink (and the
+    process-level cleanup tracking) out of the test, so assertions about
+    registered sinks see only what the test puts there.
+    """
+    return AuditManager(async_mode=False, register_default_sinks=False)
+
+
+def test_register_clears_stale_failure_counter(manager: AuditManager) -> None:
+    """A new sink with a name that previously tripped starts fresh."""
+    # Simulate prior instance having tripped the circuit-breaker without
+    # going through unregister (e.g. test code or external code that
+    # mutated the counters directly).
+    manager._sink_failures["test-sink"] = manager._SINK_FAILURE_THRESHOLD
+    manager._tripped_sinks.add("test-sink")
+
+    new_sink = _NoopSink(name="test-sink")
+    manager.register_sink(new_sink)
+
+    # Counter and tripped-set must be cleared.
+    assert manager._sink_failures.get("test-sink", 0) == 0
+    assert "test-sink" not in manager._tripped_sinks
+
+    # And the new sink actually receives events (would be skipped if
+    # still considered tripped).
+    manager.emit(_event())
+    assert len(new_sink.events) == 1
+
+
+def test_register_does_not_clear_for_duplicate(manager: AuditManager) -> None:
+    """Re-registering an already-present sink is a no-op (no counter reset)."""
+    sink = _NoopSink(name="test-sink")
+    manager.register_sink(sink)
+
+    # Simulate the existing sink having accumulated some failures.
+    manager._sink_failures["test-sink"] = 3
+
+    # A second register call with the same name should NOT clear those
+    # failures — the duplicate-check fires before the reset.
+    duplicate = _NoopSink(name="test-sink")
+    manager.register_sink(duplicate)
+
+    assert manager._sink_failures["test-sink"] == 3
+
+
+def test_unregister_then_register_starts_fresh(manager: AuditManager) -> None:
+    """The full lifecycle: register → trip → unregister → register again."""
+    sink = _NoopSink(name="test-sink")
+    manager.register_sink(sink)
+    manager._sink_failures["test-sink"] = manager._SINK_FAILURE_THRESHOLD
+    manager._tripped_sinks.add("test-sink")
+
+    manager.unregister_sink("test-sink")
+    # Unregister already clears.
+    assert "test-sink" not in manager._tripped_sinks
+
+    new_sink = _NoopSink(name="test-sink")
+    manager.register_sink(new_sink)
+    assert manager._sink_failures.get("test-sink", 0) == 0
+    assert "test-sink" not in manager._tripped_sinks
+
+    manager.emit(_event())
+    assert len(new_sink.events) == 1
diff --git a/tests/test_commitment_concern.py b/tests/test_commitment_concern.py
new file mode 100644
index 0000000..a46149b
--- /dev/null
+++ b/tests/test_commitment_concern.py
@@ -0,0 +1,205 @@
+"""Tests for the commitment_concern check (A.10.4).
+
+The check uses OR semantics: a verb match always fires, and an amount or
+deadline match also fires when its flag (``require_amount`` /
+``require_deadline``) is on. With both flags false the rule is verb-only.
+
+The verb pattern also covers proposal / SOW style commitment markers
+("Cost: $X", "fixed scope", "Deliverables", "Timeline", "I propose")
+so formal-business commitments without first-person verbs still fire.
+
+Amount detection requires a currency marker adjacent to the number to
+prevent URL fragments (forum-post IDs, image dimensions, etc.) from
+false-positiving.
+"""
+
+from __future__ import annotations
+
+import pytest
+
+from uipath.runtime.governance.native.evaluator import GovernanceEvaluator
+
+# ---------------------------------------------------------------------------
+# The proposal-style sample that originally slipped through the rule.
+# Contains: "Cost: $780 (fixed for the above scope)", "Deliverables",
+# "Timeline: 4 days total", "I propose", a forum URL with a 6-digit ID.
+# Triple-quoted so we keep the line breaks the model produced.
+# ---------------------------------------------------------------------------
+SAMPLE_PROPOSAL = """To address your concerns, I reviewed the official UiPath site you referenced and relevant resources on uipath.com to inform a fast stabilization plan. Notable findings include: a community CI/CD sample for UiPath projects (https://forum.uipath.com/t/announcement-ci-cd-pipeline-sample-implementation-s-for-uipath-projects-alpha/667851).
+
+Here's how I propose we turn your software around quickly:
+
+Plan
+- Triage (logs + reproduce)
+- Quick stabilization
+
+Deliverables
+- Defect triage report
+
+Timeline: 4 days total
+- Day 1: Triage + reproduction
+
+Cost: $780 (fixed for the above scope)
+"""
+
+
+@pytest.mark.parametrize(
+    "text",
+    [
+        "Cost: $780 (fixed for the above scope)",
+        "Deliverables: a, b, c",
+        "Timeline: 4 days total for the whole engagement",
+        "I propose we turn this around in a week",
+        "We will refund the difference",
+        "I'll deliver the report by Friday",
+        "the warranty covers parts only",
+        "fixed price of one hundred dollars",
+    ],
+)
+def test_verb_match_alone_fires(text: str) -> None:
+    """Each verb-style commitment marker fires on its own (verb-only mode)."""
+    assert (
+        GovernanceEvaluator._check_commitment_concern(
+            text, {"require_amount": False, "require_deadline": False}
+        )
+        is True
+    )
+
+
+def test_full_proposal_sample_fires() -> None:
+    """The originally-missed proposal output now fires."""
+    assert (
+        GovernanceEvaluator._check_commitment_concern(
+            SAMPLE_PROPOSAL,
+            {"require_amount": False, "require_deadline": False},
+        )
+        is True
+    )
+
+
+@pytest.mark.parametrize(
+    "text",
+    [
+        "$780",
+        "We charge USD 1,200 per seat",
+        "The fee is 500 EUR",
+    ],
+)
+def test_amount_alone_fires_when_require_amount_true(text: str) -> None:
+    """Currency-anchored amount alone fires under OR semantics."""
+    assert (
+        GovernanceEvaluator._check_commitment_concern(
+            text, {"require_amount": True, "require_deadline": False}
+        )
+        is True
+    )
+
+
+@pytest.mark.parametrize(
+    "text",
+    [
+        "Task is 75% complete.",
+        "We maintain 99.9% uptime.",
+        "Battery at 50%.",
+        "Score: 12%.",
+    ],
+)
+def test_bare_percentage_does_not_fire(text: str) -> None:
+    """Status-only percentages must not trigger commitment_concern.
+
+    Regression for the prior ``\\d{1,3}\\s*%`` branch in the amount
+    regex, which fired on benign status / progress text. Real
+    percentage-bearing commitments ("we'll give a 20% discount")
+    still fire via the verb pattern.
+    """
+    assert (
+        GovernanceEvaluator._check_commitment_concern(
+            text, {"require_amount": True, "require_deadline": False}
+        )
+        is False
+    )
+
+
+def test_percentage_with_verb_still_fires() -> None:
+    """A commitment verb co-occurring with a percentage still fires."""
+    assert (
+        GovernanceEvaluator._check_commitment_concern(
+            "We will refund 100% of the purchase price.",
+            {"require_amount": True, "require_deadline": False},
+        )
+        is True
+    )
+
+
+def test_amount_alone_does_not_fire_when_require_amount_false() -> None:
+    """Amount-only text is silent when require_amount=False and no verb."""
+    assert (
+        GovernanceEvaluator._check_commitment_concern(
+            "The list price is $780.",
+            {"require_amount": False, "require_deadline": False},
+        )
+        is False
+    )
+
+
+def test_deadline_alone_fires_when_require_deadline_true() -> None:
+    """Deadline phrase alone fires under OR semantics."""
+    assert (
+        GovernanceEvaluator._check_commitment_concern(
+            "Will be done within 5 days.",
+            {"require_amount": False, "require_deadline": True},
+        )
+        is True
+    )
+
+
+def test_url_fragment_digits_do_not_false_positive() -> None:
+    """A long URL with embedded digits is not a 'commitment'.
+
+    Catches the prior price-parser misbehaviour where Price.fromstring()
+    picked up forum-post IDs (e.g. ``667851``) and conflated them with
+    unrelated currency symbols elsewhere in the text.
+    """
+    text = (
+        "See https://forum.example.com/t/topic/667851 for details — "
+        "no commitment language here."
+    )
+    assert (
+        GovernanceEvaluator._check_commitment_concern(
+            text, {"require_amount": True, "require_deadline": True}
+        )
+        is False
+    )
+
+
+@pytest.mark.parametrize(
+    "text",
+    [
+        "",
+        "   ",
+        "Just chatting about the weather today.",
+        "The product is durable and well-made.",
+    ],
+)
+def test_no_signal_does_not_fire(text: str) -> None:
+    """Text without any commitment signal stays silent regardless of flags."""
+    assert (
+        GovernanceEvaluator._check_commitment_concern(
+            text, {"require_amount": True, "require_deadline": True}
+        )
+        is False
+    )
+
+
+def test_non_dict_params_treated_as_defaults() -> None:
+    """``params`` of the wrong type degrades to defaults rather than crashing."""
+    assert (
+        GovernanceEvaluator._check_commitment_concern("we will refund", None)
+        is True
+    )
+    assert (
+        GovernanceEvaluator._check_commitment_concern(
+            "no verbs here", "garbage"
+        )
+        is False
+    )
diff --git a/tests/test_enforcement_mode_default.py b/tests/test_enforcement_mode_default.py
deleted file mode 100644
index 78230fd..0000000
--- a/tests/test_enforcement_mode_default.py
+++ /dev/null
@@ -1,114 +0,0 @@
-"""Tests for the default enforcement-mode resolution on :class:`PolicyLoader`.
-
-The default is :attr:`EnforcementMode.AUDIT` so the wrapper attaches at
-runtime construction and the background policy load can run. If the
-provider later returns ``disabled``, the loader records it and
-:attr:`enforcement_mode` flips.
-
-Resolution (per :attr:`PolicyLoader.enforcement_mode`):
-1. The provider-supplied value on the most recent load.
-2. Default :attr:`EnforcementMode.AUDIT`.
-"""
-
-from __future__ import annotations
-
-from uipath.core.governance import EnforcementMode, PolicyResponse
-
-from tests._helpers import StubPolicyProvider
-from uipath.runtime.governance.native.loader import PolicyLoader
-
-
-def test_default_mode_is_audit() -> None:
-    """No provider-supplied mode yet → AUDIT.
-
-    AUDIT is the default so the wrapper attaches and the background
-    policy fetch can run. The backend can flip the mode to DISABLED
-    on fetch when the tenant has no policies.
-    """
-    loader = PolicyLoader(None)
-    assert loader.enforcement_mode is EnforcementMode.AUDIT
-
-
-def test_provider_disabled_wins_over_default() -> None:
-    """A provider supplying DISABLED overrides the AUDIT default."""
-    provider = StubPolicyProvider(
-        response=PolicyResponse(mode=EnforcementMode.DISABLED, policies="")
-    )
-    loader = PolicyLoader(provider)
-    loader.load_policy_index()
-    assert loader.enforcement_mode is EnforcementMode.DISABLED
-
-
-def test_provider_enforce_wins_over_default() -> None:
-    """A provider supplying ENFORCE flips the loader to enforce."""
-    provider = StubPolicyProvider(
-        response=PolicyResponse(
-            mode=EnforcementMode.ENFORCE,
-            policies="standard: p\nrules: [{id: r1, hook: before_model, "
-            "checks: [{type: regex, patterns: ['x']}]}]\n",
-        )
-    )
-    loader = PolicyLoader(provider)
-    loader.load_policy_index()
-    assert loader.enforcement_mode is EnforcementMode.ENFORCE
-
-
-def test_loader_with_none_mode_response_keeps_previous_value() -> None:
-    """Provider returning ``mode=None`` doesn't clobber a previously-set mode.
-
-    The wire response model treats ``None`` as "no opinion" — the loader
-    must not overwrite a real value with it. Otherwise a transient
-    provider response could silently demote a tenant's enforcement
-    posture.
-    """
-    p1 = StubPolicyProvider(
-        response=PolicyResponse(
-            mode=EnforcementMode.ENFORCE,
-            policies="standard: p\nrules: [{id: r1, hook: before_model, "
-            "checks: [{type: regex, patterns: ['x']}]}]\n",
-        )
-    )
-    loader = PolicyLoader(p1)
-    loader.load_policy_index()
-    assert loader.enforcement_mode is EnforcementMode.ENFORCE
-
-    # A second provider response that omits mode should not flip back to AUDIT.
-    loader._provider = StubPolicyProvider(
-        response=PolicyResponse(
-            mode=None,
-            policies="standard: p\nrules: [{id: r1, hook: before_model, "
-            "checks: [{type: regex, patterns: ['x']}]}]\n",
-        )
-    )
-    loader.clear_cache()
-    loader.load_policy_index()
-    assert loader.enforcement_mode is EnforcementMode.ENFORCE
-
-
-def test_two_loaders_carry_independent_enforcement_modes() -> None:
-    """The whole point of the refactor: parallel loaders don't share mode.
-
-    Previously :func:`set_enforcement_mode` wrote a module global, so an
-    ENFORCE-mode loader and a DISABLED-mode loader running concurrently
-    in the same process clobbered each other (last writer wins).
-    Instance-scoped mode means each loader's mode is read-isolated.
-    """
-    p_enforce = StubPolicyProvider(
-        response=PolicyResponse(
-            mode=EnforcementMode.ENFORCE,
-            policies="standard: e\nrules: [{id: r1, hook: before_model, "
-            "checks: [{type: regex, patterns: ['x']}]}]\n",
-        )
-    )
-    p_disabled = StubPolicyProvider(
-        response=PolicyResponse(mode=EnforcementMode.DISABLED, policies="")
-    )
-
-    enforce_loader = PolicyLoader(p_enforce)
-    disabled_loader = PolicyLoader(p_disabled)
-
-    enforce_loader.load_policy_index()
-    disabled_loader.load_policy_index()
-
-    assert enforce_loader.enforcement_mode is EnforcementMode.ENFORCE
-    assert disabled_loader.enforcement_mode is EnforcementMode.DISABLED
diff --git a/tests/test_evaluator.py b/tests/test_evaluator.py
new file mode 100644
index 0000000..2039182
--- /dev/null
+++ b/tests/test_evaluator.py
@@ -0,0 +1,420 @@
+"""Tests for the audit + enforcement behavior of GovernanceEvaluator.
+
+The evaluator's three load-bearing responsibilities:
+
+1. DISABLED enforcement mode short-circuits — no rules evaluated, no
+   audit events emitted, no exceptions raised.
+2. AUDIT mode evaluates rules and emits audit events, but transforms
+   matched DENY actions into AUDIT so execution continues.
+3. ENFORCE mode evaluates, emits audit, and raises
+   :class:`GovernanceBlockException` when a DENY rule matches.
+
+Plus a fail-safe contract: a misbehaving audit sink must not stop
+evaluation from completing or propagate as an exception. The
+evaluator is constructed with explicit dependencies (audit manager,
+enforcement mode); no process-globals are involved.
+"""
+
+from __future__ import annotations
+
+from typing import Any
+
+import pytest
+from uipath.core.governance import EnforcementMode
+from uipath.core.governance.exceptions import GovernanceBlockException
+from uipath.core.governance.models import Action, LifecycleHook
+
+from uipath.runtime.governance._audit.base import (
+    AuditEvent,
+    AuditManager,
+    AuditSink,
+    EventType,
+)
+from uipath.runtime.governance.native.evaluator import GovernanceEvaluator
+from uipath.runtime.governance.native.models import (
+    Check,
+    CheckContext,
+    Condition,
+    PolicyIndex,
+    PolicyPack,
+    Rule,
+)
+
+# ---------------------------------------------------------------------------
+# Test helpers
+# ---------------------------------------------------------------------------
+
+
+class _CapturingSink(AuditSink):
+    """Audit sink that records every event for assertions."""
+
+    def __init__(self) -> None:
+        self.events: list[AuditEvent] = []
+
+    @property
+    def name(self) -> str:
+        return "capturing"
+
+    def emit(self, event: AuditEvent) -> None:
+        self.events.append(event)
+
+
+def _deny_rule_on_input_contains(needle: str) -> Rule:
+    """Build a rule that DENIES when agent_input contains ``needle``."""
+    return Rule(
+        rule_id="TEST-01",
+        name="Test deny on input",
+        clause="A.1.1",
+        hook=LifecycleHook.BEFORE_AGENT,
+        action=Action.DENY,
+        checks=[
+            Check(
+                conditions=[
+                    Condition(
+                        operator="contains",
+                        field="agent_input",
+                        value=needle,
+                    )
+                ],
+                action=Action.DENY,
+                message=f"Input must not contain {needle!r}",
+            )
+        ],
+    )
+
+
+def _build_index_with(rule: Rule) -> PolicyIndex:
+    """Wrap a single rule in a one-pack PolicyIndex."""
+    idx = PolicyIndex()
+    idx.add_pack(
+        PolicyPack(
+            name="test_pack",
+            version="1.0",
+            description="test",
+            rules=[rule],
+        )
+    )
+    return idx
+
+
+def _ctx(agent_input: str) -> CheckContext:
+    return CheckContext(
+        hook=LifecycleHook.BEFORE_AGENT,
+        agent_name="test-agent",
+        runtime_id="run-1",
+        agent_input=agent_input,
+    )
+
+
+def _build_evaluator(
+    rule: Rule,
+    mode: EnforcementMode,
+    audit_manager: AuditManager | None = None,
+) -> GovernanceEvaluator:
+    """Construct an evaluator with explicit deps — no process-globals involved."""
+    return GovernanceEvaluator(
+        _build_index_with(rule),
+        enforcement_mode=mode,
+        audit_manager=audit_manager,
+    )
+
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture
+def audit_setup() -> Any:
+    """Per-test :class:`AuditManager` + capturing sink — no default sinks.
+
+    Returns ``(manager, sink)`` so a test can build evaluators with the
+    manager and inspect emitted events through the sink. Synchronous
+    mode keeps assertions deterministic.
+    """
+    manager = AuditManager(async_mode=False, register_default_sinks=False)
+    sink = _CapturingSink()
+    manager.register_sink(sink)
+    yield manager, sink
+    manager.close()
+
+
+# ---------------------------------------------------------------------------
+# DISABLED mode
+# ---------------------------------------------------------------------------
+
+
+def test_disabled_mode_short_circuits_with_empty_record(audit_setup: Any) -> None:
+    """DISABLED returns an empty AuditRecord and emits nothing."""
+    manager, sink = audit_setup
+    evaluator = _build_evaluator(
+        _deny_rule_on_input_contains("secret"),
+        EnforcementMode.DISABLED,
+        audit_manager=manager,
+    )
+
+    audit = evaluator.evaluate(_ctx("definitely contains secret"))
+
+    assert audit.evaluations == []
+    assert audit.final_action == Action.ALLOW
+    assert audit.metadata["enforcement_mode"] == "disabled"
+    assert sink.events == []
+
+
+def test_disabled_mode_does_not_raise_on_deny_match(audit_setup: Any) -> None:
+    """Even when a DENY rule WOULD match, DISABLED never raises."""
+    manager, _ = audit_setup
+    evaluator = _build_evaluator(
+        _deny_rule_on_input_contains("blocked"),
+        EnforcementMode.DISABLED,
+        audit_manager=manager,
+    )
+
+    # Must not raise.
+    evaluator.evaluate(_ctx("this is blocked"))
+
+
+# ---------------------------------------------------------------------------
+# AUDIT mode
+# ---------------------------------------------------------------------------
+
+
+def test_audit_mode_transforms_deny_to_audit(audit_setup: Any) -> None:
+    """AUDIT mode evaluates rules but never returns a DENY final_action."""
+    manager, _ = audit_setup
+    evaluator = _build_evaluator(
+        _deny_rule_on_input_contains("secret"),
+        EnforcementMode.AUDIT,
+        audit_manager=manager,
+    )
+
+    audit = evaluator.evaluate(_ctx("contains secret data"))
+
+    assert len(audit.evaluations) == 1
+    assert audit.evaluations[0].matched is True
+    assert audit.evaluations[0].action == Action.DENY  # raw rule action preserved
+    assert audit.final_action == Action.AUDIT  # mode-adjusted
+    assert audit.metadata["audit_mode_would_deny"] is True
+
+
+def test_audit_mode_does_not_raise_on_deny_match(audit_setup: Any) -> None:
+    """AUDIT mode never raises GovernanceBlockException, even on a DENY hit."""
+    manager, _ = audit_setup
+    evaluator = _build_evaluator(
+        _deny_rule_on_input_contains("blocked"),
+        EnforcementMode.AUDIT,
+        audit_manager=manager,
+    )
+
+    evaluator.evaluate(_ctx("this is blocked"))  # must not raise
+
+
+def test_audit_mode_emits_per_rule_and_summary_events(audit_setup: Any) -> None:
+    """One rule_evaluation event per rule + one hook_summary per evaluate()."""
+    manager, sink = audit_setup
+    evaluator = _build_evaluator(
+        _deny_rule_on_input_contains("secret"),
+        EnforcementMode.AUDIT,
+        audit_manager=manager,
+    )
+
+    evaluator.evaluate(_ctx("contains secret"))
+
+    rule_events = [
+        e for e in sink.events if e.event_type == EventType.RULE_EVALUATION
+    ]
+    summary_events = [
+        e for e in sink.events if e.event_type == EventType.HOOK_END
+    ]
+    assert len(rule_events) == 1
+    assert rule_events[0].hook == "BEFORE_AGENT"
+    assert rule_events[0].data["policy_id"] == "TEST-01"
+    assert rule_events[0].data["matched"] is True
+    assert rule_events[0].data["action"] == "deny"
+    # Mode travels on every event (PR #122 contract).
+    assert rule_events[0].data["enforcement_mode"] == EnforcementMode.AUDIT
+
+    assert len(summary_events) == 1
+    assert summary_events[0].data["matched_rules"] == 1
+    assert summary_events[0].data["final_action"] == "audit"
+    assert summary_events[0].data["enforcement_mode"] == EnforcementMode.AUDIT
+
+
+def test_audit_mode_unmatched_rule_logged_as_allow(audit_setup: Any) -> None:
+    """Unmatched rules still emit a rule_evaluation event with action='allow'."""
+    manager, sink = audit_setup
+    evaluator = _build_evaluator(
+        _deny_rule_on_input_contains("secret"),
+        EnforcementMode.AUDIT,
+        audit_manager=manager,
+    )
+
+    evaluator.evaluate(_ctx("benign user query"))
+
+    rule_events = [
+        e for e in sink.events if e.event_type == EventType.RULE_EVALUATION
+    ]
+    assert len(rule_events) == 1
+    assert rule_events[0].data["matched"] is False
+    assert rule_events[0].data["action"] == "allow"
+
+
+# ---------------------------------------------------------------------------
+# ENFORCE mode
+# ---------------------------------------------------------------------------
+
+
+def test_enforce_mode_raises_on_deny_match(audit_setup: Any) -> None:
+    """ENFORCE mode raises GovernanceBlockException when a DENY rule matches."""
+    manager, _ = audit_setup
+    evaluator = _build_evaluator(
+        _deny_rule_on_input_contains("blocked"),
+        EnforcementMode.ENFORCE,
+        audit_manager=manager,
+    )
+
+    with pytest.raises(GovernanceBlockException) as exc_info:
+        evaluator.evaluate(_ctx("input is blocked"))
+
+    exc = exc_info.value
+    assert exc.rule_id == "TEST-01"
+    assert exc.rule_name == "Test deny on input"
+    assert exc.audit_record is not None
+    assert exc.audit_record.final_action == Action.DENY
+
+
+def test_enforce_mode_emits_audit_before_raising(audit_setup: Any) -> None:
+    """The audit trail must be emitted even when the call raises."""
+    manager, sink = audit_setup
+    evaluator = _build_evaluator(
+        _deny_rule_on_input_contains("blocked"),
+        EnforcementMode.ENFORCE,
+        audit_manager=manager,
+    )
+
+    with pytest.raises(GovernanceBlockException):
+        evaluator.evaluate(_ctx("contains blocked"))
+
+    rule_events = [
+        e for e in sink.events if e.event_type == EventType.RULE_EVALUATION
+    ]
+    summary_events = [e for e in sink.events if e.event_type == EventType.HOOK_END]
+    assert len(rule_events) == 1
+    # Count first: a missing summary must fail as an assertion, not IndexError.
+    assert len(summary_events) == 1
+    assert summary_events[0].data["final_action"] == "deny"
+    assert summary_events[0].data["enforcement_mode"] == EnforcementMode.ENFORCE
+
+
+def test_enforce_mode_returns_record_when_no_rule_matches(audit_setup: Any) -> None:
+    """No DENY hit → no raise; the AuditRecord is returned normally."""
+    manager, _ = audit_setup
+    evaluator = _build_evaluator(
+        _deny_rule_on_input_contains("blocked"),
+        EnforcementMode.ENFORCE,
+        audit_manager=manager,
+    )
+
+    audit = evaluator.evaluate(_ctx("benign query"))
+
+    assert audit.final_action == Action.ALLOW
+    assert audit.evaluations[0].matched is False
+
+
+# ---------------------------------------------------------------------------
+# Sink-failure isolation + no-audit-manager case
+# ---------------------------------------------------------------------------
+
+
+def test_sink_failure_does_not_propagate_or_block_evaluation(
+    audit_setup: Any,
+) -> None:
+    """A broken sink must not make evaluate() raise or lose its return value.
+
+    Contract: AuditManager wraps each sink's emit() in try/except with a
+    per-sink failure counter (circuit-breaker), so a sink exception
+    never propagates back to the evaluator.
+    """
+    manager, capturing_sink = audit_setup
+
+    class _BrokenSink(AuditSink):
+        @property
+        def name(self) -> str:
+            return "broken"
+
+        def emit(self, event: AuditEvent) -> None:
+            raise RuntimeError("sink broke")
+
+    manager.register_sink(_BrokenSink())
+
+    evaluator = _build_evaluator(
+        _deny_rule_on_input_contains("secret"),
+        EnforcementMode.AUDIT,
+        audit_manager=manager,
+    )
+
+    # Must complete without raising even with a broken sink registered.
+    audit = evaluator.evaluate(_ctx("contains secret"))
+
+    assert audit.final_action == Action.AUDIT
+    # The non-broken capturing sink still got its events.
+    assert any(
+        e.event_type == EventType.RULE_EVALUATION for e in capturing_sink.events
+    )
+
+
+def test_no_audit_manager_short_circuits_emission() -> None:
+    """``audit_manager=None`` is a no-op — evaluation still completes.
+
+    Replaces the previous test that mocked ``get_audit_manager`` to
+    raise. With explicit injection, the equivalent "no manager
+    available" state is simply ``audit_manager=None`` at construction.
+    """
+    evaluator = _build_evaluator(
+        _deny_rule_on_input_contains("secret"),
+        EnforcementMode.AUDIT,
+        audit_manager=None,
+    )
+
+    # Must complete, return record, and not raise.
+    audit = evaluator.evaluate(_ctx("contains secret"))
+
+    assert audit.final_action == Action.AUDIT
+    assert audit.evaluations[0].matched is True
+
+
+# ---------------------------------------------------------------------------
+# Protocol conformance smoke test
+# ---------------------------------------------------------------------------
+
+
+def test_governance_evaluator_satisfies_evaluator_protocol() -> None:
+    """GovernanceEvaluator must be usable wherever EvaluatorProtocol is expected.
+
+    Mirrors the pattern from test_detached_bridge_satisfies_debug_protocol —
+    an explicit assignment to the protocol-typed variable documents the
+    structural contract.
+    """
+    from uipath.core.adapters import EvaluatorProtocol
+
+    evaluator: EvaluatorProtocol = GovernanceEvaluator(PolicyIndex())
+    assert isinstance(evaluator, EvaluatorProtocol)
+
+
+def test_evaluator_protocol_methods_resolvable_on_concrete() -> None:
+    """Every method the protocol declares must be callable on the concrete impl."""
+    from uipath.core.adapters import EvaluatorProtocol
+
+    evaluator: Any = GovernanceEvaluator(PolicyIndex())
+    for method_name in (
+        "evaluate_before_agent",
+        "evaluate_after_agent",
+        "evaluate_before_model",
+        "evaluate_after_model",
+        "evaluate_tool_call",
+        "evaluate_after_tool",
+    ):
+        assert callable(getattr(evaluator, method_name))
+    # The ``Any`` annotation checks nothing at runtime; isinstance() does, and
+    # for a @runtime_checkable Protocol it only verifies members exist.
+    assert isinstance(evaluator, EvaluatorProtocol)
diff --git a/tests/test_evaluator_operators.py b/tests/test_evaluator_operators.py
new file mode 100644
index 0000000..32e83c6
--- /dev/null
+++ b/tests/test_evaluator_operators.py
@@ -0,0 +1,672 @@
+"""Tests for ``GovernanceEvaluator`` operators and field resolution.
+
+Covers each operator implemented in :meth:`_apply_operator` plus the
+``_check_*`` helper functions (vader, encoding, entropy, incident,
+commitment) and the ``evaluate_*`` dispatchers.
+"""
+
+from __future__ import annotations
+
+import pytest
+from uipath.core.governance import EnforcementMode
+from uipath.core.governance.models import Action, LifecycleHook
+
+from uipath.runtime.governance.native.evaluator import (
+    _INCIDENT_PATTERNS,
+    GovernanceEvaluator,
+)
+from uipath.runtime.governance.native.models import (
+    Check,
+    CheckContext,
+    Condition,
+    PolicyIndex,
+    PolicyPack,
+    Rule,
+)
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _evaluator() -> GovernanceEvaluator:
+    """Build a GovernanceEvaluator with an empty PolicyIndex (operators only).
+
+    AUDIT is the default mode; operator tests don't care about
+    enforcement and we don't need an audit manager for purely
+    operator-level assertions.
+    """
+    return GovernanceEvaluator(policy_index=PolicyIndex())
+
+
+def _ctx(**fields) -> CheckContext:
+    """Build a CheckContext; keyword overrides win over the baseline values."""
+    baseline = {
+        "hook": LifecycleHook.AFTER_MODEL,
+        "agent_name": "agent",
+        "runtime_id": "rt-1",
+    }
+    # Later keys win, so caller-supplied fields replace the baseline ones.
+    return CheckContext(**{**baseline, **fields})
+
+
+def _rule_with_condition(operator: str, field: str, value, *, negate: bool = False) -> Rule:
+    """Return an AFTER_MODEL audit rule whose single check holds one condition."""
+    # Build the leaf first so the Rule literal below stays flat and readable.
+    condition = Condition(
+        operator=operator, field=field, value=value, negate=negate
+    )
+
+    return Rule(
+        rule_id="r1",
+        name="r1",
+        clause="",
+        hook=LifecycleHook.AFTER_MODEL,
+        action=Action.AUDIT,
+        checks=[Check(conditions=[condition])],
+    )
+
+
+# Mode is per-instance now — tests construct evaluators with the mode
+# they need via the ``enforcement_mode`` kwarg. No process-globals to
+# reset.
+
+
+# ---------------------------------------------------------------------------
+# Field resolution — _get_field_value
+# ---------------------------------------------------------------------------
+
+
+def test_get_field_value_top_level_attr() -> None:
+    ev = _evaluator()
+    ctx = _ctx(model_output="hello")
+    assert ev._get_field_value("model_output", ctx) == "hello"
+
+
+def test_get_field_value_dotted_path_into_dict() -> None:
+    ev = _evaluator()
+    ctx = _ctx(session_state={"tool_calls": 7})
+    assert ev._get_field_value("session_state.tool_calls", ctx) == 7
+
+
+def test_get_field_value_missing_segment_returns_none() -> None:
+    ev = _evaluator()
+    ctx = _ctx()
+    assert ev._get_field_value("nonexistent", ctx) is None
+    assert ev._get_field_value("session_state.absent", ctx) is None
+
+
+# ---------------------------------------------------------------------------
+# Existence / guardrail_fallback (special-cased before the None check)
+# ---------------------------------------------------------------------------
+
+
+def test_exists_true_when_value_present() -> None:
+    ev = _evaluator()
+    ctx = _ctx(model_output="x")
+    assert ev._apply_operator("exists", ev._get_field_value("model_output", ctx), None) is True
+
+
+def test_exists_false_when_missing() -> None:
+    ev = _evaluator()
+    assert ev._apply_operator("exists", None, None) is False
+
+
+def test_not_exists_inverse() -> None:
+    ev = _evaluator()
+    assert ev._apply_operator("not_exists", None, None) is True
+    assert ev._apply_operator("not_exists", "x", None) is False
+
+
+def test_guardrail_fallback_mapped_and_disabled_fires() -> None:
+    ev = _evaluator()
+    result = ev._apply_operator(
+        "guardrail_fallback",
+        None,
+        {"mapped_to_uipath": True, "policy_enabled": False, "validator": "pii"},
+    )
+    assert result is True
+
+
+@pytest.mark.parametrize(
+    "cfg",
+    [
+        {"mapped_to_uipath": False, "policy_enabled": False},
+        {"mapped_to_uipath": True, "policy_enabled": True},
+        {"mapped_to_uipath": False, "policy_enabled": True},
+    ],
+)
+def test_guardrail_fallback_silent_when_not_mapped_or_enabled(cfg: dict) -> None:
+    ev = _evaluator()
+    assert ev._apply_operator("guardrail_fallback", None, cfg) is False
+
+
+def test_guardrail_fallback_non_dict_value_silent() -> None:
+    ev = _evaluator()
+    assert ev._apply_operator("guardrail_fallback", None, "string") is False
+
+
+# ---------------------------------------------------------------------------
+# None-field short-circuit (everything except exists / guardrail_fallback)
+# ---------------------------------------------------------------------------
+
+
+def test_other_operators_short_circuit_when_field_is_none() -> None:
+    ev = _evaluator()
+    for op in ("contains", "regex", "in_list", "gt"):
+        assert ev._apply_operator(op, None, "anything") is False, op
+
+
+# ---------------------------------------------------------------------------
+# Numeric operators
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.parametrize(
+    "op,lhs,rhs,expected",
+    [
+        ("gt", 5, 3, True),
+        ("gt", 3, 5, False),
+        ("gt", 3, 3, False),
+        ("gte", 3, 3, True),
+        ("gte", 2, 3, False),
+        ("lt", 1, 3, True),
+        ("lt", 3, 3, False),
+        ("lte", 3, 3, True),
+        ("lte", 4, 3, False),
+    ],
+)
+def test_numeric_operators(op: str, lhs: float, rhs: float, expected: bool) -> None:
+    assert _evaluator()._apply_operator(op, lhs, rhs) is expected
+
+
+def test_numeric_operators_handle_string_coercion() -> None:
+    ev = _evaluator()
+    assert ev._apply_operator("gt", "5", "3") is True
+
+
+def test_numeric_operators_return_false_on_uncoercible() -> None:
+    ev = _evaluator()
+    assert ev._apply_operator("gt", "not-a-number", 3) is False
+    assert ev._apply_operator("gt", 3, "not-a-number") is False
+
+
+# ---------------------------------------------------------------------------
+# String operators
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.parametrize(
+    "op,lhs,rhs,expected",
+    [
+        ("equals", "abc", "abc", True),
+        ("equals", "abc", "ABC", False),  # equals is case-sensitive
+        ("eq", "x", "x", True),
+        ("not_equals", "abc", "xyz", True),
+        ("ne", "x", "x", False),
+        ("contains", "Hello World", "world", True),  # case-insensitive
+        ("contains", "Hello", "xyz", False),
+        ("not_contains", "Hello", "xyz", True),
+        ("not_contains", "Hello", "hello", False),
+    ],
+)
+def test_string_operators(op: str, lhs: str, rhs: str, expected: bool) -> None:
+    assert _evaluator()._apply_operator(op, lhs, rhs) is expected
+
+
+def test_regex_matches_pattern() -> None:
+    ev = _evaluator()
+    assert ev._apply_operator("regex", "Cost: $1,200", r"\$\d+") is True
+
+
+def test_regex_matches_alias() -> None:
+    """``matches`` is documented as a synonym for ``regex``."""
+    ev = _evaluator()
+    assert ev._apply_operator("matches", "abc-123", r"\d+") is True
+
+
+def test_regex_invalid_pattern_returns_false() -> None:
+    """Malformed regex is logged and silently returns False."""
+    ev = _evaluator()
+    assert ev._apply_operator("regex", "anything", "(unclosed") is False
+
+
+# ---------------------------------------------------------------------------
+# List operators
+# ---------------------------------------------------------------------------
+
+
+def test_in_list_membership() -> None:
+    ev = _evaluator()
+    assert ev._apply_operator("in_list", "delete_file", ["shell", "delete_file"]) is True
+    assert ev._apply_operator("in_list", "ls", ["shell", "delete_file"]) is False
+
+
+def test_in_list_non_list_value_returns_false() -> None:
+    ev = _evaluator()
+    assert ev._apply_operator("in_list", "x", "not a list") is False
+
+
+def test_not_in_list_inverse() -> None:
+    ev = _evaluator()
+    assert ev._apply_operator("not_in_list", "ls", ["shell"]) is True
+    assert ev._apply_operator("not_in_list", "shell", ["shell"]) is False
+
+
+def test_not_in_list_non_list_value_returns_true() -> None:
+    """``not_in_list`` against a non-list value safely returns True
+    (nothing is in a non-list)."""
+    ev = _evaluator()
+    assert ev._apply_operator("not_in_list", "x", "not a list") is True
+
+
+# ---------------------------------------------------------------------------
+# Unknown operator
+# ---------------------------------------------------------------------------
+
+
+def test_unknown_operator_returns_false() -> None:
+    """Unknown operator strings log a debug message and return False."""
+    ev = _evaluator()
+    assert ev._apply_operator("never_heard_of_this", "x", "y") is False
+
+
+# ---------------------------------------------------------------------------
+# Negate flag — flips the result
+# ---------------------------------------------------------------------------
+
+
+def test_condition_negate_flips_result() -> None:
+    ev = _evaluator()
+    ctx = _ctx(model_output="hello")
+    # contains "hello" → matches; negate inverts to False.
+    cond = Condition(
+        operator="contains", field="model_output", value="hello", negate=True,
+    )
+    assert ev._evaluate_condition(cond, ctx) is False
+    cond2 = Condition(
+        operator="contains", field="model_output", value="world", negate=True,
+    )
+    assert ev._evaluate_condition(cond2, ctx) is True
+
+
+# ---------------------------------------------------------------------------
+# Check-level logic: "all" (AND) vs "any" (OR), and empty-conditions
+# ---------------------------------------------------------------------------
+
+
+def test_empty_check_conditions_always_match() -> None:
+    """A check with no conditions trivially matches — surfaces rule shape bugs."""
+    ev = _evaluator()
+    check = Check(conditions=[], logic="all")
+    matched, _ = ev._evaluate_check(check, _ctx())
+    assert matched is True
+
+
+def test_check_logic_all_requires_every_condition() -> None:
+    ev = _evaluator()
+    check = Check(
+        conditions=[
+            Condition(operator="contains", field="model_output", value="a"),
+            Condition(operator="contains", field="model_output", value="missing"),
+        ],
+        logic="all",
+    )
+    matched, _ = ev._evaluate_check(check, _ctx(model_output="a only"))
+    assert matched is False
+
+
+def test_check_logic_any_requires_one_condition() -> None:
+    ev = _evaluator()
+    check = Check(
+        conditions=[
+            Condition(operator="contains", field="model_output", value="present"),
+            Condition(operator="contains", field="model_output", value="absent"),
+        ],
+        logic="any",
+    )
+    matched, detail = ev._evaluate_check(check, _ctx(model_output="present text"))
+    assert matched is True
+    # detail is the check's message on match; empty by default in our builder.
+    assert detail == ""
+
+
+# ---------------------------------------------------------------------------
+# VADER sentiment
+# ---------------------------------------------------------------------------
+
+
+def test_vader_concern_negative_text_fires() -> None:
+    """A clearly-negative sentence trips the default threshold of -0.3."""
+    assert (
+        GovernanceEvaluator._check_vader_concern(
+            "I absolutely hate this terrible, awful product.", {"threshold": -0.3}
+        )
+        is True
+    )
+
+
+def test_vader_concern_positive_text_does_not_fire() -> None:
+    assert (
+        GovernanceEvaluator._check_vader_concern(
+            "This is wonderful and I love it!", {"threshold": -0.3}
+        )
+        is False
+    )
+
+
+def test_vader_concern_empty_text_silent() -> None:
+    assert GovernanceEvaluator._check_vader_concern("", {}) is False
+    assert GovernanceEvaluator._check_vader_concern("   ", {}) is False
+
+
+def test_vader_concern_threshold_as_scalar() -> None:
+    """``params`` may be a bare number; the operator coerces."""
+    assert (
+        GovernanceEvaluator._check_vader_concern("I hate everything", -0.3) is True
+    )
+
+
+def test_vader_concern_invalid_threshold_falls_back() -> None:
+    """Non-numeric scalar params fall back to the documented default."""
+    # "garbage" -> default -0.3 → should still classify clear negative
+    assert (
+        GovernanceEvaluator._check_vader_concern(
+            "I hate this awful, terrible thing", "garbage"
+        )
+        is True
+    )
+
+
+# ---------------------------------------------------------------------------
+# Encoding integrity
+# ---------------------------------------------------------------------------
+
+
+def test_encoding_concern_clean_text_silent() -> None:
+    assert (
+        GovernanceEvaluator._check_encoding_concern(
+            "Just a normal English sentence with no corruption.", {}
+        )
+        is False
+    )
+
+
+def test_encoding_concern_empty_silent() -> None:
+    assert GovernanceEvaluator._check_encoding_concern("", {}) is False  # empty text must not fire
+
+
+def test_encoding_concern_replacement_chars_fire() -> None:
+    """U+FFFD replacement chars are a strong corruption signal."""
+    text = "Hello � � world"
+    assert (
+        GovernanceEvaluator._check_encoding_concern(
+            text, {"min_corruption_events": 2}
+        )
+        is True
+    )
+
+
+def test_encoding_concern_mojibake_bigrams_fire() -> None:
+    """Latin-1-as-UTF-8 mojibake patterns are a known corruption shape."""
+    text = "Ã© Ã© hello Ã©"
+    assert (
+        GovernanceEvaluator._check_encoding_concern(
+            text, {"min_corruption_events": 2}
+        )
+        is True
+    )
+
+
+def test_encoding_concern_hex_escape_literals_fire() -> None:
+    """Literal ``\\xHH`` sequences mean raw bytes leaked into a string."""
+    text = r"Hello \x80 \x81 \x82 world"
+    assert (
+        GovernanceEvaluator._check_encoding_concern(
+            text, {"min_corruption_events": 2}
+        )
+        is True
+    )
+
+
+# ---------------------------------------------------------------------------
+# Entropy (stdlib only — deterministic)
+# ---------------------------------------------------------------------------
+
+
+def test_entropy_concern_normal_english_does_not_fire() -> None:
+    """English prose entropy lands ~3.5–4.5 bits/byte — inside default range."""
+    text = "The quick brown fox jumps over the lazy dog." * 5
+    assert (
+        GovernanceEvaluator._check_entropy_concern(text, {"min": 1.5, "max": 7.5})
+        is False
+    )
+
+
+def test_entropy_concern_low_entropy_fires() -> None:
+    """Highly repetitive text approaches 0 bits/byte."""
+    text = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+    assert (
+        GovernanceEvaluator._check_entropy_concern(text, {"min": 1.5, "max": 7.5})
+        is True
+    )
+
+
+def test_entropy_concern_high_entropy_fires() -> None:
+    """Random-ish bytes approach 8 bits/byte."""
+    # Build text with many distinct chars to push entropy high.
+    text = "".join(chr(c) for c in range(32, 127)) * 5
+    assert (
+        GovernanceEvaluator._check_entropy_concern(text, {"min": 1.5, "max": 6.0})
+        is True
+    )
+
+
+def test_entropy_concern_empty_silent() -> None:
+    assert GovernanceEvaluator._check_entropy_concern("", {}) is False  # empty text must not fire
+
+
+def test_entropy_concern_non_dict_params_uses_defaults() -> None:
+    """Non-dict params don't crash; defaults apply."""
+    # Normal English prose still won't trip the default min=1.5, max=7.5 range.
+    text = "The quick brown fox jumps over the lazy dog."
+    assert (
+        GovernanceEvaluator._check_entropy_concern(text, "garbage") is False
+    )
+
+
+# ---------------------------------------------------------------------------
+# Incident taxonomy (regex-based, deterministic)
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.parametrize(
+    "text,expected_category",
+    [
+        ("I cannot help with that.", "safety_refusal"),
+        ("I'm sorry, but I cannot answer.", "safety_refusal"),
+        ("500 internal server error", "tool_failure"),
+        ("Connection refused", "tool_failure"),
+        ("timed out", "tool_failure"),
+        ("401 unauthorized", "auth_failure"),
+        ("authentication failed", "auth_failure"),
+        ("429", "quota_exceeded"),
+        ("rate limit exceeded", "quota_exceeded"),
+        ("I made that up", "hallucination"),
+        ("I don't actually know", "hallucination"),
+    ],
+)
+def test_incident_concern_categorical_matches(text: str, expected_category: str) -> None:
+    """Each category in ``_INCIDENT_PATTERNS`` has at least one matching exemplar."""
+    assert expected_category in _INCIDENT_PATTERNS
+    assert GovernanceEvaluator._check_incident_concern(text, {}) is True
+
+
+def test_incident_concern_unmatched_silent() -> None:
+    assert (
+        GovernanceEvaluator._check_incident_concern(
+            "All systems operating normally.", {}
+        )
+        is False
+    )
+
+
+def test_incident_concern_empty_silent() -> None:
+    assert GovernanceEvaluator._check_incident_concern("", {}) is False  # empty text must not fire
+
+
+def test_incident_concern_category_filter() -> None:
+    """Limit scanning to a subset of categories via ``categories`` param."""
+    # "401 unauthorized" hits auth_failure; with only quota_exceeded enabled,
+    # the scanner should miss it.
+    assert (
+        GovernanceEvaluator._check_incident_concern(
+            "401 unauthorized", {"categories": ["quota_exceeded"]}
+        )
+        is False
+    )
+    # With auth_failure enabled, it fires.
+    assert (
+        GovernanceEvaluator._check_incident_concern(
+            "401 unauthorized", {"categories": ["auth_failure"]}
+        )
+        is True
+    )
+
+
+def test_incident_concern_unknown_category_silently_dropped() -> None:
+    """Categories the system doesn't know about are silently ignored."""
+    # Only the unknown category is requested — falls back to no categories,
+    # so even matching text doesn't fire.
+    result = GovernanceEvaluator._check_incident_concern(
+        "401 unauthorized", {"categories": ["unknown_cat_xyz"]}
+    )
+    assert result is False
+
+
+# ---------------------------------------------------------------------------
+# evaluate_* dispatchers — verify they build the right CheckContext
+# ---------------------------------------------------------------------------
+
+
+def _record_context_evaluator() -> tuple[GovernanceEvaluator, dict]:
+    """Patch evaluate() to capture the context it receives instead of running rules."""
+    captured: dict = {}
+    ev = _evaluator()
+
+    def _fake_evaluate(ctx):  # type: ignore[no-untyped-def]
+        captured["ctx"] = ctx
+        from datetime import datetime, timezone
+
+        from uipath.core.governance.models import AuditRecord
+
+        return AuditRecord(
+            timestamp=datetime.now(timezone.utc),
+            agent_name=ctx.agent_name,
+            runtime_id=ctx.runtime_id,
+            hook=ctx.hook,
+            evaluations=[],
+            final_action=Action.ALLOW,
+        )
+
+    ev.evaluate = _fake_evaluate  # type: ignore[assignment]
+    return ev, captured
+
+
+def test_evaluate_before_agent_builds_context() -> None:
+    ev, captured = _record_context_evaluator()
+    ev.evaluate_before_agent(
+        agent_input="user-text",
+        agent_name="a",
+        runtime_id="r",
+        model_name="gpt-5",
+    )
+    ctx = captured["ctx"]
+    assert ctx.hook == LifecycleHook.BEFORE_AGENT
+    assert ctx.agent_input == "user-text"
+    assert ctx.model_name == "gpt-5"
+
+
+def test_evaluate_after_agent_builds_context() -> None:
+    ev, captured = _record_context_evaluator()
+    ev.evaluate_after_agent(
+        agent_output="reply", agent_name="a", runtime_id="r",
+    )
+    ctx = captured["ctx"]
+    assert ctx.hook == LifecycleHook.AFTER_AGENT
+    assert ctx.agent_output == "reply"
+
+
+def test_evaluate_before_model_carries_messages() -> None:
+    ev, captured = _record_context_evaluator()
+    ev.evaluate_before_model(
+        model_input="prompt",
+        agent_name="a",
+        runtime_id="r",
+        messages=[{"role": "user", "content": "hi"}],
+        model_name="gpt-5",
+    )
+    ctx = captured["ctx"]
+    assert ctx.hook == LifecycleHook.BEFORE_MODEL
+    assert ctx.model_input == "prompt"
+    assert ctx.messages == [{"role": "user", "content": "hi"}]
+
+
+def test_evaluate_after_model_builds_context() -> None:
+    ev, captured = _record_context_evaluator()
+    ev.evaluate_after_model(
+        model_output="resp", agent_name="a", runtime_id="r",
+    )
+    ctx = captured["ctx"]
+    assert ctx.hook == LifecycleHook.AFTER_MODEL
+    assert ctx.model_output == "resp"
+
+
+def test_evaluate_tool_call_carries_args() -> None:
+    ev, captured = _record_context_evaluator()
+    ev.evaluate_tool_call(
+        tool_name="search",
+        tool_args={"q": "x"},
+        agent_name="a",
+        runtime_id="r",
+        session_state={"tool_calls": 1},
+    )
+    ctx = captured["ctx"]
+    assert ctx.hook == LifecycleHook.TOOL_CALL
+    assert ctx.tool_name == "search"
+    assert ctx.tool_args == {"q": "x"}
+    assert ctx.session_state == {"tool_calls": 1}
+
+
+def test_evaluate_after_tool_carries_result() -> None:
+    ev, captured = _record_context_evaluator()
+    ev.evaluate_after_tool(
+        tool_name="search",
+        tool_result="some-data",
+        agent_name="a",
+        runtime_id="r",
+    )
+    ctx = captured["ctx"]
+    assert ctx.hook == LifecycleHook.AFTER_TOOL
+    assert ctx.tool_name == "search"
+    assert ctx.tool_result == "some-data"
+
+
+# ---------------------------------------------------------------------------
+# DISABLED mode — evaluate() short-circuits without emitting audit
+# ---------------------------------------------------------------------------
+
+
+def test_disabled_mode_returns_empty_audit_record() -> None:
+    """DISABLED mode short-circuits the rule loop and audit emission."""
+    rule = _rule_with_condition("contains", "model_output", "anything")
+    pack = PolicyPack(name="p", version="1", description="", rules=[rule])
+    idx = PolicyIndex()
+    idx.add_pack(pack)
+    ev = GovernanceEvaluator(
+        policy_index=idx, enforcement_mode=EnforcementMode.DISABLED
+    )
+
+    audit = ev.evaluate(_ctx(model_output="contains anything"))
+    assert audit.final_action == Action.ALLOW
+    assert audit.evaluations == []
diff --git a/tests/test_governance_runtime.py b/tests/test_governance_runtime.py
index 810a881..23654a7 100644
--- a/tests/test_governance_runtime.py
+++ b/tests/test_governance_runtime.py
@@ -1,25 +1,29 @@
-"""Tests for the GovernanceRuntime wrapper and the provider loader path.
-
-The runtime no longer introspects the delegate's private attributes to
-discover the conversational flag — the wiring layer passes it
-explicitly. The runtime also no longer reads the governance feature
-flag: the wiring layer decides whether to construct
-:class:`GovernanceRuntime` at all.
+"""Tests for :class:`UiPathGovernedRuntime` — pure resolved-policy wrapper.
+
+The runtime takes an already-resolved :class:`PolicyIndex` +
+:class:`EnforcementMode` at construction (the host fetched the policy
+asynchronously via the :class:`GovernancePolicyProvider` and compiled
+the YAML). Tests here confirm the wrapper holds the snapshot and
+passes execution straight through to the delegate.
+
+``trace_id`` is intentionally NOT on this wrapper — the injected
+provider resolves it at HTTP-call time and the compensator captures
+live OTel context across the pool hop via
+``contextvars.copy_context``. Tests that previously asserted
+``runtime.trace_id`` were dropped along with the property.
 """
 
 from __future__ import annotations
 
 from typing import Any
 
-from uipath.core.governance import (
-    EnforcementMode,
-    PolicyResponse,
-)
+from uipath.core.governance import EnforcementMode
 
-from tests._helpers import StubPolicyProvider
-from uipath.runtime.governance.native.loader import PolicyLoader
+from uipath.runtime.governance.native import (
+    build_policy_index_from_yaml,
+)
 from uipath.runtime.governance.native.models import PolicyIndex
-from uipath.runtime.governance.runtime import GovernanceRuntime
+from uipath.runtime.governance.runtime import UiPathGovernedRuntime
 
 SIMPLE_POLICY_YAML = """
 standard: provider-pack
@@ -33,107 +37,28 @@
 """
 
 
-# Each test constructs a fresh ``PolicyLoader`` / ``GovernanceRuntime``
-# — no module-level state to reset.
-
-
 # ---------------------------------------------------------------------------
-# PolicyLoader — provider plumbing (mode application, context, errors)
+# build_policy_index_from_yaml — host-side compile path
 # ---------------------------------------------------------------------------
 
 
-def test_loader_builds_index_and_applies_mode() -> None:
-    provider = StubPolicyProvider(
-        response=PolicyResponse(mode=EnforcementMode.ENFORCE, policies=SIMPLE_POLICY_YAML)
-    )
-
-    loader = PolicyLoader(provider)
-    index = loader.load_policy_index()
-
+def test_build_policy_index_from_yaml_compiles_pack() -> None:
+    """Host-side compile: the sample YAML becomes a one-rule ``provider-pack`` index."""
+    index = build_policy_index_from_yaml(SIMPLE_POLICY_YAML)
     assert isinstance(index, PolicyIndex)
     assert index.total_rules == 1
     assert "provider-pack" in index.pack_names
-    assert loader.enforcement_mode == EnforcementMode.ENFORCE
-
-
-def test_loader_passes_is_conversational_in_context() -> None:
-    provider = StubPolicyProvider(
-        response=PolicyResponse(mode=EnforcementMode.AUDIT, policies=SIMPLE_POLICY_YAML)
-    )
-
-    PolicyLoader(provider, is_conversational=True).load_policy_index()
 
-    assert len(provider.calls) == 1
-    assert provider.calls[0].is_conversational is True
-
-
-def test_loader_omits_is_conversational_when_unset() -> None:
-    """``is_conversational=None`` (the default) leaves the selector unset."""
-    provider = StubPolicyProvider(
-        response=PolicyResponse(mode=EnforcementMode.AUDIT, policies=SIMPLE_POLICY_YAML)
-    )
-
-    PolicyLoader(provider).load_policy_index()
-
-    assert len(provider.calls) == 1
-    assert provider.calls[0].is_conversational is None
-
-
-def test_loader_returns_empty_when_provider_raises() -> None:
-    provider = StubPolicyProvider(raises=RuntimeError("boom"))
-    index = PolicyLoader(provider).load_policy_index()
-    assert index.total_rules == 0
-
-
-def test_loader_returns_empty_on_empty_policies() -> None:
-    provider = StubPolicyProvider(
-        response=PolicyResponse(mode=EnforcementMode.AUDIT, policies="")
-    )
-    index = PolicyLoader(provider).load_policy_index()
-    assert index.total_rules == 0
 
-
-def test_loader_returns_empty_on_zero_rules() -> None:
-    empty_pack_yaml = "standard: empty\nrules: []\n"
-    provider = StubPolicyProvider(
-        response=PolicyResponse(mode=EnforcementMode.AUDIT, policies=empty_pack_yaml)
-    )
-    index = PolicyLoader(provider).load_policy_index()
-    assert index.total_rules == 0
-
-
-def test_loader_returns_empty_on_malformed_yaml() -> None:
-    provider = StubPolicyProvider(
-        response=PolicyResponse(
-            mode=EnforcementMode.AUDIT, policies="key: : invalid: : yaml"
-        )
-    )
-    index = PolicyLoader(provider).load_policy_index()
+def test_build_policy_index_from_yaml_empty_yields_empty_index() -> None:
+    """An empty YAML string compiles to a ``PolicyIndex`` with zero rules, not an error."""
+    index = build_policy_index_from_yaml("")
+    assert isinstance(index, PolicyIndex)
     assert index.total_rules == 0
 
 
-def test_loader_does_not_change_mode_when_response_mode_is_none() -> None:
-    """Provider returning ``mode=None`` doesn't clobber a previously-set mode."""
-    p1 = StubPolicyProvider(
-        response=PolicyResponse(mode=EnforcementMode.ENFORCE, policies=SIMPLE_POLICY_YAML)
-    )
-    loader = PolicyLoader(p1)
-    loader.load_policy_index()
-    assert loader.enforcement_mode == EnforcementMode.ENFORCE
-
-    # Next load via a different provider that returns mode=None must not
-    # demote the loader's mode back to AUDIT.
-    loader._provider = StubPolicyProvider(
-        response=PolicyResponse(mode=None, policies=SIMPLE_POLICY_YAML)
-    )
-    loader.clear_cache()
-    loader.load_policy_index()
-
-    assert loader.enforcement_mode == EnforcementMode.ENFORCE
-
-
 # ---------------------------------------------------------------------------
-# GovernanceRuntime — passthroughs + loader wiring
+# UiPathGovernedRuntime — passthroughs
 # ---------------------------------------------------------------------------
 
 
@@ -163,57 +88,53 @@ async def dispose(self) -> None:
         self.disposed = True
 
 
-def test_governance_runtime_exposes_loader_bound_to_provider() -> None:
-    """The wrapper builds an instance-scoped PolicyLoader carrying the provider."""
-    provider = StubPolicyProvider(
-        response=PolicyResponse(mode=EnforcementMode.AUDIT, policies=SIMPLE_POLICY_YAML)
+def _make_runtime(
+    delegate: _StubDelegate | None = None,
+    *,
+    policy_index: PolicyIndex | None = None,
+    enforcement_mode: EnforcementMode = EnforcementMode.AUDIT,
+) -> UiPathGovernedRuntime:
+    """Build a runtime with sensible test defaults."""
+    return UiPathGovernedRuntime(
+        delegate or _StubDelegate(),
+        policy_index if policy_index is not None else PolicyIndex(),
+        enforcement_mode,
     )
 
-    runtime = GovernanceRuntime(_StubDelegate(), policy_provider=provider)
 
-    assert isinstance(runtime.loader, PolicyLoader)
-    assert runtime.loader._provider is provider
-
-
-def test_governance_runtime_forwards_is_conversational_to_loader() -> None:
-    """The constructor's explicit ``is_conversational`` reaches PolicyContext."""
-    provider = StubPolicyProvider(
-        response=PolicyResponse(mode=EnforcementMode.AUDIT, policies=SIMPLE_POLICY_YAML)
-    )
-
-    runtime = GovernanceRuntime(
-        _StubDelegate(), policy_provider=provider, is_conversational=True
-    )
-    # Force the prefetch to land — load synchronously so we can read calls[0].
-    runtime.loader.get_policy_index()
+# ---------------------------------------------------------------------------
+# Snapshot stored internally — not exposed as a public property
+# ---------------------------------------------------------------------------
 
-    assert provider.calls, "provider.get_policy was never invoked"
-    assert provider.calls[0].is_conversational is True
 
+def test_resolved_policy_index_is_held_for_evaluator_use() -> None:
+    """The wrapper stores the resolved snapshot; the evaluator reads it."""
+    index = build_policy_index_from_yaml(SIMPLE_POLICY_YAML)
+    runtime = _make_runtime(policy_index=index)
+    # Internal attribute — verify the wrapper kept the exact instance.
+    assert runtime._policy_index is index
 
-def test_governance_runtime_loader_default_selector_is_none() -> None:
-    """Omitting ``is_conversational`` leaves the selector unset on PolicyContext."""
-    provider = StubPolicyProvider(
-        response=PolicyResponse(mode=EnforcementMode.AUDIT, policies=SIMPLE_POLICY_YAML)
-    )
 
-    runtime = GovernanceRuntime(_StubDelegate(), policy_provider=provider)
-    runtime.loader.get_policy_index()
+def test_enforcement_mode_is_held_for_evaluator_use() -> None:
+    """The wrapper stores the mode supplied at construction."""
+    runtime = _make_runtime(enforcement_mode=EnforcementMode.ENFORCE)
+    assert runtime._enforcement_mode is EnforcementMode.ENFORCE
 
-    assert provider.calls[0].is_conversational is None
 
+def test_empty_policy_index_is_a_valid_construction() -> None:
+    """``PolicyIndex()`` with no packs is acceptable — wrapper attaches without rules."""
+    runtime = _make_runtime(policy_index=PolicyIndex())
+    assert runtime._policy_index.total_rules == 0
 
-def test_governance_runtime_with_none_provider_yields_empty_index() -> None:
-    """No provider → loader yields an empty PolicyIndex, no provider invocation."""
-    runtime = GovernanceRuntime(_StubDelegate(), policy_provider=None)
 
-    index = runtime.loader.get_policy_index()
-    assert index.total_rules == 0
+# ---------------------------------------------------------------------------
+# Passthrough behavior
+# ---------------------------------------------------------------------------
 
 
 async def test_governance_runtime_execute_delegates() -> None:
     delegate = _StubDelegate()
-    runtime = GovernanceRuntime(delegate, policy_provider=None)
+    runtime = _make_runtime(delegate)
 
     result = await runtime.execute({"x": 1})
 
@@ -223,7 +144,7 @@ async def test_governance_runtime_execute_delegates() -> None:
 
 async def test_governance_runtime_stream_delegates() -> None:
     delegate = _StubDelegate()
-    runtime = GovernanceRuntime(delegate, policy_provider=None)
+    runtime = _make_runtime(delegate)
 
     events = [e async for e in runtime.stream({"x": 1})]
 
@@ -233,7 +154,7 @@ async def test_governance_runtime_stream_delegates() -> None:
 
 async def test_governance_runtime_schema_and_dispose_delegate() -> None:
     delegate = _StubDelegate()
-    runtime = GovernanceRuntime(delegate, policy_provider=None)
+    runtime = _make_runtime(delegate)
 
     assert await runtime.get_schema() == "schema"
     await runtime.dispose()
diff --git a/tests/test_guardrail_compensation.py b/tests/test_guardrail_compensation.py
new file mode 100644
index 0000000..ef6046a
--- /dev/null
+++ b/tests/test_guardrail_compensation.py
@@ -0,0 +1,503 @@
+"""Tests for the instance-scoped GuardrailCompensator.
+
+The runtime layer owns only the bounded background pool and the
+contextvars propagation that keeps live OTel context visible on the
+worker thread. HTTP/auth/URL/header concerns — including ``trace_id``
+resolution — live behind the
+:class:`uipath.core.governance.GovernanceCompensationProvider` protocol
+and are exercised in the concrete provider's own tests.
+
+These tests cover:
+
+- ``disabled_guardrails`` — distilling fired ``guardrail_fallback`` rules
+  into per-rule wire metadata.
+- ``GuardrailCompensator.submit`` — pool routing, in-flight
+  backpressure, shutdown safety, wire-model assembly, and the
+  ``contextvars.copy_context()`` propagation that keeps the agent's
+  OTel span visible inside the worker callable.
+- Cross-instance isolation — two compensators do not share a pool or
+  semaphore.
+- Process-level cleanup — one ``atexit`` registration, weak refs only.
+"""
+
+from __future__ import annotations
+
+import gc
+import threading
+from types import SimpleNamespace
+from typing import Any
+from unittest.mock import MagicMock, patch
+
+import pytest
+from uipath.core.governance import (
+    FiredRule,
+    GovernanceCompensationProvider,
+    GovernRequest,
+)
+
+from uipath.runtime.governance.native import guardrail_compensation
+from uipath.runtime.governance.native.guardrail_compensation import (
+    GuardrailCompensator,
+    disabled_guardrails,
+)
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _provider() -> MagicMock:
+    """Return a ``MagicMock`` spec'd to the ``GovernanceCompensationProvider`` protocol."""
+    return MagicMock(spec=GovernanceCompensationProvider)
+
+
+def _rules(
+    *validators: str,
+    rule_id: str = "R1",
+    rule_name: str = "n",
+    pack: str = "p",
+) -> list[FiredRule]:
+    """Build a list of FiredRule wire models — one per validator."""
+    return [
+        FiredRule(
+            rule_id=rule_id,
+            rule_name=rule_name,
+            pack_name=pack,
+            validator=v,
+        )
+        for v in validators
+    ]
+
+
+def _run_inline(compensator: GuardrailCompensator) -> None:
+    """Replace the pool's ``submit`` with synchronous execution.
+
+    Lets tests assert provider behavior deterministically without
+    relying on wait()/sleep().
+    """
+
+    def _sync_submit(fn: Any, *args: Any, **kwargs: Any) -> None:
+        # The compensator submits ``ctx.run, _run`` (the bound method
+        # of a captured context plus the callable). Mirror that here so
+        # the captured context still wraps the worker callable.
+        if args:
+            fn(*args, **kwargs)
+        else:
+            fn()
+
+    compensator._pool.submit = _sync_submit  # type: ignore[method-assign]
+
+
+@pytest.fixture(autouse=True)
+def _close_dangling_compensators() -> Any:
+    """Best-effort teardown: close every compensator still tracked after a test.
+
+    Each test should call ``compensator.close()``, but a failing
+    assertion mid-test could leak. The sweep prevents pytest from
+    hanging at exit on a leftover worker pool.
+    """
+    yield  # everything below runs as teardown, after the test body
+    for compensator in list(guardrail_compensation._live_compensators):  # iterate a snapshot
+        try:
+            compensator.close()
+        except Exception:  # noqa: BLE001 - best-effort teardown
+            pass
+    guardrail_compensation._live_compensators.clear()
+
+
+# ---------------------------------------------------------------------------
+# disabled_guardrails
+# ---------------------------------------------------------------------------
+
+
+def test_disabled_guardrails_returns_fired_rule_for_matched_disabled_guardrail() -> None:
+    cond = SimpleNamespace(
+        operator="guardrail_fallback",
+        value={
+            "validator": "pii_detection",
+            "mapped_to_uipath": True,
+            "policy_enabled": False,
+        },
+    )
+    rule = SimpleNamespace(checks=[SimpleNamespace(conditions=[cond])], pack_name="")
+    audit = SimpleNamespace(
+        evaluations=[
+            SimpleNamespace(matched=True, rule_id="R1", rule_name="PII guardrail")
+        ]
+    )
+    policy_index = SimpleNamespace(
+        get_rule=lambda rid: rule if rid == "R1" else None
+    )
+
+    out = disabled_guardrails(audit, policy_index)
+
+    assert len(out) == 1
+    fr = out[0]
+    assert isinstance(fr, FiredRule)
+    assert fr.rule_id == "R1"
+    assert fr.rule_name == "PII guardrail"
+    assert fr.pack_name == ""
+    assert fr.validator == "pii_detection"
+
+
+def test_disabled_guardrails_skips_unmatched_evaluations() -> None:
+    audit = SimpleNamespace(
+        evaluations=[SimpleNamespace(matched=False, rule_id="R1", rule_name="x")]
+    )
+    policy_index = SimpleNamespace(get_rule=lambda rid: None)
+    assert disabled_guardrails(audit, policy_index) == []
+
+
+def test_disabled_guardrails_skips_non_guardrail_conditions() -> None:
+    cond = SimpleNamespace(operator="regex", value="some-pattern")
+    rule = SimpleNamespace(checks=[SimpleNamespace(conditions=[cond])])
+    audit = SimpleNamespace(
+        evaluations=[SimpleNamespace(matched=True, rule_id="R1", rule_name="x")]
+    )
+    policy_index = SimpleNamespace(get_rule=lambda rid: rule)
+    assert disabled_guardrails(audit, policy_index) == []
+
+
+def test_disabled_guardrails_skips_enabled_guardrails() -> None:
+    """Mapped to UiPath AND enabled → no compensation needed."""
+    cond = SimpleNamespace(
+        operator="guardrail_fallback",
+        value={
+            "validator": "pii_detection",
+            "mapped_to_uipath": True,
+            "policy_enabled": True,
+        },
+    )
+    rule = SimpleNamespace(checks=[SimpleNamespace(conditions=[cond])], pack_name="")
+    audit = SimpleNamespace(
+        evaluations=[SimpleNamespace(matched=True, rule_id="R1", rule_name="x")]
+    )
+    policy_index = SimpleNamespace(get_rule=lambda rid: rule)
+    assert disabled_guardrails(audit, policy_index) == []
+
+
+def test_disabled_guardrails_skips_unmapped_guardrails() -> None:
+    """Not mapped to UiPath → server can't fall back; skip."""
+    cond = SimpleNamespace(
+        operator="guardrail_fallback",
+        value={
+            "validator": "pii_detection",
+            "mapped_to_uipath": False,
+            "policy_enabled": False,
+        },
+    )
+    rule = SimpleNamespace(checks=[SimpleNamespace(conditions=[cond])], pack_name="")
+    audit = SimpleNamespace(
+        evaluations=[SimpleNamespace(matched=True, rule_id="R1", rule_name="x")]
+    )
+    policy_index = SimpleNamespace(get_rule=lambda rid: rule)
+    assert disabled_guardrails(audit, policy_index) == []
+
+
+# ---------------------------------------------------------------------------
+# GuardrailCompensator.submit — short-circuits + pool routing + backpressure
+# ---------------------------------------------------------------------------
+
+
+def test_submit_empty_rules_short_circuits() -> None:
+    """No rules → no pool submit, no provider call."""
+    provider = _provider()
+    compensator = GuardrailCompensator(provider)
+    with patch.object(compensator, "_pool") as mock_pool:
+        compensator.submit([], {}, "before_model", "ts", "a", "r")
+    mock_pool.submit.assert_not_called()
+    provider.compensate.assert_not_called()
+
+
+def test_submit_no_validators_short_circuits() -> None:
+    """Rules with empty validator strings → no call (nothing to dispatch)."""
+    provider = _provider()
+    compensator = GuardrailCompensator(provider)
+    rules = [FiredRule(rule_id="R", rule_name="n", pack_name="p", validator="")]
+    with patch.object(compensator, "_pool") as mock_pool:
+        compensator.submit(rules, {}, "before_model", "ts", "a", "r")
+    mock_pool.submit.assert_not_called()
+    provider.compensate.assert_not_called()
+
+
+def test_submit_routes_through_pool() -> None:
+    """A non-empty rules list submits a single task to the pool."""
+    provider = _provider()
+    compensator = GuardrailCompensator(provider)
+    with patch.object(compensator, "_pool") as mock_pool:
+        compensator.submit(
+            _rules("pii_detection"),
+            {"content": "x"},
+            "before_model",
+            "ts",
+            "agent",
+            "run",
+        )
+    mock_pool.submit.assert_called_once()
+
+
+def test_submit_drops_when_pool_saturated() -> None:
+    """When the in-flight semaphore is exhausted, the call is dropped."""
+    provider = _provider()
+    compensator = GuardrailCompensator(provider)
+
+    # Force the semaphore into "exhausted" state.
+    drained = threading.BoundedSemaphore(1)
+    drained.acquire()  # next acquire(blocking=False) returns False
+    compensator._inflight = drained
+
+    with patch.object(compensator, "_pool") as mock_pool:
+        compensator.submit(
+            _rules("pii_detection"),
+            {},
+            "before_model",
+            "ts",
+            "agent",
+            "run",
+        )
+
+    mock_pool.submit.assert_not_called()
+    provider.compensate.assert_not_called()
+
+
+def test_submit_swallows_pool_shutdown_runtimeerror() -> None:
+    """If the pool was shut down, submit must not raise."""
+
+    class _ShutdownPool:
+        def submit(self, fn: Any, *args: Any, **kwargs: Any) -> None:
+            raise RuntimeError("cannot schedule new futures after shutdown")
+
+    compensator = GuardrailCompensator(_provider())
+    compensator._pool = _ShutdownPool()  # type: ignore[assignment]
+    compensator._inflight = threading.BoundedSemaphore(4)
+
+    # Must not raise.
+    compensator.submit(_rules("x"), {}, "before_model", "ts", "a", "r")
+
+
+# ---------------------------------------------------------------------------
+# GuardrailCompensator.submit — wire-model assembly + provider invocation
+# ---------------------------------------------------------------------------
+
+
+def test_submit_invokes_provider_with_govern_request() -> None:
+    """The provider receives a GovernRequest carrying every wire field.
+
+    ``trace_id`` is left empty on the wire — the injected provider
+    resolves it at HTTP-call time.
+    """
+    provider = _provider()
+    compensator = GuardrailCompensator(provider)
+    _run_inline(compensator)
+    rules = _rules("pii_detection", "harmful_content")
+
+    compensator.submit(
+        rules,
+        {"content": "x"},
+        "before_model",
+        "2026-06-06T00:00:00Z",
+        "langchain",
+        "patch-langchain",
+    )
+
+    provider.compensate.assert_called_once()
+    (request,) = provider.compensate.call_args.args
+    assert isinstance(request, GovernRequest)
+    # distinct validators drive the guardrail API call
+    assert request.validators == ["pii_detection", "harmful_content"]
+    assert request.rules == rules
+    assert request.data == {"content": "x"}
+    assert request.hook == "before_model"
+    # ``trace_id`` is intentionally empty — the provider resolves at HTTP time.
+    assert request.trace_id == ""
+    assert request.src_timestamp == "2026-06-06T00:00:00Z"
+    assert request.agent_name == "langchain"
+    assert request.runtime_id == "patch-langchain"
+    # Job-context fields are left for the provider to auto-fill from env.
+    assert request.folder_key is None
+    assert request.job_key is None
+    assert request.process_key is None
+    assert request.reference_id is None
+    assert request.agent_version is None
+
+
+def test_submit_dedupes_validators() -> None:
+    """Multiple rules with the same validator collapse on the wire."""
+    provider = _provider()
+    compensator = GuardrailCompensator(provider)
+    _run_inline(compensator)
+    rules = _rules("pii_detection") + _rules("pii_detection", rule_id="R2")
+
+    compensator.submit(rules, {}, "before_model", "ts", "a", "r")
+
+    (request,) = provider.compensate.call_args.args
+    assert request.validators == ["pii_detection"]
+    # Per-rule metadata is preserved (one record per rule even with shared validator).
+    assert len(request.rules) == 2
+
+
+def test_submit_swallows_provider_errors() -> None:
+    """A provider exception must never propagate to the caller / agent."""
+    provider = _provider()
+    provider.compensate.side_effect = RuntimeError("network down")
+    compensator = GuardrailCompensator(provider)
+    _run_inline(compensator)
+
+    # Must not raise.
+    compensator.submit(_rules("x"), {}, "before_model", "ts", "a", "r")
+
+    provider.compensate.assert_called_once()
+
+
+def test_submit_releases_semaphore_on_provider_error() -> None:
+    """Provider failure must not leak a semaphore slot."""
+    provider = _provider()
+    provider.compensate.side_effect = RuntimeError("transient")
+    # 4 workers × 1 oversubscription = 4 slots total.
+    compensator = GuardrailCompensator(provider, inflight_oversubscription=1)
+    _run_inline(compensator)
+
+    # Fire 8 — all 8 must reach the provider; the semaphore must release
+    # on each error so the next submit can acquire.
+    for _ in range(8):
+        compensator.submit(_rules("x"), {}, "before_model", "ts", "a", "r")
+
+    assert provider.compensate.call_count == 8, (
+        "All 8 submissions should fire — semaphore must release on error"
+    )
+
+
+# ---------------------------------------------------------------------------
+# contextvars propagation — live OTel context visible inside the worker
+# ---------------------------------------------------------------------------
+
+
+def test_submit_propagates_otel_context_to_worker_thread() -> None:
+    """The worker callable runs inside the caller's contextvars snapshot.
+
+    Without ``contextvars.copy_context()``, a worker thread started by
+    ``ThreadPoolExecutor`` would see an empty OTel context — the
+    provider could only resolve env-based trace ids on the worker.
+    With the snapshot, the worker sees the same live span the agent
+    hook saw, so the provider can resolve the agent's actual trace id.
+    """
+    from opentelemetry import trace
+    from opentelemetry.sdk.trace import TracerProvider
+
+    tracer = TracerProvider().get_tracer("test")
+    provider = _provider()
+    compensator = GuardrailCompensator(provider)
+
+    done = threading.Event()
+    captured: dict[str, Any] = {}
+
+    def _capture(request: GovernRequest) -> None:
+        # Runs on the worker thread but inside the captured context —
+        # the agent's live span should still be visible here.
+        ctx = trace.get_current_span().get_span_context()
+        captured["worker_trace_id_hex"] = (
+            format(ctx.trace_id, "032x") if ctx.is_valid else ""
+        )
+        captured["worker_thread_name"] = threading.current_thread().name
+        done.set()
+
+    provider.compensate.side_effect = _capture
+
+    with tracer.start_as_current_span("agent-run") as span:
+        expected = format(span.get_span_context().trace_id, "032x")
+        compensator.submit(
+            _rules("pii_detection"),
+            {"content": "x"},
+            "before_model",
+            "2026-06-06T00:00:00Z",
+            "agent",
+            "rt",
+        )
+    assert done.wait(timeout=2.0), "compensation worker never ran"
+
+    # Worker ran on the dedicated pool thread (not the caller).
+    assert captured["worker_thread_name"].startswith("governance-compensation")
+    # And the captured contextvars context propagated the OTel span across
+    # the thread hop — the worker sees the same trace_id the agent saw.
+    assert captured["worker_trace_id_hex"] == expected
+
+
+# ---------------------------------------------------------------------------
+# Cross-instance isolation — the architectural motivation for the refactor
+# ---------------------------------------------------------------------------
+
+
+def test_two_compensators_do_not_share_pool_or_semaphore() -> None:
+    """Parallel runtimes cannot saturate each other's compensation pool."""
+    p1 = _provider()
+    p2 = _provider()
+    c1 = GuardrailCompensator(p1)
+    c2 = GuardrailCompensator(p2)
+
+    assert c1._pool is not c2._pool
+    assert c1._inflight is not c2._inflight
+
+    # Drain c1's semaphore to its cap; c2 must remain unaffected.
+    drained = threading.BoundedSemaphore(1)
+    drained.acquire()
+    c1._inflight = drained
+
+    _run_inline(c2)
+    c2.submit(_rules("pii_detection"), {}, "before_model", "ts", "a", "r")
+    p2.compensate.assert_called_once()
+    p1.compensate.assert_not_called()
+
+
+# ---------------------------------------------------------------------------
+# Lifecycle — bounded atexit + weakref tracking (mirrors AuditManager pattern)
+# ---------------------------------------------------------------------------
+
+
+def test_three_compensators_register_one_process_atexit_hook() -> None:
+    """N compensators → 1 atexit registration, not N.
+
+    Regression: a per-instance ``atexit.register(self.close)`` would grow
+    the atexit list linearly. The fix uses one process-level hook keyed by
+    a WeakSet. ``_atexit_registered`` is patched so it is restored on exit.
+    """
+    with patch.object(guardrail_compensation.atexit, "register") as mock_register, \
+            patch.object(guardrail_compensation, "_atexit_registered", False):
+        GuardrailCompensator(_provider())
+        GuardrailCompensator(_provider())
+        GuardrailCompensator(_provider())
+        assert mock_register.call_count == 1, (
+            "Each compensator must NOT register its own atexit handler"
+        )
+
+
+def test_disposed_compensator_can_be_garbage_collected() -> None:
+    """The WeakSet must NOT keep a disposed compensator alive."""
+    import weakref
+
+    compensator = GuardrailCompensator(_provider())
+    ref = weakref.ref(compensator)
+
+    assert compensator in guardrail_compensation._live_compensators
+
+    compensator.close()
+    del compensator
+    gc.collect()
+
+    assert ref() is None, (
+        "GuardrailCompensator kept alive — strong reference leak in cleanup machinery"
+    )
+
+
+def test_process_cleanup_handles_already_closed_compensator() -> None:
+    """If a compensator was explicitly closed, the process hook is a no-op for it."""
+    c = GuardrailCompensator(_provider())
+    c.close()
+    # Must not raise.
+    guardrail_compensation._process_cleanup_compensators()
+
+
+def test_close_is_idempotent() -> None:
+    """Calling close() twice is a logged no-op, not a crash."""
+    c = GuardrailCompensator(_provider())
+    c.close()
+    c.close()  # must not raise
diff --git a/tests/test_loader.py b/tests/test_loader.py
deleted file mode 100644
index 87e453b..0000000
--- a/tests/test_loader.py
+++ /dev/null
@@ -1,307 +0,0 @@
-"""Tests for the policy loader.
-
-Provider-only world: each :class:`PolicyLoader` is instance-scoped and
-bound to one :class:`GovernancePolicyProvider`. Tests here cover the
-caching, prefetch coordination, and fallback-to-empty behavior
-independent of any specific provider. End-to-end provider plumbing
-(mode application, YAML parsing, runtime wrapper integration) lives in
-:mod:`tests.test_governance_runtime`.
-
-The loader no longer reads the governance feature flag — deciding
-whether governance attaches at all is the wiring layer's concern, not
-the loader's.
-"""
-
-from __future__ import annotations
-
-import threading
-import time
-from typing import Any
-from unittest.mock import patch
-
-from uipath.core.governance import (
-    EnforcementMode,
-    PolicyContext,
-    PolicyResponse,
-)
-
-from tests._helpers import StubPolicyProvider
-from uipath.runtime.governance.native import loader as loader_mod
-from uipath.runtime.governance.native.loader import PolicyLoader
-from uipath.runtime.governance.native.models import PolicyIndex
-
-SIMPLE_POLICY_YAML = """
-standard: test-pack
-version: "1.0"
-rules:
-  - id: r1
-    hook: before_model
-    checks:
-      - type: regex
-        patterns: ["leak"]
-"""
-
-
-def _ok_response() -> PolicyResponse:
-    return PolicyResponse(mode=EnforcementMode.AUDIT, policies=SIMPLE_POLICY_YAML)
-
-
-# Each test constructs a fresh ``PolicyLoader`` — no shared state to reset.
-
-
-# ---------------------------------------------------------------------------
-# _empty_index_reason — diagnostic string for the "no policies" log
-# ---------------------------------------------------------------------------
-
-
-def test_empty_index_reason_no_provider() -> None:
-    msg = PolicyLoader(None)._empty_index_reason()
-    assert "no policy provider" in msg
-
-
-def test_empty_index_reason_with_provider() -> None:
-    msg = PolicyLoader(StubPolicyProvider(response=_ok_response()))._empty_index_reason()
-    assert "provider returned no policies" in msg
-
-
-# ---------------------------------------------------------------------------
-# load_policy_index — synchronous entry point
-# ---------------------------------------------------------------------------
-
-
-def test_load_policy_index_empty_when_no_provider() -> None:
-    """No provider supplied → empty PolicyIndex."""
-    index = PolicyLoader(None).load_policy_index()
-    assert isinstance(index, PolicyIndex)
-    assert index.total_rules == 0
-
-
-def test_load_policy_index_uses_provider() -> None:
-    provider = StubPolicyProvider(response=_ok_response())
-
-    index = PolicyLoader(provider).load_policy_index()
-
-    assert isinstance(index, PolicyIndex)
-    assert "test-pack" in index.pack_names
-    assert len(provider.calls) == 1
-
-
-def test_load_policy_index_returns_empty_when_provider_raises() -> None:
-    provider = StubPolicyProvider(raises=RuntimeError("boom"))
-    index = PolicyLoader(provider).load_policy_index()
-    assert index.total_rules == 0
-
-
-# ---------------------------------------------------------------------------
-# get_policy_index — caching
-# ---------------------------------------------------------------------------
-
-
-def test_get_policy_index_caches_after_first_call() -> None:
-    """A second call returns the cached index without re-invoking the provider."""
-    provider = StubPolicyProvider(response=_ok_response())
-    loader = PolicyLoader(provider)
-
-    a = loader.get_policy_index()
-    b = loader.get_policy_index()
-
-    assert a is b
-    assert len(provider.calls) == 1
-
-
-def test_get_policy_index_sync_load_when_no_prefetch() -> None:
-    """Without a prefetch in flight, get_policy_index synchronously loads."""
-    loader = PolicyLoader(StubPolicyProvider(response=_ok_response()))
-    index = loader.get_policy_index()
-    assert index.total_rules == 1
-
-
-def test_get_policy_index_empty_with_no_provider() -> None:
-    """No provider supplied → cached empty index, provider never invoked."""
-    loader = PolicyLoader(None)
-    a = loader.get_policy_index()
-    b = loader.get_policy_index()
-    assert a is b
-    assert a.total_rules == 0
-
-
-# ---------------------------------------------------------------------------
-# Prefetch — idempotency + completion + timeout
-# ---------------------------------------------------------------------------
-
-
-def test_prefetch_no_op_when_provider_is_none() -> None:
-    """No provider → prefetch is a no-op (no thread, no event)."""
-    loader = PolicyLoader(None)
-    loader.prefetch()
-    assert loader._prefetch_event is None
-
-
-def test_prefetch_is_idempotent() -> None:
-    """Second call while first is in flight is a no-op (no second thread)."""
-    block = threading.Event()
-
-    def _slow_get(context: PolicyContext) -> PolicyResponse:
-        block.wait(timeout=2.0)
-        return _ok_response()
-
-    provider: Any = type("P", (), {"get_policy": staticmethod(_slow_get)})()
-    loader = PolicyLoader(provider)
-
-    loader.prefetch()
-    first_event = loader._prefetch_event
-    loader.prefetch()
-    assert loader._prefetch_event is first_event
-    block.set()
-    if first_event is not None:
-        first_event.wait(timeout=2.0)
-
-
-def test_prefetch_no_op_when_index_already_loaded() -> None:
-    """If the index is already cached, prefetch is a no-op."""
-    provider = StubPolicyProvider(response=_ok_response())
-    loader = PolicyLoader(provider)
-    loader.get_policy_index()  # populate the cache
-
-    loader.prefetch()
-
-    assert len(provider.calls) == 1
-
-
-def test_get_policy_index_waits_for_prefetch_then_returns() -> None:
-    """When a prefetch is in flight, get_policy_index waits for completion."""
-    started = threading.Event()
-    release = threading.Event()
-
-    def _fetch(context: PolicyContext) -> PolicyResponse:
-        started.set()
-        release.wait(timeout=2.0)
-        return _ok_response()
-
-    provider: Any = type("P", (), {"get_policy": staticmethod(_fetch)})()
-    loader = PolicyLoader(provider)
-
-    loader.prefetch()
-    assert started.wait(timeout=2.0)
-    threading.Thread(
-        target=lambda: (time.sleep(0.05), release.set()), daemon=True
-    ).start()
-    index = loader.get_policy_index()
-    assert index.total_rules == 1
-
-
-def test_get_policy_index_logs_when_prefetch_completes_with_empty_index() -> None:
-    """The 'completed but produced no PolicyIndex' branch fires on provider failure.
-
-    Manually wire a completed event without populating ``_policy_index`` —
-    simulates a prefetch worker that hit an unexpected error after the
-    event was claimed but before the index was set.
-    """
-    loader = PolicyLoader(StubPolicyProvider(response=_ok_response()))
-    event = threading.Event()
-    event.set()
-    loader._prefetch_event = event
-
-    with patch.object(loader_mod.logger, "warning") as mock_warning:
-        index = loader.get_policy_index()
-
-    assert index.total_rules == 0
-    assert any(
-        "completed but produced no PolicyIndex" in str(call.args[0])
-        for call in mock_warning.call_args_list
-    )
-
-
-# ---------------------------------------------------------------------------
-# available_packs / clear_cache
-# ---------------------------------------------------------------------------
-
-
-def test_available_packs_before_load_returns_empty() -> None:
-    assert PolicyLoader(None).available_packs == []
-
-
-def test_available_packs_after_load() -> None:
-    loader = PolicyLoader(StubPolicyProvider(response=_ok_response()))
-    loader.get_policy_index()
-    assert "test-pack" in loader.available_packs
-
-
-def test_clear_cache_forces_refetch() -> None:
-    provider = StubPolicyProvider(response=_ok_response())
-    loader = PolicyLoader(provider)
-
-    loader.get_policy_index()
-    loader.clear_cache()
-    loader.get_policy_index()
-
-    assert len(provider.calls) == 2
-
-
-def test_clear_cache_drops_in_flight_worker_result() -> None:
-    """A worker spawned before ``clear_cache`` must not clobber state after it.
-
-    The race: ``prefetch()`` starts a worker, ``clear_cache()`` retires
-    the prefetch event, then the worker finishes and (incorrectly,
-    before the fix) writes its loaded index back over the cleared
-    cache. With the fix the worker checks ``_prefetch_event is event``
-    before publishing and discards its result when orphaned.
-    """
-    block = threading.Event()
-
-    def _slow_get(context: PolicyContext) -> PolicyResponse:
-        block.wait(timeout=2.0)
-        return _ok_response()
-
-    provider: Any = type("P", (), {"get_policy": staticmethod(_slow_get)})()
-    loader = PolicyLoader(provider)
-
-    loader.prefetch()
-    captured_event = loader._prefetch_event
-    assert captured_event is not None  # prefetch actually started
-
-    # Retire the in-flight worker.
-    loader.clear_cache()
-    assert loader._policy_index is None
-    assert loader._prefetch_event is None
-
-    # Release the worker; let it finish and try to publish.
-    block.set()
-    assert captured_event.wait(timeout=2.0)
-
-    # The orphan worker's result must NOT land in the cache.
-    assert loader._policy_index is None
-
-
-# ---------------------------------------------------------------------------
-# Cross-instance isolation — the whole point of instance-scoped state
-# ---------------------------------------------------------------------------
-
-
-def test_two_loaders_do_not_share_cache() -> None:
-    """Concurrent loaders maintain independent caches.
-
-    ``uipath eval`` runs multiple runtimes in parallel; each gets its
-    own loader and must not leak its cached PolicyIndex into the next.
-    """
-    p1 = StubPolicyProvider(response=_ok_response())
-    p2 = StubPolicyProvider(response=_ok_response())
-    l1 = PolicyLoader(p1)
-    l2 = PolicyLoader(p2)
-
-    l1.get_policy_index()
-    l2.get_policy_index()
-
-    assert len(p1.calls) == 1
-    assert len(p2.calls) == 1
-
-
-def test_two_loaders_carry_independent_conversational_selectors() -> None:
-    """Each loader threads its own selector into PolicyContext."""
-    p1 = StubPolicyProvider(response=_ok_response())
-    p2 = StubPolicyProvider(response=_ok_response())
-    PolicyLoader(p1, is_conversational=True).load_policy_index()
-    PolicyLoader(p2, is_conversational=False).load_policy_index()
-
-    assert p1.calls[0].is_conversational is True
-    assert p2.calls[0].is_conversational is False
diff --git a/tests/test_text_extraction.py b/tests/test_text_extraction.py
new file mode 100644
index 0000000..e163932
--- /dev/null
+++ b/tests/test_text_extraction.py
@@ -0,0 +1,307 @@
+"""Tests for ``_extract_governable_text`` content extraction.
+
+Replaces the old ``str(value)[:2000]`` path in ``_check_before_agent``
+and ``_check_after_agent``. Pulls clean text out of structured shapes
+(dicts, list-of-blocks, pydantic models) instead of letting dict-repr
+noise leak into the regex-scanned blob.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+import pytest
+
+# The wrapper lands in a later slice of the governance stack; skip (don't
+# error at collection) when it isn't present yet.
+_wrapper = pytest.importorskip(
+    "uipath.runtime.governance.wrapper",
+    reason="governance wrapper not yet present in this slice",
+)
+_GOVERNANCE_TEXT_CAP = _wrapper._GOVERNANCE_TEXT_CAP
+_extract_governable_text = _wrapper._extract_governable_text
+
+
+def test_plain_string_passes_through() -> None:
+    assert _extract_governable_text("hello world") == "hello world"
+
+
+def test_none_returns_empty() -> None:
+    assert _extract_governable_text(None) == ""
+
+
+def test_dict_with_content_key_extracts_content_first() -> None:
+    """The classic coded-agent output shape — content comes through clean."""
+    out = _extract_governable_text(
+        {"content": "Estimated cost: $780", "_meta": {"id": "abc"}}
+    )
+    assert out.startswith("Estimated cost: $780")
+    # No dict-syntax noise — the prior str(...) path produced ``{'content': '...'}``.
+    assert "{'content'" not in out
+    assert "'_meta'" not in out
+
+
+def test_dict_priority_keys_lead() -> None:
+    """``content`` / ``text`` / etc. lead before remaining keys."""
+    out = _extract_governable_text(
+        {"trailing_meta": "noise-meta", "content": "primary-text"}
+    )
+    assert out.index("primary-text") < out.index("noise-meta")
+
+
+def test_list_of_text_blocks_concatenates() -> None:
+    """Anthropic-style content blocks."""
+    out = _extract_governable_text(
+        [
+            {"type": "text", "text": "first part"},
+            {"type": "image", "source": {"data": "..."}},
+            {"type": "text", "text": "second part"},
+        ]
+    )
+    assert "first part" in out
+    assert "second part" in out
+
+
+def test_openai_function_call_shape_extracts_arguments() -> None:
+    """``arguments`` field on OpenAI-style function-call blocks."""
+    out = _extract_governable_text(
+        [
+            {
+                "type": "function_call",
+                "name": "end_execution",
+                "arguments": '{"content":"Cost: $1,200"}',
+                "id": "fc_abc",
+            }
+        ]
+    )
+    assert "Cost: $1,200" in out
+
+
+def test_numeric_scalars_are_skipped() -> None:
+    """Numbers / booleans aren't governance text — they shouldn't pad the blob."""
+    out = _extract_governable_text(
+        {"content": "hello", "count": 42, "ok": True, "rate": 3.14}
+    )
+    assert out == "hello"
+
+
+def test_pydantic_like_model_dump_is_walked() -> None:
+    """Anything with ``model_dump()`` is walked as its dict form."""
+
+    class Stub:
+        def model_dump(self) -> dict:
+            return {"content": "from pydantic"}
+
+    assert _extract_governable_text(Stub()) == "from pydantic"
+
+
+def test_dataclass_via_dict_method() -> None:
+    """Objects exposing a ``dict()`` callable also walk via that path."""
+
+    class Stub:
+        def dict(self) -> dict:
+            return {"content": "from dict"}
+
+    assert _extract_governable_text(Stub()) == "from dict"
+
+
+def test_plain_object_attribute_fallback() -> None:
+    """Public attributes on opaque objects feed the walker."""
+
+    @dataclass
+    class Result:
+        content: str
+        _private: str = "ignored"
+
+    out = _extract_governable_text(Result(content="visible"))
+    assert "visible" in out
+    assert "ignored" not in out
+
+
+def test_cycle_in_structure_does_not_recurse_forever() -> None:
+    a: dict = {"content": "outer"}
+    b: dict = {"loop": a}
+    a["loop"] = b
+    # Should return without recursing infinitely.
+    out = _extract_governable_text(a)
+    assert "outer" in out
+
+
+def test_text_is_capped_at_budget() -> None:
+    """Long content is truncated so a runaway payload can't dominate scans."""
+    big = "x" * (_GOVERNANCE_TEXT_CAP + 1000)
+    out = _extract_governable_text(big)
+    assert len(out) == _GOVERNANCE_TEXT_CAP
+
+
+def test_nested_dict_content_extracted() -> None:
+    """LangGraph-style state with messages nested under a key."""
+    out = _extract_governable_text(
+        {
+            "messages": [
+                {"role": "user", "content": "hi"},
+                {"role": "assistant", "content": "Cost: $50"},
+            ]
+        }
+    )
+    assert "Cost: $50" in out
+
+
+def test_unknown_block_type_with_no_text_returns_empty() -> None:
+    """Image-only block leaks no dict-repr noise; output may be empty or raw data."""
+    out = _extract_governable_text(
+        [{"type": "image", "source": {"type": "base64", "data": "..."}}]
+    )
+    # Could be empty or contain just the base64 data — but should NOT
+    # contain Python dict syntax characters that the old path emitted.
+    assert "{'type'" not in out
+
+
+# ---------------------------------------------------------------------------
+# Budget — 64K is the current cap (raised from 8K to fit multi-turn chat).
+# ---------------------------------------------------------------------------
+
+
+def test_budget_cap_is_64k() -> None:
+    """Documents the cap so a future drop won't go unnoticed."""
+    assert _GOVERNANCE_TEXT_CAP == 64000
+
+
+# ---------------------------------------------------------------------------
+# Reverse list iteration — latest entry gets the budget first.
+# ---------------------------------------------------------------------------
+
+
+def test_lists_are_walked_in_reverse() -> None:
+    """Latest list entry leads the extracted blob.
+
+    Critical for chat history: the new user message lives at the end of
+    the messages list and must be visible even when prior turns would
+    otherwise fill the budget first.
+    """
+    out = _extract_governable_text(
+        [{"text": "earliest"}, {"text": "middle"}, {"text": "latest"}]
+    )
+    assert out.index("latest") < out.index("middle") < out.index("earliest")
+
+
+def test_long_chat_history_keeps_latest_user_message() -> None:
+    """A long history must not push the latest message out of the budget.
+
+    Regression for the prior 8K-cap + forward-walk combination, which
+    silently dropped the latest user message once the conversation
+    grew past ~7,800 chars of prior content.
+    """
+    bulky_prior = "x" * 2000
+    messages = [{"role": "user", "content": bulky_prior}] * 40  # ~80K chars
+    messages.append({"role": "user", "content": "Cost: $1,200 — latest"})
+
+    out = _extract_governable_text({"messages": messages})
+    assert "Cost: $1,200 — latest" in out
+
+
+# ---------------------------------------------------------------------------
+# latest_only — BEFORE_AGENT in a conversational agent
+# ---------------------------------------------------------------------------
+
+
+def test_latest_only_extracts_just_the_last_list_item() -> None:
+    """``latest_only=True`` drops every list entry but the last one."""
+    out = _extract_governable_text(
+        {
+            "messages": [
+                {"role": "user", "content": "old message"},
+                {"role": "assistant", "content": "old response"},
+                {"role": "user", "content": "Cost: $1,200"},
+            ]
+        },
+        latest_only=True,
+    )
+    assert "Cost: $1,200" in out
+    assert "old message" not in out
+    assert "old response" not in out
+
+
+def test_latest_only_resets_inside_chosen_item() -> None:
+    """Multi-block content inside the latest message is still walked fully.
+
+    ``latest_only`` reduces the OUTER list (chat history) to its last
+    entry, but multi-block content (text + tool_call + thinking)
+    inside that latest message must still be extracted in full —
+    otherwise we'd lose answer text that arrives in a non-final block.
+    """
+    out = _extract_governable_text(
+        {
+            "messages": [
+                {"role": "user", "content": "old"},
+                {
+                    "role": "assistant",
+                    "content": [
+                        {"type": "text", "text": "part A"},
+                        {
+                            "type": "function_call",
+                            "arguments": '{"answer":"part B"}',
+                        },
+                    ],
+                },
+            ]
+        },
+        latest_only=True,
+    )
+    assert "part A" in out
+    assert "part B" in out
+    assert "old" not in out
+
+
+def test_latest_only_top_level_list() -> None:
+    """``latest_only`` applies when the input itself is a list."""
+    out = _extract_governable_text(
+        [
+            {"content": "history item 1"},
+            {"content": "history item 2"},
+            {"content": "latest input"},
+        ],
+        latest_only=True,
+    )
+    assert "latest input" in out
+    assert "history item 1" not in out
+    assert "history item 2" not in out
+
+
+def test_latest_only_default_false_still_walks_all() -> None:
+    """Default behavior unchanged — AFTER_AGENT etc. still see everything."""
+    out = _extract_governable_text(
+        {
+            "messages": [
+                {"role": "user", "content": "first"},
+                {"role": "user", "content": "second"},
+            ]
+        }
+    )
+    assert "first" in out
+    assert "second" in out
+
+
+def test_latest_only_empty_list_is_empty() -> None:
+    """Empty history → empty extraction."""
+    assert _extract_governable_text({"messages": []}, latest_only=True) == ""
+
+
+def test_messages_is_a_priority_content_key() -> None:
+    """``messages`` (plural) leads ahead of non-priority keys.
+
+    Without ``messages`` in the priority list, an input that also
+    carries siblings like ``thread_id`` / ``metadata`` could siphon
+    budget before the actual chat history is walked.
+    """
+    out = _extract_governable_text(
+        {
+            "thread_id": "abc-xyz",
+            "metadata": {"foo": "bar"},
+            "messages": [{"role": "user", "content": "primary content"}],
+        }
+    )
+    assert "primary content" in out
+    assert out.index("primary content") < (
+        out.find("abc-xyz") if "abc-xyz" in out else len(out)
+    )
diff --git a/tests/test_traces_severity.py b/tests/test_traces_severity.py
new file mode 100644
index 0000000..ce09845
--- /dev/null
+++ b/tests/test_traces_severity.py
@@ -0,0 +1,269 @@
+"""Tests for trace-span verbosity / status semantics.
+
+``TracesAuditSink`` emits an OpenTelemetry span for every governance
+hook end and every rule evaluation. The contract follows §4 of the
+cross-product unification doc — verdict is split into ``evaluator_result``
+(what the rule decided, mode-independent) and ``action_applied`` (what
+actually happened, derived from evaluator_result + mode).
+
+Mode travels with the event (set by the evaluator from the per-runtime
+:attr:`UiPathGovernedRuntime.enforcement_mode` the host supplied) so
+parallel runtimes running different modes don't cross-contaminate the
+sink's view.
+
+- ``verbosityLevel = 4`` (Error) and ``StatusCode.ERROR`` fire **only**
+  when ``action_applied = DENY`` — i.e. the runtime actually blocked
+  the agent (ENFORCE mode + configured action ``deny``).
+- ``verbosityLevel = 3`` (Warning) and ``StatusCode.UNSET`` for advisory
+  outcomes (``action_applied`` in ``{AUDIT, HITL}``). HITL is its own
+  spec bucket — escalation pauses for human review, it doesn't fail
+  the run, so it stays Warning even in ENFORCE mode.
+- Hook spans never set Status, regardless of mode or final_action.
+  They're summary containers; severity belongs on the per-rule span.
+- ``ALLOW`` / ``NONE`` results leave verbosityLevel unset (Orchestrator
+  default = 2, Information) and never call set_status.
+"""
+
+from __future__ import annotations
+
+from unittest.mock import MagicMock
+
+import pytest
+from uipath.core.governance import EnforcementMode
+
+from uipath.runtime.governance._audit.base import AuditEvent, EventType
+from uipath.runtime.governance._audit.traces import TracesAuditSink
+
+
+@pytest.fixture
+def captured_span(monkeypatch: pytest.MonkeyPatch) -> MagicMock:
+    """Patch ``TracesAuditSink._get_tracer`` with a mock tracer; return its span."""
+    fake_span, fake_tracer = MagicMock(name="span"), MagicMock(name="tracer")
+    span_cm = fake_tracer.start_as_current_span.return_value
+    span_cm.__enter__.return_value = fake_span
+    span_cm.__exit__.return_value = False  # falsy: exceptions are not swallowed
+    monkeypatch.setattr(TracesAuditSink, "_get_tracer", lambda self: fake_tracer)
+    return fake_span
+
+
+def _hook_event(final_action: str, mode: EnforcementMode) -> AuditEvent:
+    """Build a ``HOOK_END`` event for a single-rule ``after_model`` hook."""
+    payload = {
+        "total_rules": 1,
+        # Any final action other than "allow" counts as one matched rule.
+        "matched_rules": 0 if final_action == "allow" else 1,
+        "final_action": final_action,
+        "enforcement_mode": mode,
+    }
+    return AuditEvent(
+        event_type=EventType.HOOK_END, agent_name="agent", hook="after_model", data=payload
+    )
+
+
+def _rule_event(
+    matched: bool, action: str, mode: EnforcementMode = EnforcementMode.AUDIT
+) -> AuditEvent:
+    """Build a ``RULE_EVALUATION`` event for the ``commitment-language`` rule."""
+    payload = {
+        "policy_id": "A.10.4",
+        "rule_name": "commitment-language",
+        "pack_name": "iso42001",
+        "matched": matched,
+        "action": action,
+        "enforcement_mode": mode,
+        "status": "MATCHED" if matched else "PASS",
+        "detail": "Customer-binding commitment detected.",
+    }
+    return AuditEvent(
+        event_type=EventType.RULE_EVALUATION, agent_name="agent", hook="after_model",
+        data=payload,
+    )
+
+
+def _span_attrs(span: MagicMock) -> dict[str, object]:
+    """Collapse recorded ``set_attribute`` calls into a name → value dict.
+
+    When a name was set more than once, the value from the last call wins.
+    """
+    recorded_args = (call.args for call in span.set_attribute.call_args_list)
+    return dict(recorded_args)
+
+
+# ---------------------------------------------------------------------------
+# Hook span — never marked ERROR
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.parametrize(
+    ("final_action", "mode"),
+    [
+        ("deny", EnforcementMode.ENFORCE),
+        ("deny", EnforcementMode.AUDIT),
+        ("audit", EnforcementMode.AUDIT),
+        ("escalate", EnforcementMode.AUDIT),
+        ("allow", EnforcementMode.AUDIT),
+    ],
+)
+def test_hook_span_never_sets_error(
+    captured_span: MagicMock, final_action: str, mode: EnforcementMode
+) -> None:
+    """No hook-end event may trigger ``set_status`` on its span."""
+    TracesAuditSink().emit(_hook_event(final_action=final_action, mode=mode))
+    # Hook spans only summarise; severity is reported on the per-rule spans.
+    context = f"final_action={final_action!r}, mode={mode!r}"
+    assert not captured_span.set_status.called, (
+        f"Hook span should never set_status; called with {context}"
+    )
+
+
+# ---------------------------------------------------------------------------
+# Rule span — enforce-mode DENY is the only Status.ERROR case
+# ---------------------------------------------------------------------------
+
+
+def test_enforce_mode_deny_is_error(captured_span: MagicMock) -> None:
+    """A matched ``deny`` in ENFORCE mode is a real block: level 4 and ERROR status."""
+    from opentelemetry.trace import Status, StatusCode
+
+    blocked = _rule_event(matched=True, action="deny", mode=EnforcementMode.ENFORCE)
+    TracesAuditSink().emit(blocked)
+
+    recorded = _span_attrs(captured_span)
+    assert recorded.get("verbosityLevel") == 4
+    assert recorded.get("uipath_governance.evaluator_result") == "DENY"
+    assert recorded.get("uipath_governance.action_applied") == "DENY"
+    assert recorded.get("uipath_governance.mode") == "ENFORCE"
+
+    set_status = captured_span.set_status
+    assert set_status.called, "Status.ERROR must fire for enforce-mode deny violation"
+    (status,) = set_status.call_args.args
+    assert isinstance(status, Status)
+    assert status.status_code is StatusCode.ERROR
+    # The description must name both the rule and its configured action.
+    for fragment in ("commitment-language", "deny"):
+        assert fragment in status.description
+
+
+def test_enforce_mode_escalate_is_hitl_warning(captured_span: MagicMock) -> None:
+    """An ``escalate`` match in ENFORCE mode is reported as HITL, not as a block.
+
+    Escalation pauses the run for human review rather than failing it,
+    so the span is a Warning (verbosityLevel 3), both verdict attributes
+    read HITL, and ``set_status`` is never called.
+    """
+    escalated = _rule_event(matched=True, action="escalate", mode=EnforcementMode.ENFORCE)
+    TracesAuditSink().emit(escalated)
+
+    recorded = _span_attrs(captured_span)
+    assert recorded.get("verbosityLevel") == 3
+    assert recorded.get("uipath_governance.evaluator_result") == "HITL"
+    assert recorded.get("uipath_governance.action_applied") == "HITL"
+    assert recorded.get("uipath_governance.mode") == "ENFORCE"
+    assert not captured_span.set_status.called
+
+
+# ---------------------------------------------------------------------------
+# Rule span — advisory violations (audit mode, or audit-action rules)
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.parametrize(
+    ("action", "expected_evaluator"),
+    [("deny", "DENY"), ("audit", "DENY"), ("escalate", "HITL")],
+)
+def test_audit_mode_violation_is_warning(
+    captured_span: MagicMock, action: str, expected_evaluator: str
+) -> None:
+    """In AUDIT mode every match is advisory: action_applied=AUDIT, level 3.
+
+    The runtime deliberately lets the agent continue, so marking the
+    span with Status.ERROR would wrongly present the run as failed.
+    ``evaluator_result`` nevertheless keeps the rule's own decision
+    (DENY or HITL), which does not depend on the mode.
+    """
+    advisory = _rule_event(matched=True, action=action, mode=EnforcementMode.AUDIT)
+    TracesAuditSink().emit(advisory)
+
+    recorded = _span_attrs(captured_span)
+    assert recorded.get("verbosityLevel") == 3
+    assert recorded.get("uipath_governance.evaluator_result") == expected_evaluator
+    assert recorded.get("uipath_governance.action_applied") == "AUDIT"
+    assert recorded.get("uipath_governance.mode") == "AUDIT"
+
+    status_was_set = captured_span.set_status.called
+    failure_note = f"Audit-mode {action} violation must NOT set Status.ERROR"
+    assert not status_was_set, failure_note
+
+
+def test_enforce_mode_audit_action_is_warning(captured_span: MagicMock) -> None:
+    """A rule configured with ``audit`` stays advisory even in ENFORCE mode.
+
+    The ``audit`` action acts as a per-rule override: the match is
+    logged but not blocked although the global mode is ENFORCE. The
+    span therefore reports evaluator_result=DENY with
+    action_applied=AUDIT, and verbosity remains at Warning (3).
+    """
+    overridden = _rule_event(matched=True, action="audit", mode=EnforcementMode.ENFORCE)
+    TracesAuditSink().emit(overridden)
+
+    recorded = _span_attrs(captured_span)
+    assert recorded.get("verbosityLevel") == 3
+    assert recorded.get("uipath_governance.evaluator_result") == "DENY"
+    assert recorded.get("uipath_governance.action_applied") == "AUDIT"
+    assert recorded.get("uipath_governance.mode") == "ENFORCE"
+    assert not captured_span.set_status.called
+
+
+# ---------------------------------------------------------------------------
+# Rule span — no violation, no verbosityLevel attribute (Orchestrator default = 2)
+# ---------------------------------------------------------------------------
+
+
+def test_unmatched_rule_no_verbosity_no_error(captured_span: MagicMock) -> None:
+    """A rule that did not match is ALLOW/NONE: no verbosityLevel, no status."""
+    unmatched = _rule_event(matched=False, action="deny", mode=EnforcementMode.ENFORCE)
+    TracesAuditSink().emit(unmatched)
+
+    recorded = _span_attrs(captured_span)
+    assert "verbosityLevel" not in recorded
+    assert recorded.get("uipath_governance.evaluator_result") == "ALLOW"
+    assert recorded.get("uipath_governance.action_applied") == "NONE"
+    assert not captured_span.set_status.called
+
+
+def test_matched_allow_action_no_verbosity(captured_span: MagicMock) -> None:
+    """A matched rule with action ``allow`` is not a violation: ALLOW/NONE, quiet."""
+    allowed = _rule_event(matched=True, action="allow", mode=EnforcementMode.ENFORCE)
+    TracesAuditSink().emit(allowed)
+
+    recorded = _span_attrs(captured_span)
+    assert "verbosityLevel" not in recorded
+    assert recorded.get("uipath_governance.evaluator_result") == "ALLOW"
+    assert recorded.get("uipath_governance.action_applied") == "NONE"
+    assert not captured_span.set_status.called
+
+
+# ---------------------------------------------------------------------------
+# Cross-runtime isolation — the architectural motivation for the refactor
+# ---------------------------------------------------------------------------
+
+
+def test_two_events_carry_independent_modes(captured_span: MagicMock) -> None:
+    """Each emit renders the mode carried by its own event.
+
+    The mode used to come from a process-global, so an ENFORCE runtime
+    could overwrite what an AUDIT runtime's span reported. Because the
+    mode now travels on the event, two back-to-back emits with different
+    modes each produce their own ``uipath_governance.mode`` value.
+    """
+    sink = TracesAuditSink()
+    for event_mode in (EnforcementMode.ENFORCE, EnforcementMode.AUDIT):
+        sink.emit(_rule_event(matched=True, action="deny", mode=event_mode))
+    # Both emits record on the same span mock, so calls stay in emit order.
+    recorded = [call.args for call in captured_span.set_attribute.call_args_list]
+
+    def values_of(name: str) -> list[object]:
+        return [value for key, value in recorded if key == name]
+
+    assert values_of("uipath_governance.mode") == ["ENFORCE", "AUDIT"]
+    assert values_of("uipath_governance.action_applied") == ["DENY", "AUDIT"]