diff --git a/src/uipath/runtime/governance/_audit/__init__.py b/src/uipath/runtime/governance/_audit/__init__.py new file mode 100644 index 0000000..b00769c --- /dev/null +++ b/src/uipath/runtime/governance/_audit/__init__.py @@ -0,0 +1,12 @@ +"""Audit sink framework for governance events. + +Internal module. Provides a pluggable audit system that emits governance +events to one or more sinks. The only built-in sink is ``TracesAuditSink``, +which creates OpenTelemetry spans that uipath-core's exporter ships to the +Orchestrator Traces UI. This sink is always registered by every +:class:`AuditManager` and cannot be disabled by application code — it +carries the governance audit trail. + +Callers import from the submodules directly (``_audit.base``, ``_audit.traces``, +``_audit.factory``). This package exposes no aggregated symbols. +""" diff --git a/src/uipath/runtime/governance/_audit/base.py b/src/uipath/runtime/governance/_audit/base.py new file mode 100644 index 0000000..3364454 --- /dev/null +++ b/src/uipath/runtime/governance/_audit/base.py @@ -0,0 +1,729 @@ +"""Base classes and models for the audit sink framework. + +This module provides the core abstractions for the governance audit system: +- AuditEvent: The data model for audit events +- EventType: Constants for common event types +- AuditSink: Abstract base class for sink implementations +- AuditManager: Central hub for routing events to sinks + +The AuditManager uses a background thread to process events asynchronously, +avoiding blocking the main agent execution path during audit trace HTTP calls. 
+""" + +from __future__ import annotations + +import atexit +import json +import logging +import os +import queue +import threading +import weakref +from abc import ABC, abstractmethod +from dataclasses import asdict, dataclass, field +from datetime import datetime, timezone +from typing import Any + +from uipath.core.governance import EnforcementMode + +logger = logging.getLogger(__name__) + + +# Process-wide cleanup machinery for AuditManager instances. +# +# A single ``atexit`` hook walks a ``WeakSet`` of live managers on exit +# and flushes/closes each one. Two important properties: +# +# 1. **Bounded atexit registrations.** Per-instance ``atexit.register`` +# grows the interpreter's atexit list without bound — N runtimes → +# N hooks → N × shutdown-timeout total exit delay. One process-level +# hook is constant work regardless of how many managers were +# constructed. +# +# 2. **No strong reference to the manager.** ``WeakSet`` lets a disposed +# manager get garbage-collected; if it's already gone by exit time, +# we just skip it. The per-instance atexit hook held the manager +# alive until process exit, leaking memory in long-running +# ``uipath eval`` runs that build many runtimes serially. +_live_managers: weakref.WeakSet[AuditManager] = weakref.WeakSet() +_atexit_registered = False +_atexit_lock = threading.Lock() + + +def _process_cleanup_managers() -> None: + """Process-exit handler: flush + close every live AuditManager. + + Iteration over a snapshot — the WeakSet may mutate during cleanup + (close() touches sinks_lock, GC may fire). Bounded by each manager's + own flush / close timeouts. 
@dataclass
class AuditEvent:
    """Generic audit event that can be sent to any sink.

    Trace correlation is intentionally absent from this dataclass.
    Sinks that need a trace id resolve one at their own boundary:
    OTel-backed sinks let the SDK / exporter handle it, and HTTP
    sinks defer to their injected provider, which resolves at
    HTTP-call time.

    Attributes:
        event_type: Type of event (e.g., "rule_evaluation", "hook_summary")
        timestamp: When the event occurred (auto-set if not provided)
        agent_name: Name of the agent being governed
        hook: Lifecycle hook where event occurred (optional)
        data: Event-specific data dictionary
        metadata: Additional metadata for filtering/routing
    """

    event_type: str
    agent_name: str = "unknown"
    hook: str = ""
    data: dict[str, Any] = field(default_factory=dict)
    metadata: dict[str, Any] = field(default_factory=dict)
    timestamp: datetime = field(default_factory=lambda: datetime.now(timezone.utc))

    def to_dict(self) -> dict[str, Any]:
        """Convert to a plain dictionary.

        Returns:
            A deep copy of the event's fields (via ``dataclasses.asdict``)
            with ``timestamp`` replaced by its ISO-8601 string. Values
            nested inside ``data`` / ``metadata`` are left as-is, so the
            result is not guaranteed to be JSON-ready on its own; use
            :meth:`to_json` for that.
        """
        result = asdict(self)
        result["timestamp"] = self.timestamp.isoformat()
        return result

    @staticmethod
    def _json_default(value: Any) -> Any:
        """Fallback encoder for values ``json`` cannot serialize natively.

        Handles the two non-JSON types that can appear in an event
        payload: enum members (encoded as their ``value``) and
        ``datetime`` objects (encoded as ISO-8601). Anything else still
        raises ``TypeError`` so unexpected payload types are not silently
        stringified.

        Raises:
            TypeError: If ``value`` is neither an ``Enum`` nor a ``datetime``.
        """
        # Local import: keeps this block self-contained without touching
        # the module's import section.
        from enum import Enum

        if isinstance(value, Enum):
            return value.value
        if isinstance(value, datetime):
            return value.isoformat()
        raise TypeError(
            f"Object of type {type(value).__name__} is not JSON serializable"
        )

    def to_json(self) -> str:
        """Convert to a JSON string.

        Enum members and ``datetime`` objects nested in ``data`` or
        ``metadata`` are encoded through :meth:`_json_default`; without
        that hook ``json.dumps`` raises ``TypeError`` for them.

        Raises:
            TypeError: If the payload holds a value that is neither
                JSON-native, an ``Enum`` nor a ``datetime``.
        """
        return json.dumps(self.to_dict(), default=self._json_default)
+ + Example: + class SlackAuditSink(AuditSink): + def __init__(self, webhook_url: str): + self.webhook_url = webhook_url + self._name = "slack" + + @property + def name(self) -> str: + return self._name + + def emit(self, event: AuditEvent) -> None: + if event.data.get("matched") and event.data.get("action") == "deny": + # Send to Slack on violations + requests.post(self.webhook_url, json=event.to_dict()) + + def flush(self) -> None: + pass + """ + + @property + @abstractmethod + def name(self) -> str: + """Unique name for this sink.""" + pass + + @abstractmethod + def emit(self, event: AuditEvent) -> None: + """Emit an audit event to this sink. + + Args: + event: The audit event to emit + + Note: + Implementations should handle errors gracefully and not + raise exceptions that would disrupt governance evaluation. + """ + pass + + def flush(self) -> None: + """Flush any buffered events. + + Override if sink buffers events before writing. + """ + return + + def close(self) -> None: + """Clean up resources. + + Override if sink holds resources that need cleanup. + """ + return + + def accepts(self, event: AuditEvent) -> bool: + """Check if this sink should receive the event. + + Override to filter events. Default accepts all events. + + Args: + event: The audit event to check + + Returns: + True if sink should receive event, False to skip + """ + return True + + +# ============================================================================= +# Audit Manager +# ============================================================================= + + +class AuditManager: + """Manages multiple audit sinks and routes events to them. + + Instance-scoped: each :class:`GovernanceRuntime` owns its own + manager. Parallel runtimes (``uipath eval``) don't share sinks, + workers, or per-sink failure state. + + Constructor automatically registers the always-on ``traces`` sink + (OpenTelemetry → Orchestrator audit UI). 
This sink writes the + governance audit trail and cannot be disabled by application code. + Additional sinks can be added via :meth:`register_sink`. + + Thread Safety: + Events are queued and processed by a background thread, making + :meth:`emit` non-blocking. This avoids blocking agent execution + during audit trace HTTP calls. + """ + + # Trip a sink after this many consecutive emit failures (circuit-breaker). + _SINK_FAILURE_THRESHOLD = 10 + # Bound the async queue so a stuck sink can't grow memory without limit. + # Matches the order of magnitude of a long-running agent's per-session + # audit volume; on overflow the oldest event is dropped to make room. + _DEFAULT_QUEUE_MAXSIZE = 10_000 + + def __init__( + self, + async_mode: bool = True, + queue_maxsize: int = _DEFAULT_QUEUE_MAXSIZE, + register_default_sinks: bool = True, + ) -> None: + """Initialize the audit manager. + + Args: + async_mode: If True (default), events are processed in a background + thread. If False, events are processed synchronously. + queue_maxsize: Max queued events in async mode. On overflow the + oldest queued event is dropped to make room. + register_default_sinks: If True (default), register the + always-on ``traces`` sink and an atexit cleanup + handler. Tests that want a bare manager can pass + ``False`` and register sinks explicitly. + """ + self._sinks: list[AuditSink] = [] + # Single lock guards _sinks, _sink_failures, _tripped_sinks — every + # collection mutated by both the worker thread and the emit caller. + self._sinks_lock = threading.Lock() + # Per-sink consecutive-failure counter, keyed by sink name. 
def _worker_loop(self) -> None:
    """Consume queued events on the worker thread until told to stop.

    Runs until ``_shutdown`` is set or the ``None`` sentinel is dequeued,
    then hands whatever is still queued to :meth:`_drain_queue`.
    """
    while not self._shutdown.is_set():
        # Bounded wait so an idle queue cannot keep us from noticing
        # that shutdown was requested.
        try:
            item = self._queue.get(timeout=0.5)
        except queue.Empty:
            continue

        # Accounting invariant: exactly one task_done() per successful
        # get() — for the sentinel, for a delivered event, and for an
        # event whose delivery raised. Otherwise unfinished_tasks never
        # reaches zero and flush()/join() would wait forever.
        saw_sentinel = item is None
        try:
            if not saw_sentinel:
                self._emit_sync(item)
        except Exception as exc:
            logger.warning("Audit worker error: %s", exc)
        finally:
            self._queue.task_done()

        if saw_sentinel:
            # Shutdown signal
            break

    # Shutdown path: deliver anything left behind.
    self._drain_queue()
def register_sink(self, sink: AuditSink) -> None:
    """Add ``sink`` to this manager unless its name is already taken.

    Args:
        sink: The sink to register

    Note:
        A sink whose name matches an already-registered sink is
        ignored. On a successful registration the sink's failure
        counter and tripped flag are wiped, so a new instance never
        starts out with circuit-breaker state left under the same
        name — whether by a previous instance or by code poking at
        the internal counters.
    """
    with self._sinks_lock:
        for existing in self._sinks:
            if existing.name == sink.name:
                logger.debug("Sink '%s' already registered, skipping", sink.name)
                return
        self._sinks.append(sink)
        # Fresh start for the circuit breaker under this name.
        self._sink_failures.pop(sink.name, None)
        self._tripped_sinks.discard(sink.name)
        logger.info("Registered audit sink: %s", sink.name)
def get_sink(self, name: str) -> AuditSink | None:
    """Look up a registered sink by name.

    Args:
        name: Name of the sink to find

    Returns:
        The first registered sink whose ``name`` equals ``name``, or
        ``None`` when no sink matches.
    """
    with self._sinks_lock:
        matches = (candidate for candidate in self._sinks if candidate.name == name)
        return next(matches, None)
def _ensure_alive_after_fork(self) -> None:
    """Reset queue and respawn worker if we're in a forked child.

    Compares ``os.getpid()`` with the pid stored in ``self._pid``. When
    they match, nothing happens. When they differ, the stored pid, the
    queue, the shutdown event and the worker reference are replaced
    and, in async mode, a new worker thread is started.

    Double-checked under ``_sinks_lock``: a fresh-fork child where
    multiple threads call :meth:`emit` concurrently could otherwise
    each see the stale ``_pid`` and each rebuild ``_queue`` /
    ``_shutdown`` / ``_worker_thread`` — one thread's writes would
    clobber the other's, leaking the queue+worker pair.

    NOTE(review): the replacement queue starts empty, so anything
    still sitting in the inherited queue is not carried over —
    presumably intended, since those events belong to the parent;
    confirm.

    NOTE(review): if another thread held ``_sinks_lock`` at the moment
    of the fork, the child would inherit it in the locked state —
    confirm this path cannot block in that case.
    """
    if os.getpid() == self._pid:
        return  # fast path: same process, no rebuild needed
    with self._sinks_lock:
        # Re-read the pid under the lock; only the first thread to get
        # here performs the rebuild.
        current_pid = os.getpid()
        if current_pid == self._pid:
            return  # another thread won the rebuild race
        # Child process inherited a dead worker_thread reference and
        # a queue the parent owned. Rebuild both so child events drain.
        self._pid = current_pid
        # Same capacity as the queue being replaced.
        self._queue = queue.Queue(maxsize=self._queue.maxsize)
        self._shutdown = threading.Event()
        # Cleared so _start_worker() does not see the inherited thread
        # object and creates a new one.
        self._worker_thread = None
        if self._async_mode:
            self._start_worker()
def emit_hook_summary(
    self,
    hook: str,
    agent_name: str,
    total_rules: int,
    matched_rules: int,
    final_action: str,
    enforcement_mode: EnforcementMode,
) -> None:
    """Build a ``HOOK_END`` summary event and hand it to :meth:`emit`.

    Args:
        hook: Lifecycle hook the summary belongs to
        agent_name: Name of the agent being governed
        total_rules: Stored under ``data["total_rules"]``
        matched_rules: Stored under ``data["matched_rules"]``
        final_action: Stored under ``data["final_action"]``
        enforcement_mode: Stored under ``data["enforcement_mode"]`` so
            the mode travels with the event
    """
    summary = {
        "total_rules": total_rules,
        "matched_rules": matched_rules,
        "final_action": final_action,
        "enforcement_mode": enforcement_mode,
    }
    hook_event = AuditEvent(
        event_type=EventType.HOOK_END,
        agent_name=agent_name,
        hook=hook,
        data=summary,
    )
    self.emit(hook_event)
def flush(self, timeout: float = 5.0) -> None:
    """Wait (bounded) for queued events, then flush every sink.

    In async mode the queue's ``unfinished_tasks`` counter is polled
    until it reaches zero or ``timeout`` seconds have passed. Polling
    is used instead of ``queue.Queue.join`` because ``join`` takes no
    timeout and would block forever behind a wedged sink, breaking the
    bounded-shutdown contract the process-exit handler (see
    :func:`_process_cleanup_managers`) depends on.

    Afterwards each registered sink's ``flush()`` is called; a sink
    that raises is logged and does not stop the others.

    Args:
        timeout: Maximum seconds to wait for queue to drain (default 5.0)
    """
    if self._async_mode:
        import time

        give_up_at = time.monotonic() + max(0.0, timeout)
        nap = min(0.05, timeout) if timeout > 0 else 0.0
        settled = False
        while time.monotonic() < give_up_at:
            try:
                settled = self._queue.unfinished_tasks == 0
            except Exception:  # noqa: BLE001 - queue introspection is best-effort
                # Can't inspect the queue: stop waiting, report nothing.
                settled = True
            if settled:
                break
            time.sleep(nap)

        if not settled:
            # Deadline passed without the queue emptying. Log so a
            # wedged sink is surfaced rather than swallowed.
            try:
                pending = self._queue.unfinished_tasks
            except Exception:  # noqa: BLE001
                pending = -1
            if pending:
                logger.warning(
                    "Audit queue did not drain within %.2fs "
                    "(unfinished tasks=%s); sink may be wedged",
                    timeout, pending,
                )

    # Snapshot under the lock, flush outside it.
    with self._sinks_lock:
        snapshot = list(self._sinks)
    for target in snapshot:
        try:
            target.flush()
        except Exception as exc:
            logger.warning("Audit sink '%s' failed to flush: %s", target.name, exc)
def create_sink(name: str) -> AuditSink | None:
    """Build the audit sink registered under ``name``.

    The lookup is case-insensitive: ``name`` is lower-cased before it
    is compared.

    Args:
        name: Name of the sink to create (currently only ``traces``).

    Returns:
        The created sink, or ``None`` if the name is unknown (a warning
        is logged in that case).
    """
    name = name.lower()

    # Guard clause: anything other than the single known sink.
    if name != "traces":
        logger.warning("Unknown audit sink: %s", name)
        return None

    # Imported only when the traces sink is actually requested.
    from .traces import TracesAuditSink

    return TracesAuditSink()
def _package_version() -> str:
    """Look up the installed ``uipath-runtime`` distribution version.

    Returns:
        The version string reported by ``importlib.metadata``, or the
        literal ``"unknown"`` when no such distribution is installed.
    """
    try:
        resolved = importlib.metadata.version("uipath-runtime")
    except importlib.metadata.PackageNotFoundError:
        resolved = "unknown"
    return resolved
def _derive_results(
    matched: bool, configured_action: str, mode: EnforcementMode
) -> tuple[str, str]:
    """Map a raw rule outcome to ``(evaluator_result, action_applied)``.

    ``evaluator_result`` describes what the rule decided and does not
    depend on ``mode``:

    - unmatched rule, or configured action ``allow`` → ``ALLOW``
    - configured action ``escalate`` → ``HITL``
    - any other configured action (including ``audit``) → ``DENY``

    ``action_applied`` describes what is actually done about it:

    - ``ALLOW`` verdicts → ``NONE``
    - configured action ``audit`` → always ``AUDIT``, whatever the
      mode (a per-rule override, so such a rule never blocks)
    - ``mode`` is ``ENFORCE`` → the verdict itself (``DENY`` / ``HITL``)
    - any other mode → ``AUDIT``

    ``configured_action`` is compared case-insensitively.
    """
    rule_action = configured_action.lower()

    # Nothing fired, or the rule explicitly allows: nothing to apply.
    if rule_action == "allow" or not matched:
        return EVALUATOR_ALLOW, ACTION_NONE

    verdict = EVALUATOR_HITL if rule_action == "escalate" else EVALUATOR_DENY

    # Advisory outcomes: the per-rule audit override, or a global mode
    # that is not ENFORCE.
    if rule_action == "audit" or not (mode == EnforcementMode.ENFORCE):
        return verdict, ACTION_AUDIT

    # Enforcing: the applied action mirrors the verdict.
    applied = ACTION_DENY if verdict == EVALUATOR_DENY else ACTION_HITL
    return verdict, applied
def _emit_hook_span(self, event: AuditEvent) -> None:
    """Create a span for a hook summary (always emitted for each governance check).

    Does nothing when no tracer is available. Otherwise opens a span
    named ``governance.<hook>`` (lower-cased; ``unknown`` when the event
    has no hook) under the current OTel context and stamps it with the
    span-type markers, the governance source, the hook, the applied
    action, the enforcement mode and the schema version. Any exception
    raised while building the span is logged as a warning and swallowed.

    Args:
        event: The ``HOOK_END`` event; ``event.data["final_action"]``
            (default ``"allow"``) and the enforcement mode carried on
            the event drive the ``action_applied`` attribute.
    """
    tracer = self._get_tracer()
    if tracer is None:
        return

    try:
        from opentelemetry import context

        data = event.data
        hook = event.hook or "unknown"
        span_name = f"governance.{hook.lower()}"

        # Use the current OTel context if one is active; otherwise
        # start a root span. The governance span appears as a child
        # of whichever span is current (e.g. the runtime's root
        # span) or as a fresh root.
        #
        # We don't touch org/tenant/folder/job/trace ids here — the
        # uipath OTel exporter resolves those at export time from the
        # process env (see module docstring).
        ctx = context.get_current()

        with tracer.start_as_current_span(span_name, context=ctx) as span:
            # Required for Orchestrator Traces
            span.set_attribute("type", SPAN_TYPE_AGENT_RUN)
            span.set_attribute("span_type", SPAN_TYPE_AGENT_RUN)
            span.set_attribute("uipath.custom_instrumentation", True)

            # Identifies which agent emitted this audit trace. Lets
            # downstream consumers (Orchestrator Traces UI, audit
            # dashboards) filter governance spans by producer when
            # multiple SDKs / governance backends co-exist.
            span.set_attribute(f"{NS}.source", GOVERNANCE_SOURCE)
            # Hook summary attributes. Mode comes from the event — the
            # evaluator stamps it from the per-runtime instance, so the
            # sink is correct for parallel runtimes running different
            # modes.
            mode = _resolve_mode(event)
            final_action = data.get("final_action", "allow")
            # Only the applied action is reported on the summary span;
            # the evaluator verdict is discarded.
            _, action_applied = _derive_results(
                matched=final_action.lower() != "allow",
                configured_action=final_action,
                mode=mode,
            )
            span.set_attribute(f"{NS}.hook", hook)
            span.set_attribute(f"{NS}.action_applied", action_applied)
            span.set_attribute(f"{NS}.mode", mode.value.upper())
            # The schema version is documented as being stamped on
            # every governance span; rule spans already carry it, so
            # the hook summary span must too.
            span.set_attribute(f"{NS}.version", SCHEMA_VERSION)

            # Hook spans are summary containers — they're left at
            # Status.UNSET regardless of final_action. Severity is
            # carried by the per-rule spans (see _emit_rule_span);
            # marking the hook span as ERROR would falsely paint
            # the entire lifecycle phase as failed when only a
            # specific rule fired underneath.

            self._spans_created += 1

    except Exception as e:
        logger.warning("Failed to create governance hook span: %s", e)
+ mode = _resolve_mode(event) + configured_action = data.get("action", "allow") + matched = bool(data.get("matched", False)) + evaluator_result, action_applied = _derive_results( + matched=matched, + configured_action=configured_action, + mode=mode, + ) + + # Governance attributes + span.set_attribute(f"{NS}.policy_id", policy_id) + span.set_attribute(f"{NS}.rule_name", data.get("rule_name", "")) + span.set_attribute(f"{NS}.pack_name", data.get("pack_name", "")) + span.set_attribute(f"{NS}.hook", event.hook) + span.set_attribute(f"{NS}.evaluator_result", evaluator_result) + span.set_attribute(f"{NS}.action_applied", action_applied) + span.set_attribute(f"{NS}.mode", mode.value.upper()) + span.set_attribute(f"{NS}.version", SCHEMA_VERSION) + + detail = data.get("detail", "") + if detail: + span.set_attribute(f"{NS}.evidence", detail[:500]) + + # Severity is driven off the derived ``action_applied``: + # + # - ``DENY`` — runtime actually blocked the agent → + # verbosityLevel=4 (Error) + Status.ERROR. The agent + # span genuinely failed. + # - ``AUDIT`` / ``HITL`` — advisory only; runtime did NOT + # block → verbosityLevel=3 (Warning), Status stays + # UNSET. The agent's span shouldn't be marked failed + # just because an advisory rule fired. + # - ``ALLOW`` / ``NONE`` — no verbosityLevel attribute + # (Orchestrator default = 2, Information). 
+ if action_applied == ACTION_DENY: + span.set_attribute("verbosityLevel", 4) + try: + from opentelemetry.trace import Status, StatusCode + + span.set_status( + Status( + StatusCode.ERROR, + f"Policy violation: " + f"{data.get('rule_name', policy_id)} " + f"(action={configured_action.lower()})", + ) + ) + except ImportError: + pass + elif action_applied in (ACTION_AUDIT, ACTION_HITL): + span.set_attribute("verbosityLevel", 3) + + self._spans_created += 1 + + except Exception as e: + logger.warning("Failed to create governance span: %s", e) + + @property + def spans_created(self) -> int: + """Number of spans created.""" + return self._spans_created diff --git a/src/uipath/runtime/governance/native/__init__.py b/src/uipath/runtime/governance/native/__init__.py new file mode 100644 index 0000000..713a05d --- /dev/null +++ b/src/uipath/runtime/governance/native/__init__.py @@ -0,0 +1,45 @@ +"""Native UiPath governance policy evaluator. + +YAML-defined rules evaluated in-process at each agent lifecycle hook. +The host fetches the policy pack via the +:class:`GovernancePolicyProvider` protocol and compiles it into a +:class:`PolicyIndex` with :func:`build_policy_index_from_yaml` *before* +constructing :class:`GovernanceRuntime` — so the runtime layer never +performs I/O at construction time. + +This subpackage owns: + +- :class:`GovernanceEvaluator` – the evaluator implementation. +- :func:`build_policy_index_from_yaml` – pure YAML → :class:`PolicyIndex` + compiler. +- The native policy model: :class:`Rule`, :class:`Check`, + :class:`Condition`, :class:`PolicyIndex`. + +Shared output types (``Action``, ``AuditRecord``, …) live in +:mod:`uipath.core.governance`. 
+""" + +from ._yaml_to_index import build_policy_index_from_yaml +from .evaluator import GovernanceEvaluator +from .models import ( + Check, + CheckContext, + Condition, + PolicyIndex, + PolicyPack, + Rule, + Severity, +) + +__all__ = [ + "GovernanceEvaluator", + "build_policy_index_from_yaml", + # Native policy model + "Check", + "CheckContext", + "Condition", + "PolicyIndex", + "PolicyPack", + "Rule", + "Severity", +] diff --git a/src/uipath/runtime/governance/native/_yaml_to_index.py b/src/uipath/runtime/governance/native/_yaml_to_index.py index 3bf264c..9abdec3 100644 --- a/src/uipath/runtime/governance/native/_yaml_to_index.py +++ b/src/uipath/runtime/governance/native/_yaml_to_index.py @@ -1,10 +1,11 @@ """Runtime YAML → PolicyIndex parser. -Mirrors the shape produced by ``packs/compile_packs.py`` but builds the -PolicyIndex directly from parsed YAML data rather than generating Python -source. Used by :mod:`uipath.runtime.governance.native.loader` to -compile the YAML body returned by the registered policy provider into -an in-memory index at startup. +Mirrors the shape produced by ``packs/compile_packs.py`` but builds +the :class:`PolicyIndex` directly from parsed YAML data rather than +generating Python source. The host calls this to compile the YAML +body returned by :meth:`GovernancePolicyProvider.get_policy_async` +into an in-memory index, then hands the index to +:class:`GovernanceRuntime`. Accepts either a single YAML document (one pack) or a multi-document stream (``---``-separated packs). Unknown check types and malformed diff --git a/src/uipath/runtime/governance/native/evaluator.py b/src/uipath/runtime/governance/native/evaluator.py new file mode 100644 index 0000000..f629902 --- /dev/null +++ b/src/uipath/runtime/governance/native/evaluator.py @@ -0,0 +1,1102 @@ +"""Governance rule evaluator. 
+ +Instance-scoped — every :class:`GovernanceRuntime` constructs its own +evaluator with explicit dependencies (audit manager, compensator, +enforcement mode). The evaluator does not reach across the runtime +layer through process-globals; the wiring layer composes the runtime +graph and the evaluator consumes what it's given. +""" + +from __future__ import annotations + +import logging +import math +import re +from collections import Counter +from datetime import datetime, timezone +from functools import lru_cache +from typing import Any + +from uipath.core.governance import EnforcementMode +from uipath.core.governance.exceptions import GovernanceBlockException +from uipath.core.governance.models import ( + Action, + AuditRecord, + LifecycleHook, + RuleEvaluation, +) + +from uipath.runtime.governance._audit.base import AuditManager +from uipath.runtime.governance.native.guardrail_compensation import ( + GuardrailCompensator, + disabled_guardrails, +) +from uipath.runtime.governance.native.models import ( + Check, + CheckContext, + Condition, + PolicyIndex, + Rule, +) + +logger = logging.getLogger(__name__) + + +def _compensation_data_for_hook(context: CheckContext) -> dict[str, Any]: + """Build the ``data`` payload for the /runtime/govern compensating call. + + The server runs the guardrail check against the same content the + evaluator was looking at — so we forward whichever + :class:`CheckContext` field is populated for the active hook. Fields + not relevant to the hook are omitted to keep the payload tight. 
+ """ + if context.hook in (LifecycleHook.BEFORE_AGENT,): + return {"content": context.agent_input} + if context.hook in (LifecycleHook.AFTER_AGENT,): + return {"content": context.agent_output} + if context.hook in (LifecycleHook.BEFORE_MODEL,): + payload: dict[str, Any] = {"content": context.model_input} + if context.messages: + payload["messages"] = context.messages + return payload + if context.hook in (LifecycleHook.AFTER_MODEL,): + return {"content": context.model_output} + if context.hook in (LifecycleHook.TOOL_CALL,): + return {"tool_name": context.tool_name, "tool_args": context.tool_args} + if context.hook in (LifecycleHook.AFTER_TOOL,): + return {"tool_name": context.tool_name, "tool_result": context.tool_result} + # Memory-write and unknown hooks: pass an empty content so the + # server still receives a structurally-valid payload. + return {"content": ""} + + +@lru_cache(maxsize=256) +def _compile_regex(pattern: str) -> re.Pattern[str] | None: + """Compile and cache a regex pattern. + + Args: + pattern: The regex pattern string + + Returns: + Compiled pattern or None if invalid + """ + try: + return re.compile(pattern) + except re.error as e: + logger.warning("Invalid regex pattern '%s': %s", pattern, e) + return None + + +# --- vaderSentiment: lazy-imported singleton --- +# Hard dependency, but lazy-loaded to keep import-time cost off the +# critical path. The except branch is defence against a corrupted +# install (file present in METADATA but module unimportable) — the +# operator no-ops rather than crashing the agent. 
def _get_vader_analyzer() -> Any:
    """Return the process-wide SentimentIntensityAnalyzer, or None if unavailable.

    The first call imports vaderSentiment and builds the analyzer; the outcome
    (an analyzer, or ``None`` after an ImportError) is stored in the
    module-level ``_vader_analyzer`` and returned as-is by later calls.
    """
    global _vader_analyzer

    # Anything other than the sentinel means a previous call already resolved
    # the analyzer (possibly to None) — reuse that result.
    if _vader_analyzer is not _VADER_UNINITIALIZED:
        return _vader_analyzer

    try:
        from vaderSentiment.vaderSentiment import (  # type: ignore[import-untyped]
            SentimentIntensityAnalyzer,
        )

        _vader_analyzer = SentimentIntensityAnalyzer()
    except ImportError:
        logger.error(
            "vaderSentiment failed to import despite being a hard dependency; "
            "sentiment_concern checks will not fire. Reinstall uipath-core."
        )
        _vader_analyzer = None

    return _vader_analyzer
= ( + "é", + "è", + "â", + "à ", + "ù", + "î", + "ô", + "ç", # accented vowels + "Ä", + "Ö", + "Ü", + "ß", # German umlauts / eszett + "’", + "“", + "â€\x9d", + "–", + "—", + "•", # smart quotes / dashes + "£", + "°", + "§", + "¶", + "©", + "®", # NBSP-leading symbols + "ï¿", + "¿½", # mojibake'd U+FFFD (0xEF 0xBF 0xBD as Latin-1) + "ï»", + "»¿", # mojibake'd BOM (0xEF 0xBB 0xBF as Latin-1) +) + +# Literal hex escape sequences ("\x80" as 4 source chars) indicate raw +# bytes leaked through a string layer rather than being decoded. +_HEX_ESCAPE_PATTERN = re.compile(r"\\x[0-9a-fA-F]{2}") + + +# --- Static patterns for incident_concern (A.8.4) --- +# Stdlib-only categorical taxonomy. Mirrors sentry-sdk's incident shape +# (categorical types over stack/status), but for string payloads from +# model output / tool result rather than exception objects. +_INCIDENT_PATTERNS: dict[str, list[re.Pattern[str]]] = { + "safety_refusal": [ + re.compile( + r"(?i)\b(i\s+(?:cannot|can'?t|am\s+unable\s+to|won'?t\s+be\s+able\s+to)" + r"\s+(?:help|assist|provide|answer|do\s+that))\b" + ), + re.compile(r"(?i)\b(i'?m\s+sorry,?\s+but\s+i\s+(?:cannot|can'?t))\b"), + re.compile(r"(?i)\b(against\s+my\s+(?:guidelines|policies|programming))\b"), + ], + "tool_failure": [ + re.compile( + r"\b(5\d{2})\b\s*(?:internal\s+server\s+error|service\s+unavailable)" + ), + re.compile(r"(?i)\b(ERR_[A-Z_]+|connection\s+refused|ECONNREFUSED)\b"), + re.compile(r"(?i)\b(timed?\s*out|timeout)\b"), + ], + "auth_failure": [ + re.compile(r"\b(401|403)\b\s*(?:unauthori[sz]ed|forbidden)"), + re.compile( + r"(?i)\b(authentication\s+failed|invalid\s+(?:token|credentials))\b" + ), + ], + "quota_exceeded": [ + re.compile(r"\b(429)\b"), + re.compile( + r"(?i)\b(rate\s+limit\s+exceeded|quota\s+exceeded|too\s+many\s+requests)\b" + ), + ], + "hallucination": [ + re.compile(r"(?i)\b(i\s+(?:made\s+(?:that|this)\s+up|am\s+just\s+guessing))\b"), + re.compile(r"(?i)\b(i\s+don'?t\s+actually\s+know|i\s+fabricat(?:ed|ing))\b"), + ], 
# --- Static patterns for commitment_concern (A.10.4) ---
# Commitment-language signals. The verb pattern covers both first-person
# promise verbs ("we will refund") and formal-business commitment markers
# common in proposal / SOW outputs ("Cost: $X", "fixed scope",
# "Deliverables", "Timeline: N days", "I propose"). Verb, amount, and
# deadline signals combine via OR semantics — see
# :meth:`_check_commitment_concern`.
#
# The whole pattern is case-insensitive (leading ``(?i)``) and every
# alternative is word-boundary anchored, so inflected forms match only
# where spelled out below.
#
# NOTE(review): ``\bwarranty\b`` is already covered by
# ``\bwarrant(?:y|ed|ies)\b``; the duplicate is harmless.
# NOTE(review): ``guarante[ed]+`` is a character class repeated one or more
# times, so it matches "guarantee" / "guaranteed" (and any other run of
# e/d after "guarante") but not "guarantees" or "guaranteeing" — confirm
# that is intended.
_COMMITMENT_VERB_PATTERN = re.compile(
    r"(?i)("
    # First-person promise / liability verbs
    r"\brefund\b|\breimburse\b|"
    r"\bwarranty\b|\bwarrant(?:y|ed|ies)\b|\bguarante[ed]+\b|"
    r"\bsla\b|"
    r"\bwaive[d]?\b|"
    r"\b(?:we|i)\s+(?:will|shall|promise|commit|guarantee)\b|"
    r"\b(?:we|i|i'?ll)\s+(?:deliver|provide|complete|finish|"
    r"handover|hand\s+over|ship)\b|"
    # Proposal / SOW commitment markers
    r"\bfixed\s+(?:price|cost|fee|scope|bid|rate)\b|"
    # "Cost:" / "Quote:" only count when a digit (optionally after "$") follows.
    r"\bcost\s*:\s*\$?\d|"
    r"\bquote\s*:\s*\$?\d|"
    r"\bdeliverables?\b|"
    r"\btimeline\s*:\s*\d+\s*(?:second|minute|hour|day|week|month|year)s?\b|"
    r"\bI\s+propose\b"
    r")"
)
+ r"|\b\d[\d,]*(?:\.\d+)?\s*(?:USD|EUR|GBP|JPY|INR|" + r"dollars?|euros?|pounds?|yen|rupees?)\b" +) +_COMMITMENT_DEADLINE_PATTERN = re.compile( + r"(?i)\bwithin\s+\d+\s*(?:second|minute|hour|day|week|month|year)s?\b" + r"|\bby\s+(?:tomorrow|next\s+\w+|\d+/\d+(?:/\d+)?)\b" +) + + +class GovernanceEvaluator: + """Evaluates governance rules against check contexts. + + Supports two enforcement modes: + + - ``AUDIT``: log all violations but never block (DENY collapses to + AUDIT in the final action). + - ``ENFORCE``: actually block on DENY rules — raises + :class:`GovernanceBlockException` and the agent stops. + + All dependencies (mode, audit manager, compensator) are injected + via the constructor. The evaluator does not consult any + process-global state — parallel runtimes (``uipath eval``) get + their own evaluator with their own audit + compensation pipelines. + """ + + def __init__( + self, + policy_index: PolicyIndex, + *, + enforcement_mode: EnforcementMode = EnforcementMode.AUDIT, + audit_manager: AuditManager | None = None, + compensator: GuardrailCompensator | None = None, + ) -> None: + """Initialize with a compiled policy index and runtime-scoped deps. + + Args: + policy_index: The compiled :class:`PolicyIndex` to evaluate. + Typically read from :attr:`GovernanceRuntime.policy_index` + — the host built it from the provider's + :class:`PolicyResponse` via + :func:`build_policy_index_from_yaml`. + enforcement_mode: Mode the evaluator applies. Defaults to + ``AUDIT`` — the safe default for callers that don't + explicitly opt in to ENFORCE. The wiring layer should + pass ``runtime.enforcement_mode`` here so the evaluator + and the wrapping :class:`GovernanceRuntime` agree on a + single source of truth. + audit_manager: Per-runtime :class:`AuditManager`. When + ``None`` the evaluator runs silently (no audit events + emitted). Tests that don't care about emission can + leave this out. 
+ compensator: Per-runtime :class:`GuardrailCompensator` + used to dispatch ``/runtime/govern`` POSTs for + guardrail-fallback rules. When ``None`` such dispatch + is skipped — the evaluator still records the matched + rules in the :class:`AuditRecord`. + """ + self._policy_index = policy_index + self._enforcement_mode = enforcement_mode + self._audit_manager = audit_manager + self._compensator = compensator + + @property + def policy_index(self) -> PolicyIndex: + """Return the compiled policy index this evaluator runs against.""" + return self._policy_index + + @property + def mode(self) -> EnforcementMode: + """The enforcement mode this evaluator applies.""" + return self._enforcement_mode + + def is_audit_mode(self) -> bool: + """Check if running in audit-only mode.""" + return self._enforcement_mode == EnforcementMode.AUDIT + + def evaluate(self, context: CheckContext) -> AuditRecord: + """Evaluate rules registered for ``context.hook`` against the context. + + Only rules whose ``hook`` field matches the current lifecycle hook + are evaluated — a ``tool_call`` rule does not fire on + ``before_model``, and vice versa. This avoids running checks + against fields the context cannot provide and keeps the audit + stream scoped to the active phase. + + The final action depends on the enforcement mode: + - DISABLED mode: Short-circuit; no rules evaluated, no audit emitted. + - AUDIT mode: Even DENY rules result in AUDIT action (log only, don't block) + - ENFORCE mode: DENY rules result in DENY action AND a + :class:`GovernanceBlockException` is raised. + + Audit events (per-rule + hook summary) are emitted via the + :class:`AuditManager` injected at construction (skipped when + none was supplied). + + Args: + context: The check context with hook and content + + Returns: + AuditRecord with all evaluations and final action. + + Raises: + GovernanceBlockException: In ENFORCE mode when a DENY rule matches. 
+ """ + mode = self._enforcement_mode + if mode == EnforcementMode.DISABLED: + return AuditRecord( + timestamp=datetime.now(timezone.utc), + agent_name=context.agent_name, + runtime_id=context.runtime_id, + hook=context.hook, + evaluations=[], + final_action=Action.ALLOW, + metadata={**context.metadata, "enforcement_mode": mode.value}, + ) + + rules = self._policy_index.get_rules_for_hook(context.hook) + + evaluations: list[RuleEvaluation] = [] + raw_action = Action.ALLOW # The action before mode adjustment + deny_would_fire = False # Track if DENY would have fired + + for rule in rules: + if not rule.enabled: + continue + + evaluation = self._evaluate_rule(rule, context) + evaluations.append(evaluation) + + if evaluation.matched: + # Take the most restrictive action. Use evaluation.action + # (which already folds in per-check overrides), not + # rule.action, so check-level overrides are honored here too. + eval_action = evaluation.action + if eval_action == Action.DENY: + raw_action = Action.DENY + deny_would_fire = True + elif eval_action == Action.ESCALATE and raw_action != Action.DENY: + raw_action = Action.ESCALATE + elif eval_action == Action.AUDIT and raw_action == Action.ALLOW: + raw_action = Action.AUDIT + + # Apply enforcement mode + final_action = self._apply_enforcement_mode(raw_action) + + # Build metadata with mode info + record_metadata = dict(context.metadata) + record_metadata["enforcement_mode"] = mode.value + if deny_would_fire and self.is_audit_mode(): + record_metadata["audit_mode_would_deny"] = True + + audit = AuditRecord( + timestamp=datetime.now(timezone.utc), + agent_name=context.agent_name, + runtime_id=context.runtime_id, + hook=context.hook, + evaluations=evaluations, + final_action=final_action, + metadata=record_metadata, + ) + + self._emit_audit(audit, mode) + + # For any guardrail mapped to UiPath but currently disabled, hand + # the disabled guardrails to the governance-server's + # /runtime/govern endpoint. 
The SERVER runs the guardrail check + # AND writes the trace (the payload carries traceId / src_timestamp + # / hook / agent so it can correlate) — the agent does NOT emit a + # trace itself, to avoid double-writing. Fire-and-forget on a + # daemon thread so a slow or unreachable endpoint never blocks + # the agent. + self._dispatch_compensation(audit, context) + + if final_action == Action.DENY: + raise GovernanceBlockException.from_audit_record(audit) + + return audit + + def _dispatch_compensation( + self, audit: AuditRecord, context: CheckContext + ) -> None: + """Schedule compensating governance for any matched fallback rules. + + Delegates to the injected :class:`GuardrailCompensator`. The + compensator owns concurrency, queue caps, exception isolation, + and graceful process-exit cancellation — this method just + builds the payload, logs the summary, and submits. + + No-op when no compensator was supplied at construction (e.g. + unit tests that don't care about the dispatch path). + """ + if self._compensator is None: + return + + try: + disabled = disabled_guardrails(audit, self._policy_index) + if not disabled: + return + + # Distinct validator names for the operator-facing log line. + validators = [rule.validator for rule in disabled] + + # Surface the disabled-guardrail fire-up: how many rules + # triggered the compensating call, and which validators + # they map to (e.g. pii_detection / prompt_injection / + # harmful_content). One line per dispatch so an operator + # can see the volume + breakdown at a glance. 
+ logger.info( + "Compensating governance triggered: hook=%s, count=%d, validators=[%s]", + audit.hook.value, + len(disabled), + ", ".join(validators), + ) + + self._compensator.submit( + rules=disabled, + data=_compensation_data_for_hook(context), + hook=audit.hook.value, + src_timestamp=audit.timestamp.isoformat(), + agent_name=audit.agent_name, + runtime_id=audit.runtime_id, + ) + except Exception as exc: # noqa: BLE001 - fail-open + logger.warning( + "Failed to dispatch compensating governance call: %s", exc + ) + + def _emit_audit(self, audit: AuditRecord, mode: EnforcementMode) -> None: + """Emit per-rule and hook-summary events to the injected audit manager. + + No-op when no audit manager was supplied at construction. The + per-runtime :class:`AuditManager` handles sink-level circuit + breaking; emission errors stay there and never break evaluation. + """ + manager = self._audit_manager + if manager is None: + return + + hook_name = audit.hook.name + + # ``guardrail_fallback`` rules are server-traced: the agent POSTs + # to ``/runtime/govern`` (see :meth:`_dispatch_compensation`) and + # the governance-server emits the audit event with the actual + # validator verdict. Emitting a Python-side ``rule_evaluation`` + # event here would produce a duplicate trace carrying no + # verdict, so filter these rules out of every event the Python + # evaluator emits (per-rule AND the hook summary's counts). 
+ emittable = [ + ev for ev in audit.evaluations + if not self._is_guardrail_fallback_rule(ev.rule_id) + ] + + for evaluation in emittable: + manager.emit_rule_evaluation( + policy_id=evaluation.rule_id, + rule_name=evaluation.rule_name, + pack_name=evaluation.pack_name, + hook=hook_name, + matched=evaluation.matched, + action=evaluation.action.value if evaluation.matched else "allow", + enforcement_mode=mode, + detail=evaluation.detail, + agent_name=audit.agent_name, + description=evaluation.description, + ) + + manager.emit_hook_summary( + hook=hook_name, + agent_name=audit.agent_name, + total_rules=len(emittable), + matched_rules=sum(1 for ev in emittable if ev.matched), + final_action=audit.final_action.value, + enforcement_mode=mode, + ) + + def _is_guardrail_fallback_rule(self, rule_id: str) -> bool: + """Return True if the rule is a UiPath-compensating fallback rule. + + Such rules carry a ``guardrail_fallback`` condition; their audit + trace is emitted by the governance-server in response to the + ``/runtime/govern`` POST, so the Python evaluator must not emit + a duplicate trace for them. + """ + rule = self._policy_index.get_rule(rule_id) + if rule is None: + return False + for check in rule.checks: + for cond in check.conditions: + if cond.operator == "guardrail_fallback": + return True + return False + + def _apply_enforcement_mode(self, raw_action: Action) -> Action: + """Apply enforcement mode to the raw action. 
+ + In AUDIT mode: + - DENY becomes AUDIT (log but don't block) + - ESCALATE becomes AUDIT (log but don't escalate) + - AUDIT stays AUDIT + - ALLOW stays ALLOW + + In ENFORCE mode: + - All actions pass through unchanged + """ + if self._enforcement_mode == EnforcementMode.AUDIT: + if raw_action in (Action.DENY, Action.ESCALATE): + return Action.AUDIT + return raw_action + + def evaluate_before_agent( + self, + agent_input: str, + agent_name: str, + runtime_id: str, + model_name: str = "", + **kwargs: Any, + ) -> AuditRecord: + """Evaluate BEFORE_AGENT rules.""" + context = CheckContext( + hook=LifecycleHook.BEFORE_AGENT, + agent_name=agent_name, + runtime_id=runtime_id, + agent_input=agent_input, + model_name=model_name, + metadata=kwargs.get("metadata", {}), + ) + return self.evaluate(context) + + def evaluate_after_agent( + self, + agent_output: str, + agent_name: str, + runtime_id: str, + **kwargs: Any, + ) -> AuditRecord: + """Evaluate AFTER_AGENT rules.""" + context = CheckContext( + hook=LifecycleHook.AFTER_AGENT, + agent_name=agent_name, + runtime_id=runtime_id, + agent_output=agent_output, + metadata=kwargs.get("metadata", {}), + ) + return self.evaluate(context) + + def evaluate_before_model( + self, + model_input: str, + agent_name: str, + runtime_id: str, + messages: list[dict[str, Any]] | None = None, + model_name: str = "", + **kwargs: Any, + ) -> AuditRecord: + """Evaluate BEFORE_MODEL rules.""" + context = CheckContext( + hook=LifecycleHook.BEFORE_MODEL, + agent_name=agent_name, + runtime_id=runtime_id, + model_input=model_input, + model_name=model_name, + messages=messages or [], + metadata=kwargs.get("metadata", {}), + ) + return self.evaluate(context) + + def evaluate_after_model( + self, + model_output: str, + agent_name: str, + runtime_id: str, + **kwargs: Any, + ) -> AuditRecord: + """Evaluate AFTER_MODEL rules.""" + context = CheckContext( + hook=LifecycleHook.AFTER_MODEL, + agent_name=agent_name, + runtime_id=runtime_id, + 
model_output=model_output, + metadata=kwargs.get("metadata", {}), + ) + return self.evaluate(context) + + def evaluate_tool_call( + self, + tool_name: str, + tool_args: dict[str, Any], + agent_name: str, + runtime_id: str, + session_state: dict[str, Any] | None = None, + **kwargs: Any, + ) -> AuditRecord: + """Evaluate TOOL_CALL rules.""" + context = CheckContext( + hook=LifecycleHook.TOOL_CALL, + agent_name=agent_name, + runtime_id=runtime_id, + tool_name=tool_name, + tool_args=tool_args, + session_state=session_state or {}, + metadata=kwargs.get("metadata", {}), + ) + return self.evaluate(context) + + def evaluate_after_tool( + self, + tool_name: str, + tool_result: str, + agent_name: str, + runtime_id: str, + **kwargs: Any, + ) -> AuditRecord: + """Evaluate AFTER_TOOL rules.""" + context = CheckContext( + hook=LifecycleHook.AFTER_TOOL, + agent_name=agent_name, + runtime_id=runtime_id, + tool_name=tool_name, + tool_result=tool_result, + metadata=kwargs.get("metadata", {}), + ) + return self.evaluate(context) + + def _evaluate_rule(self, rule: Rule, context: CheckContext) -> RuleEvaluation: + """Evaluate a single rule against the context.""" + if not rule.checks: + # No checks = always matches (for audit-only rules) + return RuleEvaluation( + rule_id=rule.rule_id, + rule_name=rule.name, + matched=True, + detail="Rule has no conditions (always matches)", + pack_name=rule.pack_name, + action=rule.action, + description=rule.description, + ) + + check_results: list[dict[str, Any]] = [] + any_check_matched = False + # Resolve the rule's action from the MATCHED checks so per-check + # `action` overrides take effect. ``Check.action`` defaults to the + # rule's action (see _yaml_to_index), so for rules without an + # override this equals ``rule.action`` exactly. Take the most + # restrictive matched action (DENY > ESCALATE > AUDIT > ALLOW), + # mirroring evaluate()'s cross-rule aggregation. 
+ matched_action = Action.ALLOW + + for check in rule.checks: + matched, detail = self._evaluate_check(check, context) + check_results.append( + { + "matched": matched, + "detail": detail, + "action": check.action.value, + } + ) + if matched: + any_check_matched = True + if check.action == Action.DENY: + matched_action = Action.DENY + elif ( + check.action == Action.ESCALATE + and matched_action != Action.DENY + ): + matched_action = Action.ESCALATE + elif ( + check.action == Action.AUDIT + and matched_action == Action.ALLOW + ): + matched_action = Action.AUDIT + + # Surface the FIRST matched check's message; falls back to the + # first check's detail (empty string when none matched) for + # backward compatibility with rules that have a single check. + first_matched_detail = next( + (cr["detail"] for cr in check_results if cr["matched"]), + check_results[0]["detail"] if check_results else "", + ) + + return RuleEvaluation( + rule_id=rule.rule_id, + rule_name=rule.name, + matched=any_check_matched, + detail=first_matched_detail, + pack_name=rule.pack_name, + action=matched_action if any_check_matched else Action.ALLOW, + description=rule.description, + check_results=check_results, + ) + + def _evaluate_check(self, check: Check, context: CheckContext) -> tuple[bool, str]: + """Evaluate a single check against the context.""" + if not check.conditions: + return True, "No conditions (always matches)" + + results = [] + for condition in check.conditions: + matched = self._evaluate_condition(condition, context) + results.append(matched) + + if check.logic == "any": + final_match = any(results) + else: # "all" is default + final_match = all(results) + + detail = check.message if final_match else "" + return final_match, detail + + def _evaluate_condition(self, condition: Condition, context: CheckContext) -> bool: + """Evaluate a single condition against the context.""" + field_value = self._get_field_value(condition.field, context) + result = 
self._apply_operator(condition.operator, field_value, condition.value) + + if condition.negate: + result = not result + + return result + + def _get_field_value(self, field: str, context: CheckContext) -> Any: + """Get a field value from the context.""" + parts = field.split(".") + + # Start with context + value: Any = context + + for part in parts: + if hasattr(value, part): + value = getattr(value, part) + elif isinstance(value, dict) and part in value: + value = value[part] + else: + return None + + return value + + def _apply_operator( + self, operator: str, field_value: Any, check_value: Any + ) -> bool: + """Apply an operator to compare field value against check value.""" + # Handle existence checks before the None check + if operator == "exists": + return field_value is not None + if operator == "not_exists": + return field_value is None + + # guardrail_fallback fires only when the guardrail is mapped to + # UiPath but its policy is disabled. Config travels in + # ``check_value``; the rule's ``field`` is unused (so + # ``field_value`` is ``None`` here, which is expected — we must + # special-case this before the generic ``None`` short-circuit + # below). + if operator == "guardrail_fallback": + cfg = check_value if isinstance(check_value, dict) else {} + return bool(cfg.get("mapped_to_uipath", False)) and not bool( + cfg.get("policy_enabled", True) + ) + + if field_value is None: + return False + + # Numeric operators don't need stringification — short-circuit + # before `str(field_value)` (expensive for dict / large payloads). 
+ if operator in ("gt", "gte", "lt", "lte"): + try: + lhs = float(field_value) + rhs = float(check_value) + except (ValueError, TypeError): + return False + if operator == "gt": + return lhs > rhs + if operator == "gte": + return lhs >= rhs + if operator == "lt": + return lhs < rhs + return lhs <= rhs + + field_str = str(field_value) + + match operator: + case "equals" | "eq": + return field_str == str(check_value) + + case "not_equals" | "ne": + return field_str != str(check_value) + + case "contains": + return str(check_value).lower() in field_str.lower() + + case "not_contains": + return str(check_value).lower() not in field_str.lower() + + case "regex" | "matches": + compiled = _compile_regex(str(check_value)) + if compiled is None: + return False + return bool(compiled.search(field_str)) + + case "in_list": + if isinstance(check_value, list): + return field_str in check_value + return False + + case "not_in_list": + if isinstance(check_value, list): + return field_str not in check_value + return True + + case "vader_concern": + # VADER compound score <= threshold. + # check_value: dict like {"threshold": -0.3} (default -0.3) + return self._check_vader_concern(field_str, check_value) + + case "encoding_concern": + # chardet-backed encoding integrity check (A.7.4). + # check_value: dict with optional `min_confidence` (default 0.5) + # and `max_replacement_ratio` (default 0.05). + return self._check_encoding_concern(field_str, check_value) + + case "entropy_concern": + # Shannon entropy outside expected range (A.7.4). + # check_value: dict with optional `min` (default 1.5) and + # `max` (default 7.5) bits/byte. Stdlib only. + return self._check_entropy_concern(field_str, check_value) + + case "incident_concern": + # Categorical incident detection (A.8.4). + # check_value: dict with optional `categories` list + # (subset of safety_refusal/tool_failure/auth_failure/ + # quota_exceeded/hallucination). Default: all categories. 
+ return self._check_incident_concern(field_str, check_value) + + case "commitment_concern": + # Customer commitment language detection (A.10.4). + # check_value: dict with optional `require_amount` (default + # True) and `require_deadline` (default False). Fires when + # a commitment verb co-occurs with the configured signals. + return self._check_commitment_concern(field_str, check_value) + + case _: + logger.debug("Unknown operator: %s", operator) + return False + + @staticmethod + def _check_vader_concern(text: str, params: Any) -> bool: + """Return True if VADER compound score on `text` is <= threshold. + + Args: + text: Text to analyse. + params: Either a dict with `threshold` key, or a numeric threshold + directly. Default threshold is -0.3 (clearly-negative). + + Returns: + True iff vaderSentiment is available AND compound score <= threshold. + Returns False on empty input or if the library is not installed — + sentiment checks no-op rather than crash. + """ + if not text or not text.strip(): + return False + + analyzer = _get_vader_analyzer() + if analyzer is None: + return False + + if isinstance(params, dict): + threshold = float(params.get("threshold", -0.3)) + else: + try: + threshold = float(params) + except (TypeError, ValueError): + threshold = -0.3 + + try: + compound = float(analyzer.polarity_scores(text)["compound"]) + except Exception as exc: # pragma: no cover - defensive + logger.debug("VADER analysis failed: %s", exc) + return False + + return compound <= threshold + + @staticmethod + def _check_encoding_concern(text: str, params: Any) -> bool: + r"""Return True if `text` shows encoding integrity issues. + + Sums multiple deterministic corruption signals against text length: + - U+FFFD replacement characters (already-decoded lossy text) + - Literal ``�`` escape sequences carried through a JSON + / repr layer rather than being decoded + - Literal ``\xHH`` hex escapes (raw bytes leaked into a string) + - Latin-1-as-UTF-8 mojibake bigrams (e.g. 
``é``, ``’``) + The check fires when at least ``min_corruption_events`` such + signals are present, or when the corruption ratio exceeds + ``max_replacement_ratio``. chardet (when installed) is consulted as a + secondary low-confidence signal. + """ + if not text or not text.strip(): + return False + + if not isinstance(params, dict): + params = {} + min_confidence = float(params.get("min_confidence", 0.5)) + max_replacement_ratio = float(params.get("max_replacement_ratio", 0.05)) + min_corruption_events = int(params.get("min_corruption_events", 2)) + + length = max(len(text), 1) + + replacement_chars = text.count("�") + literal_ufffd_escapes = text.count("\\ufffd") + hex_escapes = len(_HEX_ESCAPE_PATTERN.findall(text)) + mojibake_bigrams = sum(text.count(bigram) for bigram in _MOJIBAKE_BIGRAMS) + + # Absolute count of distinct corruption *events* (one per + # U+FFFD, one per literal escape sequence, one per mojibake + # bigram). Even diluted by a lot of clean text, a few of these + # in production output is a strong signal. + corruption_events = ( + replacement_chars + literal_ufffd_escapes + hex_escapes + mojibake_bigrams + ) + if corruption_events >= min_corruption_events: + return True + + # Ratio-based fallback for cases below the absolute floor: still + # catches very short payloads where a single corruption char is + # disproportionate. + # Weight each event by its source-char span so denser corruption + # in shorter text trips the ratio sooner: + # U+FFFD = 1 char, literal "\ufffd" escape = 6 chars, "\xHH" = 4 chars, + # mojibake bigram = 2 chars. + corruption_chars = ( + replacement_chars + + 6 * literal_ufffd_escapes + + 4 * hex_escapes + + 2 * mojibake_bigrams + ) + if corruption_chars / length > max_replacement_ratio: + return True + + # Secondary: chardet on the encoded bytes.
For pure str input + # this almost always reports high UTF-8/ASCII confidence (the + # branch is intentionally permissive), but it does catch bytes + # routed through `repr()` or `__str__` of a `bytes` object that + # chardet recognises as a non-UTF8 encoding with low confidence. + chardet = _get_chardet() + if chardet is None: + return False + try: + detection = chardet.detect(text.encode("utf-8", errors="replace")) + confidence = float(detection.get("confidence") or 0.0) + except Exception as exc: # pragma: no cover - defensive + logger.debug("chardet detection failed: %s", exc) + return False + + return confidence < min_confidence + + @staticmethod + def _check_entropy_concern(text: str, params: Any) -> bool: + """Return True if Shannon entropy of `text` is outside an expected range. + + Stdlib-only. Entropy is computed in bits per symbol over byte + frequencies. English prose typically lands ~3.5–4.5 bits/byte; + binary noise approaches 8 bits/byte; constant/repetitive text + approaches 0. + """ + if not text or not text.strip(): + return False + + if not isinstance(params, dict): + params = {} + lo = float(params.get("min", 1.5)) + hi = float(params.get("max", 7.5)) + + data = text.encode("utf-8", errors="replace") + total = len(data) + if total == 0: + return False + + counts = Counter(data) + entropy = 0.0 + for c in counts.values(): + p = c / total + entropy -= p * math.log2(p) + + return entropy < lo or entropy > hi + + @staticmethod + def _check_incident_concern(text: str, params: Any) -> bool: + """Return True if `text` matches any configured incident pattern (A.8.4). + + Categories: safety_refusal, tool_failure, auth_failure, + quota_exceeded, hallucination. Pass ``{"categories": [...]}`` to + restrict; default scans all categories. 
+ """ + if not text or not text.strip(): + return False + + if isinstance(params, dict): + requested = params.get("categories") + else: + requested = None + + if not requested: + categories = list(_INCIDENT_PATTERNS.keys()) + else: + categories = [c for c in requested if c in _INCIDENT_PATTERNS] + + for category in categories: + for pattern in _INCIDENT_PATTERNS[category]: + if pattern.search(text): + return True + return False + + @staticmethod + def _check_commitment_concern(text: str, params: Any) -> bool: + """Return True if `text` carries customer-commitment language (A.10.4). + + OR semantics: a commitment-verb match always fires; when + ``require_amount`` is true, a currency-anchored amount alone also + fires; when ``require_deadline`` is true, a deadline phrase alone + also fires. With both flags false the rule matches on verb only + (verb-only mode). + + The verb pattern covers first-person promise verbs *and* proposal + / SOW commitment markers ("Cost: $X", "fixed scope", + "Deliverables", "Timeline: N days", "I propose"). The amount + pattern requires a currency marker adjacent to the number so URL + fragments don't false-positive. + """ + if not text or not text.strip(): + return False + + if not isinstance(params, dict): + params = {} + require_amount = bool(params.get("require_amount", True)) + require_deadline = bool(params.get("require_deadline", False)) + + verb_match = bool(_COMMITMENT_VERB_PATTERN.search(text)) + + # Verb-only mode: neither supporting signal is enabled. 
+ if not require_amount and not require_deadline: + return verb_match + + amount_match = require_amount and bool( + _COMMITMENT_AMOUNT_FALLBACK.search(text) + ) + deadline_match = require_deadline and bool( + _COMMITMENT_DEADLINE_PATTERN.search(text) + ) + return verb_match or amount_match or deadline_match diff --git a/src/uipath/runtime/governance/native/guardrail_compensation.py b/src/uipath/runtime/governance/native/guardrail_compensation.py new file mode 100644 index 0000000..369c1b3 --- /dev/null +++ b/src/uipath/runtime/governance/native/guardrail_compensation.py @@ -0,0 +1,312 @@ +"""Compensating governance for disabled centralized guardrails. + +When a ``guardrail_fallback`` rule fires (the guardrail is mapped to +UiPath but the centralized policy is disabled), the framework asks the +governance-server to run the real guardrail check via its +``/{org_id}/agenticgovernance_/api/v1/runtime/govern`` endpoint. + +This module owns only the **local concerns**: a bounded background +pool that schedules the call without blocking the agent hook, and a +``contextvars`` snapshot taken on the caller thread before the worker hop +(worker threads do not inherit the caller's OpenTelemetry context). + +The actual HTTP call — URL composition, auth, headers, JSON +serialisation, env-backed job-context auto-fill — is the +:class:`uipath.core.governance.GovernanceCompensationProvider`'s job. +Callers inject a concrete provider implementation, and this module +just builds the :class:`GovernRequest` wire model and hands it off. + +The call is **fire-and-forget**: the server runs the guardrail AND +writes the audit trace from its side. The agent doesn't inspect the +response — it only cares about whether the call reached the server. + +The compensator is **instance-scoped**: each :class:`GovernanceRuntime` +owns its own pool and semaphore. ``uipath eval`` parallel runtimes +don't share workers, queue slots, or saturation state — one runtime's +spam can't silently drop another's compensation calls.
+ +The compensator does **not** read host env vars and does not resolve +trace ids itself. It propagates the caller's ``contextvars`` (which +hold the live OTel span) across the worker-thread hop via +:func:`contextvars.copy_context`, so the provider can resolve trace +context at HTTP-call time inside the captured context. +""" + +from __future__ import annotations + +import atexit +import contextvars +import logging +import threading +import weakref +from concurrent.futures import ThreadPoolExecutor +from typing import Any + +from uipath.core.governance import ( + FiredRule, + GovernanceCompensationProvider, + GovernRequest, +) + +logger = logging.getLogger(__name__) + + +# ---------------------------------------------------------------------------- +# Process-wide cleanup machinery +# +# One ``atexit`` hook walks a ``WeakSet`` of live compensators on exit and +# closes each. Bounded atexit registrations (N runtimes → 1 hook, not N) and +# weakref tracking so a disposed compensator can be GC'd. Same pattern as +# :class:`uipath.runtime.governance._audit.base.AuditManager`. 
def _register_compensator_for_cleanup(compensator: GuardrailCompensator) -> None:
    """Track ``compensator`` for exit-time cleanup and wire atexit once.

    The compensator is added to the module's weak set on every call.
    The process-level ``atexit`` handler is registered only the first
    time: the flag is checked without the lock as a fast path and
    re-checked under ``_atexit_lock`` before registering.
    """
    global _atexit_registered
    _live_compensators.add(compensator)
    if not _atexit_registered:
        with _atexit_lock:
            # Re-check under the lock: another caller may have
            # registered the hook since the unlocked check above.
            if not _atexit_registered:
                atexit.register(_process_cleanup_compensators)
                _atexit_registered = True
+ """ + out: list[FiredRule] = [] + for ev in audit.evaluations: + if not ev.matched: + continue + rule = policy_index.get_rule(ev.rule_id) + if rule is None: + continue + for check in rule.checks: + for cond in check.conditions: + if cond.operator != "guardrail_fallback": + continue + if not isinstance(cond.value, dict): + continue + # The ``guardrail_fallback`` operator at evaluation time + # only matches when ``mapped_to_uipath=True`` AND + # ``policy_enabled=False``. We re-check here defensively + # so a future code path that bypasses the evaluator (or + # a multi-condition rule that fired on a sibling check) + # can't trigger a compensation call for a guardrail + # that isn't actually disabled. + if not bool(cond.value.get("mapped_to_uipath", False)): + continue + if bool(cond.value.get("policy_enabled", True)): + continue + validator = str(cond.value.get("validator", "")) + if validator: + out.append( + FiredRule( + rule_id=ev.rule_id, + rule_name=ev.rule_name, + pack_name=getattr(rule, "pack_name", "") or "", + validator=validator, + ) + ) + return out + + +def _validators(rules: list[FiredRule]) -> list[str]: + """Distinct validator names from the fired rules, preserving order.""" + return list(dict.fromkeys(r.validator for r in rules if r.validator)) + + +# ---------------------------------------------------------------------------- +# GuardrailCompensator +# ---------------------------------------------------------------------------- + + +class GuardrailCompensator: + """Instance-scoped compensating-governance dispatcher. + + Each :class:`GovernanceRuntime` constructs one. Owns: + + - A :class:`ThreadPoolExecutor` (default 4 workers) that runs the + ``/runtime/govern`` POST off the agent's hook thread. + - A :class:`threading.BoundedSemaphore` (default cap = workers × 4) + that bounds total in-flight submissions (running + queued) so a + misbehaving agent firing compensation faster than the server can + absorb can't grow memory without limit. 
Saturated submissions are + dropped with a warning. + + Process exit cancels queued work via a single process-level atexit + handler (see :func:`_process_cleanup_compensators`); running tasks + finish bounded by the provider's HTTP timeout. + + Fire-and-forget: :meth:`submit` returns immediately. The actual HTTP + work is delegated to :meth:`GovernanceCompensationProvider.compensate` + — this class never touches URL/headers/auth/JSON itself. + """ + + _DEFAULT_MAX_WORKERS = 4 + # Queue depth multiplier — total in-flight cap = max_workers × this. + _INFLIGHT_OVERSUBSCRIPTION = 4 + + def __init__( + self, + provider: GovernanceCompensationProvider, + *, + max_workers: int = _DEFAULT_MAX_WORKERS, + inflight_oversubscription: int = _INFLIGHT_OVERSUBSCRIPTION, + ) -> None: + """Construct a compensator bound to one provider. + + The compensator does not carry a trace id. Trace-id resolution + is the provider's responsibility at HTTP-call time. To preserve + live OTel context across the thread-pool hop (worker threads + don't inherit ``contextvars``), :meth:`submit` runs the worker + callable inside a snapshot captured via + :func:`contextvars.copy_context` — so the caller's OTel span is + still visible when the provider runs on the worker. + + Args: + provider: The :class:`GovernanceCompensationProvider` that + actually fires the ``/runtime/govern`` POST. + max_workers: Concurrent worker threads in the pool. + inflight_oversubscription: How deep the work queue grows + before saturated submissions get dropped. Total cap is + ``max_workers * inflight_oversubscription``. 
+ """ + self._provider = provider + self._inflight_cap = max_workers * inflight_oversubscription + self._pool = ThreadPoolExecutor( + max_workers=max_workers, + thread_name_prefix="governance-compensation", + ) + self._inflight = threading.BoundedSemaphore(self._inflight_cap) + _register_compensator_for_cleanup(self) + + def submit( + self, + rules: list[FiredRule], + data: dict[str, Any], + hook: str, + src_timestamp: str, + agent_name: str, + runtime_id: str, + ) -> None: + """Schedule a /runtime/govern call on the bounded background pool. + + Fire-and-forget. Returns immediately; the call runs on a worker + thread. When the in-flight queue is saturated the call is + dropped with a warning and the agent continues. + + ``rules`` is the per-rule metadata from :func:`disabled_guardrails`; + the validators sent to the guardrail API are derived from it. + + The current :mod:`contextvars` context (which carries the live + OpenTelemetry span) is captured here and re-applied inside the + worker via :meth:`contextvars.Context.run`. This lets the + provider see the live OTel context on the worker thread — + without the snapshot the worker would inherit an empty context + and the provider could only resolve env-based trace ids. + + Never raises — including when the pool has already been shut down. 
+ """ + if not rules: + return + + validators = _validators(rules) + if not validators: + return + + if not self._inflight.acquire(blocking=False): + logger.warning( + "Compensation pool saturated (>%d in flight); dropping call " + "(validators=[%s])", + self._inflight_cap, + ", ".join(validators), + ) + return + + request = GovernRequest( + validators=validators, + rules=rules, + data=data, + hook=hook, + trace_id="", # the provider fills this from the captured context + src_timestamp=src_timestamp, + agent_name=agent_name, + runtime_id=runtime_id, + ) + + provider = self._provider + inflight = self._inflight + # Snapshot the caller's contextvars (OTel span lives in there + # for Python OTel >= 1.x). The worker runs inside this snapshot + # so the provider sees the live span at HTTP-call time. + ctx = contextvars.copy_context() + + def _run() -> None: + try: + provider.compensate(request) + except Exception as exc: # noqa: BLE001 - fail-open by contract + logger.warning( + "Compensation worker failed (validators=[%s]): %s", + ", ".join(validators), + exc, + ) + finally: + inflight.release() + + try: + self._pool.submit(ctx.run, _run) + except RuntimeError as exc: + # Pool was shut down (atexit, dispose, or test teardown) — + # release the semaphore slot we took and log; never raise. + self._inflight.release() + logger.warning( + "Compensation pool unavailable (validators=[%s]): %s", + ", ".join(validators), + exc, + ) + + def close(self) -> None: + """Cancel queued tasks. Running tasks finish bounded by the provider HTTP timeout. + + ``wait=False`` returns immediately so caller / process shutdown + isn't held up; ``cancel_futures=True`` drops anything not yet + running. Idempotent — calling close on an already-closed pool + is a logged no-op. 
+ """ + try: + self._pool.shutdown(wait=False, cancel_futures=True) + except Exception as exc: # noqa: BLE001 - shutdown must not raise + logger.debug("Compensator shutdown error: %s", exc) diff --git a/src/uipath/runtime/governance/native/loader.py b/src/uipath/runtime/governance/native/loader.py deleted file mode 100644 index 5b45d21..0000000 --- a/src/uipath/runtime/governance/native/loader.py +++ /dev/null @@ -1,342 +0,0 @@ -"""Policy pack loader. - -Per-runtime policy loading: a :class:`PolicyLoader` instance owns one -provider plus the cached PolicyIndex and prefetch state. The runtime -never contacts the governance backend directly; the provider owns the -wire / transport (auth, retries, telemetry). When no provider is -supplied, or the provider raises / returns an empty body / yields zero -rules, the loader returns an empty PolicyIndex and the agent runs -without any rules. - -The loader holds **no module-level state**. ``uipath eval`` can spin up -multiple ``GovernanceRuntime`` instances in the same process and each -gets its own loader with its own provider, cache, and selector — no -cross-instance interference. -""" - -from __future__ import annotations - -import logging -import threading -import time -from collections import Counter - -import yaml -from uipath.core.governance import ( - EnforcementMode, - GovernancePolicyProvider, - PolicyContext, -) - -from uipath.runtime.governance.native._yaml_to_index import build_policy_index_from_yaml -from uipath.runtime.governance.native.models import PolicyIndex - -logger = logging.getLogger(__name__) - - -class PolicyLoader: - """Instance-scoped policy loader bound to one provider. - - Owns the policy-index cache, prefetch coordination, and the - conversational selector for a single :class:`GovernanceRuntime` - instance. Multiple loaders coexist in the same process without - clobbering each other. 
- - Typical lifecycle:: - - loader = PolicyLoader(provider, is_conversational=False) - loader.prefetch() # non-blocking, optional - index = loader.get_policy_index() # cached after first call - - When ``provider`` is ``None``, every load returns an empty - PolicyIndex without invoking anything. - """ - - # Upper bound on how long :meth:`get_policy_index` waits for an - # in-flight prefetch before falling back to an empty PolicyIndex. - # The provider owns its own transport timeouts; this is the runtime's - # ceiling on blocking the first hook fire. - _PROVIDER_WAIT_SECONDS = 10.0 - - def __init__( - self, - provider: GovernancePolicyProvider | None, - *, - is_conversational: bool | None = None, - ) -> None: - """Construct a per-runtime policy loader. - - Args: - provider: Policy source. ``None`` means no policies will be - loaded — the loader yields an empty PolicyIndex. - is_conversational: Whether the hosted agent is - conversational. Travels in the :class:`PolicyContext` - so the provider can select the matching policy view. - ``None`` leaves the selector unset — the provider - applies its default. - """ - self._provider = provider - self._is_conversational = is_conversational - self._policy_index: PolicyIndex | None = None - # Enforcement mode supplied by the provider on the most recent - # load. ``None`` until the first load lands (or whenever the - # provider omits a mode); :attr:`enforcement_mode` returns - # ``AUDIT`` in that case. Instance-scoped so parallel runtimes - # (e.g. ``uipath eval``) don't clobber each other. - self._enforcement_mode: EnforcementMode | None = None - # ``_prefetch_event`` is set once the background load finishes - # (success OR failure); callers of ``get_policy_index`` wait on - # it. ``_prefetch_lock`` guards the start-once semantics so - # concurrent ``prefetch`` calls don't kick off duplicate threads. 
- self._prefetch_event: threading.Event | None = None - self._prefetch_lock = threading.Lock() - - def prefetch(self) -> None: - """Kick off a background load of the policy index. - - Non-blocking. Designed to be called as early as possible (at - :class:`GovernanceRuntime` init) so the policy fetch overlaps - with the rest of agent setup. The result lands in this loader's - cache; :meth:`get_policy_index` waits on the prefetch when it's - in flight. - - Idempotent: subsequent calls while the first is running are - no-ops, and calls after completion are no-ops. No-op when no - provider is supplied — there's nothing to fetch. - """ - if self._provider is None: - return - - with self._prefetch_lock: - if self._policy_index is not None: - return # already loaded - if self._prefetch_event is not None: - return # already in flight - event = threading.Event() - self._prefetch_event = event - - def _worker() -> None: - try: - loaded = self.load_policy_index() - except Exception as exc: # noqa: BLE001 - logged; first hook will retry sync - logger.warning("Policy prefetch failed: %s", exc) - else: - with self._prefetch_lock: - # Only publish if we're still the live prefetch. - # ``clear_cache`` nulls ``_prefetch_event`` to retire - # an in-flight worker; in that case the loaded value - # belongs to a stale generation and must be dropped - # rather than clobbering the just-cleared state. - if self._prefetch_event is event: - self._policy_index = loaded - finally: - event.set() - - threading.Thread( - target=_worker, - name="governance-policy-prefetch", - daemon=True, - ).start() - - def get_policy_index(self) -> PolicyIndex: - """Get the cached policy index, loading if necessary. - - Resolution order on first call: - 1. If a prefetch (see :meth:`prefetch`) is in flight, wait - for it to complete (bounded by ``_PROVIDER_WAIT_SECONDS``). - 2. Synchronously call :meth:`load_policy_index` (which invokes - the provider). - 3. 
Empty PolicyIndex when no provider is supplied or the - provider fails / returns nothing. - - Result is cached for the loader's lifetime; per-hook evaluation - never touches the network. Call :meth:`clear_cache` to force a - refetch (mainly for tests). - """ - if self._policy_index is not None: - return self._policy_index - - event = self._prefetch_event - if event is not None: - completed = event.wait(timeout=self._PROVIDER_WAIT_SECONDS) - if completed and self._policy_index is not None: - return self._policy_index - if not completed: - # Timeout: cache an empty index so we don't re-wait the - # full timeout on every subsequent hook. - logger.warning( - "Policy prefetch did not complete in %.1fs; " - "agent will run without any policies", - self._PROVIDER_WAIT_SECONDS, - ) - self._policy_index = PolicyIndex() - return self._policy_index - - # Completed but produced no PolicyIndex — the worker hit an - # unexpected error. Do NOT cache the empty result: caching - # would permanently disable governance for the loader's - # lifetime even though a later prefetch / clear_cache could - # still recover. Return an empty index for this call only. - logger.warning( - "Policy prefetch completed but produced no PolicyIndex " - "(see prior WARN for the root cause); agent will run " - "without any policies for this call" - ) - return PolicyIndex() - - # No prefetch was started (direct callers / tests). Sync load. - self._policy_index = self.load_policy_index() - return self._policy_index - - def load_policy_index(self) -> PolicyIndex: - """Synchronously load and parse the policy index. - - Returns: - PolicyIndex parsed from the provider response. Empty - PolicyIndex when no provider is supplied, the provider - raises, the YAML is malformed, or the response yields - zero rules. 
- """ - start = time.perf_counter() - - index = ( - self._load_from_provider(self._provider) - if self._provider is not None - else None - ) - - if index is not None: - self._log_index_summary(index) - logger.info( - "Policy index ready: source=provider, total_ms=%.1f", - (time.perf_counter() - start) * 1000, - ) - return index - - reason = self._empty_index_reason() - logger.info( - "Policy index ready: source=empty (%s), total_ms=%.1f", - reason, - (time.perf_counter() - start) * 1000, - ) - return PolicyIndex() - - def _empty_index_reason(self) -> str: - """Diagnose why policy loading produced nothing.""" - if self._provider is None: - return "no policy provider supplied" - return "provider returned no policies (error / empty body / zero rules)" - - def _load_from_provider( - self, provider: GovernancePolicyProvider - ) -> PolicyIndex | None: - """Fetch and parse the policy index via the supplied provider. - - Applies the provider-supplied enforcement mode as a side effect. - Returns ``None`` when the provider raises, when the YAML is - malformed, or when the resulting index has no rules — caller - returns an empty PolicyIndex in those cases. - - Takes ``provider`` as a parameter (rather than reading - ``self._provider``) so the type system can prove the call site - is non-None — :meth:`load_policy_index` guards on ``None`` and - passes the narrowed value through. 
- """ - start = time.perf_counter() - - ctx = PolicyContext(is_conversational=self._is_conversational) - - try: - response = provider.get_policy(ctx) - except Exception as exc: # noqa: BLE001 - fail-open by contract - logger.warning("Policy provider get_policy failed: %s", exc) - return None - - if response.mode is not None: - self._enforcement_mode = response.mode - logger.info("Enforcement mode set from provider: %s", response.mode.value) - - if not response.policies: - logger.warning( - "Policy provider returned empty policies field; " - "agent will run without any policies" - ) - return None - - try: - index = build_policy_index_from_yaml(response.policies) - except yaml.YAMLError as exc: - logger.warning("Policy YAML from provider was malformed: %s", exc) - return None - except Exception as exc: # noqa: BLE001 - never let load break agent startup - logger.warning("Failed to build PolicyIndex from provider YAML: %s", exc) - return None - - if index.total_rules == 0: - logger.warning( - "Policy YAML from provider yielded zero rules; " - "agent will run without any policies" - ) - return None - - elapsed_ms = (time.perf_counter() - start) * 1000 - logger.info( - "Loaded policy index from provider: packs=%s, rules=%d, elapsed_ms=%.1f", - index.pack_names, - index.total_rules, - elapsed_ms, - ) - return index - - def _log_index_summary(self, index: PolicyIndex) -> None: - """Log summary of loaded policy index.""" - hook_counts: Counter[str] = Counter() - for rule in index.all_rules: - hook_counts[rule.hook.value] += 1 - - logger.debug( - "Policy packs: %s, total rules: %d, by hook: %s", - index.pack_names, - index.total_rules, - dict(hook_counts), - ) - - @property - def enforcement_mode(self) -> EnforcementMode: - """Active enforcement mode for this loader. - - The canonical source is whatever the policy provider supplied on - the most recent load. 
Until that load lands (or if the provider - omits a mode), the default is :attr:`EnforcementMode.AUDIT` — - evaluate and log without blocking. Defaulting to AUDIT avoids - the chicken-and-egg where a DISABLED default would short-circuit - evaluation before the background load could ever opt the tenant - in. - """ - return ( - self._enforcement_mode - if self._enforcement_mode is not None - else EnforcementMode.AUDIT - ) - - @property - def available_packs(self) -> list[str]: - """Pack names from the currently loaded policy index. - - Returns whatever the provider supplied on the most recent load. - Empty list if no index has been loaded yet. - """ - if self._policy_index is None: - return [] - return self._policy_index.pack_names - - def clear_cache(self) -> None: - """Clear the cached policy index and any in-flight prefetch state. - - Next call to :meth:`get_policy_index` will reload from the - provider. - """ - with self._prefetch_lock: - self._policy_index = None - self._prefetch_event = None - logger.debug("Policy index cache cleared") diff --git a/src/uipath/runtime/governance/native/models.py b/src/uipath/runtime/governance/native/models.py index 125e75e..b8d4adc 100644 --- a/src/uipath/runtime/governance/native/models.py +++ b/src/uipath/runtime/governance/native/models.py @@ -74,12 +74,16 @@ class Rule: @dataclass class CheckContext: - """Context passed to rule evaluation.""" + """Context passed to rule evaluation. + + ``trace_id`` is intentionally absent — trace correlation is + resolved by the wire-side provider at HTTP-call time, not carried + through the runtime evaluation context. 
+ """ hook: LifecycleHook agent_name: str runtime_id: str - trace_id: str # Content fields (populated based on hook) agent_input: str = "" diff --git a/src/uipath/runtime/governance/runtime.py b/src/uipath/runtime/governance/runtime.py index c8f9dd9..ab3d177 100644 --- a/src/uipath/runtime/governance/runtime.py +++ b/src/uipath/runtime/governance/runtime.py @@ -1,36 +1,45 @@ """Governance runtime wrapper. -Wraps a :class:`UiPathRuntimeProtocol` delegate so policy data is sourced -through a :class:`GovernancePolicyProvider`. The provider owns the wire -/ transport (auth, retries, telemetry); the runtime only consumes the -parsed :class:`PolicyResponse`. There is no direct backend fallback — -when ``policy_provider`` is ``None`` the agent runs without any -governance policies. - -The wiring layer (uipath CLI) decides whether to construct -``GovernanceRuntime`` at all (feature flag, project config, etc.) and -passes ``is_conversational`` explicitly when it knows the agent type. -The runtime layer does not introspect the delegate's private attributes -to discover that. - -**Staging caveat — policy loading only, no enforcement yet.** This -module is the policy-loading scaffold: ``__init__`` constructs an -instance-scoped :class:`PolicyLoader` and kicks off a background -prefetch. ``execute`` / ``stream`` / ``get_schema`` / ``dispose`` are -pure passthroughs — no per-hook policy evaluation runs. The evaluator -and framework adapter wiring that consumes the loader's policy index -lands in a follow-up slice. Customers constructing -:class:`GovernanceRuntime` today get policy loading without policy -enforcement; this is intentional and will change when the evaluator -slice merges. +Wraps a :class:`UiPathRuntimeProtocol` delegate and carries a resolved +policy snapshot — a :class:`PolicyIndex` and :class:`EnforcementMode` +supplied by the caller. 
The wrapper performs no I/O at construction, +holds no background thread, retains no policy provider, and reads no +host environment variables. + +The caller (typically the host CLI) is expected to: + +- ``await provider.get_policy_async(PolicyContext(...))`` itself, +- compile the response YAML via + :func:`uipath.runtime.governance.native.build_policy_index_from_yaml`, +- skip wrapping entirely when the response mode is + :attr:`EnforcementMode.DISABLED`, +- pass the resolved ``PolicyIndex`` and ``EnforcementMode`` into the + constructor. + +The wrapper owns the BEFORE_AGENT / AFTER_AGENT lifecycle boundary +when an evaluator is supplied at construction. Framework adapters +intentionally skip chain-level events so nested chain runs don't fire +duplicate boundary evaluations; the runtime layer is the unambiguous +"one invocation = one boundary" point, so it owns those hooks. Per-step +hooks (BEFORE_MODEL, AFTER_MODEL, TOOL_CALL, AFTER_TOOL) are fired by +adapters that observe per-step events. + +Trace-id is intentionally **not** carried on this wrapper. The +governance compensator captures the live OTel context across the +thread-pool hop via :func:`contextvars.copy_context`, and the +injected provider resolves the canonical trace id at HTTP-call time. +The runtime layer is fully env-free for this path. 
""" from __future__ import annotations +import json import logging from typing import Any, AsyncGenerator -from uipath.core.governance import GovernancePolicyProvider +from uipath.core.governance import EnforcementMode +from uipath.core.governance.exceptions import GovernanceBlockException +from uipath.core.serialization import serialize_object from uipath.runtime.base import ( UiPathExecuteOptions, @@ -38,89 +47,166 @@ UiPathStreamOptions, ) from uipath.runtime.events import UiPathRuntimeEvent -from uipath.runtime.governance.native.loader import PolicyLoader +from uipath.runtime.governance.native.evaluator import GovernanceEvaluator +from uipath.runtime.governance.native.models import PolicyIndex from uipath.runtime.result import UiPathRuntimeResult from uipath.runtime.schema import UiPathRuntimeSchema logger = logging.getLogger(__name__) -class GovernanceRuntime: +def _serialize_payload(payload: Any) -> str: + """Serialize an agent input / output to a string for evaluator checks. + + The native evaluator's BEFORE_AGENT / AFTER_AGENT checks scan a + flat string. ``None`` becomes ``""``, ``str`` passes through (so + regex / sentiment checks don't see JSON quotes around the bare + text), and everything else is normalized via + :func:`uipath.core.serialization.serialize_object` (handles + Pydantic / dataclass / datetime / nested structures) and then + JSON-encoded. + """ + if payload is None: + return "" + if isinstance(payload, str): + return payload + try: + return json.dumps(serialize_object(payload)) + except Exception: # noqa: BLE001 — last-resort string fallback + return str(payload) + + +class UiPathGovernedRuntime: """Governance wrapper over a :class:`UiPathRuntimeProtocol` delegate. - Constructs an instance-scoped :class:`PolicyLoader` bound to the - supplied provider and kicks off a non-blocking prefetch so the - policy pack overlaps with the rest of agent setup. 
When - ``policy_provider`` is ``None``, the loader yields an empty - PolicyIndex and the agent runs without any governance policies for - the lifetime of this instance. - - **Policy loading only — no enforcement yet.** ``execute`` / ``stream`` - / ``get_schema`` / ``dispose`` are passthroughs to the delegate; no - per-hook policy evaluation runs in this slice. The evaluator and - framework adapter wiring that consumes the loader's policy index is - staged separately. + Holds a caller-resolved :class:`PolicyIndex` and + :class:`EnforcementMode` for the lifetime of the instance. + ``execute`` / ``stream`` / ``get_schema`` / ``dispose`` forward to + the delegate. + + When ``evaluator`` is supplied, :meth:`execute` and :meth:`stream` + fire ``BEFORE_AGENT`` before delegating and ``AFTER_AGENT`` after a + successful return. Without an evaluator the wrapper is a pure + pass-through. """ def __init__( self, delegate: UiPathRuntimeProtocol, - policy_provider: GovernancePolicyProvider | None, + policy_index: PolicyIndex, + enforcement_mode: EnforcementMode, *, - is_conversational: bool | None = None, + evaluator: GovernanceEvaluator | None = None, + agent_name: str = "", + runtime_id: str = "", ): - """Initialize the governance runtime. + """Initialize the governance runtime with a resolved policy snapshot. Args: delegate: The wrapped runtime to forward execution to. - policy_provider: Source of the policy pack. ``None`` means - no policies will be loaded — the agent runs without - governance for the lifetime of this instance. - is_conversational: Whether the hosted agent is - conversational. Forwarded into the provider's - :class:`PolicyContext` so it can pick the right policy - view (conversational vs autonomous). ``None`` (default) - leaves the selector unset — the provider applies its - default. The wiring layer (uipath CLI) is expected to - pass the concrete value when it knows the agent type. 
+ policy_index: Resolved :class:`PolicyIndex` built from the + provider's :class:`PolicyResponse`. Pass an empty + ``PolicyIndex()`` to attach the wrapper without any + rules (useful when the wrapper exists for audit + emission only). + enforcement_mode: Resolved :class:`EnforcementMode` from + the provider's :class:`PolicyResponse`. The caller is + expected to skip wrapping entirely when the response + mode is :attr:`EnforcementMode.DISABLED`; this + constructor does not check. + evaluator: Optional :class:`GovernanceEvaluator` that + drives BEFORE_AGENT / AFTER_AGENT inside + :meth:`execute` / :meth:`stream`. When ``None`` the + wrapper is a pure passthrough — the caller is expected + to fire those evaluations itself. + agent_name: Name of the agent (the runtime's entrypoint). + Passed through to the evaluator's hook methods. + runtime_id: Runtime-instance id (conversation id, job id, + or a synthetic per-run id). Passed through so + per-runtime state routes cleanly. """ self._delegate = delegate - self._loader = PolicyLoader( - policy_provider, - is_conversational=is_conversational, - ) - self._loader.prefetch() - - @property - def loader(self) -> PolicyLoader: - """The instance-scoped policy loader. - - Exposed so adapters / evaluators wired into this runtime can - call :meth:`PolicyLoader.get_policy_index` at hook time. + self._policy_index = policy_index + self._enforcement_mode = enforcement_mode + self._evaluator = evaluator + self._agent_name = agent_name + self._runtime_id = runtime_id + + def _fire_before_agent(self, input: Any) -> None: + """Fire BEFORE_AGENT when an evaluator is wired; otherwise no-op. + + ``GovernanceBlockException`` propagates — that's how + ENFORCE-mode DENY rules halt a run. Anything else is logged + and swallowed so a governance bug never breaks the agent. 
""" - return self._loader + if self._evaluator is None: + return + try: + self._evaluator.evaluate_before_agent( + agent_input=_serialize_payload(input), + agent_name=self._agent_name, + runtime_id=self._runtime_id, + ) + except GovernanceBlockException: + raise + except Exception as exc: # noqa: BLE001 — never break a run on audit failure + logger.warning("BEFORE_AGENT governance evaluation failed: %s", exc) + + def _fire_after_agent(self, result: UiPathRuntimeResult) -> None: + """Fire AFTER_AGENT against ``result.output``. + + Same exception policy as :meth:`_fire_before_agent`. + """ + if self._evaluator is None: + return + try: + self._evaluator.evaluate_after_agent( + agent_output=_serialize_payload(result.output), + agent_name=self._agent_name, + runtime_id=self._runtime_id, + ) + except GovernanceBlockException: + raise + except Exception as exc: # noqa: BLE001 + logger.warning("AFTER_AGENT governance evaluation failed: %s", exc) async def execute( self, input: dict[str, Any] | None = None, options: UiPathExecuteOptions | None = None, ) -> UiPathRuntimeResult: - """Execute the delegate. Policy evaluation hooks are wired separately.""" - return await self._delegate.execute(input, options=options) + """Execute the delegate, firing BEFORE_AGENT / AFTER_AGENT around it. + + AFTER_AGENT fires only on successful return — if the delegate + raises, there's no output to evaluate. + """ + self._fire_before_agent(input) + result = await self._delegate.execute(input, options=options) + self._fire_after_agent(result) + return result async def stream( self, input: dict[str, Any] | None = None, options: UiPathStreamOptions | None = None, ) -> AsyncGenerator[UiPathRuntimeEvent, None]: - """Stream events from the delegate. Hooks are wired separately.""" + """Stream events from the delegate, firing BEFORE_AGENT first. 
+ + AFTER_AGENT fires once a :class:`UiPathRuntimeResult` event is + observed in the stream — that's the runtime's contract for + signalling a completed invocation. Intermediate state events + pass through untouched. + """ + self._fire_before_agent(input) async for event in self._delegate.stream(input, options=options): + if isinstance(event, UiPathRuntimeResult): + self._fire_after_agent(event) yield event async def get_schema(self) -> UiPathRuntimeSchema: - """Passthrough schema for the delegate.""" + """Forward schema lookup to the delegate.""" return await self._delegate.get_schema() async def dispose(self) -> None: - """Dispose the delegate.""" + """Forward disposal to the delegate.""" await self._delegate.dispose() diff --git a/tests/_helpers.py b/tests/_helpers.py deleted file mode 100644 index 2d3d924..0000000 --- a/tests/_helpers.py +++ /dev/null @@ -1,46 +0,0 @@ -"""Shared test-only helpers. - -Keeps test concerns out of the production governance package: shared -stubs live here rather than inside the production modules. - -The enforcement-mode reset helper is gone because the mode is now -instance-scoped on :class:`PolicyLoader` — tests that want a clean -slate just construct a fresh loader instead of touching a global. -""" - -from __future__ import annotations - -import time - -from uipath.core.governance import PolicyContext, PolicyResponse - - -class StubPolicyProvider: - """Minimal in-memory :class:`GovernancePolicyProvider` for tests. - - Records every :class:`PolicyContext` it receives so tests can assert - on the selector that travelled to the provider. Either returns a - pre-canned :class:`PolicyResponse` or raises a pre-canned exception; - the optional ``slow`` knob lets tests exercise the prefetch-wait - path. 
- """ - - def __init__( - self, - response: PolicyResponse | None = None, - raises: Exception | None = None, - slow: float = 0.0, - ): - self.calls: list[PolicyContext] = [] - self._response = response - self._raises = raises - self._slow = slow - - def get_policy(self, context: PolicyContext) -> PolicyResponse: - self.calls.append(context) - if self._slow: - time.sleep(self._slow) - if self._raises is not None: - raise self._raises - assert self._response is not None - return self._response diff --git a/tests/conftest.py b/tests/conftest.py index ba76eca..a6c5cd5 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -19,7 +19,7 @@ def temp_dir() -> Generator[str, None, None]: yield tmp_dir -# Governance state — provider, conversational selector, policy cache, -# enforcement mode — is owned by each :class:`PolicyLoader` instance, -# so no autouse cross-test reset is needed. Tests that want a clean -# slate just construct a fresh loader. +# Governance state is held inline on the :class:`UiPathGovernedRuntime` +# instance — the host passes a resolved :class:`PolicyIndex` + +# :class:`EnforcementMode` into the constructor, no module-level +# state, no cross-test reset needed. diff --git a/tests/test_audit_manager_lifecycle.py b/tests/test_audit_manager_lifecycle.py new file mode 100644 index 0000000..66db0b8 --- /dev/null +++ b/tests/test_audit_manager_lifecycle.py @@ -0,0 +1,194 @@ +"""Lifecycle tests for :class:`AuditManager`. + +Pins the production-readiness invariants of the audit manager: + +- Process cleanup uses a single ``atexit`` handler that walks a + ``WeakSet`` — so creating many managers in one process doesn't + bloat the atexit list and doesn't pin managers in memory. +- The fork-rebuild path is lock-protected: two threads in a + freshly-forked child can't both rebuild the queue/worker + concurrently. 
+""" + +from __future__ import annotations + +import gc +import os +import threading +from typing import Any +from unittest.mock import patch + +import pytest + +from uipath.runtime.governance._audit import base as audit_base +from uipath.runtime.governance._audit.base import AuditManager + + +def _bare_manager() -> AuditManager: + """Build a manager with no default sinks (no traces sink, no atexit-set add).""" + return AuditManager(async_mode=False, register_default_sinks=False) + + +# --------------------------------------------------------------------------- +# atexit accounting: one process-level hook, no per-instance accumulation +# --------------------------------------------------------------------------- + + +def test_default_managers_register_once_in_process_atexit() -> None: + """Creating N managers must NOT add N entries to interpreter atexit. + + Regression: per-instance ``atexit.register(self._atexit_cleanup)`` + grew the atexit list linearly and held a strong ref to each manager. + The fix routes everyone through one process-level cleanup hook. + """ + with patch.object(audit_base.atexit, "register") as mock_register: + # Reset module state so the assertion is deterministic + # regardless of test-order side effects. + audit_base._atexit_registered = False + try: + AuditManager(async_mode=False) # first → registers + AuditManager(async_mode=False) # second → reuses + AuditManager(async_mode=False) # third → reuses + assert mock_register.call_count == 1, ( + "Each AuditManager must NOT register its own atexit handler" + ) + finally: + # Drop test managers from the cleanup set before leaving. + audit_base._live_managers.clear() + + +def test_register_default_sinks_false_skips_cleanup_set() -> None: + """Bare managers (tests) are not tracked for process cleanup.""" + m = _bare_manager() + assert m not in audit_base._live_managers + + +def test_disposed_manager_can_be_garbage_collected() -> None: + """The WeakSet must NOT keep a disposed manager alive. 
+ + Regression: per-instance atexit held a strong ref → disposed + managers leaked until process exit. With ``WeakSet`` + a single + process hook, dropping the last reference lets the manager GC. + """ + import weakref + + manager = AuditManager(async_mode=False) + ref = weakref.ref(manager) + + # Sanity: it's tracked while alive. + assert manager in audit_base._live_managers + + # Drop the local strong ref + force collection. + del manager + gc.collect() + + # The WeakSet entry must be gone (or about to be). + assert ref() is None, ( + "AuditManager was kept alive — strong reference leak in cleanup machinery" + ) + + +def test_process_cleanup_handles_already_closed_manager() -> None: + """If a manager was explicitly closed, the process hook is a no-op for it. + + A manager that called close() during normal lifecycle should not + raise from the process-level cleanup — sink list is empty, worker + is already joined. + """ + m = AuditManager(async_mode=False) + m.close() + # Must not raise. + audit_base._process_cleanup_managers() + + +# --------------------------------------------------------------------------- +# Fork-rebuild safety +# --------------------------------------------------------------------------- + + +def test_ensure_alive_after_fork_is_idempotent_under_concurrent_emit( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Two threads in a fresh-fork child must not both rebuild the queue. + + Without the lock, both threads observed the stale ``_pid``, both + constructed a new ``queue.Queue`` / ``threading.Event`` / + ``threading.Thread``, and the later writer leaked the earlier + one's queue+worker. With the lock the loser sees the updated + ``_pid`` after acquiring and returns. + """ + m = AuditManager(async_mode=True, register_default_sinks=False) + + # Capture the post-construction queue + worker so we can detect + # whether multiple rebuild winners occurred. 
+ original_queue = m._queue + original_worker = m._worker_thread + + # Simulate a fork by mutating the recorded pid. We do NOT actually + # fork; we just put the manager into "I think I'm in a stale + # process" state. + m._pid = -1 + + barrier = threading.Barrier(8) + seen_queues: set[int] = set() + seen_workers: set[int] = set() + lock = threading.Lock() + + def worker() -> None: + barrier.wait() + m._ensure_alive_after_fork() + with lock: + seen_queues.add(id(m._queue)) + seen_workers.add(id(m._worker_thread)) + + threads = [threading.Thread(target=worker) for _ in range(8)] + for t in threads: + t.start() + for t in threads: + t.join(timeout=5.0) + + # Exactly one queue + worker survived the race. + assert len(seen_queues) == 1, ( + f"Multiple queues survived fork-rebuild race: {seen_queues}" + ) + assert len(seen_workers) == 1, ( + f"Multiple workers survived fork-rebuild race: {seen_workers}" + ) + # And the survivor is NOT the original (we did rebuild). + assert original_queue is not m._queue + assert original_worker is not m._worker_thread + assert m._pid == os.getpid() + + m.close() + + +def test_ensure_alive_after_fork_fast_path_when_pid_unchanged() -> None: + """Same-process call must NOT rebuild — sanity check on the fast path.""" + m = AuditManager(async_mode=True, register_default_sinks=False) + original_queue = m._queue + original_worker = m._worker_thread + + m._ensure_alive_after_fork() # same PID — no-op + + assert m._queue is original_queue + assert m._worker_thread is original_worker + m.close() + + +# --------------------------------------------------------------------------- +# Shared helpers +# --------------------------------------------------------------------------- + + +@pytest.fixture(autouse=True) +def _clean_module_state() -> Any: + """Test isolation for the module-level cleanup machinery. + + Sweep the WeakSet between tests so leftovers from one test don't + show up in another. 
Don't reset ``_atexit_registered`` — once + Python's ``atexit`` accepts a handler, we shouldn't unregister it + just for tests, and the tests above that check registration count + do their own reset under a patched ``atexit.register``. + """ + yield + audit_base._live_managers.clear() diff --git a/tests/test_audit_register_sink.py b/tests/test_audit_register_sink.py new file mode 100644 index 0000000..19c9996 --- /dev/null +++ b/tests/test_audit_register_sink.py @@ -0,0 +1,108 @@ +"""Tests for ``AuditManager.register_sink`` failure-counter semantics. + +A re-registered same-name sink must NOT inherit the previous instance's +tripped circuit-breaker state. ``unregister_sink`` already clears these +counters, but ``register_sink`` also clears them on a successful add as +defense-in-depth (covers tests / external callers that touch the +internal counter dicts directly). +""" + +from __future__ import annotations + +from typing import Any + +import pytest + +from uipath.runtime.governance._audit.base import ( + AuditEvent, + AuditManager, + AuditSink, + EventType, +) + + +class _NoopSink(AuditSink): + """Sink that records emit calls and never raises.""" + + def __init__(self, name: str = "test-sink") -> None: + self._name = name + self.events: list[AuditEvent] = [] + + @property + def name(self) -> str: + return self._name + + def emit(self, event: AuditEvent) -> None: + self.events.append(event) + + +def _event() -> AuditEvent: + return AuditEvent(event_type=EventType.RULE_EVALUATION, agent_name="a") + + +@pytest.fixture +def manager() -> Any: + """Build a fresh, sync-mode AuditManager with no default sinks. + + ``register_default_sinks=False`` keeps the traces sink (and the + process-level cleanup tracking) out of the test, so assertions about + registered sinks see only what the test puts there. 
+ """ + return AuditManager(async_mode=False, register_default_sinks=False) + + +def test_register_clears_stale_failure_counter(manager: AuditManager) -> None: + """A new sink with a name that previously tripped starts fresh.""" + # Simulate prior instance having tripped the circuit-breaker without + # going through unregister (e.g. test code or external code that + # mutated the counters directly). + manager._sink_failures["test-sink"] = manager._SINK_FAILURE_THRESHOLD + manager._tripped_sinks.add("test-sink") + + new_sink = _NoopSink(name="test-sink") + manager.register_sink(new_sink) + + # Counter and tripped-set must be cleared. + assert manager._sink_failures.get("test-sink", 0) == 0 + assert "test-sink" not in manager._tripped_sinks + + # And the new sink actually receives events (would be skipped if + # still considered tripped). + manager.emit(_event()) + assert len(new_sink.events) == 1 + + +def test_register_does_not_clear_for_duplicate(manager: AuditManager) -> None: + """Re-registering an already-present sink is a no-op (no counter reset).""" + sink = _NoopSink(name="test-sink") + manager.register_sink(sink) + + # Simulate the existing sink having accumulated some failures. + manager._sink_failures["test-sink"] = 3 + + # A second register call with the same name should NOT clear those + # failures — the duplicate-check fires before the reset. + duplicate = _NoopSink(name="test-sink") + manager.register_sink(duplicate) + + assert manager._sink_failures["test-sink"] == 3 + + +def test_unregister_then_register_starts_fresh(manager: AuditManager) -> None: + """The full lifecycle: register → trip → unregister → register again.""" + sink = _NoopSink(name="test-sink") + manager.register_sink(sink) + manager._sink_failures["test-sink"] = manager._SINK_FAILURE_THRESHOLD + manager._tripped_sinks.add("test-sink") + + manager.unregister_sink("test-sink") + # Unregister already clears. 
+ assert "test-sink" not in manager._tripped_sinks + + new_sink = _NoopSink(name="test-sink") + manager.register_sink(new_sink) + assert manager._sink_failures.get("test-sink", 0) == 0 + assert "test-sink" not in manager._tripped_sinks + + manager.emit(_event()) + assert len(new_sink.events) == 1 diff --git a/tests/test_commitment_concern.py b/tests/test_commitment_concern.py new file mode 100644 index 0000000..a46149b --- /dev/null +++ b/tests/test_commitment_concern.py @@ -0,0 +1,205 @@ +"""Tests for the commitment_concern check (A.10.4). + +The check now uses OR semantics: a verb match, an amount match, or a +deadline match is each sufficient when its enabling flag is on. With +both flags false the rule matches verb-only. + +The verb pattern also covers proposal / SOW style commitment markers +("Cost: $X", "fixed scope", "Deliverables", "Timeline", "I propose") +so formal-business commitments without first-person verbs still fire. + +Amount detection requires a currency marker adjacent to the number to +prevent URL fragments (forum-post IDs, image dimensions, etc.) from +false-positiving. +""" + +from __future__ import annotations + +import pytest + +from uipath.runtime.governance.native.evaluator import GovernanceEvaluator + +# --------------------------------------------------------------------------- +# The proposal-style sample that originally slipped through the rule. +# Contains: "Cost: $780 (fixed for the above scope)", "Deliverables", +# "Timeline: 4 days total", "I propose", a forum URL with a 6-digit ID. +# Triple-quoted so we keep the line breaks the model produced. +# --------------------------------------------------------------------------- +SAMPLE_PROPOSAL = """To address your concerns, I reviewed the official UiPath site you referenced and relevant resources on uipath.com to inform a fast stabilization plan. 
Notable findings include: a community CI/CD sample for UiPath projects (https://forum.uipath.com/t/announcement-ci-cd-pipeline-sample-implementation-s-for-uipath-projects-alpha/667851). + +Here's how I propose we turn your software around quickly: + +Plan +- Triage (logs + reproduce) +- Quick stabilization + +Deliverables +- Defect triage report + +Timeline: 4 days total +- Day 1: Triage + reproduction + +Cost: $780 (fixed for the above scope) +""" + + +@pytest.mark.parametrize( + "text", + [ + "Cost: $780 (fixed for the above scope)", + "Deliverables: a, b, c", + "Timeline: 4 days total for the whole engagement", + "I propose we turn this around in a week", + "We will refund the difference", + "I'll deliver the report by Friday", + "the warranty covers parts only", + "fixed price of one hundred dollars", + ], +) +def test_verb_match_alone_fires(text: str) -> None: + """Each verb-style commitment marker fires on its own (verb-only mode).""" + assert ( + GovernanceEvaluator._check_commitment_concern( + text, {"require_amount": False, "require_deadline": False} + ) + is True + ) + + +def test_full_proposal_sample_fires() -> None: + """The originally-missed proposal output now fires.""" + assert ( + GovernanceEvaluator._check_commitment_concern( + SAMPLE_PROPOSAL, + {"require_amount": False, "require_deadline": False}, + ) + is True + ) + + +@pytest.mark.parametrize( + "text", + [ + "$780", + "We charge USD 1,200 per seat", + "The fee is 500 EUR", + ], +) +def test_amount_alone_fires_when_require_amount_true(text: str) -> None: + """Currency-anchored amount alone fires under OR semantics.""" + assert ( + GovernanceEvaluator._check_commitment_concern( + text, {"require_amount": True, "require_deadline": False} + ) + is True + ) + + +@pytest.mark.parametrize( + "text", + [ + "Task is 75% complete.", + "We maintain 99.9% uptime.", + "Battery at 50%.", + "Score: 12%.", + ], +) +def test_bare_percentage_does_not_fire(text: str) -> None: + """Status-only percentages must 
not trigger commitment_concern. + + Regression for the prior ``\\d{1,3}\\s*%`` branch in the amount + regex, which fired on benign status / progress text. Real + percentage-bearing commitments ("we'll give a 20% discount") + still fire via the verb pattern. + """ + assert ( + GovernanceEvaluator._check_commitment_concern( + text, {"require_amount": True, "require_deadline": False} + ) + is False + ) + + +def test_percentage_with_verb_still_fires() -> None: + """A commitment verb co-occurring with a percentage still fires.""" + assert ( + GovernanceEvaluator._check_commitment_concern( + "We will refund 100% of the purchase price.", + {"require_amount": True, "require_deadline": False}, + ) + is True + ) + + +def test_amount_alone_does_not_fire_when_require_amount_false() -> None: + """Amount-only text is silent when require_amount=False and no verb.""" + assert ( + GovernanceEvaluator._check_commitment_concern( + "The list price is $780.", + {"require_amount": False, "require_deadline": False}, + ) + is False + ) + + +def test_deadline_alone_fires_when_require_deadline_true() -> None: + """Deadline phrase alone fires under OR semantics.""" + assert ( + GovernanceEvaluator._check_commitment_concern( + "Will be done within 5 days.", + {"require_amount": False, "require_deadline": True}, + ) + is True + ) + + +def test_url_fragment_digits_do_not_false_positive() -> None: + """A long URL with embedded digits is not a 'commitment'. + + Catches the prior price-parser misbehaviour where Price.fromstring() + picked up forum-post IDs (e.g. ``667851``) and conflated them with + unrelated currency symbols elsewhere in the text. + """ + text = ( + "See https://forum.example.com/t/topic/667851 for details — " + "no commitment language here." 
+ ) + assert ( + GovernanceEvaluator._check_commitment_concern( + text, {"require_amount": True, "require_deadline": True} + ) + is False + ) + + +@pytest.mark.parametrize( + "text", + [ + "", + " ", + "Just chatting about the weather today.", + "The product is durable and well-made.", + ], +) +def test_no_signal_does_not_fire(text: str) -> None: + """Text without any commitment signal stays silent regardless of flags.""" + assert ( + GovernanceEvaluator._check_commitment_concern( + text, {"require_amount": True, "require_deadline": True} + ) + is False + ) + + +def test_non_dict_params_treated_as_defaults() -> None: + """``params`` of the wrong type degrades to defaults rather than crashing.""" + assert ( + GovernanceEvaluator._check_commitment_concern("we will refund", None) + is True + ) + assert ( + GovernanceEvaluator._check_commitment_concern( + "no verbs here", "garbage" + ) + is False + ) diff --git a/tests/test_enforcement_mode_default.py b/tests/test_enforcement_mode_default.py deleted file mode 100644 index 78230fd..0000000 --- a/tests/test_enforcement_mode_default.py +++ /dev/null @@ -1,114 +0,0 @@ -"""Tests for the default enforcement-mode resolution on :class:`PolicyLoader`. - -The default is :attr:`EnforcementMode.AUDIT` so the wrapper attaches at -runtime construction and the background policy load can run. If the -provider later returns ``disabled``, the loader records it and -:attr:`enforcement_mode` flips. - -Resolution (per :attr:`PolicyLoader.enforcement_mode`): -1. The provider-supplied value on the most recent load. -2. Default :attr:`EnforcementMode.AUDIT`. -""" - -from __future__ import annotations - -from uipath.core.governance import EnforcementMode, PolicyResponse - -from tests._helpers import StubPolicyProvider -from uipath.runtime.governance.native.loader import PolicyLoader - - -def test_default_mode_is_audit() -> None: - """No provider-supplied mode yet → AUDIT. 
- - AUDIT is the default so the wrapper attaches and the background - policy fetch can run. The backend can flip the mode to DISABLED - on fetch when the tenant has no policies. - """ - loader = PolicyLoader(None) - assert loader.enforcement_mode is EnforcementMode.AUDIT - - -def test_provider_disabled_wins_over_default() -> None: - """A provider supplying DISABLED overrides the AUDIT default.""" - provider = StubPolicyProvider( - response=PolicyResponse(mode=EnforcementMode.DISABLED, policies="") - ) - loader = PolicyLoader(provider) - loader.load_policy_index() - assert loader.enforcement_mode is EnforcementMode.DISABLED - - -def test_provider_enforce_wins_over_default() -> None: - """A provider supplying ENFORCE flips the loader to enforce.""" - provider = StubPolicyProvider( - response=PolicyResponse( - mode=EnforcementMode.ENFORCE, - policies="standard: p\nrules: [{id: r1, hook: before_model, " - "checks: [{type: regex, patterns: ['x']}]}]\n", - ) - ) - loader = PolicyLoader(provider) - loader.load_policy_index() - assert loader.enforcement_mode is EnforcementMode.ENFORCE - - -def test_loader_with_none_mode_response_keeps_previous_value() -> None: - """Provider returning ``mode=None`` doesn't clobber a previously-set mode. - - The wire response model treats ``None`` as "no opinion" — the loader - must not overwrite a real value with it. Otherwise a transient - provider response could silently demote a tenant's enforcement - posture. - """ - p1 = StubPolicyProvider( - response=PolicyResponse( - mode=EnforcementMode.ENFORCE, - policies="standard: p\nrules: [{id: r1, hook: before_model, " - "checks: [{type: regex, patterns: ['x']}]}]\n", - ) - ) - loader = PolicyLoader(p1) - loader.load_policy_index() - assert loader.enforcement_mode is EnforcementMode.ENFORCE - - # A second provider response that omits mode should not flip back to AUDIT. 
- loader._provider = StubPolicyProvider( - response=PolicyResponse( - mode=None, - policies="standard: p\nrules: [{id: r1, hook: before_model, " - "checks: [{type: regex, patterns: ['x']}]}]\n", - ) - ) - loader.clear_cache() - loader.load_policy_index() - assert loader.enforcement_mode is EnforcementMode.ENFORCE - - -def test_two_loaders_carry_independent_enforcement_modes() -> None: - """The whole point of the refactor: parallel loaders don't share mode. - - Previously :func:`set_enforcement_mode` wrote a module global, so an - ENFORCE-mode loader and a DISABLED-mode loader running concurrently - in the same process clobbered each other (last writer wins). - Instance-scoped mode means each loader's mode is read-isolated. - """ - p_enforce = StubPolicyProvider( - response=PolicyResponse( - mode=EnforcementMode.ENFORCE, - policies="standard: e\nrules: [{id: r1, hook: before_model, " - "checks: [{type: regex, patterns: ['x']}]}]\n", - ) - ) - p_disabled = StubPolicyProvider( - response=PolicyResponse(mode=EnforcementMode.DISABLED, policies="") - ) - - enforce_loader = PolicyLoader(p_enforce) - disabled_loader = PolicyLoader(p_disabled) - - enforce_loader.load_policy_index() - disabled_loader.load_policy_index() - - assert enforce_loader.enforcement_mode is EnforcementMode.ENFORCE - assert disabled_loader.enforcement_mode is EnforcementMode.DISABLED diff --git a/tests/test_evaluator.py b/tests/test_evaluator.py new file mode 100644 index 0000000..2039182 --- /dev/null +++ b/tests/test_evaluator.py @@ -0,0 +1,420 @@ +"""Tests for the audit + enforcement behavior of GovernanceEvaluator. + +The evaluator's three load-bearing responsibilities: + +1. DISABLED enforcement mode short-circuits — no rules evaluated, no + audit events emitted, no exceptions raised. +2. AUDIT mode evaluates rules and emits audit events, but transforms + matched DENY actions into AUDIT so execution continues. +3. 
ENFORCE mode evaluates, emits audit, and raises + :class:`GovernanceBlockException` when a DENY rule matches. + +Plus a fail-safe contract: a misbehaving audit sink must not stop +evaluation from completing or propagate as an exception. The +evaluator is constructed with explicit dependencies (audit manager, +enforcement mode); no process-globals are involved. +""" + +from __future__ import annotations + +from typing import Any + +import pytest +from uipath.core.governance import EnforcementMode +from uipath.core.governance.exceptions import GovernanceBlockException +from uipath.core.governance.models import Action, LifecycleHook + +from uipath.runtime.governance._audit.base import ( + AuditEvent, + AuditManager, + AuditSink, + EventType, +) +from uipath.runtime.governance.native.evaluator import GovernanceEvaluator +from uipath.runtime.governance.native.models import ( + Check, + CheckContext, + Condition, + PolicyIndex, + PolicyPack, + Rule, +) + +# --------------------------------------------------------------------------- +# Test helpers +# --------------------------------------------------------------------------- + + +class _CapturingSink(AuditSink): + """Audit sink that records every event for assertions.""" + + def __init__(self) -> None: + self.events: list[AuditEvent] = [] + + @property + def name(self) -> str: + return "capturing" + + def emit(self, event: AuditEvent) -> None: + self.events.append(event) + + +def _deny_rule_on_input_contains(needle: str) -> Rule: + """Build a rule that DENIES when agent_input contains ``needle``.""" + return Rule( + rule_id="TEST-01", + name="Test deny on input", + clause="A.1.1", + hook=LifecycleHook.BEFORE_AGENT, + action=Action.DENY, + checks=[ + Check( + conditions=[ + Condition( + operator="contains", + field="agent_input", + value=needle, + ) + ], + action=Action.DENY, + message=f"Input must not contain {needle!r}", + ) + ], + ) + + +def _build_index_with(rule: Rule) -> PolicyIndex: + """Wrap a single rule in a 
one-pack PolicyIndex.""" + idx = PolicyIndex() + idx.add_pack( + PolicyPack( + name="test_pack", + version="1.0", + description="test", + rules=[rule], + ) + ) + return idx + + +def _ctx(agent_input: str) -> CheckContext: + return CheckContext( + hook=LifecycleHook.BEFORE_AGENT, + agent_name="test-agent", + runtime_id="run-1", + agent_input=agent_input, + ) + + +def _build_evaluator( + rule: Rule, + mode: EnforcementMode, + audit_manager: AuditManager | None = None, +) -> GovernanceEvaluator: + """Construct an evaluator with explicit deps — no process-globals involved.""" + return GovernanceEvaluator( + _build_index_with(rule), + enforcement_mode=mode, + audit_manager=audit_manager, + ) + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + + +@pytest.fixture +def audit_setup() -> Any: + """Per-test :class:`AuditManager` + capturing sink — no default sinks. + + Returns ``(manager, sink)`` so a test can build evaluators with the + manager and inspect emitted events through the sink. Synchronous + mode keeps assertions deterministic. 
+ """ + manager = AuditManager(async_mode=False, register_default_sinks=False) + sink = _CapturingSink() + manager.register_sink(sink) + yield manager, sink + manager.close() + + +# --------------------------------------------------------------------------- +# DISABLED mode +# --------------------------------------------------------------------------- + + +def test_disabled_mode_short_circuits_with_empty_record(audit_setup: Any) -> None: + """DISABLED returns an empty AuditRecord and emits nothing.""" + manager, sink = audit_setup + evaluator = _build_evaluator( + _deny_rule_on_input_contains("secret"), + EnforcementMode.DISABLED, + audit_manager=manager, + ) + + audit = evaluator.evaluate(_ctx("definitely contains secret")) + + assert audit.evaluations == [] + assert audit.final_action == Action.ALLOW + assert audit.metadata["enforcement_mode"] == "disabled" + assert sink.events == [] + + +def test_disabled_mode_does_not_raise_on_deny_match(audit_setup: Any) -> None: + """Even when a DENY rule WOULD match, DISABLED never raises.""" + manager, _ = audit_setup + evaluator = _build_evaluator( + _deny_rule_on_input_contains("blocked"), + EnforcementMode.DISABLED, + audit_manager=manager, + ) + + # Must not raise. 
+ evaluator.evaluate(_ctx("this is blocked")) + + +# --------------------------------------------------------------------------- +# AUDIT mode +# --------------------------------------------------------------------------- + + +def test_audit_mode_transforms_deny_to_audit(audit_setup: Any) -> None: + """AUDIT mode evaluates rules but never returns a DENY final_action.""" + manager, _ = audit_setup + evaluator = _build_evaluator( + _deny_rule_on_input_contains("secret"), + EnforcementMode.AUDIT, + audit_manager=manager, + ) + + audit = evaluator.evaluate(_ctx("contains secret data")) + + assert len(audit.evaluations) == 1 + assert audit.evaluations[0].matched is True + assert audit.evaluations[0].action == Action.DENY # raw rule action preserved + assert audit.final_action == Action.AUDIT # mode-adjusted + assert audit.metadata["audit_mode_would_deny"] is True + + +def test_audit_mode_does_not_raise_on_deny_match(audit_setup: Any) -> None: + """AUDIT mode never raises GovernanceBlockException, even on a DENY hit.""" + manager, _ = audit_setup + evaluator = _build_evaluator( + _deny_rule_on_input_contains("blocked"), + EnforcementMode.AUDIT, + audit_manager=manager, + ) + + evaluator.evaluate(_ctx("this is blocked")) # must not raise + + +def test_audit_mode_emits_per_rule_and_summary_events(audit_setup: Any) -> None: + """One rule_evaluation event per rule + one hook_summary per evaluate().""" + manager, sink = audit_setup + evaluator = _build_evaluator( + _deny_rule_on_input_contains("secret"), + EnforcementMode.AUDIT, + audit_manager=manager, + ) + + evaluator.evaluate(_ctx("contains secret")) + + rule_events = [ + e for e in sink.events if e.event_type == EventType.RULE_EVALUATION + ] + summary_events = [ + e for e in sink.events if e.event_type == EventType.HOOK_END + ] + assert len(rule_events) == 1 + assert rule_events[0].hook == "BEFORE_AGENT" + assert rule_events[0].data["policy_id"] == "TEST-01" + assert rule_events[0].data["matched"] is True + assert 
rule_events[0].data["action"] == "deny" + # Mode travels on every event (PR #122 contract). + assert rule_events[0].data["enforcement_mode"] == EnforcementMode.AUDIT + + assert len(summary_events) == 1 + assert summary_events[0].data["matched_rules"] == 1 + assert summary_events[0].data["final_action"] == "audit" + assert summary_events[0].data["enforcement_mode"] == EnforcementMode.AUDIT + + +def test_audit_mode_unmatched_rule_logged_as_allow(audit_setup: Any) -> None: + """Unmatched rules still emit a rule_evaluation event with action='allow'.""" + manager, sink = audit_setup + evaluator = _build_evaluator( + _deny_rule_on_input_contains("secret"), + EnforcementMode.AUDIT, + audit_manager=manager, + ) + + evaluator.evaluate(_ctx("benign user query")) + + rule_events = [ + e for e in sink.events if e.event_type == EventType.RULE_EVALUATION + ] + assert len(rule_events) == 1 + assert rule_events[0].data["matched"] is False + assert rule_events[0].data["action"] == "allow" + + +# --------------------------------------------------------------------------- +# ENFORCE mode +# --------------------------------------------------------------------------- + + +def test_enforce_mode_raises_on_deny_match(audit_setup: Any) -> None: + """ENFORCE mode raises GovernanceBlockException when a DENY rule matches.""" + manager, _ = audit_setup + evaluator = _build_evaluator( + _deny_rule_on_input_contains("blocked"), + EnforcementMode.ENFORCE, + audit_manager=manager, + ) + + with pytest.raises(GovernanceBlockException) as exc_info: + evaluator.evaluate(_ctx("input is blocked")) + + exc = exc_info.value + assert exc.rule_id == "TEST-01" + assert exc.rule_name == "Test deny on input" + assert exc.audit_record is not None + assert exc.audit_record.final_action == Action.DENY + + +def test_enforce_mode_emits_audit_before_raising(audit_setup: Any) -> None: + """The audit trail must be emitted even when the call raises.""" + manager, sink = audit_setup + evaluator = _build_evaluator( + 
_deny_rule_on_input_contains("blocked"), + EnforcementMode.ENFORCE, + audit_manager=manager, + ) + + with pytest.raises(GovernanceBlockException): + evaluator.evaluate(_ctx("contains blocked")) + + rule_events = [ + e for e in sink.events if e.event_type == EventType.RULE_EVALUATION + ] + summary_events = [ + e for e in sink.events if e.event_type == EventType.HOOK_END + ] + assert len(rule_events) == 1 + assert summary_events[0].data["final_action"] == "deny" + assert summary_events[0].data["enforcement_mode"] == EnforcementMode.ENFORCE + + +def test_enforce_mode_returns_record_when_no_rule_matches(audit_setup: Any) -> None: + """No DENY hit → no raise; the AuditRecord is returned normally.""" + manager, _ = audit_setup + evaluator = _build_evaluator( + _deny_rule_on_input_contains("blocked"), + EnforcementMode.ENFORCE, + audit_manager=manager, + ) + + audit = evaluator.evaluate(_ctx("benign query")) + + assert audit.final_action == Action.ALLOW + assert audit.evaluations[0].matched is False + + +# --------------------------------------------------------------------------- +# Sink-failure isolation + no-audit-manager case +# --------------------------------------------------------------------------- + + +def test_sink_failure_does_not_propagate_or_block_evaluation( + audit_setup: Any, +) -> None: + """A broken sink must not make evaluate() raise or lose its return value. + + Contract: AuditManager wraps each sink's emit() in try/except with a + per-sink failure counter (circuit-breaker), so a sink exception + never propagates back to the evaluator. 
+ """ + manager, capturing_sink = audit_setup + + class _BrokenSink(AuditSink): + @property + def name(self) -> str: + return "broken" + + def emit(self, event: AuditEvent) -> None: + raise RuntimeError("sink broke") + + manager.register_sink(_BrokenSink()) + + evaluator = _build_evaluator( + _deny_rule_on_input_contains("secret"), + EnforcementMode.AUDIT, + audit_manager=manager, + ) + + # Must complete without raising even with a broken sink registered. + audit = evaluator.evaluate(_ctx("contains secret")) + + assert audit.final_action == Action.AUDIT + # The non-broken capturing sink still got its events. + assert any( + e.event_type == EventType.RULE_EVALUATION for e in capturing_sink.events + ) + + +def test_no_audit_manager_short_circuits_emission() -> None: + """``audit_manager=None`` is a no-op — evaluation still completes. + + Replaces the previous test that mocked ``get_audit_manager`` to + raise. With explicit injection, the equivalent "no manager + available" state is simply ``audit_manager=None`` at construction. + """ + evaluator = _build_evaluator( + _deny_rule_on_input_contains("secret"), + EnforcementMode.AUDIT, + audit_manager=None, + ) + + # Must complete, return record, and not raise. + audit = evaluator.evaluate(_ctx("contains secret")) + + assert audit.final_action == Action.AUDIT + assert audit.evaluations[0].matched is True + + +# --------------------------------------------------------------------------- +# Protocol conformance smoke test +# --------------------------------------------------------------------------- + + +def test_governance_evaluator_satisfies_evaluator_protocol() -> None: + """GovernanceEvaluator must be usable wherever EvaluatorProtocol is expected. + + Mirrors the pattern from test_detached_bridge_satisfies_debug_protocol — + an explicit assignment to the protocol-typed variable documents the + structural contract. 
+ """ + from uipath.core.adapters import EvaluatorProtocol + + evaluator: EvaluatorProtocol = GovernanceEvaluator(PolicyIndex()) + assert isinstance(evaluator, EvaluatorProtocol) + + +def test_evaluator_protocol_methods_resolvable_on_concrete() -> None: + """Every method the protocol declares must be callable on the concrete impl.""" + from uipath.core.adapters import EvaluatorProtocol + + evaluator: Any = GovernanceEvaluator(PolicyIndex()) + for method_name in ( + "evaluate_before_agent", + "evaluate_after_agent", + "evaluate_before_model", + "evaluate_after_model", + "evaluate_tool_call", + "evaluate_after_tool", + ): + assert callable(getattr(evaluator, method_name)) + # The variable annotation also asserts type compatibility at runtime + # because EvaluatorProtocol is @runtime_checkable. + assert isinstance(evaluator, EvaluatorProtocol) diff --git a/tests/test_evaluator_operators.py b/tests/test_evaluator_operators.py new file mode 100644 index 0000000..32e83c6 --- /dev/null +++ b/tests/test_evaluator_operators.py @@ -0,0 +1,672 @@ +"""Tests for ``GovernanceEvaluator`` operators and field resolution. + +Covers each operator implemented in :meth:`_apply_operator` plus the +``_check_*`` helper functions (vader, encoding, entropy, incident, +commitment) and the ``evaluate_*`` dispatchers. 
+""" + +from __future__ import annotations + +import pytest +from uipath.core.governance import EnforcementMode +from uipath.core.governance.models import Action, LifecycleHook + +from uipath.runtime.governance.native.evaluator import ( + _INCIDENT_PATTERNS, + GovernanceEvaluator, +) +from uipath.runtime.governance.native.models import ( + Check, + CheckContext, + Condition, + PolicyIndex, + PolicyPack, + Rule, +) + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _evaluator() -> GovernanceEvaluator: + """Build a GovernanceEvaluator with an empty PolicyIndex (operators only). + + AUDIT is the default mode; operator tests don't care about + enforcement and we don't need an audit manager for purely + operator-level assertions. + """ + return GovernanceEvaluator(policy_index=PolicyIndex()) + + +def _ctx(**fields) -> CheckContext: + """Construct a CheckContext with sensible defaults plus overrides.""" + defaults = dict( + hook=LifecycleHook.AFTER_MODEL, + agent_name="agent", + runtime_id="rt-1", + ) + defaults.update(fields) + return CheckContext(**defaults) + + +def _rule_with_condition(operator: str, field: str, value, *, negate: bool = False) -> Rule: + return Rule( + rule_id="r1", + name="r1", + clause="", + hook=LifecycleHook.AFTER_MODEL, + action=Action.AUDIT, + checks=[ + Check( + conditions=[ + Condition(operator=operator, field=field, value=value, negate=negate) + ], + ) + ], + ) + + +# Mode is per-instance now — tests construct evaluators with the mode +# they need via the ``enforcement_mode`` kwarg. No process-globals to +# reset. 
+ + +# --------------------------------------------------------------------------- +# Field resolution — _get_field_value +# --------------------------------------------------------------------------- + + +def test_get_field_value_top_level_attr() -> None: + ev = _evaluator() + ctx = _ctx(model_output="hello") + assert ev._get_field_value("model_output", ctx) == "hello" + + +def test_get_field_value_dotted_path_into_dict() -> None: + ev = _evaluator() + ctx = _ctx(session_state={"tool_calls": 7}) + assert ev._get_field_value("session_state.tool_calls", ctx) == 7 + + +def test_get_field_value_missing_segment_returns_none() -> None: + ev = _evaluator() + ctx = _ctx() + assert ev._get_field_value("nonexistent", ctx) is None + assert ev._get_field_value("session_state.absent", ctx) is None + + +# --------------------------------------------------------------------------- +# Existence / guardrail_fallback (special-cased before the None check) +# --------------------------------------------------------------------------- + + +def test_exists_true_when_value_present() -> None: + ev = _evaluator() + ctx = _ctx(model_output="x") + assert ev._apply_operator("exists", ev._get_field_value("model_output", ctx), None) is True + + +def test_exists_false_when_missing() -> None: + ev = _evaluator() + assert ev._apply_operator("exists", None, None) is False + + +def test_not_exists_inverse() -> None: + ev = _evaluator() + assert ev._apply_operator("not_exists", None, None) is True + assert ev._apply_operator("not_exists", "x", None) is False + + +def test_guardrail_fallback_mapped_and_disabled_fires() -> None: + ev = _evaluator() + result = ev._apply_operator( + "guardrail_fallback", + None, + {"mapped_to_uipath": True, "policy_enabled": False, "validator": "pii"}, + ) + assert result is True + + +@pytest.mark.parametrize( + "cfg", + [ + {"mapped_to_uipath": False, "policy_enabled": False}, + {"mapped_to_uipath": True, "policy_enabled": True}, + {"mapped_to_uipath": False, 
"policy_enabled": True}, + ], +) +def test_guardrail_fallback_silent_when_not_mapped_or_enabled(cfg: dict) -> None: + ev = _evaluator() + assert ev._apply_operator("guardrail_fallback", None, cfg) is False + + +def test_guardrail_fallback_non_dict_value_silent() -> None: + ev = _evaluator() + assert ev._apply_operator("guardrail_fallback", None, "string") is False + + +# --------------------------------------------------------------------------- +# None-field short-circuit (everything except exists / guardrail_fallback) +# --------------------------------------------------------------------------- + + +def test_other_operators_short_circuit_when_field_is_none() -> None: + ev = _evaluator() + for op in ("contains", "regex", "in_list", "gt"): + assert ev._apply_operator(op, None, "anything") is False, op + + +# --------------------------------------------------------------------------- +# Numeric operators +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize( + "op,lhs,rhs,expected", + [ + ("gt", 5, 3, True), + ("gt", 3, 5, False), + ("gt", 3, 3, False), + ("gte", 3, 3, True), + ("gte", 2, 3, False), + ("lt", 1, 3, True), + ("lt", 3, 3, False), + ("lte", 3, 3, True), + ("lte", 4, 3, False), + ], +) +def test_numeric_operators(op: str, lhs: float, rhs: float, expected: bool) -> None: + assert _evaluator()._apply_operator(op, lhs, rhs) is expected + + +def test_numeric_operators_handle_string_coercion() -> None: + ev = _evaluator() + assert ev._apply_operator("gt", "5", "3") is True + + +def test_numeric_operators_return_false_on_uncoercible() -> None: + ev = _evaluator() + assert ev._apply_operator("gt", "not-a-number", 3) is False + assert ev._apply_operator("gt", 3, "not-a-number") is False + + +# --------------------------------------------------------------------------- +# String operators +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize( + 
"op,lhs,rhs,expected", + [ + ("equals", "abc", "abc", True), + ("equals", "abc", "ABC", False), # equals is case-sensitive + ("eq", "x", "x", True), + ("not_equals", "abc", "xyz", True), + ("ne", "x", "x", False), + ("contains", "Hello World", "world", True), # case-insensitive + ("contains", "Hello", "xyz", False), + ("not_contains", "Hello", "xyz", True), + ("not_contains", "Hello", "hello", False), + ], +) +def test_string_operators(op: str, lhs: str, rhs: str, expected: bool) -> None: + assert _evaluator()._apply_operator(op, lhs, rhs) is expected + + +def test_regex_matches_pattern() -> None: + ev = _evaluator() + assert ev._apply_operator("regex", "Cost: $1,200", r"\$\d+") is True + + +def test_regex_matches_alias() -> None: + """``matches`` is documented as a synonym for ``regex``.""" + ev = _evaluator() + assert ev._apply_operator("matches", "abc-123", r"\d+") is True + + +def test_regex_invalid_pattern_returns_false() -> None: + """Malformed regex is logged and silently returns False.""" + ev = _evaluator() + assert ev._apply_operator("regex", "anything", "(unclosed") is False + + +# --------------------------------------------------------------------------- +# List operators +# --------------------------------------------------------------------------- + + +def test_in_list_membership() -> None: + ev = _evaluator() + assert ev._apply_operator("in_list", "delete_file", ["shell", "delete_file"]) is True + assert ev._apply_operator("in_list", "ls", ["shell", "delete_file"]) is False + + +def test_in_list_non_list_value_returns_false() -> None: + ev = _evaluator() + assert ev._apply_operator("in_list", "x", "not a list") is False + + +def test_not_in_list_inverse() -> None: + ev = _evaluator() + assert ev._apply_operator("not_in_list", "ls", ["shell"]) is True + assert ev._apply_operator("not_in_list", "shell", ["shell"]) is False + + +def test_not_in_list_non_list_value_returns_true() -> None: + """``not_in_list`` against a non-list value safely returns True 
+ (nothing is in a non-list).""" + ev = _evaluator() + assert ev._apply_operator("not_in_list", "x", "not a list") is True + + +# --------------------------------------------------------------------------- +# Unknown operator +# --------------------------------------------------------------------------- + + +def test_unknown_operator_returns_false() -> None: + """Unknown operator strings log a debug message and return False.""" + ev = _evaluator() + assert ev._apply_operator("never_heard_of_this", "x", "y") is False + + +# --------------------------------------------------------------------------- +# Negate flag — flips the result +# --------------------------------------------------------------------------- + + +def test_condition_negate_flips_result() -> None: + ev = _evaluator() + ctx = _ctx(model_output="hello") + # contains "hello" → matches; negate inverts to False. + cond = Condition( + operator="contains", field="model_output", value="hello", negate=True, + ) + assert ev._evaluate_condition(cond, ctx) is False + cond2 = Condition( + operator="contains", field="model_output", value="world", negate=True, + ) + assert ev._evaluate_condition(cond2, ctx) is True + + +# --------------------------------------------------------------------------- +# Check-level logic: "all" (AND) vs "any" (OR), and empty-conditions +# --------------------------------------------------------------------------- + + +def test_empty_check_conditions_always_match() -> None: + """A check with no conditions trivially matches — surfaces rule shape bugs.""" + ev = _evaluator() + check = Check(conditions=[], logic="all") + matched, _ = ev._evaluate_check(check, _ctx()) + assert matched is True + + +def test_check_logic_all_requires_every_condition() -> None: + ev = _evaluator() + check = Check( + conditions=[ + Condition(operator="contains", field="model_output", value="a"), + Condition(operator="contains", field="model_output", value="missing"), + ], + logic="all", + ) + matched, _ = 
ev._evaluate_check(check, _ctx(model_output="a only")) + assert matched is False + + +def test_check_logic_any_requires_one_condition() -> None: + ev = _evaluator() + check = Check( + conditions=[ + Condition(operator="contains", field="model_output", value="present"), + Condition(operator="contains", field="model_output", value="absent"), + ], + logic="any", + ) + matched, detail = ev._evaluate_check(check, _ctx(model_output="present text")) + assert matched is True + # detail is the check's message on match; empty by default in our builder. + assert detail == "" + + +# --------------------------------------------------------------------------- +# VADER sentiment +# --------------------------------------------------------------------------- + + +def test_vader_concern_negative_text_fires() -> None: + """A clearly-negative sentence trips the default threshold of -0.3.""" + assert ( + GovernanceEvaluator._check_vader_concern( + "I absolutely hate this terrible, awful product.", {"threshold": -0.3} + ) + is True + ) + + +def test_vader_concern_positive_text_does_not_fire() -> None: + assert ( + GovernanceEvaluator._check_vader_concern( + "This is wonderful and I love it!", {"threshold": -0.3} + ) + is False + ) + + +def test_vader_concern_empty_text_silent() -> None: + assert GovernanceEvaluator._check_vader_concern("", {}) is False + assert GovernanceEvaluator._check_vader_concern(" ", {}) is False + + +def test_vader_concern_threshold_as_scalar() -> None: + """``params`` may be a bare number; the operator coerces.""" + assert ( + GovernanceEvaluator._check_vader_concern("I hate everything", -0.3) is True + ) + + +def test_vader_concern_invalid_threshold_falls_back() -> None: + """Non-numeric scalar params fall back to the documented default.""" + # "garbage" -> default -0.3 → should still classify clear negative + assert ( + GovernanceEvaluator._check_vader_concern( + "I hate this awful, terrible thing", "garbage" + ) + is True + ) + + +# 
--------------------------------------------------------------------------- +# Encoding integrity +# --------------------------------------------------------------------------- + + +def test_encoding_concern_clean_text_silent() -> None: + assert ( + GovernanceEvaluator._check_encoding_concern( + "Just a normal English sentence with no corruption.", {} + ) + is False + ) + + +def test_encoding_concern_empty_silent() -> None: + assert GovernanceEvaluator._check_encoding_concern("", {}) is False + + +def test_encoding_concern_replacement_chars_fire() -> None: + """U+FFFD replacement chars are a strong corruption signal.""" + text = "Hello � � world" + assert ( + GovernanceEvaluator._check_encoding_concern( + text, {"min_corruption_events": 2} + ) + is True + ) + + +def test_encoding_concern_mojibake_bigrams_fire() -> None: + """Latin-1-as-UTF-8 mojibake patterns are a known corruption shape.""" + text = "é é hello é" + assert ( + GovernanceEvaluator._check_encoding_concern( + text, {"min_corruption_events": 2} + ) + is True + ) + + +def test_encoding_concern_hex_escape_literals_fire() -> None: + """Literal ``\\xHH`` sequences mean raw bytes leaked into a string.""" + text = r"Hello \x80 \x81 \x82 world" + assert ( + GovernanceEvaluator._check_encoding_concern( + text, {"min_corruption_events": 2} + ) + is True + ) + + +# --------------------------------------------------------------------------- +# Entropy (stdlib only — deterministic) +# --------------------------------------------------------------------------- + + +def test_entropy_concern_normal_english_does_not_fire() -> None: + """English prose entropy lands ~3.5–4.5 bits/byte — inside default range.""" + text = "The quick brown fox jumps over the lazy dog." 
* 5 + assert ( + GovernanceEvaluator._check_entropy_concern(text, {"min": 1.5, "max": 7.5}) + is False + ) + + +def test_entropy_concern_low_entropy_fires() -> None: + """Highly repetitive text approaches 0 bits/byte.""" + text = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + assert ( + GovernanceEvaluator._check_entropy_concern(text, {"min": 1.5, "max": 7.5}) + is True + ) + + +def test_entropy_concern_high_entropy_fires() -> None: + """Random-ish bytes approach 8 bits/byte.""" + # Build text with many distinct chars to push entropy high. + text = "".join(chr(c) for c in range(32, 127)) * 5 + assert ( + GovernanceEvaluator._check_entropy_concern(text, {"min": 1.5, "max": 6.0}) + is True + ) + + +def test_entropy_concern_empty_silent() -> None: + assert GovernanceEvaluator._check_entropy_concern("", {}) is False + + +def test_entropy_concern_non_dict_params_uses_defaults() -> None: + """Non-dict params don't crash; defaults apply.""" + # Normal English prose still won't trip the default min=1.5, max=7.5 range. + text = "The quick brown fox jumps over the lazy dog." 
+ assert ( + GovernanceEvaluator._check_entropy_concern(text, "garbage") is False + ) + + +# --------------------------------------------------------------------------- +# Incident taxonomy (regex-based, deterministic) +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize( + "text,expected_category", + [ + ("I cannot help with that.", "safety_refusal"), + ("I'm sorry, but I cannot answer.", "safety_refusal"), + ("500 internal server error", "tool_failure"), + ("Connection refused", "tool_failure"), + ("timed out", "tool_failure"), + ("401 unauthorized", "auth_failure"), + ("authentication failed", "auth_failure"), + ("429", "quota_exceeded"), + ("rate limit exceeded", "quota_exceeded"), + ("I made that up", "hallucination"), + ("I don't actually know", "hallucination"), + ], +) +def test_incident_concern_categorical_matches(text: str, expected_category: str) -> None: + """Each category in ``_INCIDENT_PATTERNS`` has at least one matching exemplar.""" + assert expected_category in _INCIDENT_PATTERNS + assert GovernanceEvaluator._check_incident_concern(text, {}) is True + + +def test_incident_concern_unmatched_silent() -> None: + assert ( + GovernanceEvaluator._check_incident_concern( + "All systems operating normally.", {} + ) + is False + ) + + +def test_incident_concern_empty_silent() -> None: + assert GovernanceEvaluator._check_incident_concern("", {}) is False + + +def test_incident_concern_category_filter() -> None: + """Limit scanning to a subset of categories via ``categories`` param.""" + # "401 unauthorized" hits auth_failure; with only quota_exceeded enabled, + # the scanner should miss it. + assert ( + GovernanceEvaluator._check_incident_concern( + "401 unauthorized", {"categories": ["quota_exceeded"]} + ) + is False + ) + # With auth_failure enabled, it fires. 
+ assert ( + GovernanceEvaluator._check_incident_concern( + "401 unauthorized", {"categories": ["auth_failure"]} + ) + is True + ) + + +def test_incident_concern_unknown_category_silently_dropped() -> None: + """Categories the system doesn't know about are silently ignored.""" + # Only the unknown category is requested — falls back to no categories, + # so even matching text doesn't fire. + result = GovernanceEvaluator._check_incident_concern( + "401 unauthorized", {"categories": ["unknown_cat_xyz"]} + ) + assert result is False + + +# --------------------------------------------------------------------------- +# evaluate_* dispatchers — verify they build the right CheckContext +# --------------------------------------------------------------------------- + + +def _record_context_evaluator() -> tuple[GovernanceEvaluator, dict]: + """Patch evaluate() to capture the context it receives instead of running rules.""" + captured: dict = {} + ev = _evaluator() + + def _fake_evaluate(ctx): # type: ignore[no-untyped-def] + captured["ctx"] = ctx + from datetime import datetime, timezone + + from uipath.core.governance.models import AuditRecord + + return AuditRecord( + timestamp=datetime.now(timezone.utc), + agent_name=ctx.agent_name, + runtime_id=ctx.runtime_id, + hook=ctx.hook, + evaluations=[], + final_action=Action.ALLOW, + ) + + ev.evaluate = _fake_evaluate # type: ignore[assignment] + return ev, captured + + +def test_evaluate_before_agent_builds_context() -> None: + ev, captured = _record_context_evaluator() + ev.evaluate_before_agent( + agent_input="user-text", + agent_name="a", + runtime_id="r", + model_name="gpt-5", + ) + ctx = captured["ctx"] + assert ctx.hook == LifecycleHook.BEFORE_AGENT + assert ctx.agent_input == "user-text" + assert ctx.model_name == "gpt-5" + + +def test_evaluate_after_agent_builds_context() -> None: + ev, captured = _record_context_evaluator() + ev.evaluate_after_agent( + agent_output="reply", agent_name="a", runtime_id="r", + ) + ctx = 
captured["ctx"] + assert ctx.hook == LifecycleHook.AFTER_AGENT + assert ctx.agent_output == "reply" + + +def test_evaluate_before_model_carries_messages() -> None: + ev, captured = _record_context_evaluator() + ev.evaluate_before_model( + model_input="prompt", + agent_name="a", + runtime_id="r", + messages=[{"role": "user", "content": "hi"}], + model_name="gpt-5", + ) + ctx = captured["ctx"] + assert ctx.hook == LifecycleHook.BEFORE_MODEL + assert ctx.model_input == "prompt" + assert ctx.messages == [{"role": "user", "content": "hi"}] + + +def test_evaluate_after_model_builds_context() -> None: + ev, captured = _record_context_evaluator() + ev.evaluate_after_model( + model_output="resp", agent_name="a", runtime_id="r", + ) + ctx = captured["ctx"] + assert ctx.hook == LifecycleHook.AFTER_MODEL + assert ctx.model_output == "resp" + + +def test_evaluate_tool_call_carries_args() -> None: + ev, captured = _record_context_evaluator() + ev.evaluate_tool_call( + tool_name="search", + tool_args={"q": "x"}, + agent_name="a", + runtime_id="r", + session_state={"tool_calls": 1}, + ) + ctx = captured["ctx"] + assert ctx.hook == LifecycleHook.TOOL_CALL + assert ctx.tool_name == "search" + assert ctx.tool_args == {"q": "x"} + assert ctx.session_state == {"tool_calls": 1} + + +def test_evaluate_after_tool_carries_result() -> None: + ev, captured = _record_context_evaluator() + ev.evaluate_after_tool( + tool_name="search", + tool_result="some-data", + agent_name="a", + runtime_id="r", + ) + ctx = captured["ctx"] + assert ctx.hook == LifecycleHook.AFTER_TOOL + assert ctx.tool_name == "search" + assert ctx.tool_result == "some-data" + + +# --------------------------------------------------------------------------- +# DISABLED mode — evaluate() short-circuits without emitting audit +# --------------------------------------------------------------------------- + + +def test_disabled_mode_returns_empty_audit_record() -> None: + """DISABLED mode short-circuits the rule loop and audit 
emission.""" + rule = _rule_with_condition("contains", "model_output", "anything") + pack = PolicyPack(name="p", version="1", description="", rules=[rule]) + idx = PolicyIndex() + idx.add_pack(pack) + ev = GovernanceEvaluator( + policy_index=idx, enforcement_mode=EnforcementMode.DISABLED + ) + + audit = ev.evaluate(_ctx(model_output="contains anything")) + assert audit.final_action == Action.ALLOW + assert audit.evaluations == [] diff --git a/tests/test_governance_runtime.py b/tests/test_governance_runtime.py index 810a881..23654a7 100644 --- a/tests/test_governance_runtime.py +++ b/tests/test_governance_runtime.py @@ -1,25 +1,29 @@ -"""Tests for the GovernanceRuntime wrapper and the provider loader path. - -The runtime no longer introspects the delegate's private attributes to -discover the conversational flag — the wiring layer passes it -explicitly. The runtime also no longer reads the governance feature -flag: the wiring layer decides whether to construct -:class:`GovernanceRuntime` at all. +"""Tests for :class:`UiPathGovernedRuntime` — pure resolved-policy wrapper. + +The runtime takes an already-resolved :class:`PolicyIndex` + +:class:`EnforcementMode` at construction (the host fetched the policy +asynchronously via the :class:`GovernancePolicyProvider` and compiled +the YAML). Tests here confirm the wrapper holds the snapshot and +passes execution straight through to the delegate. + +``trace_id`` is intentionally NOT on this wrapper — the injected +provider resolves it at HTTP-call time and the compensator captures +live OTel context across the pool hop via +``contextvars.copy_context``. Tests that previously asserted +``runtime.trace_id`` were dropped along with the property. 
""" from __future__ import annotations from typing import Any -from uipath.core.governance import ( - EnforcementMode, - PolicyResponse, -) +from uipath.core.governance import EnforcementMode -from tests._helpers import StubPolicyProvider -from uipath.runtime.governance.native.loader import PolicyLoader +from uipath.runtime.governance.native import ( + build_policy_index_from_yaml, +) from uipath.runtime.governance.native.models import PolicyIndex -from uipath.runtime.governance.runtime import GovernanceRuntime +from uipath.runtime.governance.runtime import UiPathGovernedRuntime SIMPLE_POLICY_YAML = """ standard: provider-pack @@ -33,107 +37,28 @@ """ -# Each test constructs a fresh ``PolicyLoader`` / ``GovernanceRuntime`` -# — no module-level state to reset. - - # --------------------------------------------------------------------------- -# PolicyLoader — provider plumbing (mode application, context, errors) +# build_policy_index_from_yaml — host-side compile path # --------------------------------------------------------------------------- -def test_loader_builds_index_and_applies_mode() -> None: - provider = StubPolicyProvider( - response=PolicyResponse(mode=EnforcementMode.ENFORCE, policies=SIMPLE_POLICY_YAML) - ) - - loader = PolicyLoader(provider) - index = loader.load_policy_index() - +def test_build_policy_index_from_yaml_compiles_pack() -> None: + """The host uses this to turn the provider's YAML response into the snapshot.""" + index = build_policy_index_from_yaml(SIMPLE_POLICY_YAML) assert isinstance(index, PolicyIndex) assert index.total_rules == 1 assert "provider-pack" in index.pack_names - assert loader.enforcement_mode == EnforcementMode.ENFORCE - - -def test_loader_passes_is_conversational_in_context() -> None: - provider = StubPolicyProvider( - response=PolicyResponse(mode=EnforcementMode.AUDIT, policies=SIMPLE_POLICY_YAML) - ) - - PolicyLoader(provider, is_conversational=True).load_policy_index() - assert len(provider.calls) == 1 - assert 
provider.calls[0].is_conversational is True - - -def test_loader_omits_is_conversational_when_unset() -> None: - """``is_conversational=None`` (the default) leaves the selector unset.""" - provider = StubPolicyProvider( - response=PolicyResponse(mode=EnforcementMode.AUDIT, policies=SIMPLE_POLICY_YAML) - ) - - PolicyLoader(provider).load_policy_index() - - assert len(provider.calls) == 1 - assert provider.calls[0].is_conversational is None - - -def test_loader_returns_empty_when_provider_raises() -> None: - provider = StubPolicyProvider(raises=RuntimeError("boom")) - index = PolicyLoader(provider).load_policy_index() - assert index.total_rules == 0 - - -def test_loader_returns_empty_on_empty_policies() -> None: - provider = StubPolicyProvider( - response=PolicyResponse(mode=EnforcementMode.AUDIT, policies="") - ) - index = PolicyLoader(provider).load_policy_index() - assert index.total_rules == 0 - -def test_loader_returns_empty_on_zero_rules() -> None: - empty_pack_yaml = "standard: empty\nrules: []\n" - provider = StubPolicyProvider( - response=PolicyResponse(mode=EnforcementMode.AUDIT, policies=empty_pack_yaml) - ) - index = PolicyLoader(provider).load_policy_index() - assert index.total_rules == 0 - - -def test_loader_returns_empty_on_malformed_yaml() -> None: - provider = StubPolicyProvider( - response=PolicyResponse( - mode=EnforcementMode.AUDIT, policies="key: : invalid: : yaml" - ) - ) - index = PolicyLoader(provider).load_policy_index() +def test_build_policy_index_from_yaml_empty_yields_empty_index() -> None: + """Empty YAML compiles to an empty PolicyIndex — host can pass straight through.""" + index = build_policy_index_from_yaml("") + assert isinstance(index, PolicyIndex) assert index.total_rules == 0 -def test_loader_does_not_change_mode_when_response_mode_is_none() -> None: - """Provider returning ``mode=None`` doesn't clobber a previously-set mode.""" - p1 = StubPolicyProvider( - response=PolicyResponse(mode=EnforcementMode.ENFORCE, 
policies=SIMPLE_POLICY_YAML) - ) - loader = PolicyLoader(p1) - loader.load_policy_index() - assert loader.enforcement_mode == EnforcementMode.ENFORCE - - # Next load via a different provider that returns mode=None must not - # demote the loader's mode back to AUDIT. - loader._provider = StubPolicyProvider( - response=PolicyResponse(mode=None, policies=SIMPLE_POLICY_YAML) - ) - loader.clear_cache() - loader.load_policy_index() - - assert loader.enforcement_mode == EnforcementMode.ENFORCE - - # --------------------------------------------------------------------------- -# GovernanceRuntime — passthroughs + loader wiring +# UiPathGovernedRuntime — passthroughs # --------------------------------------------------------------------------- @@ -163,57 +88,53 @@ async def dispose(self) -> None: self.disposed = True -def test_governance_runtime_exposes_loader_bound_to_provider() -> None: - """The wrapper builds an instance-scoped PolicyLoader carrying the provider.""" - provider = StubPolicyProvider( - response=PolicyResponse(mode=EnforcementMode.AUDIT, policies=SIMPLE_POLICY_YAML) +def _make_runtime( + delegate: _StubDelegate | None = None, + *, + policy_index: PolicyIndex | None = None, + enforcement_mode: EnforcementMode = EnforcementMode.AUDIT, +) -> UiPathGovernedRuntime: + """Build a runtime with sensible test defaults.""" + return UiPathGovernedRuntime( + delegate or _StubDelegate(), + policy_index if policy_index is not None else PolicyIndex(), + enforcement_mode, ) - runtime = GovernanceRuntime(_StubDelegate(), policy_provider=provider) - assert isinstance(runtime.loader, PolicyLoader) - assert runtime.loader._provider is provider - - -def test_governance_runtime_forwards_is_conversational_to_loader() -> None: - """The constructor's explicit ``is_conversational`` reaches PolicyContext.""" - provider = StubPolicyProvider( - response=PolicyResponse(mode=EnforcementMode.AUDIT, policies=SIMPLE_POLICY_YAML) - ) - - runtime = GovernanceRuntime( - _StubDelegate(), 
policy_provider=provider, is_conversational=True - ) - # Force the prefetch to land — load synchronously so we can read calls[0]. - runtime.loader.get_policy_index() +# --------------------------------------------------------------------------- +# Snapshot stored internally — not exposed as a public property +# --------------------------------------------------------------------------- - assert provider.calls, "provider.get_policy was never invoked" - assert provider.calls[0].is_conversational is True +def test_resolved_policy_index_is_held_for_evaluator_use() -> None: + """The wrapper stores the resolved snapshot; the evaluator reads it.""" + index = build_policy_index_from_yaml(SIMPLE_POLICY_YAML) + runtime = _make_runtime(policy_index=index) + # Internal attribute — verify the wrapper kept the exact instance. + assert runtime._policy_index is index -def test_governance_runtime_loader_default_selector_is_none() -> None: - """Omitting ``is_conversational`` leaves the selector unset on PolicyContext.""" - provider = StubPolicyProvider( - response=PolicyResponse(mode=EnforcementMode.AUDIT, policies=SIMPLE_POLICY_YAML) - ) - runtime = GovernanceRuntime(_StubDelegate(), policy_provider=provider) - runtime.loader.get_policy_index() +def test_enforcement_mode_is_held_for_evaluator_use() -> None: + """The wrapper stores the mode supplied at construction.""" + runtime = _make_runtime(enforcement_mode=EnforcementMode.ENFORCE) + assert runtime._enforcement_mode is EnforcementMode.ENFORCE - assert provider.calls[0].is_conversational is None +def test_empty_policy_index_is_a_valid_construction() -> None: + """``PolicyIndex()`` with no packs is acceptable — wrapper attaches without rules.""" + runtime = _make_runtime(policy_index=PolicyIndex()) + assert runtime._policy_index.total_rules == 0 -def test_governance_runtime_with_none_provider_yields_empty_index() -> None: - """No provider → loader yields an empty PolicyIndex, no provider invocation.""" - runtime = 
GovernanceRuntime(_StubDelegate(), policy_provider=None) - index = runtime.loader.get_policy_index() - assert index.total_rules == 0 +# --------------------------------------------------------------------------- +# Passthrough behavior +# --------------------------------------------------------------------------- async def test_governance_runtime_execute_delegates() -> None: delegate = _StubDelegate() - runtime = GovernanceRuntime(delegate, policy_provider=None) + runtime = _make_runtime(delegate) result = await runtime.execute({"x": 1}) @@ -223,7 +144,7 @@ async def test_governance_runtime_execute_delegates() -> None: async def test_governance_runtime_stream_delegates() -> None: delegate = _StubDelegate() - runtime = GovernanceRuntime(delegate, policy_provider=None) + runtime = _make_runtime(delegate) events = [e async for e in runtime.stream({"x": 1})] @@ -233,7 +154,7 @@ async def test_governance_runtime_stream_delegates() -> None: async def test_governance_runtime_schema_and_dispose_delegate() -> None: delegate = _StubDelegate() - runtime = GovernanceRuntime(delegate, policy_provider=None) + runtime = _make_runtime(delegate) assert await runtime.get_schema() == "schema" await runtime.dispose() diff --git a/tests/test_guardrail_compensation.py b/tests/test_guardrail_compensation.py new file mode 100644 index 0000000..ef6046a --- /dev/null +++ b/tests/test_guardrail_compensation.py @@ -0,0 +1,503 @@ +"""Tests for the instance-scoped GuardrailCompensator. + +The runtime layer owns only the bounded background pool and the +contextvars propagation that keeps live OTel context visible on the +worker thread. HTTP/auth/URL/header concerns — including ``trace_id`` +resolution — live behind the +:class:`uipath.core.governance.GovernanceCompensationProvider` protocol +and are exercised in the concrete provider's own tests. + +These tests cover: + +- ``disabled_guardrails`` — distilling fired ``guardrail_fallback`` rules + into per-rule wire metadata. 
+- ``GuardrailCompensator.submit`` — pool routing, in-flight + backpressure, shutdown safety, wire-model assembly, and the + ``contextvars.copy_context()`` propagation that keeps the agent's + OTel span visible inside the worker callable. +- Cross-instance isolation — two compensators do not share a pool or + semaphore. +- Process-level cleanup — one ``atexit`` registration, weak refs only. +""" + +from __future__ import annotations + +import gc +import threading +from types import SimpleNamespace +from typing import Any +from unittest.mock import MagicMock, patch + +import pytest +from uipath.core.governance import ( + FiredRule, + GovernanceCompensationProvider, + GovernRequest, +) + +from uipath.runtime.governance.native import guardrail_compensation +from uipath.runtime.governance.native.guardrail_compensation import ( + GuardrailCompensator, + disabled_guardrails, +) + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _provider() -> MagicMock: + """Mock satisfying the GovernanceCompensationProvider protocol.""" + return MagicMock(spec=GovernanceCompensationProvider) + + +def _rules( + *validators: str, + rule_id: str = "R1", + rule_name: str = "n", + pack: str = "p", +) -> list[FiredRule]: + """Build a list of FiredRule wire models — one per validator.""" + return [ + FiredRule( + rule_id=rule_id, + rule_name=rule_name, + pack_name=pack, + validator=v, + ) + for v in validators + ] + + +def _run_inline(compensator: GuardrailCompensator) -> None: + """Replace the pool's ``submit`` with synchronous execution. + + Lets tests assert provider behavior deterministically without + relying on wait()/sleep(). + """ + + def _sync_submit(fn: Any, *args: Any, **kwargs: Any) -> None: + # The compensator submits ``ctx.run, _run`` (the bound method + # of a captured context plus the callable). 
Mirror that here so + # the captured context still wraps the worker callable. + if args: + fn(*args, **kwargs) + else: + fn() + + compensator._pool.submit = _sync_submit # type: ignore[method-assign] + + +@pytest.fixture(autouse=True) +def _close_dangling_compensators() -> Any: + """Best-effort teardown: close any compensator weak-refs still in the set. + + Each test should call ``compensator.close()``, but a failing + assertion mid-test could leak. The sweep prevents pytest from + hanging at exit on a leftover worker pool. + """ + yield + for compensator in list(guardrail_compensation._live_compensators): + try: + compensator.close() + except Exception: # noqa: BLE001 - best-effort teardown + pass + guardrail_compensation._live_compensators.clear() + + +# --------------------------------------------------------------------------- +# disabled_guardrails +# --------------------------------------------------------------------------- + + +def test_disabled_guardrails_returns_fired_rule_for_matched_disabled_guardrail() -> None: + cond = SimpleNamespace( + operator="guardrail_fallback", + value={ + "validator": "pii_detection", + "mapped_to_uipath": True, + "policy_enabled": False, + }, + ) + rule = SimpleNamespace(checks=[SimpleNamespace(conditions=[cond])], pack_name="") + audit = SimpleNamespace( + evaluations=[ + SimpleNamespace(matched=True, rule_id="R1", rule_name="PII guardrail") + ] + ) + policy_index = SimpleNamespace( + get_rule=lambda rid: rule if rid == "R1" else None + ) + + out = disabled_guardrails(audit, policy_index) + + assert len(out) == 1 + fr = out[0] + assert isinstance(fr, FiredRule) + assert fr.rule_id == "R1" + assert fr.rule_name == "PII guardrail" + assert fr.pack_name == "" + assert fr.validator == "pii_detection" + + +def test_disabled_guardrails_skips_unmatched_evaluations() -> None: + audit = SimpleNamespace( + evaluations=[SimpleNamespace(matched=False, rule_id="R1", rule_name="x")] + ) + policy_index = SimpleNamespace(get_rule=lambda 
rid: None) + assert disabled_guardrails(audit, policy_index) == [] + + +def test_disabled_guardrails_skips_non_guardrail_conditions() -> None: + cond = SimpleNamespace(operator="regex", value="some-pattern") + rule = SimpleNamespace(checks=[SimpleNamespace(conditions=[cond])]) + audit = SimpleNamespace( + evaluations=[SimpleNamespace(matched=True, rule_id="R1", rule_name="x")] + ) + policy_index = SimpleNamespace(get_rule=lambda rid: rule) + assert disabled_guardrails(audit, policy_index) == [] + + +def test_disabled_guardrails_skips_enabled_guardrails() -> None: + """Mapped to UiPath AND enabled → no compensation needed.""" + cond = SimpleNamespace( + operator="guardrail_fallback", + value={ + "validator": "pii_detection", + "mapped_to_uipath": True, + "policy_enabled": True, + }, + ) + rule = SimpleNamespace(checks=[SimpleNamespace(conditions=[cond])], pack_name="") + audit = SimpleNamespace( + evaluations=[SimpleNamespace(matched=True, rule_id="R1", rule_name="x")] + ) + policy_index = SimpleNamespace(get_rule=lambda rid: rule) + assert disabled_guardrails(audit, policy_index) == [] + + +def test_disabled_guardrails_skips_unmapped_guardrails() -> None: + """Not mapped to UiPath → server can't fall back; skip.""" + cond = SimpleNamespace( + operator="guardrail_fallback", + value={ + "validator": "pii_detection", + "mapped_to_uipath": False, + "policy_enabled": False, + }, + ) + rule = SimpleNamespace(checks=[SimpleNamespace(conditions=[cond])], pack_name="") + audit = SimpleNamespace( + evaluations=[SimpleNamespace(matched=True, rule_id="R1", rule_name="x")] + ) + policy_index = SimpleNamespace(get_rule=lambda rid: rule) + assert disabled_guardrails(audit, policy_index) == [] + + +# --------------------------------------------------------------------------- +# GuardrailCompensator.submit — short-circuits + pool routing + backpressure +# --------------------------------------------------------------------------- + + +def test_submit_empty_rules_short_circuits() -> 
None: + """No rules → no pool submit, no provider call.""" + provider = _provider() + compensator = GuardrailCompensator(provider) + with patch.object(compensator, "_pool") as mock_pool: + compensator.submit([], {}, "before_model", "ts", "a", "r") + mock_pool.submit.assert_not_called() + provider.compensate.assert_not_called() + + +def test_submit_no_validators_short_circuits() -> None: + """Rules with empty validator strings → no call (nothing to dispatch).""" + provider = _provider() + compensator = GuardrailCompensator(provider) + rules = [FiredRule(rule_id="R", rule_name="n", pack_name="p", validator="")] + with patch.object(compensator, "_pool") as mock_pool: + compensator.submit(rules, {}, "before_model", "ts", "a", "r") + mock_pool.submit.assert_not_called() + provider.compensate.assert_not_called() + + +def test_submit_routes_through_pool() -> None: + """A non-empty rules list submits a single task to the pool.""" + provider = _provider() + compensator = GuardrailCompensator(provider) + with patch.object(compensator, "_pool") as mock_pool: + compensator.submit( + _rules("pii_detection"), + {"content": "x"}, + "before_model", + "ts", + "agent", + "run", + ) + mock_pool.submit.assert_called_once() + + +def test_submit_drops_when_pool_saturated() -> None: + """When the in-flight semaphore is exhausted, the call is dropped.""" + provider = _provider() + compensator = GuardrailCompensator(provider) + + # Force the semaphore into "exhausted" state. 
+ drained = threading.BoundedSemaphore(1) + drained.acquire() # next acquire(blocking=False) returns False + compensator._inflight = drained + + with patch.object(compensator, "_pool") as mock_pool: + compensator.submit( + _rules("pii_detection"), + {}, + "before_model", + "ts", + "agent", + "run", + ) + + mock_pool.submit.assert_not_called() + provider.compensate.assert_not_called() + + +def test_submit_swallows_pool_shutdown_runtimeerror() -> None: + """If the pool was shut down, submit must not raise.""" + + class _ShutdownPool: + def submit(self, fn: Any, *args: Any, **kwargs: Any) -> None: + raise RuntimeError("cannot schedule new futures after shutdown") + + compensator = GuardrailCompensator(_provider()) + compensator._pool = _ShutdownPool() # type: ignore[assignment] + compensator._inflight = threading.BoundedSemaphore(4) + + # Must not raise. + compensator.submit(_rules("x"), {}, "before_model", "ts", "a", "r") + + +# --------------------------------------------------------------------------- +# GuardrailCompensator.submit — wire-model assembly + provider invocation +# --------------------------------------------------------------------------- + + +def test_submit_invokes_provider_with_govern_request() -> None: + """The provider receives a GovernRequest carrying every wire field. + + ``trace_id`` is left empty on the wire — the injected provider + resolves it at HTTP-call time. 
+ """ + provider = _provider() + compensator = GuardrailCompensator(provider) + _run_inline(compensator) + rules = _rules("pii_detection", "harmful_content") + + compensator.submit( + rules, + {"content": "x"}, + "before_model", + "2026-06-06T00:00:00Z", + "langchain", + "patch-langchain", + ) + + provider.compensate.assert_called_once() + (request,) = provider.compensate.call_args.args + assert isinstance(request, GovernRequest) + # distinct validators drive the guardrail API call + assert request.validators == ["pii_detection", "harmful_content"] + assert request.rules == rules + assert request.data == {"content": "x"} + assert request.hook == "before_model" + # ``trace_id`` is intentionally empty — the provider resolves at HTTP time. + assert request.trace_id == "" + assert request.src_timestamp == "2026-06-06T00:00:00Z" + assert request.agent_name == "langchain" + assert request.runtime_id == "patch-langchain" + # Job-context fields are left for the provider to auto-fill from env. + assert request.folder_key is None + assert request.job_key is None + assert request.process_key is None + assert request.reference_id is None + assert request.agent_version is None + + +def test_submit_dedupes_validators() -> None: + """Multiple rules with the same validator collapse on the wire.""" + provider = _provider() + compensator = GuardrailCompensator(provider) + _run_inline(compensator) + rules = _rules("pii_detection") + _rules("pii_detection", rule_id="R2") + + compensator.submit(rules, {}, "before_model", "ts", "a", "r") + + (request,) = provider.compensate.call_args.args + assert request.validators == ["pii_detection"] + # Per-rule metadata is preserved (one record per rule even with shared validator). 
+ assert len(request.rules) == 2 + + +def test_submit_swallows_provider_errors() -> None: + """A provider exception must never propagate to the caller / agent.""" + provider = _provider() + provider.compensate.side_effect = RuntimeError("network down") + compensator = GuardrailCompensator(provider) + _run_inline(compensator) + + # Must not raise. + compensator.submit(_rules("x"), {}, "before_model", "ts", "a", "r") + + provider.compensate.assert_called_once() + + +def test_submit_releases_semaphore_on_provider_error() -> None: + """Provider failure must not leak a semaphore slot.""" + provider = _provider() + provider.compensate.side_effect = RuntimeError("transient") + # 4 workers × 1 oversubscription = 4 slots total. + compensator = GuardrailCompensator(provider, inflight_oversubscription=1) + _run_inline(compensator) + + # Fire 8 — all 8 must reach the provider; the semaphore must release + # on each error so the next submit can acquire. + for _ in range(8): + compensator.submit(_rules("x"), {}, "before_model", "ts", "a", "r") + + assert provider.compensate.call_count == 8, ( + "All 8 submissions should fire — semaphore must release on error" + ) + + +# --------------------------------------------------------------------------- +# contextvars propagation — live OTel context visible inside the worker +# --------------------------------------------------------------------------- + + +def test_submit_propagates_otel_context_to_worker_thread() -> None: + """The worker callable runs inside the caller's contextvars snapshot. + + Without ``contextvars.copy_context()``, a worker thread started by + ``ThreadPoolExecutor`` would see an empty OTel context — the + provider could only resolve env-based trace ids on the worker. + With the snapshot, the worker sees the same live span the agent + hook saw, so the provider can resolve the agent's actual trace id. 
+ """ + from opentelemetry import trace + from opentelemetry.sdk.trace import TracerProvider + + tracer = TracerProvider().get_tracer("test") + provider = _provider() + compensator = GuardrailCompensator(provider) + + done = threading.Event() + captured: dict[str, Any] = {} + + def _capture(request: GovernRequest) -> None: + # Runs on the worker thread but inside the captured context — + # the agent's live span should still be visible here. + ctx = trace.get_current_span().get_span_context() + captured["worker_trace_id_hex"] = ( + format(ctx.trace_id, "032x") if ctx.is_valid else "" + ) + captured["worker_thread_name"] = threading.current_thread().name + done.set() + + provider.compensate.side_effect = _capture + + with tracer.start_as_current_span("agent-run") as span: + expected = format(span.get_span_context().trace_id, "032x") + compensator.submit( + _rules("pii_detection"), + {"content": "x"}, + "before_model", + "2026-06-06T00:00:00Z", + "agent", + "rt", + ) + assert done.wait(timeout=2.0), "compensation worker never ran" + + # Worker ran on the dedicated pool thread (not the caller). + assert captured["worker_thread_name"].startswith("governance-compensation") + # And the captured contextvars context propagated the OTel span across + # the thread hop — the worker sees the same trace_id the agent saw. 
+ assert captured["worker_trace_id_hex"] == expected + + +# --------------------------------------------------------------------------- +# Cross-instance isolation — the architectural motivation for the refactor +# --------------------------------------------------------------------------- + + +def test_two_compensators_do_not_share_pool_or_semaphore() -> None: + """Parallel runtimes cannot saturate each other's compensation pool.""" + p1 = _provider() + p2 = _provider() + c1 = GuardrailCompensator(p1) + c2 = GuardrailCompensator(p2) + + assert c1._pool is not c2._pool + assert c1._inflight is not c2._inflight + + # Drain c1's semaphore to its cap; c2 must remain unaffected. + drained = threading.BoundedSemaphore(1) + drained.acquire() + c1._inflight = drained + + _run_inline(c2) + c2.submit(_rules("pii_detection"), {}, "before_model", "ts", "a", "r") + p2.compensate.assert_called_once() + p1.compensate.assert_not_called() + + +# --------------------------------------------------------------------------- +# Lifecycle — bounded atexit + weakref tracking (mirrors AuditManager pattern) +# --------------------------------------------------------------------------- + + +def test_three_compensators_register_one_process_atexit_hook() -> None: + """N compensators → 1 atexit registration, not N. + + Regression: a per-instance ``atexit.register(self.close)`` would + grow the atexit list linearly. The fix routes everyone through one + process-level cleanup hook keyed by a WeakSet. 
+ """ + with patch.object(guardrail_compensation.atexit, "register") as mock_register: + guardrail_compensation._atexit_registered = False + GuardrailCompensator(_provider()) + GuardrailCompensator(_provider()) + GuardrailCompensator(_provider()) + assert mock_register.call_count == 1, ( + "Each compensator must NOT register its own atexit handler" + ) + + +def test_disposed_compensator_can_be_garbage_collected() -> None: + """The WeakSet must NOT keep a disposed compensator alive.""" + import weakref + + compensator = GuardrailCompensator(_provider()) + ref = weakref.ref(compensator) + + assert compensator in guardrail_compensation._live_compensators + + compensator.close() + del compensator + gc.collect() + + assert ref() is None, ( + "GuardrailCompensator kept alive — strong reference leak in cleanup machinery" + ) + + +def test_process_cleanup_handles_already_closed_compensator() -> None: + """If a compensator was explicitly closed, the process hook is a no-op for it.""" + c = GuardrailCompensator(_provider()) + c.close() + # Must not raise. + guardrail_compensation._process_cleanup_compensators() + + +def test_close_is_idempotent() -> None: + """Calling close() twice is a logged no-op, not a crash.""" + c = GuardrailCompensator(_provider()) + c.close() + c.close() # must not raise diff --git a/tests/test_loader.py b/tests/test_loader.py deleted file mode 100644 index 87e453b..0000000 --- a/tests/test_loader.py +++ /dev/null @@ -1,307 +0,0 @@ -"""Tests for the policy loader. - -Provider-only world: each :class:`PolicyLoader` is instance-scoped and -bound to one :class:`GovernancePolicyProvider`. Tests here cover the -caching, prefetch coordination, and fallback-to-empty behavior -independent of any specific provider. End-to-end provider plumbing -(mode application, YAML parsing, runtime wrapper integration) lives in -:mod:`tests.test_governance_runtime`. 
- -The loader no longer reads the governance feature flag — deciding -whether governance attaches at all is the wiring layer's concern, not -the loader's. -""" - -from __future__ import annotations - -import threading -import time -from typing import Any -from unittest.mock import patch - -from uipath.core.governance import ( - EnforcementMode, - PolicyContext, - PolicyResponse, -) - -from tests._helpers import StubPolicyProvider -from uipath.runtime.governance.native import loader as loader_mod -from uipath.runtime.governance.native.loader import PolicyLoader -from uipath.runtime.governance.native.models import PolicyIndex - -SIMPLE_POLICY_YAML = """ -standard: test-pack -version: "1.0" -rules: - - id: r1 - hook: before_model - checks: - - type: regex - patterns: ["leak"] -""" - - -def _ok_response() -> PolicyResponse: - return PolicyResponse(mode=EnforcementMode.AUDIT, policies=SIMPLE_POLICY_YAML) - - -# Each test constructs a fresh ``PolicyLoader`` — no shared state to reset. - - -# --------------------------------------------------------------------------- -# _empty_index_reason — diagnostic string for the "no policies" log -# --------------------------------------------------------------------------- - - -def test_empty_index_reason_no_provider() -> None: - msg = PolicyLoader(None)._empty_index_reason() - assert "no policy provider" in msg - - -def test_empty_index_reason_with_provider() -> None: - msg = PolicyLoader(StubPolicyProvider(response=_ok_response()))._empty_index_reason() - assert "provider returned no policies" in msg - - -# --------------------------------------------------------------------------- -# load_policy_index — synchronous entry point -# --------------------------------------------------------------------------- - - -def test_load_policy_index_empty_when_no_provider() -> None: - """No provider supplied → empty PolicyIndex.""" - index = PolicyLoader(None).load_policy_index() - assert isinstance(index, PolicyIndex) - assert 
index.total_rules == 0 - - -def test_load_policy_index_uses_provider() -> None: - provider = StubPolicyProvider(response=_ok_response()) - - index = PolicyLoader(provider).load_policy_index() - - assert isinstance(index, PolicyIndex) - assert "test-pack" in index.pack_names - assert len(provider.calls) == 1 - - -def test_load_policy_index_returns_empty_when_provider_raises() -> None: - provider = StubPolicyProvider(raises=RuntimeError("boom")) - index = PolicyLoader(provider).load_policy_index() - assert index.total_rules == 0 - - -# --------------------------------------------------------------------------- -# get_policy_index — caching -# --------------------------------------------------------------------------- - - -def test_get_policy_index_caches_after_first_call() -> None: - """A second call returns the cached index without re-invoking the provider.""" - provider = StubPolicyProvider(response=_ok_response()) - loader = PolicyLoader(provider) - - a = loader.get_policy_index() - b = loader.get_policy_index() - - assert a is b - assert len(provider.calls) == 1 - - -def test_get_policy_index_sync_load_when_no_prefetch() -> None: - """Without a prefetch in flight, get_policy_index synchronously loads.""" - loader = PolicyLoader(StubPolicyProvider(response=_ok_response())) - index = loader.get_policy_index() - assert index.total_rules == 1 - - -def test_get_policy_index_empty_with_no_provider() -> None: - """No provider supplied → cached empty index, provider never invoked.""" - loader = PolicyLoader(None) - a = loader.get_policy_index() - b = loader.get_policy_index() - assert a is b - assert a.total_rules == 0 - - -# --------------------------------------------------------------------------- -# Prefetch — idempotency + completion + timeout -# --------------------------------------------------------------------------- - - -def test_prefetch_no_op_when_provider_is_none() -> None: - """No provider → prefetch is a no-op (no thread, no event).""" - loader = 
PolicyLoader(None) - loader.prefetch() - assert loader._prefetch_event is None - - -def test_prefetch_is_idempotent() -> None: - """Second call while first is in flight is a no-op (no second thread).""" - block = threading.Event() - - def _slow_get(context: PolicyContext) -> PolicyResponse: - block.wait(timeout=2.0) - return _ok_response() - - provider: Any = type("P", (), {"get_policy": staticmethod(_slow_get)})() - loader = PolicyLoader(provider) - - loader.prefetch() - first_event = loader._prefetch_event - loader.prefetch() - assert loader._prefetch_event is first_event - block.set() - if first_event is not None: - first_event.wait(timeout=2.0) - - -def test_prefetch_no_op_when_index_already_loaded() -> None: - """If the index is already cached, prefetch is a no-op.""" - provider = StubPolicyProvider(response=_ok_response()) - loader = PolicyLoader(provider) - loader.get_policy_index() # populate the cache - - loader.prefetch() - - assert len(provider.calls) == 1 - - -def test_get_policy_index_waits_for_prefetch_then_returns() -> None: - """When a prefetch is in flight, get_policy_index waits for completion.""" - started = threading.Event() - release = threading.Event() - - def _fetch(context: PolicyContext) -> PolicyResponse: - started.set() - release.wait(timeout=2.0) - return _ok_response() - - provider: Any = type("P", (), {"get_policy": staticmethod(_fetch)})() - loader = PolicyLoader(provider) - - loader.prefetch() - assert started.wait(timeout=2.0) - threading.Thread( - target=lambda: (time.sleep(0.05), release.set()), daemon=True - ).start() - index = loader.get_policy_index() - assert index.total_rules == 1 - - -def test_get_policy_index_logs_when_prefetch_completes_with_empty_index() -> None: - """The 'completed but produced no PolicyIndex' branch fires on provider failure. 
- - Manually wire a completed event without populating ``_policy_index`` — - simulates a prefetch worker that hit an unexpected error after the - event was claimed but before the index was set. - """ - loader = PolicyLoader(StubPolicyProvider(response=_ok_response())) - event = threading.Event() - event.set() - loader._prefetch_event = event - - with patch.object(loader_mod.logger, "warning") as mock_warning: - index = loader.get_policy_index() - - assert index.total_rules == 0 - assert any( - "completed but produced no PolicyIndex" in str(call.args[0]) - for call in mock_warning.call_args_list - ) - - -# --------------------------------------------------------------------------- -# available_packs / clear_cache -# --------------------------------------------------------------------------- - - -def test_available_packs_before_load_returns_empty() -> None: - assert PolicyLoader(None).available_packs == [] - - -def test_available_packs_after_load() -> None: - loader = PolicyLoader(StubPolicyProvider(response=_ok_response())) - loader.get_policy_index() - assert "test-pack" in loader.available_packs - - -def test_clear_cache_forces_refetch() -> None: - provider = StubPolicyProvider(response=_ok_response()) - loader = PolicyLoader(provider) - - loader.get_policy_index() - loader.clear_cache() - loader.get_policy_index() - - assert len(provider.calls) == 2 - - -def test_clear_cache_drops_in_flight_worker_result() -> None: - """A worker spawned before ``clear_cache`` must not clobber state after it. - - The race: ``prefetch()`` starts a worker, ``clear_cache()`` retires - the prefetch event, then the worker finishes and (incorrectly, - before the fix) writes its loaded index back over the cleared - cache. With the fix the worker checks ``_prefetch_event is event`` - before publishing and discards its result when orphaned. 
- """ - block = threading.Event() - - def _slow_get(context: PolicyContext) -> PolicyResponse: - block.wait(timeout=2.0) - return _ok_response() - - provider: Any = type("P", (), {"get_policy": staticmethod(_slow_get)})() - loader = PolicyLoader(provider) - - loader.prefetch() - captured_event = loader._prefetch_event - assert captured_event is not None # prefetch actually started - - # Retire the in-flight worker. - loader.clear_cache() - assert loader._policy_index is None - assert loader._prefetch_event is None - - # Release the worker; let it finish and try to publish. - block.set() - assert captured_event.wait(timeout=2.0) - - # The orphan worker's result must NOT land in the cache. - assert loader._policy_index is None - - -# --------------------------------------------------------------------------- -# Cross-instance isolation — the whole point of instance-scoped state -# --------------------------------------------------------------------------- - - -def test_two_loaders_do_not_share_cache() -> None: - """Concurrent loaders maintain independent caches. - - ``uipath eval`` runs multiple runtimes in parallel; each gets its - own loader and must not leak its cached PolicyIndex into the next. 
- """ - p1 = StubPolicyProvider(response=_ok_response()) - p2 = StubPolicyProvider(response=_ok_response()) - l1 = PolicyLoader(p1) - l2 = PolicyLoader(p2) - - l1.get_policy_index() - l2.get_policy_index() - - assert len(p1.calls) == 1 - assert len(p2.calls) == 1 - - -def test_two_loaders_carry_independent_conversational_selectors() -> None: - """Each loader threads its own selector into PolicyContext.""" - p1 = StubPolicyProvider(response=_ok_response()) - p2 = StubPolicyProvider(response=_ok_response()) - PolicyLoader(p1, is_conversational=True).load_policy_index() - PolicyLoader(p2, is_conversational=False).load_policy_index() - - assert p1.calls[0].is_conversational is True - assert p2.calls[0].is_conversational is False diff --git a/tests/test_text_extraction.py b/tests/test_text_extraction.py new file mode 100644 index 0000000..e163932 --- /dev/null +++ b/tests/test_text_extraction.py @@ -0,0 +1,307 @@ +"""Tests for ``_extract_governable_text`` content extraction. + +Replaces the old ``str(value)[:2000]`` path in ``_check_before_agent`` +and ``_check_after_agent``. Pulls clean text out of structured shapes +(dicts, list-of-blocks, pydantic models) instead of letting dict-repr +noise leak into the regex-scanned blob. +""" + +from __future__ import annotations + +from dataclasses import dataclass + +import pytest + +# The wrapper lands in a later slice of the governance stack; skip (don't +# error at collection) when it isn't present yet. 
+_wrapper = pytest.importorskip( + "uipath.runtime.governance.wrapper", + reason="governance wrapper not yet present in this slice", +) +_GOVERNANCE_TEXT_CAP = _wrapper._GOVERNANCE_TEXT_CAP +_extract_governable_text = _wrapper._extract_governable_text + + +def test_plain_string_passes_through() -> None: + assert _extract_governable_text("hello world") == "hello world" + + +def test_none_returns_empty() -> None: + assert _extract_governable_text(None) == "" + + +def test_dict_with_content_key_extracts_content_first() -> None: + """The classic coded-agent output shape — content comes through clean.""" + out = _extract_governable_text( + {"content": "Estimated cost: $780", "_meta": {"id": "abc"}} + ) + assert out.startswith("Estimated cost: $780") + # No dict-syntax noise — the prior str(...) path produced ``{'content': '...'}``. + assert "{'content'" not in out + assert "'_meta'" not in out + + +def test_dict_priority_keys_lead() -> None: + """``content`` / ``text`` / etc. lead before remaining keys.""" + out = _extract_governable_text( + {"trailing_meta": "noise-meta", "content": "primary-text"} + ) + assert out.index("primary-text") < out.index("noise-meta") + + +def test_list_of_text_blocks_concatenates() -> None: + """Anthropic-style content blocks.""" + out = _extract_governable_text( + [ + {"type": "text", "text": "first part"}, + {"type": "image", "source": {"data": "..."}}, + {"type": "text", "text": "second part"}, + ] + ) + assert "first part" in out + assert "second part" in out + + +def test_openai_function_call_shape_extracts_arguments() -> None: + """``arguments`` field on OpenAI-style function-call blocks.""" + out = _extract_governable_text( + [ + { + "type": "function_call", + "name": "end_execution", + "arguments": '{"content":"Cost: $1,200"}', + "id": "fc_abc", + } + ] + ) + assert "Cost: $1,200" in out + + +def test_numeric_scalars_are_skipped() -> None: + """Numbers / booleans aren't governance text — they shouldn't pad the blob.""" + out = 
_extract_governable_text( + {"content": "hello", "count": 42, "ok": True, "rate": 3.14} + ) + assert out == "hello" + + +def test_pydantic_like_model_dump_is_walked() -> None: + """Anything with ``model_dump()`` is walked as its dict form.""" + + class Stub: + def model_dump(self) -> dict: + return {"content": "from pydantic"} + + assert _extract_governable_text(Stub()) == "from pydantic" + + +def test_dataclass_via_dict_method() -> None: + """Objects exposing a ``dict()`` callable also walk via that path.""" + + class Stub: + def dict(self) -> dict: + return {"content": "from dict"} + + assert _extract_governable_text(Stub()) == "from dict" + + +def test_plain_object_attribute_fallback() -> None: + """Public attributes on opaque objects feed the walker.""" + + @dataclass + class Result: + content: str + _private: str = "ignored" + + out = _extract_governable_text(Result(content="visible")) + assert "visible" in out + assert "ignored" not in out + + +def test_cycle_in_structure_does_not_recurse_forever() -> None: + a: dict = {"content": "outer"} + b: dict = {"loop": a} + a["loop"] = b + # Should return without recursing infinitely. 
+ out = _extract_governable_text(a) + assert "outer" in out + + +def test_text_is_capped_at_budget() -> None: + """Long content is truncated so a runaway payload can't dominate scans.""" + big = "x" * (_GOVERNANCE_TEXT_CAP + 1000) + out = _extract_governable_text(big) + assert len(out) == _GOVERNANCE_TEXT_CAP + + +def test_nested_dict_content_extracted() -> None: + """LangGraph-style state with messages nested under a key.""" + out = _extract_governable_text( + { + "messages": [ + {"role": "user", "content": "hi"}, + {"role": "assistant", "content": "Cost: $50"}, + ] + } + ) + assert "Cost: $50" in out + + +def test_unknown_block_type_with_no_text_returns_empty() -> None: + """Image-only block with no text payload contributes nothing.""" + out = _extract_governable_text( + [{"type": "image", "source": {"type": "base64", "data": "..."}}] + ) + # Could be empty or contain just the base64 data — but should NOT + # contain Python dict syntax characters that the old path emitted. + assert "{'type'" not in out + + +# --------------------------------------------------------------------------- +# Budget — 64K is the current cap (raised from 8K to fit multi-turn chat). +# --------------------------------------------------------------------------- + + +def test_budget_cap_is_64k() -> None: + """Documents the cap so a future drop won't go unnoticed.""" + assert _GOVERNANCE_TEXT_CAP == 64000 + + +# --------------------------------------------------------------------------- +# Reverse list iteration — latest entry gets the budget first. +# --------------------------------------------------------------------------- + + +def test_lists_are_walked_in_reverse() -> None: + """Latest list entry leads the extracted blob. + + Critical for chat history: the new user message lives at the end of + the messages list and must be visible even when prior turns would + otherwise fill the budget first. 
+ """ + out = _extract_governable_text( + [{"text": "earliest"}, {"text": "middle"}, {"text": "latest"}] + ) + assert out.index("latest") < out.index("middle") < out.index("earliest") + + +def test_long_chat_history_keeps_latest_user_message() -> None: + """A long history must not push the latest message out of the budget. + + Regression for the prior 8K-cap + forward-walk combination, which + silently dropped the latest user message once the conversation + grew past ~7,800 chars of prior content. + """ + bulky_prior = "x" * 2000 + messages = [{"role": "user", "content": bulky_prior}] * 40 # ~80K chars + messages.append({"role": "user", "content": "Cost: $1,200 — latest"}) + + out = _extract_governable_text({"messages": messages}) + assert "Cost: $1,200 — latest" in out + + +# --------------------------------------------------------------------------- +# latest_only — BEFORE_AGENT in a conversational agent +# --------------------------------------------------------------------------- + + +def test_latest_only_extracts_just_the_last_list_item() -> None: + """``latest_only=True`` drops every list entry but the last one.""" + out = _extract_governable_text( + { + "messages": [ + {"role": "user", "content": "old message"}, + {"role": "assistant", "content": "old response"}, + {"role": "user", "content": "Cost: $1,200"}, + ] + }, + latest_only=True, + ) + assert "Cost: $1,200" in out + assert "old message" not in out + assert "old response" not in out + + +def test_latest_only_resets_inside_chosen_item() -> None: + """Multi-block content inside the latest message is still walked fully. + + ``latest_only`` reduces the OUTER list (chat history) to its last + entry, but multi-block content (text + tool_call + thinking) + inside that latest message must still be extracted in full — + otherwise we'd lose answer text that arrives in a non-final block. 
+ """ + out = _extract_governable_text( + { + "messages": [ + {"role": "user", "content": "old"}, + { + "role": "assistant", + "content": [ + {"type": "text", "text": "part A"}, + { + "type": "function_call", + "arguments": '{"answer":"part B"}', + }, + ], + }, + ] + }, + latest_only=True, + ) + assert "part A" in out + assert "part B" in out + assert "old" not in out + + +def test_latest_only_top_level_list() -> None: + """``latest_only`` applies when the input itself is a list.""" + out = _extract_governable_text( + [ + {"content": "history item 1"}, + {"content": "history item 2"}, + {"content": "latest input"}, + ], + latest_only=True, + ) + assert "latest input" in out + assert "history item 1" not in out + assert "history item 2" not in out + + +def test_latest_only_default_false_still_walks_all() -> None: + """Default behavior unchanged — AFTER_AGENT etc. still see everything.""" + out = _extract_governable_text( + { + "messages": [ + {"role": "user", "content": "first"}, + {"role": "user", "content": "second"}, + ] + } + ) + assert "first" in out + assert "second" in out + + +def test_latest_only_empty_list_is_empty() -> None: + """Empty history → empty extraction.""" + assert _extract_governable_text({"messages": []}, latest_only=True) == "" + + +def test_messages_is_a_priority_content_key() -> None: + """``messages`` (plural) leads ahead of non-priority keys. + + Without ``messages`` in the priority list, an input that also + carries siblings like ``thread_id`` / ``metadata`` could siphon + budget before the actual chat history is walked. 
+ """ + out = _extract_governable_text( + { + "thread_id": "abc-xyz", + "metadata": {"foo": "bar"}, + "messages": [{"role": "user", "content": "primary content"}], + } + ) + assert "primary content" in out + assert out.index("primary content") < ( + out.find("abc-xyz") if "abc-xyz" in out else len(out) + ) diff --git a/tests/test_traces_severity.py b/tests/test_traces_severity.py new file mode 100644 index 0000000..ce09845 --- /dev/null +++ b/tests/test_traces_severity.py @@ -0,0 +1,269 @@ +"""Tests for trace-span verbosity / status semantics. + +``TracesAuditSink`` emits an OpenTelemetry span for every governance +hook end and every rule evaluation. The contract follows §4 of the +cross-product unification doc — verdict is split into ``evaluator_result`` +(what the rule decided, mode-independent) and ``action_applied`` (what +actually happened, derived from evaluator_result + mode). + +Mode travels with the event (set by the evaluator from the per-runtime +:attr:`UiPathGovernedRuntime.enforcement_mode` the host supplied) so +parallel runtimes running different modes don't cross-contaminate the +sink's view. + +- ``verbosityLevel = 4`` (Error) and ``StatusCode.ERROR`` fire **only** + when ``action_applied = DENY`` — i.e. the runtime actually blocked + the agent (ENFORCE mode + configured action ``deny``). +- ``verbosityLevel = 3`` (Warning) and ``Status.UNSET`` for advisory + outcomes (``action_applied`` in ``{AUDIT, HITL}``). HITL is its own + spec bucket — escalation pauses for human review, it doesn't fail + the run, so it stays Warning even in ENFORCE mode. +- Hook spans never set Status, regardless of mode or final_action. + They're summary containers; severity belongs on the per-rule span. +- ``ALLOW`` / ``NONE`` results leave verbosityLevel unset (Orchestrator + default = 2, Information) and never call set_status. 
+""" + +from __future__ import annotations + +from unittest.mock import MagicMock + +import pytest +from uipath.core.governance import EnforcementMode + +from uipath.runtime.governance._audit.base import AuditEvent, EventType +from uipath.runtime.governance._audit.traces import TracesAuditSink + + +@pytest.fixture +def captured_span(monkeypatch: pytest.MonkeyPatch) -> MagicMock: + """Wire ``TracesAuditSink`` to a mock tracer and return the span mock.""" + span = MagicMock(name="span") + tracer = MagicMock(name="tracer") + tracer.start_as_current_span.return_value.__enter__.return_value = span + tracer.start_as_current_span.return_value.__exit__.return_value = False + monkeypatch.setattr(TracesAuditSink, "_get_tracer", lambda self: tracer) + return span + + +def _hook_event(final_action: str, mode: EnforcementMode) -> AuditEvent: + return AuditEvent( + event_type=EventType.HOOK_END, + agent_name="agent", + hook="after_model", + data={ + "total_rules": 1, + "matched_rules": 1 if final_action != "allow" else 0, + "final_action": final_action, + "enforcement_mode": mode, + }, + ) + + +def _rule_event( + matched: bool, action: str, mode: EnforcementMode = EnforcementMode.AUDIT +) -> AuditEvent: + return AuditEvent( + event_type=EventType.RULE_EVALUATION, + agent_name="agent", + hook="after_model", + data={ + "policy_id": "A.10.4", + "rule_name": "commitment-language", + "pack_name": "iso42001", + "matched": matched, + "action": action, + "enforcement_mode": mode, + "status": "MATCHED" if matched else "PASS", + "detail": "Customer-binding commitment detected.", + }, + ) + + +def _span_attrs(span: MagicMock) -> dict[str, object]: + """Return a mapping of attribute name → value for set_attribute calls.""" + attrs: dict[str, object] = {} + for call in span.set_attribute.call_args_list: + key, value = call.args + attrs[key] = value + return attrs + + +# --------------------------------------------------------------------------- +# Hook span — never marked ERROR +# 
--------------------------------------------------------------------------- + + +@pytest.mark.parametrize( + "final_action,mode", + [ + ("deny", EnforcementMode.ENFORCE), + ("deny", EnforcementMode.AUDIT), + ("audit", EnforcementMode.AUDIT), + ("escalate", EnforcementMode.AUDIT), + ("allow", EnforcementMode.AUDIT), + ], +) +def test_hook_span_never_sets_error( + captured_span: MagicMock, final_action: str, mode: EnforcementMode +) -> None: + """Hook spans are summary containers — they never carry an ERROR Status.""" + sink = TracesAuditSink() + sink.emit(_hook_event(final_action=final_action, mode=mode)) + assert not captured_span.set_status.called, ( + f"Hook span should never set_status; called with " + f"final_action={final_action!r}, mode={mode!r}" + ) + + +# --------------------------------------------------------------------------- +# Rule span — enforce-mode DENY is the only Status.ERROR case +# --------------------------------------------------------------------------- + + +def test_enforce_mode_deny_is_error(captured_span: MagicMock) -> None: + """Enforce mode + action=deny = real block → verbosityLevel=4 + Status.ERROR.""" + sink = TracesAuditSink() + sink.emit(_rule_event(matched=True, action="deny", mode=EnforcementMode.ENFORCE)) + + attrs = _span_attrs(captured_span) + assert attrs.get("verbosityLevel") == 4 + assert attrs.get("uipath_governance.evaluator_result") == "DENY" + assert attrs.get("uipath_governance.action_applied") == "DENY" + assert attrs.get("uipath_governance.mode") == "ENFORCE" + + assert captured_span.set_status.called, ( + "Status.ERROR must fire for enforce-mode deny violation" + ) + (status_arg,) = captured_span.set_status.call_args.args + from opentelemetry.trace import Status, StatusCode + + assert isinstance(status_arg, Status) + assert status_arg.status_code is StatusCode.ERROR + assert "commitment-language" in status_arg.description + assert "deny" in status_arg.description + + +def 
test_enforce_mode_escalate_is_hitl_warning(captured_span: MagicMock) -> None: + """Enforce mode + action=escalate = HITL pause, not a block. + + HITL is its own spec bucket distinct from DENY — escalation pauses + for human review, the run isn't failed. So verbosityLevel stays at + Warning and Status is not marked ERROR. + """ + sink = TracesAuditSink() + sink.emit(_rule_event(matched=True, action="escalate", mode=EnforcementMode.ENFORCE)) + + attrs = _span_attrs(captured_span) + assert attrs.get("verbosityLevel") == 3 + assert attrs.get("uipath_governance.evaluator_result") == "HITL" + assert attrs.get("uipath_governance.action_applied") == "HITL" + assert attrs.get("uipath_governance.mode") == "ENFORCE" + assert not captured_span.set_status.called + + +# --------------------------------------------------------------------------- +# Rule span — advisory violations (audit mode, or audit-action rules) +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize( + "action,expected_evaluator", + [("deny", "DENY"), ("audit", "DENY"), ("escalate", "HITL")], +) +def test_audit_mode_violation_is_warning( + captured_span: MagicMock, action: str, expected_evaluator: str +) -> None: + """Audit mode never blocks → action_applied=AUDIT, verbosityLevel=3. + + Surfacing Status.ERROR for an audit-mode violation would falsely + mark the agent's run as failed when the runtime intentionally + let it through. evaluator_result still records the rule's actual + decision (DENY/HITL), independent of mode. 
+ """ + sink = TracesAuditSink() + sink.emit(_rule_event(matched=True, action=action, mode=EnforcementMode.AUDIT)) + + attrs = _span_attrs(captured_span) + assert attrs.get("verbosityLevel") == 3 + assert attrs.get("uipath_governance.evaluator_result") == expected_evaluator + assert attrs.get("uipath_governance.action_applied") == "AUDIT" + assert attrs.get("uipath_governance.mode") == "AUDIT" + + assert not captured_span.set_status.called, ( + f"Audit-mode {action} violation must NOT set Status.ERROR" + ) + + +def test_enforce_mode_audit_action_is_warning(captured_span: MagicMock) -> None: + """Enforce mode + action=audit is a per-rule audit override. + + The rule's configured ``audit`` action means "log this match but + don't block" even when the global mode is ENFORCE. evaluator_result + is DENY (the rule decided to deny), but action_applied is AUDIT + (the per-rule override kicks in), so verbosity stays Warning. + """ + sink = TracesAuditSink() + sink.emit(_rule_event(matched=True, action="audit", mode=EnforcementMode.ENFORCE)) + + attrs = _span_attrs(captured_span) + assert attrs.get("verbosityLevel") == 3 + assert attrs.get("uipath_governance.evaluator_result") == "DENY" + assert attrs.get("uipath_governance.action_applied") == "AUDIT" + assert attrs.get("uipath_governance.mode") == "ENFORCE" + assert not captured_span.set_status.called + + +# --------------------------------------------------------------------------- +# Rule span — no violation, no verbosityLevel attribute (Orchestrator default = 2) +# --------------------------------------------------------------------------- + + +def test_unmatched_rule_no_verbosity_no_error(captured_span: MagicMock) -> None: + """Unmatched evaluations → evaluator_result=ALLOW, action_applied=NONE, quiet.""" + sink = TracesAuditSink() + sink.emit(_rule_event(matched=False, action="deny", mode=EnforcementMode.ENFORCE)) + + attrs = _span_attrs(captured_span) + assert "verbosityLevel" not in attrs + assert 
attrs.get("uipath_governance.evaluator_result") == "ALLOW" + assert attrs.get("uipath_governance.action_applied") == "NONE" + assert not captured_span.set_status.called + + +def test_matched_allow_action_no_verbosity(captured_span: MagicMock) -> None: + """A rule whose action is 'allow' is an explicit non-violation.""" + sink = TracesAuditSink() + sink.emit(_rule_event(matched=True, action="allow", mode=EnforcementMode.ENFORCE)) + + attrs = _span_attrs(captured_span) + assert "verbosityLevel" not in attrs + assert attrs.get("uipath_governance.evaluator_result") == "ALLOW" + assert attrs.get("uipath_governance.action_applied") == "NONE" + assert not captured_span.set_status.called + + +# --------------------------------------------------------------------------- +# Cross-runtime isolation — the architectural motivation for the refactor +# --------------------------------------------------------------------------- + + +def test_two_events_carry_independent_modes(captured_span: MagicMock) -> None: + """Parallel runtimes (different modes) cannot cross-contaminate the sink. + + Previously the sink read mode via a process-global; an ENFORCE + runtime's emit could clobber an AUDIT runtime's span. With mode on + the event, two consecutive emits with different modes each render + their own correct ``uipath_governance.mode`` value. + """ + sink = TracesAuditSink() + + sink.emit(_rule_event(matched=True, action="deny", mode=EnforcementMode.ENFORCE)) + sink.emit(_rule_event(matched=True, action="deny", mode=EnforcementMode.AUDIT)) + + # Collect every set_attribute call ordered by emit. + calls = [c.args for c in captured_span.set_attribute.call_args_list] + modes = [v for k, v in calls if k == "uipath_governance.mode"] + actions_applied = [v for k, v in calls if k == "uipath_governance.action_applied"] + assert modes == ["ENFORCE", "AUDIT"] + assert actions_applied == ["DENY", "AUDIT"]